├── __init__.py ├── utils ├── __init__.py ├── upsample.py └── register.py ├── core ├── losses │ ├── ghm_loss.py │ ├── balanced_l1_loss.py │ ├── __init__.py │ ├── l1_loss.py │ ├── cross_entropy.py │ └── generalized_focal_loss.py ├── layers │ ├── csrc │ │ └── CMakeLists.txt │ ├── deformable_roi_pooling.py │ ├── activations.py │ ├── scale.py │ ├── max_in_out.py │ ├── nearest_upsamling.py │ ├── proposal_layer.py │ ├── __init__.py │ ├── position_sensitive_average_pooling.py │ ├── drop_block.py │ └── weight_standardization_conv2d.py ├── anchors │ ├── ssd_anchor_generator.py │ └── __init__.py ├── samplers │ ├── iou_balanced_negative_sampler.py │ ├── instance_balanced_positive_sampler.py │ ├── __init__.py │ ├── combined_sampler.py │ ├── random_sampler.py │ ├── pseudo_sampler.py │ ├── ohem_sampler.py │ └── sampler.py ├── metrics │ ├── __init__.py │ └── no_op_metric.py ├── learning_rate_schedules │ ├── __init__.py │ └── step_decay.py ├── optimizers │ ├── __init__.py │ ├── accum_optimizer.py │ └── lookahead_optimizer.py ├── assigners │ ├── __init__.py │ ├── assigner.py │ ├── uniform_assigner.py │ └── min_cost_assigner.py ├── __init__.py ├── bbox │ ├── __init__.py │ ├── bbox_transform.py │ └── overlaps.py └── builder.py ├── models ├── detectors │ ├── paa.py │ ├── detector.py │ ├── __init__.py │ ├── one_stage.py │ ├── two_stage.py │ └── detr.py ├── backbones │ ├── acnet.py │ ├── repvgg.py │ ├── shufflenet_v1.py │ ├── shufflenet_v2.py │ ├── darknet53.py │ ├── __init__.py │ └── backbone.py ├── heads │ ├── bbox_heads │ │ └── __init__.py │ ├── dense_heads │ │ ├── __init__.py │ │ └── retinanet_head.py │ ├── roi_heads │ │ ├── __init__.py │ │ └── base_roi_head.py │ ├── anchor_free_heads │ │ └── __init__.py │ ├── __init__.py │ └── head.py ├── __init__.py ├── builder.py └── necks │ ├── __init__.py │ ├── feature_fusion_pyramid.py │ ├── path_aggregation_neck.py │ └── dlaup.py ├── data ├── images │ └── panda.jpg ├── __init__.py ├── datasets │ ├── __init__.py │ └── dataset.py ├── builder.py └── augmentations │ ├── __init__.py │ └── mixup.py ├── logs ├── events.out.tfevents.1617251244.bail.141074.7195.v2 ├── events.out.tfevents.1617251266.bail.141421.7195.v2 ├── events.out.tfevents.1617253323.bail.168083.21721.v2 ├── events.out.tfevents.1617253358.bail.168621.21721.v2 ├── events.out.tfevents.1617253447.bail.169883.21721.v2 ├── events.out.tfevents.1617253477.bail.170342.21721.v2 ├── events.out.tfevents.1617253496.bail.170673.21721.v2 ├── events.out.tfevents.1617253615.bail.172245.21721.v2 ├── events.out.tfevents.1617672692.bail.4032193.7195.v2 ├── events.out.tfevents.1617672814.bail.4033287.7372.v2 ├── events.out.tfevents.1617673033.bail.4036164.7374.v2 ├── events.out.tfevents.1617673230.bail.4038971.7374.v2 ├── events.out.tfevents.1617673732.bail.4045563.7374.v2 ├── events.out.tfevents.1617673754.bail.4045938.7374.v2 ├── events.out.tfevents.1617673850.bail.4047306.7374.v2 ├── events.out.tfevents.1617673895.bail.4047984.7374.v2 ├── events.out.tfevents.1617673960.bail.4048987.7374.v2 ├── events.out.tfevents.1617673981.bail.4049390.7374.v2 ├── events.out.tfevents.1617674034.bail.4050216.7374.v2 ├── events.out.tfevents.1617674077.bail.4050915.7374.v2 ├── events.out.tfevents.1617674147.bail.4051983.7374.v2 ├── events.out.tfevents.1617674268.bail.4053636.7376.v2 ├── events.out.tfevents.1617674295.bail.4054108.7377.v2 ├── events.out.tfevents.1617674328.bail.4054652.7382.v2 ├── events.out.tfevents.1617674400.bail.4055796.7376.v2 ├── events.out.tfevents.1617674438.bail.4056411.7376.v2 ├── 
events.out.tfevents.1617674491.bail.4057234.7376.v2 ├── events.out.tfevents.1617674515.bail.4057654.7377.v2 ├── events.out.tfevents.1617674556.bail.4058324.7377.v2 ├── events.out.tfevents.1617674688.bail.4060116.7377.v2 ├── events.out.tfevents.1617674720.bail.4060652.7377.v2 ├── events.out.tfevents.1617674978.bail.4064067.7377.v2 ├── events.out.tfevents.1617675005.bail.4064577.7377.v2 ├── events.out.tfevents.1617675053.bail.4065391.7377.v2 ├── events.out.tfevents.1617675271.bail.4068526.7373.v2 ├── events.out.tfevents.1617675471.bail.4071386.7375.v2 ├── events.out.tfevents.1617675640.bail.4073700.7379.v2 ├── events.out.tfevents.1617675886.bail.4076950.7379.v2 ├── events.out.tfevents.1617675939.bail.4077770.7379.v2 ├── events.out.tfevents.1617675973.bail.4078309.7379.v2 ├── events.out.tfevents.1617676041.bail.4079359.7382.v2 ├── events.out.tfevents.1617676071.bail.4079888.7380.v2 ├── events.out.tfevents.1617676113.bail.4080534.7380.v2 ├── events.out.tfevents.1617676313.bail.4083267.7381.v2 ├── events.out.tfevents.1617676338.bail.4083710.7382.v2 ├── events.out.tfevents.1617676398.bail.4084624.7386.v2 ├── events.out.tfevents.1617676432.bail.4085178.7390.v2 ├── events.out.tfevents.1617676473.bail.4085910.7390.v2 └── events.out.tfevents.1617676528.bail.4086783.7377.v2 ├── trainers └── __init__.py ├── README.md ├── .gitignore ├── create_coco_dataset.py ├── configs ├── __init__.py └── onenet_config.py ├── train.py ├── yamls ├── gfl_x101_32x4d_fpn_mstrain_2x_coco.yaml ├── gfl_r50_fpn_1x_coco.yaml ├── gfl_r101_fpn_mstrain_2x_coco.yaml ├── gfl_r50_fpn_mstrain_2x_coco.yaml ├── gflv2_r50_fpn_1x.yaml ├── retinanet_r101_fpn_2x_coco.yaml ├── retinanet_r50_fpn_2x_coco.yaml ├── retinanet_x101_32x4d_fpn_2x_coco.yaml ├── retinanet_x101_64x4d_fpn_2x_coco.yaml ├── gflv2_r101_fpn_ms2x.yaml ├── gflv2_r50_fpn_ms2x.yaml ├── atss_r50_fpn_1x_coco.yaml ├── atss_r101_fpn_1x_coco.yaml ├── YOLOF_R50_C5_1x.yaml ├── YOLOF_R101_C5_1x.yaml ├── YOLOF_X_101_64x4d_C5_1x.yaml ├── YOLOF_R101_DC5_1x.yaml └── YOLOF_R50_DC5_1x.yaml └── export_saved_model.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/losses/ghm_loss.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/detectors/paa.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/acnet.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/repvgg.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/layers/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /models/backbones/shufflenet_v1.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/shufflenet_v2.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/anchors/ssd_anchor_generator.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/layers/deformable_roi_pooling.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/anchor_free_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/samplers/iou_balanced_negative_sampler.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/samplers/instance_balanced_positive_sampler.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/darknet53.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | -------------------------------------------------------------------------------- /data/images/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/data/images/panda.jpg -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617251244.bail.141074.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617251244.bail.141074.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617251266.bail.141421.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617251266.bail.141421.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253323.bail.168083.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253323.bail.168083.21721.v2 
-------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253358.bail.168621.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253358.bail.168621.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253447.bail.169883.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253447.bail.169883.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253477.bail.170342.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253477.bail.170342.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253496.bail.170673.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253496.bail.170673.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253615.bail.172245.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253615.bail.172245.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617672692.bail.4032193.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617672692.bail.4032193.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617672814.bail.4033287.7372.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617672814.bail.4033287.7372.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673033.bail.4036164.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673033.bail.4036164.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673230.bail.4038971.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673230.bail.4038971.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673732.bail.4045563.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673732.bail.4045563.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673754.bail.4045938.7374.v2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673754.bail.4045938.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673850.bail.4047306.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673850.bail.4047306.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673895.bail.4047984.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673895.bail.4047984.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673960.bail.4048987.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673960.bail.4048987.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673981.bail.4049390.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673981.bail.4049390.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674034.bail.4050216.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674034.bail.4050216.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674077.bail.4050915.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674077.bail.4050915.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674147.bail.4051983.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674147.bail.4051983.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674268.bail.4053636.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674268.bail.4053636.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674295.bail.4054108.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674295.bail.4054108.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674328.bail.4054652.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674328.bail.4054652.7382.v2 -------------------------------------------------------------------------------- 
/logs/events.out.tfevents.1617674400.bail.4055796.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674400.bail.4055796.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674438.bail.4056411.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674438.bail.4056411.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674491.bail.4057234.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674491.bail.4057234.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674515.bail.4057654.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674515.bail.4057654.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674556.bail.4058324.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674556.bail.4058324.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674688.bail.4060116.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674688.bail.4060116.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674720.bail.4060652.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674720.bail.4060652.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674978.bail.4064067.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674978.bail.4064067.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675005.bail.4064577.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675005.bail.4064577.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675053.bail.4065391.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675053.bail.4065391.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675271.bail.4068526.7373.v2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675271.bail.4068526.7373.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675471.bail.4071386.7375.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675471.bail.4071386.7375.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675640.bail.4073700.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675640.bail.4073700.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675886.bail.4076950.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675886.bail.4076950.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675939.bail.4077770.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675939.bail.4077770.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675973.bail.4078309.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675973.bail.4078309.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676041.bail.4079359.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676041.bail.4079359.7382.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676071.bail.4079888.7380.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676071.bail.4079888.7380.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676113.bail.4080534.7380.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676113.bail.4080534.7380.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676313.bail.4083267.7381.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676313.bail.4083267.7381.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676338.bail.4083710.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676338.bail.4083710.7382.v2 -------------------------------------------------------------------------------- 
/logs/events.out.tfevents.1617676398.bail.4084624.7386.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676398.bail.4084624.7386.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676432.bail.4085178.7390.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676432.bail.4085178.7390.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676473.bail.4085910.7390.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676473.bail.4085910.7390.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676528.bail.4086783.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676528.bail.4086783.7377.v2 -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets.coco_dataset import COCODataset 2 | from .builder import build_dataset 3 | 4 | 5 | 6 | __all__ = [ 7 | "COCODataset", 8 | "build_dataset" 9 | ] 10 | 11 | -------------------------------------------------------------------------------- /core/anchors/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_generator_v2 import AnchorGeneratorV2 3 | 4 | 5 | __all__ = [ 6 | "AnchorGenerator", "AnchorGeneratorV2" 7 | ] 8 | -------------------------------------------------------------------------------- /trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_gpu_trainer import MultiGPUTrainer 2 | from .single_gpu_trainer import SingleGPUTrainer 3 | 4 | 5 | __all__ = [ 6 | "MultiGPUTrainer", "SingleGPUTrainer" 7 | ] 8 | -------------------------------------------------------------------------------- /core/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .average_precision import AP 2 | from .wider_face_ap import WiderFaceAP 3 | from .mean_average_precision import mAP 4 | 5 | 6 | __all__ = [ 7 | "AP", "WiderFaceAP", "mAP" 8 | ] 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | letsdet is a TensorFlow-based object detection library. Many of its modules are modeled on MMDetection[^1], so it also inherits some of MMDetection's traits, such as its modular design. 4 | 5 | 6 | [^1]: https://github.com/open-mmlab/mmdetection 7 | 8 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .necks import * 3 | from .heads import * 4 | from .detectors import * 5 | from .builder import build_backbone, build_neck, build_head, build_detector 6 | 7 | 8 | __all__ = [ 9 | "build_backbone", "build_neck",
"build_head", "build_detector" 10 | ] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | *.egg 9 | MANIFEST 10 | 11 | # Environments 12 | .env 13 | .venv 14 | env/ 15 | venv/ 16 | ENV/ 17 | env.bak/ 18 | venv.bak/ 19 | 20 | # vscode 21 | .vscode 22 | .idea 23 | .DS_Store 24 | 25 | -------------------------------------------------------------------------------- /core/learning_rate_schedules/__init__.py: -------------------------------------------------------------------------------- 1 | from .step_decay import StepDecay 2 | from .tflr import PolynomialDecay, ExponentialDecay, CosineDecay, LinearCosineDecay, PiecewiseConstantDecay 3 | 4 | 5 | __all__ = [ 6 | "StepDecay", "PolynomialDecay", "ExponentialDecay", "CosineDecay", 7 | "LinearCosineDecay", "PiecewiseConstantDecay" 8 | ] 9 | -------------------------------------------------------------------------------- /data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset 2 | 3 | from .objects365_dataset import Objects365Dataset 4 | 5 | 6 | DATASET = { 7 | "objects365": Objects365Dataset 8 | } 9 | 10 | 11 | def build_dataset(name, **kwargs): 12 | return DATASET[name](**kwargs).dataset() 13 | 14 | 15 | __all__ = [ 16 | "Dataset", 17 | "build_dataset" 18 | ] 19 | -------------------------------------------------------------------------------- /data/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | DATASETS = Register("dataset") 5 | AUGMENTATIONS = Register("augmentations") 6 | 7 | 8 | def build_dataset(dataset, **kwargs): 9 | return DATASETS[dataset](**kwargs).dataset() 10 | 11 | 12 | def build_augmentation(augmentation, **kwargs): 13 | return AUGMENTATIONS[augmentation](**kwargs) 14 | 15 | -------------------------------------------------------------------------------- /core/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import Sampler 2 | from .ohem_sampler import OHEMSampler 3 | from .pseudo_sampler import PseudoSampler 4 | from .random_sampler import RandomSampler 5 | from .combined_sampler import CombinedSampler 6 | 7 | 8 | __all__ =[ 9 | "Sampler", 10 | "OHEMSampler", 11 | "PseudoSampler", 12 | "RandomSampler", 13 | "CombinedSampler" 14 | ] 15 | 16 | 17 | -------------------------------------------------------------------------------- /core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .accum_optimizer import AccumOptimizer 2 | from .lookahead_optimizer import LookaheadOptimizer 3 | from .tfoptimizers import SGD, Adadelta, Adagrad, Adam, Adamax, Nadam, RMSprop 4 | from .gradient_centralization import SGDGC, AdamGC 5 | 6 | 7 | __all_ = [ 8 | "SGD", "Adadelta", "Adagrad", "Adam", "Adamax", "Nadam", "RMSprop", 9 | "SGDGC", "AdamGC", "AccumOptimizer", "LookaheadOptimizer" 10 | ] 11 | -------------------------------------------------------------------------------- /core/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos_assigner import FCOSAssigner 2 | from .atss_assigner import ATSSAssigner 3 | from 
.max_iou_assigner import MaxIoUAssigner 4 | from .uniform_assigner import UniformAssigner 5 | from .min_cost_assigner import MinCostAssigner 6 | from .center_heatmap_assigner import CenterHeatmapAssigner 7 | 8 | 9 | __all__ = [ 10 | "ATSSAssigner", 11 | "FCOSAssigner", 12 | "MaxIoUAssigner", 13 | "MinCostAssigner", 14 | "UniformAssigner", 15 | "CenterHeatmapAssigner" 16 | ] 17 | -------------------------------------------------------------------------------- /data/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | from .mosaic import Mosaic 2 | from .mixup import Mixup 3 | from .transforms import Pad 4 | from .transforms import Resize 5 | from .transforms import RandCropOrPad 6 | from .transforms import RandomDistortColor 7 | from .transforms import FlipLeftToRight 8 | from .transforms import SSDCrop 9 | 10 | 11 | __all__ = [ 12 | "Pad", 13 | "Resize", 14 | "Mixup", 15 | "Mosaic", 16 | "RandCropOrPad", 17 | "SSDCrop", 18 | "RandomDistortColor", 19 | "FlipLeftToRight", 20 | ] 21 | -------------------------------------------------------------------------------- /core/metrics/no_op_metric.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import METRICS 3 | 4 | 5 | @METRICS.register 6 | class NoOpMetric(tf.keras.metrics.Metric): 7 | def __init__(self, **kwargs): 8 | super(NoOpMetric, self).__init__(**kwargs) 9 | 10 | self.value = self.add_weight(name="no_op_value", initializer="zeros") 11 | 12 | def update_state(self, value, sample_weight=None): 13 | self.value.assign(tf.cast(value, self.value.dtype)) 14 | 15 | def result(self): 16 | return self.value 17 | -------------------------------------------------------------------------------- /models/detectors/detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from abc import abstractmethod 3 | import tensorflow as tf 4 | 5 | 6 | class Detector(metaclass=ABCMeta): 7 | def __init__(self, cfg, training=True): 8 | self.cfg = cfg 9 | self.training = training 10 | 11 | @abstractmethod 12 | def compute_losses(self, predictions, image_info): 13 | raise NotImplementedError() 14 | 15 | @abstractmethod 16 | def save_weights(self, name): 17 | raise NotImplementedError() 18 | 19 | -------------------------------------------------------------------------------- /utils/upsample.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def nearest_upsample2d(inputs, factor): 5 | # Instead of broadcasting with a 6-d tensor, we repeat pixels via a reshape and 6 | # a multiply-by-ones, which keeps the op TfLite compatible.
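7 | # Shape walk-through (illustrative), e.g. factor=2: reshaping to [bs, h, 1, w, 1, c] and multiplying by ones([1, 1, 2, 1, 2, 1]) gives [bs, h, 2, w, 2, c], 8 | # which the final reshape turns into [bs, 2h, 2w, c] with every pixel repeated 2x2.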
9 | bs, h, w, c = tf.shape(inputs)[0], tf.shape(inputs)[1], tf.shape(inputs)[2], tf.shape(inputs)[3] 10 | data = tf.reshape(inputs, [bs, h, 1, w, 1, c]) * tf.ones([1, 1, factor, 1, factor, 1], dtype=inputs.dtype) 11 | 12 | return tf.reshape(data, [bs, h * factor, w * factor, c]) 13 | -------------------------------------------------------------------------------- /models/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | BACKBONES = Register("backbones") 5 | NECKS = Register("necks") 6 | HEADS = Register("heads") 7 | DETECTORS = Register("detectors") 8 | 9 | 10 | def build_backbone(backbone, **kwargs): 11 | return BACKBONES[backbone](**kwargs) 12 | 13 | 14 | def build_neck(neck, **kwargs): 15 | return NECKS[neck](**kwargs) 16 | 17 | 18 | def build_head(head, **kwargs): 19 | return HEADS[head](**kwargs) 20 | 21 | 22 | def build_detector(detector, **kwargs): 23 | return DETECTORS[detector](**kwargs) 24 | 25 | -------------------------------------------------------------------------------- /models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .bifpn import BiFPN 3 | from .dlaup import dla_up 4 | from .nas_fpn import nas_fpn 5 | from .dilated_encoder import DilatedEncoder 6 | from .centernet_deconv import centernet_deconv 7 | from .path_aggregation_neck import path_aggregation_neck 8 | from .feature_fusion_pyramid import feature_fusion_pyramid 9 | 10 | 11 | __all__ = [ 12 | "FPN", 13 | "BiFPN", 14 | "dla_up", 15 | "nas_fpn", 16 | "DilatedEncoder", 17 | "centernet_deconv", 18 | "path_aggregation_neck", 19 | "feature_fusion_pyramid", 20 | ] 21 | -------------------------------------------------------------------------------- /core/layers/activations.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | @tf.custom_gradient 5 | def _mish(x): 6 | x1 = tf.nn.tanh(tf.nn.softplus(x)) 7 | 8 | def _grad(dy): 9 | # d/dx [x * tanh(softplus(x))] = tanh(sp(x)) + x * sigmoid(x) * (1 - tanh(sp(x))^2) 10 | dx = x1 + x * tf.nn.sigmoid(x) * (1 - x1 * x1) 11 | 12 | return dx * dy 13 | 14 | return x * x1, _grad 15 | 16 | 17 | class Mish(tf.keras.layers.Layer): 18 | def __init__(self, **kwargs): 19 | super(Mish, self).__init__(**kwargs) 20 | 21 | def call(self, inputs): 22 | # x = inputs * (tf.nn.tanh(tf.nn.softplus(inputs))) 23 | 24 | return _mish(inputs) 25 | 26 | 27 | -------------------------------------------------------------------------------- /core/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class CombinedSampler(Sampler): 8 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 9 | super(CombinedSampler, self).__init__(**kwargs) 10 | 11 | self.positive_sampler = pos_sampler 12 | self.negative_sampler = neg_sampler 13 | 14 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 15 | raise NotImplementedError 16 | 17 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 18 | raise NotImplementedError -------------------------------------------------------------------------------- /models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .detector import Detector 2 | 3 | from .gfl import GFL 4 | from .atss
import ATSS 5 | from .fcos import FCOS 6 | from .gflv2 import GFLV2 7 | from .onenet import OneNet 8 | from .yolov4 import YOLOv4 9 | from .yolov5 import YOLOv5 10 | from .centernet import CenterNet 11 | from .retinanet import RetinaNet 12 | from .faster_rcnn import FasterRCNN 13 | from .efficientdet import EfficientDet 14 | 15 | 16 | 17 | __all__ = [ 18 | "Detector", 19 | "GFL", 20 | "ATSS", 21 | "FCOS", 22 | "GFLV2", 23 | "OneNet", 24 | "YOLOv4", 25 | "YOLOv5", 26 | "CenterNet", 27 | "RetinaNet", 28 | "FasterRCNN", 29 | "EfficientDet", 30 | ] 31 | 32 | -------------------------------------------------------------------------------- /core/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .focal_loss import FocalLoss, ModifiedFocalLoss 2 | from .l1_loss import SmoothL1Loss, RegL1Loss 3 | from .cross_entropy import CrossEntropy, BinaryCrossEntropy 4 | from .iou_loss import IoULoss, BoundedIoULoss, GIoULoss, DIoULoss, CIoULoss 5 | from .generalized_focal_loss import DistributionFocalLoss, QualityFocalLoss 6 | 7 | __all__ = [ 8 | "ModifiedFocalLoss", 9 | "FocalLoss", 10 | "RegL1Loss", 11 | "SmoothL1Loss", 12 | "CrossEntropy", 13 | "BinaryCrossEntropy", 14 | "IoULoss", 15 | "BoundedIoULoss", 16 | "GIoULoss", 17 | "DIoULoss", 18 | "CIoULoss", 19 | "QualityFocalLoss", 20 | "DistributionFocalLoss" 21 | ] 22 | 23 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from core.layers.nms import * 2 | from .assigners import * 3 | from .samplers import * 4 | from .losses import * 5 | from .metrics import * 6 | from .optimizers import * 7 | from .learning_rate_schedules import * 8 | from .builder import ( 9 | build_assigner, build_sampler, 10 | build_loss, build_optimizer, 11 | build_learning_rate_scheduler, 12 | build_metric, build_nms, 13 | build_anchor_generator 14 | ) 15 | 16 | 17 | __all__ = [ 18 | "build_assigner", "build_sampler", "build_loss", "build_optimizer", 19 | "build_learning_rate_scheduler", "build_metric", "build_nms", "build_anchor_generator" 20 | ] 21 | 22 | -------------------------------------------------------------------------------- /core/layers/scale.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Scale(tf.keras.layers.Layer): 5 | def __init__(self, value, **kwargs): 6 | super(Scale, self).__init__(**kwargs) 7 | 8 | self.value = value 9 | 10 | def build(self, input_shape): 11 | self.scale = self.add_weight(name="scale", 12 | trainable=True, 13 | shape=[], 14 | dtype=self.dtype, 15 | initializer=tf.keras.initializers.Constant(self.value)) 16 | 17 | def call(self, inputs, **kwargs): 18 | return inputs * self.scale 19 | 20 | def compute_output_shape(self, input_shape): 21 | return input_shape 22 | 23 | def get_config(self): 24 | config = {"value": self.value} 25 | 26 | base_config = super(Scale, self).get_config() 27 | 28 | return dict(list(base_config.items()) + list(config.items())) 29 | -------------------------------------------------------------------------------- /models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .head import BaseHead 2 | from .dense_heads.anchor_head import AnchorHead 3 | from .dense_heads.atss_head import ATSSHead 4 | from .dense_heads.retinanet_head import
RetinaNetHead 5 | from .dense_heads.fcos_head import FCOSHead 6 | from .dense_heads.rpn_head import RPNHead 7 | from .bbox_heads.bbox_head import BBoxHead 8 | from .roi_heads.standard_roi_head import StandardRoIHead 9 | from .dense_heads.gfl_head import GFLHead 10 | from .dense_heads.gflv2_head import GFLV2Head 11 | from .dense_heads.yolof_head import YOLOFHead 12 | from .anchor_free_heads.center_heatmap_head import CenterHeatmapHead 13 | from .anchor_free_heads.onenet_head import OneNetHead 14 | 15 | 16 | __all__ = [ 17 | "BaseHead", 18 | "AnchorHead", 19 | "ATSSHead", 20 | "RetinaNetHead", 21 | "FCOSHead", 22 | "RPNHead", 23 | "BBoxHead", 24 | "StandardRoIHead", 25 | "GFLHead", 26 | "GFLV2Head", 27 | "CenterHeatmapHead", 28 | "OneNetHead", 29 | "YOLOFHead" 30 | ] 31 | -------------------------------------------------------------------------------- /core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_transform import Box2Delta 2 | from .bbox_transform import Delta2Box 3 | from .overlaps import compute_iou 4 | from .overlaps import compute_unaligned_iou 5 | from .bbox_transform import Distance2Box 6 | from .bbox_transform import Box2Distance 7 | 8 | 9 | def build_decoder(decoder, **kwargs): 10 | if decoder == "Delta2Box": 11 | return Delta2Box(**kwargs) 12 | 13 | if decoder == "Distance2Box": 14 | return Distance2Box() 15 | 16 | raise TypeError("Could not interpret bbox decoder function identifier: {}".format(repr(decoder))) 17 | 18 | 19 | def build_encoder(encoder, **kwargs): 20 | if encoder == "Box2Delta": 21 | return Box2Delta(**kwargs) 22 | 23 | if encoder == "Box2Distance": 24 | return Box2Distance() 25 | 26 | raise TypeError("Could not interpret bbox encoder function identifier: {}".format(repr(encoder))) 27 | 28 | 29 | __all__ = [ 30 | "build_encoder", 31 | "build_decoder", 32 | "compute_iou", 33 | "compute_unaligned_iou" 34 | ] 35 | -------------------------------------------------------------------------------- /core/learning_rate_schedules/step_decay.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LR_SCHEDULERS 3 | 4 | 5 | @LR_SCHEDULERS.register 6 | class StepDecay(tf.keras.optimizers.schedules.LearningRateSchedule): 7 | def __init__(self, initial_learning_rate, decay_steps, decay_rate, name="StepDecay"): 8 | super(StepDecay, self).__init__() 9 | 10 | self.lr = initial_learning_rate 11 | self.decay_steps = decay_steps 12 | self.decay_rate = decay_rate 13 | self.name = name 14 | 15 | def __call__(self, global_step): 16 | with tf.name_scope("StepDecay"): 17 | initial_lr = tf.convert_to_tensor(self.lr, name="initial_learning_rate") 18 | dtype = initial_lr.dtype 19 | decay_rate = tf.cast(self.decay_rate, dtype) 20 | # Stateless schedule: lr = initial_lr * decay_rate ** floor(step / decay_steps), 21 | # recomputed from the initial value on every call (graph-mode safe). 22 | num_decays = tf.cast(global_step // self.decay_steps, dtype) 23 | 24 | return initial_lr * tf.pow(decay_rate, num_decays) 25 | 26 | def get_config(self): 27 | return {"initial_learning_rate": self.lr, 28 | "decay_steps": self.decay_steps, 29 | "decay_rate": self.decay_rate, 30 | "name": self.name} 31 | -------------------------------------------------------------------------------- /core/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class RandomSampler(Sampler): 8 | def __init__(self, num_proposals,
pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs): 9 | super(RandomSampler, self).__init__(num_proposals, pos_fraction, neg_pos_ub, add_gt_as_proposals) 10 | 11 | def _random_choice(self, indices, num): 12 | return tf.random.shuffle(indices)[:num] 13 | 14 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 15 | pos_inds = tf.squeeze(tf.where(assigned_labels >= 1), 1) 16 | 17 | if tf.size(pos_inds) <= num_expected_proposals: 18 | return pos_inds 19 | 20 | return self._random_choice(pos_inds, num_expected_proposals) 21 | 22 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 23 | neg_inds = tf.squeeze(tf.where(assigned_labels == 0), 1) 24 | if tf.size(neg_inds) <= num_expected_proposals: 25 | return neg_inds 26 | 27 | return self._random_choice(neg_inds, num_expected_proposals) 28 | -------------------------------------------------------------------------------- /core/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | ASSIGNERS = Register(name="assigners") 5 | 6 | SAMPLERS = Register(name="samplers") 7 | 8 | LOSSES = Register(name="losses") 9 | 10 | OPTIMIZERS = Register(name="optimizers") 11 | 12 | LR_SCHEDULERS = Register(name="lr_schedulers") 13 | 14 | METRICS = Register(name="metrics") 15 | 16 | ANCHOR_GENERATORS = Register(name="anchor_generator") 17 | 18 | NMS = Register(name="nms") 19 | 20 | 21 | def build_assigner(assigner, **kwargs): 22 | return ASSIGNERS[assigner](**kwargs) 23 | 24 | 25 | def build_sampler(sampler, **kwargs): 26 | return SAMPLERS[sampler](**kwargs) 27 | 28 | 29 | def build_loss(loss, **kwargs): 30 | return LOSSES[loss](**kwargs) 31 | 32 | 33 | def build_learning_rate_scheduler(scheduler, **kwargs): 34 | return LR_SCHEDULERS[scheduler](**kwargs) 35 | 36 | 37 | def build_metric(metric, **kwargs): 38 | return METRICS[metric](**kwargs) 39 | 40 | 41 | def build_optimizer(optimizer, **kwargs): 42 | return OPTIMIZERS[optimizer](**kwargs) 43 | 44 | 45 | def build_nms(nms, **kwargs): 46 | return NMS[nms](**kwargs) 47 | 48 | 49 | def build_anchor_generator(generator, **kwargs): 50 | return ANCHOR_GENERATORS[generator](**kwargs) 51 | -------------------------------------------------------------------------------- /models/heads/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..head import BaseHead 3 | 4 | 5 | class BaseRoIHead(BaseHead): 6 | def __init__(self, cfg, test_cfg, num_classes=80, is_training=True, **kwargs): 7 | super(BaseRoIHead, self).__init__(cfg=cfg, test_cfg=test_cfg, num_classes=num_classes, is_training=is_training, **kwargs) 8 | 9 | if cfg.get("bbox_head"): 10 | self.pooled_size = cfg.bbox_head.roi_pooling.pooled_size 11 | self._make_bbox_head(cfg.bbox_head) 12 | 13 | if cfg.get("mask_head"): 14 | self._make_mask_head(cfg.mask_head) 15 | 16 | @property 17 | def min_level(self): 18 | return self.cfg.get("min_level") 19 | 20 | @property 21 | def max_level(self): 22 | return self.cfg.get("max_level") 23 | 24 | def _make_bbox_head(self, bbox_head_cfg): 25 | raise NotImplementedError() 26 | 27 | def _make_mask_head(self, mask_head_cfg): 28 | raise NotImplementedError() 29 | 30 | @property 31 | def has_bbox_head(self): 32 | return hasattr(self, "bbox_head") 33 | 34 | @property 35 | def has_mask_head(self): 36 | return hasattr(self, "mask_head") 37 | 38 | 
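39 | # A concrete RoI head (e.g. StandardRoIHead in models/heads/roi_heads) overrides the 40 | # two factory methods above. A minimal sketch, with hypothetical head constructors 41 | # (the real wiring lives in the subclass, not here): 42 | # 43 | #   class StandardRoIHead(BaseRoIHead): 44 | #       def _make_bbox_head(self, bbox_head_cfg): 45 | #           self.bbox_head = BBoxHead(bbox_head_cfg)    # assumed constructor 46 | # 47 | #       def _make_mask_head(self, mask_head_cfg): 48 | #           self.mask_head = MaskHead(mask_head_cfg)    # assumed constructor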
-------------------------------------------------------------------------------- /utils/register.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class Register: 5 | """Module register: maps a string key to a callable. 6 | 7 | Usage (illustrative): 8 | MODELS = Register("models") 9 | 10 | @MODELS.register # registered under its own name, "ResNet" 11 | class ResNet: ... 12 | 13 | @MODELS.register("r50") # registered under an alias 14 | class ResNet50: ... 15 | """ 16 | 17 | def __init__(self, name): 18 | self._dict = {} 19 | self._name = name 20 | 21 | def __setitem__(self, key, value): 22 | if not callable(value): 23 | raise TypeError("Value of a Registry must be a callable.") 24 | if key is None: 25 | key = value.__name__ 26 | if key in self._dict: 27 | logging.warning("Key %s already in registry %s." % (key, self._name)) 28 | 29 | self._dict[key] = value 30 | 31 | def register(self, param): 32 | """Decorator to register a function or class.""" 33 | 34 | def decorator(key, value): 35 | self[key] = value 36 | return value 37 | 38 | if callable(param): 39 | # @reg.register 40 | return decorator(None, param) 41 | # @reg.register('alias') 42 | return lambda x: decorator(param, x) 43 | 44 | def __getitem__(self, key): 45 | try: 46 | return self._dict[key] 47 | except Exception as e: 48 | logging.error(f"module {key} not found: {e}") 49 | raise e 50 | 51 | def __contains__(self, key): 52 | return key in self._dict 53 | 54 | def keys(self): 55 | """Return all registered keys.""" 56 | return self._dict.keys() 57 | 58 | -------------------------------------------------------------------------------- /core/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class PseudoSampler(Sampler): 8 | def __init__(self, **kwargs): 9 | # Intentionally skips Sampler.__init__: a pseudo sampler keeps every 10 | # proposal, so it needs none of the sampling parameters. 11 | pass 12 | 13 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 14 | raise NotImplementedError 15 | 16 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 17 | raise NotImplementedError 18 | 19 | def sample(self, assigned_boxes, assigned_labels, **kwargs): 20 | """Sample positive and negative boxes. 21 | 22 | Args: 23 | assigned_boxes (Tensor): The assigned boxes in assigner. 24 | assigned_labels (Tensor): The assigned labels in assigner.
25 | 26 | Returns: 27 | A tuple: (target_boxes, target_labels, box_weights, label_weights). 28 | """ 29 | pos_mask = assigned_labels >= 1 30 | box_weights = tf.cast(pos_mask, tf.float32) 31 | 32 | valid_mask = assigned_labels >= 0 33 | target_labels = tf.where(valid_mask, tf.cast(assigned_labels, tf.int64), tf.zeros_like(assigned_labels, tf.int64)) 34 | label_weights = tf.cast(valid_mask, tf.float32) 35 | 36 | return assigned_boxes, target_labels, box_weights, label_weights 37 | -------------------------------------------------------------------------------- /core/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class OHEMSampler(Sampler): 8 | def __init__(self, num_proposals, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs): 9 | super(OHEMSampler, self).__init__(num_proposals, pos_fraction, neg_pos_ub, add_gt_as_proposals) 10 | 11 | def _hard_mining(self, losses, indices, num): 12 | valid_losses = tf.gather(losses, indices) 13 | 14 | _, top_k_inds = tf.nn.top_k(valid_losses, k=num) 15 | # top_k_inds index into valid_losses, so map them back to the original proposal indices. 16 | return tf.stop_gradient(tf.gather(indices, top_k_inds)) 17 | 18 | def _sample_positive(self, assigned_labels, losses, num_expected_proposals, **kwargs): 19 | pos_inds = tf.where(assigned_labels >= 1) 20 | pos_inds = tf.squeeze(pos_inds, 1) 21 | if tf.size(pos_inds) <= num_expected_proposals: 22 | return pos_inds 23 | 24 | return self._hard_mining(losses, pos_inds, num_expected_proposals) 25 | 26 | def _sample_negative(self, assigned_labels, losses, num_expected_proposals, **kwargs): 27 | neg_inds = tf.where(assigned_labels == 0) 28 | neg_inds = tf.squeeze(neg_inds, 1) 29 | if tf.size(neg_inds) <= num_expected_proposals: 30 | return neg_inds 31 | 32 | return self._hard_mining(losses, neg_inds, num_expected_proposals) 33 | -------------------------------------------------------------------------------- /create_coco_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | from data.datasets.coco_dataset import COCODataset 4 | 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--dataset_dir", default=None, type=str, 8 | help="""The directory containing images and annotation files, e.g.: 9 | └─COCO 10 | ├─train2017 (training images) 11 | ├─val2017 (val images) 12 | └─annotations""") 13 | parser.add_argument("--phase", default="train", type=str, 14 | help="The phase of dataset, e.g. for `train2017` the value should be `train`," 15 | " and for `val2017` it should be `val`.") 16 | parser.add_argument("--version", default=2017, type=int, 17 | help="The version of dataset, e.g. for `train2017`, the value should be `2017`," 18 | " for `val2017`, the value should be `2017`.") 19 | parser.add_argument("--max_images_per_tfrecord", default=20000, type=int, 20 | help="The maximum images per tfrecord.") 21 | 22 | args = parser.parse_args() 23 | 24 | assert args.dataset_dir is not None, "Must provide dataset directory."
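25 | 26 | # Example invocation (hypothetical paths; adjust to your own layout): 27 | #   python create_coco_dataset.py --dataset_dir /data/COCO --phase train --version 2017 28 | # This assumes /data/COCO/train2017 and /data/COCO/annotations exist.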
29 | 30 | coco = COCODataset(args.dataset_dir, training=True) 31 | coco.create_tf_record(phase=args.phase, version=args.version, max_imgs_per_tfrecord=args.max_images_per_tfrecord) 32 | -------------------------------------------------------------------------------- /core/layers/max_in_out.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class MaxInOut(tf.keras.layers.Layer): 5 | def __init__(self, num_negative, num_positive, axis=-1, **kwargs): 6 | super(MaxInOut, self).__init__(**kwargs) 7 | self.num_pos = num_positive 8 | self.num_neg = num_negative 9 | self.axis = axis 10 | 11 | self._max_in = num_negative > 1 12 | 13 | def build(self, input_shape): 14 | super(MaxInOut, self).build(input_shape) 15 | 16 | def call(self, inputs): 17 | neg, pos = tf.split(inputs, [self.num_neg, self.num_pos], self.axis) 18 | if self._max_in: 19 | neg = tf.reduce_max(neg, axis=self.axis, keepdims=True) 20 | else: 21 | pos = tf.reduce_max(pos, axis=self.axis, keepdims=True) 22 | 23 | outputs = tf.concat([neg, pos], axis=self.axis) 24 | 25 | return outputs 26 | 27 | def compute_output_shape(self, input_shape): 28 | if self.axis == -1 or self.axis == 3: 29 | return tf.TensorShape([input_shape[0], input_shape[1], input_shape[2], 2]) 30 | else: 31 | return tf.TensorShape([input_shape[0], 2, input_shape[2], input_shape[3]]) 32 | 33 | def get_config(self): 34 | config = { 35 | 'num_positive': self.num_pos, 36 | "num_negative": self.num_neg, 37 | "axis": self.axis 38 | } 39 | 40 | base_config = super(MaxInOut, self).get_config() 41 | 42 | return dict(list(base_config.items()) + list(config.items())) 43 | -------------------------------------------------------------------------------- /core/losses/l1_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LOSSES 3 | 4 | 5 | # Implemented via tf.keras.losses.Huber; with delta=1.0 this matches the standard smooth L1 loss. 6 | @LOSSES.register 7 | class SmoothL1Loss(tf.keras.losses.Huber): 8 | def __init__(self, delta=1.0, weight=1., reduction=tf.keras.losses.Reduction.NONE): 9 | super(SmoothL1Loss, self).__init__(delta=delta, reduction=reduction) 10 | 11 | self.weight = weight 12 | self.delta = delta 13 | 14 | def call(self, y_true, y_pred): 15 | loss = super(SmoothL1Loss, self).call(y_true, y_pred) 16 | 17 | return loss * self.weight 18 | 19 | 20 | @LOSSES.register 21 | class RegL1Loss(tf.keras.losses.Loss): 22 | def __init__(self, weight=1., reduction=tf.keras.losses.Reduction.NONE): 23 | super(RegL1Loss, self).__init__(reduction=reduction) 24 | 25 | self.weight = weight 26 | 27 | def call(self, y_true, y_pred): 28 | loss = tf.math.abs(y_true - y_pred) * self.weight 29 | 30 | return loss 31 | -------------------------------------------------------------------------------- /core/layers/nearest_upsamling.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class NearestUpsampling2D(tf.keras.layers.Layer):
5 |     """Nearest neighbor upsampling implementation.
6 | 
7 |     Args:
8 |         scale: An integer multiple to scale resolution of input data.
9 |     """
10 |     def __init__(self, scale, **kwargs):
11 |         super(NearestUpsampling2D, self).__init__(**kwargs)
12 |         if "data_format" in kwargs:
13 |             data_format = kwargs.pop("data_format")
14 |             assert data_format in {"channels_first", "channels_last"}
15 |             self.data_format = data_format
16 | 
17 |         self.scale = scale
18 | 
19 |     def build(self, input_shape):
20 |         super(NearestUpsampling2D, self).build(input_shape)
21 | 
22 |     def call(self, inputs, **kwargs):
23 |         # Instead of broadcasting with a 6-d tensor, we're using stacking here
24 |         # for TFLite compatibility.
25 |         bs, h, w, c = tf.shape(inputs)[0], tf.shape(inputs)[1], tf.shape(inputs)[2], tf.shape(inputs)[3]
26 |         # bs, h, w, c = inputs.get_shape().as_list()
27 |         # bs = -1 if bs is None else bs
28 |         # outputs = tf.stack([inputs] * self.scale, axis=3)
29 |         # outputs = tf.stack([outputs] * self.scale, axis=2)
30 |         scale = self.scale
31 |         data = tf.reshape(inputs, [bs, h, 1, w, 1, c]) * tf.ones([1, 1, scale, 1, scale, 1], dtype=inputs.dtype)
32 |         return tf.reshape(data, [bs, h * scale, w * scale, c])
33 | 
34 |     def compute_output_shape(self, input_shape):
35 |         batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], input_shape[3]
36 |         return tf.TensorShape([batch_size, h * self.scale, w * self.scale, c])
37 | 
38 | 
--------------------------------------------------------------------------------
/core/bbox/bbox_transform.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import box_utils
3 | 
4 | 
5 | class Box2Delta(object):
6 |     def __init__(self, weights=None):
7 |         self.weights = weights
8 | 
9 |     def __call__(self, proposals, boxes):
10 |         return box_utils.encode_boxes(boxes, proposals, self.weights)
11 | 
12 | 
13 | class Delta2Box(object):
14 |     def __init__(self, weights=None):
15 |         self.weights = weights
16 | 
17 |     def __call__(self, proposals, delta):
18 |         return box_utils.decode_boxes(delta, proposals, self.weights)
19 | 
20 | 
21 | class Distance2Box(object):
22 |     def __call__(self, distances, grid_y, grid_x):
23 |         with tf.name_scope("distance2box"):
24 |             grid_y = tf.cast(tf.expand_dims(grid_y, 0), distances.dtype)
25 |             grid_x = tf.cast(tf.expand_dims(grid_x, 0), distances.dtype)
26 | 
27 |             boxes = tf.stack([grid_y - distances[..., 0],
28 |                               grid_x - distances[..., 1],
29 |                               grid_y + distances[..., 2],
30 |                               grid_x + distances[..., 3]], axis=-1)
31 | 
32 |             return boxes
33 | 
34 | 
35 | class Box2Distance(object):
36 |     def __call__(self, boxes, grid_y, grid_x):
37 |         with tf.name_scope("box2distance"):
38 |             grid_y = tf.cast(tf.expand_dims(grid_y, 0), boxes.dtype)
39 |             grid_x = tf.cast(tf.expand_dims(grid_x, 0), boxes.dtype)
40 | 
41 |             dist = tf.stack([grid_y - boxes[..., 0],
42 |                              grid_x - boxes[..., 1],
43 |                              boxes[..., 2] - grid_y,
44 |                              boxes[..., 3] - grid_x], axis=-1)
45 | 
46 |             return dist
47 | 
--------------------------------------------------------------------------------
/core/layers/proposal_layer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | def _single_level_rois_select(boxes, scores, topk, max_nms_outputs, nms_threshold):
5 |     boxes = tf.cast(boxes, tf.float32)
6 |     scores = tf.cast(scores, tf.float32)
7 |     scores = tf.squeeze(scores, -1)
8 | 
9 |     topk_scores, topk_indices = tf.nn.top_k(scores, k=topk)
10 |     topk_indices = tf.stack(
11 |         [tf.tile(tf.range(tf.shape(boxes)[0])[:, None], [1, tf.shape(topk_scores)[1]]), topk_indices], -1)
12 |     topk_boxes = tf.gather_nd(boxes, topk_indices)
13 | 
14 |     nmsed_boxes, nmsed_scores, _, _ = tf.image.combined_non_max_suppression(
15 |         tf.expand_dims(topk_boxes, -2),
16 |         tf.expand_dims(topk_scores, -1),
17 |         max_nms_outputs,
18 |         max_nms_outputs,
19 |         nms_threshold)
20 | 
21 |     return nmsed_boxes, nmsed_scores
22 | 
23 | 
24 | class ProposalLayer(tf.keras.layers.Layer):
25 |     def __init__(self, pre_nms_size=12000, post_nms_size=2000, max_total_size=2000, iou_threshold=0.7, min_size=0, **kwargs):
26 |         super(ProposalLayer, self).__init__(**kwargs)
27 | 
28 |         self.min_size = min_size
29 |         self.nms_pre = pre_nms_size
30 |         self.nms_post = post_nms_size
31 |         self.iou_threshold = iou_threshold
32 |         self.max_total_size = max_total_size
33 | 
34 |     def call(self, boxes, scores):
35 |         selected_boxes, selected_scores = _single_level_rois_select(
36 |             boxes, scores, self.nms_pre, self.max_total_size, self.iou_threshold)
37 | 
38 |         return selected_boxes[:, :self.nms_post], selected_scores[:, :self.nms_post]
39 | 
40 |     def compute_output_shape(self, input_shape):
41 |         return tf.TensorShape([input_shape[0], self.nms_post, 4])
42 | 
--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_config import Config
2 | from .yolov5_config import get_yolov5_config
3 | from .atss_config import get_atss_config
4 | from .fcos_config import get_fcos_config
5 | from .faster_rcnn_config import get_faster_rcnn_config
6 | from .efficientdet_config import get_efficientdet_config
7 | from .gfl_config import get_gfl_config
8 | from .centernet_config import get_centernet_config
9 | from .retinanet_config import get_retinanet_config
10 | from .onenet_config import get_onenet_config
11 | 
12 | CONFIG_DICT = {
13 |     "EfficientDetD0": lambda x: get_efficientdet_config("EfficientDetD0", x),
14 |     "EfficientDetD1": lambda x: get_efficientdet_config("EfficientDetD1", x),
15 |     "EfficientDetD2": lambda x: get_efficientdet_config("EfficientDetD2", x),
16 |     "EfficientDetD3": lambda x: get_efficientdet_config("EfficientDetD3", x),
17 |     "EfficientDetD4": lambda x: get_efficientdet_config("EfficientDetD4", x),
18 |     "EfficientDetD5": lambda x: get_efficientdet_config("EfficientDetD5", x),
19 |     "EfficientDetD6": lambda x: get_efficientdet_config("EfficientDetD6", x),
20 |     "EfficientDetD7": lambda x: get_efficientdet_config("EfficientDetD7", x),
21 |     "FasterRCNN": lambda x: get_faster_rcnn_config(x),
22 |     "FCOS": lambda x: get_fcos_config(x),
23 |     "ATSS": lambda x: get_atss_config(x),
24 |     "GFL": lambda x: get_gfl_config(x),
25 |     "YOLOv5s": lambda x: get_yolov5_config(x, .33, .50, "yolov5s"),
26 |     "YOLOv5m": lambda x: get_yolov5_config(x, .67, .75, "yolov5m"),
27 |     "YOLOv5l": lambda x: get_yolov5_config(x, 1., 1., "yolov5l"),
28 |     "YOLOv5x": lambda x: get_yolov5_config(x, 1.22, 1.25, "yolov5x"),
29 |     "CenterNet": lambda x: get_centernet_config(x),
30 |     "RetinaNet": lambda x: get_retinanet_config(x),
31 |     "OneNet": lambda x: get_onenet_config(x),
32 | }
33 | 
34 | 
35 | def build_configs(name):
36 |     return CONFIG_DICT[name]
37 | 
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
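# A lookup sketch for the backbone registry defined below (assumes the project
# root is on PYTHONPATH):
#
#   from models.backbones import ResNet50  # any name listed in __all__ works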
-------------------------------------------------------------------------------- 1 | from .backbone import Backbone 2 | 3 | from .vgg import VGG16, VGG19 4 | from .densenet import DenseNet121, DenseNet169, DenseNet201 5 | from .resnet import ResNet50, ResNet101, ResNet152, CaffeResNet50, CaffeResNet101, CaffeResNet152 6 | from .resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2 7 | from .efficientnet import ( 8 | EfficientNetB0, 9 | EfficientNetB1, 10 | EfficientNetB2, 11 | EfficientNetB3, 12 | EfficientNetB4, 13 | EfficientNetB5, 14 | EfficientNetB6, 15 | EfficientNetB7 16 | ) 17 | from .resnext import ResNeXt50_32X4D, ResNeXt101_32X4D, ResNeXt101_64X4D, ResNeXt101B_64X4D 18 | from .dla import DLA34, DLA46C, DLA46XC, DLA60, DLA60C, DLA60X, DLA60XC, DLA102, DLA102X, DLA102X2, DLA169 19 | from .resnet_v1b import ( 20 | ResNet50V1D, ResNet101V1D, ResNet152V1D, 21 | ResNet50V1E, ResNet101V1E, ResNet152V1E 22 | ) 23 | from .hourglass import HourglassNet 24 | 25 | 26 | __all__ = [ 27 | "VGG16", 28 | "VGG19", 29 | "HourglassNet", 30 | "ResNet50", 31 | "ResNet101", 32 | "ResNet152", 33 | "CaffeResNet50", 34 | "CaffeResNet101", 35 | "CaffeResNet152", 36 | "ResNet50V2", 37 | "ResNet101V2", 38 | "ResNet152V2", 39 | "DenseNet121", 40 | "DenseNet169", 41 | "DenseNet201", 42 | "EfficientNetB0", 43 | "EfficientNetB1", 44 | "EfficientNetB2", 45 | "EfficientNetB3", 46 | "EfficientNetB4", 47 | "EfficientNetB5", 48 | "EfficientNetB6", 49 | "EfficientNetB7", 50 | "DLA34", 51 | "DLA46C", 52 | "DLA46XC", 53 | "DLA60", 54 | "DLA60C", 55 | "DLA60X", 56 | "DLA60XC", 57 | "DLA102", 58 | "DLA102X", 59 | "DLA102X2", 60 | "DLA169", 61 | "ResNet50V1D", 62 | "ResNet101V1D", 63 | "ResNet152V1D", 64 | "ResNet50V1E", 65 | "ResNet101V1E", 66 | "ResNet152V1E", 67 | "ResNeXt50_32X4D", 68 | "ResNeXt101_32X4D", 69 | "ResNeXt101_64X4D", 70 | "ResNeXt101B_64X4D" 71 | ] 72 | 73 | -------------------------------------------------------------------------------- /core/losses/cross_entropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LOSSES 3 | 4 | 5 | @LOSSES.register 6 | class BinaryCrossEntropy(tf.keras.losses.Loss): 7 | def __init__(self, 8 | from_logits=True, 9 | label_smoothing=0.0, 10 | weight=1., 11 | reduction=tf.keras.losses.Reduction.NONE, 12 | name="BinaryCrossEntropy"): 13 | super(BinaryCrossEntropy, self).__init__(reduction=reduction, name=name) 14 | 15 | assert from_logits 16 | self.weight = weight 17 | self.from_logits = from_logits 18 | self.label_smoothing = label_smoothing 19 | 20 | def call(self, y_true, y_pred): 21 | smooth_y_true = tf.cond( 22 | tf.greater(self.label_smoothing, 0.), 23 | lambda: (y_true * (1. - self.label_smoothing) + 24 | self.label_smoothing / (tf.cast(tf.shape(y_true)[-1], y_true.dtype) - 1.)), 25 | lambda: y_true) 26 | 27 | return tf.nn.sigmoid_cross_entropy_with_logits(labels=smooth_y_true, logits=y_pred) * self.weight 28 | 29 | 30 | @LOSSES.register 31 | class CrossEntropy(tf.keras.losses.Loss): 32 | def __init__(self, 33 | from_logits=True, 34 | label_smoothing=0.01, 35 | weight=1., 36 | reduction=tf.keras.losses.Reduction.NONE, 37 | name="CrossEntropy"): 38 | super(CrossEntropy, self).__init__(reduction=reduction, name=name) 39 | 40 | self.weight = weight 41 | self.from_logits = from_logits 42 | self.label_smoothing = label_smoothing 43 | 44 | def call(self, y_true, y_pred): 45 | smooth_y_true = tf.cond( 46 | tf.greater(self.label_smoothing, 0.), 47 | lambda: ((y_true * (1. 
- self.label_smoothing) +
48 |                      self.label_smoothing / (tf.cast(tf.shape(y_true)[-1], y_true.dtype) - 1.))),
49 |             lambda: y_true)
50 | 
51 |         return tf.nn.softmax_cross_entropy_with_logits(labels=smooth_y_true, logits=y_pred) * self.weight
52 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import argparse
4 | import tensorflow as tf
5 | from configs import Config
6 | from configs import build_configs
7 | from trainers import MultiGPUTrainer
8 | from trainers import SingleGPUTrainer
9 | 
10 | 
11 | def main():
12 |     parser = argparse.ArgumentParser()
13 |     parser.add_argument("--detector",
14 |                         type=str,
15 |                         default="CenterNet",
16 |                         help="The detector name, e.g. `efficientdet`, `efficient_fcos`.")
17 |     parser.add_argument("--gpus",
18 |                         type=str,
19 |                         default="0,1,2,3",
20 |                         help="The GPU ids to use, e.g. `0,1,2,3`; more than one id enables multi-GPU training.")
21 |     parser.add_argument("--cfg",
22 |                         type=str,
23 |                         default=None,
24 |                         help="The config file (yaml); if None, the default config is used.")
25 |     parser.add_argument("--num_classes",
26 |                         type=int,
27 |                         default=80,
28 |                         help="The number of classes, default 80 (COCO).")
29 | 
30 |     args = parser.parse_args()
31 | 
32 |     os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
33 | 
34 |     tf.random.set_seed(2333)
35 |     # tf.config.optimizer.set_jit(True)
36 | 
37 |     logger = logging.getLogger()
38 |     logger.setLevel(logging.INFO)
39 |     logging.basicConfig(format="%(asctime)s %(levelname)s - %(message)s",
40 |                         datefmt="%Y-%m-%d %H:%M:%S")
41 | 
42 |     physical_devices = tf.config.experimental.list_physical_devices("GPU")
43 |     for device in physical_devices:
44 |         tf.config.experimental.set_memory_growth(device, True)
45 | 
46 |     if args.cfg is None:
47 |         cfg = build_configs(args.detector)(args.num_classes)
48 |     else:
49 |         cfg = Config()
50 |         cfg.parse_from_yaml(args.cfg)
51 | 
52 |     num_gpus = len(args.gpus.strip().split(","))
53 |     if num_gpus > 1:
54 |         trainer = MultiGPUTrainer(cfg=cfg, logger=logger)
55 |     else:
56 |         trainer = SingleGPUTrainer(cfg=cfg, logger=logger)
57 | 
58 |     trainer.run()
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     main()
63 | 
--------------------------------------------------------------------------------
/core/assigners/assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class Assigner(object):
5 |     def __init__(self, dtype=tf.float32):
6 | 
7 |         self.dtype = dtype
8 | 
9 |     @property
10 |     def _param_dtype(self):
11 |         if self.dtype == tf.float16 or self.dtype == tf.bfloat16:
12 |             return tf.float32
13 | 
14 |         return self.dtype or tf.float32
15 | 
16 |     def assign(self, gt_boxes, gt_labels, proposals):
17 |         """Assign gt to boxes.
18 | 
19 |         This method assigns a gt box to every box (proposal/anchor); each box
20 |         will be assigned with -1, 0 or a positive number. -1 means don't care,
21 |         0 means negative sample, and a positive number is the index (1-based) of
22 |         the assigned gt.
23 | 
24 |         The assignment is done in the following steps (the order matters):
25 |             1. initialize target boxes and labels.
26 |             2. assign proposals whose iou with all gts < neg_iou_thresh to 0.
27 |             3. for each box, if the iou with its nearest gt >= pos_iou_thresh,
28 |                assign it to that gt.
29 |             4. for each gt box, assign its best proposals (may be more than
30 |                one) to itself.
31 | 
32 |         Args:
33 |             proposals (Tensor): Bounding boxes to be assigned, shape (n, 4).
34 | gt_boxes (Tensor): Ground-truth boxes, shape (k, 4). 35 | gt_labels (Tensor): Ground-truth labels, shape (k, ). 36 | 37 | Returns: 38 | target_boxes (Tensor), target_labels (Tensor). 39 | """ 40 | raise NotImplementedError() 41 | 42 | def assign_wrt_overlaps(self, overlaps, gt_boxes, gt_labels): 43 | """Assign w.r.t. the overlaps of boxes with gts. 44 | 45 | Args: 46 | overlaps (Tensor): Overlaps between k gt_boxes and n proposals, 47 | shape (k, n). 48 | gt_boxes (Tensor): Ground-truth boxes, shape (k, 4). 49 | gt_labels (Tensor): Ground-truth labels, shape (k, ). 50 | 51 | Returns: 52 | target_boxes (Tensor), target_labels (Tensor). 53 | """ 54 | raise NotImplementedError() 55 | 56 | def __call__(self, gt_boxes, gt_labels, proposals): 57 | with tf.name_scope("max_iou_assigner"): 58 | return self.assign(gt_boxes, gt_labels, proposals) -------------------------------------------------------------------------------- /models/heads/dense_heads/retinanet_head.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | from ...builder import HEADS 4 | from .anchor_head import AnchorHead 5 | from core.layers import build_activation 6 | from core.layers import build_convolution 7 | from core.layers import build_normalization 8 | 9 | 10 | @HEADS.register 11 | class RetinaNetHead(AnchorHead): 12 | def __init__(self, **kwargs): 13 | super(RetinaNetHead, self).__init__(**kwargs) 14 | 15 | self._make_shared_convs() 16 | self._make_init_layers() 17 | self._init_anchor_generators() 18 | 19 | def _make_init_layers(self): 20 | self.classifier = tf.keras.layers.Conv2D( 21 | filters=self.num_anchors * self.num_classes, 22 | kernel_size=(3, 3), 23 | strides=(1, 1), 24 | padding="same", 25 | kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), 26 | bias_initializer=tf.keras.initializers.Constant(-math.log((1. - self.cfg.prior) / self.cfg.prior)), 27 | name="predicted_class") 28 | 29 | self.regressor = tf.keras.layers.Conv2D( 30 | filters=self.num_anchors * 4, 31 | kernel_size=(3, 3), 32 | strides=(1, 1), 33 | padding="same", 34 | kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), 35 | name="predicted_box") 36 | 37 | def call(self, inputs, training=None): 38 | predicted_boxes = dict() 39 | predicted_labels = dict() 40 | total_anchors = dict() 41 | for i, level in enumerate(range(self.min_level, self.max_level + 1)): 42 | box_feat = self.box_shared_convs(inputs[i], training=training) 43 | label_feat = self.class_shared_convs(inputs[i], training=training) 44 | 45 | pred_boxes = self.regressor(box_feat) 46 | pred_labels = self.classifier(label_feat) 47 | 48 | h, w = tf.shape(box_feat)[1], tf.shape(box_feat)[2] 49 | anchors = self.anchor_generators[i](h, w) 50 | 51 | predicted_boxes["level%d" % level] = pred_boxes 52 | predicted_labels["level%d" % level] = pred_labels 53 | total_anchors["level%d" % level] = anchors 54 | 55 | outputs = dict(boxes=predicted_boxes, 56 | labels=predicted_labels, 57 | total_anchors=total_anchors) 58 | 59 | if self.is_training: 60 | return outputs 61 | 62 | return self.get_boxes(outputs) 63 | -------------------------------------------------------------------------------- /models/detectors/one_stage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from . 
import Detector 4 | from ..builder import DETECTORS 5 | from ..builder import build_head 6 | from ..builder import build_neck 7 | from ..builder import build_backbone 8 | 9 | 10 | class OneStageDetector(Detector): 11 | def __init__(self, cfg, training=True): 12 | super(OneStageDetector, self).__init__(cfg, training=training) 13 | 14 | self.data_format = cfg.data_format 15 | 16 | inputs = tf.keras.Input(shape=(None, None, 3)) 17 | self.backbone = build_backbone(input_tensor=inputs, **cfg.backbone.as_dict()) 18 | x = self.backbone(inputs) 19 | 20 | if cfg.get("neck"): 21 | if isinstance(x, (list, tuple)): 22 | input_shapes = [i.shape.as_list()[1:] for i in x] 23 | if cfg.neck.get("downsample_ratio"): 24 | first_level = int(np.log2(cfg.neck.downsample_ratio)) ## for centernet 25 | x = x[first_level:] 26 | else: 27 | input_shapes = x.shape.as_list()[1:] 28 | self.neck = build_neck(input_shapes=input_shapes, name="neck", **cfg.neck.as_dict()) 29 | x = self.neck(x) 30 | 31 | if cfg.get("anchors"): 32 | self.head = build_head(cfg.head.head, 33 | cfg=cfg.head, 34 | test_cfg=cfg.test, 35 | anchor_cfg=cfg.anchors, 36 | num_classes=cfg.num_classes, 37 | is_training=training, 38 | name="head") 39 | else: 40 | self.head = build_head(cfg.head.head, 41 | cfg=cfg.head, 42 | test_cfg=cfg.test, 43 | num_classes=cfg.num_classes, 44 | is_training=training, 45 | name="head") 46 | x = self.head(x) 47 | self.detector = tf.keras.Model(inputs=inputs, outputs=x) 48 | 49 | def load_pretrained_weights(self, pretrained_weights_path=None): 50 | if pretrained_weights_path: 51 | self.backbone.load_weights(pretrained_weights_path, by_name=True, skip_mismatch=True) 52 | print("Restored pre-trained weights from %s." % pretrained_weights_path) 53 | 54 | else: 55 | print("Train model from scratch.") 56 | 57 | def compute_losses(self, predictions, image_info): 58 | return self.head.compute_losses(predictions, image_info) 59 | 60 | def save_weights(self, name): 61 | self.detector.save_weights(name) 62 | 63 | @tf.function 64 | def __call__(self, inputs, training): 65 | x = self.detector(inputs, training=training) 66 | return x 67 | 68 | -------------------------------------------------------------------------------- /models/detectors/two_stage.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from . 
import Detector
3 | from utils import box_utils
4 | from ..builder import build_neck
5 | from ..builder import build_head
6 | from core.bbox import build_decoder
7 | from ..builder import build_backbone
8 | from core.layers import ProposalLayer
9 | 
10 | 
11 | class TwoStageDetector(Detector):
12 |     def __init__(self, cfg, training=True, **kwargs):
13 |         super(TwoStageDetector, self).__init__(cfg, training=training, **kwargs)
14 | 
15 |         self.data_format = cfg.data_format
16 | 
17 |         inputs = tf.keras.Input(shape=(None, None, 3))
18 |         self.backbone = build_backbone(input_tensor=inputs, **cfg.backbone.as_dict())
19 |         x = self.backbone(inputs)
20 | 
21 |         if cfg.get("neck"):
22 |             if isinstance(x, (list, tuple)):
23 |                 input_shapes = [i.shape.as_list()[1:] for i in x]
24 |             else:
25 |                 input_shapes = x.shape.as_list()[1:]
26 |             self.neck = build_neck(input_shapes=input_shapes, name="neck", **cfg.neck.as_dict())
27 |             x = self.neck(x)
28 | 
29 |         if cfg.get("anchors"):
30 |             self.rpn_head = build_head(cfg.rpn_head.head,
31 |                                        cfg=cfg.rpn_head,
32 |                                        anchor_cfg=cfg.anchors,
33 |                                        is_training=training,
34 |                                        name="rpn_head")
35 |         else:
36 |             self.rpn_head = build_head(cfg.rpn_head.head,
37 |                                        cfg=cfg.rpn_head,
38 |                                        is_training=training,
39 |                                        name="rpn_head")
40 |         rpn_outputs, proposals = self.rpn_head(x)
41 |         x = build_head(cfg.roi_head.head,
42 |                        cfg=cfg.roi_head,
43 |                        test_cfg=cfg.test,
44 |                        num_classes=cfg.num_classes,
45 |                        is_training=training,
46 |                        name="roi_heads")([x, proposals])
47 | 
48 |         self.detector = tf.keras.Model(inputs=inputs, outputs=[proposals, x])
49 | 
50 |     def load_pretrained_weights(self, pretrained_weights_path=None):
51 |         if pretrained_weights_path:
52 |             self.backbone.load_weights(pretrained_weights_path, by_name=True, skip_mismatch=True)
53 |             print("Restored pre-trained weights from %s." % pretrained_weights_path)
54 | 
55 |         else:
56 |             print("Train model from scratch.")
57 | 
58 |     def compute_losses(self, rpn_outputs, rcnn_outputs, image_info):
59 |         return self.rpn_head.compute_losses(rpn_outputs, image_info)
60 | 
61 |     def save_weights(self, name):
62 |         self.detector.save_weights(name)
63 | 
64 |     @tf.function
65 |     def __call__(self, inputs, training):
66 |         x = self.detector(inputs, training=training)
67 |         return x
68 | 
69 | 
70 | 
71 | 
--------------------------------------------------------------------------------
/data/augmentations/mixup.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_probability as tfp
3 | from ..builder import AUGMENTATIONS
4 | 
5 | 
6 | @AUGMENTATIONS.register
7 | class Mixup(object):
8 |     def __init__(self, batch_size, alpha, prob=0.5, max_boxes=200):
9 |         self.alpha = alpha
10 |         self.batch_size = batch_size
11 |         self.prob = prob
12 |         self.max_boxes = max_boxes
13 | 
14 |     def _mixup(self, images, boxes, labels):
15 |         """Applies Mixup regularization to a batch of images, boxes and labels.
16 | 
17 |         [1] Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz
18 |             Mixup: Beyond Empirical Risk Minimization.
19 |             ICLR'18, https://arxiv.org/abs/1710.09412
20 | 
21 |         Args:
22 |             images: A batch of images of shape [batch_size, ...].
23 |             boxes, labels: Batches of padded boxes/labels of shape [batch_size, max_boxes, 4] / [batch_size, max_boxes].
24 | 
25 |         Returns:
26 |             A tuple of (images, boxes, labels) with the same dimensions as the input
27 |             with Mixup regularization applied.
28 |         """
29 |         mix_weight = tfp.distributions.Beta(self.alpha, self.alpha).sample([self.batch_size, 1])
30 |         mix_weight = tf.maximum(mix_weight, 1.
- mix_weight) 31 | images_mix_weight = tf.reshape(mix_weight, [self.batch_size, 1, 1, 1]) 32 | # Mixup on a single batch is implemented by taking a weighted sum with the same batch in reverse. 33 | image_dtype = images.dtype 34 | images = tf.cast(images, mix_weight.dtype) 35 | images_mix = images * images_mix_weight + images[::-1] * (1. - images_mix_weight) 36 | 37 | boxes_mix = tf.concat([boxes, boxes[::-1]], 1) 38 | labels_mix = tf.concat([labels, labels[::-1]], 1) 39 | 40 | def _fn(b, l): 41 | valid = l != 0 42 | l = tf.boolean_mask(l, valid) 43 | b = tf.boolean_mask(b, valid) 44 | num = tf.size(l) 45 | if num < self.max_boxes: 46 | l = tf.concat([l, tf.zeros([self.max_boxes - num], l.dtype)], 0) 47 | b = tf.concat([b, tf.zeros([self.max_boxes - num, 4], b.dtype)], 0) 48 | else: 49 | l = l[:self.max_boxes] 50 | b = b[:self.max_boxes] 51 | 52 | return b, l 53 | 54 | boxes_mix, labels_mix = tf.map_fn( 55 | lambda inp: _fn(*inp), 56 | elems=(boxes_mix, labels_mix), 57 | fn_output_signature=(boxes_mix.dtype, labels_mix.dtype)) 58 | 59 | images_mix = tf.cast(images_mix, image_dtype) 60 | 61 | return images_mix, boxes_mix, labels_mix 62 | 63 | def __call__(self, images, images_info): 64 | with tf.name_scope("mixup"): 65 | images = tf.cast(images, tf.uint8) 66 | images, images_info["boxes"], images_info["labels"] = tf.cond( 67 | tf.random.uniform([]) >= self.prob, 68 | lambda: self._mixup(images, images_info["boxes"], images_info["labels"]), 69 | lambda: (images, images_info["boxes"], images_info["labels"])) 70 | 71 | return images, images_info 72 | -------------------------------------------------------------------------------- /core/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from abc import ABCMeta 3 | from abc import abstractmethod 4 | 5 | 6 | class Sampler(metaclass=ABCMeta): 7 | def __init__(self, num_proposals, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=False, **kwargs): 8 | self.num_proposals = num_proposals 9 | self.pos_fraction = pos_fraction 10 | self.neg_pos_ub = neg_pos_ub 11 | self.add_gt_as_proposals = add_gt_as_proposals 12 | 13 | self.positive_sampler = self 14 | self.negative_sampler = self 15 | 16 | @abstractmethod 17 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 18 | pass 19 | 20 | @abstractmethod 21 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 22 | pass 23 | 24 | def sample(self, assigned_boxes, assigned_labels, gt_boxes=None, gt_labels=None, **kwargs): 25 | """Sample positive and negative boxes. 26 | 27 | Args: 28 | assigned_boxes (Tensor): The assigned boxes in assigner. 29 | assigned_labels (Tensor): The assigned labels in assigner. 30 | gt_boxes (Tensor): ground truth boxes. 31 | gt_labels (Tensor): ground truth labels. 
32 | 
33 |         Returns:
34 |             A tuple -> (target_boxes, target_labels, box_weights, label_weights).
35 |         """
36 |         if self.add_gt_as_proposals and gt_boxes is not None:
37 |             assigned_boxes = tf.concat([gt_boxes, assigned_boxes], 0)
38 |             assigned_labels = tf.concat([gt_labels, assigned_labels], 0)
39 | 
40 |         num_expected_pos = int(self.num_proposals * self.pos_fraction)
41 |         pos_inds = self.positive_sampler._sample_positive(assigned_labels, num_expected_pos, **kwargs)
42 |         num_sampled_pos = tf.size(pos_inds)
43 |         num_expected_neg = self.num_proposals - num_sampled_pos
44 | 
45 |         if self.neg_pos_ub >= 0:
46 |             _pos = tf.maximum(1, num_expected_pos)
47 |             neg_upper_bound = self.neg_pos_ub * _pos
48 | 
49 |             if num_expected_neg > neg_upper_bound:
50 |                 num_expected_neg = neg_upper_bound
51 | 
52 |         neg_inds = self.negative_sampler._sample_negative(assigned_labels, num_expected_neg, **kwargs)
53 | 
54 |         box_weights = tf.zeros_like(assigned_labels, dtype=tf.float32)
55 |         box_weights = tf.tensor_scatter_nd_update(
56 |             box_weights, pos_inds[:, None], tf.ones_like(pos_inds, box_weights.dtype))
57 |         label_weights = tf.tensor_scatter_nd_update(
58 |             box_weights, neg_inds[:, None], tf.ones_like(neg_inds, box_weights.dtype))
59 | 
60 |         # target_labels = tf.where(label_weights >= 1, assigned_labels, tf.zeros_like(assigned_labels))
61 |         # box_weights = tf.expand_dims(box_weights, -1)
62 | 
63 |         return assigned_boxes, assigned_labels, box_weights, label_weights
64 | 
65 |     def __call__(self, assigned_boxes, assigned_labels, gt_boxes=None, gt_labels=None, **kwargs):
66 |         with tf.name_scope("sample"):
67 |             return self.sample(assigned_boxes, assigned_labels, gt_boxes=gt_boxes, gt_labels=gt_labels, **kwargs)
68 | 
69 | 
--------------------------------------------------------------------------------
/core/assigners/uniform_assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .assigner import Assigner
3 | from ..builder import ASSIGNERS
4 | from utils import box_utils
5 | 
6 | 
7 | @ASSIGNERS.register
8 | class UniformAssigner(Assigner):
9 |     """
10 |     Uniform matching between anchors and gt boxes, which yields a balanced
11 |     number of positive anchors across the gt boxes.
12 | 
13 |     Args:
14 |         match_times(int): Number of positive anchors for each gt box.
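
    Example (mirrors the smoke test at the bottom of this file):
        assigner = UniformAssigner(match_times=8)
        tgt_boxes, tgt_labels = assigner(gt_boxes, gt_labels, anchors, pred_boxes)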
15 |     """
16 |     def __init__(self,
17 |                  pos_ignore_thresh: float = 0.7,
18 |                  neg_ignore_thresh: float = 0.15,
19 |                  match_times: int = 4, **kwargs):
20 |         super(UniformAssigner, self).__init__(**kwargs)
21 | 
22 |         self.match_times = match_times
23 |         self.pos_ignore_thresh = pos_ignore_thresh
24 |         self.neg_ignore_thresh = neg_ignore_thresh
25 | 
26 |     def _cdist(self, x, y):
27 |         with tf.name_scope("cdist"):
28 |             x = tf.expand_dims(x, 0)
29 |             y = tf.expand_dims(y, 1)
30 | 
31 |             dist = tf.sqrt(tf.reduce_sum(tf.square(x - y), -1))
32 | 
33 |             return dist
34 | 
35 |     def assign(self, gt_boxes, gt_labels, anchors, predicted_boxes):
36 |         with tf.name_scope("assign"):
37 |             # Compute the distance cost between boxes (Euclidean distance here).
38 |             # Note that we use the anchors and the predicted boxes both.
39 |             C = self._cdist(predicted_boxes, gt_boxes)
40 |             C1 = self._cdist(anchors, gt_boxes)
41 | 
42 |             _, indices = tf.nn.top_k(C, k=self.match_times)
43 |             _, indices2 = tf.nn.top_k(C1, k=self.match_times)
44 | 
45 |             indices = tf.transpose(indices)
46 |             indices2 = tf.transpose(indices2)
47 |             indices = tf.reshape(indices, [-1, 1])
48 |             indices2 = tf.reshape(indices2, [-1, 1])
49 |             indices = tf.concat([indices, indices2], 0)
50 |             gt_boxes = tf.tile(gt_boxes, [self.match_times * 2, 1])
51 |             gt_labels = tf.tile(gt_labels, [self.match_times * 2])
52 | 
53 |             anchor_ious = box_utils.bbox_overlap(anchors, gt_boxes)
54 |             pos_anchor_ious = tf.gather_nd(anchor_ious, tf.concat([indices, tf.range(tf.shape(indices)[0])[:, None]], -1))
55 |             pos_ignore_mask = pos_anchor_ious < self.pos_ignore_thresh
56 | 
57 |             gt_labels = tf.where(pos_ignore_mask, 0 - tf.ones_like(gt_labels), gt_labels)
58 | 
59 |             tgt_boxes = tf.scatter_nd(indices, gt_boxes, tf.shape(predicted_boxes))
60 |             tgt_labels = tf.scatter_nd(indices, gt_labels, tf.shape(predicted_boxes[:, 0]))
61 | 
62 |             pred_ious = box_utils.bbox_overlap(predicted_boxes, gt_boxes)
63 |             pred_max_ious = tf.reduce_max(pred_ious, 1)
64 | 
65 |             neg_ignore_mask = pred_max_ious > self.neg_ignore_thresh
66 |             tgt_labels = tf.where(neg_ignore_mask, 0 - tf.ones_like(tgt_labels), tgt_labels)
67 | 
68 |             return tgt_boxes, tgt_labels
69 | 
70 |     def __call__(self, gt_boxes, gt_labels, anchors, pred_boxes):
71 |         return self.assign(gt_boxes, gt_labels, anchors, pred_boxes)
72 | 
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     pboxes = tf.random.uniform([100, 4])
77 |     anchors = tf.random.uniform([100, 4])
78 | 
79 |     gt_boxes = tf.random.uniform([2, 4])
80 |     gt_labels = tf.constant([2, 3])
81 | 
82 |     assigner = UniformAssigner(match_times=8)
83 |     assigner.assign(gt_boxes, gt_labels, anchors, pboxes)
--------------------------------------------------------------------------------
/core/assigners/min_cost_assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .assigner import Assigner
3 | from ..builder import ASSIGNERS
4 | from core.bbox import compute_unaligned_iou
5 | 
6 | 
7 | @ASSIGNERS.register
8 | class MinCostAssigner(Assigner):
9 |     def __init__(self, class_weight=1., l1_weight=1., iou_weight=1., iou_type="giou", alpha=0.25, gamma=2., **kwargs):
10 |         super(MinCostAssigner, self).__init__(**kwargs)
11 | 
12 |         self._class_weights = class_weight
13 |         self._l1_weight = l1_weight
14 |         self._iou_weight = iou_weight
15 |         self._iou_type = iou_type
16 | 
17 |         self._gamma = gamma
18 |         self._alpha = alpha
19 | 
20 |     def assign(self, gt_boxes, gt_labels, pred_boxes, pred_labels):
21 |         with tf.name_scope("assign"):
22 |             valid_mask = gt_labels > 0
23 |             gt_labels = tf.boolean_mask(gt_labels, valid_mask) - 1
24 |             gt_boxes = tf.boolean_mask(gt_boxes, valid_mask)
25 | 
26 |             # Compute the classification cost.
27 |             num_classes = tf.shape(pred_labels)[-1]
28 |             hw = tf.shape(pred_boxes)[:2]
29 |             hwhw = tf.tile(tf.cast(hw, tf.float32), [2])
30 |             pred_boxes = tf.reshape(pred_boxes, [hw[0] * hw[1], 4])
31 |             pred_labels = tf.reshape(pred_labels, [hw[0] * hw[1], num_classes])
32 | 
33 |             pred_probs = tf.nn.sigmoid(pred_labels)
34 | 
35 |             neg_label_cost = (1 - self._alpha) * tf.pow(pred_probs, self._gamma) * (-tf.math.log(1 - pred_probs + 1e-8))
36 |             pos_label_cost = self._alpha * tf.pow(1 - pred_probs, self._gamma) * (-tf.math.log(pred_probs + 1e-8))
37 |             label_cost = tf.gather(pos_label_cost, gt_labels, axis=-1) - tf.gather(neg_label_cost, gt_labels, axis=-1)
38 | 
39 |             # Compute the L1 cost between boxes
40 |             bbox_cost = tf.reduce_sum(tf.abs(tf.expand_dims(pred_boxes, 1) / hwhw - tf.expand_dims(gt_boxes, 0) / hwhw), 2)
41 | 
42 |             # Compute the IoU cost between boxes
43 |             giou_cost = compute_unaligned_iou(gt_boxes, pred_boxes, self._iou_type)
44 | 
45 |             cost = self._class_weights * label_cost + self._l1_weight * bbox_cost + self._iou_weight * giou_cost
46 | 
47 |             inds = tf.argmin(cost, 0)
48 | 
49 |             tgt_boxes = tf.zeros_like(pred_boxes)
50 |             tgt_labels = tf.zeros_like(pred_labels)
51 | 
52 |             tgt_boxes = tf.tensor_scatter_nd_update(tgt_boxes, inds[:, None], gt_boxes)
53 |             tgt_labels = tf.tensor_scatter_nd_update(tgt_labels, inds[:, None], tf.one_hot(gt_labels, num_classes))
54 | 
55 |             return tgt_boxes, tgt_labels
56 | 
57 |     def __call__(self, gt_boxes, gt_labels, pred_boxes, pred_labels):
58 |         return self.assign(gt_boxes, gt_labels, pred_boxes, pred_labels)
59 | 
60 | 
61 | def test():
62 |     import numpy as np
63 | 
64 |     pred_boxes = tf.random.uniform([64, 64, 4], 0, 255)
65 |     pred_labels = tf.random.uniform([64, 64, 80], -5., 5.)
66 | 
67 |     gt_boxes = tf.constant([[32, 120, 120, 256], [200, 201, 434, 472]], tf.float32)
68 |     gt_labels = tf.constant([1, 23], tf.int32)
69 | 
70 |     assigner = MinCostAssigner()
71 |     boxes, labels = assigner(gt_boxes, gt_labels, pred_boxes, pred_labels)
72 |     print(gt_boxes)
73 |     print(tf.gather_nd(boxes, tf.where(boxes > 0)))
74 |     print(tf.where(labels == 1))
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     test()
79 | 
80 | 
--------------------------------------------------------------------------------
/data/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import math
2 | import tensorflow as tf
3 | from ..augmentations import Mixup
4 | from ..augmentations import Mosaic
5 | from ..builder import build_augmentation
6 | 
7 | 
8 | class Dataset(object):
9 |     def __init__(self,
10 |                  dataset_dir,
11 |                  training=True,
12 |                  batch_size=32,
13 |                  augmentations=[],
14 |                  max_boxes=200,
15 |                  skip_crowd=True,
16 |                  mosaic=None,
17 |                  mixup=None,
18 |                  dtype=tf.float32,
19 |                  **kwargs):
20 |         self.dataset_dir = dataset_dir
21 |         self.training = training
22 |         self.batch_size = batch_size
23 |         self.max_boxes = max_boxes
24 |         self.skip_crowd = skip_crowd
25 |         self.dtype = dtype
26 | 
27 |         if mosaic is not None:
28 |             self.mosaic = Mosaic(max_boxes=max_boxes, **mosaic)
29 |             assert "ResizeV2" in [list(n.keys())[0] for n in augmentations], "When using Mosaic, ResizeV2 should be in the augmentations."
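            # Note: the yamls in this repo spell augmentation entries in two shapes,
            # e.g. `- FlipLeftToRight: {probability: 0.5}` (the gfl_*_mstrain yamls) vs.
            # `- augmentation: FlipLeftToRight` (the gfl_r50_fpn_1x yaml). The Mosaic
            # check above assumes the first, single-key-dict shape.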
30 |         if mixup is not None:
31 |             self.mixup = Mixup(batch_size=batch_size, max_boxes=max_boxes, **mixup)
32 | 
33 |         self.augmentations = [build_augmentation(**kw) for kw in augmentations]
34 | 
35 |     def compose(self, image, image_info):
36 |         for aug in self.augmentations:
37 |             image, image_info = aug(image, image_info)
38 | 
39 |         return image, image_info
40 | 
41 |     def is_valid_jpg(self, jpg_file):
42 |         with open(jpg_file, 'rb') as f:
43 |             f.seek(-2, 2)
44 |             buf = f.read()
45 | 
46 |         return buf == b'\xff\xd9'  # check that the JPEG ends with the EOI marker (0xFFD9)
47 | 
48 |     def _bytes_list_feature(self, value):
49 |         if not isinstance(value, list):
50 |             value = [value]
51 |         return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
52 | 
53 |     def _int64_list_feature(self, value):
54 |         if not isinstance(value, list):
55 |             value = [value]
56 | 
57 |         return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
58 | 
59 |     def _float_list_feature(self, value):
60 |         if not isinstance(value, list):
61 |             value = [value]
62 | 
63 |         return tf.train.Feature(float_list=tf.train.FloatList(value=value))
64 | 
65 |     def create_tfrecord(self, image_dir, image_info_file, output_dir, num_shards):
66 |         raise NotImplementedError()
67 | 
68 |     def parser(self, serialized):
69 |         raise NotImplementedError()
70 | 
71 |     def dataset(self):
72 |         with tf.device("/cpu:0"):
73 |             dataset = tf.data.TFRecordDataset(self.tf_record_sources)
74 |             dataset = dataset.map(map_func=self.parser)
75 | 
76 |             if hasattr(self, "mosaic"):
77 |                 self.batch_size *= 4
78 |             if self.training:
79 |                 dataset = dataset.shuffle(buffer_size=self.batch_size * 10)
80 |             dataset = dataset.batch(batch_size=self.batch_size, drop_remainder=True)
81 | 
82 |             if hasattr(self, "mosaic"):
83 |                 dataset = dataset.map(self.mosaic)
84 | 
85 |             # Mixup must be applied after Mosaic.
86 |             if hasattr(self, "mixup"):
87 |                 dataset = dataset.map(self.mixup)
88 | 
89 |             return dataset.prefetch(tf.data.experimental.AUTOTUNE)
--------------------------------------------------------------------------------
/core/optimizers/accum_optimizer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.python.keras import backend as K
2 | from tensorflow.python.keras.optimizers import Optimizer
3 | from ..builder import OPTIMIZERS
4 | 
5 | 
6 | @OPTIMIZERS.register
7 | class AccumOptimizer(Optimizer):
8 |     """Inherits the Optimizer class, wrapping an existing optimizer to
9 |     implement gradient accumulation.
10 | 
11 |     Gradients are accumulated for `steps_per_update` steps, and the wrapped
12 |     optimizer performs one real update per accumulation cycle.
13 | 
14 |     # Arguments
15 |         optimizer: an instance of a keras optimizer (supporting
16 |             all keras optimizers currently available);
17 |         steps_per_update: the steps of gradient accumulation.
18 | 
19 |     # Returns
20 |         a new keras optimizer.
21 | 
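    Usage (a sketch; this wrapper targets the v1-style keras optimizer API,
    i.e. optimizers that expose `get_updates`):
        opt = AccumOptimizer(SGD(0.01), steps_per_update=4)
        model.compile(optimizer=opt, loss="mse")  # effective batch = 4 * batch_size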
22 |     """
23 |     def __init__(self, optimizer, steps_per_update=1, **kwargs):
24 |         super(AccumOptimizer, self).__init__(**kwargs)
25 | 
26 |         self.optimizer = optimizer
27 |         with K.name_scope(self.__class__.__name__):
28 |             self.steps_per_update = steps_per_update
29 |             self.iterations = K.variable(0, "int64", "iteration")
30 |             self.cond = K.equal(self.iterations % steps_per_update, 0)
31 |             self.lr = self.optimizer.lr
32 | 
33 |             self.accum_grads = None
34 | 
35 |             self.optimizer.lr = K.switch(self.cond, self.lr, 0)
36 |             for attr in ["momentum", "rho", "beta_1", "beta_2"]:
37 |                 if hasattr(self.optimizer, attr):
38 |                     value = getattr(self.optimizer, attr)
39 |                     setattr(self, attr, value)
40 |                     setattr(self.optimizer, attr, 1. - 1e-7)
41 | 
42 |             for cfg in self.optimizer.get_config():
43 |                 if not hasattr(self, cfg):
44 |                     value = getattr(self.optimizer, cfg)
45 |                     setattr(self, cfg, value)
46 | 
47 |             # Override the original get_gradients method with the accumulated gradients.
48 |             def get_gradients(loss, params):
49 |                 return [ag / self.steps_per_update for ag in self.accum_grads]
50 | 
51 |             self.optimizer.get_gradients = get_gradients
52 | 
53 |     def get_updates(self, loss, params):
54 |         self.updates = [
55 |             K.update_add(self.iterations, 1),
56 |             K.update_add(self.optimizer.iterations, K.cast(self.cond, "int64"))
57 |         ]
58 | 
59 |         # accumulate gradients
60 |         self.accum_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
61 |         grads = self.get_gradients(loss, params)
62 |         for g, ag in zip(grads, self.accum_grads):
63 |             self.updates.append(K.update(ag, K.switch(self.cond, ag * 0, ag + g)))
64 | 
65 |         self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
66 |         self.weights.extend(self.optimizer.weights)
67 | 
68 |         return self.updates
69 | 
70 |     def get_config(self):
71 |         iterations = K.eval(self.iterations)
72 |         K.set_value(self.iterations, 0)
73 |         config = self.optimizer.get_config()
74 |         K.set_value(self.iterations, iterations)
75 | 
76 |         return config
77 | 
78 |     @property
79 |     def learning_rate(self):
80 |         return self.optimizer.learning_rate
81 | 
82 |     @learning_rate.setter
83 |     def learning_rate(self, value):
84 |         self.optimizer.learning_rate = value
85 | 
86 | 
--------------------------------------------------------------------------------
/models/necks/feature_fusion_pyramid.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from core.layers import build_activation
3 | from core.layers import build_convolution
4 | from core.layers import build_normalization
5 | from models.builder import NECKS
6 | 
7 | 
8 | @NECKS.register("FeatureFusionPyramid")
9 | def feature_fusion_pyramid(inputs,
10 |                            convolution="conv2d",
11 |                            normalization="batch_norm",
12 |                            activation="relu",
13 |                            output_filters=(),
14 |                            num_outputs=6,
15 |                            group=32,
16 |                            weight_decay=0.,
17 |                            add_extra_conv=False,
18 |                            use_multiplication=False):
19 |     num_inputs = len(inputs)
20 |     output_filters = [output_filters] * num_inputs \
21 |         if isinstance(output_filters, (int, float)) else output_filters
22 |     assert len(output_filters) == num_inputs
23 | 
24 | 
25 |     # build top-down path
26 |     kernel_regularizer = tf.keras.regularizers.l2(weight_decay)
27 |     for i in range(num_inputs - 1, 0, -1):
28 |         top = tf.keras.layers.Conv2DTranspose(filters=output_filters[i-1],
29 |                                               kernel_size=(4, 4),
30 |                                               strides=(2, 2),
31 |                                               padding="same",
32 |                                               kernel_regularizer=kernel_regularizer)(inputs[i])
33 |         if use_multiplication:
34 |             inputs[i-1] = tf.keras.layers.Multiply()([top, inputs[i-1]])
35 |         else:
36 |             inputs[i-1] = tf.keras.layers.Add()([top, inputs[i-1]])
37 |         inputs[i-1] = conv_block(convolution,
38 |                                  filters=256,
39 |                                  kernel_size=(1, 1),
40 |                                  strides=(1, 1),
41 |                                  kernel_regularizer=kernel_regularizer,
42 |                                  normalization=normalization,
43 |                                  group=group,
44 |                                  activation=activation,
45 |                                  name="reduced_conv2d_" + str(i))(inputs[i-1])
46 | 
47 |     inputs[-1] = conv_block(convolution,
48 |                             filters=256,
49 |                             kernel_size=(1, 1),
50 |                             strides=(1, 1),
51 |                             kernel_regularizer=kernel_regularizer,
52 |                             normalization=normalization,
53 |                             group=group,
54 |                             activation=activation,
55 |                             name="reduced_conv2d_" + str(num_inputs))(inputs[-1])
56 | 
57 |     for i in range(num_inputs, num_outputs):
58 |         if add_extra_conv:
59 |             inputs.append(conv_block(convolution,
60 |                                      filters=256,
61 |                                      kernel_size=(3, 3),
62 |                                      strides=(2, 2),
63 |                                      kernel_regularizer=kernel_regularizer,
64 |                                      normalization=normalization,
65 |                                      group=group,
66 |                                      activation=activation,
67 |                                      name="extra_conv2d_" + str(i + 1))(inputs[-1]))
68 |         else:
69 |             inputs.append(tf.keras.layers.MaxPool2D(
70 |                 (2, 2), (2, 2), "same", name="extra_max_pool_" + str(i+1))(inputs[-1]))
71 | 
72 |     return inputs
73 | 
74 | 
--------------------------------------------------------------------------------
/models/backbones/backbone.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | 
5 | class Backbone(object):
6 |     def __init__(self,
7 |                  name,
8 |                  convolution='conv2d',
9 |                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
10 |                  normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
11 |                  activation=dict(activation="relu"),
12 |                  output_indices=(3, 4),
13 |                  strides=(2, 2, 2, 2, 2),
14 |                  dilation_rates=(1, 1, 1, 1, 1),
15 |                  frozen_stages=(-1,),
16 |                  data_format="channels_last",
17 |                  input_shape=None,
18 |                  input_tensor=None,
19 |                  dropblock=None,
20 |                  num_classes=1000,
21 |                  drop_rate=0.5):
22 |         """The backbone base class.
23 | 
24 |         Args:
25 |             convolution: (str) the convolution type used in the backbone.
26 |             normalization: (dict) the normalization layer config; if None, no normalization is used.
27 |             activation: (dict) the activation config.
28 |             output_indices: (list/tuple) the indices of the stages to output, e.g. [3, 4, 5] means
29 |                 output stage 3, stage 4 and stage 5 of the backbone.
30 |             strides: (list/tuple) the stride of every stage in the backbone, e.g. [1, 1, 1, 1, 1].
31 |             dilation_rates: (list/tuple) the dilation rate of every stage in the backbone.
32 |             frozen_stages: (list/tuple) the indices of the stages to freeze,
33 |                 e.g. [1, 2, 3] means freeze stage 1, stage 2 and stage 3.
34 |             frozen_batch_normalization: (bool) whether to freeze the batch normalization layers.
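
        Example (a config sketch; the argument values are illustrative,
        not defaults from this repo):
            backbone = ResNet50(input_shape=(1024, 1024, 3),
                                output_indices=(3, 4, 5),
                                frozen_stages=(1,))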
35 | """ 36 | assert isinstance(output_indices, (list, tuple)) or output_indices is None 37 | assert isinstance(strides, (list, tuple)) or strides is None 38 | assert isinstance(frozen_stages, (list, tuple)) or frozen_stages is None 39 | assert isinstance(dilation_rates, (list, tuple)) or dilation_rates is None 40 | 41 | self.name = name 42 | self.output_indices = output_indices 43 | self.strides = strides 44 | self.frozen_stages = frozen_stages 45 | self.dilation_rates = dilation_rates 46 | self.normalization = normalization 47 | self.convolution = convolution 48 | self.activation = activation 49 | self.dropblock = dropblock 50 | self.num_classes = num_classes 51 | self.drop_rate = drop_rate 52 | self.kernel_initializer = kernel_initializer 53 | self.data_format = data_format 54 | 55 | self._rgb_mean = np.array([[[[0.485, 0.456, 0.406]]]]) * 255. 56 | self._rgb_std = np.array([[[[0.229, 0.224, 0.225]]]]) * 255. 57 | 58 | if input_tensor is None: 59 | img_input = tf.keras.layers.Input(shape=input_shape) 60 | else: 61 | if not tf.keras.backend.is_keras_tensor(input_tensor): 62 | img_input = tf.keras.layers.Input(tensor=input_tensor, shape=input_shape) 63 | else: 64 | img_input = input_tensor 65 | 66 | self.img_input = img_input 67 | self.input_shape = input_shape 68 | self.input_tensor = input_tensor 69 | if output_indices: 70 | self._is_classifier = -1 in self.output_indices 71 | 72 | def build_model(self): 73 | raise NotImplementedError() 74 | 75 | def init_weights(self, pre_trained_weights_path): 76 | pass 77 | 78 | def load_pre_trained_weights(self, pre_trained_weights_path): 79 | pass 80 | -------------------------------------------------------------------------------- /core/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_addons as tfa 3 | from .activations import Mish 4 | from .scale import Scale 5 | from .max_in_out import MaxInOut 6 | from .drop_block import DropBlock2D 7 | from .nms import FastNonMaxSuppression 8 | from .nms import NonMaxSuppression 9 | from .nms import CombinedNonMaxSuppression 10 | from .nms import SoftNonMaxSuppression 11 | # from .normalizations import L2Normalization 12 | from .dcnv2 import DCNv2 13 | from .normalizations import GroupNormalization 14 | from .nearest_upsamling import NearestUpsampling2D 15 | from .weight_standardization_conv2d import WSConv2D 16 | from .normalizations import FilterResponseNormalization 17 | from .normalizations import InstanceNormalization 18 | from .normalizations import FrozenBatchNormalization 19 | from .proposal_layer import ProposalLayer 20 | from .position_sensitive_roi_pooling import PSRoIPooling 21 | from .position_sensitive_average_pooling import PSAvgPooling 22 | from .roi_pooling import SingleLevelAlignedRoIPooling, MultiLevelAlignedRoIPooling 23 | 24 | 25 | def build_convolution(convolution, **kwargs): 26 | if convolution == "depthwise_conv2d": 27 | return tf.keras.layers.DepthwiseConv2D(**kwargs) 28 | elif convolution == "wsconv2d": 29 | return WSConv2D(**kwargs) 30 | elif convolution == "conv2d": 31 | return tf.keras.layers.Conv2D(**kwargs) 32 | elif convolution == "separable_conv2d": 33 | return tf.keras.layers.SeparableConv2D(**kwargs) 34 | elif convolution == "dcnv2": 35 | return DCNv2(**kwargs) 36 | else: 37 | raise TypeError("Could not interpret convolution function identifier: {}".format(repr(convolution))) 38 | 39 | 40 | def build_normalization(normalization, **kwargs): 41 | if normalization == 
"group_norm": 42 | return GroupNormalization(**kwargs) 43 | elif normalization == "batch_norm": 44 | return tf.keras.layers.BatchNormalization(**kwargs) 45 | elif normalization == "frozen_batch_norm": 46 | return FrozenBatchNormalization(**kwargs) 47 | # elif normalization == "switchable_norm": 48 | # return SwitchableNormalization(**kwargs) 49 | elif normalization == "filter_response_norm": 50 | return FilterResponseNormalization(**kwargs) 51 | elif normalization == "sync_batch_norm": 52 | return tf.keras.layers.experimental.SyncBatchNormalization(**kwargs) 53 | else: 54 | raise TypeError("Could not interpret normalization function identifier: {}".format( 55 | repr(normalization))) 56 | 57 | 58 | def build_activation(**kwargs): 59 | if kwargs["activation"] == "leaky_relu": 60 | kwargs.pop("activation") 61 | return tf.keras.layers.LeakyReLU(**kwargs) 62 | if kwargs["activation"] == "mish": 63 | kwargs.pop("activation") 64 | return Mish(**kwargs) 65 | 66 | return tf.keras.layers.Activation(**kwargs) 67 | 68 | 69 | def build_roi_pooling(roi_pooling, **kwargs): 70 | if roi_pooling == "SingleLevelAlignedRoIPooling": 71 | return SingleLevelAlignedRoIPooling(**kwargs) 72 | 73 | if roi_pooling == "MultiLevelAlignedRoIPooling": 74 | return MultiLevelAlignedRoIPooling(**kwargs) 75 | 76 | if roi_pooling == "PSRoIPooling": 77 | return PSRoIPooling(**kwargs) 78 | 79 | if roi_pooling == "PSAvgPooling": 80 | return PSAvgPooling(**kwargs) 81 | 82 | raise TypeError("Could not interpret roi_pooling function identifier: {}".format(repr(roi_pooling))) 83 | 84 | 85 | __all__ = [ 86 | "Scale", 87 | "MaxInOut", 88 | "DropBlock2D", 89 | "L2Normalization", 90 | "build_activation", 91 | "build_convolution", 92 | "build_normalization", 93 | "NearestUpsampling2D", 94 | "build_roi_pooling", 95 | "ProposalLayer" 96 | ] 97 | -------------------------------------------------------------------------------- /yamls/gfl_x101_32x4d_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNeXt101_32X4D 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | 
  repeats: 4
86 |   sampler:
87 |     sampler: PseudoSampler
88 |   use_sigmoid: true
89 | input_shape: !!python/tuple
90 | - 1024
91 | - 1024
92 | - 3
93 | neck:
94 |   add_extra_convs: true
95 |   feat_dims: 256
96 |   max_level: 5
97 |   min_level: 3
98 |   neck: FPN
99 |   num_output_levels: 5
100 |   relu_before_extra_convs: true
101 | num_classes: 80
102 | test:
103 |   iou_threshold: 0.6
104 |   nms: CombinedNonMaxSuppression
105 |   post_nms_size: 100
106 |   pre_nms_size: 5000
107 |   score_threshold: 0.3
108 | train:
109 |   checkpoint_dir: checkpoints/gfl
110 |   dataset:
111 |     augmentations:
112 |     - FlipLeftToRight:
113 |         probability: 0.5
114 |     - RandomDistortColor:
115 |         probability: 1.0
116 |     - Resize:
117 |         max_scale: 2.0
118 |         min_scale: 0.5
119 |         size: &id001 !!python/tuple
120 |         - 1024
121 |         - 1024
122 |         strides: 32
123 |     batch_size: 4
124 |     dataset: COCODataset
125 |     dataset_dir: /data/bail/COCO
126 |     num_samples: 118287
127 |     training: true
128 |   gradient_clip_norm: 10.0
129 |   log_every_n_steps: 100
130 |   mixed_precision:
131 |     loss_scale: null
132 |   optimizer:
133 |     momentum: 0.9
134 |     optimizer: SGD
135 |   pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt
136 |   save_ckpt_steps: 5000
137 |   scheduler:
138 |     learning_rate_scheduler:
139 |       boundaries:
140 |       - 16
141 |       - 22
142 |       scheduler: PiecewiseConstantDecay
143 |       values:
144 |       - 0.02
145 |       - 0.002
146 |       - 0.0002
147 |     train_epochs: 24
148 |     warmup:
149 |       steps: 800
150 |       warmup_learning_rate: 0.001
151 |   summary_dir: logs/gfl
152 | val:
153 |   dataset:
154 |     augmentations:
155 |     - Resize:
156 |         max_scale: 1.0
157 |         min_scale: 1.0
158 |         size: *id001
159 |         strides: 32
160 |     batch_size: 4
161 |     dataset: COCODataset
162 |     dataset_dir: /data/bail/COCO
163 |     training: false
164 |   samples: 5000
165 | weight_decay: 0.0001
166 | 
--------------------------------------------------------------------------------
/models/necks/path_aggregation_neck.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from core.layers import build_activation
3 | from core.layers import build_convolution
4 | from core.layers import build_normalization
5 | from models.builder import NECKS
6 | 
7 | 
8 | @NECKS.register("PAN")
9 | def path_aggregation_neck(inputs,
10 |                           convolution="conv2d",
11 |                           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
12 |                           activation=dict(activation="relu"),
13 |                           feat_dims=64,
14 |                           min_level=3,
15 |                           max_level=7,
16 |                           add_extra_conv=False,
17 |                           dropblock=None,
18 |                           weight_decay=0.,
19 |                           use_multiplication=False,
20 |                           name="path_aggregation_neck"):
21 |     kernel_regularizer = (tf.keras.regularizers.l2(weight_decay)
22 |                           if weight_decay is not None and weight_decay > 0 else None)
23 |     num_outputs = max_level - min_level + 1
24 |     output_filters = [feat_dims] * num_outputs
25 | 
26 |     features = []
27 |     num_inputs = len(inputs)
28 |     for i, feat in enumerate(inputs):
29 |         x = conv_block(convolution="conv2d",
30 |                        filters=feat_dims,
31 |                        kernel_size=(1, 1),
32 |                        strides=(1, 1),
33 |                        kernel_regularizer=kernel_regularizer,
34 |                        normalization=normalization,
35 |                        activation=activation,
36 |                        dropblock=dropblock,
37 |                        name="top_down_conv2d_%d" % (i+1))(feat)
38 |         features.append(x)
39 | 
40 |     for i in range(num_inputs - 2, -1, -1):
41 |         top = tf.keras.layers.UpSampling2D((2, 2), interpolation="nearest")(features[i+1])
42 |         if use_multiplication:
43 |             features[i] = 
tf.keras.layers.Multiply()([features[i], top]) 44 | else: 45 | features[i] = tf.keras.layers.Add()([features[i], top]) 46 | 47 | for i in range(1, num_inputs): 48 | x = conv_block(convolution="conv2d", 49 | filters=feat_dims, 50 | kernel_size=(3, 3), 51 | strides=(2, 2), 52 | kernel_regularizer=kernel_regularizer, 53 | normalization=normalization, 54 | activation=activation, 55 | dropblock=dropblock, 56 | name="bottom_up_conv2d_%d" % (i+1))(features[i-1]) 57 | if use_multiplication: 58 | features[i] = tf.keras.layers.Multiply()([x, features[i]]) 59 | else: 60 | features[i] = tf.keras.layers.Add()([x, features[i]]) 61 | 62 | for i in range(num_inputs, num_outputs): 63 | if add_extra_conv: 64 | features.append((conv_block(convolution, 65 | filters=output_filters[i], 66 | kernel_size=(3, 3), 67 | strides=(2, 2), 68 | kernel_regularizer=kernel_regularizer, 69 | normalization=normalization, 70 | group=group, 71 | activation=activation, 72 | name="extra_conv2d_%d" % (i + 1))(features[-1]))) 73 | else: 74 | features.append(tf.keras.layers.MaxPool2D(pool_size=(2, 2), 75 | strides=(2, 2))(features[-1])) 76 | 77 | return features 78 | -------------------------------------------------------------------------------- /yamls/gfl_r50_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 113 | probability: 0.5 114 | - augmentation: RandomDistortColor 115 | - augmentation: Resize 116 | 
img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gfl_r101_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 113 | probability: 0.5 114 | - augmentation: 
RandomDistortColor 115 | - augmentation: Resize 116 | img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gfl_r50_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 
113 | probability: 0.5 114 | - augmentation: RandomDistortColor 115 | - augmentation: Resize 116 | img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gflv2_r50_fpn_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 | neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.3 111 | train: 112 | 
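# NOTE: despite the 1x in the file name, this uses the 2x-style step schedule
# below: LR 0.02 decayed 10x at boundaries 16 and 22 (epochs, against
# train_epochs: 24), after an 800-step warmup from 0.001.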
checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - FlipLeftToRight: 116 | probability: 0.5 117 | - RandomDistortColor: 118 | probability: 1.0 119 | - Resize: 120 | max_scale: 2.0 121 | min_scale: 0.5 122 | size: &id001 !!python/tuple 123 | - 1024 124 | - 1024 125 | strides: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - Resize: 159 | max_scale: 1.0 160 | min_scale: 1.0 161 | size: *id001 162 | strides: 32 163 | batch_size: 4 164 | dataset: COCODataset 165 | dataset_dir: /data/bail/COCO 166 | training: false 167 | samples: 5000 168 | weight_decay: 0.0001 169 | -------------------------------------------------------------------------------- /yamls/retinanet_r101_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNet101 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 
| train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_r50_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNet50 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | 
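# Inference filtering: candidates below score_threshold are dropped, at most
# pre_nms_size boxes enter NMS at iou_threshold, and at most post_nms_size
# detections are returned per image.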
post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_x101_32x4d_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNeXt101_32X4D 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: 
PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_x101_64x4d_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNeXt101_64X4D 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 
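# The neck fuses backbone levels 3-5 and emits num_output_levels: 5 maps; with
# add_extra_convs_on_c5, stride-2 convs on C5 add the two extra levels so the
# RetinaNetHead covers min_level 3 to max_level 7.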
104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/gflv2_r101_fpn_ms2x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 
| neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.35 111 | train: 112 | checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - augmentation: FlipLeftToRight 116 | probability: 0.5 117 | - augmentation: RandomDistortColor 118 | - augmentation: Resize 119 | img_scale: 120 | - &id001 !!python/tuple 121 | - 1333 122 | - 800 123 | keep_ratio: true 124 | - augmentation: Pad 125 | size_divisor: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - augmentation: Resize 159 | img_scale: 160 | - *id001 161 | keep_ratio: true 162 | - augmentation: Pad 163 | size_divisor: 32 164 | batch_size: 4 165 | dataset: COCODataset 166 | dataset_dir: /data/bail/COCO 167 | training: false 168 | samples: 5000 169 | weight_decay: 0.0001 170 | -------------------------------------------------------------------------------- /yamls/gflv2_r50_fpn_ms2x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: 
PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 | neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.35 111 | train: 112 | checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - augmentation: FlipLeftToRight 116 | probability: 0.5 117 | - augmentation: RandomDistortColor 118 | - augmentation: Resize 119 | img_scale: 120 | - &id001 !!python/tuple 121 | - 1333 122 | - 800 123 | keep_ratio: true 124 | - augmentation: Pad 125 | size_divisor: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - augmentation: Resize 159 | img_scale: 160 | - *id001 161 | keep_ratio: true 162 | - augmentation: Pad 163 | size_divisor: 32 164 | batch_size: 4 165 | dataset: COCODataset 166 | dataset_dir: /data/bail/COCO 167 | training: false 168 | samples: 5000 169 | weight_decay: 0.0001 170 | -------------------------------------------------------------------------------- /models/heads/head.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from utils import box_utils 3 | from core import build_loss 4 | from core import build_sampler 5 | from core import build_assigner 6 | from core.builder import build_nms 7 | from core.bbox import build_decoder 8 | from core.bbox import build_encoder 9 | from ..common import ConvNormActBlock 10 | 11 | 12 | class BaseHead(tf.keras.Model): 13 | def __init__(self, cfg, test_cfg, anchor_cfg=None, num_classes=80, is_training=True, data_format="channels_last", **kwargs): 14 | super(BaseHead, self).__init__(**kwargs) 15 | 16 | self.num_classes = num_classes 17 | self.cfg = cfg 18 | self.anchor_cfg = anchor_cfg 19 | self.test_cfg = test_cfg 20 | self.is_training = is_training 21 | self.data_format = data_format 22 | 23 | if test_cfg and test_cfg.get("nms") is not None: 24 | self.nms = build_nms(**test_cfg.as_dict()) 25 | 26 | self.use_sigmoid = True 27 | if cfg.get("use_sigmoid") is not None: 28 | self.use_sigmoid = cfg.use_sigmoid 29 | self._label_dims = num_classes if self.use_sigmoid else num_classes + 1 30 | 31 | self.bbox_loss_func = build_loss(**cfg.bbox_loss.as_dict()) if cfg.get("bbox_loss") is not None else None 32 | self._use_iou_loss = False 33 | if self.bbox_loss_func is not None: 34 | self._use_iou_loss = "IoU" in cfg.bbox_loss.loss 35 | self.label_loss_func = build_loss(**cfg.label_loss.as_dict()) if cfg.get("label_loss") is not None else None 36 | 37 | self.sampler = build_sampler(**cfg.sampler.as_dict()) if cfg.get("sampler") is not None else None 38 
| self.assigner = build_assigner(**cfg.assigner.as_dict()) if cfg.get("assigner") is not None else None 39 | 40 | self.bbox_decoder = build_decoder(**cfg.bbox_decoder.as_dict()) if cfg.get("bbox_decoder") is not None else None 41 | self.bbox_encoder = build_encoder(**cfg.bbox_encoder.as_dict()) if cfg.get("bbox_encoder") is not None else None 42 | 43 | @property 44 | def min_level(self): 45 | if self.cfg.get("min_level"): 46 | return self.cfg.min_level 47 | 48 | return None 49 | 50 | @property 51 | def max_level(self): 52 | if self.cfg.get("max_level"): 53 | return self.cfg.max_level 54 | return None 55 | 56 | def _make_shared_convs(self): 57 | self.box_shared_convs = tf.keras.Sequential(name="box_net") 58 | self.class_shared_convs = tf.keras.Sequential(name="cls_net") 59 | 60 | for i in range(self.cfg.repeats): 61 | self.box_shared_convs.add( 62 | ConvNormActBlock(filters=self.cfg.feat_dims, 63 | kernel_size=(3, 3), 64 | padding="same", 65 | strides=(1, 1), 66 | normalization=self.cfg.normalization.as_dict() if self.cfg.normalization else None, 67 | activation=self.cfg.activation.as_dict(), 68 | name="%d" % i)) 69 | self.class_shared_convs.add( 70 | ConvNormActBlock(filters=self.cfg.feat_dims, 71 | kernel_size=(3, 3), 72 | strides=(1, 1), 73 | padding="same", 74 | normalization=self.cfg.normalization.as_dict() if self.cfg.normalization else None, 75 | activation=self.cfg.activation.as_dict(), 76 | name="%d" % i)) 77 | 78 | def get_targets(self, gt_boxes, gt_labels, total_anchors): 79 | raise NotImplementedError() 80 | 81 | def compute_losses(self, predictions, image_info): 82 | raise NotImplementedError() 83 | 84 | def get_boxes(self, outputs): 85 | raise NotImplementedError() 86 | 87 | -------------------------------------------------------------------------------- /export_saved_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | from models import build_detector 4 | from configs import build_configs 5 | from core import build_optimizer 6 | 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--detector", required=True, type=str) 10 | parser.add_argument("--config", type=str, default=None, help="The yaml file, default None.") 11 | parser.add_argument("--saved_model_dir", required=True, default=None, type=str) 12 | parser.add_argument("--ckpt", type=str, default=None, help="The checkpoint dir or h5 file.") 13 | 14 | parser.add_argument("--nms", type=str, default="CombinedNonMaxSuppression", help="The NMS type.") 15 | parser.add_argument("--iou_threshold", type=float, default=0.5, help="The iou threshold for NMS.") 16 | parser.add_argument("--score_threshold", type=float, default=0.3, help="The score threshold for NMS.") 17 | parser.add_argument("--update_threshold", type=float, default=0.1, help="The update threshold for MatrixNMS.") 18 | parser.add_argument("--pre_nms_size", type=int, default=4000, help="The number of detections before NMS.") 19 | parser.add_argument("--post_nms_size", type=int, default=100, help="The number of detections after NMS.") 20 | parser.add_argument("--nms_kernel", default="gaussian", type=str, help="The kernel type of MatrixNMS.") 21 | parser.add_argument("--nms_sigma", default=2.0, type=float, help="The sigma for MatrixNMS or SoftNMS.") 22 | parser.add_argument("--nms_type", type=str, default=None, 23 | help="If [--nms] is NonMaxSuppressionWithQuality, the [--nms_type] is necessary.") 24 | 25 | args = parser.parse_args() 26 | 27 | 28 | cfg = 
build_configs(args.detector) 29 | 30 | if args.config is None: 31 | cfg.test.nms = args.nms 32 | cfg.test.iou_threshold = args.iou_threshold 33 | cfg.test.score_threshold = args.score_threshold 34 | cfg.test.pre_nms_size = args.pre_nms_size 35 | cfg.test.post_nms_size = args.post_nms_size 36 | 37 | if args.nms == "MatrixNonMaxSuppression": 38 | cfg.test.update_threshold = args.update_threshold 39 | cfg.test.kernel = args.nms_kernel 40 | 41 | if args.nms == "NonMaxSuppressionWithQuality": 42 | assert args.nms_type is not None, "When [--nms] is `NonMaxSuppressionWithQuality`, [--nms_type] is necessary." 43 | 44 | if args.nms in ["MatrixNonMaxSuppression", "SoftNonMaxSuppression"]: 45 | cfg.test.sigma = args.nms_sigma 46 | 47 | if args.nms == "NonMaxSuppressionWithQuality": 48 | cfg.test.nms_type = args.nms_type 49 | if args.nms_type in ["soft_nms", "matrix_nms"]: 50 | cfg.test.sigma = args.nms_sigma 51 | else: 52 | cfg.override(args.config) 53 | 54 | detector = build_detector(cfg.detector, return_loss=False, cfg=cfg) 55 | images = tf.random.uniform([1, cfg.train.input_size[0], cfg.train.input_size[1], 3]) 56 | images = tf.cast(images, tf.uint8) 57 | detector(images) 58 | 59 | if args.ckpt is not None and ".h5" in args.ckpt: 60 | detector.load_weights(args.ckpt) 61 | else: 62 | optimizer = build_optimizer(**cfg.train.optimizer.as_dict()) 63 | 64 | checkpoint = tf.train.Checkpoint(optimizer=optimizer, detector=detector) 65 | manager = tf.train.CheckpointManager( 66 | checkpoint=checkpoint, directory=cfg.train.checkpoint_dir, max_to_keep=10) 67 | latest_checkpoint = manager.latest_checkpoint 68 | checkpoint.restore(latest_checkpoint) 69 | 70 | 71 | saved_model_dir = args.saved_model_dir or "./saved_model/" + args.detector 72 | 73 | tf.saved_model.save(detector, saved_model_dir) 74 | print("saved model to %s" % saved_model_dir) 75 | 76 | # images = tf.random.uniform([1, cfg.train.input_size[0], cfg.train.input_size[1], 3]) 77 | # image_info = {"valid_size": tf.constant([[cfg.train.input_size[0], cfg.train.input_size[1]]]), 78 | # "input_size": tf.constant([[cfg.train.input_size[0], cfg.train.input_size[1]]]), 79 | # "scale_factor": 1.} 80 | # print(detector((images, image_info), training=False)) 81 | -------------------------------------------------------------------------------- /yamls/atss_r50_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - -1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: ATSS 44 | dtype: float32 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | - predicted_centerness 49 | head: 50 | activation: 51 | activation: relu 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Delta2Box 57 | weights: 58 | - 10.0 59 | - 10.0 60 | - 5.0 61 | - 5.0 62 | bbox_encoder: 63 | encoder: Box2Delta 64 | weights: 65 | - 10.0 66 | - 10.0 67 | - 5.0 68 | - 5.0 69 | bbox_loss: 
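# The ATSS head trains three branches: GIoULoss for box regression (below),
# BinaryCrossEntropy on its centerness prediction, and FocalLoss for
# classification.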
70 | loss: GIoULoss 71 | reduction: sum 72 | weight: 2.0 73 | centerness_loss: 74 | from_logits: true 75 | loss: BinaryCrossEntropy 76 | reduction: sum 77 | weight: 1.0 78 | convolution: conv2d 79 | dropblock: null 80 | feat_dims: 256 81 | head: ATSSHead 82 | label_loss: 83 | alpha: 0.25 84 | from_logits: true 85 | gamma: 2.0 86 | label_smoothing: 0.0 87 | loss: FocalLoss 88 | reduction: sum 89 | weight: 1.0 90 | max_level: 7 91 | min_level: 3 92 | normalization: 93 | groups: 32 94 | normalization: group_norm 95 | num_classes: 80 96 | prior: 0.01 97 | repeats: 4 98 | sampler: 99 | sampler: PseudoSampler 100 | use_sigmoid: true 101 | input_shape: !!python/tuple 102 | - 1024 103 | - 1024 104 | - 3 105 | neck: 106 | add_extra_convs: true 107 | feat_dims: 256 108 | max_level: 5 109 | min_level: 3 110 | neck: FPN 111 | num_output_levels: 5 112 | relu_before_extra_convs: true 113 | num_classes: 80 114 | test: 115 | iou_threshold: 0.6 116 | nms: CombinedNonMaxSuppression 117 | nms_type: nms 118 | post_nms_size: 100 119 | pre_nms_size: 1000 120 | score_threshold: 0.35 121 | train: 122 | checkpoint_dir: checkpoints/atss 123 | dataset: 124 | augmentations: 125 | - augmentation: FlipLeftToRight 126 | probability: 0.5 127 | - augmentation: RandomDistortColor 128 | - augmentation: Resize 129 | img_scale: !!python/tuple 130 | - 0.2 131 | - 2 132 | keep_ratio: true 133 | multiscale_mode: range 134 | - augmentation: Pad 135 | size_divisor: 32 136 | batch_size: 4 137 | dataset: COCODataset 138 | dataset_dir: /data/bail/COCO 139 | num_classes: 80 140 | num_samples: 118287 141 | training: true 142 | gradient_clip_norm: 0.0 143 | log_every_n_steps: 100 144 | mixed_precision: 145 | loss_scale: null 146 | optimizer: 147 | momentum: 0.9 148 | optimizer: SGD 149 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 150 | save_ckpt_steps: 5000 151 | scheduler: 152 | learning_rate_scheduler: 153 | initial_learning_rate: 0.02 154 | scheduler: CosineDecay 155 | train_epochs: 36 156 | warmup: 157 | steps: 800 158 | warmup_learning_rate: 0.001 159 | summary_dir: logs/atss 160 | val: 161 | dataset: 162 | augmentations: 163 | - augmentation: Resize 164 | img_scale: 165 | - !!python/tuple 166 | - 1333 167 | - !!python/tuple 168 | - 1024 169 | - 1024 170 | keep_ratio: true 171 | batch_size: 4 172 | dataset: COCODataset 173 | dataset_dir: /data/bail/COCO 174 | training: false 175 | samples: 5000 176 | weight_decay: 0.0001 177 | -------------------------------------------------------------------------------- /yamls/atss_r101_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - -1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: ATSS 44 | dtype: float32 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | - predicted_centerness 49 | head: 50 | activation: 51 | activation: relu 52 | assigner: 53 | assigner: ATSSAssigner 54 | 
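# ATSSAssigner: per pyramid level it takes the topk (9) anchors closest to each
# gt center, then derives an adaptive IoU threshold from the mean and standard
# deviation of those candidates' IoUs to select positives.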
topk: 9 55 | bbox_decoder: 56 | decoder: Delta2Box 57 | weights: 58 | - 10.0 59 | - 10.0 60 | - 5.0 61 | - 5.0 62 | bbox_encoder: 63 | encoder: Box2Delta 64 | weights: 65 | - 10.0 66 | - 10.0 67 | - 5.0 68 | - 5.0 69 | bbox_loss: 70 | loss: GIoULoss 71 | reduction: sum 72 | weight: 2.0 73 | centerness_loss: 74 | from_logits: true 75 | loss: BinaryCrossEntropy 76 | reduction: sum 77 | weight: 1.0 78 | convolution: conv2d 79 | dropblock: null 80 | feat_dims: 256 81 | head: ATSSHead 82 | label_loss: 83 | alpha: 0.25 84 | from_logits: true 85 | gamma: 2.0 86 | label_smoothing: 0.0 87 | loss: FocalLoss 88 | reduction: sum 89 | weight: 1.0 90 | max_level: 7 91 | min_level: 3 92 | normalization: 93 | groups: 32 94 | normalization: group_norm 95 | num_classes: 80 96 | prior: 0.01 97 | repeats: 4 98 | sampler: 99 | sampler: PseudoSampler 100 | use_sigmoid: true 101 | input_shape: !!python/tuple 102 | - 1024 103 | - 1024 104 | - 3 105 | neck: 106 | add_extra_convs: true 107 | feat_dims: 256 108 | max_level: 5 109 | min_level: 3 110 | neck: FPN 111 | num_output_levels: 5 112 | relu_before_extra_convs: true 113 | num_classes: 80 114 | test: 115 | iou_threshold: 0.6 116 | nms: CombinedNonMaxSuppression 117 | nms_type: nms 118 | post_nms_size: 100 119 | pre_nms_size: 1000 120 | score_threshold: 0.35 121 | train: 122 | checkpoint_dir: checkpoints/atss 123 | dataset: 124 | augmentations: 125 | - augmentation: FlipLeftToRight 126 | probability: 0.5 127 | - augmentation: RandomDistortColor 128 | - augmentation: Resize 129 | img_scale: !!python/tuple 130 | - 0.2 131 | - 2 132 | keep_ratio: true 133 | multiscale_mode: range 134 | - augmentation: Pad 135 | size_divisor: 32 136 | batch_size: 4 137 | dataset: COCODataset 138 | dataset_dir: /data/bail/COCO 139 | num_classes: 80 140 | num_samples: 118287 141 | training: true 142 | gradient_clip_norm: 0.0 143 | log_every_n_steps: 100 144 | mixed_precision: 145 | loss_scale: null 146 | optimizer: 147 | momentum: 0.9 148 | optimizer: SGD 149 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 150 | save_ckpt_steps: 5000 151 | scheduler: 152 | learning_rate_scheduler: 153 | initial_learning_rate: 0.02 154 | scheduler: CosineDecay 155 | train_epochs: 36 156 | warmup: 157 | steps: 800 158 | warmup_learning_rate: 0.001 159 | summary_dir: logs/atss 160 | val: 161 | dataset: 162 | augmentations: 163 | - augmentation: Resize 164 | img_scale: 165 | - !!python/tuple 166 | - 1333 167 | - !!python/tuple 168 | - 1024 169 | - 1024 170 | keep_ratio: true 171 | batch_size: 4 172 | dataset: COCODataset 173 | dataset_dir: /data/bail/COCO 174 | training: false 175 | samples: 5000 176 | weight_decay: 0.0001 177 | -------------------------------------------------------------------------------- /configs/onenet_config.py: -------------------------------------------------------------------------------- 1 | from configs import Config 2 | 3 | 4 | def get_onenet_config(num_classes=80): 5 | h = Config() 6 | 7 | input_size = (512, 512) 8 | h.detector = "OneNet" 9 | h.dtype = "float16" 10 | h.data_format = "channels_last" 11 | h.input_shape = (input_size[0], input_size[1], 3) 12 | h.num_classes = num_classes 13 | h.backbone = dict(backbone="ResNet18", 14 | dropblock=None, 15 | normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False), 16 | activation=dict(activation="relu"), 17 | strides=[2, 2, 2, 2, 2], 18 | dilation_rates=[1, 1, 1, 1, 1], 19 | output_indices=[2, 3, 4, 5], 20 | frozen_stages=[1, ]) 21 | 22 | 
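# Each sub-config names a registered component (backbone/neck/head/losses) plus
# its constructor kwargs, and the registry builders assemble the detector from
# them. Hypothetical usage, mirroring export_saved_model.py:
#   from configs import build_configs
#   from models import build_detector
#   cfg = build_configs("OneNet")
#   detector = build_detector(cfg.detector, return_loss=False, cfg=cfg)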
h.neck=dict(neck="CenterNetDeconv", 23 | normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False), 24 | activation=dict(activation="relu")) 25 | h.head=dict(head="OneNetHead", 26 | activation=dict(activation="relu"), 27 | feat_dims=64, 28 | dropblock=None, 29 | num_classes=num_classes, 30 | strides=4, 31 | prior=0.01, 32 | use_sigmoid=True, 33 | assigner = dict(assigner="MinCostAssigner", class_weight=2., l1_weight=2., iou_weight=5., iou_type="giou", alpha=0.25, gamma=2.), 34 | label_loss = dict(loss="FocalLoss", alpha=0.25, gamma=2., reduction="sum"), 35 | bbox_loss = dict(loss="RegL1Loss", weight=1., reduction="sum")) 36 | 37 | h.weight_decay = 1e-4 38 | h.excluding_weight_names = ["predicted_box", "predicted_class"] 39 | h.train=dict(dataset=dict(dataset="COCODataset", 40 | batch_size=4, 41 | dataset_dir="/data/bail/COCO", 42 | training=True, 43 | augmentations=[ 44 | dict(augmentation="FlipLeftToRight", probability=0.5), 45 | dict(augmentation="RandomDistortColor"), 46 | dict(augmentation="Resize", img_scale=(0.2, 2), multiscale_mode="range", keep_ratio=True), 47 | dict(augmentation="RandCropOrPad", size=input_size, clip_box_base_center=False),  # input_size is already an (H, W) tuple 48 | ], 49 | num_samples=118287), 50 | pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt", 51 | 52 | optimizer=dict(optimizer="SGD", momentum=0.9), 53 | mixed_precision=dict(loss_scale=None), # The loss scale in mixed precision training. If None, use dynamic. 54 | gradient_clip_norm=10.0, 55 | 56 | scheduler=dict(train_epochs=24, 57 | learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay", 58 | boundaries=[16, 22], 59 | values=[0.02, 0.002, 0.0002]), 60 | warmup=dict(warmup_learning_rate=0.001, steps=800)), 61 | checkpoint_dir="checkpoints/onenet", 62 | summary_dir="logs/onenet", 63 | log_every_n_steps=100, 64 | save_ckpt_steps=5000) 65 | h.val=dict(dataset=dict(dataset="COCODataset", 66 | batch_size=4, 67 | dataset_dir="/data/bail/COCO", 68 | training=False, 69 | augmentations=[ 70 | dict(augmentation="Resize", img_scale=[(1333, input_size)], keep_ratio=True), 71 | dict(augmentation="Pad", size_divisor=32) 72 | ]), 73 | samples=5000) 74 | h.test=dict(topk=100, score_threshold=0.3) 75 | 76 | return h 77 | -------------------------------------------------------------------------------- /yamls/YOLOF_R50_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: CaffeResNet50 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 | output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66
| - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_R101_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: CaffeResNet101 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 | output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | 
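# Uniform matching (YOLOF): every gt box takes its match_times nearest anchors
# as positives; the ignore thresholds then filter low-quality positives and
# high-IoU negatives so the single-level anchors stay balanced across gt sizes.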
assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_X_101_64x4d_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: ResNeXt101_64X4D 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 
| output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_R101_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 6 6 | scales: 7 | - 16 8 | - 32 9 | - 64 10 | - 128 11 | - 256 12 | - 512 13 | strides: 16 14 | backbone: 
15 | activation: 16 | activation: relu 17 | backbone: CaffeResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 2 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 5 34 | strides: 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | - 1 40 | data_format: channels_last 41 | detector: YOLOF 42 | dtype: float16 43 | excluding_weight_names: 44 | - predicted_box 45 | - predicted_class 46 | head: 47 | activation: 48 | activation: relu 49 | assigner: 50 | assigner: UniformAssigner 51 | match_times: 8 52 | neg_ignore_thresh: 0.15 53 | pos_ignore_thresh: 0.7 54 | bbox_decoder: 55 | decoder: Delta2Box 56 | weights: 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | bbox_encoder: 62 | encoder: Box2Delta 63 | weights: 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | - 1.0 68 | bbox_loss: 69 | loss: GIoULoss 70 | reduction: sum 71 | weight: 2.0 72 | cls_num_convs: 2 73 | feat_dims: 512 74 | head: YOLOFHead 75 | kernel_initializer: he_normal 76 | label_loss: 77 | alpha: 0.25 78 | from_logits: true 79 | gamma: 2.0 80 | loss: FocalLoss 81 | reduction: sum 82 | weight: 1.0 83 | normalization: 84 | axis: -1 85 | epsilon: 0.001 86 | momentum: 0.9 87 | normalization: batch_norm 88 | trainable: true 89 | prior: 0.01 90 | reg_num_convs: 4 91 | sampler: 92 | sampler: PseudoSampler 93 | use_sigmoid: true 94 | input_shape: !!python/tuple 95 | - 1024 96 | - 1024 97 | - 3 98 | neck: 99 | activation: 100 | activation: relu 101 | data_format: channels_last 102 | dilation_rates: 103 | - 4 104 | - 8 105 | - 12 106 | - 16 107 | filters: 512 108 | kernel_initializer: he_normal 109 | midfilters: 128 110 | neck: DilatedEncoder 111 | normalization: 112 | axis: -1 113 | epsilon: 0.001 114 | momentum: 0.9 115 | normalization: batch_norm 116 | trainable: true 117 | num_classes: 80 118 | test: 119 | iou_threshold: 0.6 120 | nms: CombinedNonMaxSuppression 121 | post_nms_size: 100 122 | pre_nms_size: 2000 123 | score_threshold: 0.25 124 | train: 125 | checkpoint_dir: checkpoints/yolof 126 | dataset: 127 | augmentations: 128 | - augmentation: FlipLeftToRight 129 | probability: 0.5 130 | - augmentation: RandomDistortColor 131 | - augmentation: Resize 132 | img_scale: 133 | - &id001 !!python/tuple 134 | - 1333 135 | - 800 136 | keep_ratio: true 137 | - augmentation: Pad 138 | size_divisor: 32 139 | batch_size: 4 140 | dataset: COCODataset 141 | dataset_dir: /data/bail/COCO 142 | num_samples: 118287 143 | training: true 144 | gradient_clip_norm: 10.0 145 | log_every_n_steps: 100 146 | mixed_precision: 147 | loss_scale: null 148 | optimizer: 149 | momentum: 0.9 150 | optimizer: SGD 151 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 152 | save_ckpt_steps: 5000 153 | scheduler: 154 | learning_rate_scheduler: 155 | boundaries: 156 | - 16 157 | - 22 158 | scheduler: PiecewiseConstantDecay 159 | values: 160 | - 0.02 161 | - 0.002 162 | - 0.0002 163 | train_epochs: 24 164 | warmup: 165 | steps: 800 166 | warmup_learning_rate: 0.001 167 | summary_dir: logs/yolof 168 | val: 169 | dataset: 170 | augmentations: 171 | - augmentation: Resize 172 | img_scale: 173 | - *id001 174 | keep_ratio: true 175 | - augmentation: Pad 176 | size_divisor: 32 177 | batch_size: 4 178 | dataset: COCODataset 179 | dataset_dir: /data/bail/COCO 180 | training: false 181 | samples: 5000 182 | weight_decay: 0.0001 183 | -------------------------------------------------------------------------------- 
/yamls/YOLOF_R50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 6 6 | scales: 7 | - 16 8 | - 32 9 | - 64 10 | - 128 11 | - 256 12 | - 512 13 | strides: 16 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: CaffeResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 2 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 5 34 | strides: 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | - 1 40 | data_format: channels_last 41 | detector: YOLOF 42 | dtype: float16 43 | excluding_weight_names: 44 | - predicted_box 45 | - predicted_class 46 | head: 47 | activation: 48 | activation: relu 49 | assigner: 50 | assigner: UniformAssigner 51 | match_times: 8 52 | neg_ignore_thresh: 0.15 53 | pos_ignore_thresh: 0.7 54 | bbox_decoder: 55 | decoder: Delta2Box 56 | weights: 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | bbox_encoder: 62 | encoder: Box2Delta 63 | weights: 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | - 1.0 68 | bbox_loss: 69 | loss: GIoULoss 70 | reduction: sum 71 | weight: 2.0 72 | cls_num_convs: 2 73 | feat_dims: 512 74 | head: YOLOFHead 75 | kernel_initializer: he_normal 76 | label_loss: 77 | alpha: 0.25 78 | from_logits: true 79 | gamma: 2.0 80 | loss: FocalLoss 81 | reduction: sum 82 | weight: 1.0 83 | normalization: 84 | axis: -1 85 | epsilon: 0.001 86 | momentum: 0.9 87 | normalization: batch_norm 88 | trainable: true 89 | prior: 0.01 90 | reg_num_convs: 4 91 | sampler: 92 | sampler: PseudoSampler 93 | use_sigmoid: true 94 | input_shape: !!python/tuple 95 | - 1024 96 | - 1024 97 | - 3 98 | neck: 99 | activation: 100 | activation: relu 101 | data_format: channels_last 102 | dilation_rates: 103 | - 4 104 | - 8 105 | - 12 106 | - 16 107 | filters: 512 108 | kernel_initializer: he_normal 109 | midfilters: 128 110 | neck: DilatedEncoder 111 | normalization: 112 | axis: -1 113 | epsilon: 0.001 114 | momentum: 0.9 115 | normalization: batch_norm 116 | trainable: true 117 | num_classes: 80 118 | test: 119 | iou_threshold: 0.5 120 | nms: CombinedNonMaxSuppression 121 | post_nms_size: 100 122 | pre_nms_size: 2000 123 | score_threshold: 0.35 124 | train: 125 | checkpoint_dir: checkpoints/yolof 126 | dataset: 127 | augmentations: 128 | - augmentation: FlipLeftToRight 129 | probability: 0.5 130 | - augmentation: RandomDistortColor 131 | - augmentation: Resize 132 | img_scale: 133 | - &id001 !!python/tuple 134 | - 1333 135 | - 800 136 | keep_ratio: true 137 | - augmentation: Pad 138 | size_divisor: 32 139 | batch_size: 4 140 | dataset: COCODataset 141 | dataset_dir: /data/bail/COCO 142 | num_samples: 118287 143 | training: true 144 | gradient_clip_norm: 10.0 145 | log_every_n_steps: 100 146 | mixed_precision: 147 | loss_scale: null 148 | optimizer: 149 | momentum: 0.9 150 | optimizer: SGD 151 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 152 | save_ckpt_steps: 5000 153 | scheduler: 154 | learning_rate_scheduler: 155 | boundaries: 156 | - 16 157 | - 22 158 | scheduler: PiecewiseConstantDecay 159 | values: 160 | - 0.02 161 | - 0.002 162 | - 0.0002 163 | train_epochs: 24 164 | warmup: 165 | steps: 800 166 | warmup_learning_rate: 0.001 167 | summary_dir: logs/yolof 168 | val: 169 | dataset: 170 | augmentations: 171 | - augmentation: Resize 172 | img_scale: 173 | - *id001 174 | 
keep_ratio: true 175 | - augmentation: Pad 176 | size_divisor: 32 177 | batch_size: 4 178 | dataset: COCODataset 179 | dataset_dir: /data/bail/COCO 180 | training: false 181 | samples: 5000 182 | weight_decay: 0.0001 183 | --------------------------------------------------------------------------------
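The YAML files above are dumps of nested config dictionaries (note the `&id001`/`*id001` anchor reuse between the train and val `img_scale`). A minimal sketch of loading one for inspection, assuming PyYAML is installed; the `!!python/tuple` tags mean the files must be read with `yaml.UnsafeLoader`, so only load files you trust:

import yaml

with open("yamls/YOLOF_R50_DC5_1x.yaml") as f:
    cfg = yaml.load(f, Loader=yaml.UnsafeLoader)

print(cfg["detector"])                # YOLOF
print(cfg["neck"]["dilation_rates"])  # [4, 8, 12, 16]
print(cfg["train"]["scheduler"]["learning_rate_scheduler"]["values"])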
/core/losses/generalized_focal_loss.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from ..builder import LOSSES


@LOSSES.register
class QualityFocalLoss(tf.keras.losses.Loss):
    def __init__(self,
                 from_logits=True,
                 use_sigmoid=True,
                 beta=2.0,
                 reduction=tf.keras.losses.Reduction.SUM,
                 weight=1.,
                 name="QualityFocalLoss"):
        super(QualityFocalLoss, self).__init__(reduction=reduction, name=name)

        assert use_sigmoid, "Only support sigmoid."
        self.use_sigmoid = use_sigmoid
        self.from_logits = from_logits
        self.weight = weight
        self.beta = beta

    def _quality_focal_loss(self, labels, scores, y_pred, beta=2.0, from_logits=True):
        """Quality focal loss.

        Args:
            labels (Tensor): Target category label (one-hot) with shape (B, N, C).
            scores (Tensor): Target quality label with shape (B, N).
            y_pred (Tensor): Predicted joint representation of classification
                and quality (IoU) estimation with shape (B, N, C), where C is
                the number of classes.
            beta (float): The beta parameter for calculating the modulating
                factor. Defaults to 2.0.
            from_logits (bool): Whether `y_pred` is given as logits. Defaults to True.
        """
        with tf.name_scope("quality_focal_loss"):
            # Replace the one-hot targets of positive samples with their quality
            # (IoU) scores, so classification and quality are learned jointly.
            pos_mask = labels == 1.
            pos_scores = tf.boolean_mask(scores, tf.reduce_any(pos_mask, -1))
            labels = tf.tensor_scatter_nd_update(labels, tf.where(pos_mask), pos_scores)

            if from_logits:
                loss = tf.nn.sigmoid_cross_entropy_with_logits(labels, y_pred)
            else:
                loss = tf.keras.losses.binary_crossentropy(labels, y_pred, False)

            # The modulating factor |score - sigmoid(pred)| ** beta down-weights
            # easy examples, as in focal loss.
            scale_factor = tf.nn.sigmoid(y_pred)
            num_classes = tf.shape(y_pred)[-1]
            pos_scale_factor = tf.abs(tf.tile(tf.expand_dims(scores, -1), [1, 1, num_classes]) - scale_factor)
            scale_factor = tf.where(pos_mask, pos_scale_factor, scale_factor)
            scale_factor = tf.pow(scale_factor, beta)

            weighted_loss = scale_factor * loss * self.weight

            return weighted_loss

    def call(self, y_true, y_pred):
        y_true, quality_scores = y_true
        return self._quality_focal_loss(y_true, quality_scores, y_pred, self.beta, self.from_logits)


@LOSSES.register
class DistributionFocalLoss(tf.keras.losses.Loss):
    """Distribution Focal Loss (DFL) from `Generalized Focal Loss: Learning
    Qualified and Distributed Bounding Boxes for Dense Object Detection
    <https://arxiv.org/abs/2006.04388>`_.

    Returns:
        Tensor: Loss tensor with shape (N,).
    """
    def __init__(self,
                 from_logits=True,
                 reduction=tf.keras.losses.Reduction.SUM,
                 weight=1.,
                 name="DistributionFocalLoss"):
        super(DistributionFocalLoss, self).__init__(reduction=reduction, name=name)

        assert from_logits, "Only support logits."
        self.from_logits = from_logits
        self.weight = weight

    def _distribution_focal_loss(self, y_true, y_pred):
        with tf.name_scope("distribution_focal_loss"):
            # The continuous target y is represented by its two nearest integer
            # bins; each bin's cross entropy is weighted by its distance to y.
            dist_left = tf.cast(y_true, tf.int64)
            dist_right = dist_left + 1

            weight_left = tf.cast(dist_right, tf.float32) - y_true
            weight_right = y_true - tf.cast(dist_left, tf.float32)

            loss_left = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=dist_left, logits=y_pred) * weight_left
            loss_right = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=dist_right, logits=y_pred) * weight_right

            return loss_left + loss_right

    def call(self, y_true, y_pred):
        return self._distribution_focal_loss(y_true, y_pred) * self.weight
--------------------------------------------------------------------------------
/models/detectors/detr.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class Encoder(tf.keras.Model):
    def __init__(self,
                 num_heads,
                 dim,
                 hidden_dim=2048,
                 dropout=0.1,
                 activation="relu",
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.self_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="self_attn")
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout, name="dropout1")
        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name="norm1")

        # Feed-forward network: expand to hidden_dim, then project back to dim
        # so the residual connection type-checks.
        self.linear1 = tf.keras.layers.Dense(units=hidden_dim, activation=activation, name="linear1")
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout, name="dropout2")
        self.linear2 = tf.keras.layers.Dense(units=dim, name="linear2")
        self.dropout3 = tf.keras.layers.Dropout(rate=dropout, name="dropout3")
        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name="norm2")

    def call(self, src, src_mask=None, pos_embed=None, training=None):
        # Positional embeddings are added to queries and keys only.
        query = key = src if pos_embed is None else pos_embed + src
        src2 = self.self_attn(query=query, key=key, value=src, attention_mask=src_mask, training=training)
        src += self.dropout1(src2, training=training)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout2(self.linear1(src), training=training))
        src += self.dropout3(src2, training=training)
        src = self.norm2(src)

        return src


class Decoder(tf.keras.Model):
    def __init__(self,
                 dim,
                 num_heads,
                 hidden_dim=2048,
                 dropout=0.1,
                 activation="relu",
                 **kwargs):
        super(Decoder, self).__init__(**kwargs)

        self.self_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="self_attn")
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout, name="dropout1")
        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name="norm1")

        self.multihead_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="multihead_attn")
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout, name="dropout2")
        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name="norm2")

        self.linear1 = tf.keras.layers.Dense(units=hidden_dim, activation=activation, name="linear1")
        self.dropout3 = tf.keras.layers.Dropout(rate=dropout, name="dropout3")
        self.linear2 = tf.keras.layers.Dense(units=dim, name="linear2")
        self.dropout4 = tf.keras.layers.Dropout(rate=dropout, name="dropout4")
        self.norm3 = tf.keras.layers.LayerNormalization(axis=-1, name="norm3")

    def call(self, target, memory, target_mask=None, memory_mask=None, pos_embed=None, query_pos_embed=None, training=None):
        q = k = target if query_pos_embed is None else target + query_pos_embed

        # Self-attention over the object queries.
        target2 = self.self_attn(query=q, key=k, value=target, attention_mask=target_mask, training=training)
        target += self.dropout1(target2, training=training)
        target = self.norm1(target)

        # Cross-attention from the queries into the encoder memory.
        target2 = self.multihead_attn(
            query=target if query_pos_embed is None else target + query_pos_embed,
            key=memory if pos_embed is None else memory + pos_embed,
            value=memory,
            attention_mask=memory_mask,
            training=training)
        target += self.dropout2(target2, training=training)
        target = self.norm2(target)

        # Feed-forward network.
        target2 = self.linear2(self.dropout3(self.linear1(target), training=training))
        target += self.dropout4(target2, training=training)
        target = self.norm3(target)

        return target
--------------------------------------------------------------------------------
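A hypothetical smoke test for the Encoder and Decoder above (shapes are assumptions; the DETR paper uses dim=256 and 100 object queries):

import tensorflow as tf

encoder = Encoder(num_heads=8, dim=256)
decoder = Decoder(dim=256, num_heads=8)

src = tf.random.normal([2, 600, 256])        # [batch, flattened positions, dim]
queries = tf.zeros([2, 100, 256])            # [batch, num_queries, dim]
query_pos = tf.random.normal([2, 100, 256])  # learned query embeddings in DETR

memory = encoder(src, training=False)
out = decoder(queries, memory, query_pos_embed=query_pos, training=False)
print(out.shape)  # (2, 100, 256)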
/core/layers/position_sensitive_average_pooling.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class PSAvgPooling(tf.keras.layers.Layer):
    def __init__(self, num_boxes, crop_size=(9, 9), num_spatial_bins=(3, 3), **kwargs):
        super(PSAvgPooling, self).__init__(**kwargs)

        total_bins = 1
        bin_crop_size = []
        for num_bins, crop_dim in zip(num_spatial_bins, crop_size):
            if num_bins < 1:
                raise ValueError("num_spatial_bins should be >= 1.")

            if crop_dim % num_bins != 0:
                raise ValueError("crop_size should be divisible by num_spatial_bins.")

            total_bins *= num_bins
            bin_crop_size.append(crop_dim // num_bins)

        if bin_crop_size[0] != bin_crop_size[1]:
            raise ValueError("Only support square bin crop size for now.")

        self.crop_size = crop_size
        self.num_spatial_bins = num_spatial_bins
        self.bin_crop_size = bin_crop_size
        self.total_bins = total_bins
        self.num_boxes = num_boxes

    def build(self, input_shape):
        # `weights` is a reserved property on `tf.keras.layers.Layer`, so the
        # per-bin weights get their own attribute name. One scalar per bin.
        self.bin_weights = self.add_weight(name="weight",
                                           shape=[1, 1, 1, 1, self.total_bins],
                                           dtype=self.dtype,
                                           initializer=tf.keras.initializers.Ones())

    def _one_image_pooling(self, image, boxes):
        y1, x1, y2, x2 = tf.unstack(boxes, 4, -1)
        ps_boxes = tf.TensorArray(size=self.total_bins, dtype=boxes.dtype)

        # Split every RoI into a num_spatial_bins[0] x num_spatial_bins[1] grid
        # of sub-boxes, one per position-sensitive bin.
        step_y = (y2 - y1) / tf.cast(self.num_spatial_bins[0], dtype=boxes.dtype)
        step_x = (x2 - x1) / tf.cast(self.num_spatial_bins[1], dtype=boxes.dtype)
        i = tf.constant(0, tf.int32)
        for bin_y in tf.range(self.num_spatial_bins[0], dtype=boxes.dtype):
            for bin_x in tf.range(self.num_spatial_bins[1], dtype=boxes.dtype):
                box = tf.stack([y1 + bin_y * step_y,
                                x1 + bin_x * step_x,
                                y1 + (bin_y + 1.) * step_y,
                                x1 + (bin_x + 1.) * step_x], axis=-1)  # [num_boxes, 4]
                ps_boxes = ps_boxes.write(i, box)
                i += 1

        # Each bin reads its own slice of the channel dimension.
        step_split = tf.shape(image)[-1] // self.total_bins
        image_crops = tf.TensorArray(size=self.total_bins, dtype=image.dtype)
        for i in tf.range(self.total_bins):
            split = image[..., i * step_split: (i + 1) * step_split]
            split_crop = tf.image.crop_and_resize(image=tf.expand_dims(split, 0),
                                                  boxes=ps_boxes.read(i),
                                                  box_indices=tf.zeros(tf.shape(boxes)[0], dtype=tf.int32),
                                                  crop_size=self.bin_crop_size)
            image_crops = image_crops.write(i, split_crop)

        # [num_bins, num_boxes, crop_height, crop_width, depth]
        #   -> [num_boxes, crop_height, crop_width, depth, num_bins]
        features = tf.transpose(image_crops.stack(), [1, 2, 3, 4, 0])
        features = tf.reduce_mean(features, [1, 2], keepdims=True)   # [num_boxes, 1, 1, depth, num_bins]
        features = tf.reduce_mean(features * self.bin_weights, -1)   # [num_boxes, 1, 1, depth]

        return features

    def call(self, inputs, boxes):
        batch_size = tf.shape(inputs)[0]
        features = tf.TensorArray(size=batch_size, dtype=inputs.dtype)

        for i in tf.range(batch_size):
            feat = self._one_image_pooling(inputs[i], boxes[i])
            features = features.write(i, feat)

        return features.concat()

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([self.num_boxes, 1, 1, input_shape[-1] // self.total_bins])

    def get_config(self):
        # Mirror the constructor arguments so the layer round-trips through
        # `from_config`.
        config = {"num_boxes": self.num_boxes,
                  "crop_size": self.crop_size,
                  "num_spatial_bins": self.num_spatial_bins}

        base_config = super(PSAvgPooling, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------
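A hypothetical usage sketch for the layer above. With 3x3 bins, the channel dimension must be a multiple of total_bins (9 here) so each bin can pool its own channel slice, and boxes are normalized coordinates because `tf.image.crop_and_resize` expects them:

import tensorflow as tf

layer = PSAvgPooling(num_boxes=3, crop_size=(9, 9), num_spatial_bins=(3, 3))
features = tf.random.normal([1, 32, 32, 9 * 16])   # depth per bin = 16
boxes = tf.constant([[[0.1, 0.1, 0.5, 0.5],
                      [0.2, 0.3, 0.8, 0.9],
                      [0.0, 0.0, 1.0, 1.0]]])      # normalized y1, x1, y2, x2
pooled = layer(features, boxes)                    # [3, 1, 1, 16]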
/core/optimizers/lookahead_optimizer.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.distribute import distribution_strategy_context
from ..builder import OPTIMIZERS


@OPTIMIZERS.register
class LookaheadOptimizer(tf.keras.optimizers.Optimizer):
    """Wraps another optimizer: the wrapped ("fast") weights take k steps,
    then the slow weights move toward them by a factor of alpha and the fast
    weights are reset to the slow copies.
    """
    def __init__(self, optimizer, k=5, alpha=0.5, name=None, **kwargs):
        super(LookaheadOptimizer, self).__init__(name=name, **kwargs)

        self.k = tf.constant(k, dtype=tf.float32)
        self.alpha = tf.constant(alpha, dtype=tf.float32)
        self._optimizer = optimizer

        self._iterations = self._optimizer.iterations
        self.slow_weights = []

        self.add_slow_weights = True

        self.replica_context = tf.distribute.get_replica_context()

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, "slow")

    def _resource_apply_dense(self, grad, var, apply_state=None):
        pass

    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        pass

    def _update_weights(self, fast_weights, slow_weights, alpha):
        def _update_slow_weight(slow_weight, fast_weight, a):
            # slow <- slow + alpha * (fast - slow)
            slow_weight.assign_add(a * (fast_weight - slow_weight))

        def _update_fast_weight(fast_weight, slow_weight):
            # fast <- slow
            fast_weight.assign(slow_weight)

        # Synchronize every k steps.
        if tf.equal(tf.cast(self._iterations, tf.float32) % self.k, 0):
            if distribution_strategy_context.has_strategy():
                distribution = distribution_strategy_context.get_replica_context()

                for fast, slow in zip(fast_weights, slow_weights):
                    distribution.extended.call_for_each_replica(_update_slow_weight,
                                                                args=(slow, fast.value(), alpha))
                    distribution.extended.call_for_each_replica(_update_fast_weight,
                                                                args=(fast, slow.value()))
            else:
                for fast, slow in zip(fast_weights, slow_weights):
                    _update_slow_weight(slow, fast.value(), alpha)
                    _update_fast_weight(fast, slow.value())

    def apply_gradients(self, grads_and_vars, name=None):
        # Materialize first: `grads_and_vars` is often a one-shot generator
        # (e.g. `zip(...)`) and is iterated twice below.
        grads_and_vars = list(grads_and_vars)
        fast_weights = [v for _, v in grads_and_vars]
        if self.add_slow_weights:
            # Lazily create one non-trainable copy of every fast weight.
            self.slow_weights = [
                tf.Variable(initial_value=w.value(),
                            trainable=False,
                            name=w.name.split(":")[0] + "/slow")
                for w in fast_weights
            ]
            self.add_slow_weights = False

        res = self._optimizer.apply_gradients(grads_and_vars, name=name)

        self._update_weights(fast_weights, self.slow_weights, self.alpha)

        return res

    def get_config(self):
        config = self._optimizer.get_config()

        return config

    @property
    def learning_rate(self):
        return self._optimizer.learning_rate

    @learning_rate.setter
    def learning_rate(self, value):
        self._optimizer.learning_rate = value

    @property
    def lr(self):
        return self._optimizer.lr

    @lr.setter
    def lr(self, lr):
        self._optimizer.lr = lr

    def get_weights(self):
        return self._optimizer.get_weights()

    def set_weights(self, weights):
        return self._optimizer.set_weights(weights)

    @property
    def iterations(self):
        return self._optimizer.iterations

    @iterations.setter
    def iterations(self, variable):
        self._optimizer.iterations = variable

    def get_slot_names(self):
        return self._optimizer.get_slot_names()

    def variables(self):
        return self._optimizer.variables()

    @property
    def weights(self):
        return self._optimizer.weights
--------------------------------------------------------------------------------
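A minimal eager-mode sketch of the wrapper above, bypassing the registry-based config flow (the explicit `name` is an assumption to satisfy the Keras optimizer base class):

import tensorflow as tf

base = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
opt = LookaheadOptimizer(base, k=5, alpha=0.5, name="Lookahead")

var = tf.Variable(1.0)
for _ in range(10):
    with tf.GradientTape() as tape:
        loss = (var - 3.0) ** 2
    grads = tape.gradient(loss, [var])
    # Every 5th step the slow copy moves halfway toward `var`,
    # and `var` is reset to the slow copy.
    opt.apply_gradients(zip(grads, [var]))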
/core/layers/drop_block.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class DropBlock2D(tf.keras.layers.Layer):
    def __init__(self, block_size=7, drop_rate=0.1, data_format="channels_last", **kwargs):
        super(DropBlock2D, self).__init__(**kwargs)

        self.block_size = block_size
        self.keep_prob = 1. - drop_rate

        assert data_format in {"channels_first", "channels_last"}
        self.data_format = data_format

    def build(self, input_shape):
        super(DropBlock2D, self).build(input_shape)

    def _drop_block_nhwc(self, inputs):
        with tf.name_scope("drop_block_nhwc"):
            input_shape = tf.shape(inputs)
            top, left = input_shape[1] // 2, input_shape[2] // 2
            bottom, right = input_shape[1] - top, input_shape[2] - left
            padding = [[0, 0], [top, bottom], [left, right], [0, 0]]

            # gamma is the Bernoulli rate for seed positions, chosen so the
            # expected fraction of dropped activations equals drop_rate.
            feat_size = tf.cast(input_shape[1:3], self.dtype)
            gamma1 = (1. - self.keep_prob) / (self.block_size * self.block_size)
            gamma2 = (feat_size[0] * feat_size[1]) / (feat_size[0] - self.block_size + 1.) / \
                (feat_size[1] - self.block_size + 1.)

            gamma = gamma1 * gamma2

            # Sample seeds only where a full block fits, then grow each seed
            # into a block_size x block_size region with max pooling.
            mask_shape = [input_shape[0],
                          input_shape[1] - self.block_size + 1,
                          input_shape[2] - self.block_size + 1,
                          input_shape[3]]
            mask = tf.nn.relu(tf.sign(gamma - tf.random.uniform(mask_shape, 0, 1, dtype=self.dtype)))

            mask = tf.pad(mask, paddings=padding)
            mask = tf.nn.max_pool(mask, [1, self.block_size, self.block_size, 1], [1, 1, 1, 1], "SAME", "NHWC")

            mask = 1. - mask
            # Rescale so the expected activation magnitude is unchanged.
            mask = mask * tf.cast(tf.size(mask), mask.dtype) / tf.reduce_sum(mask)
            mask = tf.cast(mask, inputs.dtype)
            outputs = mask * inputs

            return outputs

    def _drop_block_nchw(self, inputs):
        with tf.name_scope("drop_block_nchw"):
            input_shape = tf.shape(inputs)
            top, left = input_shape[2] // 2, input_shape[3] // 2
            bottom, right = input_shape[2] - top, input_shape[3] - left
            padding = [[0, 0], [0, 0], [top, bottom], [left, right]]

            feat_size = tf.cast(input_shape[2:], self.dtype)
            gamma1 = (1. - self.keep_prob) / (self.block_size * self.block_size)
            gamma2 = (feat_size[0] * feat_size[1]) / (feat_size[0] - self.block_size + 1.) / \
                (feat_size[1] - self.block_size + 1.)

            gamma = gamma1 * gamma2

            mask_shape = [input_shape[0],
                          input_shape[1],
                          input_shape[2] - self.block_size + 1,
                          input_shape[3] - self.block_size + 1]
            mask = tf.nn.relu(tf.sign(gamma - tf.random.uniform(mask_shape, 0, 1, dtype=self.dtype)))

            mask = tf.pad(mask, paddings=padding)
            # The pooling window must follow the NCHW layout.
            mask = tf.nn.max_pool(mask, [1, 1, self.block_size, self.block_size], [1, 1, 1, 1], "SAME", "NCHW")

            mask = 1. - mask
            mask = mask * tf.cast(tf.size(mask), mask.dtype) / tf.reduce_sum(mask)
            mask = tf.cast(mask, inputs.dtype)
            outputs = mask * inputs

            return outputs

    def call(self, inputs, training=None):
        if training is None:
            training = tf.keras.backend.learning_phase()

        if training:
            if self.data_format == "channels_first":
                return self._drop_block_nchw(inputs)

            return self._drop_block_nhwc(inputs)

        return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        # Mirror the constructor arguments so the layer round-trips through
        # `from_config`.
        config = {"block_size": self.block_size,
                  "drop_rate": 1. - self.keep_prob,
                  "data_format": self.data_format}

        base_config = super(DropBlock2D, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------
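A back-of-envelope check of the seed rate gamma used above (the feature-map size and rates here are assumed values for illustration):

keep_prob, block_size, h, w = 0.9, 7, 32, 32
gamma = (1. - keep_prob) / block_size ** 2 * (h * w) / ((h - block_size + 1) * (w - block_size + 1))
print(round(gamma, 5))  # ~0.00309: each seed grows into a 7x7 zeroed block,
                        # so the expected dropped fraction stays near drop_rate = 0.1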
/models/necks/dlaup.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from ..common import ConvNormActBlock
from ..builder import NECKS


def ida_up(inputs,
           kernel_size,
           infilters,
           outfilters,
           up_factors,
           data_format="channels_last",
           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
           activation=dict(activation="relu"),
           kernel_initializer="he_normal",
           name="ida_up"):
    assert len(inputs) == len(infilters), "{} vs {} inputs".format(len(infilters), len(inputs))

    # Project every input to `outfilters` channels and upsample it to the
    # resolution of the finest level.
    for i, inp in enumerate(inputs):
        if infilters[i] == outfilters:
            x = inp
        else:
            x = ConvNormActBlock(outfilters,
                                 kernel_size=1,
                                 strides=1,
                                 kernel_initializer=kernel_initializer,
                                 activation=activation,
                                 normalization=normalization,
                                 name=name + "/proj_%d" % i)(inp)
        if up_factors[i] != 1:
            # Use a local name here: overwriting `kernel_size` would corrupt
            # the kernel size of the node convolutions below.
            up_kernel_size = (up_factors[i] * 2, up_factors[i] * 2)
            strides = (up_factors[i], up_factors[i])
            x = tf.keras.layers.Conv2DTranspose(filters=outfilters,
                                                kernel_size=up_kernel_size,
                                                strides=strides,
                                                padding="same",
                                                output_padding=None,
                                                groups=outfilters,
                                                kernel_initializer=kernel_initializer,
                                                name=name + "/up_%d" % i,
                                                use_bias=False)(x)
        inputs[i] = x

    # Iteratively aggregate neighbouring levels.
    x = inputs[0]
    outputs = []
    channel_axis = -1 if data_format == "channels_last" else 1
    for i in range(1, len(inputs)):
        x = tf.keras.layers.Concatenate(axis=channel_axis, name=name + "/cat%d" % i)([x, inputs[i]])
        x = ConvNormActBlock(outfilters,
                             kernel_size=kernel_size,
                             strides=1,
                             kernel_initializer=kernel_initializer,
                             activation=activation,
                             normalization=normalization,
                             name=name + "/node_%d" % i)(x)

        outputs.append(x)

    return x, outputs


@NECKS.register("DLAUp")
def dla_up(filters=None,
           input_shapes=None,
           downsample_ratio=4,
           data_format="channels_last",
           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
           activation=dict(activation="relu"),
           kernel_initializer="he_normal",
           name="dla_up"):
    assert downsample_ratio in [2, 4, 8, 16]

    first_level = int(np.log2(downsample_ratio))
    scales = [2 ** i for i in range(len(filters[first_level:]))]

    inputs = [tf.keras.Input(shape=shape) for shape in input_shapes[first_level:]]

    layers = [i for i in inputs]
    # Copy so the caller's `filters` list is not mutated in the loop below.
    infilters = list(filters)
    scales = np.array(scales, dtype=int)
    for i in range(len(filters[first_level:]) - 1):
        j = -i - 2
        x, y = ida_up(layers[j:],
                      kernel_size=3,
                      infilters=infilters[j:],
                      outfilters=filters[j],
                      up_factors=scales[j:] // scales[j],
                      data_format=data_format,
                      normalization=normalization,
                      activation=activation,
                      kernel_initializer=kernel_initializer,
                      name=name + "/ida_%d" % i)
        scales[j + 1:] = scales[j]
        infilters[j + 1:] = [filters[j] for _ in filters[j + 1:]]
        layers[-i - 1:] = y

    return tf.keras.Model(inputs=inputs, outputs=x, name=name)
--------------------------------------------------------------------------------
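A hypothetical sketch of building the neck, assuming a DLA-style backbone whose feature maps halve in resolution and double in channels per level (with downsample_ratio=4, only levels from index 2 onward are aggregated):

neck = dla_up(filters=[16, 32, 64, 128, 256, 512],
              input_shapes=[(256, 256, 16), (128, 128, 32), (64, 64, 64),
                            (32, 32, 128), (16, 16, 256), (8, 8, 512)],
              downsample_ratio=4)
neck.summary()  # output is the aggregated stride-4 feature map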
/core/layers/weight_standardization_conv2d.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.engine.input_spec import InputSpec


class WSConv2D(tf.keras.layers.Conv2D):
    """Conv2D with weight standardization: the kernel is standardized to zero
    mean and unit variance on every forward pass, not just once at build time.
    """
    def __init__(self,
                 filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(WSConv2D, self).__init__(filters,
                                       kernel_size,
                                       strides=strides,
                                       padding=padding,
                                       data_format=data_format,
                                       dilation_rate=dilation_rate,
                                       activation=activation,
                                       use_bias=use_bias,
                                       kernel_initializer=kernel_initializer,
                                       bias_initializer=bias_initializer,
                                       kernel_regularizer=kernel_regularizer,
                                       bias_regularizer=bias_regularizer,
                                       activity_regularizer=activity_regularizer,
                                       kernel_constraint=kernel_constraint,
                                       bias_constraint=bias_constraint,
                                       **kwargs)

    def build(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        channel_axis = self._get_channel_axis()
        if input_shape.dims[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        self.kernel = self.add_weight(
            name='kernel',
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype)
        if self.use_bias:
            self.bias = self.add_weight(
                name='bias',
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype)
        else:
            self.bias = None

        self.input_spec = InputSpec(ndim=self.rank + 2,
                                    axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs):
        # Standardize the kernel on every call; doing it once in `build`, as
        # an earlier version did, only rescales the initial weights and stops
        # standardizing as soon as training updates them.
        mean, variance = tf.nn.moments(self.kernel, axes=[0, 1, 2], keepdims=True)
        kernel = (self.kernel - mean) / (tf.sqrt(variance) + 1e-5)

        outputs = tf.nn.convolution(
            inputs,
            kernel,
            strides=list(self.strides),
            padding=self.padding.upper(),
            dilations=list(self.dilation_rate),
            data_format=conv_utils.convert_data_format(self.data_format, self.rank + 2))

        if self.use_bias:
            outputs = tf.nn.bias_add(
                outputs, self.bias,
                data_format=conv_utils.convert_data_format(self.data_format, 4))

        if self.activation is not None:
            return self.activation(outputs)

        return outputs

    def compute_output_shape(self, input_shape):
        return super(WSConv2D, self).compute_output_shape(input_shape)
--------------------------------------------------------------------------------
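A quick shape check for the layer above (input sizes are assumed values). Weight standardization is typically paired with GroupNorm rather than BatchNorm, which is why it is kept separate from the normalization configs:

import tensorflow as tf

conv = WSConv2D(filters=64, kernel_size=3, padding="same")
x = tf.random.normal([2, 56, 56, 32])
y = conv(x)
print(y.shape)  # (2, 56, 56, 64)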
42 | """ 43 | iou_type = iou_type.lower() 44 | assert iou_type in ["iou", "ciou", "diou", "giou"] 45 | t_y1, t_x1, t_y2, t_x2 = tf.unstack(target_boxes, num=4, axis=-1) 46 | p_y1, p_x1, p_y2, p_x2 = tf.unstack(predicted_boxes, num=4, axis=-1) 47 | 48 | zeros = tf.zeros_like(t_y1) 49 | p_width = tf.maximum(zeros, p_x2 - p_x1) 50 | p_height = tf.maximum(zeros, p_y2 - p_y1) 51 | t_width = tf.maximum(zeros, t_x2 - t_x1) 52 | t_height = tf.maximum(zeros, t_y2 - t_y1) 53 | p_area = p_width * p_height 54 | t_area = t_width * t_height 55 | 56 | # intersection 57 | i_x1 = tf.maximum(t_x1, p_x1) 58 | i_y1 = tf.maximum(t_y1, p_y1) 59 | i_x2 = tf.minimum(t_x2, p_x2) 60 | i_y2 = tf.minimum(t_y2, p_y2) 61 | i_width = tf.maximum(zeros, i_x2 - i_x1) 62 | i_height = tf.maximum(zeros, i_y2 - i_y1) 63 | i_area = i_width * i_height 64 | 65 | # union 66 | u_area = p_area + t_area - i_area 67 | iou_v = tf.math.divide_no_nan(i_area, u_area) 68 | if iou_type == "iou": 69 | return iou_v 70 | 71 | # enclose 72 | e_y1 = tf.minimum(p_y1, t_y1) 73 | e_x1 = tf.minimum(p_x1, t_x1) 74 | e_y2 = tf.maximum(p_y2, t_y2) 75 | e_x2 = tf.maximum(p_x2, t_x2) 76 | 77 | assert iou_type in ["diou", "ciou", "giou"] 78 | if iou_type == "giou": 79 | e_width = e_x2 - e_x1 80 | e_height = e_y2 - e_y1 81 | e_area = e_width * e_height 82 | giou_v = iou_v - tf.math.divide_no_nan(e_area - iou_v, e_area) 83 | 84 | return giou_v 85 | 86 | assert iou_type in ["diou", "ciou"] 87 | # box center 88 | p_center = tf.stack([(p_y1 + p_y2) / 2, (p_x1 + p_x2) / 2], axis=-1) 89 | t_center = tf.stack([(t_y1 + t_y2) / 2, (t_x1 + t_x2) / 2], axis=-1) 90 | 91 | center_dist = tf.linalg.norm(p_center - t_center, axis=-1) ** 2 92 | diag_dist = tf.linalg.norm(tf.stack([e_y2 - e_y1, e_x2 - e_x1], -1), axis=-1) ** 2 93 | diou_v = iou_v - tf.math.divide_no_nan(center_dist, diag_dist) 94 | 95 | if iou_type == "diou": 96 | return diou_v 97 | 98 | assert iou_type == "ciou" 99 | 100 | v = _get_v(p_height, p_width, t_height, t_width) 101 | alpha = tf.math.divide_no_nan(v, (1 - iou_v) + v) 102 | 103 | return diou_v - alpha * v 104 | 105 | 106 | def compute_unaligned_iou(target_boxes, predicted_boxes, iou_type="iou"): 107 | """Computing the IoU for unaligned boxes. 108 | 109 | Args: 110 | predicted_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max]. 111 | target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max]. 112 | iou_type: one of ['iou', 'ciou', 'diou', 'giou']. 113 | Returns: 114 | IoU loss float `Tensor`. 115 | """ 116 | 117 | predicted_boxes = tf.expand_dims(predicted_boxes, 1) 118 | target_boxes = tf.expand_dims(target_boxes, 0) 119 | 120 | return compute_iou(target_boxes, predicted_boxes) 121 | --------------------------------------------------------------------------------