├── .gitignore
├── LICENSE
├── README.md
├── configs
    ├── _base_
    │   ├── datasets
    │   │   ├── duo_detection.py
    │   │   └── duo_detection_mmyolo.py
    │   ├── default_runtime.py
    │   └── default_runtime_mmyolo.py
    ├── cascade_rcnn
    │   ├── cascade_rcnn_r50_1x_duo.py
    │   └── unitmodule_cascade_rcnn_r50_1x_duo.py
    ├── detr
    │   ├── detr_r50_500e_duo.py
    │   └── unitmodule_detr_r50_500e_duo.py
    ├── dino
    │   ├── dino_4scale_r50_1x_duo.py
    │   └── unitmodule_dino_4scale_r50_1x_duo.py
    ├── faster_rcnn
    │   ├── faster_rcnn_r50_1x_duo.py
    │   └── unitmodule_faster_rcnn_r50_1x_duo.py
    ├── fcos
    │   ├── fcos_r50_1x_duo.py
    │   └── unitmodule_fcos_r50_1x_duo.py
    ├── retinanet
    │   ├── retinanet_r50_1x_duo.py
    │   └── unitmodule_retinanet_r50_1x_duo.py
    ├── rtmdet
    │   ├── rtmdet_s_100e_duo.py
    │   └── unitmodule_rtmdet_s_100e_duo.py
    ├── tood
    │   ├── tood_r50_1x_duo.py
    │   └── unitmodule_tood_r50_1x_duo.py
    ├── unitmodule
    │   └── unitmodule.py
    ├── yolov5
    │   ├── unitmodule_yolov5_s_100e_duo.py
    │   └── yolov5_s_100e_duo.py
    ├── yolov6
    │   ├── unitmodule_yolov6_s_100e_duo.py
    │   └── yolov6_s_100e_duo.py
    ├── yolov7
    │   ├── unitmodule_yolov7_t_100e_duo.py
    │   └── yolov7_t_100e_duo.py
    ├── yolov8
    │   ├── unitmodule_yolov8_s_100e_duo.py
    │   └── yolov8_s_100e_duo.py
    └── yolox
    │   ├── unitmodule_yolox_s_100e_duo.py
    │   └── yolox_s_100e_duo.py
├── requirements.txt
├── tools
    ├── dist_test.sh
    ├── dist_train.sh
    ├── test.py
    └── train.py
└── unitmodule
    ├── __init__.py
    ├── datasets
        ├── __init__.py
        └── transforms
        │   ├── __init__.py
        │   └── colorspace.py
    └── models
        ├── __init__.py
        ├── data_preprocessors
            ├── __init__.py
            ├── data_preprocessor.py
            └── unit_module.py
        ├── detectors
            ├── __init__.py
            ├── unit_detectors.py
            └── unit_distributed.py
        └── losses
            ├── __init__.py
            ├── assisting_color_cast_loss.py
            ├── color_cast_loss.py
            ├── saturated_pixel_loss.py
            ├── total_variation_loss.py
            └── transmission_loss.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # The repo
  2 | .idea
  3 | 
  4 | # Byte-compiled / optimized / DLL files
  5 | __pycache__/
  6 | *.py[cod]
  7 | *$py.class
  8 | 
  9 | # C extensions
 10 | *.so
 11 | 
 12 | # Distribution / packaging
 13 | .Python
 14 | build/
 15 | develop-eggs/
 16 | dist/
 17 | downloads/
 18 | eggs/
 19 | .eggs/
 20 | lib/
 21 | lib64/
 22 | parts/
 23 | sdist/
 24 | var/
 25 | wheels/
 26 | pip-wheel-metadata/
 27 | share/python-wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | MANIFEST
 32 | 
 33 | # PyInstaller
 34 | #  Usually these files are written by a python script from a template
 35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 36 | *.manifest
 37 | *.spec
 38 | 
 39 | # Installer logs
 40 | pip-log.txt
 41 | pip-delete-this-directory.txt
 42 | 
 43 | # Unit test / coverage reports
 44 | htmlcov/
 45 | .tox/
 46 | .nox/
 47 | .coverage
 48 | .coverage.*
 49 | .cache
 50 | nosetests.xml
 51 | coverage.xml
 52 | *.cover
 53 | *.py,cover
 54 | .hypothesis/
 55 | .pytest_cache/
 56 | 
 57 | # Translations
 58 | *.mo
 59 | *.pot
 60 | 
 61 | # Django stuff:
 62 | *.log
 63 | local_settings.py
 64 | db.sqlite3
 65 | db.sqlite3-journal
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # IPython
 84 | profile_default/
 85 | ipython_config.py
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 LEFTeyex
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <h1 align="center">UnitModule</h1>
 2 | 
 3 | ### Installation
 4 | 
 5 | This project is based on [MMDetection](https://github.com/open-mmlab/mmdetection/tree/main).
 6 | 
 7 | - Python 3.8
 8 | - PyTorch 1.11.0+cu113
 9 | 
10 | **Step 1.** Create a conda virtual environment and activate it.
11 | 
12 | ```bash
13 | conda create -n unitmodule python=3.8 -y
14 | conda activate unitmodule
15 | ```
16 | 
17 | **Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/).
18 | 
19 | Linux and Windows
20 | 
21 | ```bash
22 | # Wheel CUDA 11.3
23 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
24 | ```
25 | 
26 | ```bash
27 | # Conda CUDA 11.3
28 | conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch
29 | ```
30 | 
31 | **Step 3.** Install MMDetection and dependent packages.
32 | 
33 | ```bash
34 | pip install -U openmim
35 | mim install mmengine==0.7.4
36 | mim install mmcv==2.0.0
37 | mim install mmdet==3.0.0
38 | mim install mmyolo==0.5.0
39 | pip install -r requirements.txt
40 | ```
41 | 
42 | ### Dataset
43 | 
44 | The DUO dataset should be organized as follows:
45 | 
46 | ```text
47 | # DUO
48 | 
49 | data
50 | ├── DUO
51 | │   ├── annotations
52 | │   │   ├── instances_train.json
53 | │   │   ├── instances_test.json
54 | │   ├── images
55 | │   │   ├── train
56 | │   │   ├── test
57 | ```
58 | 
59 | ### Training
60 | 
61 | ```bash
62 | bash tools/dist_train.sh configs/yolox/yolox_s_100e_duo.py 2
63 | ```
64 | 
65 | ### Test
66 | 
67 | ```bash
68 | bash tools/dist_test.sh configs/yolox/yolox_s_100e_duo.py yolox_s_100e_duo.pth 2
69 | ```


--------------------------------------------------------------------------------
/configs/_base_/datasets/duo_detection.py:
--------------------------------------------------------------------------------
 1 | data_root = 'data/DUO/'  # root of the DUO dataset; passed as `data_root` to both datasets below
 2 | 
 3 | train_img_file = 'images/train'
 4 | val_img_file = 'images/test'  # validation runs on the test-split images
 5 | train_ann_file = 'annotations/instances_train.json'
 6 | val_ann_file = 'annotations/instances_test.json'  # validation runs on the test-split annotations
 7 | 
 8 | mean_bgr = [85.603, 148.034, 64.697]  # per-channel normalization stats; presumably measured on DUO -- TODO confirm
 9 | std_bgr = [32.28, 39.201, 26.55]
10 | mean_rgb = [64.697, 148.034, 85.603]  # mean_bgr in reversed channel order
11 | std_rgb = [26.55, 39.201, 32.28]  # std_bgr in reversed channel order
12 | 
13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish')  # category names handed to the datasets as metainfo
14 | 
15 | img_scale = (1333, 800)  # scale given to Resize in both pipelines (keep_ratio=True)
16 | dataset_type = 'CocoDataset'
17 | evaluator_type = 'CocoMetric'
18 | train_pipeline = [  # training transforms: load image and boxes, resize, flip with prob 0.5, pack
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='LoadAnnotations', with_bbox=True),
21 |     dict(type='Resize', scale=img_scale, keep_ratio=True),
22 |     dict(type='RandomFlip', prob=0.5),
23 |     dict(type='PackDetInputs')
24 | ]
25 | test_pipeline = [  # test transforms: no flip; note LoadAnnotations comes after Resize here
26 |     dict(type='LoadImageFromFile'),
27 |     dict(type='Resize', scale=img_scale, keep_ratio=True),
28 |     dict(type='LoadAnnotations', with_bbox=True),
29 |     dict(
30 |         type='PackDetInputs',
31 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
32 |                    'scale_factor'))
33 | ]
34 | 
35 | num_gpu = 2  # only used below to derive base_batch_size
36 | train_bs = 4  # batch_size of the training dataloader
37 | val_bs = 1  # batch_size of the validation/test dataloader
38 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu)  # disabled; base batch size evaluates to 8
39 | train_dataloader = dict(
40 |     batch_size=train_bs,
41 |     num_workers=train_bs,  # worker count reuses the batch-size value (4)
42 |     persistent_workers=True,
43 |     sampler=dict(type='DefaultSampler', shuffle=True),
44 |     batch_sampler=dict(type='AspectRatioBatchSampler'),
45 |     dataset=dict(
46 |         type=dataset_type,
47 |         metainfo=dict(classes=classes),
48 |         data_root=data_root,
49 |         ann_file=train_ann_file,
50 |         data_prefix=dict(img=train_img_file),
51 |         filter_cfg=dict(filter_empty_gt=True, min_size=32),
52 |         pipeline=train_pipeline,
53 |     ))
54 | 
55 | val_dataloader = dict(
56 |     batch_size=val_bs,
57 |     num_workers=val_bs * 2,  # evaluates to 2 workers
58 |     persistent_workers=True,
59 |     drop_last=False,
60 |     sampler=dict(type='DefaultSampler', shuffle=False),
61 |     dataset=dict(
62 |         type=dataset_type,
63 |         metainfo=dict(classes=classes),
64 |         data_root=data_root,
65 |         ann_file=val_ann_file,
66 |         data_prefix=dict(img=val_img_file),
67 |         test_mode=True,
68 |         pipeline=test_pipeline,
69 |     ))
70 | 
71 | test_dataloader = val_dataloader  # testing reuses the validation dataloader settings
72 | 
73 | val_evaluator = dict(
74 |     type=evaluator_type,
75 |     ann_file=data_root + val_ann_file,  # full path: 'data/DUO/annotations/instances_test.json'
76 |     metric='bbox',
77 |     format_only=False)
78 | test_evaluator = val_evaluator  # testing reuses the validation evaluator settings
79 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/duo_detection_mmyolo.py:
--------------------------------------------------------------------------------
 1 | data_root = 'data/DUO/'  # root of the DUO dataset; passed as `data_root` to both datasets below
 2 | 
 3 | train_img_file = 'images/train'
 4 | val_img_file = 'images/test'  # validation runs on the test-split images
 5 | train_ann_file = 'annotations/instances_train.json'
 6 | val_ann_file = 'annotations/instances_test.json'  # validation runs on the test-split annotations
 7 | 
 8 | mean_bgr = [85.603, 148.034, 64.697]  # per-channel normalization stats; presumably measured on DUO -- TODO confirm
 9 | std_bgr = [32.28, 39.201, 26.55]
10 | mean_rgb = [64.697, 148.034, 85.603]  # mean_bgr in reversed channel order
11 | std_rgb = [26.55, 39.201, 32.28]  # std_bgr in reversed channel order
12 | 
13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish')  # category names handed to the datasets as metainfo
14 | 
15 | img_scale = (640, 640)  # scale given to mmdet.Resize in both pipelines (keep_ratio=True)
16 | dataset_type = 'YOLOv5CocoDataset'
17 | evaluator_type = 'mmdet.CocoMetric'  # the `mmdet.` prefix names the mmdet component explicitly
18 | train_pipeline = [  # training transforms: load, resize, pad to square, flip with prob 0.5, pack
19 |     dict(type='LoadImageFromFile'),
20 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
21 |     dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
22 |     dict(type='mmdet.Pad',
23 |          pad_to_square=True,
24 |          pad_val=dict(img=(114.0, 114.0, 114.0))),  # image padding value is 114 in every channel
25 |     dict(type='mmdet.RandomFlip', prob=0.5),
26 |     dict(type='mmdet.PackDetInputs')
27 | ]
28 | test_pipeline = [  # test transforms: no flip; note LoadAnnotations comes after Resize and Pad here
29 |     dict(type='LoadImageFromFile'),
30 |     dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
31 |     dict(type='mmdet.Pad',
32 |          pad_to_square=True,
33 |          pad_val=dict(img=(114.0, 114.0, 114.0))),  # same padding value as in training
34 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
35 |     dict(
36 |         type='mmdet.PackDetInputs',
37 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
38 |                    'scale_factor'))
39 | ]
40 | 
41 | num_gpu = 2  # only used below to derive base_batch_size
42 | train_bs = 4  # batch_size of the training dataloader
43 | val_bs = 1  # batch_size of the validation/test dataloader
44 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu)  # disabled; base batch size evaluates to 8
45 | train_dataloader = dict(
46 |     batch_size=train_bs,
47 |     num_workers=train_bs,  # worker count reuses the batch-size value (4)
48 |     persistent_workers=True,
49 |     collate_fn=dict(type='yolov5_collate'),  # only the training dataloader sets a custom collate function
50 |     sampler=dict(type='DefaultSampler', shuffle=True),
51 |     batch_sampler=dict(type='mmdet.AspectRatioBatchSampler'),
52 |     dataset=dict(
53 |         type=dataset_type,
54 |         metainfo=dict(classes=classes),
55 |         data_root=data_root,
56 |         ann_file=train_ann_file,
57 |         data_prefix=dict(img=train_img_file),
58 |         filter_cfg=dict(filter_empty_gt=True, min_size=32),
59 |         pipeline=train_pipeline,
60 |     ))
61 | 
62 | val_dataloader = dict(
63 |     batch_size=val_bs,
64 |     num_workers=val_bs * 2,  # evaluates to 2 workers
65 |     persistent_workers=True,
66 |     drop_last=False,
67 |     sampler=dict(type='DefaultSampler', shuffle=False),
68 |     dataset=dict(
69 |         type=dataset_type,
70 |         metainfo=dict(classes=classes),
71 |         data_root=data_root,
72 |         ann_file=val_ann_file,
73 |         data_prefix=dict(img=val_img_file),
74 |         test_mode=True,
75 |         pipeline=test_pipeline,
76 |     ))
77 | 
78 | test_dataloader = val_dataloader  # testing reuses the validation dataloader settings
79 | 
80 | val_evaluator = dict(
81 |     type=evaluator_type,
82 |     ann_file=data_root + val_ann_file,  # full path: 'data/DUO/annotations/instances_test.json'
83 |     metric='bbox',
84 |     format_only=False)
85 | test_evaluator = val_evaluator  # testing reuses the validation evaluator settings
86 | 


--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
 1 | default_scope = 'mmdet'  # shared runtime settings (logging, hooks, visualization) for the mmdet-scoped configs
 2 | log_level = 'INFO'
 3 | load_from = None  # no checkpoint is loaded by default
 4 | resume = False  # training does not resume by default
 5 | 
 6 | env_cfg = dict(
 7 |     cudnn_benchmark=False,
 8 |     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
 9 |     dist_cfg=dict(backend='nccl'),
10 | )
11 | randomness = dict(seed=None)  # no fixed random seed is configured here
12 | 
13 | vis_backends = [  # two visualization backends: local and TensorBoard
14 |     dict(type='LocalVisBackend'),
15 |     dict(type='TensorboardVisBackend')
16 | ]
17 | visualizer = dict(
18 |     type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
20 | default_hooks = dict(
21 |     timer=dict(type='IterTimerHook'),
22 |     logger=dict(type='LoggerHook', interval=50),  # same value as log_processor's window_size
23 |     param_scheduler=dict(type='ParamSchedulerHook'),
24 |     checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'),  # save at interval 1, keep at most 3, track best 'coco/bbox_mAP'
25 |     sampler_seed=dict(type='DistSamplerSeedHook'),
26 |     visualization=dict(type='DetVisualizationHook'))
27 | 


--------------------------------------------------------------------------------
/configs/_base_/default_runtime_mmyolo.py:
--------------------------------------------------------------------------------
 1 | default_scope = 'mmyolo'  # shared runtime settings (logging, hooks, visualization) for the mmyolo-scoped configs
 2 | log_level = 'INFO'
 3 | load_from = None  # no checkpoint is loaded by default
 4 | resume = False  # training does not resume by default
 5 | 
 6 | env_cfg = dict(
 7 |     cudnn_benchmark=False,
 8 |     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
 9 |     dist_cfg=dict(backend='nccl'),
10 | )
11 | randomness = dict(seed=None)  # no fixed random seed is configured here
12 | 
13 | vis_backends = [  # two visualization backends: local and TensorBoard
14 |     dict(type='LocalVisBackend'),
15 |     dict(type='TensorboardVisBackend')
16 | ]
17 | visualizer = dict(
18 |     type='mmdet.DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')  # the `mmdet.` prefix names the mmdet component explicitly
19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
20 | default_hooks = dict(
21 |     timer=dict(type='IterTimerHook'),
22 |     logger=dict(type='LoggerHook', interval=50),  # same value as log_processor's window_size
23 |     param_scheduler=dict(type='ParamSchedulerHook'),
24 |     checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'),  # save at interval 1, keep at most 3, track best 'coco/bbox_mAP'
25 |     sampler_seed=dict(type='DistSamplerSeedHook'),
26 |     visualization=dict(type='mmdet.DetVisualizationHook'))
27 | 


--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # Cascade R-CNN (ResNet-50 + FPN) on DUO for 12 epochs; inherits the dataset and runtime base configs
  2 |     '../_base_/datasets/duo_detection.py',
  3 |     '../_base_/default_runtime.py',
  4 | ]
  5 | max_epochs = 12
  6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)  # val_interval=1 with an epoch-based loop
  7 | val_cfg = dict(type='ValLoop')
  8 | test_cfg = dict(type='TestLoop')
  9 | 
 10 | param_scheduler = [
 11 |     dict(
 12 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # iteration-based (by_epoch=False) linear warmup over steps 0-500
 13 |     dict(
 14 |         type='MultiStepLR',  # NOTE(review): by_epoch/end are not set here (the DETR/DINO configs set them); presumably the scheduler default is epoch-based -- confirm
 15 |         begin=0,
 16 |         milestones=[8, 11],
 17 |         gamma=0.1)  # LR is multiplied by 0.1 at each milestone
 18 | ]
 19 | optim_wrapper = dict(
 20 |     type='OptimWrapper',
 21 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
 22 | 
 23 | num_classes = 4  # shared by the three bbox heads below
 24 | model = dict(
 25 |     type='CascadeRCNN',
 26 |     data_preprocessor=dict(
 27 |         type='DetDataPreprocessor',
 28 |         mean=_base_.mean_rgb,  # RGB-ordered stats from the dataset base config, paired with bgr_to_rgb=True below
 29 |         std=_base_.std_rgb,
 30 |         bgr_to_rgb=True,
 31 |         pad_size_divisor=32),
 32 |     backbone=dict(
 33 |         type='ResNet',
 34 |         depth=50,
 35 |         num_stages=4,
 36 |         out_indices=(0, 1, 2, 3),  # four outputs, matching the four in_channels of the neck
 37 |         frozen_stages=1,
 38 |         norm_cfg=dict(type='BN', requires_grad=True),
 39 |         norm_eval=True,
 40 |         style='pytorch',
 41 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),  # backbone starts from the torchvision ResNet-50 checkpoint
 42 |     neck=dict(
 43 |         type='FPN',
 44 |         in_channels=[256, 512, 1024, 2048],
 45 |         out_channels=256,  # matches in_channels=256 of the RPN head and the bbox heads
 46 |         num_outs=5),  # five outputs, matching the five anchor strides of the RPN head
 47 |     rpn_head=dict(
 48 |         type='RPNHead',
 49 |         in_channels=256,
 50 |         feat_channels=256,
 51 |         anchor_generator=dict(
 52 |             type='AnchorGenerator',
 53 |             scales=[8],
 54 |             ratios=[0.5, 1.0, 2.0],
 55 |             strides=[4, 8, 16, 32, 64]),
 56 |         bbox_coder=dict(
 57 |             type='DeltaXYWHBBoxCoder',
 58 |             target_means=[0.0, 0.0, 0.0, 0.0],
 59 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 60 |         loss_cls=dict(
 61 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 62 |         loss_bbox=dict(
 63 |             type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),  # beta is 1/9
 64 |     roi_head=dict(
 65 |         type='CascadeRoIHead',
 66 |         num_stages=3,  # three stages: three bbox_head entries here and three train_cfg.rcnn entries below
 67 |         stage_loss_weights=[1, 0.5, 0.25],  # one loss weight per stage
 68 |         bbox_roi_extractor=dict(
 69 |             type='SingleRoIExtractor',
 70 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),  # output_size=7 matches roi_feat_size=7 of the heads
 71 |             out_channels=256,
 72 |             featmap_strides=[4, 8, 16, 32]),  # four strides listed here; stride 64 from the RPN is not included
 73 |         bbox_head=[
 74 |             dict(  # stage 1 head
 75 |                 type='Shared2FCBBoxHead',
 76 |                 in_channels=256,
 77 |                 fc_out_channels=1024,
 78 |                 roi_feat_size=7,
 79 |                 num_classes=num_classes,
 80 |                 bbox_coder=dict(
 81 |                     type='DeltaXYWHBBoxCoder',
 82 |                     target_means=[0.0, 0.0, 0.0, 0.0],
 83 |                     target_stds=[0.1, 0.1, 0.2, 0.2]),  # largest target_stds of the three stages
 84 |                 reg_class_agnostic=True,
 85 |                 loss_cls=dict(
 86 |                     type='CrossEntropyLoss',
 87 |                     use_sigmoid=False,
 88 |                     loss_weight=1.0),
 89 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
 90 |                                loss_weight=1.0)),
 91 |             dict(  # stage 2 head: identical to stage 1 except for target_stds
 92 |                 type='Shared2FCBBoxHead',
 93 |                 in_channels=256,
 94 |                 fc_out_channels=1024,
 95 |                 roi_feat_size=7,
 96 |                 num_classes=num_classes,
 97 |                 bbox_coder=dict(
 98 |                     type='DeltaXYWHBBoxCoder',
 99 |                     target_means=[0.0, 0.0, 0.0, 0.0],
100 |                     target_stds=[0.05, 0.05, 0.1, 0.1]),  # half of the stage 1 values
101 |                 reg_class_agnostic=True,
102 |                 loss_cls=dict(
103 |                     type='CrossEntropyLoss',
104 |                     use_sigmoid=False,
105 |                     loss_weight=1.0),
106 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
107 |                                loss_weight=1.0)),
108 |             dict(  # stage 3 head: identical to stage 1 except for target_stds
109 |                 type='Shared2FCBBoxHead',
110 |                 in_channels=256,
111 |                 fc_out_channels=1024,
112 |                 roi_feat_size=7,
113 |                 num_classes=num_classes,
114 |                 bbox_coder=dict(
115 |                     type='DeltaXYWHBBoxCoder',
116 |                     target_means=[0.0, 0.0, 0.0, 0.0],
117 |                     target_stds=[0.033, 0.033, 0.067, 0.067]),  # roughly one third of the stage 1 values
118 |                 reg_class_agnostic=True,
119 |                 loss_cls=dict(
120 |                     type='CrossEntropyLoss',
121 |                     use_sigmoid=False,
122 |                     loss_weight=1.0),
123 |                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
124 |         ]),
125 |     train_cfg=dict(
126 |         rpn=dict(
127 |             assigner=dict(
128 |                 type='MaxIoUAssigner',
129 |                 pos_iou_thr=0.7,
130 |                 neg_iou_thr=0.3,
131 |                 min_pos_iou=0.3,
132 |                 match_low_quality=True,
133 |                 ignore_iof_thr=-1),
134 |             sampler=dict(
135 |                 type='RandomSampler',
136 |                 num=256,
137 |                 pos_fraction=0.5,
138 |                 neg_pos_ub=-1,
139 |                 add_gt_as_proposals=False),
140 |             allowed_border=0,
141 |             pos_weight=-1,
142 |             debug=False),
143 |         rpn_proposal=dict(
144 |             nms_pre=2000,
145 |             max_per_img=2000,  # training keeps 2000 proposals per image; test_cfg.rpn below uses 1000
146 |             nms=dict(type='nms', iou_threshold=0.7),
147 |             min_bbox_size=0),
148 |         rcnn=[
149 |             dict(  # stage 1: all three assigner IoU thresholds are 0.5
150 |                 assigner=dict(
151 |                     type='MaxIoUAssigner',
152 |                     pos_iou_thr=0.5,
153 |                     neg_iou_thr=0.5,
154 |                     min_pos_iou=0.5,
155 |                     match_low_quality=False,
156 |                     ignore_iof_thr=-1),
157 |                 sampler=dict(
158 |                     type='RandomSampler',
159 |                     num=512,
160 |                     pos_fraction=0.25,
161 |                     neg_pos_ub=-1,
162 |                     add_gt_as_proposals=True),
163 |                 pos_weight=-1,
164 |                 debug=False),
165 |             dict(  # stage 2: same as stage 1 with IoU thresholds raised to 0.6
166 |                 assigner=dict(
167 |                     type='MaxIoUAssigner',
168 |                     pos_iou_thr=0.6,
169 |                     neg_iou_thr=0.6,
170 |                     min_pos_iou=0.6,
171 |                     match_low_quality=False,
172 |                     ignore_iof_thr=-1),
173 |                 sampler=dict(
174 |                     type='RandomSampler',
175 |                     num=512,
176 |                     pos_fraction=0.25,
177 |                     neg_pos_ub=-1,
178 |                     add_gt_as_proposals=True),
179 |                 pos_weight=-1,
180 |                 debug=False),
181 |             dict(  # stage 3: same as stage 1 with IoU thresholds raised to 0.7
182 |                 assigner=dict(
183 |                     type='MaxIoUAssigner',
184 |                     pos_iou_thr=0.7,
185 |                     neg_iou_thr=0.7,
186 |                     min_pos_iou=0.7,
187 |                     match_low_quality=False,
188 |                     ignore_iof_thr=-1),
189 |                 sampler=dict(
190 |                     type='RandomSampler',
191 |                     num=512,
192 |                     pos_fraction=0.25,
193 |                     neg_pos_ub=-1,
194 |                     add_gt_as_proposals=True),
195 |                 pos_weight=-1,
196 |                 debug=False)
197 |         ]),
198 |     test_cfg=dict(
199 |         rpn=dict(
200 |             nms_pre=1000,
201 |             max_per_img=1000,
202 |             nms=dict(type='nms', iou_threshold=0.7),
203 |             min_bbox_size=0),
204 |         rcnn=dict(
205 |             score_thr=0.05,
206 |             nms=dict(type='nms', iou_threshold=0.5),
207 |             max_per_img=100)))
208 | 


--------------------------------------------------------------------------------
/configs/cascade_rcnn/unitmodule_cascade_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # UnitModule variant of the Cascade R-CNN DUO config; overrides only the keys set below
 2 |     './cascade_rcnn_r50_1x_duo.py',
 3 |     '../unitmodule/unitmodule.py',  # source of `_base_.unit_module` used below
 4 | ]
 5 | 
 6 | model = dict(
 7 |     type='UnitCascadeRCNN',  # replaces the base detector type 'CascadeRCNN'
 8 |     data_preprocessor=dict(
 9 |         type='UnitDetDataPreprocessor',  # replaces the base 'DetDataPreprocessor' type
10 |         unit_module=_base_.unit_module)
11 | )
12 | 
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # the base Cascade R-CNN optim_wrapper sets no clip_grad
14 | 
15 | train_pipeline = [  # base train_pipeline with UnderwaterColorRandomTransfer inserted between Resize and RandomFlip
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='LoadAnnotations', with_bbox=True),
18 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='RandomFlip', prob=0.5),
21 |     dict(type='PackDetInputs')
22 | ]
23 | 
24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # only the training pipeline is overridden; val/test are untouched
25 | 


--------------------------------------------------------------------------------
/configs/detr/detr_r50_500e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # DETR (ResNet-50) on DUO for 500 epochs; inherits the dataset and runtime base configs
  2 |     '../_base_/datasets/duo_detection.py',
  3 |     '../_base_/default_runtime.py',
  4 | ]
  5 | max_epochs = 500
  6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)  # val_interval=1 with an epoch-based loop
  7 | val_cfg = dict(type='ValLoop')
  8 | test_cfg = dict(type='TestLoop')
  9 | 
 10 | param_scheduler = [  # single epoch-based step schedule; no warmup entry
 11 |     dict(
 12 |         type='MultiStepLR',
 13 |         begin=0,
 14 |         end=max_epochs,
 15 |         by_epoch=True,
 16 |         milestones=[334],
 17 |         gamma=0.1)  # LR is multiplied by 0.1 at the milestone
 18 | ]
 19 | optim_wrapper = dict(
 20 |     type='OptimWrapper',
 21 |     optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
 22 |     clip_grad=dict(max_norm=0.1, norm_type=2),
 23 |     paramwise_cfg=dict(
 24 |         custom_keys=dict(backbone=dict(lr_mult=0.1, decay_mult=1.0))))  # 'backbone' key gets lr_mult=0.1 and decay_mult=1.0
 25 | 
 26 | num_classes = 4  # passed to the DETR head below
 27 | model = dict(
 28 |     type='DETR',
 29 |     num_queries=100,  # same value as test_cfg.max_per_img below
 30 |     data_preprocessor=dict(
 31 |         type='DetDataPreprocessor',
 32 |         mean=_base_.mean_rgb,  # RGB-ordered stats from the dataset base config, paired with bgr_to_rgb=True below
 33 |         std=_base_.std_rgb,
 34 |         bgr_to_rgb=True,
 35 |         pad_size_divisor=32),
 36 |     backbone=dict(
 37 |         type='ResNet',
 38 |         depth=50,
 39 |         num_stages=4,
 40 |         out_indices=(3,),  # a single backbone output, matching the single in_channels entry of the neck
 41 |         frozen_stages=1,
 42 |         norm_cfg=dict(type='BN', requires_grad=False),
 43 |         norm_eval=True,
 44 |         style='pytorch',
 45 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),  # backbone starts from the torchvision ResNet-50 checkpoint
 46 |     neck=dict(
 47 |         type='ChannelMapper',
 48 |         in_channels=[2048],
 49 |         kernel_size=1,
 50 |         out_channels=256,  # matches embed_dims=256 used throughout the encoder, decoder and head
 51 |         act_cfg=None,
 52 |         norm_cfg=None,
 53 |         num_outs=1),
 54 |     encoder=dict(
 55 |         num_layers=6,
 56 |         layer_cfg=dict(
 57 |             self_attn_cfg=dict(
 58 |                 embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
 59 |             ffn_cfg=dict(
 60 |                 embed_dims=256,
 61 |                 feedforward_channels=2048,
 62 |                 num_fcs=2,
 63 |                 ffn_drop=0.1,
 64 |                 act_cfg=dict(type='ReLU', inplace=True)))),
 65 |     decoder=dict(
 66 |         num_layers=6,
 67 |         layer_cfg=dict(  # same attention/FFN settings as the encoder, plus a cross-attention config
 68 |             self_attn_cfg=dict(
 69 |                 embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
 70 |             cross_attn_cfg=dict(
 71 |                 embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
 72 |             ffn_cfg=dict(
 73 |                 embed_dims=256,
 74 |                 feedforward_channels=2048,
 75 |                 num_fcs=2,
 76 |                 ffn_drop=0.1,
 77 |                 act_cfg=dict(type='ReLU', inplace=True))),
 78 |         return_intermediate=True),
 79 |     positional_encoding=dict(num_feats=128, normalize=True),
 80 |     bbox_head=dict(
 81 |         type='DETRHead',
 82 |         num_classes=num_classes,
 83 |         embed_dims=256,
 84 |         loss_cls=dict(
 85 |             type='CrossEntropyLoss',
 86 |             bg_cls_weight=0.1,
 87 |             use_sigmoid=False,
 88 |             loss_weight=1.0,
 89 |             class_weight=1.0),
 90 |         loss_bbox=dict(type='L1Loss', loss_weight=5.0),
 91 |         loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
 92 |     train_cfg=dict(
 93 |         assigner=dict(
 94 |             type='HungarianAssigner',
 95 |             match_costs=[  # cost weights 1.0 / 5.0 / 2.0 equal the cls / bbox / iou loss weights of the head
 96 |                 dict(type='ClassificationCost', weight=1.0),
 97 |                 dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
 98 |                 dict(type='IoUCost', iou_mode='giou', weight=2.0)
 99 |             ])),
100 |     test_cfg=dict(max_per_img=100))
101 | 


--------------------------------------------------------------------------------
/configs/detr/unitmodule_detr_r50_500e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # UnitModule variant of the DETR DUO config; `unit_module` is defined inline in this file
 2 |     './detr_r50_500e_duo.py',
 3 | ]
 4 | 
 5 | with_unit_module = True  # not referenced elsewhere in this config; presumably read by other code -- TODO confirm
 6 | norm_cfg = dict(type='GN', num_groups=8)  # shared by unit_backbone and t_head below
 7 | act_cfg = dict(type='ReLU')  # shared by unit_backbone and t_head below
 8 | 
 9 | k_1, k_2 = 9, 9  # used as `large_kernels` of the unit backbone
10 | c_s1, c_s2 = 32, 32  # used as `stem_channels`; c_s2 also sets the t_head and loss_acc channels
11 | 
12 | unit_module = dict(
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,  # equals the last stem channel count
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=1000),  # by far the largest of the five loss weights
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | 
38 | model = dict(
39 |     type='UnitDETR',  # replaces the base detector type 'DETR'
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',  # replaces the base 'DetDataPreprocessor' type
42 |         unit_module=unit_module)
43 | )
44 | 
45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2))  # same clip_grad values as the base DETR config
46 | 
47 | train_pipeline = [  # base train_pipeline with UnderwaterColorRandomTransfer inserted between Resize and RandomFlip
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | 
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # only the training pipeline is overridden; val/test are untouched
57 | 


--------------------------------------------------------------------------------
/configs/dino/dino_4scale_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit dataset and runtime defaults from the shared _base_ configs
 2 |     '../_base_/datasets/duo_detection.py',
 3 |     '../_base_/default_runtime.py',
 4 | ]
 5 | max_epochs = 12  # reused by train_cfg and as the scheduler end below
 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
 7 | val_cfg = dict(type='ValLoop')
 8 | test_cfg = dict(type='TestLoop')
 9 | 
10 | param_scheduler = [
11 |     dict(
12 |         type='MultiStepLR',
13 |         begin=0,
14 |         end=max_epochs,
15 |         by_epoch=True,
16 |         milestones=[11],  # single epoch-based milestone (by_epoch=True), paired with gamma below
17 |         gamma=0.1)
18 | ]
19 | optim_wrapper = dict(
20 |     type='OptimWrapper',
21 |     optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
22 |     clip_grad=dict(max_norm=0.1, norm_type=2),
23 |     paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1))))  # backbone parameters get an lr_mult of 0.1
24 | 
25 | num_classes = 4  # passed to bbox_head.num_classes below
26 | model = dict(  # DINO detector with a ResNet-50 backbone
27 |     type='DINO',
28 |     num_queries=900,
29 |     with_box_refine=True,
30 |     as_two_stage=True,
31 |     data_preprocessor=dict(
32 |         type='DetDataPreprocessor',
33 |         mean=_base_.mean_rgb,  # normalization stats come from the inherited config
34 |         std=_base_.std_rgb,
35 |         bgr_to_rgb=True,
36 |         pad_size_divisor=32),
37 |     backbone=dict(
38 |         type='ResNet',
39 |         depth=50,
40 |         num_stages=4,
41 |         out_indices=(1, 2, 3),  # three outputs, one per entry of neck.in_channels
42 |         frozen_stages=1,
43 |         norm_cfg=dict(type='BN', requires_grad=False),
44 |         norm_eval=True,
45 |         style='pytorch',
46 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
47 |     neck=dict(
48 |         type='ChannelMapper',
49 |         in_channels=[512, 1024, 2048],
50 |         kernel_size=1,
51 |         out_channels=256,  # same width as embed_dims in the encoder/decoder
52 |         act_cfg=None,
53 |         norm_cfg=dict(type='GN', num_groups=32),
54 |         num_outs=4),  # four outputs from three inputs; attention configs below use num_levels=4
55 |     encoder=dict(
56 |         num_layers=6,
57 |         layer_cfg=dict(
58 |             self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
59 |             ffn_cfg=dict(
60 |                 embed_dims=256, feedforward_channels=2048, ffn_drop=0.0))),
61 |     decoder=dict(
62 |         num_layers=6,
63 |         return_intermediate=True,
64 |         layer_cfg=dict(
65 |             self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
66 |             cross_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
67 |             ffn_cfg=dict(
68 |                 embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
69 |         post_norm_cfg=None),
70 |     positional_encoding=dict(
71 |         num_feats=128, normalize=True, offset=0.0, temperature=20),
72 |     bbox_head=dict(
73 |         type='DINOHead',
74 |         num_classes=num_classes,
75 |         sync_cls_avg_factor=True,
76 |         loss_cls=dict(
77 |             type='FocalLoss',
78 |             use_sigmoid=True,
79 |             gamma=2.0,
80 |             alpha=0.25,
81 |             loss_weight=1.0),
82 |         loss_bbox=dict(type='L1Loss', loss_weight=5.0),
83 |         loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
84 |     dn_cfg=dict(
85 |         label_noise_scale=0.5,
86 |         box_noise_scale=1.0,
87 |         group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100)),
88 |     train_cfg=dict(
89 |         assigner=dict(
90 |             type='HungarianAssigner',
91 |             match_costs=[  # L1 and GIoU cost weights equal the loss_bbox / loss_iou weights above
92 |                 dict(type='FocalLossCost', weight=2.0),
93 |                 dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
94 |                 dict(type='IoUCost', iou_mode='giou', weight=2.0)
95 |             ])),
96 |     test_cfg=dict(max_per_img=300))  # per-image detection cap at test time
97 | 


--------------------------------------------------------------------------------
/configs/dino/unitmodule_dino_4scale_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # extend the baseline DINO config from this directory
 2 |     './dino_4scale_r50_1x_duo.py',
 3 | ]
 4 | 
 5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably consumed elsewhere - confirm
 6 | norm_cfg = dict(type='GN', num_groups=8)  # reused by unit_backbone and t_head below
 7 | act_cfg = dict(type='ReLU')  # reused by unit_backbone and t_head below
 8 | 
 9 | k_1, k_2 = 9, 9  # values passed as UnitBackbone large_kernels
10 | c_s1, c_s2 = 32, 32  # UnitBackbone stem_channels; c_s2 also sizes t_head and loss_acc
11 | 
12 | unit_module = dict(  # UnitModule settings, handed to the data preprocessor in `model` below
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,  # matches the last stem channel width
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=1000),  # largest weight among the five loss terms
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | 
38 | model = dict(  # only type and data_preprocessor are set here; other model keys are expected from the base config
39 |     type='UnitDINO',
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',
42 |         unit_module=unit_module)
43 | )
44 | 
45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2))  # same clip_grad values as the base DINO config
46 | 
47 | train_pipeline = [  # training data pipeline for the UnitModule variant
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the inherited config
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),  # colour augmentation placed between Resize and RandomFlip
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | 
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 | 


--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # inherit dataset and runtime defaults from the shared _base_ configs
  2 |     '../_base_/datasets/duo_detection.py',
  3 |     '../_base_/default_runtime.py',
  4 | ]
  5 | max_epochs = 12  # reused by train_cfg and as the MultiStepLR end below
  6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
  7 | val_cfg = dict(type='ValLoop')
  8 | test_cfg = dict(type='TestLoop')
  9 | 
 10 | param_scheduler = [
 11 |     dict(  # iteration-based (by_epoch=False) linear schedule over iterations 0-500, starting at factor 0.001
 12 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
 13 |     dict(
 14 |         type='MultiStepLR',
 15 |         begin=0,
 16 |         end=max_epochs,
 17 |         by_epoch=True,
 18 |         milestones=[8, 11],  # epoch-based milestones (by_epoch=True), paired with gamma below
 19 |         gamma=0.1)
 20 | ]
 21 | optim_wrapper = dict(  # no clip_grad is configured in this baseline
 22 |     type='OptimWrapper',
 23 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
 24 | 
 25 | num_classes = 4  # passed to roi_head.bbox_head.num_classes below
 26 | model = dict(  # Faster R-CNN with a ResNet-50 backbone and FPN neck
 27 |     type='FasterRCNN',
 28 |     data_preprocessor=dict(
 29 |         type='DetDataPreprocessor',
 30 |         mean=_base_.mean_rgb,  # normalization stats come from the inherited config
 31 |         std=_base_.std_rgb,
 32 |         bgr_to_rgb=True,
 33 |         pad_size_divisor=32),
 34 |     backbone=dict(
 35 |         type='ResNet',
 36 |         depth=50,
 37 |         num_stages=4,
 38 |         out_indices=(0, 1, 2, 3),  # four outputs, one per entry of neck.in_channels
 39 |         frozen_stages=1,
 40 |         norm_cfg=dict(type='BN', requires_grad=True),
 41 |         norm_eval=True,
 42 |         style='pytorch',
 43 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
 44 |     neck=dict(
 45 |         type='FPN',
 46 |         in_channels=[256, 512, 1024, 2048],
 47 |         out_channels=256,
 48 |         num_outs=5),  # five outputs, matching the five RPN anchor strides below
 49 |     rpn_head=dict(
 50 |         type='RPNHead',
 51 |         in_channels=256,
 52 |         feat_channels=256,
 53 |         anchor_generator=dict(
 54 |             type='AnchorGenerator',
 55 |             scales=[8],
 56 |             ratios=[0.5, 1.0, 2.0],
 57 |             strides=[4, 8, 16, 32, 64]),
 58 |         bbox_coder=dict(
 59 |             type='DeltaXYWHBBoxCoder',
 60 |             target_means=[0.0, 0.0, 0.0, 0.0],
 61 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
 62 |         loss_cls=dict(
 63 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 64 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
 65 |     roi_head=dict(
 66 |         type='StandardRoIHead',
 67 |         bbox_roi_extractor=dict(
 68 |             type='SingleRoIExtractor',
 69 |             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),  # output_size equals bbox_head.roi_feat_size
 70 |             out_channels=256,
 71 |             featmap_strides=[4, 8, 16, 32]),  # four strides listed here versus five in the RPN anchor generator
 72 |         bbox_head=dict(
 73 |             type='Shared2FCBBoxHead',
 74 |             in_channels=256,
 75 |             fc_out_channels=1024,
 76 |             roi_feat_size=7,
 77 |             num_classes=num_classes,
 78 |             bbox_coder=dict(
 79 |                 type='DeltaXYWHBBoxCoder',
 80 |                 target_means=[0.0, 0.0, 0.0, 0.0],
 81 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),  # differs from the RPN coder, which uses all-ones stds
 82 |             reg_class_agnostic=False,
 83 |             loss_cls=dict(
 84 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 85 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
 86 |     train_cfg=dict(  # separate training settings for rpn, rpn_proposal and rcnn
 87 |         rpn=dict(
 88 |             assigner=dict(
 89 |                 type='MaxIoUAssigner',
 90 |                 pos_iou_thr=0.7,
 91 |                 neg_iou_thr=0.3,
 92 |                 min_pos_iou=0.3,
 93 |                 match_low_quality=True,
 94 |                 ignore_iof_thr=-1),
 95 |             sampler=dict(
 96 |                 type='RandomSampler',
 97 |                 num=256,
 98 |                 pos_fraction=0.5,
 99 |                 neg_pos_ub=-1,
100 |                 add_gt_as_proposals=False),
101 |             allowed_border=-1,
102 |             pos_weight=-1,
103 |             debug=False),
104 |         rpn_proposal=dict(
105 |             nms_pre=2000,  # training uses 2000 here; test_cfg.rpn below uses 1000
106 |             max_per_img=1000,
107 |             nms=dict(type='nms', iou_threshold=0.7),
108 |             min_bbox_size=0),
109 |         rcnn=dict(
110 |             assigner=dict(
111 |                 type='MaxIoUAssigner',
112 |                 pos_iou_thr=0.5,
113 |                 neg_iou_thr=0.5,
114 |                 min_pos_iou=0.5,
115 |                 match_low_quality=False,
116 |                 ignore_iof_thr=-1),
117 |             sampler=dict(
118 |                 type='RandomSampler',
119 |                 num=512,
120 |                 pos_fraction=0.25,
121 |                 neg_pos_ub=-1,
122 |                 add_gt_as_proposals=True),
123 |             pos_weight=-1,
124 |             debug=False)),
125 |     test_cfg=dict(  # separate test-time settings for rpn and rcnn
126 |         rpn=dict(
127 |             nms_pre=1000,
128 |             max_per_img=1000,
129 |             nms=dict(type='nms', iou_threshold=0.7),
130 |             min_bbox_size=0),
131 |         rcnn=dict(
132 |             score_thr=0.05,
133 |             nms=dict(type='nms', iou_threshold=0.5),
134 |             max_per_img=100)))
135 | 
136 | 


--------------------------------------------------------------------------------
/configs/faster_rcnn/unitmodule_faster_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # extend the baseline Faster R-CNN config from this directory
 2 |     './faster_rcnn_r50_1x_duo.py',
 3 | ]
 4 | 
 5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably consumed elsewhere - confirm
 6 | norm_cfg = dict(type='GN', num_groups=8)  # reused by unit_backbone and t_head below
 7 | act_cfg = dict(type='ReLU')  # reused by unit_backbone and t_head below
 8 | 
 9 | k_1, k_2 = 9, 9  # values passed as UnitBackbone large_kernels
10 | c_s1, c_s2 = 32, 32  # UnitBackbone stem_channels; c_s2 also sizes t_head and loss_acc
11 | 
12 | unit_module = dict(  # UnitModule settings, handed to the data preprocessor in `model` below
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,  # matches the last stem channel width
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=500),  # largest weight among the five loss terms
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | 
38 | model = dict(  # only type and data_preprocessor are set here; other model keys are expected from the base config
39 |     type='UnitFasterRCNN',
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',
42 |         unit_module=unit_module)
43 | )
44 | 
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # adds clip_grad, which the base Faster R-CNN config does not set
46 | 
47 | train_pipeline = [  # training data pipeline for the UnitModule variant
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the inherited config
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),  # colour augmentation placed between Resize and RandomFlip
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | 
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 | 


--------------------------------------------------------------------------------
/configs/fcos/fcos_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit dataset and runtime defaults from the shared _base_ configs
 2 |     '../_base_/datasets/duo_detection.py',
 3 |     '../_base_/default_runtime.py',
 4 | ]
 5 | max_epochs = 12  # reused by train_cfg and as the MultiStepLR end below
 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
 7 | val_cfg = dict(type='ValLoop')
 8 | test_cfg = dict(type='TestLoop')
 9 | 
10 | param_scheduler = [
11 |     dict(  # iteration-based (by_epoch=False) constant factor of 1/3 over iterations 0-500
12 |         type='ConstantLR',
13 |         factor=1.0 / 3,
14 |         by_epoch=False,
15 |         begin=0,
16 |         end=500),
17 |     dict(
18 |         type='MultiStepLR',
19 |         begin=0,
20 |         end=max_epochs,
21 |         by_epoch=True,
22 |         milestones=[8, 11],  # epoch-based milestones (by_epoch=True), paired with gamma below
23 |         gamma=0.1)
24 | ]
25 | optim_wrapper = dict(
26 |     type='OptimWrapper',
27 |     optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
28 |     paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0),  # bias-specific LR and weight-decay multipliers
29 |     clip_grad=dict(max_norm=35, norm_type=2))
30 | 
31 | num_classes = 4  # passed to bbox_head.num_classes below
32 | model = dict(  # FCOS with a caffe-style ResNet-50 backbone and FPN neck
33 |     type='FCOS',
34 |     data_preprocessor=dict(
35 |         type='DetDataPreprocessor',
36 |         mean=_base_.mean_bgr,  # BGR mean from the inherited config, used with bgr_to_rgb=False
37 |         std=[1.0, 1.0, 1.0],  # unit std: no per-channel scaling
38 |         bgr_to_rgb=False,  # keeps BGR order; the backbone below loads a caffe-style checkpoint
39 |         pad_size_divisor=32),
40 |     backbone=dict(
41 |         type='ResNet',
42 |         depth=50,
43 |         num_stages=4,
44 |         out_indices=(0, 1, 2, 3),  # four outputs, one per entry of neck.in_channels
45 |         frozen_stages=1,
46 |         norm_cfg=dict(type='BN', requires_grad=False),
47 |         norm_eval=True,
48 |         style='caffe',
49 |         init_cfg=dict(
50 |             type='Pretrained',
51 |             checkpoint='open-mmlab://detectron/resnet50_caffe')),
52 |     neck=dict(
53 |         type='FPN',
54 |         in_channels=[256, 512, 1024, 2048],
55 |         out_channels=256,
56 |         start_level=1,
57 |         add_extra_convs='on_output',
58 |         num_outs=5,  # five outputs, matching the five head strides below
59 |         relu_before_extra_convs=True),
60 |     bbox_head=dict(
61 |         type='FCOSHead',
62 |         num_classes=num_classes,
63 |         in_channels=256,
64 |         stacked_convs=4,
65 |         feat_channels=256,
66 |         strides=[8, 16, 32, 64, 128],
67 |         loss_cls=dict(
68 |             type='FocalLoss',
69 |             use_sigmoid=True,
70 |             gamma=2.0,
71 |             alpha=0.25,
72 |             loss_weight=1.0),
73 |         loss_bbox=dict(type='IoULoss', loss_weight=1.0),
74 |         loss_centerness=dict(
75 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
76 |     test_cfg=dict(  # test-time filtering and NMS settings; no model-level train_cfg is defined here
77 |         nms_pre=1000,
78 |         min_bbox_size=0,
79 |         score_thr=0.05,
80 |         nms=dict(type='nms', iou_threshold=0.5),
81 |         max_per_img=100))
82 | 


--------------------------------------------------------------------------------
/configs/fcos/unitmodule_fcos_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # extend the baseline FCOS config from this directory
 2 |     './fcos_r50_1x_duo.py',
 3 | ]
 4 | 
 5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably consumed elsewhere - confirm
 6 | norm_cfg = dict(type='GN', num_groups=8)  # reused by unit_backbone and t_head below
 7 | act_cfg = dict(type='ReLU')  # reused by unit_backbone and t_head below
 8 | 
 9 | k_1, k_2 = 9, 9  # values passed as UnitBackbone large_kernels
10 | c_s1, c_s2 = 32, 32  # UnitBackbone stem_channels; c_s2 also sizes t_head and loss_acc
11 | 
12 | unit_module = dict(  # UnitModule settings, handed to the data preprocessor in `model` below
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,  # matches the last stem channel width
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=500),  # largest weight among the five loss terms
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | 
38 | model = dict(  # only type and data_preprocessor are set here; other model keys are expected from the base config
39 |     type='UnitFCOS',
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',
42 |         unit_module=unit_module)
43 | )
44 | 
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # same clip_grad values as the base FCOS config
46 | 
47 | train_pipeline = [  # training data pipeline for the UnitModule variant
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the inherited config
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),  # colour augmentation placed between Resize and RandomFlip
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | 
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 | 


--------------------------------------------------------------------------------
/configs/retinanet/retinanet_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit dataset and runtime defaults from the shared _base_ configs
 2 |     '../_base_/datasets/duo_detection.py',
 3 |     '../_base_/default_runtime.py',
 4 | ]
 5 | max_epochs = 12  # reused by train_cfg and as the MultiStepLR end below
 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
 7 | val_cfg = dict(type='ValLoop')
 8 | test_cfg = dict(type='TestLoop')
 9 | 
10 | param_scheduler = [
11 |     dict(  # iteration-based (by_epoch=False) linear schedule over iterations 0-500, starting at factor 0.001
12 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
13 |     dict(
14 |         type='MultiStepLR',
15 |         begin=0,
16 |         end=max_epochs,
17 |         by_epoch=True,
18 |         milestones=[8, 11],  # epoch-based milestones (by_epoch=True), paired with gamma below
19 |         gamma=0.1)
20 | ]
21 | optim_wrapper = dict(  # no clip_grad is configured in this baseline
22 |     type='OptimWrapper',
23 |     optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
24 | 
25 | num_classes = 4  # passed to bbox_head.num_classes below
26 | model = dict(  # RetinaNet with a ResNet-50 backbone and FPN neck
27 |     type='RetinaNet',
28 |     data_preprocessor=dict(
29 |         type='DetDataPreprocessor',
30 |         mean=_base_.mean_rgb,  # normalization stats come from the inherited config
31 |         std=_base_.std_rgb,
32 |         bgr_to_rgb=True,
33 |         pad_size_divisor=32),
34 |     backbone=dict(
35 |         type='ResNet',
36 |         depth=50,
37 |         num_stages=4,
38 |         out_indices=(0, 1, 2, 3),  # four outputs, one per entry of neck.in_channels
39 |         frozen_stages=1,
40 |         norm_cfg=dict(type='BN', requires_grad=True),
41 |         norm_eval=True,
42 |         style='pytorch',
43 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44 |     neck=dict(
45 |         type='FPN',
46 |         in_channels=[256, 512, 1024, 2048],
47 |         out_channels=256,
48 |         start_level=1,
49 |         add_extra_convs='on_input',
50 |         num_outs=5),  # five outputs, matching the five anchor strides below
51 |     bbox_head=dict(
52 |         type='RetinaHead',
53 |         num_classes=num_classes,
54 |         in_channels=256,
55 |         stacked_convs=4,
56 |         feat_channels=256,
57 |         anchor_generator=dict(
58 |             type='AnchorGenerator',
59 |             octave_base_scale=4,
60 |             scales_per_octave=3,
61 |             ratios=[0.5, 1.0, 2.0],
62 |             strides=[8, 16, 32, 64, 128]),
63 |         bbox_coder=dict(
64 |             type='DeltaXYWHBBoxCoder',
65 |             target_means=[0.0, 0.0, 0.0, 0.0],
66 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
67 |         loss_cls=dict(
68 |             type='FocalLoss',
69 |             use_sigmoid=True,
70 |             gamma=2.0,
71 |             alpha=0.25,
72 |             loss_weight=1.0),
73 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
74 |     train_cfg=dict(
75 |         assigner=dict(
76 |             type='MaxIoUAssigner',
77 |             pos_iou_thr=0.5,
78 |             neg_iou_thr=0.4,  # lower than pos_iou_thr, leaving a gap between the two thresholds
79 |             min_pos_iou=0,
80 |             ignore_iof_thr=-1),
81 |         sampler=dict(type='PseudoSampler'),
82 |         allowed_border=-1,
83 |         pos_weight=-1,
84 |         debug=False),
85 |     test_cfg=dict(  # test-time filtering and NMS settings
86 |         nms_pre=1000,
87 |         min_bbox_size=0,
88 |         score_thr=0.05,
89 |         nms=dict(type='nms', iou_threshold=0.5),
90 |         max_per_img=100))
91 | 


--------------------------------------------------------------------------------
/configs/retinanet/unitmodule_retinanet_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # extend the baseline RetinaNet config from this directory
 2 |     './retinanet_r50_1x_duo.py',
 3 | ]
 4 | 
 5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably consumed elsewhere - confirm
 6 | norm_cfg = dict(type='GN', num_groups=8)  # reused by unit_backbone and t_head below
 7 | act_cfg = dict(type='ReLU')  # reused by unit_backbone and t_head below
 8 | 
 9 | k_1, k_2 = 9, 9  # values passed as UnitBackbone large_kernels
10 | c_s1, c_s2 = 32, 32  # UnitBackbone stem_channels; c_s2 also sizes t_head and loss_acc
11 | 
12 | unit_module = dict(  # UnitModule settings, handed to the data preprocessor in `model` below
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,  # matches the last stem channel width
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=500),  # largest weight among the five loss terms
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | 
38 | model = dict(  # only type and data_preprocessor are set here; other model keys are expected from the base config
39 |     type='UnitRetinaNet',
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',
42 |         unit_module=unit_module)
43 | )
44 | 
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # adds clip_grad, which the base RetinaNet config does not set
46 | 
47 | train_pipeline = [  # training data pipeline for the UnitModule variant
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the inherited config
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),  # colour augmentation placed between Resize and RandomFlip
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | 
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 | 


--------------------------------------------------------------------------------
/configs/rtmdet/rtmdet_s_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # inherit the mmyolo-flavoured dataset and runtime defaults from the shared _base_ configs
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | env_cfg = dict(cudnn_benchmark=True)
  6 | 
  7 | max_epochs = 100  # total training epochs; also used to derive scheduler and validation boundaries
  8 | num_last_epochs = 15  # max_epochs - num_last_epochs = 85 is used as a boundary below
  9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
 10 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])  # val_interval 10, with a (85, 1) dynamic interval entry
 11 | val_cfg = dict(type='ValLoop')
 12 | test_cfg = dict(type='TestLoop')
 13 | 
 14 | param_scheduler = [
 15 |     dict(  # iteration-based (by_epoch=False) linear schedule over iterations 0-1000, starting at factor 1e-05
 16 |         type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
 17 |         end=1000),
 18 |     dict(
 19 |         type='CosineAnnealingLR',
 20 |         eta_min=0.0002,  # equals 0.05 x the optimizer lr of 0.004 set below
 21 |         begin=max_epochs // 2,  # evaluates to epoch 50
 22 |         T_max=max_epochs - num_last_epochs,  # NOTE(review): T_max is 85 epochs but begin..end spans only 50..85 - confirm this is intended
 23 |         end=max_epochs - num_last_epochs,  # evaluates to epoch 85
 24 |         by_epoch=True,
 25 |         convert_to_iter_based=True)
 26 | ]
 27 | optim_wrapper = dict(
 28 |     type='OptimWrapper',
 29 |     optimizer=dict(type='AdamW', lr=0.004, weight_decay=0.05),
 30 |     paramwise_cfg=dict(
 31 |         norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))  # zero weight-decay multipliers for norm and bias parameters
 32 | 
 33 | custom_hooks = [
 34 |     dict(  # EMA hook configuration
 35 |         type='EMAHook',
 36 |         ema_type='ExpMomentumEMA',
 37 |         momentum=0.0002,
 38 |         update_buffers=True,
 39 |         strict_load=False,
 40 |         priority=49),
 41 | ]
 42 | 
 43 | checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth'  # pretrained weights URL used by backbone.init_cfg
 44 | num_classes = 4  # passed to the head module and the assigner below
 45 | model = dict(  # RTMDet built as a YOLODetector with CSPNeXt backbone and CSPNeXtPAFPN neck
 46 |     type='YOLODetector',
 47 |     data_preprocessor=dict(
 48 |         type='YOLOv5DetDataPreprocessor',
 49 |         mean=_base_.mean_bgr,  # BGR stats from the inherited config, used with bgr_to_rgb=False
 50 |         std=_base_.std_bgr,
 51 |         bgr_to_rgb=False),
 52 |     backbone=dict(
 53 |         type='CSPNeXt',
 54 |         arch='P5',
 55 |         expand_ratio=0.5,
 56 |         deepen_factor=0.33,  # same deepen/widen factors are repeated in the neck and head
 57 |         widen_factor=0.5,
 58 |         channel_attention=True,
 59 |         norm_cfg=dict(type='BN'),
 60 |         act_cfg=dict(type='SiLU', inplace=True),
 61 |         init_cfg=dict(
 62 |             type='Pretrained',
 63 |             prefix='backbone.',
 64 |             checkpoint=checkpoint,
 65 |             map_location='cpu')),
 66 |     neck=dict(
 67 |         type='CSPNeXtPAFPN',
 68 |         deepen_factor=0.33,
 69 |         widen_factor=0.5,
 70 |         in_channels=[256, 512, 1024],
 71 |         out_channels=256,
 72 |         num_csp_blocks=3,
 73 |         expand_ratio=0.5,
 74 |         norm_cfg=dict(type='BN'),
 75 |         act_cfg=dict(type='SiLU', inplace=True)),
 76 |     bbox_head=dict(
 77 |         type='RTMDetHead',
 78 |         head_module=dict(
 79 |             type='RTMDetSepBNHeadModule',
 80 |             num_classes=num_classes,
 81 |             in_channels=256,
 82 |             stacked_convs=2,
 83 |             feat_channels=256,
 84 |             norm_cfg=dict(type='BN'),
 85 |             act_cfg=dict(type='SiLU', inplace=True),
 86 |             share_conv=True,
 87 |             pred_kernel_size=1,
 88 |             featmap_strides=[8, 16, 32],  # same three strides as the prior_generator below
 89 |             widen_factor=0.5),
 90 |         prior_generator=dict(
 91 |             type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]),
 92 |         bbox_coder=dict(type='DistancePointBBoxCoder'),
 93 |         loss_cls=dict(
 94 |             type='mmdet.QualityFocalLoss',
 95 |             use_sigmoid=True,
 96 |             beta=2.0,
 97 |             loss_weight=1.0),
 98 |         loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
 99 |     train_cfg=dict(
100 |         assigner=dict(
101 |             type='BatchDynamicSoftLabelAssigner',
102 |             num_classes=num_classes,
103 |             topk=13,
104 |             iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
105 |         allowed_border=-1,
106 |         pos_weight=-1,
107 |         debug=False),
108 |     test_cfg=dict(  # test-time filtering and NMS settings
109 |         multi_label=True,
110 |         nms_pre=30000,
111 |         score_thr=0.001,
112 |         nms=dict(type='nms', iou_threshold=0.65),
113 |         max_per_img=300))
114 | 


--------------------------------------------------------------------------------
/configs/rtmdet/unitmodule_rtmdet_s_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # baseline RTMDet config plus the shared UnitModule config
 2 |     './rtmdet_s_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | 
 6 | model = dict(  # only type and data_preprocessor are set here; other model keys are expected from the base config
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)  # unit_module is read from the inherited configs rather than defined in this file
11 | )
12 | 
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # adds clip_grad, which the base RTMDet config does not set
14 | 
15 | train_pipeline = [  # training data pipeline; mmdet transforms are referenced with the 'mmdet.' prefix
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the inherited config
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),  # colour augmentation placed after Resize and before Pad
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),  # image padding value of 114 per channel
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | 
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
28 | 


--------------------------------------------------------------------------------
/configs/tood/tood_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # inherit dataset and runtime defaults from the shared _base_ configs
 2 |     '../_base_/datasets/duo_detection.py',
 3 |     '../_base_/default_runtime.py',
 4 | ]
 5 | max_epochs = 12  # reused by train_cfg and as the MultiStepLR end below
 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
 7 | val_cfg = dict(type='ValLoop')
 8 | test_cfg = dict(type='TestLoop')
 9 | 
10 | param_scheduler = [
11 |     dict(  # iteration-based (by_epoch=False) linear schedule over iterations 0-500, starting at factor 0.001
12 |         type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
13 |     dict(
14 |         type='MultiStepLR',
15 |         begin=0,
16 |         end=max_epochs,
17 |         by_epoch=True,
18 |         milestones=[8, 11],  # epoch-based milestones (by_epoch=True), paired with gamma below
19 |         gamma=0.1)
20 | ]
21 | optim_wrapper = dict(  # no clip_grad is configured in this baseline
22 |     type='OptimWrapper',
23 |     optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
24 | 
25 | num_classes = 4  # passed to bbox_head.num_classes below
26 | model = dict(  # TOOD with a ResNet-50 backbone and FPN neck
27 |     type='TOOD',
28 |     data_preprocessor=dict(
29 |         type='DetDataPreprocessor',
30 |         mean=_base_.mean_rgb,  # normalization stats come from the inherited config
31 |         std=_base_.std_rgb,
32 |         bgr_to_rgb=True,
33 |         pad_size_divisor=32),
34 |     backbone=dict(
35 |         type='ResNet',
36 |         depth=50,
37 |         num_stages=4,
38 |         out_indices=(0, 1, 2, 3),  # four outputs, one per entry of neck.in_channels
39 |         frozen_stages=1,
40 |         norm_cfg=dict(type='BN', requires_grad=True),
41 |         norm_eval=True,
42 |         style='pytorch',
43 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44 |     neck=dict(
45 |         type='FPN',
46 |         in_channels=[256, 512, 1024, 2048],
47 |         out_channels=256,
48 |         start_level=1,
49 |         add_extra_convs='on_output',
50 |         num_outs=5),  # five outputs, matching the five anchor strides below
51 |     bbox_head=dict(
52 |         type='TOODHead',
53 |         num_classes=num_classes,
54 |         in_channels=256,
55 |         stacked_convs=6,
56 |         feat_channels=256,
57 |         anchor_type='anchor_free',  # an anchor_generator is still configured right below
58 |         anchor_generator=dict(
59 |             type='AnchorGenerator',
60 |             ratios=[1.0],
61 |             octave_base_scale=8,
62 |             scales_per_octave=1,
63 |             strides=[8, 16, 32, 64, 128]),
64 |         bbox_coder=dict(
65 |             type='DeltaXYWHBBoxCoder',
66 |             target_means=[0.0, 0.0, 0.0, 0.0],
67 |             target_stds=[0.1, 0.1, 0.2, 0.2]),
68 |         initial_loss_cls=dict(  # second classification loss entry, configured alongside loss_cls
69 |             type='FocalLoss',
70 |             use_sigmoid=True,
71 |             activated=True,
72 |             gamma=2.0,
73 |             alpha=0.25,
74 |             loss_weight=1.0),
75 |         loss_cls=dict(
76 |             type='QualityFocalLoss',
77 |             use_sigmoid=True,
78 |             activated=True,
79 |             beta=2.0,
80 |             loss_weight=1.0),
81 |         loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
82 |     train_cfg=dict(
83 |         initial_epoch=4,  # NOTE(review): presumably the epoch count that uses initial_assigner / initial_loss_cls - confirm
84 |         initial_assigner=dict(type='ATSSAssigner', topk=9),
85 |         assigner=dict(type='TaskAlignedAssigner', topk=13),
86 |         alpha=1,
87 |         beta=6,
88 |         allowed_border=-1,
89 |         pos_weight=-1,
90 |         debug=False),
91 |     test_cfg=dict(  # test-time filtering and NMS settings
92 |         nms_pre=1000,
93 |         min_bbox_size=0,
94 |         score_thr=0.05,
95 |         nms=dict(type='nms', iou_threshold=0.6),
96 |         max_per_img=100))
97 | 


--------------------------------------------------------------------------------
/configs/tood/unitmodule_tood_r50_1x_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # extends the plain TOOD baseline with a UnitModule-based data preprocessor
 2 |     './tood_r50_1x_duo.py',
 3 | ]
 4 | # Shared building blocks reused inside unit_module below.
 5 | with_unit_module = True
 6 | norm_cfg = dict(type='GN', num_groups=8)
 7 | act_cfg = dict(type='ReLU')
 8 | 
 9 | k_1, k_2 = 9, 9  # large_kernels of UnitBackbone
10 | c_s1, c_s2 = 32, 32  # stem_channels; c_s2 is also the THead width and the loss_acc channel count
11 | # NOTE(review): this dict repeats configs/unitmodule/unitmodule.py instead of inheriting it.
12 | unit_module = dict(
13 |     type='UnitModule',
14 |     unit_backbone=dict(
15 |         type='UnitBackbone',
16 |         stem_channels=(c_s1, c_s2),
17 |         large_kernels=(k_1, k_2),
18 |         small_kernels=(3, 3),
19 |         dw_ratio=1.0,
20 |         norm_cfg=norm_cfg,
21 |         act_cfg=act_cfg),
22 |     t_head=dict(
23 |         type='THead',
24 |         in_channels=c_s2,
25 |         hid_channels=c_s2,
26 |         out_channels=3,
27 |         norm_cfg=norm_cfg,
28 |         act_cfg=act_cfg),
29 |     a_head=dict(type='AHead'),
30 |     loss_t=dict(type='TransmissionLoss', loss_weight=500),
31 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),  # NOTE(review): configs/unitmodule/unitmodule.py uses 0.01 — confirm 0.1 is intended
32 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 |     alpha=0.9,
36 |     t_min=0.001)
37 | # Only the detector type and the data preprocessor are overridden; the rest is inherited.
38 | model = dict(
39 |     type='UnitTOOD',
40 |     data_preprocessor=dict(
41 |         type='UnitDetDataPreprocessor',
42 |         unit_module=unit_module)
43 | )
44 | # Adds gradient clipping, which the base TOOD optim_wrapper does not set.
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))
46 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
47 | train_pipeline = [
48 |     dict(type='LoadImageFromFile'),
49 |     dict(type='LoadAnnotations', with_bbox=True),
50 |     dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
51 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 |     dict(type='RandomFlip', prob=0.5),
53 |     dict(type='PackDetInputs')
54 | ]
55 | # Override only the pipeline of the inherited training dataset.
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
57 | 


--------------------------------------------------------------------------------
/configs/unitmodule/unitmodule.py:
--------------------------------------------------------------------------------
 1 | with_unit_module = True  # shared UnitModule definition, pulled in by configs via _base_.unit_module
 2 | norm_cfg = dict(type='GN', num_groups=8)
 3 | act_cfg = dict(type='ReLU')
 4 | 
 5 | k_1, k_2 = 9, 9  # large_kernels of UnitBackbone
 6 | c_s1, c_s2 = 32, 32  # stem_channels; c_s2 is also the THead width and the loss_acc channel count
 7 | 
 8 | unit_module = dict(
 9 |     type='UnitModule',
10 |     unit_backbone=dict(
11 |         type='UnitBackbone',
12 |         stem_channels=(c_s1, c_s2),
13 |         large_kernels=(k_1, k_2),
14 |         small_kernels=(3, 3),
15 |         dw_ratio=1.0,
16 |         norm_cfg=norm_cfg,
17 |         act_cfg=act_cfg),
18 |     t_head=dict(
19 |         type='THead',
20 |         in_channels=c_s2,  # must match the last stem channel of unit_backbone
21 |         hid_channels=c_s2,
22 |         out_channels=3,
23 |         norm_cfg=norm_cfg,
24 |         act_cfg=act_cfg),
25 |     a_head=dict(type='AHead'),
26 |     loss_t=dict(type='TransmissionLoss', loss_weight=500),  # per-loss weights of the UnitModule objectives
27 |     loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
28 |     loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
29 |     loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
30 |     loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
31 |     alpha=0.9,
32 |     t_min=0.001)
33 | 


--------------------------------------------------------------------------------
/configs/yolov5/unitmodule_yolov5_s_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # YOLOv5-s baseline plus the shared UnitModule definition
 2 |     './yolov5_s_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | # Swap in the Unit* detector and preprocessor; remaining model keys are inherited.
 6 | model = dict(
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)
11 | )
12 | # Adds gradient clipping, which the base YOLOv5 optim_wrapper does not set.
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))
14 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | # Override only the pipeline of the inherited training dataset.
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 | 


--------------------------------------------------------------------------------
/configs/yolov5/yolov5_s_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # YOLOv5-s baseline on DUO; inherits dataset and runtime settings
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | env_cfg = dict(cudnn_benchmark=True)
  6 | # Validate every 10 epochs; dynamic_intervals switches to every epoch for the last num_last_epochs.
  7 | max_epochs = 100
  8 | num_last_epochs = 15
  9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
 10 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
 11 | val_cfg = dict(type='ValLoop')
 12 | test_cfg = dict(type='TestLoop')
 13 | # SGD with Nesterov momentum; batch_size_per_gpu is read from the inherited base configs.
 14 | optim_wrapper = dict(
 15 |     type='OptimWrapper',
 16 |     optimizer=dict(
 17 |         type='SGD',
 18 |         lr=0.01,
 19 |         momentum=0.937,
 20 |         weight_decay=0.0005,
 21 |         nesterov=True,
 22 |         batch_size_per_gpu=_base_.train_bs),
 23 |     constructor='YOLOv5OptimizerConstructor')
 24 | default_hooks = dict(  # the lr schedule is driven by a hook rather than a param_scheduler list
 25 |     param_scheduler=dict(
 26 |         type='YOLOv5ParamSchedulerHook',
 27 |         scheduler_type='linear',
 28 |         lr_factor=0.01,
 29 |         max_epochs=max_epochs),
 30 | )
 31 | custom_hooks = [  # exponential-moving-average copy of the model weights
 32 |     dict(
 33 |         type='EMAHook',
 34 |         ema_type='ExpMomentumEMA',
 35 |         momentum=0.0001,
 36 |         update_buffers=True,
 37 |         strict_load=False,
 38 |         priority=49)
 39 | ]
 40 | 
 41 | num_classes = 4  # passed to the head module below
 42 | # anchors for DUO
 43 | anchors = [[(13, 12), (20, 18), (27, 25)],
 44 |            [(35, 31), (44, 39), (55, 52)],
 45 |            [(80, 45), (74, 69), (116, 102)]]
 46 | num_det_layers = 3  # NOTE(review): not referenced anywhere else in this file
 47 | model = dict(  # YOLOv5 detector: CSPDarknet backbone + PAFPN neck + anchor-based head
 48 |     type='YOLODetector',
 49 |     data_preprocessor=dict(
 50 |         type='YOLOv5DetDataPreprocessor',
 51 |         mean=[0.0, 0.0, 0.0],
 52 |         std=[255.0, 255.0, 255.0],  # with zero mean this scales 0-255 pixel values to 0-1
 53 |         bgr_to_rgb=True),
 54 |     backbone=dict(
 55 |         type='YOLOv5CSPDarknet',
 56 |         deepen_factor=0.33,
 57 |         widen_factor=0.5,
 58 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 59 |         act_cfg=dict(type='SiLU', inplace=True)),
 60 |     neck=dict(
 61 |         type='YOLOv5PAFPN',
 62 |         deepen_factor=0.33,  # same scaling factors as the backbone
 63 |         widen_factor=0.5,
 64 |         in_channels=[256, 512, 1024],
 65 |         out_channels=[256, 512, 1024],
 66 |         num_csp_blocks=3,
 67 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 68 |         act_cfg=dict(type='SiLU', inplace=True)),
 69 |     bbox_head=dict(
 70 |         type='YOLOv5Head',
 71 |         head_module=dict(
 72 |             type='YOLOv5HeadModule',
 73 |             num_classes=num_classes,
 74 |             in_channels=[256, 512, 1024],
 75 |             widen_factor=0.5,
 76 |             featmap_strides=[8, 16, 32],
 77 |             num_base_priors=3),  # three anchors per level, matching each row of `anchors`
 78 |         prior_generator=dict(
 79 |             type='mmdet.YOLOAnchorGenerator',
 80 |             base_sizes=anchors,
 81 |             strides=[8, 16, 32]),
 82 |         loss_cls=dict(
 83 |             type='mmdet.CrossEntropyLoss',
 84 |             use_sigmoid=True,
 85 |             reduction='mean',
 86 |             loss_weight=0.5),
 87 |         loss_bbox=dict(
 88 |             type='IoULoss',
 89 |             iou_mode='ciou',
 90 |             bbox_format='xywh',
 91 |             eps=1e-07,
 92 |             reduction='mean',
 93 |             loss_weight=0.05,
 94 |             return_iou=True),
 95 |         loss_obj=dict(
 96 |             type='mmdet.CrossEntropyLoss',
 97 |             use_sigmoid=True,
 98 |             reduction='mean',
 99 |             loss_weight=1.0),
100 |         prior_match_thr=4.0,
101 |         obj_level_weights=[4.0, 1.0, 0.4]),  # one weight per feature level
102 |     test_cfg=dict(
103 |         multi_label=True,
104 |         nms_pre=30000,
105 |         score_thr=0.001,
106 |         nms=dict(type='nms', iou_threshold=0.65),
107 |         max_per_img=300))
108 | 


--------------------------------------------------------------------------------
/configs/yolov6/unitmodule_yolov6_s_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # YOLOv6-s baseline plus the shared UnitModule definition
 2 |     './yolov6_s_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | # Swap in the Unit* detector and preprocessor; remaining model keys are inherited.
 6 | model = dict(
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)
11 | )
12 | # Adds gradient clipping, which the base YOLOv6 optim_wrapper does not set.
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))
14 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | # Override only the pipeline of the inherited training dataset.
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 | 


--------------------------------------------------------------------------------
/configs/yolov6/yolov6_s_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # YOLOv6-s baseline on DUO; inherits dataset and runtime settings
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | env_cfg = dict(cudnn_benchmark=True)
  6 | # Validate every 10 epochs; dynamic_intervals switches to every epoch for the last num_last_epochs.
  7 | max_epochs = 100
  8 | num_last_epochs = 15
  9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
 10 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
 11 | val_cfg = dict(type='ValLoop')
 12 | test_cfg = dict(type='TestLoop')
 13 | # SGD with Nesterov momentum; batch_size_per_gpu is read from the inherited base configs.
 14 | optim_wrapper = dict(
 15 |     type='OptimWrapper',
 16 |     optimizer=dict(
 17 |         type='SGD',
 18 |         lr=0.01,
 19 |         momentum=0.937,
 20 |         weight_decay=0.0005,
 21 |         nesterov=True,
 22 |         batch_size_per_gpu=_base_.train_bs),
 23 |     constructor='YOLOv5OptimizerConstructor')
 24 | default_hooks = dict(  # the lr schedule is driven by a hook rather than a param_scheduler list
 25 |     param_scheduler=dict(
 26 |         type='YOLOv5ParamSchedulerHook',
 27 |         scheduler_type='cosine',
 28 |         lr_factor=0.01,
 29 |         max_epochs=max_epochs)
 30 | )
 31 | custom_hooks = [  # exponential-moving-average copy of the model weights
 32 |     dict(
 33 |         type='EMAHook',
 34 |         ema_type='ExpMomentumEMA',
 35 |         momentum=0.0001,
 36 |         update_buffers=True,
 37 |         strict_load=False,
 38 |         priority=49)
 39 | ]
 40 | 
 41 | num_classes = 4  # passed to the head module and both assigners below
 42 | model = dict(  # YOLOv6 detector: EfficientRep backbone + RepPAFPN neck + YOLOv6Head
 43 |     type='YOLODetector',
 44 |     data_preprocessor=dict(
 45 |         type='YOLOv5DetDataPreprocessor',
 46 |         mean=[0.0, 0.0, 0.0],
 47 |         std=[255.0, 255.0, 255.0],  # with zero mean this scales 0-255 pixel values to 0-1
 48 |         bgr_to_rgb=True),
 49 |     backbone=dict(
 50 |         type='YOLOv6EfficientRep',
 51 |         deepen_factor=0.33,
 52 |         widen_factor=0.5,
 53 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 54 |         act_cfg=dict(type='ReLU', inplace=True)),
 55 |     neck=dict(
 56 |         type='YOLOv6RepPAFPN',
 57 |         deepen_factor=0.33,  # same scaling factors as the backbone
 58 |         widen_factor=0.5,
 59 |         in_channels=[256, 512, 1024],
 60 |         out_channels=[128, 256, 512],
 61 |         num_csp_blocks=12,
 62 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 63 |         act_cfg=dict(type='ReLU', inplace=True)),
 64 |     bbox_head=dict(
 65 |         type='YOLOv6Head',
 66 |         head_module=dict(
 67 |             type='YOLOv6HeadModule',
 68 |             num_classes=num_classes,
 69 |             in_channels=[128, 256, 512],  # equals the neck's out_channels
 70 |             widen_factor=0.5,
 71 |             norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 72 |             act_cfg=dict(type='SiLU', inplace=True),  # head uses SiLU while backbone/neck use ReLU
 73 |             featmap_strides=[8, 16, 32]),
 74 |         loss_bbox=dict(
 75 |             type='IoULoss',
 76 |             iou_mode='giou',
 77 |             bbox_format='xyxy',
 78 |             reduction='mean',
 79 |             loss_weight=2.5,
 80 |             return_iou=False)),
 81 |     train_cfg=dict(
 82 |         initial_epoch=4,  # presumably the epoch at which initial_assigner hands over to assigner — confirm
 83 |         initial_assigner=dict(
 84 |             type='BatchATSSAssigner',
 85 |             num_classes=num_classes,
 86 |             topk=9,
 87 |             iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
 88 |         assigner=dict(
 89 |             type='BatchTaskAlignedAssigner',
 90 |             num_classes=num_classes,
 91 |             topk=13,
 92 |             alpha=1,
 93 |             beta=6)),
 94 |     test_cfg=dict(
 95 |         multi_label=True,
 96 |         nms_pre=30000,
 97 |         score_thr=0.001,
 98 |         nms=dict(type='nms', iou_threshold=0.65),
 99 |         max_per_img=300))
100 | 


--------------------------------------------------------------------------------
/configs/yolov7/unitmodule_yolov7_t_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # YOLOv7-tiny baseline plus the shared UnitModule definition
 2 |     './yolov7_t_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | # Swap in the Unit* detector and preprocessor; remaining model keys are inherited.
 6 | model = dict(
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)
11 | )
12 | # Adds gradient clipping, which the base YOLOv7 optim_wrapper does not set.
13 | optim_wrapper = dict(clip_grad=dict(max_norm=55, norm_type=2))
14 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | # Override only the pipeline of the inherited training dataset.
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 | 


--------------------------------------------------------------------------------
/configs/yolov7/yolov7_t_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # YOLOv7-tiny baseline on DUO; inherits dataset and runtime settings
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | env_cfg = dict(cudnn_benchmark=True)
  6 | # Validate every 10 epochs; dynamic_intervals switches to every epoch for the last num_last_epochs.
  7 | max_epochs = 100
  8 | num_last_epochs = 15
  9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
 10 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
 11 | val_cfg = dict(type='ValLoop')
 12 | test_cfg = dict(type='TestLoop')
 13 | # SGD with Nesterov momentum; note the YOLOv7-specific optimizer constructor.
 14 | optim_wrapper = dict(
 15 |     type='OptimWrapper',
 16 |     optimizer=dict(
 17 |         type='SGD',
 18 |         lr=0.01,
 19 |         momentum=0.937,
 20 |         weight_decay=0.0005,
 21 |         nesterov=True,
 22 |         batch_size_per_gpu=_base_.train_bs),
 23 |     constructor='YOLOv7OptimWrapperConstructor')
 24 | default_hooks = dict(  # the lr schedule is driven by a hook rather than a param_scheduler list
 25 |     param_scheduler=dict(
 26 |         type='YOLOv5ParamSchedulerHook',
 27 |         scheduler_type='cosine',
 28 |         lr_factor=0.01,
 29 |         max_epochs=max_epochs),
 30 | )
 31 | custom_hooks = [  # exponential-moving-average copy of the model weights
 32 |     dict(
 33 |         type='EMAHook',
 34 |         ema_type='ExpMomentumEMA',
 35 |         momentum=0.0001,
 36 |         update_buffers=True,
 37 |         strict_load=False,
 38 |         priority=49)
 39 | ]
 40 | 
 41 | num_classes = 4  # passed to the head module below
 42 | # anchors for DUO
 43 | anchors = [[(13, 12), (20, 18), (27, 25)],
 44 |            [(35, 31), (44, 39), (55, 52)],
 45 |            [(80, 45), (74, 69), (116, 102)]]
 46 | model = dict(  # YOLOv7 detector in its 'Tiny' architecture with an anchor-based head
 47 |     type='YOLODetector',
 48 |     data_preprocessor=dict(
 49 |         type='YOLOv5DetDataPreprocessor',
 50 |         mean=[0.0, 0.0, 0.0],
 51 |         std=[255.0, 255.0, 255.0],  # with zero mean this scales 0-255 pixel values to 0-1
 52 |         bgr_to_rgb=True),
 53 |     backbone=dict(
 54 |         type='YOLOv7Backbone',
 55 |         arch='Tiny',
 56 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 57 |         act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1)),
 58 |     neck=dict(
 59 |         type='YOLOv7PAFPN',
 60 |         block_cfg=dict(type='TinyDownSampleBlock', middle_ratio=0.25),
 61 |         upsample_feats_cat_first=False,
 62 |         in_channels=[128, 256, 512],
 63 |         out_channels=[64, 128, 256],
 64 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 65 |         act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1),
 66 |         is_tiny_version=True,
 67 |         use_repconv_outs=False),
 68 |     bbox_head=dict(
 69 |         type='YOLOv7Head',
 70 |         head_module=dict(
 71 |             type='YOLOv7HeadModule',
 72 |             num_classes=num_classes,
 73 |             in_channels=[128, 256, 512],  # NOTE(review): differs from the neck's out_channels [64, 128, 256] — confirm against YOLOv7PAFPN
 74 |             featmap_strides=[8, 16, 32],
 75 |             num_base_priors=3),  # three anchors per level, matching each row of `anchors`
 76 |         prior_generator=dict(
 77 |             type='mmdet.YOLOAnchorGenerator',
 78 |             base_sizes=anchors,
 79 |             strides=[8, 16, 32]),
 80 |         loss_cls=dict(
 81 |             type='mmdet.CrossEntropyLoss',
 82 |             use_sigmoid=True,
 83 |             reduction='mean',
 84 |             loss_weight=0.5),
 85 |         loss_bbox=dict(
 86 |             type='IoULoss',
 87 |             iou_mode='ciou',
 88 |             bbox_format='xywh',
 89 |             reduction='mean',
 90 |             loss_weight=0.05,
 91 |             return_iou=True),
 92 |         loss_obj=dict(
 93 |             type='mmdet.CrossEntropyLoss',
 94 |             use_sigmoid=True,
 95 |             reduction='mean',
 96 |             loss_weight=1.0),
 97 |         prior_match_thr=4.0,
 98 |         obj_level_weights=[4.0, 1.0, 0.4],  # one weight per feature level
 99 |         simota_candidate_topk=10,
100 |         simota_iou_weight=3.0,
101 |         simota_cls_weight=1.0),
102 |     test_cfg=dict(
103 |         multi_label=True,
104 |         nms_pre=30000,
105 |         score_thr=0.001,
106 |         nms=dict(type='nms', iou_threshold=0.65),
107 |         max_per_img=300))
108 | 


--------------------------------------------------------------------------------
/configs/yolov8/unitmodule_yolov8_s_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # YOLOv8-s baseline plus the shared UnitModule definition
 2 |     './yolov8_s_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | # Swap in the Unit* detector and preprocessor; remaining model keys are inherited.
 6 | model = dict(
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)
11 | )
12 | # The base config already clips at max_norm=10.0; this override additionally sets norm_type=2.
13 | optim_wrapper = dict(clip_grad=dict(max_norm=10, norm_type=2))
14 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | # Override only the pipeline of the inherited training dataset.
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 | 


--------------------------------------------------------------------------------
/configs/yolov8/yolov8_s_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # YOLOv8-s baseline on DUO; inherits dataset and runtime settings
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | env_cfg = dict(cudnn_benchmark=True)
  6 | # Validate every 10 epochs; dynamic_intervals switches to every epoch for the last num_last_epochs.
  7 | max_epochs = 100
  8 | num_last_epochs = 15
  9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
 10 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
 11 | val_cfg = dict(type='ValLoop')
 12 | test_cfg = dict(type='TestLoop')
 13 | # SGD with Nesterov momentum; unlike the other YOLO baselines here, clipping is set in the base.
 14 | optim_wrapper = dict(
 15 |     type='OptimWrapper',
 16 |     clip_grad=dict(max_norm=10.0),
 17 |     optimizer=dict(
 18 |         type='SGD',
 19 |         lr=0.01,
 20 |         momentum=0.937,
 21 |         weight_decay=0.0005,
 22 |         nesterov=True,
 23 |         batch_size_per_gpu=_base_.train_bs),
 24 |     constructor='YOLOv5OptimizerConstructor')
 25 | default_hooks = dict(  # the lr schedule is driven by a hook rather than a param_scheduler list
 26 |     param_scheduler=dict(
 27 |         type='YOLOv5ParamSchedulerHook',
 28 |         scheduler_type='linear',
 29 |         lr_factor=0.01,
 30 |         max_epochs=max_epochs),
 31 | )
 32 | custom_hooks = [  # exponential-moving-average copy of the model weights
 33 |     dict(
 34 |         type='EMAHook',
 35 |         ema_type='ExpMomentumEMA',
 36 |         momentum=0.0001,
 37 |         update_buffers=True,
 38 |         strict_load=False,
 39 |         priority=49),
 40 | ]
 41 | 
 42 | num_classes = 4  # passed to the head module and the assigner below
 43 | model = dict(  # YOLOv8 detector: CSPDarknet backbone + PAFPN neck + point-based head
 44 |     type='YOLODetector',
 45 |     data_preprocessor=dict(
 46 |         type='YOLOv5DetDataPreprocessor',
 47 |         mean=[0.0, 0.0, 0.0],
 48 |         std=[255.0, 255.0, 255.0],  # with zero mean this scales 0-255 pixel values to 0-1
 49 |         bgr_to_rgb=True),
 50 |     backbone=dict(
 51 |         type='YOLOv8CSPDarknet',
 52 |         arch='P5',
 53 |         last_stage_out_channels=1024,
 54 |         deepen_factor=0.33,
 55 |         widen_factor=0.5,
 56 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 57 |         act_cfg=dict(type='SiLU', inplace=True)),
 58 |     neck=dict(
 59 |         type='YOLOv8PAFPN',
 60 |         deepen_factor=0.33,  # same scaling factors as the backbone
 61 |         widen_factor=0.5,
 62 |         in_channels=[256, 512, 1024],
 63 |         out_channels=[256, 512, 1024],
 64 |         num_csp_blocks=3,
 65 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 66 |         act_cfg=dict(type='SiLU', inplace=True)),
 67 |     bbox_head=dict(
 68 |         type='YOLOv8Head',
 69 |         head_module=dict(
 70 |             type='YOLOv8HeadModule',
 71 |             num_classes=num_classes,
 72 |             in_channels=[256, 512, 1024],
 73 |             widen_factor=0.5,
 74 |             reg_max=16,
 75 |             norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 76 |             act_cfg=dict(type='SiLU', inplace=True),
 77 |             featmap_strides=[8, 16, 32]),
 78 |         prior_generator=dict(  # point priors instead of anchor boxes
 79 |             type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]),
 80 |         bbox_coder=dict(type='DistancePointBBoxCoder'),
 81 |         loss_cls=dict(
 82 |             type='mmdet.CrossEntropyLoss',
 83 |             use_sigmoid=True,
 84 |             reduction='none',
 85 |             loss_weight=0.5),
 86 |         loss_bbox=dict(
 87 |             type='IoULoss',
 88 |             iou_mode='ciou',
 89 |             bbox_format='xyxy',
 90 |             reduction='sum',
 91 |             loss_weight=7.5,
 92 |             return_iou=False),
 93 |         loss_dfl=dict(
 94 |             type='mmdet.DistributionFocalLoss',
 95 |             reduction='mean',
 96 |             loss_weight=0.375)),
 97 |     train_cfg=dict(
 98 |         assigner=dict(
 99 |             type='BatchTaskAlignedAssigner',
100 |             num_classes=num_classes,
101 |             use_ciou=True,
102 |             topk=10,
103 |             alpha=0.5,
104 |             beta=6.0,
105 |             eps=1e-09)),
106 |     test_cfg=dict(
107 |         multi_label=True,
108 |         nms_pre=30000,
109 |         score_thr=0.001,
110 |         nms=dict(type='nms', iou_threshold=0.7),
111 |         max_per_img=300))
112 | 


--------------------------------------------------------------------------------
/configs/yolox/unitmodule_yolox_s_100e_duo.py:
--------------------------------------------------------------------------------
 1 | _base_ = [  # YOLOX-s baseline plus the shared UnitModule definition
 2 |     './yolox_s_100e_duo.py',
 3 |     '../unitmodule/unitmodule.py',
 4 | ]
 5 | # Swap in the Unit* detector and preprocessor; remaining model keys are inherited.
 6 | model = dict(
 7 |     type='UnitYOLODetector',
 8 |     data_preprocessor=dict(
 9 |         type='UnitYOLOv5DetDataPreprocessor',
10 |         unit_module=_base_.unit_module)
11 | )
12 | # Adds gradient clipping, which the base YOLOX optim_wrapper does not set.
13 | optim_wrapper = dict(clip_grad=dict(max_norm=55, norm_type=2))
14 | # Training pipeline; UnderwaterColorRandomTransfer is presumably the project's own transform.
15 | train_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 |     dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
19 |     dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 |     dict(type='mmdet.Pad',
21 |          pad_to_square=True,
22 |          pad_val=dict(img=(114.0, 114.0, 114.0))),
23 |     dict(type='mmdet.RandomFlip', prob=0.5),
24 |     dict(type='mmdet.PackDetInputs')
25 | ]
26 | # Override only the pipeline of the inherited training dataset.
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 | 


--------------------------------------------------------------------------------
/configs/yolox/yolox_s_100e_duo.py:
--------------------------------------------------------------------------------
  1 | _base_ = [  # YOLOX-s baseline on DUO; inherits dataset and runtime settings
  2 |     '../_base_/datasets/duo_detection_mmyolo.py',
  3 |     '../_base_/default_runtime_mmyolo.py',
  4 | ]
  5 | max_epochs = 100
  6 | num_last_epochs = 15  # length of the final constant-lr phase; also the start of per-epoch validation
  7 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
  8 |                  val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
  9 | val_cfg = dict(type='ValLoop')
 10 | test_cfg = dict(type='TestLoop')
 11 | # Three-phase lr schedule: warmup (epochs 0-5), cosine annealing, then constant for the last epochs.
 12 | param_scheduler = [
 13 |     dict(
 14 |         type='mmdet.QuadraticWarmupLR',
 15 |         by_epoch=True,
 16 |         begin=0,
 17 |         end=5,
 18 |         convert_to_iter_based=True),
 19 |     dict(
 20 |         type='CosineAnnealingLR',
 21 |         eta_min=0.0005,
 22 |         begin=5,
 23 |         T_max=max_epochs - num_last_epochs,
 24 |         end=max_epochs - num_last_epochs,
 25 |         by_epoch=True,
 26 |         convert_to_iter_based=True),
 27 |     dict(type='ConstantLR', by_epoch=True, factor=1, begin=max_epochs - num_last_epochs, end=max_epochs)
 28 | ]
 29 | optim_wrapper = dict(  # weight decay is disabled for norm layers and biases via paramwise_cfg
 30 |     type='OptimWrapper',
 31 |     optimizer=dict(
 32 |         type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True),
 33 |     paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0))
 34 | 
 35 | custom_hooks = [
 36 |     dict(type='mmdet.SyncNormHook', priority=48),
 37 |     dict(  # exponential-moving-average copy of the model weights
 38 |         type='EMAHook',
 39 |         ema_type='ExpMomentumEMA',
 40 |         momentum=0.0001,
 41 |         update_buffers=True,
 42 |         strict_load=False,
 43 |         priority=49)
 44 | ]
 45 | 
 46 | num_classes = 4  # passed to the head module below
 47 | model = dict(  # YOLOX detector: CSPDarknet backbone + PAFPN neck + YOLOXHead
 48 |     type='YOLODetector',
 49 |     init_cfg=dict(
 50 |         type='Kaiming',
 51 |         layer='Conv2d',
 52 |         a=2.23606797749979,  # numerically sqrt(5)
 53 |         distribution='uniform',
 54 |         mode='fan_in',
 55 |         nonlinearity='leaky_relu'),
 56 |     use_syncbn=False,
 57 |     data_preprocessor=dict(  # no mean/std/bgr_to_rgb are set here, unlike the other YOLO baselines
 58 |         type='YOLOv5DetDataPreprocessor',
 59 |         pad_size_divisor=32,
 60 |         batch_augments=[
 61 |             dict(
 62 |                 type='YOLOXBatchSyncRandomResize',
 63 |                 random_size_range=(480, 800),
 64 |                 size_divisor=32,
 65 |                 interval=10)
 66 |         ]),
 67 |     backbone=dict(
 68 |         type='YOLOXCSPDarknet',
 69 |         deepen_factor=0.33,
 70 |         widen_factor=0.5,
 71 |         out_indices=(2, 3, 4),
 72 |         spp_kernal_sizes=(5, 9, 13),  # 'kernal' spelling kept as-is; presumably matches the backbone's argument name
 73 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 74 |         act_cfg=dict(type='SiLU', inplace=True)),
 75 |     neck=dict(
 76 |         type='YOLOXPAFPN',
 77 |         deepen_factor=0.33,  # same scaling factors as the backbone
 78 |         widen_factor=0.5,
 79 |         in_channels=[256, 512, 1024],
 80 |         out_channels=256,
 81 |         norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 82 |         act_cfg=dict(type='SiLU', inplace=True)),
 83 |     bbox_head=dict(
 84 |         type='YOLOXHead',
 85 |         head_module=dict(
 86 |             type='YOLOXHeadModule',
 87 |             num_classes=num_classes,
 88 |             in_channels=256,  # equals the neck's out_channels
 89 |             feat_channels=256,
 90 |             widen_factor=0.5,
 91 |             stacked_convs=2,
 92 |             featmap_strides=(8, 16, 32),
 93 |             use_depthwise=False,
 94 |             norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
 95 |             act_cfg=dict(type='SiLU', inplace=True)),
 96 |         loss_cls=dict(
 97 |             type='mmdet.CrossEntropyLoss',
 98 |             use_sigmoid=True,
 99 |             reduction='sum',
100 |             loss_weight=1.0),
101 |         loss_bbox=dict(
102 |             type='mmdet.IoULoss',
103 |             mode='square',
104 |             eps=1e-16,
105 |             reduction='sum',
106 |             loss_weight=5.0),
107 |         loss_obj=dict(
108 |             type='mmdet.CrossEntropyLoss',
109 |             use_sigmoid=True,
110 |             reduction='sum',
111 |             loss_weight=1.0),
112 |         loss_bbox_aux=dict(
113 |             type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
114 |     train_cfg=dict(
115 |         assigner=dict(
116 |             type='mmdet.SimOTAAssigner',
117 |             center_radius=2.5,
118 |             iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
119 |     test_cfg=dict(
120 |         yolox_style=True,
121 |         multi_label=True,
122 |         score_thr=0.001,
123 |         max_per_img=300,
124 |         nms=dict(type='nms', iou_threshold=0.65)))
125 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | future
2 | tensorboard


--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Usage: dist_test.sh CONFIG CHECKPOINT GPUS [extra test.py arguments...]
 3 | CONFIG=$1
 4 | CHECKPOINT=$2
 5 | GPUS=$3
 6 | NNODES=${NNODES:-1}  # the four settings below can be overridden from the environment
 7 | NODE_RANK=${NODE_RANK:-0}
 8 | PORT=${PORT:-29500}
 9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
10 | # Every expansion is quoted so paths and arguments containing spaces survive intact.
11 | PYTHONPATH="$(dirname "$0")/..:$PYTHONPATH" \
12 | python -m torch.distributed.launch \
13 |     --nnodes="$NNODES" \
14 |     --node_rank="$NODE_RANK" \
15 |     --master_addr="$MASTER_ADDR" \
16 |     --nproc_per_node="$GPUS" \
17 |     --master_port="$PORT" \
18 |     "$(dirname "$0")/test.py" \
19 |     "$CONFIG" \
20 |     "$CHECKPOINT" \
21 |     --launcher pytorch \
22 |     "${@:4}"
23 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1
 4 | GPUS=$2
 5 | NNODES=${NNODES:-1}
 6 | NODE_RANK=${NODE_RANK:-0}
 7 | PORT=${PORT:-29500}
 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
 9 | 
10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
11 | python -m torch.distributed.launch \
12 |     --nnodes=$NNODES \
13 |     --node_rank=$NODE_RANK \
14 |     --master_addr=$MASTER_ADDR \
15 |     --nproc_per_node=$GPUS \
16 |     --master_port=$PORT \
17 |     $(dirname "$0")/train.py \
18 |     $CONFIG \
19 |     --launcher pytorch ${@:3}
20 | 


--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import os
  4 | import os.path as osp
  5 | import warnings
  6 | from copy import deepcopy
  7 | 
  8 | from mmdet.engine.hooks.utils import trigger_visualization_hook
  9 | from mmdet.evaluation import DumpDetResults
 10 | from mmdet.registry import RUNNERS
 11 | from mmdet.utils import setup_cache_size_limit_of_dynamo
 12 | from mmengine import ConfigDict
 13 | from mmengine.config import Config, DictAction
 14 | from mmengine.runner import Runner
 15 | 
 16 | from unitmodule.models.detectors import register_unit_distributed
 17 | 
 18 | 
 19 | def parse_args():
 20 |     parser = argparse.ArgumentParser(
 21 |         description='MMDet test (and eval) a model')
 22 |     parser.add_argument('config', help='test config file path')
 23 |     parser.add_argument('checkpoint', help='checkpoint file')
 24 |     parser.add_argument(
 25 |         '--work-dir',
 26 |         help='the directory to save the file containing evaluation metrics')
 27 |     parser.add_argument(
 28 |         '--out',
 29 |         type=str,
 30 |         help='dump predictions to a pickle file for offline evaluation')
 31 |     parser.add_argument(
 32 |         '--show', action='store_true', help='show prediction results')
 33 |     parser.add_argument(
 34 |         '--show-dir',
 35 |         help='directory where painted images will be saved. '
 36 |              'If specified, it will be automatically saved '
 37 |              'to the work_dir/timestamp/show_dir')
 38 |     parser.add_argument(
 39 |         '--wait-time', type=float, default=2, help='the interval of show (s)')
 40 |     parser.add_argument(
 41 |         '--cfg-options',
 42 |         nargs='+',
 43 |         action=DictAction,
 44 |         help='override some settings in the used config, the key-value pair '
 45 |              'in xxx=yyy format will be merged into config file. If the value to '
 46 |              'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 47 |              'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 48 |              'Note that the quotation marks are necessary and that no white space '
 49 |              'is allowed.')
 50 |     parser.add_argument(
 51 |         '--launcher',
 52 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 53 |         default='none',
 54 |         help='job launcher')
 55 |     parser.add_argument('--tta', action='store_true')
 56 |     # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
 57 |     # will pass the `--local-rank` parameter to `tools/train.py` instead
 58 |     # of `--local_rank`.
 59 |     parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
 60 |     args = parser.parse_args()
 61 |     if 'LOCAL_RANK' not in os.environ:
 62 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
 63 |     return args
 64 | 
 65 | 
 66 | def main():
 67 |     args = parse_args()
 68 | 
 69 |     # Reduce the number of repeated compilations and improve
 70 |     # testing speed.
 71 |     setup_cache_size_limit_of_dynamo()
 72 | 
 73 |     # load config
 74 |     cfg = Config.fromfile(args.config)
 75 |     cfg.launcher = args.launcher
 76 |     if args.cfg_options is not None:
 77 |         cfg.merge_from_dict(args.cfg_options)
 78 | 
 79 |     # --------------------------------------------------------
 80 |     # dynamic import customs modules
 81 |     # import modules from import_dir as a/b/c/ dir, registry will be updated
 82 |     if hasattr(cfg, 'import_dir'):
 83 |         import importlib
 84 | 
 85 |         import_dir = cfg.import_dir
 86 |         module_path = import_dir.replace('/', '.')
 87 |         import_lib = importlib.import_module(module_path)
 88 | 
 89 |     # dynamic import for ddp of UnitModule if key with_unit_module is True
 90 |     register_unit_distributed(cfg)
 91 |     # --------------------------------------------------------
 92 | 
 93 |     # work_dir is determined in this priority: CLI > segment in file > filename
 94 |     if args.work_dir is not None:
 95 |         # update configs according to CLI args if args.work_dir is not None
 96 |         cfg.work_dir = args.work_dir
 97 |     elif cfg.get('work_dir', None) is None:
 98 |         # use config filename as default work_dir if cfg.work_dir is None
 99 |         cfg.work_dir = osp.join('./work_dirs',
100 |                                 osp.splitext(osp.basename(args.config))[0])
101 | 
102 |     cfg.load_from = args.checkpoint
103 | 
104 |     if args.show or args.show_dir:
105 |         cfg = trigger_visualization_hook(cfg, args)
106 | 
107 |     if args.tta:
108 | 
109 |         if 'tta_model' not in cfg:
110 |             warnings.warn('Cannot find ``tta_model`` in config, '
111 |                           'we will set it as default.')
112 |             cfg.tta_model = dict(
113 |                 type='DetTTAModel',
114 |                 tta_cfg=dict(
115 |                     nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
116 |         if 'tta_pipeline' not in cfg:
117 |             warnings.warn('Cannot find ``tta_pipeline`` in config, '
118 |                           'we will set it as default.')
119 |             test_data_cfg = cfg.test_dataloader.dataset
120 |             while 'dataset' in test_data_cfg:
121 |                 test_data_cfg = test_data_cfg['dataset']
122 |             cfg.tta_pipeline = deepcopy(test_data_cfg.pipeline)
123 |             flip_tta = dict(
124 |                 type='TestTimeAug',
125 |                 transforms=[
126 |                     [
127 |                         dict(type='RandomFlip', prob=1.),
128 |                         dict(type='RandomFlip', prob=0.)
129 |                     ],
130 |                     [
131 |                         dict(
132 |                             type='PackDetInputs',
133 |                             meta_keys=('img_id', 'img_path', 'ori_shape',
134 |                                        'img_shape', 'scale_factor', 'flip',
135 |                                        'flip_direction'))
136 |                     ],
137 |                 ])
138 |             cfg.tta_pipeline[-1] = flip_tta
139 |         cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
140 |         cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline
141 | 
142 |     # build the runner from config
143 |     if 'runner_type' not in cfg:
144 |         # build the default runner
145 |         runner = Runner.from_cfg(cfg)
146 |     else:
147 |         # build customized runner from the registry
148 |         # if 'runner_type' is set in the cfg
149 |         runner = RUNNERS.build(cfg)
150 | 
151 |     # add `DumpResults` dummy metric
152 |     if args.out is not None:
153 |         assert args.out.endswith(('.pkl', '.pickle')), \
154 |             'The dump file must be a pkl file.'
155 |         runner.test_evaluator.metrics.append(
156 |             DumpDetResults(out_file_path=args.out))
157 | 
158 |     # start testing
159 |     runner.test()
160 | 
161 | 
# Run the test entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
164 | 


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) OpenMMLab. All rights reserved.
  2 | import argparse
  3 | import logging
  4 | import os
  5 | import os.path as osp
  6 | 
  7 | from mmdet.utils import setup_cache_size_limit_of_dynamo
  8 | from mmengine.config import Config, DictAction
  9 | from mmengine.logging import print_log
 10 | from mmengine.registry import RUNNERS
 11 | from mmengine.runner import Runner
 12 | 
 13 | from unitmodule.models.detectors import register_unit_distributed
 14 | 
 15 | 
 16 | def parse_args():
 17 |     parser = argparse.ArgumentParser(description='Train a detector')
 18 |     parser.add_argument('config', help='train config file path')
 19 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
 20 |     parser.add_argument(
 21 |         '--amp',
 22 |         action='store_true',
 23 |         default=False,
 24 |         help='enable automatic-mixed-precision training')
 25 |     parser.add_argument(
 26 |         '--auto-scale-lr',
 27 |         action='store_true',
 28 |         help='enable automatically scaling LR.')
 29 |     parser.add_argument(
 30 |         '--resume',
 31 |         nargs='?',
 32 |         type=str,
 33 |         const='auto',
 34 |         help='If specify checkpoint path, resume from it, while if not '
 35 |              'specify, try to auto resume from the latest checkpoint '
 36 |              'in the work directory.')
 37 |     parser.add_argument(
 38 |         '--cfg-options',
 39 |         nargs='+',
 40 |         action=DictAction,
 41 |         help='override some settings in the used config, the key-value pair '
 42 |              'in xxx=yyy format will be merged into config file. If the value to '
 43 |              'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
 44 |              'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
 45 |              'Note that the quotation marks are necessary and that no white space '
 46 |              'is allowed.')
 47 |     parser.add_argument(
 48 |         '--launcher',
 49 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 50 |         default='none',
 51 |         help='job launcher')
 52 |     # When using PyTorch version >= 2.0.0, the `torch.distributed.launch`
 53 |     # will pass the `--local-rank` parameter to `tools/train.py` instead
 54 |     # of `--local_rank`.
 55 |     parser.add_argument('--local_rank', '--local-rank', type=int, default=0)
 56 |     args = parser.parse_args()
 57 |     if 'LOCAL_RANK' not in os.environ:
 58 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
 59 | 
 60 |     return args
 61 | 
 62 | 
 63 | def main():
 64 |     args = parse_args()
 65 | 
 66 |     # Reduce the number of repeated compilations and improve
 67 |     # training speed.
 68 |     setup_cache_size_limit_of_dynamo()
 69 | 
 70 |     # load config
 71 |     cfg = Config.fromfile(args.config)
 72 |     cfg.launcher = args.launcher
 73 |     if args.cfg_options is not None:
 74 |         cfg.merge_from_dict(args.cfg_options)
 75 | 
 76 |     # --------------------------------------------------------
 77 |     # dynamic import customs modules
 78 |     # import modules from import_dir as a/b/c/ dir, registry will be updated
 79 |     if hasattr(cfg, 'import_dir'):
 80 |         import importlib
 81 | 
 82 |         import_dir = cfg.import_dir
 83 |         module_path = import_dir.replace('/', '.')
 84 |         import_lib = importlib.import_module(module_path)
 85 | 
 86 |     # dynamic import for ddp of UnitModule if key with_unit_module is True
 87 |     register_unit_distributed(cfg)
 88 |     # --------------------------------------------------------
 89 | 
 90 |     # work_dir is determined in this priority: CLI > segment in file > filename
 91 |     if args.work_dir is not None:
 92 |         # update configs according to CLI args if args.work_dir is not None
 93 |         cfg.work_dir = args.work_dir
 94 |     elif cfg.get('work_dir', None) is None:
 95 |         # use config filename as default work_dir if cfg.work_dir is None
 96 |         cfg.work_dir = osp.join('./work_dirs',
 97 |                                 osp.splitext(osp.basename(args.config))[0])
 98 | 
 99 |     # enable automatic-mixed-precision training
100 |     if args.amp is True:
101 |         optim_wrapper = cfg.optim_wrapper.type
102 |         if optim_wrapper == 'AmpOptimWrapper':
103 |             print_log(
104 |                 'AMP training is already enabled in your config.',
105 |                 logger='current',
106 |                 level=logging.WARNING)
107 |         else:
108 |             assert optim_wrapper == 'OptimWrapper', (
109 |                 '`--amp` is only supported when the optimizer wrapper type is '
110 |                 f'`OptimWrapper` but got {optim_wrapper}.')
111 |             cfg.optim_wrapper.type = 'AmpOptimWrapper'
112 |             cfg.optim_wrapper.loss_scale = 'dynamic'
113 | 
114 |     # enable automatically scaling LR
115 |     if args.auto_scale_lr:
116 |         if 'auto_scale_lr' in cfg and \
117 |                 'enable' in cfg.auto_scale_lr and \
118 |                 'base_batch_size' in cfg.auto_scale_lr:
119 |             cfg.auto_scale_lr.enable = True
120 |         else:
121 |             raise RuntimeError('Can not find "auto_scale_lr" or '
122 |                                '"auto_scale_lr.enable" or '
123 |                                '"auto_scale_lr.base_batch_size" in your'
124 |                                ' configuration file.')
125 | 
126 |     # resume is determined in this priority: resume from > auto_resume
127 |     if args.resume == 'auto':
128 |         cfg.resume = True
129 |         cfg.load_from = None
130 |     elif args.resume is not None:
131 |         cfg.resume = True
132 |         cfg.load_from = args.resume
133 | 
134 |     # build the runner from config
135 |     if 'runner_type' not in cfg:
136 |         # build the default runner
137 |         runner = Runner.from_cfg(cfg)
138 |     else:
139 |         # build customized runner from the registry
140 |         # if 'runner_type' is set in the cfg
141 |         runner = RUNNERS.build(cfg)
142 | 
143 |     # start training
144 |     runner.train()
145 | 
146 | 
# Run the training entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()
149 | 


--------------------------------------------------------------------------------
/unitmodule/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import *
2 | from .models import *
3 | 


--------------------------------------------------------------------------------
/unitmodule/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import *
2 | 


--------------------------------------------------------------------------------
/unitmodule/datasets/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .colorspace import UnderwaterColorRandomTransfer
2 | 
3 | __all__ = ['UnderwaterColorRandomTransfer']
4 | 


--------------------------------------------------------------------------------
/unitmodule/datasets/transforms/colorspace.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | from mmcv.transforms import BaseTransform
  4 | from mmcv.transforms.utils import cache_randomness
  5 | from mmengine.registry import TRANSFORMS
  6 | 
  7 | 
  8 | @TRANSFORMS.register_module()
  9 | class UnderwaterColorRandomTransfer(BaseTransform):
 10 |     """Transfer underwater image color by converting HSV color space.
 11 | 
 12 |     HSV is (Hue, Saturation, Value).
 13 |     The uint8 image(255)(h, w, c) convert to HSV that
 14 |     H in [0, 180),
 15 |     S in [0, 255],
 16 |     V in [0, 255].
 17 | 
 18 |     Required Keys:
 19 | 
 20 |     - img
 21 | 
 22 |     Modified Keys:
 23 | 
 24 |     - img
 25 | 
 26 |     Args:
 27 |         hue_prob (float): The probability for hue in range [0, 1]. Defaults to 0.5.
 28 |         saturation_prob (float): The probability for saturation in range [0, 1]. Defaults to 0.5.
 29 |         value_prob (float): The probability for value in range [0, 1]. Defaults to 0.5.
 30 |         hue_delta (int): delta of hue. Defaults to 5.
 31 |         saturation_delta (int): delta of saturation. Defaults to 30.
 32 |         value_delta (int): delta of value. Defaults to 30.
 33 | 
 34 |     Notes:
 35 |         The underwater_hue_interval got from the hue mean in underwater dataset,
 36 |         which get the hue mean by convert color from BGR to HSV.
 37 |         dataset     |    hue min     |     hue max
 38 |         ------------|----------------|-------------
 39 |         DUO         |    18.7551     |     95.4836
 40 |         URPC2020    |    17.9668     |     99.6359
 41 |         URPC2021    |    17.9668     |     103.2373
 42 |         UIEB        |    25.5417     |     116.3379
 43 |         ------------|----------------|-------------
 44 |         hue interval       18                116
 45 |     """
 46 |     underwater_hue_interval = (18, 116)
 47 | 
 48 |     def __init__(self,
 49 |                  hue_prob: float = 0.5,
 50 |                  saturation_prob: float = 0.5,
 51 |                  value_prob: float = 0.5,
 52 |                  hue_delta: int = 5,
 53 |                  saturation_delta: int = 30,
 54 |                  value_delta: int = 30) -> None:
 55 |         assert 0 <= hue_prob <= 1.0
 56 |         assert 0 <= saturation_prob <= 1.0
 57 |         assert 0 <= value_prob <= 1.0
 58 | 
 59 |         self.hue_prob = hue_prob
 60 |         self.saturation_prob = saturation_prob
 61 |         self.value_prob = value_prob
 62 |         self.hue_delta = hue_delta
 63 |         self.saturation_delta = saturation_delta
 64 |         self.value_delta = value_delta
 65 | 
 66 |         self._hue_min, self._hue_max = self.underwater_hue_interval
 67 |         self._hue_middle = (self._hue_min + self._hue_max) / 2
 68 | 
 69 |     @cache_randomness
 70 |     def _random_hue(self):
 71 |         return np.random.rand() < self.hue_prob
 72 | 
 73 |     @cache_randomness
 74 |     def _random_saturation(self):
 75 |         return np.random.rand() < self.saturation_prob
 76 | 
 77 |     @cache_randomness
 78 |     def _random_value(self):
 79 |         return np.random.rand() < self.value_prob
 80 | 
 81 |     @staticmethod
 82 |     def _random_mult():
 83 |         return np.random.uniform(-1, 1)
 84 | 
 85 |     @cache_randomness
 86 |     def _get_hue_gain(self, img):
 87 |         """Get hue gain value and keep it in underwater hue interval."""
 88 |         img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
 89 |         hue_mean = np.mean(img_hsv[..., 0])
 90 |         hue_gain = self._random_mult() * self.hue_delta
 91 | 
 92 |         # img_hue is in the underwater hue interval
 93 |         if self._hue_min < hue_mean < self._hue_max:
 94 |             hue = np.clip(hue_mean + hue_gain, self._hue_min, self._hue_max)
 95 |             hue_gain = hue - hue_mean
 96 | 
 97 |         # img_hue is out of the underwater hue interval
 98 |         else:
 99 |             hue_gain = np.abs(hue_gain)
100 |             if hue_mean >= self._hue_max:
101 |                 hue_gain = -hue_gain
102 | 
103 |         return np.array(hue_gain, dtype=np.int16)
104 | 
105 |     @cache_randomness
106 |     def _get_saturation_gain(self):
107 |         gain = self._random_mult() * self.saturation_delta
108 |         return np.array(gain, dtype=np.int16)
109 | 
110 |     @cache_randomness
111 |     def _get_value_gain(self):
112 |         gain = self._random_mult() * self.value_delta
113 |         return np.array(gain, dtype=np.int16)
114 | 
115 |     def transform(self, results: dict) -> dict:
116 |         hue_able = self._random_hue()
117 |         saturation_able = self._random_saturation()
118 |         value_able = self._random_value()
119 | 
120 |         if not any((hue_able, saturation_able, value_able)):
121 |             return results
122 | 
123 |         img = results['img']
124 |         img_dtype = img.dtype
125 | 
126 |         assert img_dtype == np.uint8
127 |         # convert color uint8 from BGR to HSV
128 |         img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)
129 | 
130 |         if hue_able:
131 |             hue_gain = self._get_hue_gain(img)
132 |             img_hsv[..., 0] = (img_hsv[..., 0] + hue_gain) % 180
133 | 
134 |         if saturation_able:
135 |             saturation_gain = self._get_saturation_gain()
136 |             img_hsv[..., 1] = np.clip(img_hsv[..., 1] + saturation_gain, 0, 255)
137 | 
138 |         if value_able:
139 |             value_gain = self._get_value_gain()
140 |             img_hsv[..., 2] = np.clip(img_hsv[..., 2] + value_gain, 0, 255)
141 | 
142 |         # convert color from HSV to BGR
143 |         img = cv2.cvtColor(img_hsv.astype(img_dtype), cv2.COLOR_HSV2BGR)
144 | 
145 |         results['img'] = img
146 |         return results
147 | 
148 |     def __repr__(self):
149 |         repr_str = self.__class__.__name__
150 |         repr_str += f'(underwater_hue_interval={self.underwater_hue_interval}, '
151 |         repr_str += f'hue_prob={self.hue_prob}, '
152 |         repr_str += f'saturation_prob={self.saturation_prob}, '
153 |         repr_str += f'value_prob={self.value_prob}, '
154 |         repr_str += f'hue_delta={self.hue_delta}, '
155 |         repr_str += f'saturation_delta={self.saturation_delta}, '
156 |         repr_str += f'value_delta={self.value_delta})'
157 |         return repr_str
158 | 


--------------------------------------------------------------------------------
/unitmodule/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_preprocessors import *
2 | from .detectors import *
3 | from .losses import *
4 | 


--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_preprocessor import UnitDetDataPreprocessor
2 | from .unit_module import UnitModule
3 | 
4 | __all__ = ['UnitDetDataPreprocessor', 'UnitModule']
5 | 


--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/data_preprocessor.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | from functools import reduce
  3 | from numbers import Number
  4 | from typing import Sequence, List, Tuple, Optional, Union
  5 | 
  6 | import numpy as np
  7 | import torch.nn.functional as F
  8 | from mmdet.models.data_preprocessors import DetDataPreprocessor
  9 | from mmengine.model import BaseModule
 10 | from mmengine.registry import MODELS
 11 | from mmyolo.models.data_preprocessors import YOLOv5DetDataPreprocessor
 12 | 
 13 | 
 14 | def sum_dict(a, b):
 15 |     temp = dict()
 16 |     for key in (a.keys() | b.keys()):
 17 |         temp[key] = sum([d.get(key, 0) for d in (a, b)])
 18 |     return temp
 19 | 
 20 | 
 21 | @MODELS.register_module()
 22 | class UnitDetDataPreprocessor(DetDataPreprocessor, BaseModule):
 23 |     def __init__(self,
 24 |                  unit_module: dict,
 25 |                  pad_mode: str = 'reflect',
 26 |                  mean: Sequence[Number] = None,
 27 |                  std: Sequence[Number] = None,
 28 |                  pad_size_divisor: int = 1,
 29 |                  pad_value: Union[float, int] = 0,
 30 |                  pad_mask: bool = False,
 31 |                  mask_pad_value: int = 0,
 32 |                  pad_seg: bool = False,
 33 |                  seg_pad_value: int = 255,
 34 |                  bgr_to_rgb: bool = False,
 35 |                  rgb_to_bgr: bool = False,
 36 |                  boxtype2tensor: bool = True,
 37 |                  non_blocking: Optional[bool] = False,
 38 |                  batch_augments: Optional[List[dict]] = None,
 39 |                  init_cfg=None):
 40 |         super().__init__(
 41 |             mean=mean,
 42 |             std=std,
 43 |             pad_size_divisor=pad_size_divisor,
 44 |             pad_value=pad_value,
 45 |             pad_mask=pad_mask,
 46 |             mask_pad_value=mask_pad_value,
 47 |             pad_seg=pad_seg,
 48 |             seg_pad_value=seg_pad_value,
 49 |             bgr_to_rgb=bgr_to_rgb,
 50 |             rgb_to_bgr=rgb_to_bgr,
 51 |             boxtype2tensor=boxtype2tensor,
 52 |             non_blocking=non_blocking,
 53 |             batch_augments=batch_augments)
 54 | 
 55 |         # BaseModule __init__
 56 |         self._is_init = False
 57 |         self.init_cfg = copy.deepcopy(init_cfg)
 58 | 
 59 |         assert pad_mode in ('reflect', 'circular'), \
 60 |             f"Excepted ('reflect', 'circular'), but got {pad_mode}"
 61 |         self.pad_mode = pad_mode
 62 |         self.unit_module = MODELS.build(unit_module)
 63 | 
 64 |     def forward(self,
 65 |                 data: dict,
 66 |                 training: bool = False) -> Union[Tuple[dict, dict], dict]:
 67 |         data = self.cast_data(data)
 68 |         data['inputs'], losses = self.unit_module_forward(data['inputs'], training)
 69 | 
 70 |         data = super(UnitDetDataPreprocessor, self).forward(data, training)
 71 |         return (data, losses) if training else data
 72 | 
 73 |     def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]:
 74 |         outputs = []
 75 |         losses = []
 76 |         for batch_input in batch_inputs:
 77 |             # padding
 78 |             oh, ow = batch_input.shape[1:]
 79 |             pad_h = int(np.ceil(oh / self.pad_size_divisor)) * self.pad_size_divisor
 80 |             pad_w = int(np.ceil(ow / self.pad_size_divisor)) * self.pad_size_divisor
 81 |             p2d = (0, (pad_w - ow), 0, (pad_h - oh))
 82 |             batch_input = batch_input.float()
 83 |             batch_input_pad = F.pad(batch_input, p2d, self.pad_mode)
 84 | 
 85 |             # UnitModule forward
 86 |             batch_input_pad = batch_input_pad.unsqueeze(0) / 255.
 87 |             if training:
 88 |                 batch_output_pad, _losses = self.unit_module(batch_input_pad, training)
 89 |                 losses.append(_losses)
 90 |             else:
 91 |                 batch_output_pad = self.unit_module(batch_input_pad, training)
 92 |             batch_output_pad = batch_output_pad.squeeze(0)
 93 | 
 94 |             # remove padding
 95 |             batch_output = batch_output_pad[..., :oh, :ow] * 255.
 96 |             outputs.append(batch_output)
 97 | 
 98 |         if training:
 99 |             n = len(losses)
100 |             losses = reduce(sum_dict, losses)
101 |             for k, v in losses.items():
102 |                 losses[k] = v / n
103 | 
104 |         return outputs, losses
105 | 
106 | 
@MODELS.register_module()
class UnitYOLOv5DetDataPreprocessor(YOLOv5DetDataPreprocessor, BaseModule):
    """YOLOv5 data preprocessor that runs a UnitModule on the raw inputs.

    Args:
        unit_module (dict): Config used to build the unit module through
            ``MODELS.build``.
        pad_mode (str): Padding mode used before the unit module at test
            time; one of ``'reflect'`` or ``'circular'``.
            Defaults to ``'reflect'``.
        init_cfg (dict, optional): Initialization config stored on the
            module. Defaults to None.

    All remaining arguments are forwarded unchanged to
    ``YOLOv5DetDataPreprocessor.__init__``.
    """

    def __init__(self,
                 unit_module: dict,
                 pad_mode: str = 'reflect',
                 mean: Sequence[Number] = None,
                 std: Sequence[Number] = None,
                 pad_size_divisor: int = 1,
                 pad_value: Union[float, int] = 0,
                 pad_mask: bool = False,
                 mask_pad_value: int = 0,
                 pad_seg: bool = False,
                 seg_pad_value: int = 255,
                 bgr_to_rgb: bool = False,
                 rgb_to_bgr: bool = False,
                 boxtype2tensor: bool = True,
                 non_blocking: Optional[bool] = True,
                 batch_augments: Optional[List[dict]] = None,
                 init_cfg=None):
        parent_kwargs = dict(
            mean=mean,
            std=std,
            pad_size_divisor=pad_size_divisor,
            pad_value=pad_value,
            pad_mask=pad_mask,
            mask_pad_value=mask_pad_value,
            pad_seg=pad_seg,
            seg_pad_value=seg_pad_value,
            bgr_to_rgb=bgr_to_rgb,
            rgb_to_bgr=rgb_to_bgr,
            boxtype2tensor=boxtype2tensor,
            non_blocking=non_blocking,
            batch_augments=batch_augments)
        super().__init__(**parent_kwargs)

        # Attributes that BaseModule.__init__ would normally set.
        self._is_init = False
        self.init_cfg = copy.deepcopy(init_cfg)

        assert pad_mode in ('reflect', 'circular'), \
            f"Excepted ('reflect', 'circular'), but got {pad_mode}"
        self.pad_mode = pad_mode
        self.unit_module = MODELS.build(unit_module)

    def forward(self,
                data: dict,
                training: bool = False) -> Union[Tuple[dict, dict], dict]:
        """Run the unit module, then the regular YOLOv5 preprocessing.

        Returns:
            tuple[dict, dict] | dict: ``(data, losses)`` when ``training``
            is true, otherwise only the preprocessed ``data``.
        """
        data = self.cast_data(data)
        enhanced, losses = self.unit_module_forward(data['inputs'], training)
        data['inputs'] = enhanced

        data = super(UnitYOLOv5DetDataPreprocessor, self).forward(data, training)
        if training:
            return data, losses
        return data

    def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]:
        """Apply the unit module to the inputs.

        In training mode ``batch_inputs`` is processed in a single call and
        the unit module's losses are returned. Otherwise every image is
        padded up to a multiple of ``pad_size_divisor``, processed on its
        own and cropped back to its original size.

        Returns:
            tuple: The processed inputs and the loss dict (empty when not
            training).
        """
        if training:
            scaled = batch_inputs.float() / 255.
            enhanced, losses = self.unit_module(scaled, training)
            return enhanced * 255., losses

        losses = {}
        outputs = []
        for image in batch_inputs:
            # pad right/bottom up to the next multiple of the divisor
            height, width = image.shape[1:]
            target_h = int(np.ceil(height / self.pad_size_divisor)) * self.pad_size_divisor
            target_w = int(np.ceil(width / self.pad_size_divisor)) * self.pad_size_divisor
            padding = (0, target_w - width, 0, target_h - height)
            padded = F.pad(image.float(), padding, self.pad_mode)

            # UnitModule forward on a single-image batch scaled to [0, 1]
            result = self.unit_module(padded.unsqueeze(0) / 255., training)
            result = result.squeeze(0)

            # crop the padding away and restore the 0-255 range
            outputs.append(result[..., :height, :width] * 255.)

        return outputs, losses
187 | 


--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/unit_module.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional, Tuple, Union
  2 | 
  3 | import mmcv.cnn as cnn
  4 | import torch
  5 | import torch.nn as nn
  6 | from mmcv.cnn import build_activation_layer, build_norm_layer
  7 | from mmengine.model import BaseModule
  8 | from mmengine.registry import MODELS
  9 | from torch import Tensor
 10 | 
 11 | 
 12 | class LargeKernelLayer(BaseModule):
 13 |     def __init__(self,
 14 |                  channels: int,
 15 |                  large_kernel: int,
 16 |                  small_kernel: int,
 17 |                  padding_mode: str = 'reflect',
 18 |                  norm_cfg: Optional[dict] = None,
 19 |                  act_cfg: Optional[dict] = None,
 20 |                  init_cfg: Optional[dict] = None):
 21 |         super().__init__(init_cfg)
 22 |         if norm_cfg is None:
 23 |             norm_cfg = dict(type='GN', num_groups=8)
 24 |         if act_cfg is None:
 25 |             act_cfg = dict(type='ReLU')
 26 | 
 27 |         common_kwargs = dict(padding_mode=padding_mode,
 28 |                              groups=channels,
 29 |                              norm_cfg=norm_cfg,
 30 |                              act_cfg=None)
 31 | 
 32 |         self.dw_large = cnn.ConvModule(channels, channels, large_kernel,
 33 |                                        padding=large_kernel // 2, **common_kwargs)
 34 |         self.dw_small = cnn.ConvModule(channels, channels, small_kernel,
 35 |                                        padding=small_kernel // 2, **common_kwargs)
 36 |         self.act = build_activation_layer(act_cfg)
 37 | 
 38 |     def forward(self, x) -> Tensor:
 39 |         x_large = self.dw_large(x)
 40 |         x_small = self.dw_small(x)
 41 |         return self.act(x_large + x_small)
 42 | 
 43 | 
 44 | class LKBlock(BaseModule):
 45 |     def __init__(self,
 46 |                  channels: int,
 47 |                  large_kernel: int,
 48 |                  small_kernel: int,
 49 |                  dw_ratio: float = 1.0,
 50 |                  padding_mode: str = 'reflect',
 51 |                  norm_cfg: Optional[dict] = None,
 52 |                  act_cfg: Optional[dict] = None,
 53 |                  init_cfg: Optional[dict] = None):
 54 |         super().__init__(init_cfg)
 55 |         if norm_cfg is None:
 56 |             norm_cfg = dict(type='GN', num_groups=8)
 57 |         if act_cfg is None:
 58 |             act_cfg = dict(type='ReLU')
 59 |         dw_channels = int(channels * dw_ratio)
 60 | 
 61 |         self.pw1 = cnn.ConvModule(channels, dw_channels, 1, 1,
 62 |                                   norm_cfg=norm_cfg, act_cfg=act_cfg)
 63 |         self.dw = LargeKernelLayer(dw_channels, large_kernel, small_kernel,
 64 |                                    padding_mode=padding_mode,
 65 |                                    norm_cfg=norm_cfg, act_cfg=act_cfg)
 66 |         self.pw2 = cnn.ConvModule(dw_channels, channels, 1, 1,
 67 |                                   norm_cfg=norm_cfg, act_cfg=None)
 68 |         self.norm = build_norm_layer(norm_cfg, channels)[1]
 69 | 
 70 |     def forward(self, x) -> Tensor:
 71 |         y = self.pw1(x)
 72 |         y = self.dw(y)
 73 |         y = self.pw2(y)
 74 |         x = self.norm(x + y)
 75 |         return x
 76 | 
 77 | 
 78 | @MODELS.register_module()
 79 | class UnitBackbone(BaseModule):
 80 |     def __init__(self,
 81 |                  stem_channels: Tuple[int],
 82 |                  large_kernels: Tuple[int],
 83 |                  small_kernels: Tuple[int],
 84 |                  in_channels: int = 3,
 85 |                  dw_ratio: float = 1.0,
 86 |                  padding_mode: str = 'reflect',
 87 |                  norm_cfg: Optional[dict] = None,
 88 |                  act_cfg: Optional[dict] = None,
 89 |                  init_cfg: Optional[dict] = None):
 90 |         super().__init__(init_cfg)
 91 |         assert len(large_kernels) == len(small_kernels)
 92 |         if norm_cfg is None:
 93 |             norm_cfg = dict(type='GN', num_groups=8)
 94 |         if act_cfg is None:
 95 |             act_cfg = dict(type='ReLU')
 96 |         inc = in_channels
 97 | 
 98 |         stem_layers = []
 99 |         for outc in stem_channels:
100 |             stem_layers.append(
101 |                 cnn.ConvModule(inc, outc, 3, 2,
102 |                                padding=1, padding_mode=padding_mode,
103 |                                norm_cfg=norm_cfg, act_cfg=act_cfg))
104 |             inc = outc
105 |         self.stem = nn.Sequential(*stem_layers)
106 | 
107 |         layers = []
108 |         for large_k, small_k in zip(large_kernels, small_kernels):
109 |             layers.append(
110 |                 LKBlock(inc, large_k, small_k, dw_ratio,
111 |                         padding_mode, norm_cfg, act_cfg))
112 |         self.layers = nn.Sequential(*layers)
113 | 
114 |     def forward(self, x) -> Tensor:
115 |         x = self.stem(x)
116 |         x = self.layers(x)
117 |         return x
118 | 
119 | 
@MODELS.register_module()
class THead(BaseModule):
    """Head that upsamples a feature map 4x and squashes it with a sigmoid.

    Two stages of (bilinear 2x upsample -> 3x3 conv). The first convolution
    uses the configured norm and activation; the second has neither and is
    followed by ``torch.sigmoid``, so every output value lies in (0, 1).

    Args:
        in_channels: Channel count of the incoming feature map.
        hid_channels: Channel count between the two convolutions.
        out_channels: Channel count of the output.
        padding_mode: Padding mode of both convolutions.
        norm_cfg: Norm config of the first convolution. Defaults to
            GroupNorm with 8 groups.
        act_cfg: Activation config of the first convolution. Defaults to
            ReLU.
        init_cfg: Initialization config forwarded to ``BaseModule``.
    """

    def __init__(self,
                 in_channels: int,
                 hid_channels: int,
                 out_channels: int = 3,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg

        self.up1 = nn.Upsample(scale_factor=2, mode='bilinear')
        self.up2 = nn.Upsample(scale_factor=2, mode='bilinear')
        self.conv1 = cnn.ConvModule(in_channels, hid_channels,
                                    kernel_size=3, stride=1,
                                    padding=1, padding_mode=padding_mode,
                                    norm_cfg=norm_cfg, act_cfg=act_cfg)
        # Plain convolution: the sigmoid in ``forward`` is the only
        # non-linearity applied to its output.
        self.conv2 = cnn.ConvModule(hid_channels, out_channels,
                                    kernel_size=3, stride=1,
                                    padding=1, padding_mode=padding_mode,
                                    norm_cfg=None, act_cfg=None)

    def forward(self, x) -> Tensor:
        """Return ``sigmoid(conv2(up2(conv1(up1(x)))))``."""
        hidden = self.conv1(self.up1(x))
        logits = self.conv2(self.up2(hidden))
        return torch.sigmoid(logits)
150 | 
151 | 
@MODELS.register_module()
class AHead(BaseModule):
    """Parameter-free head that averages its input over ``mean_dim``.

    Args:
        mean_dim: Dimension(s) to average over. The default ``(-2, -1)``
            averages over the last two dimensions.
        init_cfg: Initialization config forwarded to ``BaseModule``.
    """

    def __init__(self,
                 mean_dim: Union[int, Tuple[int]] = (-2, -1),
                 init_cfg=None):
        super().__init__(init_cfg)
        self.mean_dim = mean_dim

    def forward(self, x) -> Tensor:
        """Return the mean of ``x`` over ``mean_dim``, keeping reduced dims."""
        return x.mean(dim=self.mean_dim, keepdim=True)
162 | 
163 | 
@MODELS.register_module()
class UnitModule(BaseModule):
    """Learnable pre-processing module that predicts ``t`` and ``a`` for an
    image and restores it with ``(x - (1 - t) * a) / t``.

    Args:
        unit_backbone: Config of the feature extractor, built via ``MODELS``.
        t_head: Config of the head predicting ``t`` from the features.
        a_head: Config of the head computing ``a`` from the input image.
        loss_t: Config of the loss comparing ``alpha * t`` with the ``t``
            predicted for the synthetically degraded image.
        loss_acc: Optional config of a loss called with ``(feature, a)``.
        loss_cc: Optional config of a loss called with the restored image.
        loss_sp: Optional config of a loss called with the restored image
            and the restored degraded image.
        loss_tv: Optional config of a loss called with the restored image.
        alpha: Value used as ``t`` when synthesizing the degraded image;
            must satisfy ``0 < alpha < 1``.
        t_min: Lower bound applied to the predicted ``t`` before dividing
            by it; must satisfy ``0 <= t_min < 0.1``.
        init_cfg: Initialization config forwarded to ``BaseModule``.
    """

    def __init__(self,
                 unit_backbone: dict,
                 t_head: dict,
                 a_head: dict,
                 loss_t: dict,
                 loss_acc: Optional[dict] = None,
                 loss_cc: Optional[dict] = None,
                 loss_sp: Optional[dict] = None,
                 loss_tv: Optional[dict] = None,
                 alpha: float = 0.9,
                 t_min: float = 0.001,
                 init_cfg=None):
        super().__init__(init_cfg)
        assert 0 < alpha < 1
        assert 0 <= t_min < 0.1

        self.alpha = alpha
        self.t_min = t_min

        self.unit_backbone = MODELS.build(unit_backbone)
        self.t_head = MODELS.build(t_head)
        self.a_head = MODELS.build(a_head)

        self.loss_t = MODELS.build(loss_t)
        # Optional losses: a missing (falsy) config leaves the attribute None.
        optional_losses = (('loss_acc', loss_acc), ('loss_cc', loss_cc),
                           ('loss_sp', loss_sp), ('loss_tv', loss_tv))
        for attr_name, loss_cfg in optional_losses:
            setattr(self, attr_name,
                    MODELS.build(loss_cfg) if loss_cfg else None)

    def forward(self, x, training: bool = False) -> Union[Tensor, Tuple[Tensor, dict]]:
        """Dispatch to ``loss`` when ``training`` is true, else ``predict``."""
        return self.loss(x) if training else self.predict(x)

    def _forward(self, x) -> Tuple[Tensor, Tensor]:
        """Return the raw (unclamped) ``t`` and ``a`` predicted for ``x``."""
        t = self.t_head(self.unit_backbone(x))
        a = self.a_head(x)
        return t, a

    def predict(self, x, show: bool = False) -> Union[Tensor, tuple]:
        """Restore ``x`` and clamp the result to [0, 1].

        Returns the restored image, or ``(image, t, a)`` when ``show`` is
        true.
        """
        t, a = self._forward(x)
        t = torch.clamp(t, min=self.t_min)

        restored = torch.clamp(self.denoise(x, t, a), 0, 1)
        return (restored, t, a) if show else restored

    def loss(self, x) -> Tuple[Tensor, dict]:
        """Restore ``x`` and compute the configured losses.

        Returns:
            The restored image clamped to [0, 1] and a dict of losses. The
            losses are computed on the unclamped restored image.
        """
        feature = self.unit_backbone(x)
        t = torch.clamp(self.t_head(feature), min=self.t_min)
        a = self.a_head(x)

        # Restored version of the real input.
        restored = self.denoise(x, t, a)

        # Synthesize a further-degraded image with t = alpha, then predict
        # t and a for it (t_fake is used unclamped) and restore it as well.
        degraded = self.noise(x, self.alpha, a)
        t_fake, a_fake = self._forward(degraded)
        restored_fake = self.denoise(degraded, t_fake, a_fake)

        losses = dict(loss_t=self.loss_t(self.alpha * t, t_fake))
        if self.loss_acc:
            losses['loss_acc'] = self.loss_acc(feature, a)

        if self.loss_cc:
            losses['loss_cc'] = self.loss_cc(restored)

        if self.loss_sp:
            losses['loss_sp'] = self.loss_sp(restored, restored_fake)

        if self.loss_tv:
            losses['loss_tv'] = self.loss_tv(restored)

        return torch.clamp(restored, 0, 1), losses

    @staticmethod
    def noise(x, t, a) -> Tensor:
        """Degrade ``x``: return ``x * t + (1 - t) * a``."""
        return x * t + (1 - t) * a

    @staticmethod
    def denoise(x, t, a) -> Tensor:
        """Invert ``noise``: return ``(x - (1 - t) * a) / t``."""
        return (x - (1 - t) * a) / t
260 | 


--------------------------------------------------------------------------------
/unitmodule/models/detectors/__init__.py:
--------------------------------------------------------------------------------
 1 | from .unit_detectors import (UnitCascadeRCNN, UnitDETR, UnitDINO,
 2 |                              UnitFasterRCNN, UnitFCOS, UnitRetinaNet,
 3 |                              UnitTOOD, UnitYOLODetector)
 4 | 
 5 | 
 6 | def register_unit_distributed(cfg):
 7 |     if cfg.get('with_unit_module'):
 8 |         # switch MMDistributedDataParallel to fit model with UnitModule
 9 |         import unitmodule.models.detectors.unit_distributed
10 | 
11 | 
# Public API of the detectors package: the UnitModule-aware detectors and the
# helper that patches the distributed wrapper.
__all__ = [
    'UnitCascadeRCNN',
    'UnitDETR',
    'UnitDINO',
    'UnitFasterRCNN',
    'UnitFCOS',
    'UnitRetinaNet',
    'UnitTOOD',
    'UnitYOLODetector',
    'register_unit_distributed',
]
18 | 


--------------------------------------------------------------------------------
/unitmodule/models/detectors/unit_detectors.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Union
 2 | 
 3 | import torch
 4 | from mmdet.models.detectors import (CascadeRCNN, DETR, DINO,
 5 |                                     FasterRCNN, FCOS, RetinaNet, TOOD)
 6 | from mmengine.optim import OptimWrapper
 7 | from mmengine.registry import MODELS
 8 | from mmyolo.models.detectors import YOLODetector
 9 | 
10 | 
def train_step_with_unit_module(self, data: Union[dict, tuple, list],
                                optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
    """Train step that adds the UnitModule losses to the detector losses.

    The data preprocessor is called in training mode and is expected to
    return ``(data, unit_losses)``; ``unit_losses`` is merged into the dict
    produced by the detector's loss forward before parsing and the optimizer
    update.

    Args:
        data: Raw batch handed to ``self.data_preprocessor``.
        optim_wrapper: Wrapper providing the optimization context and the
            parameter update.

    Returns:
        The log variables returned by ``self.parse_losses``.
    """
    with optim_wrapper.optim_context(self):
        batch, unit_losses = self.data_preprocessor(data, True)
        all_losses = self._run_forward(batch, mode='loss')
    # Merge after the forward context so both loss sources are parsed together.
    all_losses.update(unit_losses)
    total_loss, log_vars = self.parse_losses(all_losses)
    optim_wrapper.update_params(total_loss)
    return log_vars
21 | 
22 | 
def with_unit_module(cls):
    """Class decorator replacing ``cls.train_step`` with
    ``train_step_with_unit_module``; returns the same class object."""
    setattr(cls, 'train_step', train_step_with_unit_module)
    return cls
26 | 
27 | 
@MODELS.register_module()
@with_unit_module
class UnitCascadeRCNN(CascadeRCNN):
    """CascadeRCNN with UnitModule.

    Same as ``CascadeRCNN`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
32 | 
33 | 
@MODELS.register_module()
@with_unit_module
class UnitDETR(DETR):
    """DETR with UnitModule.

    Same as ``DETR`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
38 | 
39 | 
@MODELS.register_module()
@with_unit_module
class UnitDINO(DINO):
    """DINO with UnitModule.

    Same as ``DINO`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
44 | 
45 | 
@MODELS.register_module()
@with_unit_module
class UnitFasterRCNN(FasterRCNN):
    """FasterRCNN with UnitModule.

    Same as ``FasterRCNN`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
50 | 
51 | 
@MODELS.register_module()
@with_unit_module
class UnitFCOS(FCOS):
    """FCOS with UnitModule.

    Same as ``FCOS`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
56 | 
57 | 
@MODELS.register_module()
@with_unit_module
class UnitRetinaNet(RetinaNet):
    """RetinaNet with UnitModule.

    Same as ``RetinaNet`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
62 | 
63 | 
@MODELS.register_module()
@with_unit_module
class UnitTOOD(TOOD):
    """TOOD with UnitModule.

    Same as ``TOOD`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
68 | 
69 | 
@MODELS.register_module()
@with_unit_module
class UnitYOLODetector(YOLODetector):
    """YOLODetector with UnitModule.

    Same as ``YOLODetector`` except that ``with_unit_module`` replaces
    ``train_step`` with ``train_step_with_unit_module``, which merges the
    losses returned by the data preprocessor into the detector losses.
    """
74 | 


--------------------------------------------------------------------------------
/unitmodule/models/detectors/unit_distributed.py:
--------------------------------------------------------------------------------
 1 | from typing import Dict, Union
 2 | 
 3 | import torch
 4 | from mmengine.model.utils import detect_anomalous_params
 5 | from mmengine.model.wrappers import MMDistributedDataParallel
 6 | from mmengine.optim import OptimWrapper
 7 | from mmengine.registry import MODEL_WRAPPERS
 8 | 
 9 | 
def ddp_train_step_with_unit_module(self, data: Union[dict, tuple, list],
                                    optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
    """Distributed train step that adds the UnitModule losses.

    The wrapped module's data preprocessor is called with ``training=True``
    and is expected to return ``(data, unit_losses)``; ``unit_losses`` is
    merged into the losses from the forward pass before they are parsed and
    used for the parameter update.

    Args:
        data: Raw batch handed to ``self.module.data_preprocessor``.
        optim_wrapper: Wrapper providing the optimization context and the
            parameter update.

    Returns:
        The log variables returned by ``self.module.parse_losses``.
    """
    wrapped = self.module
    with optim_wrapper.optim_context(self):
        batch, unit_losses = wrapped.data_preprocessor(data, training=True)
        all_losses = self._run_forward(batch, mode='loss')
    all_losses.update(unit_losses)
    total_loss, log_vars = wrapped.parse_losses(all_losses)
    optim_wrapper.update_params(total_loss)
    # Optional check, run only when the wrapper was configured for it.
    if self.detect_anomalous_params:
        detect_anomalous_params(total_loss, model=self)
    return log_vars
21 | 
22 | 
# Switch MMDistributedDataParallel.train_step to the UnitModule-aware version
# and register the patched class again.
# NOTE: both statements run at import time and modify the class itself, so
# every MMDistributedDataParallel instance in the process is affected once
# this module has been imported.
# force=True is passed because the wrapper is presumably already registered
# under the same name -- confirm against the mmengine registry.
MMDistributedDataParallel.train_step = ddp_train_step_with_unit_module
MODEL_WRAPPERS.register_module(module=MMDistributedDataParallel, force=True)
26 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | from .assisting_color_cast_loss import AssistingColorCastLoss
 2 | from .color_cast_loss import ColorCastLoss
 3 | from .saturated_pixel_loss import SaturatedPixelLoss
 4 | from .total_variation_loss import TotalVariationLoss
 5 | from .transmission_loss import TransmissionLoss
 6 | 
 7 | __all__ = [
 8 |     'AssistingColorCastLoss', 'ColorCastLoss', 'SaturatedPixelLoss',
 9 |     'TotalVariationLoss', 'TransmissionLoss',
10 | ]
11 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/assisting_color_cast_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from mmengine.registry import MODELS
 4 | from torch import Tensor
 5 | from torchvision.ops import RoIPool
 6 | 
 7 | 
 8 | @MODELS.register_module()
 9 | class AssistingColorCastLoss(nn.Module):
10 |     def __init__(self, channels: int, loss_weight: float = 1.0):
11 |         super().__init__()
12 |         self.loss_weight = loss_weight
13 |         self.loss_fn = nn.MSELoss(reduction='mean')
14 | 
15 |         self.roi_pooling = RoIPool((7, 7), 1)
16 |         self.down_conv = nn.Conv2d(channels, 3, 1, 1)
17 |         self.acc_head = nn.Sequential(
18 |             nn.Linear(49, 32),
19 |             nn.Linear(32, 16),
20 |             nn.Linear(16, 1))
21 | 
22 |     def forward(self, feature: Tensor, a: Tensor) -> Tensor:
23 |         device = feature.device
24 |         b, _, h, w = feature.shape
25 |         a = a.squeeze(-1).squeeze(-1)  # (b, 3)
26 |         boxes = [torch.tensor(
27 |             [[0, 0, h - 1, w - 1]],
28 |             dtype=torch.float32).to(device) for _ in range(b)]
29 | 
30 |         feature = self.roi_pooling(feature, boxes)
31 |         feature = self.down_conv(feature).view(b, 3, -1)
32 |         color_cast = self.acc_head(feature).squeeze(-1)  # (b, 3)
33 | 
34 |         loss = self.loss_fn(color_cast, a)
35 |         return self.loss_weight * loss
36 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/color_cast_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from mmengine.registry import MODELS
 4 | from torch import Tensor
 5 | 
 6 | 
 7 | @MODELS.register_module()
 8 | class ColorCastLoss(nn.Module):
 9 |     def __init__(self, loss_weight: float = 1.0):
10 |         super().__init__()
11 |         self.loss_weight = loss_weight
12 |         self.loss_fn = nn.MSELoss(reduction='mean')
13 | 
14 |     def forward(self, x: Tensor) -> Tensor:
15 |         x = torch.mean(x, dim=(-2, -1))
16 |         # from color channel (0, 1, 2) corresponding to (1, 2, 0)
17 |         loss = self.loss_fn(x, x[:, [1, 2, 0]])
18 |         return self.loss_weight * loss
19 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/saturated_pixel_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from mmengine.registry import MODELS
 4 | from torch import Tensor
 5 | 
 6 | 
 7 | @MODELS.register_module()
 8 | class SaturatedPixelLoss(nn.Module):
 9 |     def __init__(self, loss_weight: float = 1.0):
10 |         super().__init__()
11 |         self.loss_weight = loss_weight
12 | 
13 |     def forward(self, a: Tensor, b: Tensor) -> Tensor:
14 |         zero = a.new_zeros(1)
15 |         one = a.new_ones(1)
16 | 
17 |         loss_max = (torch.max(a, one) + torch.max(b, one) - 2 * one).nanmean()
18 |         loss_min = -(torch.min(a, zero) + torch.min(b, zero)).nanmean()
19 |         loss = loss_max + loss_min
20 |         return self.loss_weight * loss
21 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/total_variation_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from mmengine.registry import MODELS
 3 | from torch import Tensor
 4 | 
 5 | 
 6 | @MODELS.register_module()
 7 | class TotalVariationLoss(nn.Module):
 8 |     def __init__(self, loss_weight: float = 1.0):
 9 |         super().__init__()
10 |         self.loss_weight = loss_weight
11 |         self.loss_fn = nn.MSELoss(reduction='mean')
12 | 
13 |     def forward(self, x: Tensor) -> Tensor:
14 |         _, _, h, w, = x.shape
15 |         h_tv = self.loss_fn(x[:, :, 1:, :], x[:, :, :h - 1, :])
16 |         w_tv = self.loss_fn(x[:, :, :, 1:], x[:, :, :, :w - 1])
17 |         loss = h_tv + w_tv
18 |         return self.loss_weight * loss
19 | 


--------------------------------------------------------------------------------
/unitmodule/models/losses/transmission_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from mmengine.registry import MODELS
 3 | from torch import Tensor
 4 | 
 5 | 
 6 | @MODELS.register_module()
 7 | class TransmissionLoss(nn.Module):
 8 |     def __init__(self, loss_weight: float = 1.0):
 9 |         super().__init__()
10 |         self.loss_weight = loss_weight
11 |         self.loss_fn = nn.MSELoss(reduction='mean')
12 | 
13 |     def forward(self, a: Tensor, b: Tensor) -> Tensor:
14 |         loss = self.loss_fn(a, b)
15 |         return self.loss_weight * loss
16 | 


--------------------------------------------------------------------------------