├── .gitignore
├── LICENSE
├── README.md
├── configs
│   ├── _base_
│   │   ├── datasets
│   │   │   ├── duo_detection.py
│   │   │   └── duo_detection_mmyolo.py
│   │   ├── default_runtime.py
│   │   └── default_runtime_mmyolo.py
│   ├── cascade_rcnn
│   │   ├── cascade_rcnn_r50_1x_duo.py
│   │   └── unitmodule_cascade_rcnn_r50_1x_duo.py
│   ├── detr
│   │   ├── detr_r50_500e_duo.py
│   │   └── unitmodule_detr_r50_500e_duo.py
│   ├── dino
│   │   ├── dino_4scale_r50_1x_duo.py
│   │   └── unitmodule_dino_4scale_r50_1x_duo.py
│   ├── faster_rcnn
│   │   ├── faster_rcnn_r50_1x_duo.py
│   │   └── unitmodule_faster_rcnn_r50_1x_duo.py
│   ├── fcos
│   │   ├── fcos_r50_1x_duo.py
│   │   └── unitmodule_fcos_r50_1x_duo.py
│   ├── retinanet
│   │   ├── retinanet_r50_1x_duo.py
│   │   └── unitmodule_retinanet_r50_1x_duo.py
│   ├── rtmdet
│   │   ├── rtmdet_s_100e_duo.py
│   │   └── unitmodule_rtmdet_s_100e_duo.py
│   ├── tood
│   │   ├── tood_r50_1x_duo.py
│   │   └── unitmodule_tood_r50_1x_duo.py
│   ├── unitmodule
│   │   └── unitmodule.py
│   ├── yolov5
│   │   ├── unitmodule_yolov5_s_100e_duo.py
│   │   └── yolov5_s_100e_duo.py
│   ├── yolov6
│   │   ├── unitmodule_yolov6_s_100e_duo.py
│   │   └── yolov6_s_100e_duo.py
│   ├── yolov7
│   │   ├── unitmodule_yolov7_t_100e_duo.py
│   │   └── yolov7_t_100e_duo.py
│   ├── yolov8
│   │   ├── unitmodule_yolov8_s_100e_duo.py
│   │   └── yolov8_s_100e_duo.py
│   └── yolox
│       ├── unitmodule_yolox_s_100e_duo.py
│       └── yolox_s_100e_duo.py
├── requirements.txt
├── tools
│   ├── dist_test.sh
│   ├── dist_train.sh
│   ├── test.py
│   └── train.py
└── unitmodule
    ├── __init__.py
    ├── datasets
    │   ├── __init__.py
    │   └── transforms
    │       ├── __init__.py
    │       └── colorspace.py
    └── models
        ├── __init__.py
        ├── data_preprocessors
        │   ├── __init__.py
        │   ├── data_preprocessor.py
        │   └── unit_module.py
        ├── detectors
        │   ├── __init__.py
        │   ├── unit_detectors.py
        │   └── unit_distributed.py
        └── losses
            ├── __init__.py
            ├── assisting_color_cast_loss.py
            ├── color_cast_loss.py
            ├── saturated_pixel_loss.py
            ├── total_variation_loss.py
            └── transmission_loss.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # The repo
2 | .idea
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | pip-wheel-metadata/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 | local_settings.py
64 | db.sqlite3
65 | db.sqlite3-journal
66 |
67 | # Flask stuff:
68 | instance/
69 | .webassets-cache
70 |
71 | # Scrapy stuff:
72 | .scrapy
73 |
74 | # Sphinx documentation
75 | docs/_build/
76 |
77 | # PyBuilder
78 | target/
79 |
80 | # Jupyter Notebook
81 | .ipynb_checkpoints
82 |
83 | # IPython
84 | profile_default/
85 | ipython_config.py
86 |
87 | # pyenv
88 | .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 LEFTeyex
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UnitModule
2 |
3 | ### Installation
4 |
5 | This project is based on [MMDetection](https://github.com/open-mmlab/mmdetection/tree/main).
6 |
7 | - Python 3.8
8 | - PyTorch 1.11.0+cu113
9 |
10 | **Step 1.** Create a conda virtual environment and activate it.
11 |
12 | ```bash
13 | conda create -n unitmodule python=3.8 -y
14 | conda activate unitmodule
15 | ```
16 |
17 | **Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/).
18 |
19 | Linux and Windows
20 |
21 | ```bash
22 | # Wheel CUDA 11.3
23 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113
24 | ```
25 |
26 | ```bash
27 | # Conda CUDA 11.3
28 | conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch
29 | ```
30 |
31 | **Step 3.** Install MMDetection and dependent packages.
32 |
33 | ```bash
34 | pip install -U openmim
35 | mim install mmengine==0.7.4
36 | mim install mmcv==2.0.0
37 | mim install mmdet==3.0.0
38 | mim install mmyolo==0.5.0
39 | pip install -r requirements.txt
40 | ```
41 |
42 | ### Dataset
43 |
44 | The DUO dataset should be organized as follows:
45 |
46 | ```text
47 | # DUO
48 |
49 | data
50 | ├── DUO
51 | │   ├── annotations
52 | │   │   ├── instances_train.json
53 | │   │   ├── instances_test.json
54 | │   ├── images
55 | │   │   ├── train
56 | │   │   ├── test
57 | ```
58 |
59 | ### Training
60 |
61 | ```bash
62 | bash tools/dist_train.sh configs/yolox/yolox_s_100e_duo.py 2
63 | ```
64 |
65 | ### Test
66 |
67 | ```bash
68 | bash tools/dist_test.sh configs/yolox/yolox_s_100e_duo.py yolox_s_100e_duo.pth 2
69 | ```
--------------------------------------------------------------------------------
/configs/_base_/datasets/duo_detection.py:
--------------------------------------------------------------------------------
1 | data_root = 'data/DUO/'  # dataset root; keep the trailing slash, the evaluator path is built by string concatenation
2 |
3 | train_img_file = 'images/train'
4 | val_img_file = 'images/test'  # validation uses the test images
5 | train_ann_file = 'annotations/instances_train.json'
6 | val_ann_file = 'annotations/instances_test.json'  # validation uses the test annotations
7 |
8 | mean_bgr = [85.603, 148.034, 64.697]  # channel means, BGR order (per the name)
9 | std_bgr = [32.28, 39.201, 26.55]  # channel stds, BGR order (per the name)
10 | mean_rgb = [64.697, 148.034, 85.603]  # mean_bgr reversed
11 | std_rgb = [26.55, 39.201, 32.28]  # std_bgr reversed
12 |
13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish')  # four categories, passed as dataset metainfo
14 |
15 | img_scale = (1333, 800)  # scale handed to Resize (keep_ratio=True) in both pipelines
16 | dataset_type = 'CocoDataset'
17 | evaluator_type = 'CocoMetric'
18 | train_pipeline = [  # load image + boxes -> resize -> random flip -> pack
19 | dict(type='LoadImageFromFile'),
20 | dict(type='LoadAnnotations', with_bbox=True),
21 | dict(type='Resize', scale=img_scale, keep_ratio=True),
22 | dict(type='RandomFlip', prob=0.5),
23 | dict(type='PackDetInputs')
24 | ]
25 | test_pipeline = [  # no flip; annotations are loaded after the resize step
26 | dict(type='LoadImageFromFile'),
27 | dict(type='Resize', scale=img_scale, keep_ratio=True),
28 | dict(type='LoadAnnotations', with_bbox=True),
29 | dict(
30 | type='PackDetInputs',
31 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
32 | 'scale_factor'))
33 | ]
34 |
35 | num_gpu = 2  # used below to derive base_batch_size
36 | train_bs = 4  # training batch_size; also reused as the training num_workers
37 | val_bs = 1
38 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu)  # disabled; base_batch_size = 4 * 2 = 8
39 | train_dataloader = dict(
40 | batch_size=train_bs,
41 | num_workers=train_bs,
42 | persistent_workers=True,
43 | sampler=dict(type='DefaultSampler', shuffle=True),
44 | batch_sampler=dict(type='AspectRatioBatchSampler'),
45 | dataset=dict(
46 | type=dataset_type,
47 | metainfo=dict(classes=classes),
48 | data_root=data_root,
49 | ann_file=train_ann_file,
50 | data_prefix=dict(img=train_img_file),
51 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
52 | pipeline=train_pipeline,
53 | ))
54 |
55 | val_dataloader = dict(
56 | batch_size=val_bs,
57 | num_workers=val_bs * 2,
58 | persistent_workers=True,
59 | drop_last=False,
60 | sampler=dict(type='DefaultSampler', shuffle=False),
61 | dataset=dict(
62 | type=dataset_type,
63 | metainfo=dict(classes=classes),
64 | data_root=data_root,
65 | ann_file=val_ann_file,
66 | data_prefix=dict(img=val_img_file),
67 | test_mode=True,
68 | pipeline=test_pipeline,
69 | ))
70 |
71 | test_dataloader = val_dataloader  # testing reuses the validation dataloader config
72 |
73 | val_evaluator = dict(
74 | type=evaluator_type,
75 | ann_file=data_root + val_ann_file,  # plain concatenation -> 'data/DUO/annotations/instances_test.json'
76 | metric='bbox',
77 | format_only=False)
78 | test_evaluator = val_evaluator  # testing reuses the validation evaluator config
79 |
--------------------------------------------------------------------------------
/configs/_base_/datasets/duo_detection_mmyolo.py:
--------------------------------------------------------------------------------
1 | data_root = 'data/DUO/'  # dataset root; keep the trailing slash, the evaluator path is built by string concatenation
2 |
3 | train_img_file = 'images/train'
4 | val_img_file = 'images/test'  # validation uses the test images
5 | train_ann_file = 'annotations/instances_train.json'
6 | val_ann_file = 'annotations/instances_test.json'  # validation uses the test annotations
7 |
8 | mean_bgr = [85.603, 148.034, 64.697]  # channel means, BGR order (per the name)
9 | std_bgr = [32.28, 39.201, 26.55]  # channel stds, BGR order (per the name)
10 | mean_rgb = [64.697, 148.034, 85.603]  # mean_bgr reversed
11 | std_rgb = [26.55, 39.201, 32.28]  # std_bgr reversed
12 |
13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish')  # four categories, passed as dataset metainfo
14 |
15 | img_scale = (640, 640)  # square scale handed to mmdet.Resize; the pipelines then pad to square
16 | dataset_type = 'YOLOv5CocoDataset'
17 | evaluator_type = 'mmdet.CocoMetric'  # 'mmdet.' prefix: presumably resolves the type in the MMDetection registry scope
18 | train_pipeline = [  # load image + boxes -> resize -> pad to square -> random flip -> pack
19 | dict(type='LoadImageFromFile'),
20 | dict(type='mmdet.LoadAnnotations', with_bbox=True),
21 | dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
22 | dict(type='mmdet.Pad',
23 | pad_to_square=True,
24 | pad_val=dict(img=(114.0, 114.0, 114.0))),  # constant fill value used for the padded image area
25 | dict(type='mmdet.RandomFlip', prob=0.5),
26 | dict(type='mmdet.PackDetInputs')
27 | ]
28 | test_pipeline = [  # no flip; annotations are loaded after resize and pad
29 | dict(type='LoadImageFromFile'),
30 | dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True),
31 | dict(type='mmdet.Pad',
32 | pad_to_square=True,
33 | pad_val=dict(img=(114.0, 114.0, 114.0))),  # same fill value as the training pipeline
34 | dict(type='mmdet.LoadAnnotations', with_bbox=True),
35 | dict(
36 | type='mmdet.PackDetInputs',
37 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
38 | 'scale_factor'))
39 | ]
40 |
41 | num_gpu = 2  # used below to derive base_batch_size
42 | train_bs = 4  # training batch_size; also reused as the training num_workers
43 | val_bs = 1
44 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu)  # disabled; base_batch_size = 4 * 2 = 8
45 | train_dataloader = dict(
46 | batch_size=train_bs,
47 | num_workers=train_bs,
48 | persistent_workers=True,
49 | collate_fn=dict(type='yolov5_collate'),  # set for training only; val_dataloader defines no collate_fn
50 | sampler=dict(type='DefaultSampler', shuffle=True),
51 | batch_sampler=dict(type='mmdet.AspectRatioBatchSampler'),
52 | dataset=dict(
53 | type=dataset_type,
54 | metainfo=dict(classes=classes),
55 | data_root=data_root,
56 | ann_file=train_ann_file,
57 | data_prefix=dict(img=train_img_file),
58 | filter_cfg=dict(filter_empty_gt=True, min_size=32),
59 | pipeline=train_pipeline,
60 | ))
61 |
62 | val_dataloader = dict(
63 | batch_size=val_bs,
64 | num_workers=val_bs * 2,
65 | persistent_workers=True,
66 | drop_last=False,
67 | sampler=dict(type='DefaultSampler', shuffle=False),
68 | dataset=dict(
69 | type=dataset_type,
70 | metainfo=dict(classes=classes),
71 | data_root=data_root,
72 | ann_file=val_ann_file,
73 | data_prefix=dict(img=val_img_file),
74 | test_mode=True,
75 | pipeline=test_pipeline,
76 | ))
77 |
78 | test_dataloader = val_dataloader  # testing reuses the validation dataloader config
79 |
80 | val_evaluator = dict(
81 | type=evaluator_type,
82 | ann_file=data_root + val_ann_file,  # plain concatenation -> 'data/DUO/annotations/instances_test.json'
83 | metric='bbox',
84 | format_only=False)
85 | test_evaluator = val_evaluator  # testing reuses the validation evaluator config
86 |
--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | default_scope = 'mmdet'  # default registry scope for configs built on this runtime file
2 | log_level = 'INFO'
3 | load_from = None  # no checkpoint is loaded by default
4 | resume = False
5 |
6 | env_cfg = dict(
7 | cudnn_benchmark=False,
8 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
9 | dist_cfg=dict(backend='nccl'),
10 | )
11 | randomness = dict(seed=None)  # no fixed random seed
12 |
13 | vis_backends = [  # two backends: local storage and TensorBoard
14 | dict(type='LocalVisBackend'),
15 | dict(type='TensorboardVisBackend')
16 | ]
17 | visualizer = dict(
18 | type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
20 | default_hooks = dict(
21 | timer=dict(type='IterTimerHook'),
22 | logger=dict(type='LoggerHook', interval=50),  # same value as log_processor.window_size
23 | param_scheduler=dict(type='ParamSchedulerHook'),
24 | checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'),  # keep at most 3 checkpoints; best one tracked by 'coco/bbox_mAP'
25 | sampler_seed=dict(type='DistSamplerSeedHook'),
26 | visualization=dict(type='DetVisualizationHook'))
27 |
--------------------------------------------------------------------------------
/configs/_base_/default_runtime_mmyolo.py:
--------------------------------------------------------------------------------
1 | default_scope = 'mmyolo'  # default registry scope; MMDetection types below carry an explicit 'mmdet.' prefix
2 | log_level = 'INFO'
3 | load_from = None  # no checkpoint is loaded by default
4 | resume = False
5 |
6 | env_cfg = dict(
7 | cudnn_benchmark=False,
8 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
9 | dist_cfg=dict(backend='nccl'),
10 | )
11 | randomness = dict(seed=None)  # no fixed random seed
12 |
13 | vis_backends = [  # two backends: local storage and TensorBoard
14 | dict(type='LocalVisBackend'),
15 | dict(type='TensorboardVisBackend')
16 | ]
17 | visualizer = dict(
18 | type='mmdet.DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
20 | default_hooks = dict(
21 | timer=dict(type='IterTimerHook'),
22 | logger=dict(type='LoggerHook', interval=50),  # same value as log_processor.window_size
23 | param_scheduler=dict(type='ParamSchedulerHook'),
24 | checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'),  # keep at most 3 checkpoints; best one tracked by 'coco/bbox_mAP'
25 | sampler_seed=dict(type='DistSamplerSeedHook'),
26 | visualization=dict(type='mmdet.DetVisualizationHook'))
27 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # inherit the DUO dataset settings and the MMDetection runtime defaults
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # the "1x" schedule in the file name
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)  # validate after every epoch
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [
11 | dict(
12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
13 | dict(
14 | type='MultiStepLR',
15 | begin=0,
16 | milestones=[8, 11],
17 | gamma=0.1)
18 | ]
19 | optim_wrapper = dict(  # plain SGD; no clip_grad is configured here
20 | type='OptimWrapper',
21 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
22 |
23 | num_classes = 4  # matches the four entries of classes in the DUO dataset config
24 | model = dict(
25 | type='CascadeRCNN',
26 | data_preprocessor=dict(
27 | type='DetDataPreprocessor',
28 | mean=_base_.mean_rgb,  # RGB-ordered statistics, paired with bgr_to_rgb=True
29 | std=_base_.std_rgb,
30 | bgr_to_rgb=True,
31 | pad_size_divisor=32),
32 | backbone=dict(
33 | type='ResNet',
34 | depth=50,
35 | num_stages=4,
36 | out_indices=(0, 1, 2, 3),  # all four stages feed the FPN (in_channels 256/512/1024/2048)
37 | frozen_stages=1,
38 | norm_cfg=dict(type='BN', requires_grad=True),
39 | norm_eval=True,
40 | style='pytorch',
41 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
42 | neck=dict(
43 | type='FPN',
44 | in_channels=[256, 512, 1024, 2048],
45 | out_channels=256,
46 | num_outs=5),  # five output levels, one per anchor stride in rpn_head
47 | rpn_head=dict(
48 | type='RPNHead',
49 | in_channels=256,
50 | feat_channels=256,
51 | anchor_generator=dict(
52 | type='AnchorGenerator',
53 | scales=[8],
54 | ratios=[0.5, 1.0, 2.0],
55 | strides=[4, 8, 16, 32, 64]),
56 | bbox_coder=dict(
57 | type='DeltaXYWHBBoxCoder',
58 | target_means=[0.0, 0.0, 0.0, 0.0],
59 | target_stds=[1.0, 1.0, 1.0, 1.0]),
60 | loss_cls=dict(
61 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
62 | loss_bbox=dict(
63 | type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
64 | roi_head=dict(
65 | type='CascadeRoIHead',
66 | num_stages=3,  # one bbox_head entry and one train_cfg.rcnn entry per stage
67 | stage_loss_weights=[1, 0.5, 0.25],
68 | bbox_roi_extractor=dict(
69 | type='SingleRoIExtractor',
70 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
71 | out_channels=256,
72 | featmap_strides=[4, 8, 16, 32]),
73 | bbox_head=[  # the three heads differ only in bbox_coder.target_stds
74 | dict(
75 | type='Shared2FCBBoxHead',
76 | in_channels=256,
77 | fc_out_channels=1024,
78 | roi_feat_size=7,
79 | num_classes=num_classes,
80 | bbox_coder=dict(
81 | type='DeltaXYWHBBoxCoder',
82 | target_means=[0.0, 0.0, 0.0, 0.0],
83 | target_stds=[0.1, 0.1, 0.2, 0.2]),  # stage 1
84 | reg_class_agnostic=True,
85 | loss_cls=dict(
86 | type='CrossEntropyLoss',
87 | use_sigmoid=False,
88 | loss_weight=1.0),
89 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
90 | loss_weight=1.0)),
91 | dict(
92 | type='Shared2FCBBoxHead',
93 | in_channels=256,
94 | fc_out_channels=1024,
95 | roi_feat_size=7,
96 | num_classes=num_classes,
97 | bbox_coder=dict(
98 | type='DeltaXYWHBBoxCoder',
99 | target_means=[0.0, 0.0, 0.0, 0.0],
100 | target_stds=[0.05, 0.05, 0.1, 0.1]),  # stage 2: half of the stage-1 stds
101 | reg_class_agnostic=True,
102 | loss_cls=dict(
103 | type='CrossEntropyLoss',
104 | use_sigmoid=False,
105 | loss_weight=1.0),
106 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
107 | loss_weight=1.0)),
108 | dict(
109 | type='Shared2FCBBoxHead',
110 | in_channels=256,
111 | fc_out_channels=1024,
112 | roi_feat_size=7,
113 | num_classes=num_classes,
114 | bbox_coder=dict(
115 | type='DeltaXYWHBBoxCoder',
116 | target_means=[0.0, 0.0, 0.0, 0.0],
117 | target_stds=[0.033, 0.033, 0.067, 0.067]),  # stage 3: roughly a third of the stage-1 stds
118 | reg_class_agnostic=True,
119 | loss_cls=dict(
120 | type='CrossEntropyLoss',
121 | use_sigmoid=False,
122 | loss_weight=1.0),
123 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
124 | ]),
125 | train_cfg=dict(
126 | rpn=dict(
127 | assigner=dict(
128 | type='MaxIoUAssigner',
129 | pos_iou_thr=0.7,
130 | neg_iou_thr=0.3,
131 | min_pos_iou=0.3,
132 | match_low_quality=True,
133 | ignore_iof_thr=-1),
134 | sampler=dict(
135 | type='RandomSampler',
136 | num=256,
137 | pos_fraction=0.5,
138 | neg_pos_ub=-1,
139 | add_gt_as_proposals=False),
140 | allowed_border=0,
141 | pos_weight=-1,
142 | debug=False),
143 | rpn_proposal=dict(
144 | nms_pre=2000,
145 | max_per_img=2000,
146 | nms=dict(type='nms', iou_threshold=0.7),
147 | min_bbox_size=0),
148 | rcnn=[  # per-stage settings; the IoU thresholds rise 0.5 -> 0.6 -> 0.7
149 | dict(
150 | assigner=dict(
151 | type='MaxIoUAssigner',
152 | pos_iou_thr=0.5,
153 | neg_iou_thr=0.5,
154 | min_pos_iou=0.5,
155 | match_low_quality=False,
156 | ignore_iof_thr=-1),
157 | sampler=dict(
158 | type='RandomSampler',
159 | num=512,
160 | pos_fraction=0.25,
161 | neg_pos_ub=-1,
162 | add_gt_as_proposals=True),
163 | pos_weight=-1,
164 | debug=False),
165 | dict(
166 | assigner=dict(
167 | type='MaxIoUAssigner',
168 | pos_iou_thr=0.6,
169 | neg_iou_thr=0.6,
170 | min_pos_iou=0.6,
171 | match_low_quality=False,
172 | ignore_iof_thr=-1),
173 | sampler=dict(
174 | type='RandomSampler',
175 | num=512,
176 | pos_fraction=0.25,
177 | neg_pos_ub=-1,
178 | add_gt_as_proposals=True),
179 | pos_weight=-1,
180 | debug=False),
181 | dict(
182 | assigner=dict(
183 | type='MaxIoUAssigner',
184 | pos_iou_thr=0.7,
185 | neg_iou_thr=0.7,
186 | min_pos_iou=0.7,
187 | match_low_quality=False,
188 | ignore_iof_thr=-1),
189 | sampler=dict(
190 | type='RandomSampler',
191 | num=512,
192 | pos_fraction=0.25,
193 | neg_pos_ub=-1,
194 | add_gt_as_proposals=True),
195 | pos_weight=-1,
196 | debug=False)
197 | ]),
198 | test_cfg=dict(
199 | rpn=dict(
200 | nms_pre=1000,  # half of the training-time proposal budget (2000)
201 | max_per_img=1000,
202 | nms=dict(type='nms', iou_threshold=0.7),
203 | min_bbox_size=0),
204 | rcnn=dict(
205 | score_thr=0.05,
206 | nms=dict(type='nms', iou_threshold=0.5),
207 | max_per_img=100)))
208 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/unitmodule_cascade_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # baseline Cascade R-CNN config plus the shared unit-module config
2 | './cascade_rcnn_r50_1x_duo.py',
3 | '../unitmodule/unitmodule.py',
4 | ]
5 |
6 | model = dict(  # only the detector type and data preprocessor are overridden
7 | type='UnitCascadeRCNN',
8 | data_preprocessor=dict(
9 | type='UnitDetDataPreprocessor',
10 | unit_module=_base_.unit_module)  # presumably defined in ../unitmodule/unitmodule.py (not visible here)
11 | )
12 |
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # adds gradient clipping; the baseline config sets none
14 |
15 | train_pipeline = [  # baseline training pipeline with UnderwaterColorRandomTransfer inserted after Resize
16 | dict(type='LoadImageFromFile'),
17 | dict(type='LoadAnnotations', with_bbox=True),
18 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
19 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 | dict(type='RandomFlip', prob=0.5),
21 | dict(type='PackDetInputs')
22 | ]
23 |
24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # swaps the new pipeline into the inherited training dataset
25 |
--------------------------------------------------------------------------------
/configs/detr/detr_r50_500e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # inherit the DUO dataset settings and the MMDetection runtime defaults
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 500  # the "500e" schedule in the file name
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)  # validate after every epoch
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # single epoch-based step decay, no warm-up entry
11 | dict(
12 | type='MultiStepLR',
13 | begin=0,
14 | end=max_epochs,
15 | by_epoch=True,
16 | milestones=[334],  # LR is multiplied by gamma at epoch 334
17 | gamma=0.1)
18 | ]
19 | optim_wrapper = dict(
20 | type='OptimWrapper',
21 | optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
22 | clip_grad=dict(max_norm=0.1, norm_type=2),
23 | paramwise_cfg=dict(
24 | custom_keys=dict(backbone=dict(lr_mult=0.1, decay_mult=1.0))))  # backbone parameters get a 0.1 LR multiplier
25 |
26 | num_classes = 4  # matches the four entries of classes in the DUO dataset config
27 | model = dict(
28 | type='DETR',
29 | num_queries=100,  # same number as test_cfg.max_per_img below
30 | data_preprocessor=dict(
31 | type='DetDataPreprocessor',
32 | mean=_base_.mean_rgb,  # RGB-ordered statistics, paired with bgr_to_rgb=True
33 | std=_base_.std_rgb,
34 | bgr_to_rgb=True,
35 | pad_size_divisor=32),
36 | backbone=dict(
37 | type='ResNet',
38 | depth=50,
39 | num_stages=4,
40 | out_indices=(3,),  # only the last stage is output (2048 channels into the neck)
41 | frozen_stages=1,
42 | norm_cfg=dict(type='BN', requires_grad=False),
43 | norm_eval=True,
44 | style='pytorch',
45 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
46 | neck=dict(
47 | type='ChannelMapper',
48 | in_channels=[2048],
49 | kernel_size=1,
50 | out_channels=256,  # equals embed_dims used by the encoder/decoder layers
51 | act_cfg=None,
52 | norm_cfg=None,
53 | num_outs=1),
54 | encoder=dict(
55 | num_layers=6,
56 | layer_cfg=dict(
57 | self_attn_cfg=dict(
58 | embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
59 | ffn_cfg=dict(
60 | embed_dims=256,
61 | feedforward_channels=2048,
62 | num_fcs=2,
63 | ffn_drop=0.1,
64 | act_cfg=dict(type='ReLU', inplace=True)))),
65 | decoder=dict(
66 | num_layers=6,
67 | layer_cfg=dict(
68 | self_attn_cfg=dict(
69 | embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
70 | cross_attn_cfg=dict(
71 | embed_dims=256, num_heads=8, dropout=0.1, batch_first=True),
72 | ffn_cfg=dict(
73 | embed_dims=256,
74 | feedforward_channels=2048,
75 | num_fcs=2,
76 | ffn_drop=0.1,
77 | act_cfg=dict(type='ReLU', inplace=True))),
78 | return_intermediate=True),
79 | positional_encoding=dict(num_feats=128, normalize=True),
80 | bbox_head=dict(
81 | type='DETRHead',
82 | num_classes=num_classes,
83 | embed_dims=256,
84 | loss_cls=dict(
85 | type='CrossEntropyLoss',
86 | bg_cls_weight=0.1,
87 | use_sigmoid=False,
88 | loss_weight=1.0,
89 | class_weight=1.0),
90 | loss_bbox=dict(type='L1Loss', loss_weight=5.0),
91 | loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
92 | train_cfg=dict(
93 | assigner=dict(
94 | type='HungarianAssigner',
95 | match_costs=[  # cost weights mirror the loss weights above (1.0 / 5.0 / 2.0)
96 | dict(type='ClassificationCost', weight=1.0),
97 | dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
98 | dict(type='IoUCost', iou_mode='giou', weight=2.0)
99 | ])),
100 | test_cfg=dict(max_per_img=100))
101 |
--------------------------------------------------------------------------------
/configs/detr/unitmodule_detr_r50_500e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # only the baseline DETR config is inherited; unit_module is defined inline below
2 | './detr_r50_500e_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # not referenced anywhere else in this config file
6 | norm_cfg = dict(type='GN', num_groups=8)  # shared by UnitBackbone and THead
7 | act_cfg = dict(type='ReLU')  # shared by UnitBackbone and THead
8 |
9 | k_1, k_2 = 9, 9  # passed as large_kernels to UnitBackbone
10 | c_s1, c_s2 = 32, 32  # stem_channels; c_s2 also sets the THead in/hid channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=1000),  # by far the largest of the five loss weights
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # only the detector type and data preprocessor are overridden
39 | type='UnitDETR',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2))  # same clip_grad values as the baseline DETR config
46 |
47 | train_pipeline = [  # baseline training pipeline with UnderwaterColorRandomTransfer inserted after Resize
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # swaps the new pipeline into the inherited training dataset
57 |
--------------------------------------------------------------------------------
/configs/dino/dino_4scale_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # inherit the DUO dataset settings and the MMDetection runtime defaults
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # the "1x" schedule in the file name
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)  # validate after every epoch
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # single epoch-based step decay, no warm-up entry
11 | dict(
12 | type='MultiStepLR',
13 | begin=0,
14 | end=max_epochs,
15 | by_epoch=True,
16 | milestones=[11],  # LR is multiplied by gamma at epoch 11
17 | gamma=0.1)
18 | ]
19 | optim_wrapper = dict(
20 | type='OptimWrapper',
21 | optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001),
22 | clip_grad=dict(max_norm=0.1, norm_type=2),
23 | paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1))))  # backbone parameters get a 0.1 LR multiplier
24 |
25 | num_classes = 4  # matches the four entries of classes in the DUO dataset config
26 | model = dict(
27 | type='DINO',
28 | num_queries=900,
29 | with_box_refine=True,
30 | as_two_stage=True,
31 | data_preprocessor=dict(
32 | type='DetDataPreprocessor',
33 | mean=_base_.mean_rgb,  # RGB-ordered statistics, paired with bgr_to_rgb=True
34 | std=_base_.std_rgb,
35 | bgr_to_rgb=True,
36 | pad_size_divisor=32),
37 | backbone=dict(
38 | type='ResNet',
39 | depth=50,
40 | num_stages=4,
41 | out_indices=(1, 2, 3),  # three stages feed the neck (in_channels 512/1024/2048)
42 | frozen_stages=1,
43 | norm_cfg=dict(type='BN', requires_grad=False),
44 | norm_eval=True,
45 | style='pytorch',
46 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
47 | neck=dict(
48 | type='ChannelMapper',
49 | in_channels=[512, 1024, 2048],
50 | kernel_size=1,
51 | out_channels=256,
52 | act_cfg=None,
53 | norm_cfg=dict(type='GN', num_groups=32),
54 | num_outs=4),  # four output levels from three inputs ("4scale" in the file name); matches num_levels=4 below
55 | encoder=dict(
56 | num_layers=6,
57 | layer_cfg=dict(
58 | self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
59 | ffn_cfg=dict(
60 | embed_dims=256, feedforward_channels=2048, ffn_drop=0.0))),
61 | decoder=dict(
62 | num_layers=6,
63 | return_intermediate=True,
64 | layer_cfg=dict(
65 | self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0),
66 | cross_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0),
67 | ffn_cfg=dict(
68 | embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)),
69 | post_norm_cfg=None),
70 | positional_encoding=dict(
71 | num_feats=128, normalize=True, offset=0.0, temperature=20),
72 | bbox_head=dict(
73 | type='DINOHead',
74 | num_classes=num_classes,
75 | sync_cls_avg_factor=True,
76 | loss_cls=dict(
77 | type='FocalLoss',
78 | use_sigmoid=True,
79 | gamma=2.0,
80 | alpha=0.25,
81 | loss_weight=1.0),
82 | loss_bbox=dict(type='L1Loss', loss_weight=5.0),
83 | loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
84 | dn_cfg=dict(
85 | label_noise_scale=0.5,
86 | box_noise_scale=1.0,
87 | group_cfg=dict(dynamic=True, num_groups=None, num_dn_queries=100)),
88 | train_cfg=dict(
89 | assigner=dict(
90 | type='HungarianAssigner',
91 | match_costs=[  # box/IoU cost weights equal the loss weights; the classification cost (2.0) differs from loss_cls (1.0)
92 | dict(type='FocalLossCost', weight=2.0),
93 | dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
94 | dict(type='IoUCost', iou_mode='giou', weight=2.0)
95 | ])),
96 | test_cfg=dict(max_per_img=300))
97 |
--------------------------------------------------------------------------------
/configs/dino/unitmodule_dino_4scale_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # only the baseline DINO config is inherited; unit_module is defined inline below
2 | './dino_4scale_r50_1x_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # not referenced anywhere else in this config file
6 | norm_cfg = dict(type='GN', num_groups=8)  # shared by UnitBackbone and THead
7 | act_cfg = dict(type='ReLU')  # shared by UnitBackbone and THead
8 |
9 | k_1, k_2 = 9, 9  # passed as large_kernels to UnitBackbone
10 | c_s1, c_s2 = 32, 32  # stem_channels; c_s2 also sets the THead in/hid channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=1000),  # by far the largest of the five loss weights
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # only the detector type and data preprocessor are overridden
39 | type='UnitDINO',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2))  # same clip_grad values as the baseline DINO config
46 |
47 | train_pipeline = [  # baseline training pipeline with UnderwaterColorRandomTransfer inserted after Resize
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # swaps the new pipeline into the inherited training dataset
57 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # Faster R-CNN (ResNet-50 + FPN) baseline; the bases supply the DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # shared by train_cfg and the MultiStepLR schedule below
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # iteration-based linear warmup followed by epoch-based step decay
11 | dict(
12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # warmup over iterations 0-500
13 | dict(
14 | type='MultiStepLR',
15 | begin=0,
16 | end=max_epochs,
17 | by_epoch=True,
18 | milestones=[8, 11],  # lr multiplied by gamma at these epochs
19 | gamma=0.1)
20 | ]
21 | optim_wrapper = dict(
22 | type='OptimWrapper',
23 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
24 |
25 | num_classes = 4  # passed to roi_head.bbox_head below
26 | model = dict(
27 | type='FasterRCNN',
28 | data_preprocessor=dict(
29 | type='DetDataPreprocessor',
30 | mean=_base_.mean_rgb,  # mean_rgb / std_rgb are read from the base configs
31 | std=_base_.std_rgb,
32 | bgr_to_rgb=True,
33 | pad_size_divisor=32),
34 | backbone=dict(
35 | type='ResNet',
36 | depth=50,
37 | num_stages=4,
38 | out_indices=(0, 1, 2, 3),
39 | frozen_stages=1,
40 | norm_cfg=dict(type='BN', requires_grad=True),
41 | norm_eval=True,
42 | style='pytorch',
43 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44 | neck=dict(
45 | type='FPN',
46 | in_channels=[256, 512, 1024, 2048],
47 | out_channels=256,
48 | num_outs=5),
49 | rpn_head=dict(  # region proposal head; in_channels matches the FPN out_channels (256)
50 | type='RPNHead',
51 | in_channels=256,
52 | feat_channels=256,
53 | anchor_generator=dict(
54 | type='AnchorGenerator',
55 | scales=[8],
56 | ratios=[0.5, 1.0, 2.0],
57 | strides=[4, 8, 16, 32, 64]),  # five strides, same count as the FPN num_outs
58 | bbox_coder=dict(
59 | type='DeltaXYWHBBoxCoder',
60 | target_means=[0.0, 0.0, 0.0, 0.0],
61 | target_stds=[1.0, 1.0, 1.0, 1.0]),
62 | loss_cls=dict(
63 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
64 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
65 | roi_head=dict(  # second-stage head operating on RoI features
66 | type='StandardRoIHead',
67 | bbox_roi_extractor=dict(
68 | type='SingleRoIExtractor',
69 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
70 | out_channels=256,
71 | featmap_strides=[4, 8, 16, 32]),
72 | bbox_head=dict(
73 | type='Shared2FCBBoxHead',
74 | in_channels=256,
75 | fc_out_channels=1024,
76 | roi_feat_size=7,  # same value as the RoIAlign output_size above
77 | num_classes=num_classes,
78 | bbox_coder=dict(
79 | type='DeltaXYWHBBoxCoder',
80 | target_means=[0.0, 0.0, 0.0, 0.0],
81 | target_stds=[0.1, 0.1, 0.2, 0.2]),
82 | reg_class_agnostic=False,
83 | loss_cls=dict(
84 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
85 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
86 | train_cfg=dict(  # training-time settings for the RPN, the proposal step and the RCNN stage
87 | rpn=dict(
88 | assigner=dict(
89 | type='MaxIoUAssigner',
90 | pos_iou_thr=0.7,
91 | neg_iou_thr=0.3,
92 | min_pos_iou=0.3,
93 | match_low_quality=True,
94 | ignore_iof_thr=-1),
95 | sampler=dict(
96 | type='RandomSampler',
97 | num=256,
98 | pos_fraction=0.5,
99 | neg_pos_ub=-1,
100 | add_gt_as_proposals=False),
101 | allowed_border=-1,
102 | pos_weight=-1,
103 | debug=False),
104 | rpn_proposal=dict(
105 | nms_pre=2000,
106 | max_per_img=1000,
107 | nms=dict(type='nms', iou_threshold=0.7),
108 | min_bbox_size=0),
109 | rcnn=dict(
110 | assigner=dict(
111 | type='MaxIoUAssigner',
112 | pos_iou_thr=0.5,
113 | neg_iou_thr=0.5,
114 | min_pos_iou=0.5,
115 | match_low_quality=False,
116 | ignore_iof_thr=-1),
117 | sampler=dict(
118 | type='RandomSampler',
119 | num=512,
120 | pos_fraction=0.25,
121 | neg_pos_ub=-1,
122 | add_gt_as_proposals=True),
123 | pos_weight=-1,
124 | debug=False)),
125 | test_cfg=dict(  # inference-time settings for the RPN and the RCNN stage
126 | rpn=dict(
127 | nms_pre=1000,
128 | max_per_img=1000,
129 | nms=dict(type='nms', iou_threshold=0.7),
130 | min_bbox_size=0),
131 | rcnn=dict(
132 | score_thr=0.05,
133 | nms=dict(type='nms', iou_threshold=0.5),
134 | max_per_img=100)))
135 |
136 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/unitmodule_faster_rcnn_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # UnitModule variant of the Faster R-CNN config named on the next line
2 | './faster_rcnn_r50_1x_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably read by the train/test tooling - confirm
6 | norm_cfg = dict(type='GN', num_groups=8)  # passed to both unit_backbone and t_head below
7 | act_cfg = dict(type='ReLU')  # passed to both unit_backbone and t_head below
8 |
9 | k_1, k_2 = 9, 9  # used as large_kernels of UnitBackbone
10 | c_s1, c_s2 = 32, 32  # used as stem_channels; c_s2 also sets the THead channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(  # UnitModule settings; handed to the data preprocessor in `model` below
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,  # equals the last entry of stem_channels
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=500),
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
39 | type='UnitFasterRCNN',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
46 |
47 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and RandomFlip
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 |
--------------------------------------------------------------------------------
/configs/fcos/fcos_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # FCOS (ResNet-50 + FPN) baseline; the bases supply the DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # shared by train_cfg and the MultiStepLR schedule below
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # constant-factor warmup over iterations 0-500, then epoch-based step decay
11 | dict(
12 | type='ConstantLR',
13 | factor=1.0 / 3,
14 | by_epoch=False,
15 | begin=0,
16 | end=500),
17 | dict(
18 | type='MultiStepLR',
19 | begin=0,
20 | end=max_epochs,
21 | by_epoch=True,
22 | milestones=[8, 11],  # lr multiplied by gamma at these epochs
23 | gamma=0.1)
24 | ]
25 | optim_wrapper = dict(
26 | type='OptimWrapper',
27 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001),
28 | paramwise_cfg=dict(bias_lr_mult=2.0, bias_decay_mult=0.0),
29 | clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
30 |
31 | num_classes = 4  # passed to bbox_head below
32 | model = dict(
33 | type='FCOS',
34 | data_preprocessor=dict(
35 | type='DetDataPreprocessor',
36 | mean=_base_.mean_bgr,  # mean_bgr is read from the base configs
37 | std=[1.0, 1.0, 1.0],
38 | bgr_to_rgb=False,  # NOTE(review): BGR input with unit std presumably matches the caffe-style checkpoint below - confirm
39 | pad_size_divisor=32),
40 | backbone=dict(
41 | type='ResNet',
42 | depth=50,
43 | num_stages=4,
44 | out_indices=(0, 1, 2, 3),
45 | frozen_stages=1,
46 | norm_cfg=dict(type='BN', requires_grad=False),
47 | norm_eval=True,
48 | style='caffe',
49 | init_cfg=dict(
50 | type='Pretrained',
51 | checkpoint='open-mmlab://detectron/resnet50_caffe')),
52 | neck=dict(
53 | type='FPN',
54 | in_channels=[256, 512, 1024, 2048],
55 | out_channels=256,
56 | start_level=1,
57 | add_extra_convs='on_output',
58 | num_outs=5,
59 | relu_before_extra_convs=True),
60 | bbox_head=dict(
61 | type='FCOSHead',
62 | num_classes=num_classes,
63 | in_channels=256,  # matches the FPN out_channels
64 | stacked_convs=4,
65 | feat_channels=256,
66 | strides=[8, 16, 32, 64, 128],  # five strides, same count as the FPN num_outs
67 | loss_cls=dict(
68 | type='FocalLoss',
69 | use_sigmoid=True,
70 | gamma=2.0,
71 | alpha=0.25,
72 | loss_weight=1.0),
73 | loss_bbox=dict(type='IoULoss', loss_weight=1.0),
74 | loss_centerness=dict(
75 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
76 | test_cfg=dict(  # inference-time filtering and NMS settings
77 | nms_pre=1000,
78 | min_bbox_size=0,
79 | score_thr=0.05,
80 | nms=dict(type='nms', iou_threshold=0.5),
81 | max_per_img=100))
82 |
--------------------------------------------------------------------------------
/configs/fcos/unitmodule_fcos_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # UnitModule variant of the FCOS config named on the next line
2 | './fcos_r50_1x_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably read by the train/test tooling - confirm
6 | norm_cfg = dict(type='GN', num_groups=8)  # passed to both unit_backbone and t_head below
7 | act_cfg = dict(type='ReLU')  # passed to both unit_backbone and t_head below
8 |
9 | k_1, k_2 = 9, 9  # used as large_kernels of UnitBackbone
10 | c_s1, c_s2 = 32, 32  # used as stem_channels; c_s2 also sets the THead channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(  # UnitModule settings; handed to the data preprocessor in `model` below
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,  # equals the last entry of stem_channels
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=500),
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
39 | type='UnitFCOS',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # same clip_grad values as ./fcos_r50_1x_duo.py already sets
46 |
47 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and RandomFlip
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 |
--------------------------------------------------------------------------------
/configs/retinanet/retinanet_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # RetinaNet (ResNet-50 + FPN) baseline; the bases supply the DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # shared by train_cfg and the MultiStepLR schedule below
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # iteration-based linear warmup followed by epoch-based step decay
11 | dict(
12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # warmup over iterations 0-500
13 | dict(
14 | type='MultiStepLR',
15 | begin=0,
16 | end=max_epochs,
17 | by_epoch=True,
18 | milestones=[8, 11],  # lr multiplied by gamma at these epochs
19 | gamma=0.1)
20 | ]
21 | optim_wrapper = dict(
22 | type='OptimWrapper',
23 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
24 |
25 | num_classes = 4  # passed to bbox_head below
26 | model = dict(
27 | type='RetinaNet',
28 | data_preprocessor=dict(
29 | type='DetDataPreprocessor',
30 | mean=_base_.mean_rgb,  # mean_rgb / std_rgb are read from the base configs
31 | std=_base_.std_rgb,
32 | bgr_to_rgb=True,
33 | pad_size_divisor=32),
34 | backbone=dict(
35 | type='ResNet',
36 | depth=50,
37 | num_stages=4,
38 | out_indices=(0, 1, 2, 3),
39 | frozen_stages=1,
40 | norm_cfg=dict(type='BN', requires_grad=True),
41 | norm_eval=True,
42 | style='pytorch',
43 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44 | neck=dict(
45 | type='FPN',
46 | in_channels=[256, 512, 1024, 2048],
47 | out_channels=256,
48 | start_level=1,
49 | add_extra_convs='on_input',
50 | num_outs=5),
51 | bbox_head=dict(
52 | type='RetinaHead',
53 | num_classes=num_classes,
54 | in_channels=256,  # matches the FPN out_channels
55 | stacked_convs=4,
56 | feat_channels=256,
57 | anchor_generator=dict(
58 | type='AnchorGenerator',
59 | octave_base_scale=4,
60 | scales_per_octave=3,
61 | ratios=[0.5, 1.0, 2.0],
62 | strides=[8, 16, 32, 64, 128]),  # five strides, same count as the FPN num_outs
63 | bbox_coder=dict(
64 | type='DeltaXYWHBBoxCoder',
65 | target_means=[0.0, 0.0, 0.0, 0.0],
66 | target_stds=[1.0, 1.0, 1.0, 1.0]),
67 | loss_cls=dict(
68 | type='FocalLoss',
69 | use_sigmoid=True,
70 | gamma=2.0,
71 | alpha=0.25,
72 | loss_weight=1.0),
73 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
74 | train_cfg=dict(  # training-time label assignment and sampling
75 | assigner=dict(
76 | type='MaxIoUAssigner',
77 | pos_iou_thr=0.5,
78 | neg_iou_thr=0.4,
79 | min_pos_iou=0,
80 | ignore_iof_thr=-1),
81 | sampler=dict(type='PseudoSampler'),
82 | allowed_border=-1,
83 | pos_weight=-1,
84 | debug=False),
85 | test_cfg=dict(  # inference-time filtering and NMS settings
86 | nms_pre=1000,
87 | min_bbox_size=0,
88 | score_thr=0.05,
89 | nms=dict(type='nms', iou_threshold=0.5),
90 | max_per_img=100))
91 |
--------------------------------------------------------------------------------
/configs/retinanet/unitmodule_retinanet_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # UnitModule variant of the RetinaNet config named on the next line
2 | './retinanet_r50_1x_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably read by the train/test tooling - confirm
6 | norm_cfg = dict(type='GN', num_groups=8)  # passed to both unit_backbone and t_head below
7 | act_cfg = dict(type='ReLU')  # passed to both unit_backbone and t_head below
8 |
9 | k_1, k_2 = 9, 9  # used as large_kernels of UnitBackbone
10 | c_s1, c_s2 = 32, 32  # used as stem_channels; c_s2 also sets the THead channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(  # UnitModule settings; handed to the data preprocessor in `model` below
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,  # equals the last entry of stem_channels
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=500),
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
39 | type='UnitRetinaNet',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
46 |
47 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and RandomFlip
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 |
--------------------------------------------------------------------------------
/configs/rtmdet/rtmdet_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # RTMDet-s baseline; the bases supply the MMYOLO-flavoured DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection_mmyolo.py',
3 | '../_base_/default_runtime_mmyolo.py',
4 | ]
5 | env_cfg = dict(cudnn_benchmark=True)
6 |
7 | max_epochs = 100
8 | num_last_epochs = 15  # max_epochs - num_last_epochs (= 85) is reused by train_cfg and the cosine schedule below
9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
10 | val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])  # dynamic_intervals evaluates to [(85, 1)]; presumably validation runs every epoch from epoch 85 - confirm
11 | val_cfg = dict(type='ValLoop')
12 | test_cfg = dict(type='TestLoop')
13 |
14 | param_scheduler = [  # iteration-based linear warmup, then a cosine schedule over a window of epochs
15 | dict(
16 | type='LinearLR', start_factor=1e-05, by_epoch=False, begin=0,
17 | end=1000),  # warmup over iterations 0-1000
18 | dict(
19 | type='CosineAnnealingLR',
20 | eta_min=0.0002,
21 | begin=max_epochs // 2,  # = 50
22 | T_max=max_epochs - num_last_epochs,  # = 85; NOTE(review): larger than end - begin (35), so the cosine presumably stops before reaching eta_min - confirm intended
23 | end=max_epochs - num_last_epochs,  # = 85
24 | by_epoch=True,
25 | convert_to_iter_based=True)
26 | ]
27 | optim_wrapper = dict(
28 | type='OptimWrapper',
29 | optimizer=dict(type='AdamW', lr=0.004, weight_decay=0.05),
30 | paramwise_cfg=dict(
31 | norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
32 |
33 | custom_hooks = [
34 | dict(
35 | type='EMAHook',
36 | ema_type='ExpMomentumEMA',
37 | momentum=0.0002,
38 | update_buffers=True,
39 | strict_load=False,
40 | priority=49),
41 | ]
42 |
43 | checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth'  # used by the backbone init_cfg below
44 | num_classes = 4  # passed to the head module and the assigner below
45 | model = dict(
46 | type='YOLODetector',
47 | data_preprocessor=dict(
48 | type='YOLOv5DetDataPreprocessor',
49 | mean=_base_.mean_bgr,  # mean_bgr / std_bgr are read from the base configs
50 | std=_base_.std_bgr,
51 | bgr_to_rgb=False),
52 | backbone=dict(
53 | type='CSPNeXt',
54 | arch='P5',
55 | expand_ratio=0.5,
56 | deepen_factor=0.33,
57 | widen_factor=0.5,
58 | channel_attention=True,
59 | norm_cfg=dict(type='BN'),
60 | act_cfg=dict(type='SiLU', inplace=True),
61 | init_cfg=dict(
62 | type='Pretrained',
63 | prefix='backbone.',
64 | checkpoint=checkpoint,
65 | map_location='cpu')),
66 | neck=dict(
67 | type='CSPNeXtPAFPN',
68 | deepen_factor=0.33,  # same deepen/widen factors as the backbone
69 | widen_factor=0.5,
70 | in_channels=[256, 512, 1024],
71 | out_channels=256,
72 | num_csp_blocks=3,
73 | expand_ratio=0.5,
74 | norm_cfg=dict(type='BN'),
75 | act_cfg=dict(type='SiLU', inplace=True)),
76 | bbox_head=dict(
77 | type='RTMDetHead',
78 | head_module=dict(
79 | type='RTMDetSepBNHeadModule',
80 | num_classes=num_classes,
81 | in_channels=256,
82 | stacked_convs=2,
83 | feat_channels=256,
84 | norm_cfg=dict(type='BN'),
85 | act_cfg=dict(type='SiLU', inplace=True),
86 | share_conv=True,
87 | pred_kernel_size=1,
88 | featmap_strides=[8, 16, 32],
89 | widen_factor=0.5),
90 | prior_generator=dict(
91 | type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]),  # same strides as featmap_strides above
92 | bbox_coder=dict(type='DistancePointBBoxCoder'),
93 | loss_cls=dict(
94 | type='mmdet.QualityFocalLoss',
95 | use_sigmoid=True,
96 | beta=2.0,
97 | loss_weight=1.0),
98 | loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)),
99 | train_cfg=dict(  # training-time label assignment
100 | assigner=dict(
101 | type='BatchDynamicSoftLabelAssigner',
102 | num_classes=num_classes,
103 | topk=13,
104 | iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
105 | allowed_border=-1,
106 | pos_weight=-1,
107 | debug=False),
108 | test_cfg=dict(  # inference-time filtering and NMS settings
109 | multi_label=True,
110 | nms_pre=30000,
111 | score_thr=0.001,
112 | nms=dict(type='nms', iou_threshold=0.65),
113 | max_per_img=300))
114 |
--------------------------------------------------------------------------------
/configs/rtmdet/unitmodule_rtmdet_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # RTMDet-s baseline plus the shared UnitModule settings file
2 | './rtmdet_s_100e_duo.py',
3 | '../unitmodule/unitmodule.py',
4 | ]
5 |
6 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
7 | type='UnitYOLODetector',
8 | data_preprocessor=dict(
9 | type='UnitYOLOv5DetDataPreprocessor',
10 | unit_module=_base_.unit_module)  # unit_module is read from the base configs
11 | )
12 |
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
14 |
15 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and Pad
16 | dict(type='LoadImageFromFile'),
17 | dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 | dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
19 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 | dict(type='mmdet.Pad',
21 | pad_to_square=True,
22 | pad_val=dict(img=(114.0, 114.0, 114.0))),  # padded image pixels are filled with 114
23 | dict(type='mmdet.RandomFlip', prob=0.5),
24 | dict(type='mmdet.PackDetInputs')
25 | ]
26 |
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
28 |
--------------------------------------------------------------------------------
/configs/tood/tood_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # TOOD (ResNet-50 + FPN) baseline; the bases supply the DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection.py',
3 | '../_base_/default_runtime.py',
4 | ]
5 | max_epochs = 12  # shared by train_cfg and the MultiStepLR schedule below
6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1)
7 | val_cfg = dict(type='ValLoop')
8 | test_cfg = dict(type='TestLoop')
9 |
10 | param_scheduler = [  # iteration-based linear warmup followed by epoch-based step decay
11 | dict(
12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),  # warmup over iterations 0-500
13 | dict(
14 | type='MultiStepLR',
15 | begin=0,
16 | end=max_epochs,
17 | by_epoch=True,
18 | milestones=[8, 11],  # lr multiplied by gamma at these epochs
19 | gamma=0.1)
20 | ]
21 | optim_wrapper = dict(
22 | type='OptimWrapper',
23 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
24 |
25 | num_classes = 4  # passed to bbox_head below
26 | model = dict(
27 | type='TOOD',
28 | data_preprocessor=dict(
29 | type='DetDataPreprocessor',
30 | mean=_base_.mean_rgb,  # mean_rgb / std_rgb are read from the base configs
31 | std=_base_.std_rgb,
32 | bgr_to_rgb=True,
33 | pad_size_divisor=32),
34 | backbone=dict(
35 | type='ResNet',
36 | depth=50,
37 | num_stages=4,
38 | out_indices=(0, 1, 2, 3),
39 | frozen_stages=1,
40 | norm_cfg=dict(type='BN', requires_grad=True),
41 | norm_eval=True,
42 | style='pytorch',
43 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
44 | neck=dict(
45 | type='FPN',
46 | in_channels=[256, 512, 1024, 2048],
47 | out_channels=256,
48 | start_level=1,
49 | add_extra_convs='on_output',
50 | num_outs=5),
51 | bbox_head=dict(
52 | type='TOODHead',
53 | num_classes=num_classes,
54 | in_channels=256,  # matches the FPN out_channels
55 | stacked_convs=6,
56 | feat_channels=256,
57 | anchor_type='anchor_free',
58 | anchor_generator=dict(
59 | type='AnchorGenerator',
60 | ratios=[1.0],
61 | octave_base_scale=8,
62 | scales_per_octave=1,
63 | strides=[8, 16, 32, 64, 128]),  # five strides, same count as the FPN num_outs
64 | bbox_coder=dict(
65 | type='DeltaXYWHBBoxCoder',
66 | target_means=[0.0, 0.0, 0.0, 0.0],
67 | target_stds=[0.1, 0.1, 0.2, 0.2]),
68 | initial_loss_cls=dict(
69 | type='FocalLoss',
70 | use_sigmoid=True,
71 | activated=True,
72 | gamma=2.0,
73 | alpha=0.25,
74 | loss_weight=1.0),
75 | loss_cls=dict(
76 | type='QualityFocalLoss',
77 | use_sigmoid=True,
78 | activated=True,
79 | beta=2.0,
80 | loss_weight=1.0),
81 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),
82 | train_cfg=dict(  # training-time label assignment
83 | initial_epoch=4,  # NOTE(review): presumably initial_assigner / initial_loss_cls apply for the first 4 epochs - confirm
84 | initial_assigner=dict(type='ATSSAssigner', topk=9),
85 | assigner=dict(type='TaskAlignedAssigner', topk=13),
86 | alpha=1,
87 | beta=6,
88 | allowed_border=-1,
89 | pos_weight=-1,
90 | debug=False),
91 | test_cfg=dict(  # inference-time filtering and NMS settings
92 | nms_pre=1000,
93 | min_bbox_size=0,
94 | score_thr=0.05,
95 | nms=dict(type='nms', iou_threshold=0.6),
96 | max_per_img=100))
97 |
--------------------------------------------------------------------------------
/configs/tood/unitmodule_tood_r50_1x_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # UnitModule variant of the TOOD config named on the next line
2 | './tood_r50_1x_duo.py',
3 | ]
4 |
5 | with_unit_module = True  # NOTE(review): not referenced again in this file; presumably read by the train/test tooling - confirm
6 | norm_cfg = dict(type='GN', num_groups=8)  # passed to both unit_backbone and t_head below
7 | act_cfg = dict(type='ReLU')  # passed to both unit_backbone and t_head below
8 |
9 | k_1, k_2 = 9, 9  # used as large_kernels of UnitBackbone
10 | c_s1, c_s2 = 32, 32  # used as stem_channels; c_s2 also sets the THead channels and the AssistingColorCastLoss channels
11 |
12 | unit_module = dict(  # UnitModule settings; handed to the data preprocessor in `model` below
13 | type='UnitModule',
14 | unit_backbone=dict(
15 | type='UnitBackbone',
16 | stem_channels=(c_s1, c_s2),
17 | large_kernels=(k_1, k_2),
18 | small_kernels=(3, 3),
19 | dw_ratio=1.0,
20 | norm_cfg=norm_cfg,
21 | act_cfg=act_cfg),
22 | t_head=dict(
23 | type='THead',
24 | in_channels=c_s2,  # equals the last entry of stem_channels
25 | hid_channels=c_s2,
26 | out_channels=3,
27 | norm_cfg=norm_cfg,
28 | act_cfg=act_cfg),
29 | a_head=dict(type='AHead'),
30 | loss_t=dict(type='TransmissionLoss', loss_weight=500),
31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.1),
32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
35 | alpha=0.9,
36 | t_min=0.001)
37 |
38 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
39 | type='UnitTOOD',
40 | data_preprocessor=dict(
41 | type='UnitDetDataPreprocessor',
42 | unit_module=unit_module)
43 | )
44 |
45 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
46 |
47 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and RandomFlip
48 | dict(type='LoadImageFromFile'),
49 | dict(type='LoadAnnotations', with_bbox=True),
50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
52 | dict(type='RandomFlip', prob=0.5),
53 | dict(type='PackDetInputs')
54 | ]
55 |
56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
57 |
--------------------------------------------------------------------------------
/configs/unitmodule/unitmodule.py:
--------------------------------------------------------------------------------
1 | with_unit_module = True  # shared UnitModule settings file; NOTE(review): this flag is not referenced again here, presumably read by the train/test tooling - confirm
2 | norm_cfg = dict(type='GN', num_groups=8)  # passed to both unit_backbone and t_head below
3 | act_cfg = dict(type='ReLU')  # passed to both unit_backbone and t_head below
4 |
5 | k_1, k_2 = 9, 9  # used as large_kernels of UnitBackbone
6 | c_s1, c_s2 = 32, 32  # used as stem_channels; c_s2 also sets the THead channels and the AssistingColorCastLoss channels
7 |
8 | unit_module = dict(  # referenced as _base_.unit_module by the YOLO-family UnitModule configs in this repo
9 | type='UnitModule',
10 | unit_backbone=dict(
11 | type='UnitBackbone',
12 | stem_channels=(c_s1, c_s2),
13 | large_kernels=(k_1, k_2),
14 | small_kernels=(3, 3),
15 | dw_ratio=1.0,
16 | norm_cfg=norm_cfg,
17 | act_cfg=act_cfg),
18 | t_head=dict(
19 | type='THead',
20 | in_channels=c_s2,  # equals the last entry of stem_channels
21 | hid_channels=c_s2,
22 | out_channels=3,
23 | norm_cfg=norm_cfg,
24 | act_cfg=act_cfg),
25 | a_head=dict(type='AHead'),
26 | loss_t=dict(type='TransmissionLoss', loss_weight=500),
27 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01),
28 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01),
29 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1),
30 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1),
31 | alpha=0.9,
32 | t_min=0.001)
33 |
--------------------------------------------------------------------------------
/configs/yolov5/unitmodule_yolov5_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # YOLOv5-s baseline plus the shared UnitModule settings file
2 | './yolov5_s_100e_duo.py',
3 | '../unitmodule/unitmodule.py',
4 | ]
5 |
6 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
7 | type='UnitYOLODetector',
8 | data_preprocessor=dict(
9 | type='UnitYOLOv5DetDataPreprocessor',
10 | unit_module=_base_.unit_module)  # unit_module is read from the base configs
11 | )
12 |
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
14 |
15 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and Pad
16 | dict(type='LoadImageFromFile'),
17 | dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 | dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
19 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 | dict(type='mmdet.Pad',
21 | pad_to_square=True,
22 | pad_val=dict(img=(114.0, 114.0, 114.0))),  # padded image pixels are filled with 114
23 | dict(type='mmdet.RandomFlip', prob=0.5),
24 | dict(type='mmdet.PackDetInputs')
25 | ]
26 |
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
28 |
--------------------------------------------------------------------------------
/configs/yolov5/yolov5_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # YOLOv5-s baseline; the bases supply the MMYOLO-flavoured DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection_mmyolo.py',
3 | '../_base_/default_runtime_mmyolo.py',
4 | ]
5 | env_cfg = dict(cudnn_benchmark=True)
6 |
7 | max_epochs = 100
8 | num_last_epochs = 15  # max_epochs - num_last_epochs (= 85) is used in dynamic_intervals below
9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
10 | val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])  # dynamic_intervals evaluates to [(85, 1)]; presumably validation runs every epoch from epoch 85 - confirm
11 | val_cfg = dict(type='ValLoop')
12 | test_cfg = dict(type='TestLoop')
13 |
14 | optim_wrapper = dict(
15 | type='OptimWrapper',
16 | optimizer=dict(
17 | type='SGD',
18 | lr=0.01,
19 | momentum=0.937,
20 | weight_decay=0.0005,
21 | nesterov=True,
22 | batch_size_per_gpu=_base_.train_bs),  # train_bs is read from the base configs
23 | constructor='YOLOv5OptimizerConstructor')
24 | default_hooks = dict(  # learning-rate schedule is configured through this hook; no param_scheduler list in this file
25 | param_scheduler=dict(
26 | type='YOLOv5ParamSchedulerHook',
27 | scheduler_type='linear',
28 | lr_factor=0.01,
29 | max_epochs=max_epochs),
30 | )
31 | custom_hooks = [
32 | dict(
33 | type='EMAHook',
34 | ema_type='ExpMomentumEMA',
35 | momentum=0.0001,
36 | update_buffers=True,
37 | strict_load=False,
38 | priority=49)
39 | ]
40 |
41 | num_classes = 4  # passed to the head module below
42 | # anchors for DUO
43 | anchors = [[(13, 12), (20, 18), (27, 25)],  # three groups of three (w, h)-style pairs, one group per entry of strides below - TODO confirm pair order
44 | [(35, 31), (44, 39), (55, 52)],
45 | [(80, 45), (74, 69), (116, 102)]]
46 | num_det_layers = 3  # NOTE(review): not referenced again in this file
47 | model = dict(
48 | type='YOLODetector',
49 | data_preprocessor=dict(
50 | type='YOLOv5DetDataPreprocessor',
51 | mean=[0.0, 0.0, 0.0],
52 | std=[255.0, 255.0, 255.0],  # zero mean with std 255: pixel values are divided by 255
53 | bgr_to_rgb=True),
54 | backbone=dict(
55 | type='YOLOv5CSPDarknet',
56 | deepen_factor=0.33,
57 | widen_factor=0.5,
58 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
59 | act_cfg=dict(type='SiLU', inplace=True)),
60 | neck=dict(
61 | type='YOLOv5PAFPN',
62 | deepen_factor=0.33,  # same deepen/widen factors as the backbone
63 | widen_factor=0.5,
64 | in_channels=[256, 512, 1024],
65 | out_channels=[256, 512, 1024],
66 | num_csp_blocks=3,
67 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
68 | act_cfg=dict(type='SiLU', inplace=True)),
69 | bbox_head=dict(
70 | type='YOLOv5Head',
71 | head_module=dict(
72 | type='YOLOv5HeadModule',
73 | num_classes=num_classes,
74 | in_channels=[256, 512, 1024],  # same list as the neck out_channels
75 | widen_factor=0.5,
76 | featmap_strides=[8, 16, 32],
77 | num_base_priors=3),  # equals the number of anchors per group above
78 | prior_generator=dict(
79 | type='mmdet.YOLOAnchorGenerator',
80 | base_sizes=anchors,
81 | strides=[8, 16, 32]),
82 | loss_cls=dict(
83 | type='mmdet.CrossEntropyLoss',
84 | use_sigmoid=True,
85 | reduction='mean',
86 | loss_weight=0.5),
87 | loss_bbox=dict(
88 | type='IoULoss',
89 | iou_mode='ciou',
90 | bbox_format='xywh',
91 | eps=1e-07,
92 | reduction='mean',
93 | loss_weight=0.05,
94 | return_iou=True),
95 | loss_obj=dict(
96 | type='mmdet.CrossEntropyLoss',
97 | use_sigmoid=True,
98 | reduction='mean',
99 | loss_weight=1.0),
100 | prior_match_thr=4.0,
101 | obj_level_weights=[4.0, 1.0, 0.4]),  # three weights, same count as featmap_strides
102 | test_cfg=dict(  # inference-time filtering and NMS settings
103 | multi_label=True,
104 | nms_pre=30000,
105 | score_thr=0.001,
106 | nms=dict(type='nms', iou_threshold=0.65),
107 | max_per_img=300))
108 |
--------------------------------------------------------------------------------
/configs/yolov6/unitmodule_yolov6_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # YOLOv6-s baseline plus the shared UnitModule settings file
2 | './yolov6_s_100e_duo.py',
3 | '../unitmodule/unitmodule.py',
4 | ]
5 |
6 | model = dict(  # sets only the detector type and the data preprocessor; other model keys are expected from the base config
7 | type='UnitYOLODetector',
8 | data_preprocessor=dict(
9 | type='UnitYOLOv5DetDataPreprocessor',
10 | unit_module=_base_.unit_module)  # unit_module is read from the base configs
11 | )
12 |
13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))  # gradient clipping: L2 norm capped at 35
14 |
15 | train_pipeline = [  # training pipeline; UnderwaterColorRandomTransfer sits between Resize and Pad
16 | dict(type='LoadImageFromFile'),
17 | dict(type='mmdet.LoadAnnotations', with_bbox=True),
18 | dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),  # img_scale is read from the base configs
19 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
20 | dict(type='mmdet.Pad',
21 | pad_to_square=True,
22 | pad_val=dict(img=(114.0, 114.0, 114.0))),  # padded image pixels are filled with 114
23 | dict(type='mmdet.RandomFlip', prob=0.5),
24 | dict(type='mmdet.PackDetInputs')
25 | ]
26 |
27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline))  # use the pipeline above for the training dataset
28 |
--------------------------------------------------------------------------------
/configs/yolov6/yolov6_s_100e_duo.py:
--------------------------------------------------------------------------------
1 | _base_ = [  # YOLOv6-s baseline; the bases supply the MMYOLO-flavoured DUO dataset and runtime settings
2 | '../_base_/datasets/duo_detection_mmyolo.py',
3 | '../_base_/default_runtime_mmyolo.py',
4 | ]
5 | env_cfg = dict(cudnn_benchmark=True)
6 |
7 | max_epochs = 100
8 | num_last_epochs = 15  # max_epochs - num_last_epochs (= 85) is used in dynamic_intervals below
9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
10 | val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])  # dynamic_intervals evaluates to [(85, 1)]; presumably validation runs every epoch from epoch 85 - confirm
11 | val_cfg = dict(type='ValLoop')
12 | test_cfg = dict(type='TestLoop')
13 |
14 | optim_wrapper = dict(
15 | type='OptimWrapper',
16 | optimizer=dict(
17 | type='SGD',
18 | lr=0.01,
19 | momentum=0.937,
20 | weight_decay=0.0005,
21 | nesterov=True,
22 | batch_size_per_gpu=_base_.train_bs),  # train_bs is read from the base configs
23 | constructor='YOLOv5OptimizerConstructor')
24 | default_hooks = dict(  # learning-rate schedule is configured through this hook; no param_scheduler list in this file
25 | param_scheduler=dict(
26 | type='YOLOv5ParamSchedulerHook',
27 | scheduler_type='cosine',
28 | lr_factor=0.01,
29 | max_epochs=max_epochs)
30 | )
31 | custom_hooks = [
32 | dict(
33 | type='EMAHook',
34 | ema_type='ExpMomentumEMA',
35 | momentum=0.0001,
36 | update_buffers=True,
37 | strict_load=False,
38 | priority=49)
39 | ]
40 |
41 | num_classes = 4  # passed to the head module and both assigners below
42 | model = dict(
43 | type='YOLODetector',
44 | data_preprocessor=dict(
45 | type='YOLOv5DetDataPreprocessor',
46 | mean=[0.0, 0.0, 0.0],
47 | std=[255.0, 255.0, 255.0],  # zero mean with std 255: pixel values are divided by 255
48 | bgr_to_rgb=True),
49 | backbone=dict(
50 | type='YOLOv6EfficientRep',
51 | deepen_factor=0.33,
52 | widen_factor=0.5,
53 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
54 | act_cfg=dict(type='ReLU', inplace=True)),
55 | neck=dict(
56 | type='YOLOv6RepPAFPN',
57 | deepen_factor=0.33,  # same deepen/widen factors as the backbone
58 | widen_factor=0.5,
59 | in_channels=[256, 512, 1024],
60 | out_channels=[128, 256, 512],
61 | num_csp_blocks=12,
62 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
63 | act_cfg=dict(type='ReLU', inplace=True)),
64 | bbox_head=dict(
65 | type='YOLOv6Head',
66 | head_module=dict(
67 | type='YOLOv6HeadModule',
68 | num_classes=num_classes,
69 | in_channels=[128, 256, 512],  # same list as the neck out_channels
70 | widen_factor=0.5,
71 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
72 | act_cfg=dict(type='SiLU', inplace=True),
73 | featmap_strides=[8, 16, 32]),
74 | loss_bbox=dict(
75 | type='IoULoss',
76 | iou_mode='giou',
77 | bbox_format='xyxy',
78 | reduction='mean',
79 | loss_weight=2.5,
80 | return_iou=False)),
81 | train_cfg=dict(  # training-time label assignment
82 | initial_epoch=4,  # NOTE(review): presumably initial_assigner applies for the first 4 epochs - confirm
83 | initial_assigner=dict(
84 | type='BatchATSSAssigner',
85 | num_classes=num_classes,
86 | topk=9,
87 | iou_calculator=dict(type='mmdet.BboxOverlaps2D')),
88 | assigner=dict(
89 | type='BatchTaskAlignedAssigner',
90 | num_classes=num_classes,
91 | topk=13,
92 | alpha=1,
93 | beta=6)),
94 | test_cfg=dict(  # inference-time filtering and NMS settings
95 | multi_label=True,
96 | nms_pre=30000,
97 | score_thr=0.001,
98 | nms=dict(type='nms', iou_threshold=0.65),
99 | max_per_img=300))
100 |
--------------------------------------------------------------------------------
/configs/yolov7/unitmodule_yolov7_t_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOv7-tiny + UnitModule on DUO: inherits the plain YOLOv7-tiny config and
# the shared UnitModule config, then overrides only what differs.
_base_ = [
    './yolov7_t_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# swap in the Unit* detector and data preprocessor; `unit_module` is read from
# the inherited base configs
model = dict(
    type='UnitYOLODetector',
    data_preprocessor=dict(
        type='UnitYOLOv5DetDataPreprocessor',
        unit_module=_base_.unit_module)
)

# gradient clipping by L2 norm (norm_type=2) with max_norm=55
optim_wrapper = dict(clip_grad=dict(max_norm=55, norm_type=2))

# training pipeline: load -> resize (keep ratio) -> underwater colour jitter
# -> pad to square with value 114 -> random horizontal flip -> pack
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='mmdet.LoadAnnotations', with_bbox=True),
    dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
    dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
    dict(type='mmdet.Pad',
         pad_to_square=True,
         pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.PackDetInputs')
]

# replace the training dataset pipeline with the one defined above
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 |
--------------------------------------------------------------------------------
/configs/yolov7/yolov7_t_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOv7-tiny baseline config for the DUO dataset (100 epochs), layered on the
# shared MMYOLO dataset and runtime base configs listed in `_base_`.
_base_ = [
    '../_base_/datasets/duo_detection_mmyolo.py',
    '../_base_/default_runtime_mmyolo.py',
]
env_cfg = dict(cudnn_benchmark=True)

# schedule: 100 epochs in total; the last `num_last_epochs` epochs get a
# different validation interval (see `dynamic_intervals` below)
max_epochs = 100
num_last_epochs = 15
# validate every 10 epochs; `dynamic_intervals` is given the pair
# (max_epochs - num_last_epochs, 1), i.e. interval 1 starting at epoch 85
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
                 val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# SGD with Nesterov momentum; `train_bs` comes from the inherited base configs
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD',
        lr=0.01,
        momentum=0.937,
        weight_decay=0.0005,
        nesterov=True,
        batch_size_per_gpu=_base_.train_bs),
    constructor='YOLOv7OptimWrapperConstructor')
# cosine learning-rate schedule handled by the YOLOv5 scheduler hook
default_hooks = dict(
    param_scheduler=dict(
        type='YOLOv5ParamSchedulerHook',
        scheduler_type='cosine',
        lr_factor=0.01,
        max_epochs=max_epochs),
)
# EMA hook configured with the exponential-momentum EMA variant
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49)
]

# number of object categories predicted by the head
num_classes = 4
# anchors for DUO
# three (w, h) anchors per feature level, one row per stride in [8, 16, 32]
anchors = [[(13, 12), (20, 18), (27, 25)],
           [(35, 31), (44, 39), (55, 52)],
           [(80, 45), (74, 69), (116, 102)]]
model = dict(
    type='YOLODetector',
    # mean 0 / std 255 rescales pixel values by 1/255; input is converted
    # from BGR to RGB
    data_preprocessor=dict(
        type='YOLOv5DetDataPreprocessor',
        mean=[0.0, 0.0, 0.0],
        std=[255.0, 255.0, 255.0],
        bgr_to_rgb=True),
    backbone=dict(
        type='YOLOv7Backbone',
        arch='Tiny',
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1)),
    neck=dict(
        type='YOLOv7PAFPN',
        block_cfg=dict(type='TinyDownSampleBlock', middle_ratio=0.25),
        upsample_feats_cat_first=False,
        in_channels=[128, 256, 512],
        out_channels=[64, 128, 256],
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1),
        is_tiny_version=True,
        use_repconv_outs=False),
    bbox_head=dict(
        type='YOLOv7Head',
        # NOTE(review): head in_channels are [128, 256, 512] while the neck
        # declares out_channels [64, 128, 256] — confirm against the neck's
        # actual output widths
        head_module=dict(
            type='YOLOv7HeadModule',
            num_classes=num_classes,
            in_channels=[128, 256, 512],
            featmap_strides=[8, 16, 32],
            num_base_priors=3),
        # anchor generator fed with the DUO anchors defined above
        prior_generator=dict(
            type='mmdet.YOLOAnchorGenerator',
            base_sizes=anchors,
            strides=[8, 16, 32]),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='mean',
            loss_weight=0.5),
        loss_bbox=dict(
            type='IoULoss',
            iou_mode='ciou',
            bbox_format='xywh',
            reduction='mean',
            loss_weight=0.05,
            return_iou=True),
        loss_obj=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='mean',
            loss_weight=1.0),
        prior_match_thr=4.0,
        # one objectness weight per feature level
        obj_level_weights=[4.0, 1.0, 0.4],
        simota_candidate_topk=10,
        simota_iou_weight=3.0,
        simota_cls_weight=1.0),
    # inference-time score filtering and NMS settings
    test_cfg=dict(
        multi_label=True,
        nms_pre=30000,
        score_thr=0.001,
        nms=dict(type='nms', iou_threshold=0.65),
        max_per_img=300))
108 |
--------------------------------------------------------------------------------
/configs/yolov8/unitmodule_yolov8_s_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOv8-s + UnitModule on DUO: inherits the plain YOLOv8-s config and the
# shared UnitModule config, then overrides only what differs.
_base_ = [
    './yolov8_s_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# swap in the Unit* detector and data preprocessor; `unit_module` is read from
# the inherited base configs
model = dict(
    type='UnitYOLODetector',
    data_preprocessor=dict(
        type='UnitYOLOv5DetDataPreprocessor',
        unit_module=_base_.unit_module)
)

# gradient clipping by L2 norm (norm_type=2) with max_norm=10
optim_wrapper = dict(clip_grad=dict(max_norm=10, norm_type=2))

# training pipeline: load -> resize (keep ratio) -> underwater colour jitter
# -> pad to square with value 114 -> random horizontal flip -> pack
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='mmdet.LoadAnnotations', with_bbox=True),
    dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
    dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
    dict(type='mmdet.Pad',
         pad_to_square=True,
         pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.PackDetInputs')
]

# replace the training dataset pipeline with the one defined above
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 |
--------------------------------------------------------------------------------
/configs/yolov8/yolov8_s_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOv8-s baseline config for the DUO dataset (100 epochs), layered on the
# shared MMYOLO dataset and runtime base configs listed in `_base_`.
_base_ = [
    '../_base_/datasets/duo_detection_mmyolo.py',
    '../_base_/default_runtime_mmyolo.py',
]
env_cfg = dict(cudnn_benchmark=True)

# schedule: 100 epochs in total; the last `num_last_epochs` epochs get a
# different validation interval (see `dynamic_intervals` below)
max_epochs = 100
num_last_epochs = 15
# validate every 10 epochs; `dynamic_intervals` is given the pair
# (max_epochs - num_last_epochs, 1), i.e. interval 1 starting at epoch 85
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
                 val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# SGD with Nesterov momentum and gradient clipping (max_norm=10);
# `train_bs` comes from the inherited base configs
optim_wrapper = dict(
    type='OptimWrapper',
    clip_grad=dict(max_norm=10.0),
    optimizer=dict(
        type='SGD',
        lr=0.01,
        momentum=0.937,
        weight_decay=0.0005,
        nesterov=True,
        batch_size_per_gpu=_base_.train_bs),
    constructor='YOLOv5OptimizerConstructor')
# linear learning-rate schedule handled by the YOLOv5 scheduler hook
default_hooks = dict(
    param_scheduler=dict(
        type='YOLOv5ParamSchedulerHook',
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
)
# EMA hook configured with the exponential-momentum EMA variant
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
]

# number of object categories, shared by the head and the assigner
num_classes = 4
model = dict(
    type='YOLODetector',
    # mean 0 / std 255 rescales pixel values by 1/255; input is converted
    # from BGR to RGB
    data_preprocessor=dict(
        type='YOLOv5DetDataPreprocessor',
        mean=[0.0, 0.0, 0.0],
        std=[255.0, 255.0, 255.0],
        bgr_to_rgb=True),
    backbone=dict(
        type='YOLOv8CSPDarknet',
        arch='P5',
        last_stage_out_channels=1024,
        deepen_factor=0.33,
        widen_factor=0.5,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='YOLOv8PAFPN',
        deepen_factor=0.33,
        widen_factor=0.5,
        in_channels=[256, 512, 1024],
        out_channels=[256, 512, 1024],
        num_csp_blocks=3,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='YOLOv8Head',
        head_module=dict(
            type='YOLOv8HeadModule',
            num_classes=num_classes,
            in_channels=[256, 512, 1024],
            widen_factor=0.5,
            reg_max=16,
            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
            act_cfg=dict(type='SiLU', inplace=True),
            featmap_strides=[8, 16, 32]),
        # point-based priors (no anchor boxes) at strides 8/16/32
        prior_generator=dict(
            type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='none',
            loss_weight=0.5),
        loss_bbox=dict(
            type='IoULoss',
            iou_mode='ciou',
            bbox_format='xyxy',
            reduction='sum',
            loss_weight=7.5,
            return_iou=False),
        loss_dfl=dict(
            type='mmdet.DistributionFocalLoss',
            reduction='mean',
            loss_weight=0.375)),
    # task-aligned label assignment using CIoU
    train_cfg=dict(
        assigner=dict(
            type='BatchTaskAlignedAssigner',
            num_classes=num_classes,
            use_ciou=True,
            topk=10,
            alpha=0.5,
            beta=6.0,
            eps=1e-09)),
    # inference-time score filtering and NMS settings
    test_cfg=dict(
        multi_label=True,
        nms_pre=30000,
        score_thr=0.001,
        nms=dict(type='nms', iou_threshold=0.7),
        max_per_img=300))
112 |
--------------------------------------------------------------------------------
/configs/yolox/unitmodule_yolox_s_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOX-s + UnitModule on DUO: inherits the plain YOLOX-s config and the
# shared UnitModule config, then overrides only what differs.
_base_ = [
    './yolox_s_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# swap in the Unit* detector and data preprocessor; `unit_module` is read from
# the inherited base configs
model = dict(
    type='UnitYOLODetector',
    data_preprocessor=dict(
        type='UnitYOLOv5DetDataPreprocessor',
        unit_module=_base_.unit_module)
)

# gradient clipping by L2 norm (norm_type=2) with max_norm=55
optim_wrapper = dict(clip_grad=dict(max_norm=55, norm_type=2))

# training pipeline: load -> resize (keep ratio) -> underwater colour jitter
# -> pad to square with value 114 -> random horizontal flip -> pack
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='mmdet.LoadAnnotations', with_bbox=True),
    dict(type='mmdet.Resize', scale=_base_.img_scale, keep_ratio=True),
    dict(type='UnderwaterColorRandomTransfer', hue_delta=5),
    dict(type='mmdet.Pad',
         pad_to_square=True,
         pad_val=dict(img=(114.0, 114.0, 114.0))),
    dict(type='mmdet.RandomFlip', prob=0.5),
    dict(type='mmdet.PackDetInputs')
]

# replace the training dataset pipeline with the one defined above
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
28 |
--------------------------------------------------------------------------------
/configs/yolox/yolox_s_100e_duo.py:
--------------------------------------------------------------------------------
# YOLOX-s baseline config for the DUO dataset (100 epochs), layered on the
# shared MMYOLO dataset and runtime base configs listed in `_base_`.
_base_ = [
    '../_base_/datasets/duo_detection_mmyolo.py',
    '../_base_/default_runtime_mmyolo.py',
]
# schedule: 100 epochs in total; the last `num_last_epochs` epochs use a
# constant learning rate and a different validation interval
max_epochs = 100
num_last_epochs = 15
# validate every 10 epochs; `dynamic_intervals` is given the pair
# (max_epochs - num_last_epochs, 1), i.e. interval 1 starting at epoch 85
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
                 val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# three-phase learning-rate schedule:
#   epochs [0, 5)    quadratic warm-up
#   epochs [5, 85)   cosine annealing down to eta_min
#   epochs [85, 100) constant (factor=1)
param_scheduler = [
    dict(
        type='mmdet.QuadraticWarmupLR',
        by_epoch=True,
        begin=0,
        end=5,
        convert_to_iter_based=True),
    dict(
        type='CosineAnnealingLR',
        eta_min=0.0005,
        begin=5,
        T_max=max_epochs - num_last_epochs,
        end=max_epochs - num_last_epochs,
        by_epoch=True,
        convert_to_iter_based=True),
    dict(type='ConstantLR', by_epoch=True, factor=1, begin=max_epochs - num_last_epochs, end=max_epochs)
]
# SGD with Nesterov momentum; weight decay is disabled (multiplier 0) for
# normalization layers and biases
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True),
    paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0))

# SyncNormHook is given priority 48 and the EMA hook priority 49
custom_hooks = [
    dict(type='mmdet.SyncNormHook', priority=48),
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49)
]

# number of object categories predicted by the head
num_classes = 4
model = dict(
    type='YOLODetector',
    # Kaiming-uniform initialisation for Conv2d layers; a = sqrt(5)
    init_cfg=dict(
        type='Kaiming',
        layer='Conv2d',
        a=2.23606797749979,
        distribution='uniform',
        mode='fan_in',
        nonlinearity='leaky_relu'),
    use_syncbn=False,
    # no mean/std is set here, unlike the other YOLO configs in this repo;
    # batches are padded to a multiple of 32 and randomly resized within
    # (480, 800) with interval=10
    data_preprocessor=dict(
        type='YOLOv5DetDataPreprocessor',
        pad_size_divisor=32,
        batch_augments=[
            dict(
                type='YOLOXBatchSyncRandomResize',
                random_size_range=(480, 800),
                size_divisor=32,
                interval=10)
        ]),
    backbone=dict(
        type='YOLOXCSPDarknet',
        deepen_factor=0.33,
        widen_factor=0.5,
        out_indices=(2, 3, 4),
        # NOTE(review): `spp_kernal_sizes` spelling must match the backbone's
        # parameter name — do not "fix" it here without checking the class
        spp_kernal_sizes=(5, 9, 13),
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='YOLOXPAFPN',
        deepen_factor=0.33,
        widen_factor=0.5,
        in_channels=[256, 512, 1024],
        out_channels=256,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='YOLOXHead',
        head_module=dict(
            type='YOLOXHeadModule',
            num_classes=num_classes,
            in_channels=256,
            feat_channels=256,
            widen_factor=0.5,
            stacked_convs=2,
            featmap_strides=(8, 16, 32),
            use_depthwise=False,
            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
            act_cfg=dict(type='SiLU', inplace=True)),
        # all four losses use reduction='sum'
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.IoULoss',
            mode='square',
            eps=1e-16,
            reduction='sum',
            loss_weight=5.0),
        loss_obj=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        loss_bbox_aux=dict(
            type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
    # SimOTA label assignment
    train_cfg=dict(
        assigner=dict(
            type='mmdet.SimOTAAssigner',
            center_radius=2.5,
            iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
    # inference-time score filtering and NMS settings
    test_cfg=dict(
        yolox_style=True,
        multi_label=True,
        score_thr=0.001,
        max_per_img=300,
        nms=dict(type='nms', iou_threshold=0.65)))
125 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | future
2 | tensorboard
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launch distributed testing through torch.distributed.launch.
#
# Usage:
#   dist_test.sh CONFIG CHECKPOINT GPUS [extra args forwarded to test.py]
#
# Optional environment overrides (with defaults):
#   NNODES=1  NODE_RANK=0  PORT=29500  MASTER_ADDR=127.0.0.1

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Directory containing this script; quoted so a path with spaces survives.
SCRIPT_DIR="$(dirname "$0")"

# Every expansion below is quoted so that paths and forwarded arguments that
# contain spaces or glob characters (e.g. --cfg-options key=[a,b]) reach
# test.py unchanged instead of being word-split or pathname-expanded.
# The repository root is prepended to PYTHONPATH for the launched process.
PYTHONPATH="${SCRIPT_DIR}/..:${PYTHONPATH}" \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "${SCRIPT_DIR}/test.py" \
    "$CONFIG" \
    "$CHECKPOINT" \
    --launcher pytorch \
    "${@:4}"
23 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launch distributed training through torch.distributed.launch.
#
# Usage:
#   dist_train.sh CONFIG GPUS [extra args forwarded to train.py]
#
# Optional environment overrides (with defaults):
#   NNODES=1  NODE_RANK=0  PORT=29500  MASTER_ADDR=127.0.0.1

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Directory containing this script; quoted so a path with spaces survives.
SCRIPT_DIR="$(dirname "$0")"

# Every expansion below is quoted so that paths and forwarded arguments that
# contain spaces or glob characters (e.g. --cfg-options key=[a,b]) reach
# train.py unchanged instead of being word-split or pathname-expanded.
# The repository root is prepended to PYTHONPATH for the launched process.
PYTHONPATH="${SCRIPT_DIR}/..:${PYTHONPATH}" \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "${SCRIPT_DIR}/train.py" \
    "$CONFIG" \
    --launcher pytorch "${@:3}"
20 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import os
4 | import os.path as osp
5 | import warnings
6 | from copy import deepcopy
7 |
8 | from mmdet.engine.hooks.utils import trigger_visualization_hook
9 | from mmdet.evaluation import DumpDetResults
10 | from mmdet.registry import RUNNERS
11 | from mmdet.utils import setup_cache_size_limit_of_dynamo
12 | from mmengine import ConfigDict
13 | from mmengine.config import Config, DictAction
14 | from mmengine.runner import Runner
15 |
16 | from unitmodule.models.detectors import register_unit_distributed
17 |
18 |
def parse_args():
    """Parse the command line of the test script.

    Besides returning the parsed namespace, this exports ``LOCAL_RANK`` to the
    environment (taken from ``--local_rank``) when the variable is not set
    already.

    Returns:
        argparse.Namespace: The parsed command-line arguments.
    """
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    show_dir_help = ('directory where painted images will be saved. '
                     'If specified, it will be automatically saved '
                     'to the work_dir/timestamp/show_dir')

    # (flags, keyword arguments) in the order they appear in ``--help``
    option_specs = [
        (('config',), dict(help='test config file path')),
        (('checkpoint',), dict(help='checkpoint file')),
        (('--work-dir',),
         dict(help='the directory to save the file containing evaluation '
                   'metrics')),
        (('--out',),
         dict(type=str,
              help='dump predictions to a pickle file for offline '
                   'evaluation')),
        (('--show',),
         dict(action='store_true', help='show prediction results')),
        (('--show-dir',), dict(help=show_dir_help)),
        (('--wait-time',),
         dict(type=float, default=2, help='the interval of show (s)')),
        (('--cfg-options',),
         dict(nargs='+', action=DictAction, help=cfg_options_help)),
        (('--launcher',),
         dict(choices=['none', 'pytorch', 'slurm', 'mpi'],
              default='none',
              help='job launcher')),
        (('--tta',), dict(action='store_true')),
        # With PyTorch >= 2.0.0, `torch.distributed.launch` passes
        # `--local-rank` rather than `--local_rank`, so both spellings
        # are accepted.
        (('--local_rank', '--local-rank'), dict(type=int, default=0)),
    ]

    cli = argparse.ArgumentParser(description='MMDet test (and eval) a model')
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(parsed.local_rank)
    return parsed
64 |
65 |
def main():
    """Test (and evaluate) the detector described by the given config.

    Workflow: parse the CLI arguments, load the config and merge the
    ``--cfg-options`` overrides, import the custom modules named by
    ``cfg.import_dir`` (when present), call ``register_unit_distributed``,
    resolve ``work_dir``, optionally enable visualization and test-time
    augmentation, build the runner and run ``runner.test()``.

    Raises:
        AssertionError: If ``--out`` does not end with ``.pkl``/``.pickle``.
    """
    args = parse_args()

    # Reduce the number of repeated compilations and improve
    # testing speed.
    setup_cache_size_limit_of_dynamo()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # --------------------------------------------------------
    # dynamic import customs modules
    # import modules from import_dir as a/b/c/ dir, registry will be updated
    if hasattr(cfg, 'import_dir'):
        import importlib

        # Strip surrounding slashes before converting to a dotted name:
        # 'a/b/c/' must become 'a.b.c', not the unimportable 'a.b.c.'.
        module_path = cfg.import_dir.strip('/').replace('/', '.')
        # imported only for its side effect of populating the registries
        importlib.import_module(module_path)

    # dynamic import for ddp of UnitModule if key with_unit_module is True
    register_unit_distributed(cfg)
    # --------------------------------------------------------

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    cfg.load_from = args.checkpoint

    if args.show or args.show_dir:
        cfg = trigger_visualization_hook(cfg, args)

    if args.tta:

        if 'tta_model' not in cfg:
            warnings.warn('Cannot find ``tta_model`` in config, '
                          'we will set it as default.')
            cfg.tta_model = dict(
                type='DetTTAModel',
                tta_cfg=dict(
                    nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
        if 'tta_pipeline' not in cfg:
            warnings.warn('Cannot find ``tta_pipeline`` in config, '
                          'we will set it as default.')
            # the test dataset may be wrapped; descend to the innermost one
            test_data_cfg = cfg.test_dataloader.dataset
            while 'dataset' in test_data_cfg:
                test_data_cfg = test_data_cfg['dataset']
            cfg.tta_pipeline = deepcopy(test_data_cfg.pipeline)
            # default TTA: evaluate each image flipped and unflipped
            flip_tta = dict(
                type='TestTimeAug',
                transforms=[
                    [
                        dict(type='RandomFlip', prob=1.),
                        dict(type='RandomFlip', prob=0.)
                    ],
                    [
                        dict(
                            type='PackDetInputs',
                            meta_keys=('img_id', 'img_path', 'ori_shape',
                                       'img_shape', 'scale_factor', 'flip',
                                       'flip_direction'))
                    ],
                ])
            # the last step of the copied test pipeline is replaced
            cfg.tta_pipeline[-1] = flip_tta
        # wrap the original model config inside the TTA model config
        cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
        cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # add `DumpResults` dummy metric
    if args.out is not None:
        assert args.out.endswith(('.pkl', '.pickle')), \
            'The dump file must be a pkl file.'
        runner.test_evaluator.metrics.append(
            DumpDetResults(out_file_path=args.out))

    # start testing
    runner.test()
160 |
161 |
162 | if __name__ == '__main__':
163 | main()
164 |
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import logging
4 | import os
5 | import os.path as osp
6 |
7 | from mmdet.utils import setup_cache_size_limit_of_dynamo
8 | from mmengine.config import Config, DictAction
9 | from mmengine.logging import print_log
10 | from mmengine.registry import RUNNERS
11 | from mmengine.runner import Runner
12 |
13 | from unitmodule.models.detectors import register_unit_distributed
14 |
15 |
def parse_args():
    """Parse the command line of the training script.

    Besides returning the parsed namespace, this exports ``LOCAL_RANK`` to the
    environment (taken from ``--local_rank``) when the variable is not set
    already.

    Returns:
        argparse.Namespace: The parsed command-line arguments.
    """
    resume_help = ('If specify checkpoint path, resume from it, while if not '
                   'specify, try to auto resume from the latest checkpoint '
                   'in the work directory.')
    cfg_options_help = (
        'override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')

    # (flags, keyword arguments) in the order they appear in ``--help``
    option_specs = [
        (('config',), dict(help='train config file path')),
        (('--work-dir',), dict(help='the dir to save logs and models')),
        (('--amp',),
         dict(action='store_true',
              default=False,
              help='enable automatic-mixed-precision training')),
        (('--auto-scale-lr',),
         dict(action='store_true',
              help='enable automatically scaling LR.')),
        # bare `--resume` yields the const 'auto'; `--resume PATH` yields PATH
        (('--resume',),
         dict(nargs='?', type=str, const='auto', help=resume_help)),
        (('--cfg-options',),
         dict(nargs='+', action=DictAction, help=cfg_options_help)),
        (('--launcher',),
         dict(choices=['none', 'pytorch', 'slurm', 'mpi'],
              default='none',
              help='job launcher')),
        # With PyTorch >= 2.0.0, `torch.distributed.launch` passes
        # `--local-rank` rather than `--local_rank`, so both spellings
        # are accepted.
        (('--local_rank', '--local-rank'), dict(type=int, default=0)),
    ]

    cli = argparse.ArgumentParser(description='Train a detector')
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(parsed.local_rank)

    return parsed
61 |
62 |
def main():
    """Train the detector described by the given config.

    Workflow: parse the CLI arguments, load the config and merge the
    ``--cfg-options`` overrides, import the custom modules named by
    ``cfg.import_dir`` (when present), call ``register_unit_distributed``,
    resolve ``work_dir``, apply the ``--amp``, ``--auto-scale-lr`` and
    ``--resume`` switches, build the runner and run ``runner.train()``.

    Raises:
        AssertionError: If ``--amp`` is given but the optimizer wrapper type
            is neither ``OptimWrapper`` nor ``AmpOptimWrapper``.
        RuntimeError: If ``--auto-scale-lr`` is given but the config lacks
            ``auto_scale_lr.enable`` or ``auto_scale_lr.base_batch_size``.
    """
    args = parse_args()

    # Reduce the number of repeated compilations and improve
    # training speed.
    setup_cache_size_limit_of_dynamo()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # --------------------------------------------------------
    # dynamic import customs modules
    # import modules from import_dir as a/b/c/ dir, registry will be updated
    if hasattr(cfg, 'import_dir'):
        import importlib

        # Strip surrounding slashes before converting to a dotted name:
        # 'a/b/c/' must become 'a.b.c', not the unimportable 'a.b.c.'.
        module_path = cfg.import_dir.strip('/').replace('/', '.')
        # imported only for its side effect of populating the registries
        importlib.import_module(module_path)

    # dynamic import for ddp of UnitModule if key with_unit_module is True
    register_unit_distributed(cfg)
    # --------------------------------------------------------

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    # enable automatic-mixed-precision training
    if args.amp is True:
        optim_wrapper = cfg.optim_wrapper.type
        if optim_wrapper == 'AmpOptimWrapper':
            print_log(
                'AMP training is already enabled in your config.',
                logger='current',
                level=logging.WARNING)
        else:
            assert optim_wrapper == 'OptimWrapper', (
                '`--amp` is only supported when the optimizer wrapper type is '
                f'`OptimWrapper` but got {optim_wrapper}.')
            cfg.optim_wrapper.type = 'AmpOptimWrapper'
            cfg.optim_wrapper.loss_scale = 'dynamic'

    # enable automatically scaling LR
    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            raise RuntimeError('Can not find "auto_scale_lr" or '
                               '"auto_scale_lr.enable" or '
                               '"auto_scale_lr.base_batch_size" in your'
                               ' configuration file.')

    # resume is determined in this priority: resume from > auto_resume
    if args.resume == 'auto':
        # bare `--resume`: resume without an explicit checkpoint path
        cfg.resume = True
        cfg.load_from = None
    elif args.resume is not None:
        # `--resume PATH`: resume from the given checkpoint
        cfg.resume = True
        cfg.load_from = args.resume

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # start training
    runner.train()
145 |
146 |
147 | if __name__ == '__main__':
148 | main()
149 |
--------------------------------------------------------------------------------
/unitmodule/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import *
2 | from .models import *
3 |
--------------------------------------------------------------------------------
/unitmodule/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import *
2 |
--------------------------------------------------------------------------------
/unitmodule/datasets/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .colorspace import UnderwaterColorRandomTransfer
2 |
3 | __all__ = ['UnderwaterColorRandomTransfer']
4 |
--------------------------------------------------------------------------------
/unitmodule/datasets/transforms/colorspace.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from mmcv.transforms import BaseTransform
4 | from mmcv.transforms.utils import cache_randomness
5 | from mmengine.registry import TRANSFORMS
6 |
7 |
8 | @TRANSFORMS.register_module()
9 | class UnderwaterColorRandomTransfer(BaseTransform):
10 | """Transfer underwater image color by converting HSV color space.
11 |
12 | HSV is (Hue, Saturation, Value).
13 | The uint8 image(255)(h, w, c) convert to HSV that
14 | H in [0, 180),
15 | S in [0, 255],
16 | V in [0, 255].
17 |
18 | Required Keys:
19 |
20 | - img
21 |
22 | Modified Keys:
23 |
24 | - img
25 |
26 | Args:
27 | hue_prob (float): The probability for hue in range [0, 1]. Defaults to 0.5.
28 | saturation_prob (float): The probability for saturation in range [0, 1]. Defaults to 0.5.
29 | value_prob (float): The probability for value in range [0, 1]. Defaults to 0.5.
30 | hue_delta (int): delta of hue. Defaults to 5.
31 | saturation_delta (int): delta of saturation. Defaults to 30.
32 | value_delta (int): delta of value. Defaults to 30.
33 |
34 | Notes:
35 | The underwater_hue_interval got from the hue mean in underwater dataset,
36 | which get the hue mean by convert color from BGR to HSV.
37 | dataset | hue min | hue max
38 | ------------|----------------|-------------
39 | DUO | 18.7551 | 95.4836
40 | URPC2020 | 17.9668 | 99.6359
41 | URPC2021 | 17.9668 | 103.2373
42 | UIEB | 25.5417 | 116.3379
43 | ------------|----------------|-------------
44 | hue interval 18 116
45 | """
46 | underwater_hue_interval = (18, 116)
47 |
48 | def __init__(self,
49 | hue_prob: float = 0.5,
50 | saturation_prob: float = 0.5,
51 | value_prob: float = 0.5,
52 | hue_delta: int = 5,
53 | saturation_delta: int = 30,
54 | value_delta: int = 30) -> None:
55 | assert 0 <= hue_prob <= 1.0
56 | assert 0 <= saturation_prob <= 1.0
57 | assert 0 <= value_prob <= 1.0
58 |
59 | self.hue_prob = hue_prob
60 | self.saturation_prob = saturation_prob
61 | self.value_prob = value_prob
62 | self.hue_delta = hue_delta
63 | self.saturation_delta = saturation_delta
64 | self.value_delta = value_delta
65 |
66 | self._hue_min, self._hue_max = self.underwater_hue_interval
67 | self._hue_middle = (self._hue_min + self._hue_max) / 2
68 |
69 | @cache_randomness
70 | def _random_hue(self):
71 | return np.random.rand() < self.hue_prob
72 |
73 | @cache_randomness
74 | def _random_saturation(self):
75 | return np.random.rand() < self.saturation_prob
76 |
77 | @cache_randomness
78 | def _random_value(self):
79 | return np.random.rand() < self.value_prob
80 |
81 | @staticmethod
82 | def _random_mult():
83 | return np.random.uniform(-1, 1)
84 |
85 | @cache_randomness
86 | def _get_hue_gain(self, img):
87 | """Get hue gain value and keep it in underwater hue interval."""
88 | img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
89 | hue_mean = np.mean(img_hsv[..., 0])
90 | hue_gain = self._random_mult() * self.hue_delta
91 |
92 | # img_hue is in the underwater hue interval
93 | if self._hue_min < hue_mean < self._hue_max:
94 | hue = np.clip(hue_mean + hue_gain, self._hue_min, self._hue_max)
95 | hue_gain = hue - hue_mean
96 |
97 | # img_hue is out of the underwater hue interval
98 | else:
99 | hue_gain = np.abs(hue_gain)
100 | if hue_mean >= self._hue_max:
101 | hue_gain = -hue_gain
102 |
103 | return np.array(hue_gain, dtype=np.int16)
104 |
105 | @cache_randomness
106 | def _get_saturation_gain(self):
107 | gain = self._random_mult() * self.saturation_delta
108 | return np.array(gain, dtype=np.int16)
109 |
@cache_randomness
def _get_value_gain(self):
    """Return a random value shift within +/- ``value_delta`` as a 0-d int16 array."""
    return np.array(self._random_mult() * self.value_delta,
                    dtype=np.int16)
114 |
def transform(self, results: dict) -> dict:
    """Randomly shift hue, saturation and value of ``results['img']``.

    Args:
        results: Result dict; ``results['img']`` must be a ``uint8`` image
            that is converted with ``cv2.COLOR_BGR2HSV``.

    Returns:
        The same dict. ``results['img']`` is replaced by the jittered image
        unless none of the three channels was selected, in which case the
        dict is returned untouched.
    """
    apply_hue = self._random_hue()
    apply_saturation = self._random_saturation()
    apply_value = self._random_value()

    # Nothing selected: skip the colour-space round trip altogether.
    if not (apply_hue or apply_saturation or apply_value):
        return results

    img = results['img']
    src_dtype = img.dtype
    assert src_dtype == np.uint8

    # Work in int16 so that adding a negative or large gain cannot wrap.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

    if apply_hue:
        # Hue wraps around at 180.
        hsv[..., 0] = (hsv[..., 0] + self._get_hue_gain(img)) % 180

    if apply_saturation:
        shifted = hsv[..., 1] + self._get_saturation_gain()
        hsv[..., 1] = np.clip(shifted, 0, 255)

    if apply_value:
        shifted = hsv[..., 2] + self._get_value_gain()
        hsv[..., 2] = np.clip(shifted, 0, 255)

    # Back to the source dtype, then from HSV to BGR.
    results['img'] = cv2.cvtColor(hsv.astype(src_dtype), cv2.COLOR_HSV2BGR)
    return results
147 |
def __repr__(self):
    """Return ``ClassName(field=value, ...)`` listing the configuration."""
    field_names = (
        'underwater_hue_interval',
        'hue_prob',
        'saturation_prob',
        'value_prob',
        'hue_delta',
        'saturation_delta',
        'value_delta',
    )
    body = ', '.join(f'{name}={getattr(self, name)}' for name in field_names)
    return f'{self.__class__.__name__}({body})'
158 |
--------------------------------------------------------------------------------
/unitmodule/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_preprocessors import *
2 | from .detectors import *
3 | from .losses import *
4 |
--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/__init__.py:
--------------------------------------------------------------------------------
from .data_preprocessor import (UnitDetDataPreprocessor,
                                UnitYOLOv5DetDataPreprocessor)
from .unit_module import UnitModule

# Public names of the package. Both preprocessors defined in
# data_preprocessor.py are exported so they can be imported from here.
__all__ = [
    'UnitDetDataPreprocessor', 'UnitYOLOv5DetDataPreprocessor', 'UnitModule',
]
5 |
--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/data_preprocessor.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from functools import reduce
3 | from numbers import Number
4 | from typing import Sequence, List, Tuple, Optional, Union
5 |
6 | import numpy as np
7 | import torch.nn.functional as F
8 | from mmdet.models.data_preprocessors import DetDataPreprocessor
9 | from mmengine.model import BaseModule
10 | from mmengine.registry import MODELS
11 | from mmyolo.models.data_preprocessors import YOLOv5DetDataPreprocessor
12 |
13 |
def sum_dict(a, b):
    """Merge two dicts by adding the values stored under the same key.

    A key present in only one of the dicts is treated as 0 in the other.

    Args:
        a: First mapping.
        b: Second mapping.

    Returns:
        A new dict over the union of both key sets.
    """
    return {
        key: sum((a.get(key, 0), b.get(key, 0)))
        for key in (a.keys() | b.keys())
    }
19 |
20 |
@MODELS.register_module()
class UnitDetDataPreprocessor(DetDataPreprocessor, BaseModule):
    """``DetDataPreprocessor`` that runs a UnitModule on the images first.

    ``forward`` casts the data, passes every image through the unit module
    (see ``unit_module_forward``) and then delegates to the parent
    ``forward``. In training mode the unit-module losses are returned next
    to the preprocessed data.

    Args:
        unit_module: Config dict used to build the unit module through the
            ``MODELS`` registry.
        pad_mode: Padding mode handed to ``F.pad`` before the unit module;
            either ``'reflect'`` or ``'circular'``.
        init_cfg: Initialization config stored for ``BaseModule``.

    All other arguments are forwarded unchanged to ``DetDataPreprocessor``.
    """

    def __init__(self,
                 unit_module: dict,
                 pad_mode: str = 'reflect',
                 mean: Sequence[Number] = None,
                 std: Sequence[Number] = None,
                 pad_size_divisor: int = 1,
                 pad_value: Union[float, int] = 0,
                 pad_mask: bool = False,
                 mask_pad_value: int = 0,
                 pad_seg: bool = False,
                 seg_pad_value: int = 255,
                 bgr_to_rgb: bool = False,
                 rgb_to_bgr: bool = False,
                 boxtype2tensor: bool = True,
                 non_blocking: Optional[bool] = False,
                 batch_augments: Optional[List[dict]] = None,
                 init_cfg=None):
        super().__init__(
            mean=mean,
            std=std,
            pad_size_divisor=pad_size_divisor,
            pad_value=pad_value,
            pad_mask=pad_mask,
            mask_pad_value=mask_pad_value,
            pad_seg=pad_seg,
            seg_pad_value=seg_pad_value,
            bgr_to_rgb=bgr_to_rgb,
            rgb_to_bgr=rgb_to_bgr,
            boxtype2tensor=boxtype2tensor,
            non_blocking=non_blocking,
            batch_augments=batch_augments)

        # BaseModule __init__: set here by hand, the super().__init__ call
        # above receives no init_cfg.
        self._is_init = False
        self.init_cfg = copy.deepcopy(init_cfg)

        assert pad_mode in ('reflect', 'circular'), \
            f"Excepted ('reflect', 'circular'), but got {pad_mode}"
        self.pad_mode = pad_mode
        self.unit_module = MODELS.build(unit_module)

    def forward(self,
                data: dict,
                training: bool = False) -> Union[Tuple[dict, dict], dict]:
        """Enhance the inputs with the unit module, then preprocess them.

        Args:
            data: Batch dict with an ``'inputs'`` entry holding the images.
            training: Whether the unit module should also return losses.

        Returns:
            ``(data, losses)`` when ``training`` is true, otherwise ``data``.
        """
        data = self.cast_data(data)
        data['inputs'], losses = self.unit_module_forward(data['inputs'], training)

        data = super(UnitDetDataPreprocessor, self).forward(data, training)
        return (data, losses) if training else data

    def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]:
        """Run the unit module on every image of the batch separately.

        Each image is padded on its right and bottom edge to a multiple of
        ``pad_size_divisor``, divided by 255, passed through the unit
        module, cropped back to its original size and multiplied by 255.

        Args:
            batch_inputs: Iterable of images; each is unpacked as
                (channels, height, width) — presumably with values in
                [0, 255], confirm against the data pipeline.
            training: Whether to collect the unit-module losses.

        Returns:
            ``(outputs, losses)``: the list of processed images and a dict
            with every loss averaged over the images. The dict is empty
            when ``training`` is false or the batch has no images.
        """
        outputs = []
        per_image_losses = []
        for batch_input in batch_inputs:
            # padding
            oh, ow = batch_input.shape[1:]
            pad_h = int(np.ceil(oh / self.pad_size_divisor)) * self.pad_size_divisor
            pad_w = int(np.ceil(ow / self.pad_size_divisor)) * self.pad_size_divisor
            p2d = (0, (pad_w - ow), 0, (pad_h - oh))
            batch_input = batch_input.float()
            batch_input_pad = F.pad(batch_input, p2d, self.pad_mode)

            # UnitModule forward on a batch of one image scaled by 1/255
            batch_input_pad = batch_input_pad.unsqueeze(0) / 255.
            if training:
                batch_output_pad, _losses = self.unit_module(batch_input_pad, training)
                per_image_losses.append(_losses)
            else:
                batch_output_pad = self.unit_module(batch_input_pad, training)
            batch_output_pad = batch_output_pad.squeeze(0)

            # remove padding and undo the 1/255 scaling
            batch_output = batch_output_pad[..., :oh, :ow] * 255.
            outputs.append(batch_output)

        # Always hand back a dict, as annotated. Averaging is skipped when
        # nothing was collected (eval mode or an empty batch), because
        # reduce() without an initial value fails on an empty list.
        losses = {}
        if per_image_losses:
            n = len(per_image_losses)
            summed = reduce(sum_dict, per_image_losses)
            losses = {k: v / n for k, v in summed.items()}

        return outputs, losses
105 |
106 |
@MODELS.register_module()
class UnitYOLOv5DetDataPreprocessor(YOLOv5DetDataPreprocessor, BaseModule):
    """``YOLOv5DetDataPreprocessor`` that runs a UnitModule on the images first.

    Args:
        unit_module: Config dict used to build the unit module through the
            ``MODELS`` registry.
        pad_mode: Padding mode handed to ``F.pad`` in the non-training
            path; either ``'reflect'`` or ``'circular'``.
        init_cfg: Initialization config stored for ``BaseModule``.

    All other arguments are forwarded unchanged to
    ``YOLOv5DetDataPreprocessor``.
    """

    def __init__(self,
                 unit_module: dict,
                 pad_mode: str = 'reflect',
                 mean: Sequence[Number] = None,
                 std: Sequence[Number] = None,
                 pad_size_divisor: int = 1,
                 pad_value: Union[float, int] = 0,
                 pad_mask: bool = False,
                 mask_pad_value: int = 0,
                 pad_seg: bool = False,
                 seg_pad_value: int = 255,
                 bgr_to_rgb: bool = False,
                 rgb_to_bgr: bool = False,
                 boxtype2tensor: bool = True,
                 non_blocking: Optional[bool] = True,
                 batch_augments: Optional[List[dict]] = None,
                 init_cfg=None):
        super().__init__(
            mean=mean,
            std=std,
            pad_size_divisor=pad_size_divisor,
            pad_value=pad_value,
            pad_mask=pad_mask,
            mask_pad_value=mask_pad_value,
            pad_seg=pad_seg,
            seg_pad_value=seg_pad_value,
            bgr_to_rgb=bgr_to_rgb,
            rgb_to_bgr=rgb_to_bgr,
            boxtype2tensor=boxtype2tensor,
            non_blocking=non_blocking,
            batch_augments=batch_augments,
        )

        # Attributes that BaseModule.__init__ would normally set.
        self._is_init = False
        self.init_cfg = copy.deepcopy(init_cfg)

        assert pad_mode in ('reflect', 'circular'), \
            f"Excepted ('reflect', 'circular'), but got {pad_mode}"
        self.pad_mode = pad_mode
        self.unit_module = MODELS.build(unit_module)

    def forward(self,
                data: dict,
                training: bool = False) -> Union[Tuple[dict, dict], dict]:
        """Enhance the inputs with the unit module, then preprocess them.

        Returns:
            ``(data, losses)`` when ``training`` is true, otherwise ``data``.
        """
        data = self.cast_data(data)
        enhanced, losses = self.unit_module_forward(data['inputs'], training)
        data['inputs'] = enhanced

        data = super(UnitYOLOv5DetDataPreprocessor, self).forward(data, training)
        if training:
            return data, losses
        return data

    def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]:
        """Run the unit module on the batch.

        In training the whole ``batch_inputs`` tensor is scaled by 1/255 and
        processed in a single call. Otherwise every image is padded to a
        multiple of ``pad_size_divisor``, processed on its own and cropped
        back to its original size.

        Returns:
            ``(outputs, losses)``; ``losses`` is an empty dict when
            ``training`` is false.
        """
        if training:
            scaled = batch_inputs.float() / 255.
            enhanced, losses = self.unit_module(scaled, training)
            return enhanced * 255., losses

        divisor = self.pad_size_divisor
        outputs = []
        for image in batch_inputs:
            # Pad right/bottom up to the next multiple of the divisor.
            oh, ow = image.shape[1:]
            pad_h = int(np.ceil(oh / divisor)) * divisor
            pad_w = int(np.ceil(ow / divisor)) * divisor
            padded = F.pad(image.float(),
                           (0, (pad_w - ow), 0, (pad_h - oh)),
                           self.pad_mode)

            # The unit module receives a batch of one image scaled by 1/255.
            result = self.unit_module(padded.unsqueeze(0) / 255., training)
            result = result.squeeze(0)

            # Drop the padding again and undo the scaling.
            outputs.append(result[..., :oh, :ow] * 255.)

        return outputs, {}
187 |
--------------------------------------------------------------------------------
/unitmodule/models/data_preprocessors/unit_module.py:
--------------------------------------------------------------------------------
1 | from typing import Optional, Tuple, Union
2 |
3 | import mmcv.cnn as cnn
4 | import torch
5 | import torch.nn as nn
6 | from mmcv.cnn import build_activation_layer, build_norm_layer
7 | from mmengine.model import BaseModule
8 | from mmengine.registry import MODELS
9 | from torch import Tensor
10 |
11 |
class LargeKernelLayer(BaseModule):
    """Sum of a large-kernel and a small-kernel depth-wise conv branch.

    Both branches are ``ConvModule`` layers with ``groups=channels``, a
    norm layer and no activation; the activation is applied to their sum.

    Args:
        channels: Number of input and output channels.
        large_kernel: Kernel size of the large branch.
        small_kernel: Kernel size of the small branch.
        padding_mode: Padding mode of both convolutions.
        norm_cfg: Norm config; defaults to ``dict(type='GN', num_groups=8)``.
        act_cfg: Activation config; defaults to ``dict(type='ReLU')``.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 channels: int,
                 large_kernel: int,
                 small_kernel: int,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg

        def depthwise(kernel_size):
            # Depth-wise conv + norm, padded by kernel_size // 2.
            return cnn.ConvModule(channels, channels, kernel_size,
                                  padding=kernel_size // 2,
                                  padding_mode=padding_mode,
                                  groups=channels,
                                  norm_cfg=norm_cfg,
                                  act_cfg=None)

        self.dw_large = depthwise(large_kernel)
        self.dw_small = depthwise(small_kernel)
        self.act = build_activation_layer(act_cfg)

    def forward(self, x) -> Tensor:
        """Return ``act(dw_large(x) + dw_small(x))``."""
        return self.act(self.dw_large(x) + self.dw_small(x))
42 |
43 |
class LKBlock(BaseModule):
    """Residual block: 1x1 conv -> ``LargeKernelLayer`` -> 1x1 conv, then norm.

    Args:
        channels: Number of input and output channels of the block.
        large_kernel: Large kernel size of the inner ``LargeKernelLayer``.
        small_kernel: Small kernel size of the inner ``LargeKernelLayer``.
        dw_ratio: The inner layer works on ``int(channels * dw_ratio)``
            channels.
        padding_mode: Padding mode of the inner ``LargeKernelLayer``.
        norm_cfg: Norm config; defaults to ``dict(type='GN', num_groups=8)``.
        act_cfg: Activation config; defaults to ``dict(type='ReLU')``.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 channels: int,
                 large_kernel: int,
                 small_kernel: int,
                 dw_ratio: float = 1.0,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg
        hidden = int(channels * dw_ratio)

        # Point-wise expansion, large-kernel depth-wise stage, point-wise
        # projection (no activation), and the norm applied after the
        # residual sum.
        self.pw1 = cnn.ConvModule(
            channels, hidden, 1, 1, norm_cfg=norm_cfg, act_cfg=act_cfg)
        self.dw = LargeKernelLayer(
            hidden, large_kernel, small_kernel,
            padding_mode=padding_mode, norm_cfg=norm_cfg, act_cfg=act_cfg)
        self.pw2 = cnn.ConvModule(
            hidden, channels, 1, 1, norm_cfg=norm_cfg, act_cfg=None)
        self.norm = build_norm_layer(norm_cfg, channels)[1]

    def forward(self, x) -> Tensor:
        """Return ``norm(x + pw2(dw(pw1(x))))``."""
        residual = self.pw2(self.dw(self.pw1(x)))
        return self.norm(x + residual)
76 |
77 |
@MODELS.register_module()
class UnitBackbone(BaseModule):
    """Stem of stride-2 3x3 convolutions followed by a stack of ``LKBlock``s.

    Args:
        stem_channels: Output channels of each stem convolution, in order.
        large_kernels: Large kernel size of each ``LKBlock``.
        small_kernels: Small kernel size of each ``LKBlock``; must have the
            same length as ``large_kernels``.
        in_channels: Channels of the input.
        dw_ratio: Channel ratio forwarded to every ``LKBlock``.
        padding_mode: Padding mode used by the stem and the blocks.
        norm_cfg: Norm config; defaults to ``dict(type='GN', num_groups=8)``.
        act_cfg: Activation config; defaults to ``dict(type='ReLU')``.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 stem_channels: Tuple[int],
                 large_kernels: Tuple[int],
                 small_kernels: Tuple[int],
                 in_channels: int = 3,
                 dw_ratio: float = 1.0,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        assert len(large_kernels) == len(small_kernels)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg

        # Channel widths along the stem: input first, then each stem output.
        widths = [in_channels, *stem_channels]
        self.stem = nn.Sequential(*[
            cnn.ConvModule(c_in, c_out, 3, 2,
                           padding=1, padding_mode=padding_mode,
                           norm_cfg=norm_cfg, act_cfg=act_cfg)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ])

        # Every block runs at the width of the last stem layer (or the
        # input width when there is no stem layer).
        block_channels = widths[-1]
        self.layers = nn.Sequential(*[
            LKBlock(block_channels, large_k, small_k, dw_ratio,
                    padding_mode, norm_cfg, act_cfg)
            for large_k, small_k in zip(large_kernels, small_kernels)
        ])

    def forward(self, x) -> Tensor:
        """Apply the stem and then the blocks."""
        return self.layers(self.stem(x))
118 |
119 |
@MODELS.register_module()
class THead(BaseModule):
    """Head that upsamples features twice by 2 and outputs a sigmoid map.

    Args:
        in_channels: Channels of the incoming feature map.
        hid_channels: Channels after the first convolution.
        out_channels: Channels of the output map.
        padding_mode: Padding mode of both 3x3 convolutions.
        norm_cfg: Norm config of the first convolution; defaults to
            ``dict(type='GN', num_groups=8)``.
        act_cfg: Activation config of the first convolution; defaults to
            ``dict(type='ReLU')``.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 in_channels: int,
                 hid_channels: int,
                 out_channels: int = 3,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg

        self.up1 = nn.Upsample(scale_factor=2, mode='bilinear')
        self.up2 = nn.Upsample(scale_factor=2, mode='bilinear')
        self.conv1 = cnn.ConvModule(
            in_channels, hid_channels, 3, 1,
            padding=1, padding_mode=padding_mode,
            norm_cfg=norm_cfg, act_cfg=act_cfg)
        # The last convolution has neither norm nor activation; the sigmoid
        # is applied in forward().
        self.conv2 = cnn.ConvModule(
            hid_channels, out_channels, 3, 1,
            padding=1, padding_mode=padding_mode,
            norm_cfg=None, act_cfg=None)

    def forward(self, x) -> Tensor:
        """Upsample + conv twice, then squash the result with a sigmoid."""
        hidden = self.conv1(self.up1(x))
        logits = self.conv2(self.up2(hidden))
        return torch.sigmoid(logits)
150 |
151 |
@MODELS.register_module()
class AHead(BaseModule):
    """Parameter-free head that averages its input over ``mean_dim``.

    Args:
        mean_dim: Dimension(s) to average over; the last two by default.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 mean_dim: Union[int, Tuple[int]] = (-2, -1),
                 init_cfg=None):
        super().__init__(init_cfg)
        self.mean_dim = mean_dim

    def forward(self, x) -> Tensor:
        """Return the mean of ``x`` over ``mean_dim``, keeping the reduced dims."""
        return x.mean(dim=self.mean_dim, keepdim=True)
162 |
163 |
@MODELS.register_module()
class UnitModule(BaseModule):
    """Image enhancement module built from a backbone, a t-head and an a-head.

    The module predicts a map ``t`` from the backbone features and a value
    ``a`` from the input itself, and restores the image with
    ``(x - (1 - t) * a) / t`` (see ``denoise``).

    Args:
        unit_backbone: Config of the feature backbone.
        t_head: Config of the head predicting ``t`` from the features.
        a_head: Config of the head predicting ``a`` from the input.
        loss_t: Config of the loss comparing ``alpha * t`` with the ``t``
            predicted for the synthetically degraded image.
        loss_acc: Optional config of a loss on ``(feature, a)``.
        loss_cc: Optional config of a loss on the restored image.
        loss_sp: Optional config of a loss on the restored real and the
            restored synthetic image.
        loss_tv: Optional config of a loss on the restored image.
        alpha: Factor used as ``t`` when creating the synthetic image;
            must satisfy ``0 < alpha < 1``.
        t_min: Lower clamp for ``t`` before dividing by it; must satisfy
            ``0 <= t_min < 0.1``.
        init_cfg: Initialization config passed to ``BaseModule``.
    """

    def __init__(self,
                 unit_backbone: dict,
                 t_head: dict,
                 a_head: dict,
                 loss_t: dict,
                 loss_acc: Optional[dict] = None,
                 loss_cc: Optional[dict] = None,
                 loss_sp: Optional[dict] = None,
                 loss_tv: Optional[dict] = None,
                 alpha: float = 0.9,
                 t_min: float = 0.001,
                 init_cfg=None):
        super().__init__(init_cfg)
        assert 0 < alpha < 1
        assert 0 <= t_min < 0.1

        self.alpha = alpha
        self.t_min = t_min

        self.unit_backbone = MODELS.build(unit_backbone)
        self.t_head = MODELS.build(t_head)
        self.a_head = MODELS.build(a_head)

        self.loss_t = MODELS.build(loss_t)
        self.loss_acc = MODELS.build(loss_acc) if loss_acc else None
        self.loss_cc = MODELS.build(loss_cc) if loss_cc else None
        self.loss_sp = MODELS.build(loss_sp) if loss_sp else None
        self.loss_tv = MODELS.build(loss_tv) if loss_tv else None

    def forward(self, x, training: bool = False) -> Union[Tensor, Tuple[Tensor, dict]]:
        """Dispatch to ``loss`` (training) or ``predict`` (otherwise)."""
        if training:
            return self.loss(x)
        else:  # training == False
            return self.predict(x)

    def _forward(self, x) -> Tuple[Tensor, Tensor]:
        """Return the raw (unclamped) ``t`` and ``a`` predicted for ``x``."""
        feature = self.unit_backbone(x)
        t = self.t_head(feature)
        a = self.a_head(x)
        return t, a

    def predict(self, x, show: bool = False) -> Union[Tensor, tuple]:
        """Restore ``x`` and clamp the result to [0, 1].

        Args:
            x: Input image tensor.
            show: If true, also return the clamped ``t`` and ``a``.

        Returns:
            The restored image, or ``(image, t, a)`` when ``show`` is true.
        """
        t, a = self._forward(x)
        t = torch.clamp(t, min=self.t_min)

        x = self.denoise(x, t, a)
        x = torch.clamp(x, 0, 1)
        return (x, t, a) if show else x

    def loss(self, x) -> Tuple[Tensor, dict]:
        """Restore ``x`` and compute the configured training losses.

        Returns:
            ``(x_denoise, losses)``: the restored image clamped to [0, 1]
            and a dict with ``loss_t`` plus every optional loss that was
            configured.
        """
        feature = self.unit_backbone(x)
        t = self.t_head(feature)
        a = self.a_head(x)

        t = torch.clamp(t, min=self.t_min)

        # get x of denoise
        x_denoise = self.denoise(x, t, a)

        # create fake x with noise and predict its t and A
        x_fake = self.noise(x, self.alpha, a)
        t_fake, a_fake = self._forward(x_fake)
        # Clamp only the divisor, as done for the real image and in
        # predict(): a saturated sigmoid could otherwise make denoise()
        # divide by ~0 and push inf/NaN into the losses. loss_t below still
        # sees the raw t_fake.
        x_fake_denoise = self.denoise(
            x_fake, torch.clamp(t_fake, min=self.t_min), a_fake)

        loss_t = self.loss_t(self.alpha * t, t_fake)
        losses = dict(loss_t=loss_t)
        if self.loss_acc is not None:
            loss_acc = self.loss_acc(feature, a)
            losses.update(loss_acc=loss_acc)

        if self.loss_cc is not None:
            loss_cc = self.loss_cc(x_denoise)
            losses.update(loss_cc=loss_cc)

        if self.loss_sp is not None:
            loss_sp = self.loss_sp(x_denoise, x_fake_denoise)
            losses.update(loss_sp=loss_sp)

        if self.loss_tv is not None:
            loss_tv = self.loss_tv(x_denoise)
            losses.update(loss_tv=loss_tv)

        # The losses above see the unclamped restoration; only the image
        # handed on is limited to [0, 1].
        x_denoise = torch.clamp(x_denoise, 0, 1)
        return x_denoise, losses

    @staticmethod
    def noise(x, t, a) -> Tensor:
        """Noise image: blend ``x`` with ``a`` as ``x * t + (1 - t) * a``."""
        return x * t + (1 - t) * a

    @staticmethod
    def denoise(x, t, a) -> Tensor:
        """Denoise image: invert ``noise`` as ``(x - (1 - t) * a) / t``."""
        return (x - (1 - t) * a) / t
260 |
--------------------------------------------------------------------------------
/unitmodule/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .unit_detectors import (UnitCascadeRCNN, UnitDETR, UnitDINO,
2 | UnitFasterRCNN, UnitFCOS, UnitRetinaNet,
3 | UnitTOOD, UnitYOLODetector)
4 |
5 |
def register_unit_distributed(cfg):
    """Load the UnitModule-aware distributed wrapper when the config asks for it.

    Args:
        cfg: Config object; only ``cfg.get('with_unit_module')`` is read.
    """
    if not cfg.get('with_unit_module'):
        return

    # Imported purely for its side effect: the module switches
    # MMDistributedDataParallel to fit models with a UnitModule.
    import unitmodule.models.detectors.unit_distributed  # noqa: F401
10 |
11 |
# Public names of the detectors package: the UnitModule detector variants
# plus the helper that enables the distributed train_step override.
__all__ = [
    'UnitCascadeRCNN', 'UnitDETR', 'UnitDINO',
    'UnitFasterRCNN', 'UnitFCOS', 'UnitRetinaNet',
    'UnitTOOD', 'UnitYOLODetector',
    'register_unit_distributed',
]
18 |
--------------------------------------------------------------------------------
/unitmodule/models/detectors/unit_detectors.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Union
2 |
3 | import torch
4 | from mmdet.models.detectors import (CascadeRCNN, DETR, DINO,
5 | FasterRCNN, FCOS, RetinaNet, TOOD)
6 | from mmengine.optim import OptimWrapper
7 | from mmengine.registry import MODELS
8 | from mmyolo.models.detectors import YOLODetector
9 |
10 |
def train_step_with_unit_module(self, data: Union[dict, tuple, list],
                                optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
    """Run one training step that also optimizes the UnitModule losses.

    Args:
        data: Raw batch handed to the data preprocessor.
        optim_wrapper: Wrapper providing the optimization context and the
            parameter update.

    Returns:
        The log variables produced by ``parse_losses``.
    """
    with optim_wrapper.optim_context(self):
        # In training mode the preprocessor returns its own losses as well.
        batch, preprocessor_losses = self.data_preprocessor(data, True)
        all_losses = self._run_forward(batch, mode='loss')
        all_losses.update(preprocessor_losses)

    total_loss, log_vars = self.parse_losses(all_losses)
    optim_wrapper.update_params(total_loss)
    return log_vars
21 |
22 |
def with_unit_module(cls):
    """Class decorator that installs ``train_step_with_unit_module`` as ``train_step``.

    Returns:
        The same class object, modified in place.
    """
    setattr(cls, 'train_step', train_step_with_unit_module)
    return cls
26 |
27 |
@MODELS.register_module()
@with_unit_module
class UnitCascadeRCNN(CascadeRCNN):
    """CascadeRCNN with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
32 |
33 |
@MODELS.register_module()
@with_unit_module
class UnitDETR(DETR):
    """DETR with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
38 |
39 |
@MODELS.register_module()
@with_unit_module
class UnitDINO(DINO):
    """DINO with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
44 |
45 |
@MODELS.register_module()
@with_unit_module
class UnitFasterRCNN(FasterRCNN):
    """FasterRCNN with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
50 |
51 |
@MODELS.register_module()
@with_unit_module
class UnitFCOS(FCOS):
    """FCOS with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
56 |
57 |
@MODELS.register_module()
@with_unit_module
class UnitRetinaNet(RetinaNet):
    """RetinaNet with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
62 |
63 |
@MODELS.register_module()
@with_unit_module
class UnitTOOD(TOOD):
    """TOOD with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
68 |
69 |
@MODELS.register_module()
@with_unit_module
class UnitYOLODetector(YOLODetector):
    """YOLODetector with UnitModule.

    Adds no code of its own: ``with_unit_module`` replaces ``train_step``
    so the losses returned by the data preprocessor join the detector
    losses.
    """
74 |
--------------------------------------------------------------------------------
/unitmodule/models/detectors/unit_distributed.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Union
2 |
3 | import torch
4 | from mmengine.model.utils import detect_anomalous_params
5 | from mmengine.model.wrappers import MMDistributedDataParallel
6 | from mmengine.optim import OptimWrapper
7 | from mmengine.registry import MODEL_WRAPPERS
8 |
9 |
def ddp_train_step_with_unit_module(self, data: Union[dict, tuple, list],
                                    optim_wrapper: OptimWrapper) -> Dict[str, torch.Tensor]:
    """Distributed training step that also optimizes the UnitModule losses.

    Args:
        data: Raw batch handed to the wrapped model's data preprocessor.
        optim_wrapper: Wrapper providing the optimization context and the
            parameter update.

    Returns:
        The log variables produced by the wrapped model's ``parse_losses``.
    """
    with optim_wrapper.optim_context(self):
        # The preprocessor of the wrapped model returns its own losses too.
        batch, preprocessor_losses = self.module.data_preprocessor(
            data, training=True)
        all_losses = self._run_forward(batch, mode='loss')
        all_losses.update(preprocessor_losses)

    total_loss, log_vars = self.module.parse_losses(all_losses)
    optim_wrapper.update_params(total_loss)

    if self.detect_anomalous_params:
        detect_anomalous_params(total_loss, model=self)
    return log_vars
21 |
22 |
# Switch MMDistributedDataParallel.train_step to the UnitModule-aware version
# and register the class again. This runs when the module is imported and
# replaces the method on the class itself; `force=True` allows registering
# under a name that is already taken in MODEL_WRAPPERS.
MMDistributedDataParallel.train_step = ddp_train_step_with_unit_module
MODEL_WRAPPERS.register_module(module=MMDistributedDataParallel, force=True)
26 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .assisting_color_cast_loss import AssistingColorCastLoss
2 | from .color_cast_loss import ColorCastLoss
3 | from .saturated_pixel_loss import SaturatedPixelLoss
4 | from .total_variation_loss import TotalVariationLoss
5 | from .transmission_loss import TransmissionLoss
6 |
# Public names of the losses package: one entry per loss class imported above.
__all__ = [
    'AssistingColorCastLoss', 'ColorCastLoss', 'SaturatedPixelLoss',
    'TotalVariationLoss', 'TransmissionLoss',
]
11 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/assisting_color_cast_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmengine.registry import MODELS
4 | from torch import Tensor
5 | from torchvision.ops import RoIPool
6 |
7 |
@MODELS.register_module()
class AssistingColorCastLoss(nn.Module):
    """MSE between a colour cast regressed from features and the value ``a``.

    The whole feature map is RoI-pooled to 7x7, reduced to 3 channels by a
    1x1 convolution, and each channel's 49 values are mapped to one scalar
    by a small linear head.

    Args:
        channels: Number of channels of the incoming feature map.
        loss_weight: Factor applied to the loss.
    """

    def __init__(self, channels: int, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

        self.roi_pooling = RoIPool((7, 7), 1)
        self.down_conv = nn.Conv2d(channels, 3, 1, 1)
        self.acc_head = nn.Sequential(
            nn.Linear(49, 32),
            nn.Linear(32, 16),
            nn.Linear(16, 1))

    def forward(self, feature: Tensor, a: Tensor) -> Tensor:
        """Compute the weighted loss.

        Args:
            feature: Feature map of shape (b, channels, h, w).
            a: Target whose last two dims are squeezed away; expected to
                become (b, 3).

        Returns:
            The weighted MSE loss.
        """
        device = feature.device
        b, _, h, w = feature.shape
        a = a.squeeze(-1).squeeze(-1)  # (b, 3)

        # One box per image covering the whole feature map. RoIPool takes
        # boxes as (x1, y1, x2, y2), so x runs over the width and y over
        # the height.
        full_box = torch.tensor(
            [[0, 0, w - 1, h - 1]],
            dtype=torch.float32, device=device)
        boxes = [full_box for _ in range(b)]

        feature = self.roi_pooling(feature, boxes)
        feature = self.down_conv(feature).view(b, 3, -1)
        color_cast = self.acc_head(feature).squeeze(-1)  # (b, 3)

        loss = self.loss_fn(color_cast, a)
        return self.loss_weight * loss
36 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/color_cast_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmengine.registry import MODELS
4 | from torch import Tensor
5 |
6 |
@MODELS.register_module()
class ColorCastLoss(nn.Module):
    """MSE between the spatial channel means and their cyclic permutation.

    Args:
        loss_weight: Factor applied to the loss.
    """

    def __init__(self, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

    def forward(self, x: Tensor) -> Tensor:
        """Return the weighted loss for ``x`` (averaged over its last two dims)."""
        channel_mean = x.mean(dim=(-2, -1))
        # Channels (0, 1, 2) are compared with channels (1, 2, 0).
        permuted = channel_mean[:, [1, 2, 0]]
        return self.loss_weight * self.loss_fn(channel_mean, permuted)
19 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/saturated_pixel_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmengine.registry import MODELS
4 | from torch import Tensor
5 |
6 |
@MODELS.register_module()
class SaturatedPixelLoss(nn.Module):
    """Penalty on the parts of two tensors that lie above 1 or below 0.

    Args:
        loss_weight: Factor applied to the loss.
    """

    def __init__(self, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight

    def forward(self, a: Tensor, b: Tensor) -> Tensor:
        """Return the weighted sum of the mean overshoot and mean undershoot.

        NaN entries are ignored by the means (``nanmean``).
        """
        one = a.new_ones(1)
        zero = a.new_zeros(1)

        # Amount by which the values exceed 1 (zero where they do not).
        overshoot = torch.max(a, one) + torch.max(b, one) - 2 * one
        # Negative values only (zero where the values are non-negative).
        undershoot = torch.min(a, zero) + torch.min(b, zero)

        loss = overshoot.nanmean() - undershoot.nanmean()
        return self.loss_weight * loss
21 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/total_variation_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmengine.registry import MODELS
3 | from torch import Tensor
4 |
5 |
@MODELS.register_module()
class TotalVariationLoss(nn.Module):
    """MSE between neighbouring rows plus MSE between neighbouring columns.

    Args:
        loss_weight: Factor applied to the loss.
    """

    def __init__(self, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

    def forward(self, x: Tensor) -> Tensor:
        """Return the weighted loss for a 4-d tensor ``x``."""
        # Unpacking checks that x has exactly four dimensions.
        _, _, _, _ = x.shape
        vertical = self.loss_fn(x[:, :, 1:, :], x[:, :, :-1, :])
        horizontal = self.loss_fn(x[:, :, :, 1:], x[:, :, :, :-1])
        return self.loss_weight * (vertical + horizontal)
19 |
--------------------------------------------------------------------------------
/unitmodule/models/losses/transmission_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from mmengine.registry import MODELS
3 | from torch import Tensor
4 |
5 |
@MODELS.register_module()
class TransmissionLoss(nn.Module):
    """Weighted mean-squared error between two tensors.

    Args:
        loss_weight: Factor applied to the loss.
    """

    def __init__(self, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

    def forward(self, a: Tensor, b: Tensor) -> Tensor:
        """Return ``loss_weight * MSE(a, b)``."""
        return self.loss_weight * self.loss_fn(a, b)
16 |
--------------------------------------------------------------------------------