├── .gitignore ├── LICENSE ├── README.md ├── configs ├── _base_ │ ├── datasets │ │ ├── duo_detection.py │ │ └── duo_detection_mmyolo.py │ ├── default_runtime.py │ └── default_runtime_mmyolo.py ├── cascade_rcnn │ ├── cascade_rcnn_r50_1x_duo.py │ └── unitmodule_cascade_rcnn_r50_1x_duo.py ├── detr │ ├── detr_r50_500e_duo.py │ └── unitmodule_detr_r50_500e_duo.py ├── dino │ ├── dino_4scale_r50_1x_duo.py │ └── unitmodule_dino_4scale_r50_1x_duo.py ├── faster_rcnn │ ├── faster_rcnn_r50_1x_duo.py │ └── unitmodule_faster_rcnn_r50_1x_duo.py ├── fcos │ ├── fcos_r50_1x_duo.py │ └── unitmodule_fcos_r50_1x_duo.py ├── retinanet │ ├── retinanet_r50_1x_duo.py │ └── unitmodule_retinanet_r50_1x_duo.py ├── rtmdet │ ├── rtmdet_s_100e_duo.py │ └── unitmodule_rtmdet_s_100e_duo.py ├── tood │ ├── tood_r50_1x_duo.py │ └── unitmodule_tood_r50_1x_duo.py ├── unitmodule │ └── unitmodule.py ├── yolov5 │ ├── unitmodule_yolov5_s_100e_duo.py │ └── yolov5_s_100e_duo.py ├── yolov6 │ ├── unitmodule_yolov6_s_100e_duo.py │ └── yolov6_s_100e_duo.py ├── yolov7 │ ├── unitmodule_yolov7_t_100e_duo.py │ └── yolov7_t_100e_duo.py ├── yolov8 │ ├── unitmodule_yolov8_s_100e_duo.py │ └── yolov8_s_100e_duo.py └── yolox │ ├── unitmodule_yolox_s_100e_duo.py │ └── yolox_s_100e_duo.py ├── requirements.txt ├── tools ├── dist_test.sh ├── dist_train.sh ├── test.py └── train.py └── unitmodule ├── __init__.py ├── datasets ├── __init__.py └── transforms │ ├── __init__.py │ └── colorspace.py └── models ├── __init__.py ├── data_preprocessors ├── __init__.py ├── data_preprocessor.py └── unit_module.py ├── detectors ├── __init__.py ├── unit_detectors.py └── unit_distributed.py └── losses ├── __init__.py ├── assisting_color_cast_loss.py ├── color_cast_loss.py ├── saturated_pixel_loss.py ├── total_variation_loss.py └── transmission_loss.py /.gitignore: -------------------------------------------------------------------------------- 1 | # The repo 2 | .idea 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | 
*.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LEFTeyex 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #
UnitModule 2 | 3 | ### Installation 4 | 5 | This project is based on [MMDetection](https://github.com/open-mmlab/mmdetection/tree/main). 6 | 7 | - Python 3.8 8 | - PyTorch 1.11.0+cu113 9 | 10 | **Step 1.** Create a conda virtual environment and activate it. 11 | 12 | ```bash 13 | conda create -n unitmodule python=3.8 -y 14 | conda activate unitmodule 15 | ``` 16 | 17 | **Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/). 18 | 19 | Linux and Windows 20 | 21 | ```bash 22 | # Wheel CUDA 11.3 23 | pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 24 | ``` 25 | 26 | ```bash 27 | # Conda CUDA 11.3 28 | conda install pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch 29 | ``` 30 | 31 | **Step 3.** Install MMDetection and its dependent packages. 32 | 33 | ```bash 34 | pip install -U openmim 35 | mim install mmengine==0.7.4 36 | mim install mmcv==2.0.0 37 | mim install mmdet==3.0.0 38 | mim install mmyolo==0.5.0 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | ### Dataset 43 | 44 | The DUO dataset should be organized as follows: 45 | 46 | ```text 47 | # DUO 48 | 49 | data 50 | ├── DUO 51 | │ ├── annotations 52 | │ │ ├── instances_train.json 53 | │ │ ├── instances_test.json 54 | │ ├── images 55 | │ │ ├── train 56 | │ │ ├── test 57 | ``` 58 | 59 | ### Training 60 | 61 | ```bash 62 | bash tools/dist_train.sh configs/yolox/yolox_s_100e_duo.py 2 63 | ``` 64 | 65 | ### Test 66 | 67 | ```bash 68 | bash tools/dist_test.sh configs/yolox/yolox_s_100e_duo.py yolox_s_100e_duo.pth 2 69 | ``` -------------------------------------------------------------------------------- /configs/_base_/datasets/duo_detection.py: -------------------------------------------------------------------------------- 1 | data_root = 'data/DUO/' 2 | 3 | train_img_file = 'images/train' 4 | val_img_file = 'images/test' 5 | train_ann_file = 
'annotations/instances_train.json' 6 | val_ann_file = 'annotations/instances_test.json' 7 | 8 | mean_bgr = [85.603, 148.034, 64.697] 9 | std_bgr = [32.28, 39.201, 26.55] 10 | mean_rgb = [64.697, 148.034, 85.603] 11 | std_rgb = [26.55, 39.201, 32.28] 12 | 13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish') 14 | 15 | img_scale = (1333, 800) 16 | dataset_type = 'CocoDataset' 17 | evaluator_type = 'CocoMetric' 18 | train_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='LoadAnnotations', with_bbox=True), 21 | dict(type='Resize', scale=img_scale, keep_ratio=True), 22 | dict(type='RandomFlip', prob=0.5), 23 | dict(type='PackDetInputs') 24 | ] 25 | test_pipeline = [ 26 | dict(type='LoadImageFromFile'), 27 | dict(type='Resize', scale=img_scale, keep_ratio=True), 28 | dict(type='LoadAnnotations', with_bbox=True), 29 | dict( 30 | type='PackDetInputs', 31 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 32 | 'scale_factor')) 33 | ] 34 | 35 | num_gpu = 2 36 | train_bs = 4 37 | val_bs = 1 38 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu) 39 | train_dataloader = dict( 40 | batch_size=train_bs, 41 | num_workers=train_bs, 42 | persistent_workers=True, 43 | sampler=dict(type='DefaultSampler', shuffle=True), 44 | batch_sampler=dict(type='AspectRatioBatchSampler'), 45 | dataset=dict( 46 | type=dataset_type, 47 | metainfo=dict(classes=classes), 48 | data_root=data_root, 49 | ann_file=train_ann_file, 50 | data_prefix=dict(img=train_img_file), 51 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 52 | pipeline=train_pipeline, 53 | )) 54 | 55 | val_dataloader = dict( 56 | batch_size=val_bs, 57 | num_workers=val_bs * 2, 58 | persistent_workers=True, 59 | drop_last=False, 60 | sampler=dict(type='DefaultSampler', shuffle=False), 61 | dataset=dict( 62 | type=dataset_type, 63 | metainfo=dict(classes=classes), 64 | data_root=data_root, 65 | ann_file=val_ann_file, 66 | data_prefix=dict(img=val_img_file), 67 | test_mode=True, 
68 | pipeline=test_pipeline, 69 | )) 70 | 71 | test_dataloader = val_dataloader 72 | 73 | val_evaluator = dict( 74 | type=evaluator_type, 75 | ann_file=data_root + val_ann_file, 76 | metric='bbox', 77 | format_only=False) 78 | test_evaluator = val_evaluator 79 | -------------------------------------------------------------------------------- /configs/_base_/datasets/duo_detection_mmyolo.py: -------------------------------------------------------------------------------- 1 | data_root = 'data/DUO/' 2 | 3 | train_img_file = 'images/train' 4 | val_img_file = 'images/test' 5 | train_ann_file = 'annotations/instances_train.json' 6 | val_ann_file = 'annotations/instances_test.json' 7 | 8 | mean_bgr = [85.603, 148.034, 64.697] 9 | std_bgr = [32.28, 39.201, 26.55] 10 | mean_rgb = [64.697, 148.034, 85.603] 11 | std_rgb = [26.55, 39.201, 32.28] 12 | 13 | classes = ('holothurian', 'echinus', 'scallop', 'starfish') 14 | 15 | img_scale = (640, 640) 16 | dataset_type = 'YOLOv5CocoDataset' 17 | evaluator_type = 'mmdet.CocoMetric' 18 | train_pipeline = [ 19 | dict(type='LoadImageFromFile'), 20 | dict(type='mmdet.LoadAnnotations', with_bbox=True), 21 | dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), 22 | dict(type='mmdet.Pad', 23 | pad_to_square=True, 24 | pad_val=dict(img=(114.0, 114.0, 114.0))), 25 | dict(type='mmdet.RandomFlip', prob=0.5), 26 | dict(type='mmdet.PackDetInputs') 27 | ] 28 | test_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), 31 | dict(type='mmdet.Pad', 32 | pad_to_square=True, 33 | pad_val=dict(img=(114.0, 114.0, 114.0))), 34 | dict(type='mmdet.LoadAnnotations', with_bbox=True), 35 | dict( 36 | type='mmdet.PackDetInputs', 37 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 38 | 'scale_factor')) 39 | ] 40 | 41 | num_gpu = 2 42 | train_bs = 4 43 | val_bs = 1 44 | auto_scale_lr = dict(enable=False, base_batch_size=train_bs * num_gpu) 45 | train_dataloader = dict( 46 | 
batch_size=train_bs, 47 | num_workers=train_bs, 48 | persistent_workers=True, 49 | collate_fn=dict(type='yolov5_collate'), 50 | sampler=dict(type='DefaultSampler', shuffle=True), 51 | batch_sampler=dict(type='mmdet.AspectRatioBatchSampler'), 52 | dataset=dict( 53 | type=dataset_type, 54 | metainfo=dict(classes=classes), 55 | data_root=data_root, 56 | ann_file=train_ann_file, 57 | data_prefix=dict(img=train_img_file), 58 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 59 | pipeline=train_pipeline, 60 | )) 61 | 62 | val_dataloader = dict( 63 | batch_size=val_bs, 64 | num_workers=val_bs * 2, 65 | persistent_workers=True, 66 | drop_last=False, 67 | sampler=dict(type='DefaultSampler', shuffle=False), 68 | dataset=dict( 69 | type=dataset_type, 70 | metainfo=dict(classes=classes), 71 | data_root=data_root, 72 | ann_file=val_ann_file, 73 | data_prefix=dict(img=val_img_file), 74 | test_mode=True, 75 | pipeline=test_pipeline, 76 | )) 77 | 78 | test_dataloader = val_dataloader 79 | 80 | val_evaluator = dict( 81 | type=evaluator_type, 82 | ann_file=data_root + val_ann_file, 83 | metric='bbox', 84 | format_only=False) 85 | test_evaluator = val_evaluator 86 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmdet' 2 | log_level = 'INFO' 3 | load_from = None 4 | resume = False 5 | 6 | env_cfg = dict( 7 | cudnn_benchmark=False, 8 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 9 | dist_cfg=dict(backend='nccl'), 10 | ) 11 | randomness = dict(seed=None) 12 | 13 | vis_backends = [ 14 | dict(type='LocalVisBackend'), 15 | dict(type='TensorboardVisBackend') 16 | ] 17 | visualizer = dict( 18 | type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) 20 | default_hooks = dict( 21 | 
timer=dict(type='IterTimerHook'), 22 | logger=dict(type='LoggerHook', interval=50), 23 | param_scheduler=dict(type='ParamSchedulerHook'), 24 | checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'), 25 | sampler_seed=dict(type='DistSamplerSeedHook'), 26 | visualization=dict(type='DetVisualizationHook')) 27 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime_mmyolo.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmyolo' 2 | log_level = 'INFO' 3 | load_from = None 4 | resume = False 5 | 6 | env_cfg = dict( 7 | cudnn_benchmark=False, 8 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 9 | dist_cfg=dict(backend='nccl'), 10 | ) 11 | randomness = dict(seed=None) 12 | 13 | vis_backends = [ 14 | dict(type='LocalVisBackend'), 15 | dict(type='TensorboardVisBackend') 16 | ] 17 | visualizer = dict( 18 | type='mmdet.DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 19 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) 20 | default_hooks = dict( 21 | timer=dict(type='IterTimerHook'), 22 | logger=dict(type='LoggerHook', interval=50), 23 | param_scheduler=dict(type='ParamSchedulerHook'), 24 | checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='coco/bbox_mAP'), 25 | sampler_seed=dict(type='DistSamplerSeedHook'), 26 | visualization=dict(type='mmdet.DetVisualizationHook')) 27 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_rcnn_r50_1x_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_detection.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | max_epochs = 12 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = 
dict(type='TestLoop') 9 | 10 | param_scheduler = [ 11 | dict( 12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 13 | dict( 14 | type='MultiStepLR', 15 | begin=0, 16 | milestones=[8, 11], 17 | gamma=0.1) 18 | ] 19 | optim_wrapper = dict( 20 | type='OptimWrapper', 21 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 22 | 23 | num_classes = 4 24 | model = dict( 25 | type='CascadeRCNN', 26 | data_preprocessor=dict( 27 | type='DetDataPreprocessor', 28 | mean=_base_.mean_rgb, 29 | std=_base_.std_rgb, 30 | bgr_to_rgb=True, 31 | pad_size_divisor=32), 32 | backbone=dict( 33 | type='ResNet', 34 | depth=50, 35 | num_stages=4, 36 | out_indices=(0, 1, 2, 3), 37 | frozen_stages=1, 38 | norm_cfg=dict(type='BN', requires_grad=True), 39 | norm_eval=True, 40 | style='pytorch', 41 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 42 | neck=dict( 43 | type='FPN', 44 | in_channels=[256, 512, 1024, 2048], 45 | out_channels=256, 46 | num_outs=5), 47 | rpn_head=dict( 48 | type='RPNHead', 49 | in_channels=256, 50 | feat_channels=256, 51 | anchor_generator=dict( 52 | type='AnchorGenerator', 53 | scales=[8], 54 | ratios=[0.5, 1.0, 2.0], 55 | strides=[4, 8, 16, 32, 64]), 56 | bbox_coder=dict( 57 | type='DeltaXYWHBBoxCoder', 58 | target_means=[0.0, 0.0, 0.0, 0.0], 59 | target_stds=[1.0, 1.0, 1.0, 1.0]), 60 | loss_cls=dict( 61 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 62 | loss_bbox=dict( 63 | type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)), 64 | roi_head=dict( 65 | type='CascadeRoIHead', 66 | num_stages=3, 67 | stage_loss_weights=[1, 0.5, 0.25], 68 | bbox_roi_extractor=dict( 69 | type='SingleRoIExtractor', 70 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 71 | out_channels=256, 72 | featmap_strides=[4, 8, 16, 32]), 73 | bbox_head=[ 74 | dict( 75 | type='Shared2FCBBoxHead', 76 | in_channels=256, 77 | fc_out_channels=1024, 78 | roi_feat_size=7, 79 | 
num_classes=num_classes, 80 | bbox_coder=dict( 81 | type='DeltaXYWHBBoxCoder', 82 | target_means=[0.0, 0.0, 0.0, 0.0], 83 | target_stds=[0.1, 0.1, 0.2, 0.2]), 84 | reg_class_agnostic=True, 85 | loss_cls=dict( 86 | type='CrossEntropyLoss', 87 | use_sigmoid=False, 88 | loss_weight=1.0), 89 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 90 | loss_weight=1.0)), 91 | dict( 92 | type='Shared2FCBBoxHead', 93 | in_channels=256, 94 | fc_out_channels=1024, 95 | roi_feat_size=7, 96 | num_classes=num_classes, 97 | bbox_coder=dict( 98 | type='DeltaXYWHBBoxCoder', 99 | target_means=[0.0, 0.0, 0.0, 0.0], 100 | target_stds=[0.05, 0.05, 0.1, 0.1]), 101 | reg_class_agnostic=True, 102 | loss_cls=dict( 103 | type='CrossEntropyLoss', 104 | use_sigmoid=False, 105 | loss_weight=1.0), 106 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, 107 | loss_weight=1.0)), 108 | dict( 109 | type='Shared2FCBBoxHead', 110 | in_channels=256, 111 | fc_out_channels=1024, 112 | roi_feat_size=7, 113 | num_classes=num_classes, 114 | bbox_coder=dict( 115 | type='DeltaXYWHBBoxCoder', 116 | target_means=[0.0, 0.0, 0.0, 0.0], 117 | target_stds=[0.033, 0.033, 0.067, 0.067]), 118 | reg_class_agnostic=True, 119 | loss_cls=dict( 120 | type='CrossEntropyLoss', 121 | use_sigmoid=False, 122 | loss_weight=1.0), 123 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)) 124 | ]), 125 | train_cfg=dict( 126 | rpn=dict( 127 | assigner=dict( 128 | type='MaxIoUAssigner', 129 | pos_iou_thr=0.7, 130 | neg_iou_thr=0.3, 131 | min_pos_iou=0.3, 132 | match_low_quality=True, 133 | ignore_iof_thr=-1), 134 | sampler=dict( 135 | type='RandomSampler', 136 | num=256, 137 | pos_fraction=0.5, 138 | neg_pos_ub=-1, 139 | add_gt_as_proposals=False), 140 | allowed_border=0, 141 | pos_weight=-1, 142 | debug=False), 143 | rpn_proposal=dict( 144 | nms_pre=2000, 145 | max_per_img=2000, 146 | nms=dict(type='nms', iou_threshold=0.7), 147 | min_bbox_size=0), 148 | rcnn=[ 149 | dict( 150 | assigner=dict( 151 | type='MaxIoUAssigner', 152 | 
pos_iou_thr=0.5, 153 | neg_iou_thr=0.5, 154 | min_pos_iou=0.5, 155 | match_low_quality=False, 156 | ignore_iof_thr=-1), 157 | sampler=dict( 158 | type='RandomSampler', 159 | num=512, 160 | pos_fraction=0.25, 161 | neg_pos_ub=-1, 162 | add_gt_as_proposals=True), 163 | pos_weight=-1, 164 | debug=False), 165 | dict( 166 | assigner=dict( 167 | type='MaxIoUAssigner', 168 | pos_iou_thr=0.6, 169 | neg_iou_thr=0.6, 170 | min_pos_iou=0.6, 171 | match_low_quality=False, 172 | ignore_iof_thr=-1), 173 | sampler=dict( 174 | type='RandomSampler', 175 | num=512, 176 | pos_fraction=0.25, 177 | neg_pos_ub=-1, 178 | add_gt_as_proposals=True), 179 | pos_weight=-1, 180 | debug=False), 181 | dict( 182 | assigner=dict( 183 | type='MaxIoUAssigner', 184 | pos_iou_thr=0.7, 185 | neg_iou_thr=0.7, 186 | min_pos_iou=0.7, 187 | match_low_quality=False, 188 | ignore_iof_thr=-1), 189 | sampler=dict( 190 | type='RandomSampler', 191 | num=512, 192 | pos_fraction=0.25, 193 | neg_pos_ub=-1, 194 | add_gt_as_proposals=True), 195 | pos_weight=-1, 196 | debug=False) 197 | ]), 198 | test_cfg=dict( 199 | rpn=dict( 200 | nms_pre=1000, 201 | max_per_img=1000, 202 | nms=dict(type='nms', iou_threshold=0.7), 203 | min_bbox_size=0), 204 | rcnn=dict( 205 | score_thr=0.05, 206 | nms=dict(type='nms', iou_threshold=0.5), 207 | max_per_img=100))) 208 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/unitmodule_cascade_rcnn_r50_1x_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './cascade_rcnn_r50_1x_duo.py', 3 | '../unitmodule/unitmodule.py', 4 | ] 5 | 6 | model = dict( 7 | type='UnitCascadeRCNN', 8 | data_preprocessor=dict( 9 | type='UnitDetDataPreprocessor', 10 | unit_module=_base_.unit_module) 11 | ) 12 | 13 | optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2)) 14 | 15 | train_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict(type='LoadAnnotations', with_bbox=True), 18 | 
dict(type='Resize', scale=_base_.img_scale, keep_ratio=True), 19 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5), 20 | dict(type='RandomFlip', prob=0.5), 21 | dict(type='PackDetInputs') 22 | ] 23 | 24 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 25 | -------------------------------------------------------------------------------- /configs/detr/detr_r50_500e_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_detection.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | max_epochs = 500 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = dict(type='TestLoop') 9 | 10 | param_scheduler = [ 11 | dict( 12 | type='MultiStepLR', 13 | begin=0, 14 | end=max_epochs, 15 | by_epoch=True, 16 | milestones=[334], 17 | gamma=0.1) 18 | ] 19 | optim_wrapper = dict( 20 | type='OptimWrapper', 21 | optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001), 22 | clip_grad=dict(max_norm=0.1, norm_type=2), 23 | paramwise_cfg=dict( 24 | custom_keys=dict(backbone=dict(lr_mult=0.1, decay_mult=1.0)))) 25 | 26 | num_classes = 4 27 | model = dict( 28 | type='DETR', 29 | num_queries=100, 30 | data_preprocessor=dict( 31 | type='DetDataPreprocessor', 32 | mean=_base_.mean_rgb, 33 | std=_base_.std_rgb, 34 | bgr_to_rgb=True, 35 | pad_size_divisor=32), 36 | backbone=dict( 37 | type='ResNet', 38 | depth=50, 39 | num_stages=4, 40 | out_indices=(3,), 41 | frozen_stages=1, 42 | norm_cfg=dict(type='BN', requires_grad=False), 43 | norm_eval=True, 44 | style='pytorch', 45 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 46 | neck=dict( 47 | type='ChannelMapper', 48 | in_channels=[2048], 49 | kernel_size=1, 50 | out_channels=256, 51 | act_cfg=None, 52 | norm_cfg=None, 53 | num_outs=1), 54 | encoder=dict( 55 | num_layers=6, 56 | layer_cfg=dict( 57 | self_attn_cfg=dict( 58 | embed_dims=256, 
num_heads=8, dropout=0.1, batch_first=True), 59 | ffn_cfg=dict( 60 | embed_dims=256, 61 | feedforward_channels=2048, 62 | num_fcs=2, 63 | ffn_drop=0.1, 64 | act_cfg=dict(type='ReLU', inplace=True)))), 65 | decoder=dict( 66 | num_layers=6, 67 | layer_cfg=dict( 68 | self_attn_cfg=dict( 69 | embed_dims=256, num_heads=8, dropout=0.1, batch_first=True), 70 | cross_attn_cfg=dict( 71 | embed_dims=256, num_heads=8, dropout=0.1, batch_first=True), 72 | ffn_cfg=dict( 73 | embed_dims=256, 74 | feedforward_channels=2048, 75 | num_fcs=2, 76 | ffn_drop=0.1, 77 | act_cfg=dict(type='ReLU', inplace=True))), 78 | return_intermediate=True), 79 | positional_encoding=dict(num_feats=128, normalize=True), 80 | bbox_head=dict( 81 | type='DETRHead', 82 | num_classes=num_classes, 83 | embed_dims=256, 84 | loss_cls=dict( 85 | type='CrossEntropyLoss', 86 | bg_cls_weight=0.1, 87 | use_sigmoid=False, 88 | loss_weight=1.0, 89 | class_weight=1.0), 90 | loss_bbox=dict(type='L1Loss', loss_weight=5.0), 91 | loss_iou=dict(type='GIoULoss', loss_weight=2.0)), 92 | train_cfg=dict( 93 | assigner=dict( 94 | type='HungarianAssigner', 95 | match_costs=[ 96 | dict(type='ClassificationCost', weight=1.0), 97 | dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), 98 | dict(type='IoUCost', iou_mode='giou', weight=2.0) 99 | ])), 100 | test_cfg=dict(max_per_img=100)) 101 | -------------------------------------------------------------------------------- /configs/detr/unitmodule_detr_r50_500e_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './detr_r50_500e_duo.py', 3 | ] 4 | 5 | with_unit_module = True 6 | norm_cfg = dict(type='GN', num_groups=8) 7 | act_cfg = dict(type='ReLU') 8 | 9 | k_1, k_2 = 9, 9 10 | c_s1, c_s2 = 32, 32 11 | 12 | unit_module = dict( 13 | type='UnitModule', 14 | unit_backbone=dict( 15 | type='UnitBackbone', 16 | stem_channels=(c_s1, c_s2), 17 | large_kernels=(k_1, k_2), 18 | small_kernels=(3, 3), 19 | dw_ratio=1.0, 20 | 
norm_cfg=norm_cfg, 21 | act_cfg=act_cfg), 22 | t_head=dict( 23 | type='THead', 24 | in_channels=c_s2, 25 | hid_channels=c_s2, 26 | out_channels=3, 27 | norm_cfg=norm_cfg, 28 | act_cfg=act_cfg), 29 | a_head=dict(type='AHead'), 30 | loss_t=dict(type='TransmissionLoss', loss_weight=1000), 31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01), 32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01), 33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1), 34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1), 35 | alpha=0.9, 36 | t_min=0.001) 37 | 38 | model = dict( 39 | type='UnitDETR', 40 | data_preprocessor=dict( 41 | type='UnitDetDataPreprocessor', 42 | unit_module=unit_module) 43 | ) 44 | 45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2)) 46 | 47 | train_pipeline = [ 48 | dict(type='LoadImageFromFile'), 49 | dict(type='LoadAnnotations', with_bbox=True), 50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True), 51 | dict(type='UnderwaterColorRandomTransfer', hue_delta=5), 52 | dict(type='RandomFlip', prob=0.5), 53 | dict(type='PackDetInputs') 54 | ] 55 | 56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 57 | -------------------------------------------------------------------------------- /configs/dino/dino_4scale_r50_1x_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_detection.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | max_epochs = 12 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = dict(type='TestLoop') 9 | 10 | param_scheduler = [ 11 | dict( 12 | type='MultiStepLR', 13 | begin=0, 14 | end=max_epochs, 15 | by_epoch=True, 16 | milestones=[11], 17 | gamma=0.1) 18 | ] 19 | optim_wrapper = dict( 20 | type='OptimWrapper', 21 | optimizer=dict(type='AdamW', lr=0.0001, weight_decay=0.0001), 22 | 
clip_grad=dict(max_norm=0.1, norm_type=2), 23 | paramwise_cfg=dict(custom_keys=dict(backbone=dict(lr_mult=0.1)))) 24 | 25 | num_classes = 4 26 | model = dict( 27 | type='DINO', 28 | num_queries=900, 29 | with_box_refine=True, 30 | as_two_stage=True, 31 | data_preprocessor=dict( 32 | type='DetDataPreprocessor', 33 | mean=_base_.mean_rgb, 34 | std=_base_.std_rgb, 35 | bgr_to_rgb=True, 36 | pad_size_divisor=32), 37 | backbone=dict( 38 | type='ResNet', 39 | depth=50, 40 | num_stages=4, 41 | out_indices=(1, 2, 3), 42 | frozen_stages=1, 43 | norm_cfg=dict(type='BN', requires_grad=False), 44 | norm_eval=True, 45 | style='pytorch', 46 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 47 | neck=dict( 48 | type='ChannelMapper', 49 | in_channels=[512, 1024, 2048], 50 | kernel_size=1, 51 | out_channels=256, 52 | act_cfg=None, 53 | norm_cfg=dict(type='GN', num_groups=32), 54 | num_outs=4), 55 | encoder=dict( 56 | num_layers=6, 57 | layer_cfg=dict( 58 | self_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0), 59 | ffn_cfg=dict( 60 | embed_dims=256, feedforward_channels=2048, ffn_drop=0.0))), 61 | decoder=dict( 62 | num_layers=6, 63 | return_intermediate=True, 64 | layer_cfg=dict( 65 | self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), 66 | cross_attn_cfg=dict(embed_dims=256, num_levels=4, dropout=0.0), 67 | ffn_cfg=dict( 68 | embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)), 69 | post_norm_cfg=None), 70 | positional_encoding=dict( 71 | num_feats=128, normalize=True, offset=0.0, temperature=20), 72 | bbox_head=dict( 73 | type='DINOHead', 74 | num_classes=num_classes, 75 | sync_cls_avg_factor=True, 76 | loss_cls=dict( 77 | type='FocalLoss', 78 | use_sigmoid=True, 79 | gamma=2.0, 80 | alpha=0.25, 81 | loss_weight=1.0), 82 | loss_bbox=dict(type='L1Loss', loss_weight=5.0), 83 | loss_iou=dict(type='GIoULoss', loss_weight=2.0)), 84 | dn_cfg=dict( 85 | label_noise_scale=0.5, 86 | box_noise_scale=1.0, 87 | group_cfg=dict(dynamic=True, 
num_groups=None, num_dn_queries=100)), 88 | train_cfg=dict( 89 | assigner=dict( 90 | type='HungarianAssigner', 91 | match_costs=[ 92 | dict(type='FocalLossCost', weight=2.0), 93 | dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'), 94 | dict(type='IoUCost', iou_mode='giou', weight=2.0) 95 | ])), 96 | test_cfg=dict(max_per_img=300)) 97 | -------------------------------------------------------------------------------- /configs/dino/unitmodule_dino_4scale_r50_1x_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './dino_4scale_r50_1x_duo.py', 3 | ] 4 | 5 | with_unit_module = True 6 | norm_cfg = dict(type='GN', num_groups=8) 7 | act_cfg = dict(type='ReLU') 8 | 9 | k_1, k_2 = 9, 9 10 | c_s1, c_s2 = 32, 32 11 | 12 | unit_module = dict( 13 | type='UnitModule', 14 | unit_backbone=dict( 15 | type='UnitBackbone', 16 | stem_channels=(c_s1, c_s2), 17 | large_kernels=(k_1, k_2), 18 | small_kernels=(3, 3), 19 | dw_ratio=1.0, 20 | norm_cfg=norm_cfg, 21 | act_cfg=act_cfg), 22 | t_head=dict( 23 | type='THead', 24 | in_channels=c_s2, 25 | hid_channels=c_s2, 26 | out_channels=3, 27 | norm_cfg=norm_cfg, 28 | act_cfg=act_cfg), 29 | a_head=dict(type='AHead'), 30 | loss_t=dict(type='TransmissionLoss', loss_weight=1000), 31 | loss_sp=dict(type='SaturatedPixelLoss', loss_weight=0.01), 32 | loss_tv=dict(type='TotalVariationLoss', loss_weight=0.01), 33 | loss_cc=dict(type='ColorCastLoss', loss_weight=0.1), 34 | loss_acc=dict(type='AssistingColorCastLoss', channels=c_s2, loss_weight=0.1), 35 | alpha=0.9, 36 | t_min=0.001) 37 | 38 | model = dict( 39 | type='UnitDINO', 40 | data_preprocessor=dict( 41 | type='UnitDetDataPreprocessor', 42 | unit_module=unit_module) 43 | ) 44 | 45 | optim_wrapper = dict(clip_grad=dict(max_norm=0.1, norm_type=2)) 46 | 47 | train_pipeline = [ 48 | dict(type='LoadImageFromFile'), 49 | dict(type='LoadAnnotations', with_bbox=True), 50 | dict(type='Resize', scale=_base_.img_scale, keep_ratio=True), 51 | 
dict(type='UnderwaterColorRandomTransfer', hue_delta=5), 52 | dict(type='RandomFlip', prob=0.5), 53 | dict(type='PackDetInputs') 54 | ] 55 | 56 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 57 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_1x_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_detection.py', 3 | '../_base_/default_runtime.py', 4 | ] 5 | max_epochs = 12 6 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=1) 7 | val_cfg = dict(type='ValLoop') 8 | test_cfg = dict(type='TestLoop') 9 | 10 | param_scheduler = [ 11 | dict( 12 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 13 | dict( 14 | type='MultiStepLR', 15 | begin=0, 16 | end=max_epochs, 17 | by_epoch=True, 18 | milestones=[8, 11], 19 | gamma=0.1) 20 | ] 21 | optim_wrapper = dict( 22 | type='OptimWrapper', 23 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 24 | 25 | num_classes = 4 26 | model = dict( 27 | type='FasterRCNN', 28 | data_preprocessor=dict( 29 | type='DetDataPreprocessor', 30 | mean=_base_.mean_rgb, 31 | std=_base_.std_rgb, 32 | bgr_to_rgb=True, 33 | pad_size_divisor=32), 34 | backbone=dict( 35 | type='ResNet', 36 | depth=50, 37 | num_stages=4, 38 | out_indices=(0, 1, 2, 3), 39 | frozen_stages=1, 40 | norm_cfg=dict(type='BN', requires_grad=True), 41 | norm_eval=True, 42 | style='pytorch', 43 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 44 | neck=dict( 45 | type='FPN', 46 | in_channels=[256, 512, 1024, 2048], 47 | out_channels=256, 48 | num_outs=5), 49 | rpn_head=dict( 50 | type='RPNHead', 51 | in_channels=256, 52 | feat_channels=256, 53 | anchor_generator=dict( 54 | type='AnchorGenerator', 55 | scales=[8], 56 | ratios=[0.5, 1.0, 2.0], 57 | strides=[4, 8, 16, 32, 64]), 58 | bbox_coder=dict( 59 | 
type='DeltaXYWHBBoxCoder', 60 | target_means=[0.0, 0.0, 0.0, 0.0], 61 | target_stds=[1.0, 1.0, 1.0, 1.0]), 62 | loss_cls=dict( 63 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 64 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 65 | roi_head=dict( 66 | type='StandardRoIHead', 67 | bbox_roi_extractor=dict( 68 | type='SingleRoIExtractor', 69 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 70 | out_channels=256, 71 | featmap_strides=[4, 8, 16, 32]), 72 | bbox_head=dict( 73 | type='Shared2FCBBoxHead', 74 | in_channels=256, 75 | fc_out_channels=1024, 76 | roi_feat_size=7, 77 | num_classes=num_classes, 78 | bbox_coder=dict( 79 | type='DeltaXYWHBBoxCoder', 80 | target_means=[0.0, 0.0, 0.0, 0.0], 81 | target_stds=[0.1, 0.1, 0.2, 0.2]), 82 | reg_class_agnostic=False, 83 | loss_cls=dict( 84 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 85 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 86 | train_cfg=dict( 87 | rpn=dict( 88 | assigner=dict( 89 | type='MaxIoUAssigner', 90 | pos_iou_thr=0.7, 91 | neg_iou_thr=0.3, 92 | min_pos_iou=0.3, 93 | match_low_quality=True, 94 | ignore_iof_thr=-1), 95 | sampler=dict( 96 | type='RandomSampler', 97 | num=256, 98 | pos_fraction=0.5, 99 | neg_pos_ub=-1, 100 | add_gt_as_proposals=False), 101 | allowed_border=-1, 102 | pos_weight=-1, 103 | debug=False), 104 | rpn_proposal=dict( 105 | nms_pre=2000, 106 | max_per_img=1000, 107 | nms=dict(type='nms', iou_threshold=0.7), 108 | min_bbox_size=0), 109 | rcnn=dict( 110 | assigner=dict( 111 | type='MaxIoUAssigner', 112 | pos_iou_thr=0.5, 113 | neg_iou_thr=0.5, 114 | min_pos_iou=0.5, 115 | match_low_quality=False, 116 | ignore_iof_thr=-1), 117 | sampler=dict( 118 | type='RandomSampler', 119 | num=512, 120 | pos_fraction=0.25, 121 | neg_pos_ub=-1, 122 | add_gt_as_proposals=True), 123 | pos_weight=-1, 124 | debug=False)), 125 | test_cfg=dict( 126 | rpn=dict( 127 | nms_pre=1000, 128 | max_per_img=1000, 129 | nms=dict(type='nms', 
# configs/faster_rcnn/unitmodule_faster_rcnn_r50_1x_duo.py
#
# Faster R-CNN (R50, 1x) on DUO with the UnitModule pre-processor enabled.
# Only the keys that differ from the plain Faster R-CNN config are set here;
# the rest is expected to come from the file listed in `_base_`
# (mmengine-style config inheritance -- confirm against the loader used by
# tools/train.py).
_base_ = ['./faster_rcnn_r50_1x_duo.py']

with_unit_module = True

# Normalisation / activation settings passed to both the unit backbone and
# the transmission head below.
norm_cfg = {'type': 'GN', 'num_groups': 8}
act_cfg = {'type': 'ReLU'}

# Values forwarded to `large_kernels` (k_*) and `stem_channels` (c_s*).
k_1 = 9
k_2 = 9
c_s1 = 32
c_s2 = 32

unit_module = {
    'type': 'UnitModule',
    'unit_backbone': {
        'type': 'UnitBackbone',
        'stem_channels': (c_s1, c_s2),
        'large_kernels': (k_1, k_2),
        'small_kernels': (3, 3),
        'dw_ratio': 1.0,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    't_head': {
        'type': 'THead',
        'in_channels': c_s2,
        'hid_channels': c_s2,
        'out_channels': 3,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    'a_head': {'type': 'AHead'},
    # Auxiliary losses of the unit module and their weights.
    'loss_t': {'type': 'TransmissionLoss', 'loss_weight': 500},
    'loss_sp': {'type': 'SaturatedPixelLoss', 'loss_weight': 0.1},
    'loss_tv': {'type': 'TotalVariationLoss', 'loss_weight': 0.01},
    'loss_cc': {'type': 'ColorCastLoss', 'loss_weight': 0.1},
    'loss_acc': {
        'type': 'AssistingColorCastLoss',
        'channels': c_s2,
        'loss_weight': 0.1,
    },
    'alpha': 0.9,
    't_min': 0.001,
}

# Swap in the Unit* detector and hand the unit module to its data
# pre-processor.
model = {
    'type': 'UnitFasterRCNN',
    'data_preprocessor': {
        'type': 'UnitDetDataPreprocessor',
        'unit_module': unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: the colour-transfer augmentation sits between the resize
# and the random flip.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
# configs/fcos/unitmodule_fcos_r50_1x_duo.py
#
# FCOS (R50, 1x) on DUO with the UnitModule pre-processor enabled.
# Only the keys that differ from the plain FCOS config are set here; the rest
# is expected to come from the file listed in `_base_` (mmengine-style config
# inheritance -- confirm against the loader used by tools/train.py).
_base_ = ['./fcos_r50_1x_duo.py']

with_unit_module = True

# Normalisation / activation settings passed to both the unit backbone and
# the transmission head below.
norm_cfg = {'type': 'GN', 'num_groups': 8}
act_cfg = {'type': 'ReLU'}

# Values forwarded to `large_kernels` (k_*) and `stem_channels` (c_s*).
k_1 = 9
k_2 = 9
c_s1 = 32
c_s2 = 32

unit_module = {
    'type': 'UnitModule',
    'unit_backbone': {
        'type': 'UnitBackbone',
        'stem_channels': (c_s1, c_s2),
        'large_kernels': (k_1, k_2),
        'small_kernels': (3, 3),
        'dw_ratio': 1.0,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    't_head': {
        'type': 'THead',
        'in_channels': c_s2,
        'hid_channels': c_s2,
        'out_channels': 3,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    'a_head': {'type': 'AHead'},
    # Auxiliary losses of the unit module and their weights.
    'loss_t': {'type': 'TransmissionLoss', 'loss_weight': 500},
    'loss_sp': {'type': 'SaturatedPixelLoss', 'loss_weight': 0.1},
    'loss_tv': {'type': 'TotalVariationLoss', 'loss_weight': 0.01},
    'loss_cc': {'type': 'ColorCastLoss', 'loss_weight': 0.1},
    'loss_acc': {
        'type': 'AssistingColorCastLoss',
        'channels': c_s2,
        'loss_weight': 0.1,
    },
    'alpha': 0.9,
    't_min': 0.001,
}

# Swap in the Unit* detector and hand the unit module to its data
# pre-processor.
model = {
    'type': 'UnitFCOS',
    'data_preprocessor': {
        'type': 'UnitDetDataPreprocessor',
        'unit_module': unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: the colour-transfer augmentation sits between the resize
# and the random flip.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
# configs/retinanet/unitmodule_retinanet_r50_1x_duo.py
#
# RetinaNet (R50, 1x) on DUO with the UnitModule pre-processor enabled.
# Only the keys that differ from the plain RetinaNet config are set here; the
# rest is expected to come from the file listed in `_base_` (mmengine-style
# config inheritance -- confirm against the loader used by tools/train.py).
_base_ = ['./retinanet_r50_1x_duo.py']

with_unit_module = True

# Normalisation / activation settings passed to both the unit backbone and
# the transmission head below.
norm_cfg = {'type': 'GN', 'num_groups': 8}
act_cfg = {'type': 'ReLU'}

# Values forwarded to `large_kernels` (k_*) and `stem_channels` (c_s*).
k_1 = 9
k_2 = 9
c_s1 = 32
c_s2 = 32

unit_module = {
    'type': 'UnitModule',
    'unit_backbone': {
        'type': 'UnitBackbone',
        'stem_channels': (c_s1, c_s2),
        'large_kernels': (k_1, k_2),
        'small_kernels': (3, 3),
        'dw_ratio': 1.0,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    't_head': {
        'type': 'THead',
        'in_channels': c_s2,
        'hid_channels': c_s2,
        'out_channels': 3,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    'a_head': {'type': 'AHead'},
    # Auxiliary losses of the unit module and their weights.
    'loss_t': {'type': 'TransmissionLoss', 'loss_weight': 500},
    'loss_sp': {'type': 'SaturatedPixelLoss', 'loss_weight': 0.1},
    'loss_tv': {'type': 'TotalVariationLoss', 'loss_weight': 0.01},
    'loss_cc': {'type': 'ColorCastLoss', 'loss_weight': 0.1},
    'loss_acc': {
        'type': 'AssistingColorCastLoss',
        'channels': c_s2,
        'loss_weight': 0.1,
    },
    'alpha': 0.9,
    't_min': 0.001,
}

# Swap in the Unit* detector and hand the unit module to its data
# pre-processor.
model = {
    'type': 'UnitRetinaNet',
    'data_preprocessor': {
        'type': 'UnitDetDataPreprocessor',
        'unit_module': unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: the colour-transfer augmentation sits between the resize
# and the random flip.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
type='EMAHook', 36 | ema_type='ExpMomentumEMA', 37 | momentum=0.0002, 38 | update_buffers=True, 39 | strict_load=False, 40 | priority=49), 41 | ] 42 | 43 | checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' 44 | num_classes = 4 45 | model = dict( 46 | type='YOLODetector', 47 | data_preprocessor=dict( 48 | type='YOLOv5DetDataPreprocessor', 49 | mean=_base_.mean_bgr, 50 | std=_base_.std_bgr, 51 | bgr_to_rgb=False), 52 | backbone=dict( 53 | type='CSPNeXt', 54 | arch='P5', 55 | expand_ratio=0.5, 56 | deepen_factor=0.33, 57 | widen_factor=0.5, 58 | channel_attention=True, 59 | norm_cfg=dict(type='BN'), 60 | act_cfg=dict(type='SiLU', inplace=True), 61 | init_cfg=dict( 62 | type='Pretrained', 63 | prefix='backbone.', 64 | checkpoint=checkpoint, 65 | map_location='cpu')), 66 | neck=dict( 67 | type='CSPNeXtPAFPN', 68 | deepen_factor=0.33, 69 | widen_factor=0.5, 70 | in_channels=[256, 512, 1024], 71 | out_channels=256, 72 | num_csp_blocks=3, 73 | expand_ratio=0.5, 74 | norm_cfg=dict(type='BN'), 75 | act_cfg=dict(type='SiLU', inplace=True)), 76 | bbox_head=dict( 77 | type='RTMDetHead', 78 | head_module=dict( 79 | type='RTMDetSepBNHeadModule', 80 | num_classes=num_classes, 81 | in_channels=256, 82 | stacked_convs=2, 83 | feat_channels=256, 84 | norm_cfg=dict(type='BN'), 85 | act_cfg=dict(type='SiLU', inplace=True), 86 | share_conv=True, 87 | pred_kernel_size=1, 88 | featmap_strides=[8, 16, 32], 89 | widen_factor=0.5), 90 | prior_generator=dict( 91 | type='mmdet.MlvlPointGenerator', offset=0, strides=[8, 16, 32]), 92 | bbox_coder=dict(type='DistancePointBBoxCoder'), 93 | loss_cls=dict( 94 | type='mmdet.QualityFocalLoss', 95 | use_sigmoid=True, 96 | beta=2.0, 97 | loss_weight=1.0), 98 | loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)), 99 | train_cfg=dict( 100 | assigner=dict( 101 | type='BatchDynamicSoftLabelAssigner', 102 | num_classes=num_classes, 103 | topk=13, 104 | 
# configs/rtmdet/unitmodule_rtmdet_s_100e_duo.py
#
# RTMDet-s (100 epochs) on DUO with the UnitModule pre-processor enabled.
# The detector settings come from the first base file and the `unit_module`
# definition from the second (mmengine-style config inheritance -- confirm
# against the loader used by tools/train.py).
_base_ = [
    './rtmdet_s_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# Swap in the Unit* detector and hand the shared unit module to its data
# pre-processor.
model = {
    'type': 'UnitYOLODetector',
    'data_preprocessor': {
        'type': 'UnitYOLOv5DetDataPreprocessor',
        'unit_module': _base_.unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: resize, colour-transfer augmentation, pad to a square
# with value 114, random flip, pack.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'mmdet.LoadAnnotations', 'with_bbox': True},
    {'type': 'mmdet.Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {
        'type': 'mmdet.Pad',
        'pad_to_square': True,
        'pad_val': {'img': (114.0, 114.0, 114.0)},
    },
    {'type': 'mmdet.RandomFlip', 'prob': 0.5},
    {'type': 'mmdet.PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
end=max_epochs, 17 | by_epoch=True, 18 | milestones=[8, 11], 19 | gamma=0.1) 20 | ] 21 | optim_wrapper = dict( 22 | type='OptimWrapper', 23 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 24 | 25 | num_classes = 4 26 | model = dict( 27 | type='TOOD', 28 | data_preprocessor=dict( 29 | type='DetDataPreprocessor', 30 | mean=_base_.mean_rgb, 31 | std=_base_.std_rgb, 32 | bgr_to_rgb=True, 33 | pad_size_divisor=32), 34 | backbone=dict( 35 | type='ResNet', 36 | depth=50, 37 | num_stages=4, 38 | out_indices=(0, 1, 2, 3), 39 | frozen_stages=1, 40 | norm_cfg=dict(type='BN', requires_grad=True), 41 | norm_eval=True, 42 | style='pytorch', 43 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 44 | neck=dict( 45 | type='FPN', 46 | in_channels=[256, 512, 1024, 2048], 47 | out_channels=256, 48 | start_level=1, 49 | add_extra_convs='on_output', 50 | num_outs=5), 51 | bbox_head=dict( 52 | type='TOODHead', 53 | num_classes=num_classes, 54 | in_channels=256, 55 | stacked_convs=6, 56 | feat_channels=256, 57 | anchor_type='anchor_free', 58 | anchor_generator=dict( 59 | type='AnchorGenerator', 60 | ratios=[1.0], 61 | octave_base_scale=8, 62 | scales_per_octave=1, 63 | strides=[8, 16, 32, 64, 128]), 64 | bbox_coder=dict( 65 | type='DeltaXYWHBBoxCoder', 66 | target_means=[0.0, 0.0, 0.0, 0.0], 67 | target_stds=[0.1, 0.1, 0.2, 0.2]), 68 | initial_loss_cls=dict( 69 | type='FocalLoss', 70 | use_sigmoid=True, 71 | activated=True, 72 | gamma=2.0, 73 | alpha=0.25, 74 | loss_weight=1.0), 75 | loss_cls=dict( 76 | type='QualityFocalLoss', 77 | use_sigmoid=True, 78 | activated=True, 79 | beta=2.0, 80 | loss_weight=1.0), 81 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), 82 | train_cfg=dict( 83 | initial_epoch=4, 84 | initial_assigner=dict(type='ATSSAssigner', topk=9), 85 | assigner=dict(type='TaskAlignedAssigner', topk=13), 86 | alpha=1, 87 | beta=6, 88 | allowed_border=-1, 89 | pos_weight=-1, 90 | debug=False), 91 | test_cfg=dict( 92 | 
# configs/tood/unitmodule_tood_r50_1x_duo.py
#
# TOOD (R50, 1x) on DUO with the UnitModule pre-processor enabled.
# Only the keys that differ from the plain TOOD config are set here; the rest
# is expected to come from the file listed in `_base_` (mmengine-style config
# inheritance -- confirm against the loader used by tools/train.py).
_base_ = ['./tood_r50_1x_duo.py']

with_unit_module = True

# Normalisation / activation settings passed to both the unit backbone and
# the transmission head below.
norm_cfg = {'type': 'GN', 'num_groups': 8}
act_cfg = {'type': 'ReLU'}

# Values forwarded to `large_kernels` (k_*) and `stem_channels` (c_s*).
k_1 = 9
k_2 = 9
c_s1 = 32
c_s2 = 32

unit_module = {
    'type': 'UnitModule',
    'unit_backbone': {
        'type': 'UnitBackbone',
        'stem_channels': (c_s1, c_s2),
        'large_kernels': (k_1, k_2),
        'small_kernels': (3, 3),
        'dw_ratio': 1.0,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    't_head': {
        'type': 'THead',
        'in_channels': c_s2,
        'hid_channels': c_s2,
        'out_channels': 3,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    'a_head': {'type': 'AHead'},
    # Auxiliary losses of the unit module and their weights.
    'loss_t': {'type': 'TransmissionLoss', 'loss_weight': 500},
    'loss_sp': {'type': 'SaturatedPixelLoss', 'loss_weight': 0.1},
    'loss_tv': {'type': 'TotalVariationLoss', 'loss_weight': 0.01},
    'loss_cc': {'type': 'ColorCastLoss', 'loss_weight': 0.1},
    'loss_acc': {
        'type': 'AssistingColorCastLoss',
        'channels': c_s2,
        'loss_weight': 0.1,
    },
    'alpha': 0.9,
    't_min': 0.001,
}

# Swap in the Unit* detector and hand the unit module to its data
# pre-processor.
model = {
    'type': 'UnitTOOD',
    'data_preprocessor': {
        'type': 'UnitDetDataPreprocessor',
        'unit_module': unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: the colour-transfer augmentation sits between the resize
# and the random flip.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'LoadAnnotations', 'with_bbox': True},
    {'type': 'Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {'type': 'RandomFlip', 'prob': 0.5},
    {'type': 'PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
# configs/unitmodule/unitmodule.py
#
# Shared UnitModule definition. The mmyolo-based unitmodule_* configs list
# this file in `_base_` and read `_base_.unit_module` from it.
with_unit_module = True

# Normalisation / activation settings passed to both the unit backbone and
# the transmission head below.
norm_cfg = {'type': 'GN', 'num_groups': 8}
act_cfg = {'type': 'ReLU'}

# Values forwarded to `large_kernels` (k_*) and `stem_channels` (c_s*).
k_1 = 9
k_2 = 9
c_s1 = 32
c_s2 = 32

unit_module = {
    'type': 'UnitModule',
    'unit_backbone': {
        'type': 'UnitBackbone',
        'stem_channels': (c_s1, c_s2),
        'large_kernels': (k_1, k_2),
        'small_kernels': (3, 3),
        'dw_ratio': 1.0,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    't_head': {
        'type': 'THead',
        'in_channels': c_s2,
        'hid_channels': c_s2,
        'out_channels': 3,
        'norm_cfg': norm_cfg,
        'act_cfg': act_cfg,
    },
    'a_head': {'type': 'AHead'},
    # Auxiliary losses of the unit module and their weights.
    'loss_t': {'type': 'TransmissionLoss', 'loss_weight': 500},
    'loss_sp': {'type': 'SaturatedPixelLoss', 'loss_weight': 0.01},
    'loss_tv': {'type': 'TotalVariationLoss', 'loss_weight': 0.01},
    'loss_cc': {'type': 'ColorCastLoss', 'loss_weight': 0.1},
    'loss_acc': {
        'type': 'AssistingColorCastLoss',
        'channels': c_s2,
        'loss_weight': 0.1,
    },
    'alpha': 0.9,
    't_min': 0.001,
}
pad_to_square=True, 22 | pad_val=dict(img=(114.0, 114.0, 114.0))), 23 | dict(type='mmdet.RandomFlip', prob=0.5), 24 | dict(type='mmdet.PackDetInputs') 25 | ] 26 | 27 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 28 | -------------------------------------------------------------------------------- /configs/yolov5/yolov5_s_100e_duo.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_detection_mmyolo.py', 3 | '../_base_/default_runtime_mmyolo.py', 4 | ] 5 | env_cfg = dict(cudnn_benchmark=True) 6 | 7 | max_epochs = 100 8 | num_last_epochs = 15 9 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, 10 | val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)]) 11 | val_cfg = dict(type='ValLoop') 12 | test_cfg = dict(type='TestLoop') 13 | 14 | optim_wrapper = dict( 15 | type='OptimWrapper', 16 | optimizer=dict( 17 | type='SGD', 18 | lr=0.01, 19 | momentum=0.937, 20 | weight_decay=0.0005, 21 | nesterov=True, 22 | batch_size_per_gpu=_base_.train_bs), 23 | constructor='YOLOv5OptimizerConstructor') 24 | default_hooks = dict( 25 | param_scheduler=dict( 26 | type='YOLOv5ParamSchedulerHook', 27 | scheduler_type='linear', 28 | lr_factor=0.01, 29 | max_epochs=max_epochs), 30 | ) 31 | custom_hooks = [ 32 | dict( 33 | type='EMAHook', 34 | ema_type='ExpMomentumEMA', 35 | momentum=0.0001, 36 | update_buffers=True, 37 | strict_load=False, 38 | priority=49) 39 | ] 40 | 41 | num_classes = 4 42 | # anchors for DUO 43 | anchors = [[(13, 12), (20, 18), (27, 25)], 44 | [(35, 31), (44, 39), (55, 52)], 45 | [(80, 45), (74, 69), (116, 102)]] 46 | num_det_layers = 3 47 | model = dict( 48 | type='YOLODetector', 49 | data_preprocessor=dict( 50 | type='YOLOv5DetDataPreprocessor', 51 | mean=[0.0, 0.0, 0.0], 52 | std=[255.0, 255.0, 255.0], 53 | bgr_to_rgb=True), 54 | backbone=dict( 55 | type='YOLOv5CSPDarknet', 56 | deepen_factor=0.33, 57 | widen_factor=0.5, 58 | 
# configs/yolov6/unitmodule_yolov6_s_100e_duo.py
#
# YOLOv6-s (100 epochs) on DUO with the UnitModule pre-processor enabled.
# The detector settings come from the first base file and the `unit_module`
# definition from the second (mmengine-style config inheritance -- confirm
# against the loader used by tools/train.py).
_base_ = [
    './yolov6_s_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# Swap in the Unit* detector and hand the shared unit module to its data
# pre-processor.
model = {
    'type': 'UnitYOLODetector',
    'data_preprocessor': {
        'type': 'UnitYOLOv5DetDataPreprocessor',
        'unit_module': _base_.unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 35, 'norm_type': 2}}

# Training pipeline: resize, colour-transfer augmentation, pad to a square
# with value 114, random flip, pack.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'mmdet.LoadAnnotations', 'with_bbox': True},
    {'type': 'mmdet.Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {
        'type': 'mmdet.Pad',
        'pad_to_square': True,
        'pad_val': {'img': (114.0, 114.0, 114.0)},
    },
    {'type': 'mmdet.RandomFlip', 'prob': 0.5},
    {'type': 'mmdet.PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
# configs/yolov7/unitmodule_yolov7_t_100e_duo.py
#
# YOLOv7-tiny (100 epochs) on DUO with the UnitModule pre-processor enabled.
# The detector settings come from the first base file and the `unit_module`
# definition from the second (mmengine-style config inheritance -- confirm
# against the loader used by tools/train.py).
_base_ = [
    './yolov7_t_100e_duo.py',
    '../unitmodule/unitmodule.py',
]

# Swap in the Unit* detector and hand the shared unit module to its data
# pre-processor.
model = {
    'type': 'UnitYOLODetector',
    'data_preprocessor': {
        'type': 'UnitYOLOv5DetDataPreprocessor',
        'unit_module': _base_.unit_module,
    },
}

optim_wrapper = {'clip_grad': {'max_norm': 55, 'norm_type': 2}}

# Training pipeline: resize, colour-transfer augmentation, pad to a square
# with value 114, random flip, pack.
train_pipeline = [
    {'type': 'LoadImageFromFile'},
    {'type': 'mmdet.LoadAnnotations', 'with_bbox': True},
    {'type': 'mmdet.Resize', 'scale': _base_.img_scale, 'keep_ratio': True},
    {'type': 'UnderwaterColorRandomTransfer', 'hue_delta': 5},
    {
        'type': 'mmdet.Pad',
        'pad_to_square': True,
        'pad_val': {'img': (114.0, 114.0, 114.0)},
    },
    {'type': 'mmdet.RandomFlip', 'prob': 0.5},
    {'type': 'mmdet.PackDetInputs'},
]

train_dataloader = {'dataset': {'pipeline': train_pipeline}}
data_preprocessor=dict( 49 | type='YOLOv5DetDataPreprocessor', 50 | mean=[0.0, 0.0, 0.0], 51 | std=[255.0, 255.0, 255.0], 52 | bgr_to_rgb=True), 53 | backbone=dict( 54 | type='YOLOv7Backbone', 55 | arch='Tiny', 56 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), 57 | act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1)), 58 | neck=dict( 59 | type='YOLOv7PAFPN', 60 | block_cfg=dict(type='TinyDownSampleBlock', middle_ratio=0.25), 61 | upsample_feats_cat_first=False, 62 | in_channels=[128, 256, 512], 63 | out_channels=[64, 128, 256], 64 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), 65 | act_cfg=dict(type='LeakyReLU', inplace=True, negative_slope=0.1), 66 | is_tiny_version=True, 67 | use_repconv_outs=False), 68 | bbox_head=dict( 69 | type='YOLOv7Head', 70 | head_module=dict( 71 | type='YOLOv7HeadModule', 72 | num_classes=num_classes, 73 | in_channels=[128, 256, 512], 74 | featmap_strides=[8, 16, 32], 75 | num_base_priors=3), 76 | prior_generator=dict( 77 | type='mmdet.YOLOAnchorGenerator', 78 | base_sizes=anchors, 79 | strides=[8, 16, 32]), 80 | loss_cls=dict( 81 | type='mmdet.CrossEntropyLoss', 82 | use_sigmoid=True, 83 | reduction='mean', 84 | loss_weight=0.5), 85 | loss_bbox=dict( 86 | type='IoULoss', 87 | iou_mode='ciou', 88 | bbox_format='xywh', 89 | reduction='mean', 90 | loss_weight=0.05, 91 | return_iou=True), 92 | loss_obj=dict( 93 | type='mmdet.CrossEntropyLoss', 94 | use_sigmoid=True, 95 | reduction='mean', 96 | loss_weight=1.0), 97 | prior_match_thr=4.0, 98 | obj_level_weights=[4.0, 1.0, 0.4], 99 | simota_candidate_topk=10, 100 | simota_iou_weight=3.0, 101 | simota_cls_weight=1.0), 102 | test_cfg=dict( 103 | multi_label=True, 104 | nms_pre=30000, 105 | score_thr=0.001, 106 | nms=dict(type='nms', iou_threshold=0.65), 107 | max_per_img=300)) 108 | -------------------------------------------------------------------------------- /configs/yolov8/unitmodule_yolov8_s_100e_duo.py: 
# YOLOv8 detector config for the DUO dataset with a 100-epoch schedule.
# Dataset pipeline and runtime defaults come from the base configs.
_base_ = [
    '../_base_/datasets/duo_detection_mmyolo.py',
    '../_base_/default_runtime_mmyolo.py',
]
env_cfg = dict(cudnn_benchmark=True)

# Schedule: `num_last_epochs` marks the final stretch of training.
max_epochs = 100
num_last_epochs = 15
# Validation runs every 10 epochs. NOTE(review): `dynamic_intervals` is
# presumably a list of (milestone_epoch, interval) pairs, i.e. validate every
# epoch from epoch `max_epochs - num_last_epochs` on -- confirm against
# mmengine's EpochBasedTrainLoop.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs,
                 val_interval=10, dynamic_intervals=[(max_epochs - num_last_epochs, 1)])
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# SGD with Nesterov momentum and gradient clipping at max_norm=10.0.
# `_base_.train_bs` is defined in one of the base configs (not visible here).
optim_wrapper = dict(
    type='OptimWrapper',
    clip_grad=dict(max_norm=10.0),
    optimizer=dict(
        type='SGD',
        lr=0.01,
        momentum=0.937,
        weight_decay=0.0005,
        nesterov=True,
        batch_size_per_gpu=_base_.train_bs),
    constructor='YOLOv5OptimizerConstructor')
# Learning-rate schedule is driven by a hook; linear variant over the full
# `max_epochs`.
default_hooks = dict(
    param_scheduler=dict(
        type='YOLOv5ParamSchedulerHook',
        scheduler_type='linear',
        lr_factor=0.01,
        max_epochs=max_epochs),
)
# Exponential moving average of the model weights.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0001,
        update_buffers=True,
        strict_load=False,
        priority=49),
]

num_classes = 4
model = dict(
    type='YOLODetector',
    # mean 0 / std 255 with bgr_to_rgb=True: inputs are divided by 255 and
    # channel order is swapped by the preprocessor.
    data_preprocessor=dict(
        type='YOLOv5DetDataPreprocessor',
        mean=[0.0, 0.0, 0.0],
        std=[255.0, 255.0, 255.0],
        bgr_to_rgb=True),
    # deepen_factor / widen_factor are repeated in backbone, neck and head;
    # keep them in sync when changing the model size.
    backbone=dict(
        type='YOLOv8CSPDarknet',
        arch='P5',
        last_stage_out_channels=1024,
        deepen_factor=0.33,
        widen_factor=0.5,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    neck=dict(
        type='YOLOv8PAFPN',
        deepen_factor=0.33,
        widen_factor=0.5,
        in_channels=[256, 512, 1024],
        out_channels=[256, 512, 1024],
        num_csp_blocks=3,
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
        act_cfg=dict(type='SiLU', inplace=True)),
    bbox_head=dict(
        type='YOLOv8Head',
        head_module=dict(
            type='YOLOv8HeadModule',
            num_classes=num_classes,
            in_channels=[256, 512, 1024],
            widen_factor=0.5,
            reg_max=16,
            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
            act_cfg=dict(type='SiLU', inplace=True),
            featmap_strides=[8, 16, 32]),
        # Point-based priors (no anchor boxes) on strides 8, 16, 32.
        prior_generator=dict(
            type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]),
        bbox_coder=dict(type='DistancePointBBoxCoder'),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='none',
            loss_weight=0.5),
        loss_bbox=dict(
            type='IoULoss',
            iou_mode='ciou',
            bbox_format='xyxy',
            reduction='sum',
            loss_weight=7.5,
            return_iou=False),
        loss_dfl=dict(
            type='mmdet.DistributionFocalLoss',
            reduction='mean',
            loss_weight=0.375)),
    # Label assignment used during training.
    train_cfg=dict(
        assigner=dict(
            type='BatchTaskAlignedAssigner',
            num_classes=num_classes,
            use_ciou=True,
            topk=10,
            alpha=0.5,
            beta=6.0,
            eps=1e-09)),
    # Inference-time post-processing (score filtering and NMS).
    test_cfg=dict(
        multi_label=True,
        nms_pre=30000,
        score_thr=0.001,
        nms=dict(type='nms', iou_threshold=0.7),
        max_per_img=300))
17 | end=5, 18 | convert_to_iter_based=True), 19 | dict( 20 | type='CosineAnnealingLR', 21 | eta_min=0.0005, 22 | begin=5, 23 | T_max=max_epochs - num_last_epochs, 24 | end=max_epochs - num_last_epochs, 25 | by_epoch=True, 26 | convert_to_iter_based=True), 27 | dict(type='ConstantLR', by_epoch=True, factor=1, begin=max_epochs - num_last_epochs, end=max_epochs) 28 | ] 29 | optim_wrapper = dict( 30 | type='OptimWrapper', 31 | optimizer=dict( 32 | type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, nesterov=True), 33 | paramwise_cfg=dict(norm_decay_mult=0.0, bias_decay_mult=0.0)) 34 | 35 | custom_hooks = [ 36 | dict(type='mmdet.SyncNormHook', priority=48), 37 | dict( 38 | type='EMAHook', 39 | ema_type='ExpMomentumEMA', 40 | momentum=0.0001, 41 | update_buffers=True, 42 | strict_load=False, 43 | priority=49) 44 | ] 45 | 46 | num_classes = 4 47 | model = dict( 48 | type='YOLODetector', 49 | init_cfg=dict( 50 | type='Kaiming', 51 | layer='Conv2d', 52 | a=2.23606797749979, 53 | distribution='uniform', 54 | mode='fan_in', 55 | nonlinearity='leaky_relu'), 56 | use_syncbn=False, 57 | data_preprocessor=dict( 58 | type='YOLOv5DetDataPreprocessor', 59 | pad_size_divisor=32, 60 | batch_augments=[ 61 | dict( 62 | type='YOLOXBatchSyncRandomResize', 63 | random_size_range=(480, 800), 64 | size_divisor=32, 65 | interval=10) 66 | ]), 67 | backbone=dict( 68 | type='YOLOXCSPDarknet', 69 | deepen_factor=0.33, 70 | widen_factor=0.5, 71 | out_indices=(2, 3, 4), 72 | spp_kernal_sizes=(5, 9, 13), 73 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), 74 | act_cfg=dict(type='SiLU', inplace=True)), 75 | neck=dict( 76 | type='YOLOXPAFPN', 77 | deepen_factor=0.33, 78 | widen_factor=0.5, 79 | in_channels=[256, 512, 1024], 80 | out_channels=256, 81 | norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), 82 | act_cfg=dict(type='SiLU', inplace=True)), 83 | bbox_head=dict( 84 | type='YOLOXHead', 85 | head_module=dict( 86 | type='YOLOXHeadModule', 87 | num_classes=num_classes, 88 | 
#!/usr/bin/env bash
# Launch distributed testing with torch.distributed.launch.
#
# Usage: dist_test.sh CONFIG CHECKPOINT GPUS [extra test.py args...]
# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Resolve the tools directory once; every expansion below is quoted so that
# paths and extra arguments containing spaces or glob characters survive.
SCRIPT_DIR="$(dirname "$0")"

# The repository root goes first on PYTHONPATH so the local `unitmodule`
# package is importable from test.py.
PYTHONPATH="${SCRIPT_DIR}/..:${PYTHONPATH}" \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "${SCRIPT_DIR}/test.py" \
    "$CONFIG" \
    "$CHECKPOINT" \
    --launcher pytorch \
    "${@:4}"
#!/usr/bin/env bash
# Launch distributed training with torch.distributed.launch.
#
# Usage: dist_train.sh CONFIG GPUS [extra train.py args...]
# Environment overrides: NNODES, NODE_RANK, PORT, MASTER_ADDR.

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

# Resolve the tools directory once; every expansion below is quoted so that
# paths and extra arguments containing spaces or glob characters survive.
SCRIPT_DIR="$(dirname "$0")"

# The repository root goes first on PYTHONPATH so the local `unitmodule`
# package is importable from train.py.
PYTHONPATH="${SCRIPT_DIR}/..:${PYTHONPATH}" \
python -m torch.distributed.launch \
    --nnodes="$NNODES" \
    --node_rank="$NODE_RANK" \
    --master_addr="$MASTER_ADDR" \
    --nproc_per_node="$GPUS" \
    --master_port="$PORT" \
    "${SCRIPT_DIR}/train.py" \
    "$CONFIG" \
    --launcher pytorch "${@:3}"
def _module_path_from_dir(import_dir):
    """Turn a package directory such as ``a/b/c/`` into ``a.b.c``.

    ``normpath`` drops trailing separators and a leading ``./``, which would
    otherwise become stray dots and make the import fail.
    """
    normalized = osp.normpath(import_dir)
    return normalized.replace(osp.sep, '.').replace('/', '.')


def _import_custom_modules(cfg):
    """Import the package named by ``cfg.import_dir`` so registries update."""
    if hasattr(cfg, 'import_dir'):
        import importlib

        importlib.import_module(_module_path_from_dir(cfg.import_dir))


def _enable_tta(cfg):
    """Wrap the model and test pipeline for test-time augmentation."""
    if 'tta_model' not in cfg:
        warnings.warn('Cannot find ``tta_model`` in config, '
                      'we will set it as default.')
        cfg.tta_model = dict(
            type='DetTTAModel',
            tta_cfg=dict(
                nms=dict(type='nms', iou_threshold=0.5), max_per_img=100))
    if 'tta_pipeline' not in cfg:
        warnings.warn('Cannot find ``tta_pipeline`` in config, '
                      'we will set it as default.')
        # unwrap dataset wrappers until the dataset that owns the pipeline
        test_data_cfg = cfg.test_dataloader.dataset
        while 'dataset' in test_data_cfg:
            test_data_cfg = test_data_cfg['dataset']
        cfg.tta_pipeline = deepcopy(test_data_cfg.pipeline)
        flip_tta = dict(
            type='TestTimeAug',
            transforms=[
                [
                    dict(type='RandomFlip', prob=1.),
                    dict(type='RandomFlip', prob=0.)
                ],
                [
                    dict(
                        type='PackDetInputs',
                        meta_keys=('img_id', 'img_path', 'ori_shape',
                                   'img_shape', 'scale_factor', 'flip',
                                   'flip_direction'))
                ],
            ])
        # the last pipeline step is replaced by the flip TTA step
        cfg.tta_pipeline[-1] = flip_tta
    cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
    # NOTE(review): the pipeline is written to the outermost dataset config,
    # while the default above is read from the innermost one -- confirm this
    # is intended for wrapped datasets.
    cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline


def main():
    """Test (and evaluate) a detector described by a config and checkpoint.

    Reads the command line via ``parse_args``, builds the config, optionally
    enables visualization / TTA / result dumping, then runs ``runner.test()``.

    Raises:
        AssertionError: if ``--out`` does not end with ``.pkl`` or ``.pickle``.
    """
    args = parse_args()

    # Check the dump path before the (expensive) runner is built, so a wrong
    # suffix fails immediately.
    if args.out is not None:
        assert args.out.endswith(('.pkl', '.pickle')), \
            'The dump file must be a pkl file.'

    # Reduce the number of repeated compilations and improve
    # testing speed.
    setup_cache_size_limit_of_dynamo()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # --------------------------------------------------------
    # dynamic import customs modules
    # import modules from import_dir as a/b/c/ dir, registry will be updated
    _import_custom_modules(cfg)

    # dynamic import for ddp of UnitModule if key with_unit_module is True
    register_unit_distributed(cfg)
    # --------------------------------------------------------

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    cfg.load_from = args.checkpoint

    if args.show or args.show_dir:
        cfg = trigger_visualization_hook(cfg, args)

    if args.tta:
        _enable_tta(cfg)

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # add `DumpResults` dummy metric
    if args.out is not None:
        runner.test_evaluator.metrics.append(
            DumpDetResults(out_file_path=args.out))

    # start testing
    runner.test()


if __name__ == '__main__':
    main()
2 | import argparse 3 | import logging 4 | import os 5 | import os.path as osp 6 | 7 | from mmdet.utils import setup_cache_size_limit_of_dynamo 8 | from mmengine.config import Config, DictAction 9 | from mmengine.logging import print_log 10 | from mmengine.registry import RUNNERS 11 | from mmengine.runner import Runner 12 | 13 | from unitmodule.models.detectors import register_unit_distributed 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='Train a detector') 18 | parser.add_argument('config', help='train config file path') 19 | parser.add_argument('--work-dir', help='the dir to save logs and models') 20 | parser.add_argument( 21 | '--amp', 22 | action='store_true', 23 | default=False, 24 | help='enable automatic-mixed-precision training') 25 | parser.add_argument( 26 | '--auto-scale-lr', 27 | action='store_true', 28 | help='enable automatically scaling LR.') 29 | parser.add_argument( 30 | '--resume', 31 | nargs='?', 32 | type=str, 33 | const='auto', 34 | help='If specify checkpoint path, resume from it, while if not ' 35 | 'specify, try to auto resume from the latest checkpoint ' 36 | 'in the work directory.') 37 | parser.add_argument( 38 | '--cfg-options', 39 | nargs='+', 40 | action=DictAction, 41 | help='override some settings in the used config, the key-value pair ' 42 | 'in xxx=yyy format will be merged into config file. If the value to ' 43 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 44 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 45 | 'Note that the quotation marks are necessary and that no white space ' 46 | 'is allowed.') 47 | parser.add_argument( 48 | '--launcher', 49 | choices=['none', 'pytorch', 'slurm', 'mpi'], 50 | default='none', 51 | help='job launcher') 52 | # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` 53 | # will pass the `--local-rank` parameter to `tools/train.py` instead 54 | # of `--local_rank`. 
def _module_path_from_dir(import_dir):
    """Turn a package directory such as ``a/b/c/`` into ``a.b.c``.

    ``normpath`` drops trailing separators and a leading ``./``, which would
    otherwise become stray dots and make the import fail.
    """
    normalized = osp.normpath(import_dir)
    return normalized.replace(osp.sep, '.').replace('/', '.')


def _import_custom_modules(cfg):
    """Import the package named by ``cfg.import_dir`` so registries update."""
    if hasattr(cfg, 'import_dir'):
        import importlib

        importlib.import_module(_module_path_from_dir(cfg.import_dir))


def main():
    """Train a detector described by a config file.

    Reads the command line via ``parse_args``, applies the CLI overrides
    (work dir, AMP, LR auto-scaling, resume) to the config, builds the runner
    and calls ``runner.train()``.

    Raises:
        AssertionError: if ``--amp`` is given and the optimizer wrapper type
            is neither ``OptimWrapper`` nor ``AmpOptimWrapper``.
        RuntimeError: if ``--auto-scale-lr`` is given but the config lacks
            ``auto_scale_lr.enable`` or ``auto_scale_lr.base_batch_size``.
    """
    args = parse_args()

    # Reduce the number of repeated compilations and improve
    # training speed.
    setup_cache_size_limit_of_dynamo()

    # load config
    cfg = Config.fromfile(args.config)
    cfg.launcher = args.launcher
    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # --------------------------------------------------------
    # dynamic import customs modules
    # import modules from import_dir as a/b/c/ dir, registry will be updated
    _import_custom_modules(cfg)

    # dynamic import for ddp of UnitModule if key with_unit_module is True
    register_unit_distributed(cfg)
    # --------------------------------------------------------

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    # enable automatic-mixed-precision training
    if args.amp is True:
        optim_wrapper = cfg.optim_wrapper.type
        if optim_wrapper == 'AmpOptimWrapper':
            print_log(
                'AMP training is already enabled in your config.',
                logger='current',
                level=logging.WARNING)
        else:
            assert optim_wrapper == 'OptimWrapper', (
                '`--amp` is only supported when the optimizer wrapper type is '
                f'`OptimWrapper` but got {optim_wrapper}.')
            cfg.optim_wrapper.type = 'AmpOptimWrapper'
            cfg.optim_wrapper.loss_scale = 'dynamic'

    # enable automatically scaling LR
    if args.auto_scale_lr:
        if 'auto_scale_lr' in cfg and \
                'enable' in cfg.auto_scale_lr and \
                'base_batch_size' in cfg.auto_scale_lr:
            cfg.auto_scale_lr.enable = True
        else:
            raise RuntimeError('Can not find "auto_scale_lr" or '
                               '"auto_scale_lr.enable" or '
                               '"auto_scale_lr.base_batch_size" in your'
                               ' configuration file.')

    # resume is determined in this priority: resume from > auto_resume
    if args.resume == 'auto':
        cfg.resume = True
        cfg.load_from = None
    elif args.resume is not None:
        cfg.resume = True
        cfg.load_from = args.resume

    # build the runner from config
    if 'runner_type' not in cfg:
        # build the default runner
        runner = Runner.from_cfg(cfg)
    else:
        # build customized runner from the registry
        # if 'runner_type' is set in the cfg
        runner = RUNNERS.build(cfg)

    # start training
    runner.train()


if __name__ == '__main__':
    main()
@TRANSFORMS.register_module()
class UnderwaterColorRandomTransfer(BaseTransform):
    """Randomly shift the colour of an underwater image in HSV space.

    The uint8 BGR image (h, w, c) is converted to HSV (Hue, Saturation,
    Value), where H is in [0, 180) and S, V are in [0, 255]. Each channel is
    shifted independently with its own probability, then the image is
    converted back to BGR.

    Required Keys:

    - img

    Modified Keys:

    - img

    Args:
        hue_prob (float): Probability of shifting hue, in [0, 1].
            Defaults to 0.5.
        saturation_prob (float): Probability of shifting saturation,
            in [0, 1]. Defaults to 0.5.
        value_prob (float): Probability of shifting value, in [0, 1].
            Defaults to 0.5.
        hue_delta (int): Maximum magnitude of the hue shift. Defaults to 5.
        saturation_delta (int): Maximum magnitude of the saturation shift.
            Defaults to 30.
        value_delta (int): Maximum magnitude of the value shift.
            Defaults to 30.

    Notes:
        ``underwater_hue_interval`` comes from the mean hue of underwater
        datasets, measured after converting BGR to HSV.
            dataset     | hue min        | hue max
            ------------|----------------|-------------
            DUO         | 18.7551        | 95.4836
            URPC2020    | 17.9668        | 99.6359
            URPC2021    | 17.9668        | 103.2373
            UIEB        | 25.5417        | 116.3379
            ------------|----------------|-------------
            hue interval  18               116
    """
    underwater_hue_interval = (18, 116)

    def __init__(self,
                 hue_prob: float = 0.5,
                 saturation_prob: float = 0.5,
                 value_prob: float = 0.5,
                 hue_delta: int = 5,
                 saturation_delta: int = 30,
                 value_delta: int = 30) -> None:
        # every probability must lie in [0, 1]
        for prob in (hue_prob, saturation_prob, value_prob):
            assert 0 <= prob <= 1.0

        self.hue_prob = hue_prob
        self.saturation_prob = saturation_prob
        self.value_prob = value_prob
        self.hue_delta = hue_delta
        self.saturation_delta = saturation_delta
        self.value_delta = value_delta

        lower, upper = self.underwater_hue_interval
        self._hue_min = lower
        self._hue_max = upper
        self._hue_middle = (lower + upper) / 2

    @cache_randomness
    def _random_hue(self):
        return np.random.rand() < self.hue_prob

    @cache_randomness
    def _random_saturation(self):
        return np.random.rand() < self.saturation_prob

    @cache_randomness
    def _random_value(self):
        return np.random.rand() < self.value_prob

    @staticmethod
    def _random_mult():
        return np.random.uniform(-1, 1)

    @cache_randomness
    def _get_hue_gain(self, img):
        """Pick a hue shift that keeps the mean hue in the underwater interval."""
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        mean_hue = np.mean(hsv[..., 0])
        gain = self._random_mult() * self.hue_delta

        if self._hue_min < mean_hue < self._hue_max:
            # inside the interval: clamp the shifted mean to the interval
            shifted = np.clip(mean_hue + gain, self._hue_min, self._hue_max)
            gain = shifted - mean_hue
        elif mean_hue >= self._hue_max:
            # at or above the interval: only shift downwards
            gain = -np.abs(gain)
        else:
            # at or below the interval: only shift upwards
            gain = np.abs(gain)

        return np.array(gain, dtype=np.int16)

    @cache_randomness
    def _get_saturation_gain(self):
        return np.array(self._random_mult() * self.saturation_delta,
                        dtype=np.int16)

    @cache_randomness
    def _get_value_gain(self):
        return np.array(self._random_mult() * self.value_delta,
                        dtype=np.int16)

    def transform(self, results: dict) -> dict:
        # draw all three switches first so the random-call order is fixed
        shift_hue = self._random_hue()
        shift_saturation = self._random_saturation()
        shift_value = self._random_value()

        if not (shift_hue or shift_saturation or shift_value):
            return results

        bgr = results['img']
        src_dtype = bgr.dtype

        assert src_dtype == np.uint8
        # int16 leaves room for negative and > 255 intermediate values
        hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV).astype(np.int16)

        if shift_hue:
            # hue wraps around at 180
            hsv[..., 0] = (hsv[..., 0] + self._get_hue_gain(bgr)) % 180

        if shift_saturation:
            hsv[..., 1] = np.clip(
                hsv[..., 1] + self._get_saturation_gain(), 0, 255)

        if shift_value:
            hsv[..., 2] = np.clip(
                hsv[..., 2] + self._get_value_gain(), 0, 255)

        # back to BGR in the original dtype
        results['img'] = cv2.cvtColor(hsv.astype(src_dtype), cv2.COLOR_HSV2BGR)
        return results

    def __repr__(self):
        parts = (
            f'underwater_hue_interval={self.underwater_hue_interval}',
            f'hue_prob={self.hue_prob}',
            f'saturation_prob={self.saturation_prob}',
            f'value_prob={self.value_prob}',
            f'hue_delta={self.hue_delta}',
            f'saturation_delta={self.saturation_delta}',
            f'value_delta={self.value_delta}',
        )
        return self.__class__.__name__ + '(' + ', '.join(parts) + ')'
34 | bgr_to_rgb: bool = False, 35 | rgb_to_bgr: bool = False, 36 | boxtype2tensor: bool = True, 37 | non_blocking: Optional[bool] = False, 38 | batch_augments: Optional[List[dict]] = None, 39 | init_cfg=None): 40 | super().__init__( 41 | mean=mean, 42 | std=std, 43 | pad_size_divisor=pad_size_divisor, 44 | pad_value=pad_value, 45 | pad_mask=pad_mask, 46 | mask_pad_value=mask_pad_value, 47 | pad_seg=pad_seg, 48 | seg_pad_value=seg_pad_value, 49 | bgr_to_rgb=bgr_to_rgb, 50 | rgb_to_bgr=rgb_to_bgr, 51 | boxtype2tensor=boxtype2tensor, 52 | non_blocking=non_blocking, 53 | batch_augments=batch_augments) 54 | 55 | # BaseModule __init__ 56 | self._is_init = False 57 | self.init_cfg = copy.deepcopy(init_cfg) 58 | 59 | assert pad_mode in ('reflect', 'circular'), \ 60 | f"Excepted ('reflect', 'circular'), but got {pad_mode}" 61 | self.pad_mode = pad_mode 62 | self.unit_module = MODELS.build(unit_module) 63 | 64 | def forward(self, 65 | data: dict, 66 | training: bool = False) -> Union[Tuple[dict, dict], dict]: 67 | data = self.cast_data(data) 68 | data['inputs'], losses = self.unit_module_forward(data['inputs'], training) 69 | 70 | data = super(UnitDetDataPreprocessor, self).forward(data, training) 71 | return (data, losses) if training else data 72 | 73 | def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]: 74 | outputs = [] 75 | losses = [] 76 | for batch_input in batch_inputs: 77 | # padding 78 | oh, ow = batch_input.shape[1:] 79 | pad_h = int(np.ceil(oh / self.pad_size_divisor)) * self.pad_size_divisor 80 | pad_w = int(np.ceil(ow / self.pad_size_divisor)) * self.pad_size_divisor 81 | p2d = (0, (pad_w - ow), 0, (pad_h - oh)) 82 | batch_input = batch_input.float() 83 | batch_input_pad = F.pad(batch_input, p2d, self.pad_mode) 84 | 85 | # UnitModule forward 86 | batch_input_pad = batch_input_pad.unsqueeze(0) / 255. 
87 | if training: 88 | batch_output_pad, _losses = self.unit_module(batch_input_pad, training) 89 | losses.append(_losses) 90 | else: 91 | batch_output_pad = self.unit_module(batch_input_pad, training) 92 | batch_output_pad = batch_output_pad.squeeze(0) 93 | 94 | # remove padding 95 | batch_output = batch_output_pad[..., :oh, :ow] * 255. 96 | outputs.append(batch_output) 97 | 98 | if training: 99 | n = len(losses) 100 | losses = reduce(sum_dict, losses) 101 | for k, v in losses.items(): 102 | losses[k] = v / n 103 | 104 | return outputs, losses 105 | 106 | 107 | @MODELS.register_module() 108 | class UnitYOLOv5DetDataPreprocessor(YOLOv5DetDataPreprocessor, BaseModule): 109 | def __init__(self, 110 | unit_module: dict, 111 | pad_mode: str = 'reflect', 112 | mean: Sequence[Number] = None, 113 | std: Sequence[Number] = None, 114 | pad_size_divisor: int = 1, 115 | pad_value: Union[float, int] = 0, 116 | pad_mask: bool = False, 117 | mask_pad_value: int = 0, 118 | pad_seg: bool = False, 119 | seg_pad_value: int = 255, 120 | bgr_to_rgb: bool = False, 121 | rgb_to_bgr: bool = False, 122 | boxtype2tensor: bool = True, 123 | non_blocking: Optional[bool] = True, 124 | batch_augments: Optional[List[dict]] = None, 125 | init_cfg=None): 126 | super().__init__( 127 | mean=mean, 128 | std=std, 129 | pad_size_divisor=pad_size_divisor, 130 | pad_value=pad_value, 131 | pad_mask=pad_mask, 132 | mask_pad_value=mask_pad_value, 133 | pad_seg=pad_seg, 134 | seg_pad_value=seg_pad_value, 135 | bgr_to_rgb=bgr_to_rgb, 136 | rgb_to_bgr=rgb_to_bgr, 137 | boxtype2tensor=boxtype2tensor, 138 | non_blocking=non_blocking, 139 | batch_augments=batch_augments) 140 | 141 | # BaseModule __init__ 142 | self._is_init = False 143 | self.init_cfg = copy.deepcopy(init_cfg) 144 | 145 | assert pad_mode in ('reflect', 'circular'), \ 146 | f"Excepted ('reflect', 'circular'), but got {pad_mode}" 147 | self.pad_mode = pad_mode 148 | self.unit_module = MODELS.build(unit_module) 149 | 150 | def forward(self, 151 | 
def unit_module_forward(self, batch_inputs, training: bool = False) -> Tuple[list, dict]:
    """Apply ``self.unit_module`` to the inputs before detection preprocessing.

    Training: ``batch_inputs`` is converted to float as a whole, divided
    by 255 and sent through the unit module in a single call; the result
    is multiplied by 255 and returned together with the module's losses.

    Eval: each image is handled on its own. It is padded on the right and
    bottom (``self.pad_mode``) up to a multiple of
    ``self.pad_size_divisor``, divided by 255, passed through the unit
    module as a batch of one, cropped back to its original size and
    multiplied by 255.

    Args:
        batch_inputs: In training, an object supporting ``.float()`` and
            division (presumably an already stacked tensor - confirm
            against the data pipeline). In eval, an iterable of 3-D image
            tensors whose last two dimensions are height and width.
        training (bool): Selects between the two paths described above.

    Returns:
        tuple: ``(outputs, losses)``. In training ``outputs`` is whatever
        the unit module returned, times 255, and ``losses`` is its loss
        dict. In eval ``outputs`` is a list of tensors and ``losses`` is
        an empty dict.
    """
    if training:
        # whole batch at once
        scaled = batch_inputs.float() / 255.
        enhanced, losses = self.unit_module(scaled, training)
        return enhanced * 255., losses

    step = self.pad_size_divisor
    outputs = []
    for image in batch_inputs:
        height, width = image.shape[1:]
        target_h = int(np.ceil(height / step)) * step
        target_w = int(np.ceil(width / step)) * step

        # F.pad order is (left, right, top, bottom): pad right/bottom only
        padding = (0, target_w - width, 0, target_h - height)
        padded = F.pad(image.float(), padding, self.pad_mode)

        # batch of one through the unit module, inputs scaled by 1/255
        restored = self.unit_module(padded.unsqueeze(0) / 255., training)
        restored = restored.squeeze(0)

        # drop the padded border and undo the scaling
        outputs.append(restored[..., :height, :width] * 255.)

    return outputs, {}
@MODELS.register_module()
class UnitBackbone(BaseModule):
    """Convolutional stem followed by a sequence of ``LKBlock`` layers.

    The stem holds one ``ConvModule`` per entry of ``stem_channels``, each
    built with positional arguments ``3, 2`` (kernel size and stride in
    mmcv's ``ConvModule``) and ``padding=1``. The body holds one
    ``LKBlock`` per ``(large_kernel, small_kernel)`` pair, all working on
    the channel count produced by the last stem layer (or ``in_channels``
    when ``stem_channels`` is empty).

    Args:
        stem_channels (Tuple[int]): Output channels of each stem layer.
        large_kernels (Tuple[int]): Large kernel size of each block.
        small_kernels (Tuple[int]): Small kernel size of each block; must
            have the same length as ``large_kernels``.
        in_channels (int): Channels of the input. Defaults to 3.
        dw_ratio (float): Forwarded to every ``LKBlock``. Defaults to 1.0.
        padding_mode (str): Padding mode of the convolutions.
            Defaults to 'reflect'.
        norm_cfg (dict, optional): Norm config. ``None`` selects
            ``dict(type='GN', num_groups=8)``.
        act_cfg (dict, optional): Activation config. ``None`` selects
            ``dict(type='ReLU')``.
        init_cfg (dict, optional): Passed to ``BaseModule``.
    """

    def __init__(self,
                 stem_channels: Tuple[int],
                 large_kernels: Tuple[int],
                 small_kernels: Tuple[int],
                 in_channels: int = 3,
                 dw_ratio: float = 1.0,
                 padding_mode: str = 'reflect',
                 norm_cfg: Optional[dict] = None,
                 act_cfg: Optional[dict] = None,
                 init_cfg: Optional[dict] = None):
        super().__init__(init_cfg)
        assert len(large_kernels) == len(small_kernels)
        norm_cfg = dict(type='GN', num_groups=8) if norm_cfg is None else norm_cfg
        act_cfg = dict(type='ReLU') if act_cfg is None else act_cfg

        # channel widths along the stem: the input first, then every stage
        widths = [in_channels, *stem_channels]
        self.stem = nn.Sequential(*[
            cnn.ConvModule(c_in, c_out, 3, 2,
                           padding=1, padding_mode=padding_mode,
                           norm_cfg=norm_cfg, act_cfg=act_cfg)
            for c_in, c_out in zip(widths[:-1], widths[1:])
        ])

        # every block keeps the width the stem ends with
        body_channels = widths[-1]
        self.layers = nn.Sequential(*[
            LKBlock(body_channels, large_k, small_k, dw_ratio,
                    padding_mode, norm_cfg, act_cfg)
            for large_k, small_k in zip(large_kernels, small_kernels)
        ])

    def forward(self, x) -> Tensor:
        """Return the features of ``x`` after the stem and all blocks."""
        return self.layers(self.stem(x))
@MODELS.register_module()
class UnitModule(BaseModule):
    """Image enhancement module built from a backbone, a t head and an A head.

    The backbone features feed ``t_head`` to produce ``t``; ``a_head`` is
    applied directly to the input to produce ``a``. An image is restored
    with ``denoise`` as ``(x - (1 - t) * a) / t``; ``noise`` is the inverse
    mapping ``x * t + (1 - t) * a``.

    Args:
        unit_backbone (dict): Config of the backbone.
        t_head (dict): Config of the head producing ``t``.
        a_head (dict): Config of the head producing ``a``.
        loss_t (dict): Config of the loss comparing ``alpha * t`` with the
            ``t`` predicted for the re-noised image.
        loss_acc (dict, optional): Config of the loss called with
            ``(feature, a)``.
        loss_cc (dict, optional): Config of the loss called with the
            denoised image.
        loss_sp (dict, optional): Config of the loss called with the
            denoised image and the denoised re-noised image.
        loss_tv (dict, optional): Config of the loss called with the
            denoised image.
        alpha (float): Value used as ``t`` when re-noising the input in
            ``loss``; must satisfy ``0 < alpha < 1``. Defaults to 0.9.
        t_min (float): Lower clamp applied to ``t``; must satisfy
            ``0 <= t_min < 0.1``. Defaults to 0.001.
        init_cfg (dict, optional): Passed to ``BaseModule``.
    """

    def __init__(self,
                 unit_backbone: dict,
                 t_head: dict,
                 a_head: dict,
                 loss_t: dict,
                 loss_acc: Optional[dict] = None,
                 loss_cc: Optional[dict] = None,
                 loss_sp: Optional[dict] = None,
                 loss_tv: Optional[dict] = None,
                 alpha: float = 0.9,
                 t_min: float = 0.001,
                 init_cfg=None):
        super().__init__(init_cfg)
        assert 0 < alpha < 1
        assert 0 <= t_min < 0.1

        self.alpha = alpha
        self.t_min = t_min

        self.unit_backbone = MODELS.build(unit_backbone)
        self.t_head = MODELS.build(t_head)
        self.a_head = MODELS.build(a_head)

        self.loss_t = MODELS.build(loss_t)
        # optional losses stay ``None`` when no config is given
        optional_losses = (('loss_acc', loss_acc),
                           ('loss_cc', loss_cc),
                           ('loss_sp', loss_sp),
                           ('loss_tv', loss_tv))
        for attr_name, cfg in optional_losses:
            setattr(self, attr_name, MODELS.build(cfg) if cfg else None)

    def forward(self, x, training: bool = False) -> Union[Tensor, Tuple[Tensor, dict]]:
        """Dispatch to ``loss`` when ``training`` is true, else ``predict``."""
        return self.loss(x) if training else self.predict(x)

    def _forward(self, x) -> Tuple[Tensor, Tensor]:
        """Return the raw (unclamped) ``t`` and ``a`` predicted for ``x``."""
        t = self.t_head(self.unit_backbone(x))
        a = self.a_head(x)
        return t, a

    def predict(self, x, show: bool = False) -> Union[Tensor, tuple]:
        """Denoise ``x`` and clamp the result to ``[0, 1]``.

        Args:
            x: Input image tensor.
            show (bool): When True also return ``t`` (clamped to
                ``t_min``) and ``a``.

        Returns:
            The denoised image, or ``(image, t, a)`` when ``show`` is True.
        """
        t, a = self._forward(x)
        t = t.clamp(min=self.t_min)

        restored = self.denoise(x, t, a).clamp(0, 1)
        if show:
            return restored, t, a
        return restored

    def loss(self, x) -> Tuple[Tensor, dict]:
        """Denoise ``x`` and compute the configured losses.

        Returns:
            tuple: The denoised image clamped to ``[0, 1]`` and a dict
            with ``loss_t`` plus one entry per configured optional loss.
        """
        feature = self.unit_backbone(x)
        t = self.t_head(feature)
        a = self.a_head(x)
        t = t.clamp(min=self.t_min)

        # restored version of the real input (left unclamped for the losses)
        x_denoise = self.denoise(x, t, a)

        # re-noise the input with a constant t == alpha, then predict t and
        # A for that fake image and restore it as well
        x_fake = self.noise(x, self.alpha, a)
        t_fake, a_fake = self._forward(x_fake)
        # NOTE(review): t_fake is not clamped to t_min before the division
        # in ``denoise``, unlike ``t`` above - confirm this is intended.
        x_fake_denoise = self.denoise(x_fake, t_fake, a_fake)

        losses = {'loss_t': self.loss_t(self.alpha * t, t_fake)}
        if self.loss_acc:
            losses['loss_acc'] = self.loss_acc(feature, a)
        if self.loss_cc:
            losses['loss_cc'] = self.loss_cc(x_denoise)
        if self.loss_sp:
            losses['loss_sp'] = self.loss_sp(x_denoise, x_fake_denoise)
        if self.loss_tv:
            losses['loss_tv'] = self.loss_tv(x_denoise)

        return x_denoise.clamp(0, 1), losses

    @staticmethod
    def noise(x, t, a) -> Tensor:
        """Noise image: blend ``x`` and ``a`` with weight ``t`` on ``x``."""
        return x * t + (1 - t) * a

    @staticmethod
    def denoise(x, t, a) -> Tensor:
        """Denoise image: algebraic inverse of ``noise`` for the same ``t``, ``a``."""
        return (x - (1 - t) * a) / t
def with_unit_module(cls):
    """Class decorator installing the UnitModule-aware ``train_step``.

    Replaces ``cls.train_step`` with ``train_step_with_unit_module`` so
    that the losses returned by the data preprocessor are merged into the
    detector losses during training.

    Args:
        cls: The detector class to patch.

    Returns:
        The same class object, modified in place.
    """
    setattr(cls, 'train_step', train_step_with_unit_module)
    return cls
@MODELS.register_module()
class AssistingColorCastLoss(nn.Module):
    """MSE between ``a`` and a color cast predicted from backbone features.

    The whole feature map of every sample is RoI-pooled to 7x7, reduced to
    3 channels by a 1x1 convolution, and each channel's 49 pooled values
    are mapped to one scalar by a stack of linear layers. The resulting
    ``(b, 3)`` prediction is compared with ``a`` using mean squared error.

    Args:
        channels (int): Number of channels of the incoming feature map.
        loss_weight (float): Factor applied to the loss. Defaults to 1.0.
    """

    def __init__(self, channels: int, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

        # 7x7 output bins, spatial scale 1 (boxes are in feature coordinates)
        self.roi_pooling = RoIPool((7, 7), 1)
        self.down_conv = nn.Conv2d(channels, 3, 1, 1)
        # 49 == 7 * 7 pooled values per channel
        self.acc_head = nn.Sequential(
            nn.Linear(49, 32),
            nn.Linear(32, 16),
            nn.Linear(16, 1))

    def forward(self, feature: Tensor, a: Tensor) -> Tensor:
        """Compute the weighted loss.

        Args:
            feature (Tensor): Feature map of shape ``(b, channels, h, w)``.
            a (Tensor): Target whose two trailing singleton dimensions are
                squeezed away; expected to become ``(b, 3)`` - confirm
                against the A head in use.

        Returns:
            Tensor: ``loss_weight`` times the MSE loss.
        """
        b, _, h, w = feature.shape
        a = a.squeeze(-1).squeeze(-1)  # (b, 3)

        # One box per sample covering the full feature map. RoIPool expects
        # (x1, y1, x2, y2) with x along the width and y along the height, so
        # the far corner is (w - 1, h - 1); using (h - 1, w - 1) pools the
        # wrong region whenever the feature map is not square.
        full_box = torch.tensor(
            [[0, 0, w - 1, h - 1]],
            dtype=torch.float32, device=feature.device)
        boxes = [full_box for _ in range(b)]

        feature = self.roi_pooling(feature, boxes)  # (b, channels, 7, 7)
        feature = self.down_conv(feature).view(b, 3, -1)  # (b, 3, 49)
        color_cast = self.acc_head(feature).squeeze(-1)  # (b, 3)

        loss = self.loss_fn(color_cast, a)
        return self.loss_weight * loss
@MODELS.register_module()
class TotalVariationLoss(nn.Module):
    """Smoothness loss: MSE between each pixel and its next neighbour.

    The loss is the sum of two mean-squared-error terms, one between
    vertically adjacent rows and one between horizontally adjacent
    columns of the input.

    Args:
        loss_weight (float): Factor applied to the loss. Defaults to 1.0.
    """

    def __init__(self, loss_weight: float = 1.0):
        super().__init__()
        self.loss_weight = loss_weight
        self.loss_fn = nn.MSELoss(reduction='mean')

    def forward(self, x: Tensor) -> Tensor:
        """Return the weighted loss for a 4-D input ``x``.

        Args:
            x (Tensor): Tensor with exactly four dimensions; the last two
                are treated as rows and columns.

        Returns:
            Tensor: ``loss_weight * (row_term + column_term)``.
        """
        _, _, rows, cols = x.shape

        # rows 1.. against rows ..rows-2 (each row vs. the one above it)
        lower_rows = x[:, :, 1:, :]
        upper_rows = x[:, :, :rows - 1, :]
        vertical = self.loss_fn(lower_rows, upper_rows)

        # columns 1.. against columns ..cols-2 (each column vs. its left one)
        right_cols = x[:, :, :, 1:]
        left_cols = x[:, :, :, :cols - 1]
        horizontal = self.loss_fn(right_cols, left_cols)

        return self.loss_weight * (vertical + horizontal)