├── demo
    ├── image_det_demo.py
    └── image_enhance_demo.py
├── lqit
    ├── edit
    │   ├── engine
    │   │   └── __init__.py
    │   ├── datasets
    │   │   ├── transforms
    │   │   │   └── __init__.py
    │   │   └── __init__.py
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   └── metrics
    │   │   │   └── __init__.py
    │   ├── structures
    │   │   ├── __init__.py
    │   │   └── batch_pixel_data.py
    │   ├── models
    │   │   ├── layers
    │   │   │   └── __init__.py
    │   │   ├── data_preprocessor
    │   │   │   └── __init__.py
    │   │   ├── editors
    │   │   │   ├── __init__.py
    │   │   │   ├── tienet
    │   │   │   │   └── __init__.py
    │   │   │   ├── unet
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── unet_generator.py
    │   │   │   └── zero_dce
    │   │   │   │   ├── __init__.py
    │   │   │   │   └── zero_dce_generator.py
    │   │   ├── base_models
    │   │   │   ├── __init__.py
    │   │   │   └── base_generator.py
    │   │   ├── editor_heads
    │   │   │   ├── __init__.py
    │   │   │   └── edge_head.py
    │   │   ├── __init__.py
    │   │   ├── losses
    │   │   │   └── __init__.py
    │   │   └── post_processor.py
    │   └── __init__.py
    ├── detection
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   └── tide
    │   │   │   ├── __init__.py
    │   │   │   └── errors
    │   │   │       ├── __init__.py
    │   │   │       ├── qualifiers.py
    │   │   │       └── error.py
    │   ├── datasets
    │   │   ├── transforms
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   ├── duo.py
    │   │   ├── rtts.py
    │   │   ├── ruod.py
    │   │   └── urpc.py
    │   ├── engine
    │   │   ├── __init__.py
    │   │   └── hooks
    │   │   │   └── __init__.py
    │   ├── utils
    │   │   └── __init__.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── necks
    │   │   │   └── __init__.py
    │   │   └── detectors
    │   │   │   ├── __init__.py
    │   │   │   └── edffnet.py
    │   └── __init__.py
    ├── common
    │   ├── engine
    │   │   ├── __init__.py
    │   │   └── hooks
    │   │   │   └── __init__.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   └── data_preprocessor
    │   │   │   ├── __init__.py
    │   │   │   ├── multi_input_multi_batch.py
    │   │   │   ├── batch_data_preprocessor.py
    │   │   │   └── multi_input_data_preprocessor.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   └── transforms
    │   │   │   └── __init__.py
    │   ├── structures
    │   │   └── __init__.py
    │   ├── __init__.py
    │   └── utils
    │   │   └── __init__.py
    ├── utils
    │   ├── collect_env.py
    │   ├── typing_utils.py
    │   ├── __init__.py
    │   ├── process_debug.py
    │   └── logger.py
    ├── version.py
    └── __init__.py
├── requirements
    ├── det_optional.txt
    ├── optional.txt
    ├── detection.txt
    ├── albu.txt
    ├── readthedocs.txt
    ├── build.txt
    ├── runtime.txt
    ├── docs.txt
    └── tests.txt
├── resources
    └── lqit-logo.jpg
├── configs
    ├── detection
    │   ├── uod_air
    │   │   ├── uod-air_retinanet_r50_ufpn_1x_4xbs4_urpc-coco.py
    │   │   ├── uod-air_retinanet_r50_ufpn_1x_urpc-coco_lr002.py
    │   │   ├── base_ehance_head
    │   │   │   └── enhance_head.py
    │   │   ├── uod-air_faster-rcnn_r50_ufpn_1x_urpc-coco.py
    │   │   └── uod-air_retinanet_r50_ufpn_1x_urpc-coco.py
    │   ├── ruod_dataset
    │   │   ├── faster-rcnn_r101_fpn_2x_ruod.py
    │   │   ├── faster-rcnn_x101-32x4d_fpn_2x_ruod.py
    │   │   ├── faster-rcnn_x101-64x4d_fpn_2x_ruod.py
    │   │   ├── faster-rcnn_r101_fpn_1x_ruod.py
    │   │   ├── faster-rcnn_x101-32x4d_fpn_1x_ruod.py
    │   │   ├── faster-rcnn_x101-64x4d_fpn_1x_ruod.py
    │   │   ├── ssd512_120e_ruod.py
    │   │   └── atss_r50_fpn_1x_ruod.py
    │   ├── urpc2020_dataset
    │   │   ├── train-all_test-A
    │   │   │   ├── faster-rcnn_r101_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_r101_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── ssd512_120e_urpc-coco.py
    │   │   │   ├── atss_r50_fpn_1x_urpc-coco.py
    │   │   │   ├── fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
    │   │   │   └── tood_r50_fpn_1x_urpc-coco.py
    │   │   ├── train-all_test-B
    │   │   │   ├── faster-rcnn_r101_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_r101_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── ssd512_120e_urpc-coco.py
    │   │   │   ├── atss_r50_fpn_1x_urpc-coco.py
    │   │   │   ├── fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
    │   │   │   └── tood_r50_fpn_1x_urpc-coco.py
    │   │   └── train_validation
    │   │   │   ├── faster-rcnn_r101_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_r101_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_r50_fpn_1x_urpc-xml.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py
    │   │   │   ├── faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py
    │   │   │   ├── ssd512_120e_urpc-coco.py
    │   │   │   ├── atss_r50_fpn_1x_urpc-coco.py
    │   │   │   ├── fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py
    │   │   │   └── tood_r50_fpn_1x_urpc-coco.py
    │   ├── edffnet
    │   │   ├── atss_r50_dffpn_1x_rtts-coco_lr002.py
    │   │   ├── edffnet_atss_r50_dffpn_1x_rtts-coco_lr002.py
    │   │   └── atss_r50_fpn_1x_rtts-coco_lr002.py
    │   ├── duo_dataset
    │   │   ├── faster-rcnn_r101_fpn_1x_duo-coco.py
    │   │   ├── faster-rcnn_r101_fpn_2x_duo-coco.py
    │   │   ├── faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py
    │   │   ├── faster-rcnn_x101-32x4d_fpn_2x_duo-coco.py
    │   │   ├── faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py
    │   │   ├── faster-rcnn_x101-64x4d_fpn_2x_duo-coco.py
    │   │   ├── README.md
    │   │   ├── ssd512_120e_duo-coco.py
    │   │   └── atss_r50_fpn_1x_duo-coco.py
    │   ├── _base_
    │   │   ├── schedules
    │   │   │   ├── schedule_1x.py
    │   │   │   ├── schedule_20e.py
    │   │   │   └── schedule_2x.py
    │   │   ├── default_runtime.py
    │   │   ├── datasets
    │   │   │   ├── rtts_coco.py
    │   │   │   ├── ruod_coco_detection.py
    │   │   │   ├── duo_coco_detection.py
    │   │   │   └── urpc2020
    │   │   │   │   ├── urpc2020-validation_coco_detection.py
    │   │   │   │   ├── urpc2020-test-A_coco_detection.py
    │   │   │   │   ├── urpc2020-test-B_coco_detection.py
    │   │   │   │   └── urpc2020-validation_xml_detection.py
    │   │   └── models
    │   │   │   ├── ssd300.py
    │   │   │   └── retinanet_r50_fpn.py
    │   ├── tienet
    │   │   ├── tienet_atss_r50_fpn_1x_rtts-coco.py
    │   │   ├── tienet_atss_r50_fpn_1x_urpc-coco.py
    │   │   ├── tienet_tood_r50_fpn_1x_rtts-coco.py
    │   │   ├── tienet_tood_r50_fpn_1x_urpc-coco.py
    │   │   ├── tienet_retinanet_r50_fpn_1x_rtts-coco.py
    │   │   ├── tienet_retinanet_r50_fpn_1x_urpc-coco.py
    │   │   ├── tienet_faster-rcnn_r50_fpn_1x_rtts-coco.py
    │   │   ├── tienet_faster-rcnn_r50_fpn_1x_urpc-coco.py
    │   │   ├── base_editor
    │   │   │   └── tienet_enhance_model.py
    │   │   └── base_detector
    │   │   │   ├── atss_r50_fpn_1x_urpc-coco.py
    │   │   │   ├── tood_r50_fpn_1x_urpc-coco.py
    │   │   │   └── atss_r50_fpn_1x_rtts-coco.py
    │   └── rtts_dataset
    │   │   └── atss_r50_fpn_1x_rtts-coco.py
    ├── edit
    │   ├── unet_demo
    │   │   ├── temp.py
    │   │   └── temp_zero_dce.py
    │   └── _base_
    │   │   ├── schedules
    │   │   │       └── schedule_1x.py
    │   │   ├── default_runtime.py
    │   │   ├── models
    │   │   │       └── unet.py
    │   │   └── datasets
    │   │   │       ├── underwater_enhancement.py
    │   │   │       └── underwater_enhancement_with_ann.py
    └── lark
    │   └── README.md
├── requirements.txt
├── .dev_scripts
    └── train_benchmark.sh
├── tools
    ├── dist_train.sh
    ├── dist_test.sh
    ├── slurm_test.sh
    ├── slurm_train.sh
    └── analysis_tools
    │   └── analyze_tide.py
├── .circleci
    ├── docker
    │   └── Dockerfile
    ├── test.yml
    └── config.yml
├── data
    ├── README_zh-CN.md
    └── README.md
├── docs
    ├── en
    │   ├── notes
    │   │   └── changelog.md
    │   └── prepare_data
    │   │   ├── ruod.md
    │   │   └── rtts.md
    └── zh_cn
    │   └── prepare_data
    │       ├── ruod.md
    │       └── rtts.md
├── LICENSES.md
├── .github
    └── workflows
    │   ├── deploy.yml
    │   └── lint.yml
├── setup.cfg
├── .pre-commit-config.yaml
├── .pre-commit-config-zh-cn.yaml
├── README_zh-CN.md
├── tests
    └── test_utils
    │   └── test_setup_env.py
├── .gitignore
└── README.md

/demo/image_det_demo.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/demo/image_enhance_demo.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lqit/edit/engine/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lqit/detection/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lqit/edit/datasets/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/requirements/det_optional.txt:
--------------------------------------------------------------------------------
1 | cityscapesscripts
2 | 
--------------------------------------------------------------------------------
/requirements/optional.txt:
--------------------------------------------------------------------------------
1 | imagecorruptions
2 | sklearn
3 | timm
4 | 
--------------------------------------------------------------------------------
/requirements/detection.txt:
--------------------------------------------------------------------------------
1 | mmdet>=3.0.0rc0,<3.1.0
2 | pycocotools
3 | 
--------------------------------------------------------------------------------
/lqit/common/engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .hooks import *  # noqa: F401,F403
2 | 
--------------------------------------------------------------------------------
/lqit/detection/engine/__init__.py:
--------------------------------------------------------------------------------
1 | from .hooks import *  # noqa: F401,F403
2 | 
--------------------------------------------------------------------------------
/lqit/edit/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .metrics import *  # noqa: F401,F403
2 | 
--------------------------------------------------------------------------------
/requirements/albu.txt:
--------------------------------------------------------------------------------
1 | albumentations>=0.3.2 --no-binary qudida,albumentations
2 | 
--------------------------------------------------------------------------------
/lqit/common/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_preprocessor import *  # noqa: F401,F403
2 | 
--------------------------------------------------------------------------------
/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | mmcv
2 | mmengine
3 | mmeval
4 | torch
5 | torchvision
6 | 
--------------------------------------------------------------------------------
/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmdetection
2 | cython
3 | numpy
4 | 
--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | func_timeout
2 | matplotlib
3 | numpy
4 | scipy
5 | six
6 | terminaltables
7 | 
--------------------------------------------------------------------------------
/resources/lqit-logo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BIGWangYuDong/lqit/HEAD/resources/lqit-logo.jpg
--------------------------------------------------------------------------------
/lqit/detection/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .merge_det_results import merge_preds
2 | 
3 | __all__ = ['merge_preds']
4 | 
--------------------------------------------------------------------------------
/lqit/edit/structures/__init__.py:
--------------------------------------------------------------------------------
1 | from .batch_pixel_data import BatchPixelData
2 | 
3 | __all__ = ['BatchPixelData']
4 | 
--------------------------------------------------------------------------------
/lqit/detection/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .detectors import *  # noqa: F401,F403
2 | from .necks import *  # noqa: F401,F403
3 | 
--------------------------------------------------------------------------------
/lqit/edit/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic_image_dataset import BasicImageDataset
2 | 
3 | __all__ = ['BasicImageDataset']
4 | 
--------------------------------------------------------------------------------
/lqit/detection/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .dffpn import DFFPN
2 | from .ufpn import UFPN
3 | 
4 | __all__ = ['DFFPN', 'UFPN']
5 | 
--------------------------------------------------------------------------------
/lqit/edit/models/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .up_sample import UpConvBlock, Upsample
2 | 
3 | __all__ = ['Upsample', 'UpConvBlock']
4 | 
--------------------------------------------------------------------------------
/lqit/common/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .dataset_wrappers import DatasetWithGTImageWrapper
2 | 
3 | __all__ = ['DatasetWithGTImageWrapper']
4 | 
--------------------------------------------------------------------------------
/lqit/edit/models/data_preprocessor/__init__.py:
--------------------------------------------------------------------------------
1 | from .edit_data_preprocessor import EditDataPreprocessor
2 | 
3 | __all__ = ['EditDataPreprocessor']
4 | 
--------------------------------------------------------------------------------
/lqit/detection/engine/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | from .visualization_hook import EnhanceDetVisualizationHook
2 | 
3 | __all__ = ['EnhanceDetVisualizationHook']
4 | 
--------------------------------------------------------------------------------
/lqit/common/engine/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | from .lark_hook import LarkHook
2 | from .summarize_hook import SummarizeHook
3 | 
4 | __all__ = ['LarkHook', 'SummarizeHook']
5 | 
--------------------------------------------------------------------------------
/lqit/common/structures/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_samples import DataSample, OptSampleList, SampleList
2 | 
3 | __all__ = ['DataSample', 'SampleList', 'OptSampleList']
4 | 
--------------------------------------------------------------------------------
/lqit/edit/models/editors/__init__.py:
--------------------------------------------------------------------------------
1 | from .tienet import *  # noqa: F401,F403
2 | from .unet import *  # noqa: F401,F403
3 | from .zero_dce import *  # noqa: F401,F403
4 | 
--------------------------------------------------------------------------------
/lqit/edit/models/base_models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_edit_model import BaseEditModel
2 | from .base_generator import BaseGenerator
3 | 
4 | __all__ = ['BaseEditModel', 'BaseGenerator']
5 | 
--------------------------------------------------------------------------------
/lqit/edit/models/editors/tienet/__init__.py:
--------------------------------------------------------------------------------
1 | from .tienet import TIENetEnhanceModel
2 | from .tienet_generator import TIENetGenerator
3 | 
4 | __all__ = ['TIENetEnhanceModel', 'TIENetGenerator']
5 | 
--------------------------------------------------------------------------------
/configs/detection/uod_air/uod-air_retinanet_r50_ufpn_1x_4xbs4_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     './uod-air_retinanet_r50_ufpn_1x_urpc-coco.py',
3 | ]
4 | train_dataloader = dict(batch_size=4, num_workers=4)
5 | 
--------------------------------------------------------------------------------
/lqit/common/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import *  # noqa: F401,F403
2 | from .engine import *  # noqa: F401,F403
3 | from .models import *  # noqa: F401,F403
4 | from .structures import *  # noqa: F401,F403
5 | 
--------------------------------------------------------------------------------
/lqit/edit/__init__.py:
--------------------------------------------------------------------------------
1 | from .datasets import *  # noqa: F401,F403
2 | from .evaluation import *  # noqa: F401,F403
3 | from .models import *  # noqa: F401,F403
4 | from .structures import *  # noqa: F401,F403
5 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r requirements/optional.txt
3 | -r requirements/runtime.txt
4 | -r requirements/tests.txt
5 | -r requirements/det_optional.txt
6 | -r requirements/detection.txt
7 | 
--------------------------------------------------------------------------------
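The nested requirement groups above are all pulled in by the top-level `requirements.txt`, so a development environment is two commands away. A minimal sketch (the editable install assumes you run it from a clone of the repository with its `setup.py` present):

```shell
# Install every requirement group referenced by requirements.txt,
# then install LQIT itself in editable (development) mode.
pip install -r requirements.txt
pip install -e .
```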
/lqit/edit/models/editors/unet/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_unet import BaseUNet
2 | from .pix2pix_unet import Pix2PixUnet
3 | from .unet_generator import UNetGenerator
4 | 
5 | __all__ = ['BaseUNet', 'Pix2PixUnet', 'UNetGenerator']
6 | 
--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | docutils==0.16.0
2 | myst-parser
3 | -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
4 | sphinx==4.0.2
5 | sphinx-copybutton
6 | sphinx_markdown_tables
7 | sphinx_rtd_theme==0.5.2
8 | 
--------------------------------------------------------------------------------
/lqit/edit/models/editors/zero_dce/__init__.py:
--------------------------------------------------------------------------------
1 | # This work is licensed under Attribution-NonCommercial 4.0 International License. # noqa
2 | from .zero_dce import ZeroDCE
3 | from .zero_dce_generator import ZeroDCEGenerator
4 | 
5 | __all__ = ['ZeroDCEGenerator', 'ZeroDCE']
6 | 
--------------------------------------------------------------------------------
/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_2x_ruod.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_ruod.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_r101_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_r101_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
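Each backbone variant above is a thin overlay on the shared R50 base: `_base_` pulls in the full training setup, and `model = dict(...)` overrides only the backbone depth and checkpoint. A new variant follows the same recipe; a minimal sketch (the ResNet-152 depth and `torchvision://resnet152` checkpoint are illustrative assumptions, not a config shipped with LQIT):

```python
# faster-rcnn_r152_fpn_1x_urpc-coco.py (hypothetical)
_base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'

# Only the keys given here are overridden; everything else
# (neck, heads, schedule, dataset) comes from the base config.
model = dict(
    backbone=dict(
        depth=152,
        init_cfg=dict(type='Pretrained',
                      checkpoint='torchvision://resnet152')))
```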
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r101_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
--------------------------------------------------------------------------------
/lqit/edit/models/editor_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .basic_enhance_head import (BasicEnhanceHead, SingleEnhanceHead,
2 |                                  UpSingleEnhanceHead)
3 | from .edge_head import EdgeHead
4 | 
5 | __all__ = [
6 |     'SingleEnhanceHead', 'UpSingleEnhanceHead', 'BasicEnhanceHead', 'EdgeHead'
7 | ]
8 | 
--------------------------------------------------------------------------------
/lqit/edit/evaluation/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .mae import MeanAbsoluteError
2 | from .mse import MeanSquaredError
3 | from .psnr import PeakSignalNoiseRatio
4 | from .ssim import StructuralSimilarity
5 | 
6 | __all__ = [
7 |     'MeanAbsoluteError', 'MeanSquaredError', 'PeakSignalNoiseRatio',
8 |     'StructuralSimilarity'
9 | ]
10 | 
--------------------------------------------------------------------------------
/configs/detection/uod_air/uod-air_retinanet_r50_ufpn_1x_urpc-coco_lr002.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     './uod-air_retinanet_r50_ufpn_1x_urpc-coco.py',
3 | ]
4 | # optimizer
5 | optim_wrapper = dict(
6 |     optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001),
7 |     clip_grad=dict(max_norm=35, norm_type=2))  # loss may become NaN without clip_grad
8 | 
--------------------------------------------------------------------------------
/lqit/common/datasets/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .formatting import PackInputs
2 | from .get_edge import GetEdgeGTFromImage
3 | from .loading import LoadGTImageFromFile, SetInputImageAsGT
4 | from .wrapper import TransBroadcaster
5 | 
6 | __all__ = [
7 |     'PackInputs', 'LoadGTImageFromFile', 'TransBroadcaster',
8 |     'SetInputImageAsGT', 'GetEdgeGTFromImage'
9 | ]
10 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_r50_fpn_1x_urpc-xml.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 |     '../../_base_/models/faster-rcnn_r50_fpn.py',
3 |     '../../_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py',
4 |     '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 | 
7 | model = dict(roi_head=dict(bbox_head=dict(num_classes=4)))
8 | 
--------------------------------------------------------------------------------
/requirements/tests.txt:
--------------------------------------------------------------------------------
1 | asynctest
2 | cityscapesscripts
3 | codecov
4 | flake8
5 | imagecorruptions
6 | instaboostfast
7 | interrogate
8 | isort==4.3.21
9 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
10 | kwarray
11 | memory_profiler
12 | parameterized
13 | protobuf<=3.20.1
14 | psutil
15 | pytest
16 | ubelt
17 | xdoctest>=0.10.0
18 | yapf
19 | 
--------------------------------------------------------------------------------
/configs/detection/edffnet/atss_r50_dffpn_1x_rtts-coco_lr002.py:
--------------------------------------------------------------------------------
1 | _base_ = ['./atss_r50_fpn_1x_rtts-coco_lr002.py']
2 | 
3 | # model settings
4 | model = dict(
5 |     neck=dict(
6 |         type='lqit.DFFPN',
7 |         in_channels=[256, 512, 1024, 2048],
8 |         out_channels=256,
9 |         start_level=1,
10 |         add_extra_convs='on_input',
11 |         shape_level=2,
12 |         num_outs=5))
13 | 
--------------------------------------------------------------------------------
/lqit/detection/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .detector_with_enhance_head import DetectorWithEnhanceHead
2 | from .detector_with_enhance_model import DetectorWithEnhanceModel
3 | from .edffnet import EDFFNet
4 | from .multi_input_wrapper import MultiInputDetectorWrapper
5 | 
6 | __all__ = [
7 |     'MultiInputDetectorWrapper', 'EDFFNet', 'DetectorWithEnhanceModel',
8 |     'DetectorWithEnhanceHead'
9 | ]
10 | 
--------------------------------------------------------------------------------
/lqit/edit/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_models import *  # noqa: F401,F403
2 | from .data_preprocessor import *  # noqa: F401,F403
3 | from .editor_heads import *  # noqa: F401,F403
4 | from .editors import *  # noqa: F401,F403
5 | from .layers import *  # noqa: F401,F403
6 | from .losses import *  # noqa: F401,F403
7 | from .post_processor import add_pixel_pred_to_datasample
8 | 
9 | __all__ = ['add_pixel_pred_to_datasample']
10 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .class_names import *  # noqa: F401,F403
2 | from .duo import DUODataset
3 | from .rtts import RTTSCocoDataset
4 | from .ruod import RUODDataset
5 | from .urpc import URPCCocoDataset, URPCXMLDataset
6 | from .xml_dataset import XMLDatasetWithMetaFile
7 | 
8 | __all__ = [
9 |     'XMLDatasetWithMetaFile', 'URPCCocoDataset', 'URPCXMLDataset',
10 |     'RTTSCocoDataset', 'RUODDataset', 'DUODataset'
11 | ]
12 | 
--------------------------------------------------------------------------------
/lqit/common/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .lark_manager import (MonitorManager, MonitorTracker,
2 |                            context_monitor_manager, get_error_message,
3 |                            get_user_name, initialize_monitor_manager,
4 |                            send_alert_message)
5 | 
6 | __all__ = [
7 |     'send_alert_message', 'get_user_name', 'initialize_monitor_manager',
8 |     'context_monitor_manager', 'MonitorTracker', 'MonitorManager',
9 |     'get_error_message'
10 | ]
11 | 
--------------------------------------------------------------------------------
/.dev_scripts/train_benchmark.sh:
--------------------------------------------------------------------------------
1 | PARTITION=$1
2 | WORK_DIR=$2
3 | CPUS_PER_TASK=${3:-4}
4 | 
5 | echo 'configs/detection/detector_with_enhance_head/faster-rcnn_r50_fpn_basic-enhance_1x_coco.py' &
6 | GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=$CPUS_PER_TASK ./tools/slurm_train.sh $PARTITION example-1 configs/detection/detector_with_enhance_head/faster-rcnn_r50_fpn_basic-enhance_1x_coco.py $WORK_DIR/detector_with_enhance_head/faster_r50_1x_basic_enhance_head --cfg-options default_hooks.checkpoint.max_keep_ckpts=1 &
7 | 
--------------------------------------------------------------------------------
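`train_benchmark.sh` takes the Slurm partition, the work directory, and an optional CPU count as positional arguments, then fans each config out as a background `slurm_train.sh` job. A usage sketch (the partition name and work directory are placeholders):

```shell
# Benchmark on partition "gpu", writing checkpoints and logs under
# ./work_dirs, with 8 CPUs per task instead of the default 4.
bash .dev_scripts/train_benchmark.sh gpu ./work_dirs 8
```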
/configs/detection/uod_air/base_ehance_head/enhance_head.py:
--------------------------------------------------------------------------------
1 | enhance_head = dict(
2 |     _scope_='lqit',
3 |     type='BasicEnhanceHead',
4 |     in_channels=256,
5 |     feat_channels=256,
6 |     num_convs=5,
7 |     loss_enhance=dict(type='L1Loss', loss_weight=0.5),
8 |     gt_preprocessor=dict(
9 |         type='GTPixelPreprocessor',
10 |         mean=[123.675, 116.28, 103.53],
11 |         std=[58.395, 57.12, 57.375],
12 |         bgr_to_rgb=True,
13 |         pad_size_divisor=32,
14 |         element_name='img'))
15 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/duo.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets import CocoDataset
2 | 
3 | from lqit.registry import DATASETS
4 | 
5 | DUO_METAINFO = {
6 |     'classes': ('holothurian', 'echinus', 'scallop', 'starfish'),
7 |     'palette': [(235, 211, 70), (106, 90, 205), (160, 32, 240), (176, 23, 31)]
8 | }
9 | 
10 | 
11 | @DATASETS.register_module()
12 | class DUODataset(CocoDataset):
13 |     """Detecting Underwater Objects dataset `DUO.
14 | 
15 |     `_
16 |     """
17 |     METAINFO = DUO_METAINFO
18 | 
--------------------------------------------------------------------------------
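`DUODataset` shows the whole pattern for adding a dataset to LQIT: subclass an MMDetection dataset, override `METAINFO` with class names and palette colors, and register the class so configs can reference it by name. A minimal sketch for a hypothetical two-class dataset (the class name, categories, and colors are invented for illustration and are not part of LQIT):

```python
from mmdet.datasets import CocoDataset

from lqit.registry import DATASETS


@DATASETS.register_module()
class MyReefDataset(CocoDataset):  # hypothetical example dataset
    """COCO-style dataset with custom underwater categories."""
    METAINFO = {
        'classes': ('coral', 'sponge'),
        'palette': [(220, 20, 60), (0, 128, 255)],
    }
```

A config can then select it by type name, mirroring how the shipped configs reference `DUODataset`.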
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | CONFIG=$1
4 | GPUS=$2
5 | NNODES=${NNODES:-1}
6 | NODE_RANK=${NODE_RANK:-0}
7 | PORT=${PORT:-29500}
8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
9 | 
10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
11 | python -m torch.distributed.launch \
12 |     --nnodes=$NNODES \
13 |     --node_rank=$NODE_RANK \
14 |     --master_addr=$MASTER_ADDR \
15 |     --nproc_per_node=$GPUS \
16 |     --master_port=$PORT \
17 |     $(dirname "$0")/train.py \
18 |     $CONFIG \
19 |     --launcher pytorch ${@:3}
20 | 
--------------------------------------------------------------------------------
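`dist_train.sh` forwards everything after its first two arguments straight to `train.py`, so extra flags ride along unchanged. A usage sketch (the work directory is a placeholder, and `--work-dir` is assumed to be accepted by `tools/train.py`, as in other OpenMMLab projects):

```shell
# Single-node training on 8 GPUs with a custom work directory.
bash tools/dist_train.sh \
    configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py 8 \
    --work-dir work_dirs/faster-rcnn_r101_fpn_1x_ruod
```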
/configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_2x_ruod.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_ruod.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_2x_ruod.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_ruod.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/edit/unet_demo/temp.py:
--------------------------------------------------------------------------------
1 | # TODO: delete after editor metrics and datasets are fully supported.
2 | _base_ = [
3 |     '../_base_/models/unet.py',
4 |     # '../_base_/datasets/underwater_enhancement.py',
5 |     '../_base_/datasets/underwater_enhancement_with_ann.py',
6 |     '../_base_/schedules/schedule_1x.py',
7 |     '../_base_/default_runtime.py'
8 | ]
9 | 
10 | train_dataloader = dict(num_workers=0, persistent_workers=False)
11 | val_dataloader = dict(batch_size=1, num_workers=0, persistent_workers=False)
12 | 
13 | test_dataloader = val_dataloader
14 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/rtts.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets import CocoDataset
2 | 
3 | from lqit.registry import DATASETS
4 | 
5 | RTTS_METAINFO = {
6 |     'classes': ('bicycle', 'bus', 'car', 'motorbike', 'person'),
7 |     'palette': [(255, 97, 0), (0, 201, 87), (176, 23, 31), (138, 43, 226),
8 |                 (30, 144, 255)]
9 | }
10 | 
11 | 
12 | @DATASETS.register_module()
13 | class RTTSCocoDataset(CocoDataset):
14 |     """Foggy object detection dataset in RESIDE `RTTS.
15 | 
16 |     `_
17 |     """
18 |     METAINFO = RTTS_METAINFO
19 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-A/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train-all_test-B/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-32x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=32,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_1x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/configs/detection/urpc2020_dataset/train_validation/faster-rcnn_x101-64x4d_fpn_2x_urpc-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_urpc-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         type='ResNeXt',
6 |         depth=101,
7 |         groups=64,
8 |         base_width=4,
9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         style='pytorch',
14 |         init_cfg=dict(
15 |             type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))
16 | 
--------------------------------------------------------------------------------
/lqit/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | from mmengine.utils import get_git_hash
3 | from mmengine.utils.dl_utils import collect_env as collect_base_env
4 | 
5 | import lqit
6 | 
7 | 
8 | def collect_env() -> dict:
9 |     """Collect the information of the running environments."""
10 |     env_info = collect_base_env()
11 |     env_info['lqit'] = lqit.__version__ + '+' + get_git_hash()[:7]
12 |     return env_info
13 | 
14 | 
15 | if __name__ == '__main__':
16 |     for name, val in collect_env().items():
17 |         print(f'{name}: {val}')
18 | 
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | NNODES=${NNODES:-1}
7 | NODE_RANK=${NODE_RANK:-0}
8 | PORT=${PORT:-29500}
9 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
10 | 
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch \
13 |     --nnodes=$NNODES \
14 |     --node_rank=$NODE_RANK \
15 |     --master_addr=$MASTER_ADDR \
16 |     --nproc_per_node=$GPUS \
17 |     --master_port=$PORT \
18 |     $(dirname "$0")/test.py \
19 |     $CONFIG \
20 |     $CHECKPOINT \
21 |     --launcher pytorch \
22 |     ${@:4}
23 | 
--------------------------------------------------------------------------------
/.circleci/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG PYTORCH="1.8.1"
2 | ARG CUDA="10.2"
3 | ARG CUDNN="7"
4 | 
5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
6 | 
7 | # To fix GPG key error when running apt-get update
8 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
9 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
10 | 
11 | RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
12 | 
--------------------------------------------------------------------------------
/data/README_zh-CN.md:
--------------------------------------------------------------------------------
1 | # Prepare Datasets Supported by LQIT
2 | 
3 | It is recommended to symlink the dataset root to `$LQIT/data`.
4 | If your folder structure is different, you may need to change the corresponding paths in config files.
5 | 
6 | ## Object Detection Datasets
7 | 
8 | Datasets supported by LQIT:
9 | 
10 | - All datasets supported in [MMDetection](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/en/user_guides/dataset_prepare.md).
11 | - [URPC2020 Dataset](../docs/zh_cn/prepare_data/urpc_2020.md) \[ [Homepage](https://www.heywhale.com/home/competition/5e535a612537a0002ca864ac/content/0) \]
12 | - [RTTS Dataset](../docs/zh_cn/prepare_data/rtts.md) \[ [Homepage](https://sites.google.com/site/boyilics/website-builder/reside?pli=1) \]
13 | 
14 | ## Enhancement Datasets
15 | 
16 | Datasets supported by LQIT:
--------------------------------------------------------------------------------
/lqit/common/models/data_preprocessor/__init__.py:
--------------------------------------------------------------------------------
1 | from .batch_data_preprocessor import BatchDataPreprocessor
2 | from .batch_process import stack_batch
3 | from .gt_pixel_preprocessor import (GTPixelPreprocessor,
4 |                                     MultiGTPixelPreprocessor)
5 | from .multi_input_data_preprocessor import MultiInputDataPreprocessor
6 | from .multi_input_multi_batch import MIMBDataPreprocessor
7 | 
8 | __all__ = [
9 |     'MultiInputDataPreprocessor',
10 |     'BatchDataPreprocessor',
11 |     'MIMBDataPreprocessor',
12 |     'GTPixelPreprocessor',
13 |     'MultiGTPixelPreprocessor',
14 |     'stack_batch',
15 | ]
16 | 
--------------------------------------------------------------------------------
/docs/en/notes/changelog.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ### v0.0.1rc2 (28/10/2023)
4 | 
5 | - Support [FeiShu (Lark) robot](configs/lark/README.md)
6 | - Support [TIENet](https://link.springer.com/article/10.1007/s11760-023-02695-9), [UOD-AIR](https://ieeexplore.ieee.org/abstract/document/9949063), and [RDFFNet](https://link.springer.com/article/10.1007/s11760-022-02410-0)
7 | - Release `RTTS` foggy object detection models
8 | 
9 | ### v0.0.1rc1 (12/5/2023)
10 | 
11 | - Release `LQIT` v0.0.1rc1 and make `LQIT` open source
12 | - Support detection-related code
13 | - Release `URPC2020` and `RUOD` underwater object detection pretrained models
14 | 
--------------------------------------------------------------------------------
/lqit/detection/evaluation/tide/__init__.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/dbolya/tide
2 | # This work is licensed under MIT license.
3 | 
4 | # Note:
5 | # 1. We removed several pieces of unused code, improved TIDE's output images,
6 | #    and optimized the code format.
7 | # 2. Right now, only the COCO dataset is available; others
8 | #    (LVIS, PASCAL VOC, and CityScapes) are not fully tested.
9 | 
10 | from .datasets import COCO, LVIS, Cityscapes, COCOResult, LVISResult, Pascal
11 | from .errors import *  # noqa: F401,F403
12 | from .quantify import TIDE
13 | 
14 | __all__ = [
15 |     'TIDE', 'COCO', 'COCOResult', 'LVIS', 'LVISResult', 'Pascal', 'Cityscapes'
16 | ]
17 | 
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 | 
--------------------------------------------------------------------------------
/lqit/detection/evaluation/tide/errors/__init__.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/dbolya/tide
2 | # This work is licensed under MIT license.
3 | from .error import BestGTMatch, Error
4 | from .main_errors import (BackgroundError, BoxError, ClassError,
5 |                           DuplicateError, FalseNegativeError,
6 |                           FalsePositiveError, MissedError, OtherError)
7 | from .qualifiers import AREA, ASPECT_RATIO, Qualifier
8 | 
9 | __all__ = [
10 |     'Error', 'BestGTMatch', 'ClassError', 'BoxError', 'DuplicateError',
11 |     'BackgroundError', 'OtherError', 'MissedError', 'FalsePositiveError',
12 |     'FalseNegativeError', 'Qualifier', 'AREA', 'ASPECT_RATIO'
13 | ]
14 | 
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -x
4 | 
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${@:5}
14 | 
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
25 | 
--------------------------------------------------------------------------------
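Unlike the `dist_*.sh` launchers, the Slurm scripts read their resource settings from environment variables rather than flags. A usage sketch (the partition and job names are placeholders):

```shell
# Train on 16 GPUs spread over two 8-GPU nodes; GPUS and GPUS_PER_NODE
# override the defaults of 8 defined in the script.
GPUS=16 GPUS_PER_NODE=8 bash tools/slurm_train.sh gpu ruod-r101 \
    configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py \
    work_dirs/faster-rcnn_r101_fpn_1x_ruod
```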
/lqit/edit/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .perceptual_loss import (PerceptualLoss, PerceptualVGG,
2 |                               TransferalPerceptualLoss)
3 | from .pixelwise_loss import (CharbonnierLoss, ColorLoss, ExposureLoss, L1Loss,
4 |                              MaskedTVLoss, MSELoss, SpatialLoss)
5 | from .ssim_loss import SSIMLoss
6 | from .structure_fft_loss import StructureFFTLoss
7 | from .utils import mask_reduce_loss, reduce_loss
8 | 
9 | __all__ = [
10 |     'CharbonnierLoss', 'L1Loss', 'MaskedTVLoss', 'MSELoss', 'SpatialLoss',
11 |     'PerceptualLoss', 'PerceptualVGG', 'TransferalPerceptualLoss', 'SSIMLoss',
12 |     'ExposureLoss', 'ColorLoss', 'mask_reduce_loss', 'reduce_loss',
13 |     'StructureFFTLoss'
14 | ]
15 | 
--------------------------------------------------------------------------------
/configs/detection/ruod_dataset/faster-rcnn_r101_fpn_1x_ruod.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_ruod.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
9 | # add WandbVisBackend
10 | # vis_backends = [
11 | #     dict(type='LocalVisBackend'),
12 | #     dict(type='WandbVisBackend',
13 | #          init_kwargs=dict(
14 | #              project='RUOD_detection',
15 | #              name='faster-rcnn_r101_fpn_1x_ruod',
16 | #              entity='lqit',
17 | #          )
18 | #     )
19 | # ]
20 | # visualizer = dict(
21 | #     type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
22 | 
--------------------------------------------------------------------------------
/configs/detection/duo_dataset/faster-rcnn_r101_fpn_1x_duo-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
9 | # add WandbVisBackend
10 | # vis_backends = [
11 | #     dict(type='LocalVisBackend'),
12 | #     dict(type='WandbVisBackend',
13 | #          init_kwargs=dict(
14 | #              project='DUO_detection',
15 | #              name='faster-rcnn_r101_fpn_1x_duo',
16 | #              entity='lqit',
17 | #          )
18 | #     )
19 | # ]
20 | # visualizer = dict(
21 | #     type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
22 | 
--------------------------------------------------------------------------------
/configs/detection/duo_dataset/faster-rcnn_r101_fpn_2x_duo-coco.py:
--------------------------------------------------------------------------------
1 | _base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py'
2 | 
3 | model = dict(
4 |     backbone=dict(
5 |         depth=101,
6 |         init_cfg=dict(type='Pretrained',
7 |                       checkpoint='torchvision://resnet101')))
8 | 
9 | # add WandbVisBackend
10 | # vis_backends = [
11 | #     dict(type='LocalVisBackend'),
12 | #     dict(type='WandbVisBackend',
13 | #          init_kwargs=dict(
14 | #              project='DUO_detection',
15 | #              name='faster-rcnn_r101_fpn_2x_duo',
16 | #              entity='lqit',
17 | #          )
18 | #     )
19 | # ]
20 | # visualizer = dict(
21 | #     type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer')
22 | 
--------------------------------------------------------------------------------
/LICENSES.md:
--------------------------------------------------------------------------------
1 | # Licenses for special operations
2 | 
3 | In this file, we list the features that are licensed under terms other than Apache 2.0. Users should be careful when adopting these features in any commercial matters.
4 | 
5 | | Operation | Files                                                                   | License                                                                                                |
6 | | :-------: | :---------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------: |
7 | | Zero-DCE  | [lqit/edit/models/editors/zero_dce](lqit/edit/models/editors/zero_dce) | [Attribution-NonCommercial 4.0 International License](https://github.com/Li-Chongyi/Zero-DCE#license) |
8 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/ruod.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets import CocoDataset
2 | 
3 | from lqit.registry import DATASETS
4 | 
5 | RUOD_METAINFO = {
6 |     'classes': ('holothurian', 'echinus', 'scallop', 'starfish', 'fish',
7 |                 'corals', 'diver', 'cuttlefish', 'turtle', 'jellyfish'),
8 |     'palette': [(235, 211, 70), (106, 90, 205), (160, 32, 240), (176, 23, 31),
9 |                 (142, 0, 0), (230, 0, 0), (106, 0, 228), (60, 100, 0),
10 |                 (80, 100, 0), (70, 0, 0)]
11 | }
12 | 
13 | 
14 | @DATASETS.register_module()
15 | class RUODDataset(CocoDataset):
16 |     """Real-world Underwater Object Detection dataset `RUOD.
17 | 
18 |     `_
19 |     """
20 |     METAINFO = RUOD_METAINFO
21 | 
--------------------------------------------------------------------------------
/lqit/detection/datasets/urpc.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets import CocoDataset
2 | 
3 | from lqit.registry import DATASETS
4 | from .xml_dataset import XMLDatasetWithMetaFile
5 | 
6 | URPC_METAINFO = {
7 |     'classes': ('holothurian', 'echinus', 'scallop', 'starfish'),
8 |     'palette': [(235, 211, 70), (106, 90, 205), (160, 32, 240), (176, 23, 31)]
9 | }
10 | 
11 | 
12 | @DATASETS.register_module()
13 | class URPCCocoDataset(CocoDataset):
14 |     """Underwater Robot Professional Contest dataset `URPC.
15 | 
16 |     `_
17 |     """
18 |     METAINFO = URPC_METAINFO
19 | 
20 | 
21 | @DATASETS.register_module()
22 | class URPCXMLDataset(XMLDatasetWithMetaFile):
23 |     """Underwater Robot Professional Contest dataset `URPC` in XML format."""
24 |     METAINFO = URPC_METAINFO
25 | 
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | # Data Preparation for LQIT
2 | 
3 | It is recommended to symlink the dataset root to `$LQIT/data`.
4 | If your folder structure is different, you may need to change the corresponding paths in config files.
5 | 
6 | ## Detection Datasets
7 | 
8 | Datasets supported in LQIT:
9 | 
10 | - Datasets supported in [MMDetection](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/en/user_guides/dataset_prepare.md)
11 | - [URPC2020 Dataset](../docs/en/prepare_data/urpc_2020.md) \[ [Homepage](https://www.heywhale.com/home/competition/5e535a612537a0002ca864ac/content/0) \]
12 | - [RTTS Dataset](../docs/en/prepare_data/rtts.md) \[ [Homepage](https://sites.google.com/site/boyilics/website-builder/reside?pli=1) \]
13 | 
14 | ## Enhancement Datasets
15 | 
16 | Datasets supported in LQIT:
17 | 
--------------------------------------------------------------------------------
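The symlink layout recommended above keeps large datasets outside the repository while configs keep their relative `data/...` paths. A minimal sketch (the `/datasets/...` source paths and per-dataset folder names are placeholders for wherever the data actually lives):

```shell
mkdir -p data
# One symlink per dataset under $LQIT/data.
ln -s /datasets/RUOD data/RUOD
ln -s /datasets/RTTS data/RTTS
```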
/configs/lark/README.md:
--------------------------------------------------------------------------------
1 | # FeiShu robot
2 | 
3 | ## Config
4 | 
5 | Put the webhook path of your FeiShu (Lark) robot into `lark.py`, and make the following settings:
6 | 
7 | ```python
8 | lark = 'https://open.feishu.cn/open-apis/bot/v2/hook/XXXX-XXXX-XXXX-XXXX'
9 | ```
10 | 
11 | **Note:** Pay attention to privacy!
12 | 
13 | For more details about the FeiShu robot, please refer to [here](https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot).
14 | 
15 | ## Running command
16 | 
17 | If you want to use the FeiShu robot during training and testing, add `-l` or `--lark` to the running command.
18 | 
19 | Examples:
20 | 
21 | ```
22 | # training script
23 | python tools/train.py ${CONFIG_FILE} -l ${Other setting}
24 | 
25 | # testing script
26 | python tools/test.py ${CONFIG_FILE} ${CHECKPOINT} -l ${Other setting}
27 | ```
28 | 
--------------------------------------------------------------------------------
/lqit/utils/typing_utils.py:
--------------------------------------------------------------------------------
1 | # Modified from https://github.com/open-mmlab/mmdetection/tree/3.x/
2 | """Collecting some commonly used type hints in lqit."""
3 | from typing import List, Optional, Sequence, Tuple, Union
4 | 
5 | from mmengine.config import ConfigDict
6 | from mmengine.structures import InstanceData, PixelData
7 | 
8 | # TODO: Need to avoid circular import with assigner and sampler
9 | # Type hint of config data
10 | ConfigType = Union[ConfigDict, dict]
11 | OptConfigType = Optional[ConfigType]
12 | # Type hint of one or more config data
13 | MultiConfig = Union[ConfigType, List[ConfigType]]
14 | OptMultiConfig = Optional[MultiConfig]
15 | 
16 | InstanceList = List[InstanceData]
17 | OptInstanceList = Optional[InstanceList]
18 | 
19 | PixelList = List[PixelData]
20 | OptPixelList = Optional[PixelList]
21 | 
22 | RangeType = Sequence[Tuple[int, int]]
23 | 
--------------------------------------------------------------------------------
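These aliases shorten signatures anywhere a component accepts either a plain `dict` or an mmengine `ConfigDict`. A sketch of how they read in practice (the function itself is illustrative, not an LQIT API):

```python
from lqit.utils import ConfigType, OptConfigType


def describe_head(head_cfg: ConfigType,
                  norm_cfg: OptConfigType = None) -> str:
    """Hypothetical helper: head_cfg is required, norm_cfg may be None."""
    norm = norm_cfg['type'] if norm_cfg else 'none'
    return f"head={head_cfg['type']}, norm={norm}"
```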
13 | """ 14 | version_info = [] 15 | for x in version_str.split('.'): 16 | if x.isdigit(): 17 | version_info.append(int(x)) 18 | elif x.find('rc') != -1: 19 | patch_version = x.split('rc') 20 | version_info.append(int(patch_version[0])) 21 | version_info.append(f'rc{patch_version[1]}') 22 | return tuple(version_info) 23 | 24 | 25 | version_info = parse_version_info(__version__) 26 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | 3 | on: push 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | build-n-publish: 11 | runs-on: ubuntu-latest 12 | if: startsWith(github.event.ref, 'refs/tags') 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 3.7 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.7 19 | - name: Install torch 20 | run: pip install torch 21 | - name: Install wheel 22 | run: pip install wheel 23 | - name: Build LQIT 24 | run: python setup.py sdist bdist_wheel 25 | - name: Publish distribution to PyPI 26 | run: | 27 | pip install twine 28 | twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} 29 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.7 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.7 18 | - name: Install pre-commit hook 19 | run: | 20 | pip install pre-commit 21 | pre-commit install 22 | - name: Linting 23 | run: pre-commit run --all-files 24 | # TODO 25 | # - name: Check docstring coverage 26 | # run: | 27 | # pip install interrogate 28 | # interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 80 lqit 29 | -------------------------------------------------------------------------------- /lqit/detection/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from mmengine.utils import digit_version 4 | 5 | try: 6 | import mmdet 7 | HAS_MMDET = True 8 | except ImportWarning: 9 | HAS_MMDET = False 10 | 11 | mmdet_minimum_version = '3.0.0' 12 | mmdet_maximum_version = '3.2.0' 13 | if HAS_MMDET: 14 | mmdet_version = digit_version(mmdet.__version__) 15 | assert (mmdet_version >= digit_version(mmdet_minimum_version) 16 | and mmdet_version < digit_version(mmdet_maximum_version)), \ 17 | f'MMDetection=={mmdet.__version__} is used but incompatible. ' \ 18 | f'Please install mmdet>={mmdet_minimum_version}, ' \ 19 | f'<{mmdet_maximum_version}.' 
20 | from .datasets import * # noqa: F401,F403 21 | from .engine import * # noqa: F401,F403 22 | from .models import * # noqa: F401,F403 23 | else: 24 | warnings.warn('Please install mmdet to import `lqit.detection`.') 25 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | extra_standard_library = setuptools 5 | known_first_party = lqit 6 | known_third_party = mmdetection,PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,parameterized,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml,mmengine 7 | no_lines_before = STDLIB,LOCALFOLDER 8 | default_section = THIRDPARTY 9 | 10 | [yapf] 11 | BASED_ON_STYLE = pep8 12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 14 | 15 | # ignore-words-list needs to be lowercase format. For example, if we want to 16 | # ignore word "BA", then we need to append "ba" to ignore-words-list rather 17 | # than "BA" 18 | [codespell] 19 | skip = *.ipynb 20 | quiet-level = 3 21 | ignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood,ba 22 | -------------------------------------------------------------------------------- /lqit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import get_caller_name, log_img_scale, print_colored_log 2 | from .memory import AvoidCUDAOOM, AvoidOOM 3 | from .process_debug import process_debug_mode 4 | from .setup_env import (register_all_modules, setup_cache_size_limit_of_dynamo, 5 | setup_multi_processes) 6 | from .typing_utils import (ConfigType, InstanceList, MultiConfig, 7 | OptConfigType, OptInstanceList, OptMultiConfig, 8 | OptPixelList, PixelList, RangeType) 9 | 10 | __all__ = [ 11 | 'print_colored_log', 'register_all_modules', 'setup_multi_processes', 12 | 'ConfigType', 'InstanceList', 'MultiConfig', 'OptConfigType', 13 | 'OptInstanceList', 'OptMultiConfig', 'OptPixelList', 'PixelList', 14 | 'RangeType', 'get_caller_name', 'log_img_scale', 'AvoidCUDAOOM', 15 | 'AvoidOOM', 'setup_cache_size_limit_of_dynamo', 'process_debug_mode' 16 | ] 17 | -------------------------------------------------------------------------------- /configs/edit/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # training schedule for 1x 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=12, 14 | by_epoch=True, 15 | milestones=[8, 11], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
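# For example, with enable=True and an actual total batch size of 8 (4 GPUs x 2 samples per GPU), the base lr of 0.02 would be linearly scaled to 0.02 * 8 / 16 = 0.01.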
28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /configs/detection/_base_/schedules/schedule_1x.py: -------------------------------------------------------------------------------- 1 | # training schedule for 1x 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=12, 14 | by_epoch=True, 15 | milestones=[8, 11], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /configs/detection/_base_/schedules/schedule_20e.py: -------------------------------------------------------------------------------- 1 | # training schedule for 20e 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=20, 14 | by_epoch=True, 15 | milestones=[16, 19], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /configs/detection/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # training schedule for 2x 2 | train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=1) 3 | val_cfg = dict(type='ValLoop') 4 | test_cfg = dict(type='TestLoop') 5 | 6 | # learning rate 7 | param_scheduler = [ 8 | dict( 9 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500), 10 | dict( 11 | type='MultiStepLR', 12 | begin=0, 13 | end=24, 14 | by_epoch=True, 15 | milestones=[16, 22], 16 | gamma=0.1) 17 | ] 18 | 19 | # optimizer 20 | optim_wrapper = dict( 21 | type='OptimWrapper', 22 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 23 | 24 | # Default setting for scaling LR automatically 25 | # - `enable` means enable scaling LR automatically 26 | # or not by default. 27 | # - `base_batch_size` = (8 GPUs) x (2 samples per GPU). 
28 | auto_scale_lr = dict(enable=False, base_batch_size=16) 29 | -------------------------------------------------------------------------------- /configs/detection/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'mmdet' 2 | 3 | default_hooks = dict( 4 | timer=dict(type='IterTimerHook'), 5 | logger=dict(type='LoggerHook', interval=50), 6 | param_scheduler=dict(type='ParamSchedulerHook'), 7 | checkpoint=dict(type='CheckpointHook', interval=1), 8 | sampler_seed=dict(type='DistSamplerSeedHook'), 9 | visualization=dict(type='DetVisualizationHook'), 10 | summarizer=dict(type='lqit.SummarizeHook')) 11 | 12 | randomness = dict(seed=None, deterministic=False) 13 | 14 | env_cfg = dict( 15 | cudnn_benchmark=False, 16 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 17 | dist_cfg=dict(backend='nccl'), 18 | ) 19 | 20 | vis_backends = [dict(type='LocalVisBackend')] 21 | visualizer = dict( 22 | type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 23 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) 24 | 25 | log_level = 'INFO' 26 | load_from = None 27 | resume = False 28 | -------------------------------------------------------------------------------- /configs/detection/ruod_dataset/faster-rcnn_x101-32x4d_fpn_1x_ruod.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_1x_ruod.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 16 | 17 | # add WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='RUOD_detection', 23 | # name='faster-rcnn_x101-32x4d_fpn_1x_ruod', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /configs/detection/ruod_dataset/faster-rcnn_x101-64x4d_fpn_1x_ruod.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_1x_ruod.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 16 | 17 | # add WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='RUOD_detection', 23 | # name='faster-rcnn_x101-64x4d_fpn_1x_ruod', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /configs/edit/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | default_scope = 'lqit' 2 | 3 | default_hooks = dict( 4 | timer=dict(type='IterTimerHook'), 5 | 
logger=dict(type='LoggerHook', interval=50), 6 | param_scheduler=dict(type='ParamSchedulerHook'), 7 | checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=1), 8 | sampler_seed=dict(type='DistSamplerSeedHook')) 9 | # visualization=dict(type='DetVisualizationHook') TODO: not implemented yet 10 | 11 | # set seed = 0 as default 12 | randomness = dict(seed=0, deterministic=False) 13 | 14 | env_cfg = dict( 15 | cudnn_benchmark=False, 16 | mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), 17 | dist_cfg=dict(backend='nccl'), 18 | ) 19 | 20 | vis_backends = [dict(type='LocalVisBackend')] 21 | visualizer = dict( 22 | type='Visualizer', vis_backends=vis_backends, name='visualizer') 23 | log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) 24 | 25 | log_level = 'INFO' 26 | load_from = None 27 | resume = False 28 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_1x_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 16 | 17 | # add WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='DUO_detection', 23 | # name='faster-rcnn_x101-32x4d_fpn_1x_duo', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/faster-rcnn_x101-32x4d_fpn_2x_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=32, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'))) 16 | 17 | # add WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='DUO_detection', 23 | # name='faster-rcnn_x101-32x4d_fpn_2x_duo', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_1x_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_1x_duo-coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 16 | 17 | # add
WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='DUO_detection', 23 | # name='faster-rcnn_x101-64x4d_fpn_1x_duo', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/faster-rcnn_x101-64x4d_fpn_2x_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'faster-rcnn_r50_fpn_2x_duo-coco.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | type='ResNeXt', 6 | depth=101, 7 | groups=64, 8 | base_width=4, 9 | num_stages=4, 10 | out_indices=(0, 1, 2, 3), 11 | frozen_stages=1, 12 | norm_cfg=dict(type='BN', requires_grad=True), 13 | style='pytorch', 14 | init_cfg=dict( 15 | type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d'))) 16 | 17 | # add WandbVisBackend 18 | # vis_backends = [ 19 | # dict(type='LocalVisBackend'), 20 | # dict(type='WandbVisBackend', 21 | # init_kwargs=dict( 22 | # project='DUO_detection', 23 | # name='faster-rcnn_x101-64x4d_fpn_2x_duo', 24 | # entity='lqit', 25 | # ) 26 | # ) 27 | # ] 28 | # visualizer = dict( 29 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 30 | -------------------------------------------------------------------------------- /lqit/detection/models/detectors/edffnet.py: -------------------------------------------------------------------------------- 1 | from lqit.registry import MODELS 2 | from lqit.utils import ConfigType, OptMultiConfig 3 | from .detector_with_enhance_head import DetectorWithEnhanceHead 4 | 5 | 6 | @MODELS.register_module() 7 | class EDFFNet(DetectorWithEnhanceHead): 8 | """Implementation of EDFFNet. 
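The model pairs a detector with an auxiliary edge prediction head (the `edge_head` argument below).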
9 | 10 | `_ 11 | """ 12 | 13 | def __init__(self, 14 | detector: ConfigType, 15 | edge_head: ConfigType, 16 | process_gt_preprocessor: bool = False, 17 | vis_enhance: bool = False, 18 | init_cfg: OptMultiConfig = None) -> None: 19 | assert not process_gt_preprocessor, \ 20 | 'process_gt_preprocessor is not supported in EDFFNet' 21 | super().__init__( 22 | detector=detector, 23 | enhance_head=edge_head, 24 | process_gt_preprocessor=process_gt_preprocessor, 25 | vis_enhance=vis_enhance, 26 | init_cfg=init_cfg) 27 | -------------------------------------------------------------------------------- /configs/edit/_base_/models/unet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='BaseEditModel', 3 | data_preprocessor=dict( 4 | type='EditDataPreprocessor', 5 | mean=[0.0, 0.0, 0.0], 6 | std=[255.0, 255.0, 255.0], 7 | bgr_to_rgb=True, 8 | pad_size_divisor=32, 9 | gt_name='img'), 10 | generator=dict( 11 | type='UNetGenerator', 12 | model=dict( 13 | type='BaseUNet', 14 | in_channels=3, 15 | out_channels=3, 16 | base_channels=64, 17 | num_stages=5, 18 | strides=(1, 1, 1, 1, 1), 19 | enc_num_convs=(2, 2, 2, 2, 2), 20 | dec_num_convs=(2, 2, 2, 2), 21 | downsamples=(True, True, True, True), 22 | enc_dilations=(1, 1, 1, 1, 1), 23 | dec_dilations=(1, 1, 1, 1), 24 | with_cp=False, 25 | conv_cfg=None, 26 | norm_cfg=dict(type='BN'), 27 | act_cfg=dict(type='ReLU'), 28 | upsample_cfg=dict(type='InterpConv')), 29 | pixel_loss=dict(type='L1Loss', loss_weight=1.0))) 30 | -------------------------------------------------------------------------------- /lqit/edit/models/editors/unet/unet_generator.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from lqit.edit.models.base_models import BaseGenerator 4 | from lqit.edit.structures import BatchPixelData 5 | from lqit.registry import MODELS 6 | from lqit.utils import ConfigType, OptConfigType, OptMultiConfig 7 | 8 | 9 | @MODELS.register_module() 10 | class UNetGenerator(BaseGenerator): 11 | 12 | def __init__(self, 13 | model: ConfigType, 14 | pixel_loss: ConfigType, 15 | perceptual_loss: OptConfigType = None, 16 | init_cfg: OptMultiConfig = None) -> None: 17 | super().__init__( 18 | model=model, 19 | pixel_loss=pixel_loss, 20 | perceptual_loss=perceptual_loss, 21 | init_cfg=init_cfg) 22 | 23 | def loss(self, loss_input: BatchPixelData, batch_img_metas: List[dict]): 24 | """Calculate the loss based on the outputs of generator.""" 25 | batch_outputs = loss_input.output 26 | batch_gt_pixel = loss_input.gt 27 | pixel_loss = self.pixel_loss(batch_outputs, batch_gt_pixel) 28 | 29 | return dict(pixel_loss=pixel_loss) 30 | -------------------------------------------------------------------------------- /.circleci/test.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # the default pipeline parameters, which will be updated according to 4 | # the results of the path-filtering orb 5 | parameters: 6 | lint_only: 7 | type: boolean 8 | default: true 9 | 10 | jobs: 11 | lint: 12 | docker: 13 | - image: cimg/python:3.7.4 14 | steps: 15 | - checkout 16 | - run: 17 | name: Install pre-commit hook 18 | command: | 19 | pip install pre-commit 20 | pre-commit install 21 | - run: 22 | name: Linting 23 | command: pre-commit run --all-files 24 | # TODO 25 | # - run: 26 | # name: Check docstring coverage 27 | # command: | 28 | # pip install interrogate 29 | # interrogate -v --ignore-init-method 
--ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 80 lqit 30 | 31 | workflows: 32 | # lint only 33 | pr_stage_lint: 34 | when: << pipeline.parameters.lint_only >> 35 | jobs: 36 | - lint: 37 | name: lint 38 | filters: 39 | branches: 40 | ignore: 41 | - main 42 | 43 | # TODO: Support Test Unit configs 44 | -------------------------------------------------------------------------------- /docs/zh_cn/prepare_data/ruod.md: -------------------------------------------------------------------------------- 1 | # RUOD 2 | 3 | ```latex 4 | @article{fu2023rethinking, 5 | title={Rethinking general underwater object detection: Datasets, challenges, and solutions}, 6 | author={Fu, Chenping and Liu, Risheng and Fan, Xin and Chen, Puyang and Fu, Hao and Yuan, Wanqi and Zhu, Ming and Luo, Zhongxuan}, 7 | journal={Neurocomputing}, 8 | volume={517}, 9 | pages={243--256}, 10 | year={2023}, 11 | publisher={Elsevier} 12 | } 13 | ``` 14 | 15 | The dataset contains 14,000 underwater images (9,800 for training and 4,200 for testing) with more than 74,000 bounding boxes, covering ten categories: fish, echinus, corals, starfish, holothurian, scallop, diver, cuttlefish, turtle, and jellyfish. 16 | 17 | ## Download RUOD Dataset 18 | 19 | The Real-world Underwater Object Detection (RUOD) dataset can be downloaded from [here](https://github.com/dlut-dimt/RUOD). 20 | 21 | The default data structure is as follows: 22 | 23 | ```text 24 | lqit 25 | ├── lqit 26 | ├── tools 27 | ├── configs 28 | ├── data 29 | │ ├── RUOD 30 | │ │ ├── annotations 31 | │ │ │ ├── instances_train.json 32 | │ │ │ ├── instances_test.json 33 | │ │ ├── train 34 | │ │ │ ├── 000002.jpg 35 | │ │ │ ├── 000003.jpg 36 | │ │ │ ├── ... 37 | │ │ ├── test 38 | │ │ │ ├── 000001.jpg 39 | │ │ │ ├── 000004.jpg 40 | │ │ │ ├── ... 41 | ``` 42 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_atss_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/atss_r50_fpn_1x_rtts-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_atss_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/atss_r50_fpn_1x_urpc-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel',
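# Wraps the detector together with the self-enhancement model defined in the base editor config.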
10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_tood_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/tood_r50_fpn_1x_rtts-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_tood_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/tood_r50_fpn_1x_urpc-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = 
dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_retinanet_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/retinanet_r50_fpn_1x_rtts-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_retinanet_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/retinanet_r50_fpn_1x_urpc-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_faster-rcnn_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/faster-rcnn_r50_fpn_1x_rtts-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | 
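# Note: the Faster R-CNN based TIENet configs keep the default base lr of 0.02, while the one-stage configs above use 0.01.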
type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/tienet/tienet_faster-rcnn_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | # default scope is mmdet 2 | _base_ = [ 3 | './base_editor/tienet_enhance_model.py', 4 | './base_detector/faster-rcnn_r50_fpn_1x_urpc-coco.py' 5 | ] 6 | 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceModel', 10 | detector={{_base_.model}}, 11 | enhance_model={{_base_.enhance_model}}, 12 | train_mode='enhance', 13 | pred_mode='enhance', 14 | detach_enhance_img=False) 15 | 16 | optim_wrapper = dict( 17 | type='OptimWrapper', 18 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001), 19 | clip_grad=dict(max_norm=35, norm_type=2)) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 26 | dict(type='RandomFlip', prob=0.5), 27 | dict(type='lqit.SetInputImageAsGT'), 28 | dict(type='lqit.PackInputs') 29 | ] 30 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 31 | 32 | model_wrapper_cfg = dict( 33 | type='lqit.SelfEnhanceModelDDP', 34 | broadcast_buffers=False, 35 | find_unused_parameters=False) 36 | -------------------------------------------------------------------------------- /configs/detection/edffnet/edffnet_atss_r50_dffpn_1x_rtts-coco_lr002.py: -------------------------------------------------------------------------------- 1 | _base_ = ['./atss_r50_dffpn_1x_rtts-coco_lr002.py'] 2 | 3 | model = dict( 4 | _delete_=True, 5 | type='lqit.EDFFNet', 6 | detector={{_base_.model}}, 7 | edge_head=dict( 8 | _scope_='lqit', 9 | type='EdgeHead', 10 | in_channels=256, 11 | feat_channels=256, 12 | num_convs=5, 13 | loss_enhance=dict(type='L1Loss', loss_weight=0.7), 14 | gt_preprocessor=dict( 15 | type='GTPixelPreprocessor', 16 | mean=[128], 17 | std=[57.12], 18 | pad_size_divisor=32, 19 | element_name='edge')), 20 | vis_enhance=False) 21 | 22 | # dataset settings 23 | train_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict(type='LoadAnnotations', with_bbox=True), 26 | dict(type='lqit.GetEdgeGTFromImage', method='scharr'), 27 | dict( 28 | type='lqit.TransBroadcaster', 29 | src_key='img', 30 | dst_key='gt_edge', 31 | transforms=[ 32 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 33 | dict(type='RandomFlip', prob=0.5) 34 | ]), 35 | dict(type='lqit.PackInputs', ) 36 | ] 37 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 38 | -------------------------------------------------------------------------------- /docs/en/prepare_data/ruod.md: 
-------------------------------------------------------------------------------- 1 | # RUOD 2 | 3 | ```latex 4 | @article{fu2023rethinking, 5 | title={Rethinking general underwater object detection: Datasets, challenges, and solutions}, 6 | author={Fu, Chenping and Liu, Risheng and Fan, Xin and Chen, Puyang and Fu, Hao and Yuan, Wanqi and Zhu, Ming and Luo, Zhongxuan}, 7 | journal={Neurocomputing}, 8 | volume={517}, 9 | pages={243--256}, 10 | year={2023}, 11 | publisher={Elsevier} 12 | } 13 | ``` 14 | 15 | The dataset contains 14,000 images (9,800 for training and 4,200 for testing) with more than 74,000 bounding boxes, covering ten categories: fish, echinus, corals, starfish, holothurian, scallop, diver, cuttlefish, turtle, and jellyfish. 16 | 17 | ## Download RUOD Dataset 18 | 19 | The Real-world Underwater Object Detection (RUOD) dataset can be downloaded from [here](https://github.com/dlut-dimt/RUOD). 20 | 21 | The data structure is as follows: 22 | 23 | ```text 24 | lqit 25 | ├── lqit 26 | ├── tools 27 | ├── configs 28 | ├── data 29 | │ ├── RUOD 30 | │ │ ├── annotations 31 | │ │ │ ├── instances_train.json 32 | │ │ │ ├── instances_test.json 33 | │ │ ├── train 34 | │ │ │ ├── 000002.jpg 35 | │ │ │ ├── 000003.jpg 36 | │ │ │ ├── ... 37 | │ │ ├── test 38 | │ │ │ ├── 000001.jpg 39 | │ │ │ ├── 000004.jpg 40 | │ │ │ ├── ... 41 | ``` 42 | -------------------------------------------------------------------------------- /configs/detection/uod_air/uod-air_retinanet_r50_ufpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | './base_detector/retinanet_r50_ufpn_1x_urpc-coco.py', 3 | './base_ehance_head/enhance_head.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | _delete_=True, 9 | type='lqit.DetectorWithEnhanceHead', 10 | detector={{_base_.model}}, 11 | enhance_head={{_base_.enhance_head}}, 12 | vis_enhance=False) 13 | 14 | # dataset settings 15 | train_pipeline = [ 16 | dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}), 17 | dict(type='LoadAnnotations', with_bbox=True), 18 | dict(type='Resize', scale=(1333, 800), keep_ratio=True), 19 | dict(type='RandomFlip', prob=0.5), 20 | dict(type='lqit.SetInputImageAsGT'), 21 | dict(type='lqit.PackInputs') 22 | ] 23 | train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) 24 | 25 | # optimizer 26 | optim_wrapper = dict( 27 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001), 28 | clip_grad=dict(max_norm=35, norm_type=2)) # loss may become NaN without clip_grad 29 | 30 | param_scheduler = [ 31 | dict( 32 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 33 | end=1000), 34 | dict( 35 | type='MultiStepLR', 36 | begin=0, 37 | end=12, 38 | by_epoch=True, 39 | milestones=[8, 11], 40 | gamma=0.1) 41 | ] 42 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # this allows you to use CircleCI's dynamic configuration feature 4 | setup: true 5 | 6 | # the path-filtering orb is required to continue a pipeline based on 7 | # the path of an updated fileset 8 | orbs: 9 | path-filtering: circleci/path-filtering@0.1.2 10 | 11 | workflows: 12 | # the always-run workflow is always triggered, regardless of the pipeline parameters. 13 | always-run: 14 | jobs: 15 | # the path-filtering/filter job determines which pipeline 16 | # parameters to update.
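# A change under any mapped path below sets lint_only to false, allowing more than the lint-only workflow to run.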
17 | - path-filtering/filter: 18 | name: check-updated-files 19 | # 3-column, whitespace-delimited mapping. One mapping per 20 | # line: 21 | # <path regular expression> <pipeline parameter> <pipeline parameter value> 22 | mapping: | 23 | lqit/.* lint_only false 24 | requirements/.* lint_only false 25 | tests/.* lint_only false 26 | tools/.* lint_only false 27 | configs/.* lint_only false 28 | .circleci/.* lint_only false 29 | base-revision: main 30 | # this is the path of the configuration we should trigger once 31 | # path filtering and pipeline parameter value updates are 32 | # complete. In this case, we are using the parent dynamic 33 | # configuration itself. 34 | config-path: .circleci/test.yml 35 | -------------------------------------------------------------------------------- /configs/detection/tienet/base_editor/tienet_enhance_model.py: -------------------------------------------------------------------------------- 1 | enhance_model = dict( 2 | _scope_='lqit', 3 | type='BaseEditModel', 4 | destruct_gt=True, 5 | data_preprocessor=dict( 6 | type='EditDataPreprocessor', 7 | mean=[0.0, 0.0, 0.0], 8 | std=[255.0, 255.0, 255.0], 9 | bgr_to_rgb=False, 10 | gt_name='img'), 11 | generator=dict( 12 | type='TIENetGenerator', 13 | model=dict( 14 | type='TIENetEnhanceModel', 15 | in_channels=3, 16 | feat_channels=64, 17 | out_channels=3, 18 | num_blocks=3, 19 | expand_ratio=0.5, 20 | kernel_size=[1, 3, 5], 21 | output_weight=[1.0, 1.0], 22 | conv_cfg=None, 23 | norm_cfg=dict(type='BN'), 24 | act_cfg=dict(type='SiLU'), 25 | use_depthwise=True), 26 | spacial_pred='structure', 27 | structure_pred='structure', 28 | spacial_loss=dict(type='SpatialLoss', loss_weight=1.0), 29 | tv_loss=dict(type='MaskedTVLoss', loss_mode='mse', loss_weight=10.0), 30 | structure_loss=dict( 31 | type='StructureFFTLoss', 32 | radius=4, 33 | pass_type='high', 34 | channel_mean=False, 35 | loss_type='mse', 36 | guid_filter=dict( 37 | type='GuidedFilter2d', radius=32, eps=1e-4, fast_s=2), 38 | loss_weight=0.1))) 39 | -------------------------------------------------------------------------------- /lqit/edit/models/post_processor.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from mmengine.structures import PixelData 4 | from torch import Tensor 5 | 6 | from lqit.common.structures import SampleList 7 | 8 | 9 | def add_pixel_pred_to_datasample(data_samples: SampleList, 10 | pixel_list: Union[List[PixelData], 11 | List[Tensor]], 12 | key: str = 'pred_img') -> SampleList: 13 | """Add predictions to `DataSample`. 14 | 15 | Args: 16 | data_samples (list[:obj:`DataSample`]): A batch of 17 | data samples that contain annotations and predictions. 18 | pixel_list (list[:obj:`PixelData`] or list[Tensor]): Pixel results of 19 | each image. 20 | key (str): The key under which the prediction is stored. Defaults to 'pred_img'. 21 | Returns: 22 | list[:obj:`DataSample`]: Results of the input images.
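Example (hypothetical tensors):
    >>> samples = add_pixel_pred_to_datasample(samples, pred_list)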
23 | """ 24 | for data_sample, pred_pixel in zip(data_samples, pixel_list): 25 | assert isinstance(pred_pixel, Tensor) 26 | if data_sample.get('pred_pixel', None) is None: 27 | pred_instance = PixelData() 28 | else: 29 | pred_instance = data_sample.pred_pixel 30 | keys = pred_instance.keys() 31 | assert key not in keys, f'{key} is already in pred_pixel' 32 | pred_instance.set_data({key: pred_pixel}) 33 | data_sample.pred_pixel = pred_instance 34 | return data_samples 35 | -------------------------------------------------------------------------------- /lqit/detection/evaluation/tide/errors/qualifiers.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/dbolya/tide 2 | # This work is licensed under MIT license. 3 | # Defines qualifiers like "Extra small box" 4 | def _area(x): 5 | return x['bbox'][2] * x['bbox'][3] 6 | 7 | 8 | def _ar(x): 9 | return x['bbox'][2] / x['bbox'][3] 10 | 11 | 12 | class Qualifier: 13 | """Creates a qualifier with the given name. 14 | 15 | test_func should be a callable object (e.g., lambda) that takes in as input 16 | an annotation object (either a ground truth or prediction) and returns 17 | whether or not that object qualifies (i.e., a bool). 18 | """ 19 | 20 | def __init__(self, name: str, test_func: object) -> None: 21 | self.test = test_func 22 | self.name = name 23 | 24 | # This is horrible, but I like it 25 | def _make_error_func(self, error_type): 26 | return (lambda err: isinstance(err, error_type) and (self.test(err.gt) if hasattr(err, 'gt') else self.test(err.pred))) \ 27 | if self.test is not None else (lambda err: isinstance(err, error_type)) # noqa 28 | 29 | 30 | AREA = [ 31 | Qualifier('Small', lambda x: _area(x) <= 32**2), 32 | Qualifier('Medium', lambda x: 32**2 < _area(x) <= 96**2), 33 | Qualifier('Large', lambda x: 96**2 < _area(x)), 34 | ] 35 | 36 | ASPECT_RATIO = [ 37 | Qualifier('Tall', lambda x: _ar(x) <= 0.75), 38 | Qualifier('Square', lambda x: 0.75 < _ar(x) <= 1.33), 39 | Qualifier('Wide', lambda x: 1.33 < _ar(x)), 40 | ] 41 | -------------------------------------------------------------------------------- /lqit/edit/models/base_models/base_generator.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from typing import List 3 | 4 | from mmengine.model import BaseModule 5 | 6 | from lqit.edit.structures import BatchPixelData 7 | from lqit.registry import MODELS 8 | from lqit.utils import ConfigType, OptConfigType, OptMultiConfig 9 | 10 | 11 | class BaseGenerator(BaseModule, metaclass=ABCMeta): 12 | """Base class for EnhanceHead.""" 13 | 14 | def __init__(self, 15 | model: ConfigType, 16 | pixel_loss: OptConfigType = None, 17 | perceptual_loss: OptConfigType = None, 18 | init_cfg: OptMultiConfig = None) -> None: 19 | super().__init__(init_cfg=init_cfg) 20 | 21 | # build network 22 | self.model = MODELS.build(model) 23 | 24 | # build necessary loss 25 | self.pixel_loss = MODELS.build(pixel_loss) if pixel_loss else None 26 | self.perceptual_loss = MODELS.build( 27 | perceptual_loss) if perceptual_loss else None 28 | 29 | def forward(self, x): 30 | """Forward function. 31 | 32 | Args: 33 | x (Tensor): Input tensor with shape (n, c, h, w). 34 | 35 | Returns: 36 | Tensor: Forward results. 
37 | """ 38 | return self.model(x) 39 | 40 | @abstractmethod 41 | def loss(self, loss_input: BatchPixelData, batch_img_metas: List[dict]): 42 | """Calculate the loss based on the outputs of generator.""" 43 | pass 44 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/PyCQA/flake8 3 | rev: 5.0.4 4 | hooks: 5 | - id: flake8 6 | - repo: https://github.com/PyCQA/isort 7 | rev: 5.11.5 8 | hooks: 9 | - id: isort 10 | - repo: https://github.com/pre-commit/mirrors-yapf 11 | rev: v0.32.0 12 | hooks: 13 | - id: yapf 14 | - repo: https://github.com/pre-commit/pre-commit-hooks 15 | rev: v4.3.0 16 | hooks: 17 | - id: trailing-whitespace 18 | - id: check-yaml 19 | - id: end-of-file-fixer 20 | - id: requirements-txt-fixer 21 | - id: double-quote-string-fixer 22 | - id: check-merge-conflict 23 | - id: fix-encoding-pragma 24 | args: ["--remove"] 25 | - id: mixed-line-ending 26 | args: ["--fix=lf"] 27 | - repo: https://github.com/codespell-project/codespell 28 | rev: v2.2.1 29 | hooks: 30 | - id: codespell 31 | - repo: https://github.com/executablebooks/mdformat 32 | rev: 0.7.9 33 | hooks: 34 | - id: mdformat 35 | args: ["--number"] 36 | additional_dependencies: 37 | - mdformat-openmmlab 38 | - mdformat_frontmatter 39 | - linkify-it-py 40 | - repo: https://github.com/myint/docformatter 41 | rev: v1.3.1 42 | hooks: 43 | - id: docformatter 44 | args: ["--in-place", "--wrap-descriptions", "79"] 45 | - repo: https://github.com/asottile/pyupgrade 46 | rev: v3.0.0 47 | hooks: 48 | - id: pyupgrade 49 | args: ["--py36-plus"] 50 | -------------------------------------------------------------------------------- /lqit/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import mmengine 3 | import mmeval 4 | from mmengine.utils import digit_version 5 | 6 | from .version import __version__, version_info 7 | 8 | mmcv_minimum_version = '2.0.0' 9 | mmcv_maximum_version = '2.1.0' 10 | mmcv_version = digit_version(mmcv.__version__) 11 | 12 | assert (mmcv_version >= digit_version(mmcv_minimum_version) 13 | and mmcv_version < digit_version(mmcv_maximum_version)), \ 14 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 15 | f'Please install mmcv>={mmcv_minimum_version}, <{mmcv_maximum_version}.' 16 | 17 | mmengine_minimum_version = '0.7.2' 18 | mmengine_maximum_version = '1.0.0' 19 | mmengine_version = digit_version(mmengine.__version__) 20 | 21 | assert (mmengine_version >= digit_version(mmengine_minimum_version) 22 | and mmengine_version < digit_version(mmengine_maximum_version)), \ 23 | f'MMEngine=={mmengine.__version__} is used but incompatible. ' \ 24 | f'Please install mmengine>={mmengine_minimum_version}, ' \ 25 | f'<{mmengine_maximum_version}.' 26 | 27 | mmeval_minimum_version = '0.2.0' 28 | mmeval_maximum_version = '1.0.0' 29 | mmeval_version = digit_version(mmeval.__version__) 30 | 31 | assert (mmeval_version >= digit_version(mmeval_minimum_version) 32 | and mmeval_version < digit_version(mmeval_maximum_version)), \ 33 | f'MMEval=={mmeval.__version__} is used but incompatible. ' \ 34 | f'Please install mmeval>={mmeval_minimum_version}, ' \ 35 | f'<{mmeval_maximum_version}.' 
36 | 37 | __all__ = ['__version__', 'version_info', 'digit_version'] 38 | -------------------------------------------------------------------------------- /lqit/common/models/data_preprocessor/multi_input_multi_batch.py: -------------------------------------------------------------------------------- 1 | from typing import List, Sequence 2 | 3 | from mmengine.model import BaseDataPreprocessor 4 | from mmengine.structures import BaseDataElement 5 | 6 | from lqit.registry import MODELS 7 | from lqit.utils import ConfigType 8 | 9 | SampleList = List[BaseDataElement] 10 | 11 | 12 | @MODELS.register_module() 13 | class MIMBDataPreprocessor(BaseDataPreprocessor): 14 | 15 | def __init__(self, data_preprocessor: ConfigType): 16 | super().__init__() 17 | self.data_preprocessor = MODELS.build(data_preprocessor) 18 | 19 | def forward(self, data: dict, training: bool = False) -> dict: 20 | # multi input and multi batch 21 | if training: 22 | inputs, data_samples = data['inputs'], data['data_samples'] 23 | assert isinstance(inputs, Sequence) and \ 24 | isinstance(data_samples, Sequence) 25 | assert len(inputs) == len(data_samples) and \ 26 | len(inputs[0]) == len(data_samples[0]) 27 | 28 | new_inputs, new_data_samples = [], [] 29 | 30 | for i in range(len(inputs)): 31 | new_inputs.extend(_input for _input in inputs[i]) 32 | new_data_samples.extend(_input for _input in data_samples[i]) 33 | 34 | new_data = { 35 | 'inputs': new_inputs, 36 | 'data_samples': new_data_samples, 37 | } 38 | else: 39 | new_data = data 40 | 41 | return self.data_preprocessor(new_data, training) 42 | -------------------------------------------------------------------------------- /.pre-commit-config-zh-cn.yaml: -------------------------------------------------------------------------------- 1 | exclude: ^tests/data/ 2 | repos: 3 | - repo: https://gitee.com/openmmlab/mirrors-flake8 4 | rev: 5.0.4 5 | hooks: 6 | - id: flake8 7 | - repo: https://gitee.com/openmmlab/mirrors-isort 8 | rev: 5.11.5 9 | hooks: 10 | - id: isort 11 | - repo: https://gitee.com/openmmlab/mirrors-yapf 12 | rev: v0.32.0 13 | hooks: 14 | - id: yapf 15 | - repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks 16 | rev: v4.3.0 17 | hooks: 18 | - id: trailing-whitespace 19 | - id: check-yaml 20 | - id: end-of-file-fixer 21 | - id: requirements-txt-fixer 22 | - id: double-quote-string-fixer 23 | - id: check-merge-conflict 24 | - id: fix-encoding-pragma 25 | args: ["--remove"] 26 | - id: mixed-line-ending 27 | args: ["--fix=lf"] 28 | - repo: https://gitee.com/openmmlab/mirrors-mdformat 29 | rev: 0.7.9 30 | hooks: 31 | - id: mdformat 32 | args: ["--number"] 33 | additional_dependencies: 34 | - mdformat-openmmlab 35 | - mdformat_frontmatter 36 | - linkify-it-py 37 | - repo: https://gitee.com/openmmlab/mirrors-codespell 38 | rev: v2.2.1 39 | hooks: 40 | - id: codespell 41 | - repo: https://gitee.com/openmmlab/mirrors-docformatter 42 | rev: v1.3.1 43 | hooks: 44 | - id: docformatter 45 | args: ["--in-place", "--wrap-descriptions", "79"] 46 | - repo: https://gitee.com/openmmlab/mirrors-pyupgrade 47 | rev: v3.0.0 48 | hooks: 49 | - id: pyupgrade 50 | args: ["--py36-plus"] 51 | -------------------------------------------------------------------------------- /README_zh-CN.md: -------------------------------------------------------------------------------- 1 | # Open-Source Toolbox for Low-Quality Images 2 | 3 |
4 | 5 |
6 | 7 |
8 | 9 | [English](README.md) | 简体中文 10 | 11 |
12 | 13 | ## Introduction 14 | 15 | LQIT is an open-source toolbox for low-quality images, covering low-quality (underwater, foggy, low-light, etc.) image enhancement and related high-level application tasks. 16 | LQIT is based on [PyTorch](https://pytorch.org/) and the [OpenMMLab 2.0 series](https://github.com/open-mmlab). 17 | 18 | The main branch currently supports PyTorch 1.6 and above. Compatibility with earlier PyTorch versions has not been fully tested. 19 | 20 | ## What's New 21 | 22 | **v0.0.1rc2** was released on 2023-10-28: 23 | 24 | - Supported the [Feishu (Lark) robot](configs/lark/README.md) 25 | - Supported [TIENet](https://link.springer.com/article/10.1007/s11760-023-02695-9), [UOD-AIR](https://ieeexplore.ieee.org/abstract/document/9949063), and [RDFFNet](https://link.springer.com/article/10.1007/s11760-022-02410-0) 26 | - Released model weights on the `RTTS` dataset for object detection in foggy scenes 27 | 28 | Please refer to the [changelog](docs/en/notes/changelog.md) for more details and the release history. 29 | 30 | ## Installation and Data Preparation 31 | 32 | LQIT depends on [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv), and [MMEval](https://github.com/open-mmlab/mmeval). 33 | It can also take related [OpenMMLab](https://github.com/open-mmlab) codebases as third-party dependencies, such as [MMDetection](https://github.com/open-mmlab/mmdetection/tree/master). 34 | 35 | Please refer to the [installation guide](docs/zh_cn/get_started.md) for installation and to [data preparation](data/README_zh-CN.md) for preparing datasets. 36 | 37 | ## Contributing 38 | 39 | We appreciate all contributors for their efforts to improve LQIT. Please refer to the [contributing guide](CONTRIBUTING_zh-CN.md) for guidance on contributing to the project. 40 | 41 | ## License 42 | 43 | `LQIT` is released under the [Apache 2.0 license](LICENSE), but some of its features use other licenses. These features and their corresponding licenses are listed in detail in [LICENSES.md](LICENSES.md); please review that document carefully if you are engaging in commercial activities. 44 | 45 | ## Contact 46 | 47 | For any questions, feel free to reach out and discuss via yudongwang1226@gmail.com or yudongwang@tju.edu.cn. 48 | -------------------------------------------------------------------------------- /docs/zh_cn/prepare_data/rtts.md: -------------------------------------------------------------------------------- 1 | # RTTS 2 | 3 | ```latex 4 | @article{li2018benchmarking, 5 | title={Benchmarking single-image dehazing and beyond}, 6 | author={Li, Boyi and Ren, Wenqi and Fu, Dengpan and Tao, Dacheng and Feng, Dan and Zeng, Wenjun and Wang, Zhangyang}, 7 | journal={IEEE Transactions on Image Processing}, 8 | volume={28}, 9 | number={1}, 10 | pages={492--505}, 11 | year={2018}, 12 | publisher={IEEE} 13 | } 14 | ``` 15 | 16 | The dataset contains 4,322 foggy images, covering five categories: bicycle, bus, car, motorbike, and person. 17 | 18 | ## Download RTTS Dataset 19 | 20 | The Real-world Task-driven Testing Set (RTTS) is a part of the RESIDE dataset and can be downloaded from [here](https://drive.google.com/file/d/15Ei1cHGVqR1mXFep43BO7nkHq1IEGh1e/view?usp=sharing). 21 | 22 | We randomly divide the RTTS dataset into training and testing groups with 3,457 and 865 images, respectively. 23 | If users want to split it on their own, `tools/misc/write_txt.py` should be used to split the train and val sets first. 24 | Then `tools/dataset_converters/xml_to_json.py` can be used to convert the XML-style annotations to COCO format. 25 | 26 | The default data structure is as follows: 27 | 28 | ```text 29 | lqit 30 | ├── lqit 31 | ├── tools 32 | ├── configs 33 | ├── data 34 | │ ├── RESIDE 35 | │ │ ├── RTTS 36 | │ │ │ ├── ImageSets 37 | │ │ │ │ ├── Main 38 | │ │ │ │ │ ├── train.txt 39 | │ │ │ │ │ ├── val.txt 40 | │ │ │ ├── annotations_xml # pascal voc style annotations 41 | │ │ │ │ ├── AM_Bing_211.xml 42 | │ │ │ │ ├── AM_Bing_217.xml 43 | │ │ │ │ ├── ... 44 | │ │ │ ├── annotations_json # coco style annotations 45 | │ │ │ │ ├── rtts_train.json 46 | │ │ │ │ ├── rtts_val.json 47 | │ │ │ ├── JPEGImages 48 | │ │ │ │ ├── AM_Bing_211.png 49 | │ │ │ │ ├── AM_Bing_217.png 50 | │ │ │ │ ├── ...
51 | ``` 52 | -------------------------------------------------------------------------------- /tests/test_utils/test_setup_env.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys 3 | from unittest import TestCase 4 | 5 | from mmengine import DefaultScope 6 | 7 | from lqit.utils import register_all_modules 8 | 9 | 10 | class TestSetupEnv(TestCase): 11 | 12 | def test_register_all_modules(self): 13 | from lqit.registry import DATASETS 14 | 15 | # not init default scope 16 | sys.modules.pop('lqit.edit.datasets', None) 17 | sys.modules.pop('lqit.edit.datasets.basic_image_dataset', None) 18 | DATASETS._module_dict.pop('BasicImageDataset', None) 19 | self.assertFalse('BasicImageDataset' in DATASETS.module_dict) 20 | register_all_modules(init_default_scope=False) 21 | self.assertTrue('BasicImageDataset' in DATASETS.module_dict) 22 | 23 | # init default scope 24 | sys.modules.pop('lqit.edit.datasets') 25 | sys.modules.pop('lqit.edit.datasets.basic_image_dataset') 26 | DATASETS._module_dict.pop('BasicImageDataset', None) 27 | self.assertFalse('BasicImageDataset' in DATASETS.module_dict) 28 | register_all_modules(init_default_scope=True) 29 | self.assertTrue('BasicImageDataset' in DATASETS.module_dict) 30 | self.assertEqual(DefaultScope.get_current_instance().scope_name, 31 | 'lqit') 32 | 33 | # init default scope when another scope is init 34 | name = f'test-{datetime.datetime.now()}' 35 | DefaultScope.get_instance(name, scope_name='test') 36 | with self.assertWarnsRegex( 37 | Warning, 'The current default scope "test" is not "lqit"'): 38 | register_all_modules(init_default_scope=True) 39 | -------------------------------------------------------------------------------- /docs/en/prepare_data/rtts.md: -------------------------------------------------------------------------------- 1 | # RTTS 2 | 3 | ```latex 4 | @article{li2018benchmarking, 5 | title={Benchmarking single-image dehazing and beyond}, 6 | author={Li, Boyi and Ren, Wenqi and Fu, Dengpan and Tao, Dacheng and Feng, Dan and Zeng, Wenjun and Wang, Zhangyang}, 7 | journal={IEEE Transactions on Image Processing}, 8 | volume={28}, 9 | number={1}, 10 | pages={492--505}, 11 | year={2018}, 12 | publisher={IEEE} 13 | } 14 | ``` 15 | 16 | The dataset contains 4,322 foggy images, covering five categories: bicycle, bus, car, motorbike, and person. 17 | 18 | ## Download RTTS Dataset 19 | 20 | The Real-world Task-driven Testing Set (RTTS) is a part of the RESIDE dataset, and can be downloaded from [here](https://drive.google.com/file/d/15Ei1cHGVqR1mXFep43BO7nkHq1IEGh1e/view?usp=sharing). 21 | 22 | We randomly divide the RTTS dataset into training and testing groups with 3,457 and 865 images, respectively. 23 | If users want to split it on their own, `tools/misc/write_txt.py` should be used to split the train and val sets first. 24 | Then `tools/dataset_converters/xml_to_json.py` can be used to convert the XML-style annotations to COCO format. 25 | 26 | The data structure is as follows: 27 | 28 | ```text 29 | lqit 30 | ├── lqit 31 | ├── tools 32 | ├── configs 33 | ├── data 34 | │ ├── RESIDE 35 | │ │ ├── RTTS 36 | │ │ │ ├── ImageSets 37 | │ │ │ │ ├── Main 38 | │ │ │ │ │ ├── train.txt 39 | │ │ │ │ │ ├── val.txt 40 | │ │ │ ├── annotations_xml # pascal voc style annotations 41 | │ │ │ │ ├── AM_Bing_211.xml 42 | │ │ │ │ ├── AM_Bing_217.xml 43 | │ │ │ │ ├── ...
52 | -------------------------------------------------------------------------------- /lqit/utils/process_debug.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from typing import Union 3 | 4 | from mmengine.config import ConfigDict 5 | from mmengine.dataset import (ClassBalancedDataset, ConcatDataset, 6 |                               DefaultSampler, InfiniteSampler, RepeatDataset) 7 | 8 | ConfigType = Union[dict, ConfigDict] 9 | 10 | 11 | def process_debug_mode(cfg: ConfigType) -> ConfigType: 12 |     """Process config for debug mode. 13 | 14 |     Args: 15 |         cfg (dict or :obj:`ConfigDict`): Config dict. 16 | 17 |     Returns: 18 |         dict or :obj:`ConfigDict`: Config dict. 19 |     """ 20 | 21 |     dataloader_list = ['train_dataloader', 'val_dataloader', 'test_dataloader'] 22 |     for dataloader_name in dataloader_list: 23 |         dataset_type = cfg[dataloader_name]['dataset']['type'] 24 |         if dataset_type in \ 25 |                 ['ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset', 26 |                  ConcatDataset, RepeatDataset, ClassBalancedDataset]: 27 |             warnings.warn(f'{dataset_type} is not supported in debug mode, skip.') 28 |         else: 29 |             # set dataset.indices = 10 30 |             cfg[dataloader_name]['dataset']['indices'] = 10 31 | 32 |         # set num_workers = 0 33 |         cfg[dataloader_name]['num_workers'] = 0 34 |         cfg[dataloader_name]['persistent_workers'] = False 35 | 36 |         # set shuffle = False 37 |         if cfg[dataloader_name]['sampler']['type'] in \ 38 |                 ['DefaultSampler', 'InfiniteSampler', 39 |                  DefaultSampler, InfiniteSampler]: 40 |             cfg[dataloader_name]['sampler']['shuffle'] = False 41 |     # set seed = 0 42 |     cfg['randomness']['seed'] = 0 43 |     # set deterministic = True 44 |     cfg['randomness']['deterministic'] = True 45 | 46 |     # set log_level = 'DEBUG' 47 |     cfg['log_level'] = 'DEBUG' 48 | 49 |     # set max_keep_ckpts = 1 50 |     cfg['default_hooks']['checkpoint']['max_keep_ckpts'] = 1 51 | 52 |     return cfg 53 | -------------------------------------------------------------------------------- /configs/edit/unet_demo/temp_zero_dce.py: -------------------------------------------------------------------------------- 1 | # TODO: delete after editor metrics and datasets are fully supported.
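# This config trains the Zero-DCE enhancer in a self-supervised way:
# `lqit.SetInputImageAsGT` packs the raw input image as its own supervision
# target (`gt_name='img'` in the data preprocessor), and the four
# reference-free losses below (color, spatial, masked TV, exposure)
# constrain the enhanced output, so no paired ground-truth images are needed.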
2 | _base_ = 'mmdet::faster_rcnn/faster-rcnn_r50_fpn_1x_coco.py' 3 | 4 | model = dict( 5 | _delete_=True, 6 | type='lqit.BaseEditModel', 7 | data_preprocessor=dict( 8 | type='lqit.EditDataPreprocessor', 9 | mean=[0.0, 0.0, 0.0], 10 | std=[255.0, 255.0, 255.0], 11 | bgr_to_rgb=True, 12 | pad_size_divisor=32, 13 | gt_name='img'), 14 | generator=dict( 15 | _scope_='lqit', 16 | type='ZeroDCEGenerator', 17 | model=dict(type='ZeroDCE'), 18 | color_loss=dict(type='ColorLoss', loss_weight=5.0), 19 | spacial_loss=dict(type='SpatialLoss', loss_weight=1.0), 20 | tv_loss=dict(type='MaskedTVLoss', loss_mode='mse', loss_weight=200.0), 21 | exposure_loss=dict( 22 | type='ExposureLoss', patch_size=16, mean_val=0.6, 23 | loss_weight=10.0))) 24 | # dataset settings 25 | train_pipeline = [ 26 | dict( 27 | type='LoadImageFromFile', 28 | file_client_args={{_base_.file_client_args}}), 29 | dict(type='LoadAnnotations', with_bbox=True), 30 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 31 | dict(type='RandomFlip', prob=0.5), 32 | dict(type='lqit.SetInputImageAsGT'), 33 | dict(type='lqit.PackInputs') 34 | ] 35 | train_dataloader = dict( 36 | batch_size=2, 37 | num_workers=2, 38 | persistent_workers=True, 39 | sampler=dict(type='DefaultSampler', shuffle=True), 40 | batch_sampler=dict(type='AspectRatioBatchSampler'), 41 | dataset=dict( 42 | ann_file='annotations/instances_val2017.json', 43 | data_prefix=dict(img='val2017/'), 44 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 45 | pipeline=train_pipeline)) 46 | val_dataloader = dict(batch_size=1, num_workers=0, persistent_workers=False) 47 | test_dataloader = val_dataloader 48 | 49 | optim_wrapper = dict( 50 | type='OptimWrapper', 51 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/en/_build/ 68 | docs/zh_cn/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | data/ 107 | data 108 | .vscode 109 | .idea 110 | .DS_Store 111 | 112 | # custom 113 | *.pkl 114 | *.pkl.json 115 | *.log.json 116 | docs/modelzoo_statistics.md 117 | lqit/.mim 118 | work_dirs/ 119 | 120 | # Pytorch 121 | *.pth 122 | *.py~ 123 | *.sh~ 124 | 125 | # ignore lark config 126 | configs/lark/ 127 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/README.md: -------------------------------------------------------------------------------- 1 | # Detecting Underwater Objects 2 | 3 | > [Detecting Underwater Objects](https://arxiv.org/abs/2106.05681) 4 | 5 | 6 | 7 | Underwater object detection for robot picking has attracted a lot of interest. However, it is still an unsolved problem due to several challenges. We take steps towards making it more realistic by addressing the following challenges. Firstly, the currently available datasets basically lack test set annotations, forcing researchers to compare their methods with other SOTAs on self-divided test sets (taken from the training set). Retraining the other methods increases the workload, and different researchers divide the datasets differently, so there is no unified benchmark to compare the performance of different algorithms. Secondly, these datasets also have other shortcomings, e.g., too many similar images or incomplete labels. To address these challenges, we introduce a dataset, Detecting Underwater Objects (DUO), and a corresponding benchmark, based on the collection and re-annotation of all relevant datasets. DUO contains a collection of diverse underwater images with more rational annotations. The corresponding benchmark provides indicators of both efficiency and accuracy of SOTAs (under the MMDetection framework) for academic research and industrial applications, where JETSON AGX XAVIER is used to assess detector speed to simulate the robot-embedded environment. 8 | 9 | 10 | 11 | 
12 | 13 |
14 | 15 | ## Results 16 | 17 | Coming soon 18 | 19 | ## Citation 20 | 21 | ```latex 22 | @inproceedings{liu2021dataset, 23 | title={A dataset and benchmark of underwater object detection for robot picking}, 24 | author={Liu, Chongwei and Li, Haojie and Wang, Shuchang and Zhu, Ming and Wang, Dong and Fan, Xin and Wang, Zhihui}, 25 | booktitle={2021 IEEE International Conference on Multimedia \& Expo Workshops (ICMEW)}, 26 | pages={1--6}, 27 | year={2021}, 28 | organization={IEEE} 29 | } 30 | ``` 31 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/rtts_coco.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.RTTSCocoDataset'  # `lqit` means the scope 3 | data_root = 'data/RESIDE/RTTS/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # load the annotations after `Resize` to avoid the gt bboxes being resized 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | 25 | train_dataloader = dict( 26 |     batch_size=2, 27 |     num_workers=2, 28 |     persistent_workers=True, 29 |     sampler=dict(type='DefaultSampler', shuffle=True), 30 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 31 |     dataset=dict( 32 |         type=dataset_type, 33 |         data_root=data_root, 34 |         ann_file='annotations_json/train.json', 35 |         data_prefix=dict(img='JPEGImages/'), 36 |         filter_cfg=dict(filter_empty_gt=True, min_size=32), 37 |         pipeline=train_pipeline, 38 |         backend_args=backend_args)) 39 | val_dataloader = dict( 40 |     batch_size=1, 41 |     num_workers=2, 42 |     persistent_workers=True, 43 |     drop_last=False, 44 |     sampler=dict(type='DefaultSampler', shuffle=False), 45 |     dataset=dict( 46 |         type=dataset_type, 47 |         data_root=data_root, 48 |         ann_file='annotations_json/test.json', 49 |         data_prefix=dict(img='JPEGImages/'), 50 |         test_mode=True, 51 |         pipeline=test_pipeline, 52 |         backend_args=backend_args)) 53 | test_dataloader = val_dataloader 54 | 55 | val_evaluator = dict( 56 |     type='CocoMetric', 57 |     ann_file=data_root + 'annotations_json/test.json', 58 |     metric='bbox', 59 |     format_only=False, 60 |     backend_args=backend_args) 61 | test_evaluator = val_evaluator 62 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/ruod_coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.RUODDataset'  # `lqit` means the scope 3 | data_root = 'data/RUOD/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 | 
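    # Loading the annotations after `Resize` keeps the gt bboxes at the
    # original image scale, which is what the evaluator expects.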
dict(type='LoadAnnotations', with_bbox=True), 19 | dict( 20 | type='PackDetInputs', 21 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 | 'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 | batch_size=2, 26 | num_workers=2, 27 | persistent_workers=True, 28 | sampler=dict(type='DefaultSampler', shuffle=True), 29 | batch_sampler=dict(type='AspectRatioBatchSampler'), 30 | dataset=dict( 31 | type=dataset_type, 32 | data_root=data_root, 33 | ann_file='annotations/instances_train.json', 34 | data_prefix=dict(img='train/'), 35 | filter_cfg=dict(filter_empty_gt=True, min_size=32), 36 | pipeline=train_pipeline, 37 | backend_args=backend_args)) 38 | val_dataloader = dict( 39 | batch_size=1, 40 | num_workers=2, 41 | persistent_workers=True, 42 | drop_last=False, 43 | sampler=dict(type='DefaultSampler', shuffle=False), 44 | dataset=dict( 45 | type=dataset_type, 46 | data_root=data_root, 47 | ann_file='annotations/instances_test.json', 48 | data_prefix=dict(img='test/'), 49 | test_mode=True, 50 | pipeline=test_pipeline, 51 | backend_args=backend_args)) 52 | test_dataloader = val_dataloader 53 | 54 | val_evaluator = dict( 55 | type='CocoMetric', 56 | ann_file=data_root + 'annotations/instances_test.json', 57 | metric='bbox', 58 | format_only=False, 59 | backend_args=backend_args) 60 | test_evaluator = val_evaluator 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/models/ssd300.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | data_preprocessor=dict( 6 | type='DetDataPreprocessor', 7 | mean=[123.675, 116.28, 103.53], 8 | std=[1, 1, 1], 9 | bgr_to_rgb=True, 10 | pad_size_divisor=1), 11 | backbone=dict( 12 | type='SSDVGG', 13 | depth=16, 14 | with_last_pool=False, 15 | ceil_mode=True, 16 | out_indices=(3, 4), 17 | out_feature_indices=(22, 34), 18 | init_cfg=dict( 19 | type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')), 20 | neck=dict( 21 | type='SSDNeck', 22 | in_channels=(512, 1024), 23 | out_channels=(512, 1024, 512, 256, 256, 256), 24 | level_strides=(2, 2, 1, 1), 25 | level_paddings=(1, 1, 0, 0), 26 | l2_norm_scale=20), 27 | bbox_head=dict( 28 | type='SSDHead', 29 | in_channels=(512, 1024, 512, 256, 256, 256), 30 | num_classes=80, 31 | anchor_generator=dict( 32 | type='SSDAnchorGenerator', 33 | scale_major=False, 34 | input_size=input_size, 35 | basesize_ratio_range=(0.15, 0.9), 36 | strides=[8, 16, 32, 64, 100, 300], 37 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]), 38 | bbox_coder=dict( 39 | type='DeltaXYWHBBoxCoder', 40 | target_means=[.0, .0, .0, .0], 41 | target_stds=[0.1, 0.1, 0.2, 0.2])), 42 | # model training and testing settings 43 | train_cfg=dict( 44 | assigner=dict( 45 | type='MaxIoUAssigner', 46 | pos_iou_thr=0.5, 47 | neg_iou_thr=0.5, 48 | min_pos_iou=0., 49 | ignore_iof_thr=-1, 50 | gt_max_assign_all=False), 51 | sampler=dict(type='PseudoSampler'), 52 | smoothl1_beta=1., 53 | allowed_border=-1, 54 | pos_weight=-1, 55 | neg_pos_ratio=3, 56 | debug=False), 57 | test_cfg=dict( 58 | nms_pre=1000, 59 | nms=dict(type='nms', iou_threshold=0.45), 60 | min_bbox_size=0, 61 | score_thr=0.02, 62 | max_per_img=200)) 63 | cudnn_benchmark = True 64 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/duo_coco_detection.py: -------------------------------------------------------------------------------- 1 | # 
dataset settings 2 | dataset_type = 'lqit.DUODataset'  # `lqit` means the scope 3 | data_root = 'data/DUO/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 |     batch_size=2, 26 |     num_workers=2, 27 |     persistent_workers=True, 28 |     sampler=dict(type='DefaultSampler', shuffle=True), 29 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 30 |     dataset=dict( 31 |         type=dataset_type, 32 |         data_root=data_root, 33 |         ann_file='annotations/instances_train.json', 34 |         data_prefix=dict(img='images/train/'), 35 |         filter_cfg=dict(filter_empty_gt=True, min_size=32), 36 |         pipeline=train_pipeline, 37 |         backend_args=backend_args)) 38 | val_dataloader = dict( 39 |     batch_size=1, 40 |     num_workers=2, 41 |     persistent_workers=True, 42 |     drop_last=False, 43 |     sampler=dict(type='DefaultSampler', shuffle=False), 44 |     dataset=dict( 45 |         type=dataset_type, 46 |         data_root=data_root, 47 |         ann_file='annotations/instances_test.json', 48 |         data_prefix=dict(img='images/test/'), 49 |         test_mode=True, 50 |         pipeline=test_pipeline, 51 |         backend_args=backend_args)) 52 | test_dataloader = val_dataloader 53 | 54 | val_evaluator = dict( 55 |     type='CocoMetric', 56 |     ann_file=data_root + 'annotations/instances_test.json', 57 |     metric='bbox', 58 |     format_only=False, 59 |     backend_args=backend_args) 60 | test_evaluator = val_evaluator 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.URPCCocoDataset'  # `lqit` means the scope 3 | data_root = 'data/URPC2020/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 |     batch_size=2, 26 |     num_workers=2, 27 |     persistent_workers=True, 28 |     sampler=dict(type='DefaultSampler', shuffle=True), 29 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 30 |     dataset=dict( 31 |         type=dataset_type, 32 |         data_root=data_root, 33 |         ann_file='annotations/train.json', 34 |         data_prefix=dict(img='train-image/'), 35 |         filter_cfg=dict(filter_empty_gt=True, min_size=32), 36 |         pipeline=train_pipeline, 37 |         backend_args=backend_args)) 
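# Note: the validation split below shares the `train-image/` folder with the
# training set; only the annotation file (`val.json` vs `train.json`) differs.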
38 | val_dataloader = dict( 39 |     batch_size=1, 40 |     num_workers=2, 41 |     persistent_workers=True, 42 |     drop_last=False, 43 |     sampler=dict(type='DefaultSampler', shuffle=False), 44 |     dataset=dict( 45 |         type=dataset_type, 46 |         data_root=data_root, 47 |         ann_file='annotations/val.json', 48 |         data_prefix=dict(img='train-image/'), 49 |         test_mode=True, 50 |         pipeline=test_pipeline, 51 |         backend_args=backend_args)) 52 | test_dataloader = val_dataloader 53 | 54 | val_evaluator = dict( 55 |     type='CocoMetric', 56 |     ann_file=data_root + 'annotations/val.json', 57 |     metric='bbox', 58 |     format_only=False, 59 |     backend_args=backend_args) 60 | test_evaluator = val_evaluator 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.URPCCocoDataset'  # `lqit` means the scope 3 | data_root = 'data/URPC2020/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 |     batch_size=2, 26 |     num_workers=2, 27 |     persistent_workers=True, 28 |     sampler=dict(type='DefaultSampler', shuffle=True), 29 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 30 |     dataset=dict( 31 |         type=dataset_type, 32 |         data_root=data_root, 33 |         ann_file='annotations/train_all.json', 34 |         data_prefix=dict(img='train-image/'), 35 |         filter_cfg=dict(filter_empty_gt=True, min_size=32), 36 |         pipeline=train_pipeline, 37 |         backend_args=backend_args)) 38 | val_dataloader = dict( 39 |     batch_size=1, 40 |     num_workers=2, 41 |     persistent_workers=True, 42 |     drop_last=False, 43 |     sampler=dict(type='DefaultSampler', shuffle=False), 44 |     dataset=dict( 45 |         type=dataset_type, 46 |         data_root=data_root, 47 |         ann_file='annotations/test-A.json', 48 |         data_prefix=dict(img='test-A-image/'), 49 |         test_mode=True, 50 |         pipeline=test_pipeline, 51 |         backend_args=backend_args)) 52 | test_dataloader = val_dataloader 53 | 54 | val_evaluator = dict( 55 |     type='CocoMetric', 56 |     ann_file=data_root + 'annotations/test-A.json', 57 |     metric='bbox', 58 |     format_only=False, 59 |     backend_args=backend_args) 60 | test_evaluator = val_evaluator 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.URPCCocoDataset'  # `lqit` means the scope 3 | data_root = 'data/URPC2020/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 | 
dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 |     batch_size=2, 26 |     num_workers=2, 27 |     persistent_workers=True, 28 |     sampler=dict(type='DefaultSampler', shuffle=True), 29 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 30 |     dataset=dict( 31 |         type=dataset_type, 32 |         data_root=data_root, 33 |         ann_file='annotations/train_all.json', 34 |         data_prefix=dict(img='train-image/'), 35 |         filter_cfg=dict(filter_empty_gt=True, min_size=32), 36 |         pipeline=train_pipeline, 37 |         backend_args=backend_args)) 38 | val_dataloader = dict( 39 |     batch_size=1, 40 |     num_workers=2, 41 |     persistent_workers=True, 42 |     drop_last=False, 43 |     sampler=dict(type='DefaultSampler', shuffle=False), 44 |     dataset=dict( 45 |         type=dataset_type, 46 |         data_root=data_root, 47 |         ann_file='annotations/test-B.json', 48 |         data_prefix=dict(img='test-B-image/'), 49 |         test_mode=True, 50 |         pipeline=test_pipeline, 51 |         backend_args=backend_args)) 52 | test_dataloader = val_dataloader 53 | 54 | val_evaluator = dict( 55 |     type='CocoMetric', 56 |     ann_file=data_root + 'annotations/test-B.json', 57 |     metric='bbox', 58 |     format_only=False, 59 |     backend_args=backend_args) 60 | test_evaluator = val_evaluator 61 | -------------------------------------------------------------------------------- /lqit/common/models/data_preprocessor/batch_data_preprocessor.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import torch 4 | from mmengine.model import BaseDataPreprocessor 5 | 6 | from lqit.registry import MODELS 7 | from lqit.utils import ConfigType 8 | 9 | 10 | @MODELS.register_module() 11 | class BatchDataPreprocessor(BaseDataPreprocessor): 12 | 13 |     def __init__(self, 14 |                  data_preprocessor: ConfigType, 15 |                  multi_input_key: str = 'data'): 16 |         super().__init__() 17 |         self.data_preprocessor = MODELS.build(data_preprocessor) 18 | 19 |         assert isinstance(multi_input_key, str) 20 |         self.multi_input_key = multi_input_key 21 | 22 |     def forward(self, data: dict, training: bool = False) -> dict: 23 |         """Perform normalization, padding and bgr2rgb conversion based on 24 |         ``BaseDataPreprocessor``. 25 | 26 |         Args: 27 |             data (dict): Data sampled from dataloader. 28 |             training (bool): Whether to enable training time augmentation. 29 | 30 |         Returns: 31 |             dict: Data in the same format as the model input. 
32 | """ 33 | 34 | multi_inputs = None 35 | if data.get('data_samples') is not None and \ 36 | data['data_samples'][0].get('multi_input') is not None and \ 37 | training: 38 | multi_inputs_list = [] 39 | for data_sample in data['data_samples']: 40 | multi_inputs_list.append( 41 | data_sample.multi_input.get(self.multi_input_key)) 42 | # process multi inputs 43 | fake_data = copy.deepcopy(data) 44 | fake_data['inputs'] = multi_inputs_list 45 | fake_data = self.data_preprocessor( 46 | data=fake_data, training=training) 47 | multi_inputs = fake_data['inputs'] 48 | 49 | data = self.data_preprocessor(data=data, training=training) 50 | inputs, data_samples = data['inputs'], data['data_samples'] 51 | 52 | if multi_inputs is not None and training: 53 | inputs = torch.cat([inputs, multi_inputs], dim=0) 54 | for i in range(len(data_samples)): 55 | data_samples.append(data_samples[i]) 56 | 57 | return {'inputs': inputs, 'data_samples': data_samples} 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Low-Quality Image ToolBox 2 | 3 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Low-Quality Image ToolBox 2 | 3 | 4 | 5 | 
6 | 7 |
8 | 9 | English | [简体中文](README_zh-CN.md) 10 | 11 |
12 | 13 | ## Introduction 14 | 15 | LQIT is an open source Low-Quality Image Toolbox that covers low-quality (underwater, foggy, low-light, etc.) image enhancement tasks 16 | and related high-level computer vision tasks (such as object detection). LQIT depends on [PyTorch](https://pytorch.org/) and the [OpenMMLab 2.0 series](https://github.com/open-mmlab). 17 | 18 | The main branch works with **PyTorch 1.6+**. 19 | Compatibility with earlier versions of PyTorch has not been fully tested. 20 | 21 | ## What's New 22 | 23 | **v0.0.1rc2** was released on 28/10/2023: 24 | 25 | - Support [FeiShu (Lark) robot](configs/lark/README.md) 26 | - Support [TIENet](https://link.springer.com/article/10.1007/s11760-023-02695-9), [UOD-AIR](https://ieeexplore.ieee.org/abstract/document/9949063), and [RDFFNet](https://link.springer.com/article/10.1007/s11760-022-02410-0) 27 | - Release `RTTS` foggy object detection models 28 | 29 | Please refer to [changelog](docs/en/notes/changelog.md) for details and release history. 30 | 31 | ## Installation & Dataset Preparation 32 | 33 | LQIT depends on [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv), and [MMEval](https://github.com/open-mmlab/mmeval). 34 | It can also use [OpenMMLab codebases](https://github.com/open-mmlab) as dependencies, such as [MMDetection](https://github.com/open-mmlab/mmdetection/tree/master). 35 | 36 | Please refer to [Installation](docs/en/get_started.md) for the installation of LQIT and [data preparation](data/README.md) for dataset preparation. 37 | 38 | ## Contributing 39 | 40 | We appreciate all contributions to improve LQIT. Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) for the contributing guideline. 41 | 42 | ## License 43 | 44 | LQIT is released under the [Apache 2.0 license](LICENSE), while some specific features in this library use other licenses. Please check [LICENSES.md](LICENSES.md) carefully if you are using our code for commercial purposes. 45 | 46 | ## Contact 47 | 48 | If you have any questions, please contact Yudong Wang at yudongwang1226@gmail.com or yudongwang@tju.edu.cn. 
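
As a quick sanity check after installation, LQIT's registries can be exercised directly from Python. The snippet below is a minimal sketch that mirrors the behaviour covered by `tests/test_utils/test_setup_env.py`; it assumes LQIT and its dependencies are already installed.

```python
from lqit.registry import DATASETS
from lqit.utils import register_all_modules

# Register all LQIT modules and set the default scope to `lqit`.
register_all_modules(init_default_scope=True)

# LQIT components are now available from the registries by name.
assert 'BasicImageDataset' in DATASETS.module_dict
```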
49 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-A/ssd512_120e_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'ssd300_120e_urpc-coco.py' 2 | 3 | # model settings 4 | input_size = 512 5 | model = dict( 6 | neck=dict( 7 | out_channels=(512, 1024, 512, 256, 256, 256, 256), 8 | level_strides=(2, 2, 2, 2, 1), 9 | level_paddings=(1, 1, 1, 1, 1), 10 | last_kernel_size=4), 11 | bbox_head=dict( 12 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 13 | anchor_generator=dict( 14 | type='SSDAnchorGenerator', 15 | scale_major=False, 16 | input_size=input_size, 17 | basesize_ratio_range=(0.1, 0.9), 18 | strides=[8, 16, 32, 64, 128, 256, 512], 19 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict( 26 | type='Expand', 27 | mean={{_base_.model.data_preprocessor.mean}}, 28 | to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, 29 | ratio_range=(1, 4)), 30 | dict( 31 | type='MinIoURandomCrop', 32 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 33 | min_crop_size=0.3), 34 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 35 | dict(type='RandomFlip', prob=0.5), 36 | dict( 37 | type='PhotoMetricDistortion', 38 | brightness_delta=32, 39 | contrast_range=(0.5, 1.5), 40 | saturation_range=(0.5, 1.5), 41 | hue_delta=18), 42 | dict(type='PackDetInputs') 43 | ] 44 | test_pipeline = [ 45 | dict(type='LoadImageFromFile'), 46 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 47 | dict(type='LoadAnnotations', with_bbox=True), 48 | dict( 49 | type='PackDetInputs', 50 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 51 | 'scale_factor')) 52 | ] 53 | train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) 54 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 55 | test_dataloader = val_dataloader 56 | 57 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 58 | # USER SHOULD NOT CHANGE ITS VALUES. 
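# When automatic LR scaling is enabled at launch time, the optimizer LR is
# scaled linearly by (actual total batch size) / base_batch_size.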
59 | # base_batch_size = (8 GPUs) x (8 samples per GPU) 60 | auto_scale_lr = dict(base_batch_size=64) 61 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-B/ssd512_120e_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'ssd300_120e_urpc-coco.py' 2 | 3 | # model settings 4 | input_size = 512 5 | model = dict( 6 | neck=dict( 7 | out_channels=(512, 1024, 512, 256, 256, 256, 256), 8 | level_strides=(2, 2, 2, 2, 1), 9 | level_paddings=(1, 1, 1, 1, 1), 10 | last_kernel_size=4), 11 | bbox_head=dict( 12 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 13 | anchor_generator=dict( 14 | type='SSDAnchorGenerator', 15 | scale_major=False, 16 | input_size=input_size, 17 | basesize_ratio_range=(0.1, 0.9), 18 | strides=[8, 16, 32, 64, 128, 256, 512], 19 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict( 26 | type='Expand', 27 | mean={{_base_.model.data_preprocessor.mean}}, 28 | to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, 29 | ratio_range=(1, 4)), 30 | dict( 31 | type='MinIoURandomCrop', 32 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 33 | min_crop_size=0.3), 34 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 35 | dict(type='RandomFlip', prob=0.5), 36 | dict( 37 | type='PhotoMetricDistortion', 38 | brightness_delta=32, 39 | contrast_range=(0.5, 1.5), 40 | saturation_range=(0.5, 1.5), 41 | hue_delta=18), 42 | dict(type='PackDetInputs') 43 | ] 44 | test_pipeline = [ 45 | dict(type='LoadImageFromFile'), 46 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 47 | dict(type='LoadAnnotations', with_bbox=True), 48 | dict( 49 | type='PackDetInputs', 50 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 51 | 'scale_factor')) 52 | ] 53 | train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) 54 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 55 | test_dataloader = val_dataloader 56 | 57 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 58 | # USER SHOULD NOT CHANGE ITS VALUES. 
59 | # base_batch_size = (8 GPUs) x (8 samples per GPU) 60 | auto_scale_lr = dict(base_batch_size=64) 61 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train_validation/ssd512_120e_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'ssd300_120e_urpc-coco.py' 2 | 3 | # model settings 4 | input_size = 512 5 | model = dict( 6 | neck=dict( 7 | out_channels=(512, 1024, 512, 256, 256, 256, 256), 8 | level_strides=(2, 2, 2, 2, 1), 9 | level_paddings=(1, 1, 1, 1, 1), 10 | last_kernel_size=4), 11 | bbox_head=dict( 12 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 13 | anchor_generator=dict( 14 | type='SSDAnchorGenerator', 15 | scale_major=False, 16 | input_size=input_size, 17 | basesize_ratio_range=(0.1, 0.9), 18 | strides=[8, 16, 32, 64, 128, 256, 512], 19 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict( 26 | type='Expand', 27 | mean={{_base_.model.data_preprocessor.mean}}, 28 | to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, 29 | ratio_range=(1, 4)), 30 | dict( 31 | type='MinIoURandomCrop', 32 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 33 | min_crop_size=0.3), 34 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 35 | dict(type='RandomFlip', prob=0.5), 36 | dict( 37 | type='PhotoMetricDistortion', 38 | brightness_delta=32, 39 | contrast_range=(0.5, 1.5), 40 | saturation_range=(0.5, 1.5), 41 | hue_delta=18), 42 | dict(type='PackDetInputs') 43 | ] 44 | test_pipeline = [ 45 | dict(type='LoadImageFromFile'), 46 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 47 | dict(type='LoadAnnotations', with_bbox=True), 48 | dict( 49 | type='PackDetInputs', 50 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 51 | 'scale_factor')) 52 | ] 53 | train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) 54 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 55 | test_dataloader = val_dataloader 56 | 57 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 58 | # USER SHOULD NOT CHANGE ITS VALUES. 
59 | # base_batch_size = (8 GPUs) x (8 samples per GPU) 60 | auto_scale_lr = dict(base_batch_size=64) 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/models/retinanet_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | data_preprocessor=dict( 5 | type='DetDataPreprocessor', 6 | mean=[123.675, 116.28, 103.53], 7 | std=[58.395, 57.12, 57.375], 8 | bgr_to_rgb=True, 9 | pad_size_divisor=32), 10 | backbone=dict( 11 | type='ResNet', 12 | depth=50, 13 | num_stages=4, 14 | out_indices=(0, 1, 2, 3), 15 | frozen_stages=1, 16 | norm_cfg=dict(type='BN', requires_grad=True), 17 | norm_eval=True, 18 | style='pytorch', 19 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | start_level=1, 25 | add_extra_convs='on_input', 26 | num_outs=5), 27 | bbox_head=dict( 28 | type='RetinaHead', 29 | num_classes=80, 30 | in_channels=256, 31 | stacked_convs=4, 32 | feat_channels=256, 33 | anchor_generator=dict( 34 | type='AnchorGenerator', 35 | octave_base_scale=4, 36 | scales_per_octave=3, 37 | ratios=[0.5, 1.0, 2.0], 38 | strides=[8, 16, 32, 64, 128]), 39 | bbox_coder=dict( 40 | type='DeltaXYWHBBoxCoder', 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]), 43 | loss_cls=dict( 44 | type='FocalLoss', 45 | use_sigmoid=True, 46 | gamma=2.0, 47 | alpha=0.25, 48 | loss_weight=1.0), 49 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 50 | # model training and testing settings 51 | train_cfg=dict( 52 | assigner=dict( 53 | type='MaxIoUAssigner', 54 | pos_iou_thr=0.5, 55 | neg_iou_thr=0.4, 56 | min_pos_iou=0, 57 | ignore_iof_thr=-1), 58 | sampler=dict( 59 | type='PseudoSampler'), # Focal loss should use PseudoSampler 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.5), 68 | max_per_img=100)) 69 | -------------------------------------------------------------------------------- /lqit/common/models/data_preprocessor/multi_input_data_preprocessor.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import warnings 3 | from typing import List 4 | 5 | from mmengine.model import BaseDataPreprocessor 6 | from mmengine.structures import BaseDataElement 7 | from torch import Tensor 8 | 9 | from lqit.registry import MODELS 10 | from lqit.utils import ConfigType, OptConfigType 11 | 12 | SampleList = List[BaseDataElement] 13 | 14 | 15 | @MODELS.register_module() 16 | class MultiInputDataPreprocessor(BaseDataPreprocessor): 17 | 18 | def __init__(self, 19 | data_preprocessor: ConfigType, 20 | multi_input_data_preprocessor: OptConfigType = None): 21 | super().__init__() 22 | self.data_preprocessor = MODELS.build(data_preprocessor) 23 | 24 | if multi_input_data_preprocessor is None: 25 | multi_input_data_preprocessor = data_preprocessor 26 | self.multi_input_data_preprocessor = \ 27 | MODELS.build(multi_input_data_preprocessor) 28 | self._training = None 29 | 30 | def forward(self, data: dict, training: bool = False) -> dict: 31 | self._training = training 32 | return self.data_preprocessor(data, training) 33 | 34 | def process_multi_input(self, 35 | batch_data_samples: SampleList, 36 | input_key: str = 'data'): 37 | cp_batch_data_samples = 
copy.deepcopy(batch_data_samples) 38 |         input_list = [ 39 |             data_samples.multi_input.get(input_key) 40 |             for data_samples in cp_batch_data_samples 41 |         ] 42 |         fake_data = dict(inputs=input_list, data_samples=cp_batch_data_samples) 43 | 44 |         if self._training is None: 45 |             warnings.warn('`training` will be set to `self.training`, ' 46 |                           'which may cause potential errors. ' 47 |                           'Please kindly run `forward` ' 48 |                           'before running `process_multi_input`.') 49 |             self._training = self.training 50 | 51 |         multi_data = self.multi_input_data_preprocessor( 52 |             fake_data, self._training) 53 |         del cp_batch_data_samples 54 | 55 |         batched_multi_input = multi_data['inputs'] 56 |         return batched_multi_input 57 | 58 |     def destructor_multi_input(self, batch_tensor: List[Tensor]): 59 |         # TODO: Support for visualization 60 |         pass 61 | -------------------------------------------------------------------------------- /configs/detection/_base_/datasets/urpc2020/urpc2020-validation_xml_detection.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'lqit.URPCXMLDataset'  # `lqit` means the scope 3 | data_root = 'data/URPC/' 4 | 5 | backend_args = None 6 | 7 | train_pipeline = [ 8 |     dict(type='LoadImageFromFile', backend_args=backend_args), 9 |     dict(type='LoadAnnotations', with_bbox=True), 10 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 11 |     dict(type='RandomFlip', prob=0.5), 12 |     dict(type='PackDetInputs') 13 | ] 14 | test_pipeline = [ 15 |     dict(type='LoadImageFromFile', backend_args=backend_args), 16 |     dict(type='Resize', scale=(1333, 800), keep_ratio=True), 17 |     # If you don't have gt annotations, remove this transform from the pipeline 18 |     dict(type='LoadAnnotations', with_bbox=True), 19 |     dict( 20 |         type='PackDetInputs', 21 |         meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 22 |                    'scale_factor')) 23 | ] 24 | train_dataloader = dict( 25 |     batch_size=2, 26 |     num_workers=2, 27 |     persistent_workers=True, 28 |     sampler=dict(type='DefaultSampler', shuffle=True), 29 |     batch_sampler=dict(type='AspectRatioBatchSampler'), 30 |     dataset=dict( 31 |         type=dataset_type, 32 |         data_root=data_root, 33 |         ann_file='source_data/ImageSets/train.txt', 34 |         meta_file='source_data/ImageMetas/train-image-metas.pkl', 35 |         img_subdir='train-image', 36 |         ann_subdir='source_data/train-box', 37 |         data_prefix=dict(sub_data_root=''), 38 |         filter_cfg=dict(filter_empty_gt=True, min_size=32, bbox_min_size=32), 39 |         pipeline=train_pipeline, 40 |         backend_args=backend_args)) 41 | val_dataloader = dict( 42 |     batch_size=1, 43 |     num_workers=2, 44 |     persistent_workers=True, 45 |     drop_last=False, 46 |     sampler=dict(type='DefaultSampler', shuffle=False), 47 |     dataset=dict( 48 |         type=dataset_type, 49 |         data_root=data_root, 50 |         ann_file='source_data/ImageSets/val.txt', 51 |         meta_file='source_data/ImageMetas/val-image-metas.pkl', 52 |         img_subdir='train-image', 53 |         ann_subdir='source_data/train-box', 54 |         data_prefix=dict(sub_data_root=''), 55 |         test_mode=True, 56 |         pipeline=test_pipeline, 57 |         backend_args=backend_args)) 58 | test_dataloader = val_dataloader 59 | 60 | val_evaluator = dict(type='VOCMetric', metric='mAP', eval_mode='area') 61 | test_evaluator = val_evaluator 62 | -------------------------------------------------------------------------------- /lqit/edit/structures/batch_pixel_data.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | import torch 5 | from mmengine.structures import 
PixelData 6 | 7 | 8 | class BatchPixelData(PixelData): 9 |     """Data structure for batched pixel-level annotations or predictions. 10 | 11 |     Different from parent class: 12 |     Support value.ndim == 4 for batched tensor. 13 | 14 |     All data items in ``data_fields`` of ``PixelData`` meet the following 15 |     requirements: 16 | 17 |     - They all have 4 dimensions in orders of batch_size, channel, height, 18 |       and width. 19 |     - They should have the same height and width. 20 |     """ 21 | 22 |     def __setattr__(self, name: str, value: Union[torch.Tensor, np.ndarray]): 23 |         """Set attributes of ``BatchPixelData``. 24 | 25 |         Different from the parent class, the value must have 4 dimensions, 26 |         and its height and width must be consistent with the stored data. 27 | 28 |         Args: 29 |             name (str): The key to access the value, stored in `PixelData`. 30 |             value (Union[torch.Tensor, np.ndarray]): The value to store in. 31 |                 The type of value must be `torch.Tensor` or `np.ndarray`, 32 |                 and its shape must meet the requirements of `BatchPixelData`. 33 |         """ 34 | 35 |         if name in ('_metainfo_fields', '_data_fields'): 36 |             if not hasattr(self, name): 37 |                 super().__setattr__(name, value) 38 |             else: 39 |                 raise AttributeError( 40 |                     f'{name} has been used as a ' 41 |                     f'private attribute, which is immutable. ') 42 | 43 |         else: 44 |             assert isinstance(value, (torch.Tensor, np.ndarray)), \ 45 |                 f'Can not set {type(value)}, only support' \ 46 |                 f' {(torch.Tensor, np.ndarray)}' 47 | 48 |             if self.shape: 49 |                 assert tuple(value.shape[-2:]) == self.shape, ( 50 |                     f'the height and width of ' 51 |                     f'values {tuple(value.shape[-2:])} is ' 52 |                     f'not consistent with' 53 |                     f' the shape of this ' 54 |                     f':obj:`PixelData` ' 55 |                     f'{self.shape} ') 56 |             assert value.ndim == 4, \ 57 |                 f'The dim of value must be 4, but got {value.ndim}' 58 | 59 |             # call BaseDataElement.__setattr__ 60 |             super(PixelData, self).__setattr__(name, value) 61 | -------------------------------------------------------------------------------- /configs/detection/tienet/base_detector/atss_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 |     '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py', 3 |     '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 |     type='ATSS', 9 |     data_preprocessor=dict( 10 |         type='DetDataPreprocessor', 11 |         mean=[123.675, 116.28, 103.53], 12 |         std=[58.395, 57.12, 57.375], 13 |         bgr_to_rgb=True, 14 |         pad_size_divisor=32), 15 |     backbone=dict( 16 |         type='ResNet', 17 |         depth=50, 18 |         num_stages=4, 19 |         out_indices=(0, 1, 2, 3), 20 |         frozen_stages=1, 21 |         norm_cfg=dict(type='BN', requires_grad=True), 22 |         norm_eval=True, 23 |         style='pytorch', 24 |         init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 |     neck=dict( 26 |         type='FPN', 27 |         in_channels=[256, 512, 1024, 2048], 28 |         out_channels=256, 29 |         start_level=1, 30 |         add_extra_convs='on_output', 31 |         num_outs=5), 32 |     bbox_head=dict( 33 |         type='ATSSHead', 34 |         num_classes=4, 35 |         in_channels=256, 36 |         stacked_convs=4, 37 |         feat_channels=256, 38 |         anchor_generator=dict( 39 |             type='AnchorGenerator', 40 |             ratios=[1.0], 41 |             octave_base_scale=8, 42 |             scales_per_octave=1, 43 |             strides=[8, 16, 32, 64, 128]), 44 |         bbox_coder=dict( 45 |             type='DeltaXYWHBBoxCoder', 46 |             target_means=[.0, .0, .0, .0], 47 |             target_stds=[0.1, 0.1, 0.2, 0.2]), 48 |         loss_cls=dict( 49 |             type='FocalLoss', 50 |             use_sigmoid=True, 51 |             gamma=2.0, 52 |             alpha=0.25, 53 |             loss_weight=1.0), 
54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-A/atss_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-B/atss_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 
| type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train_validation/atss_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | 
pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-A/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='FCOS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[102.9801, 115.9465, 122.7717], 12 | std=[1.0, 1.0, 1.0], 13 | bgr_to_rgb=False, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=False), 22 | norm_eval=True, 23 | style='caffe', 24 | init_cfg=dict( 25 | type='Pretrained', 26 | checkpoint='open-mmlab://detectron/resnet50_caffe')), 27 | neck=dict( 28 | type='FPN', 29 | in_channels=[256, 512, 1024, 2048], 30 | out_channels=256, 31 | start_level=1, 32 | add_extra_convs='on_output', # use P5 33 | num_outs=5, 34 | relu_before_extra_convs=True), 35 | bbox_head=dict( 36 | type='FCOSHead', 37 | num_classes=4, 38 | in_channels=256, 39 | stacked_convs=4, 40 | feat_channels=256, 41 | strides=[8, 16, 32, 64, 128], 42 | loss_cls=dict( 43 | type='FocalLoss', 44 | use_sigmoid=True, 45 | gamma=2.0, 46 | alpha=0.25, 47 | loss_weight=1.0), 48 | loss_bbox=dict(type='IoULoss', loss_weight=1.0), 49 | loss_centerness=dict( 50 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 51 | # testing settings 52 | test_cfg=dict( 53 | nms_pre=1000, 54 | min_bbox_size=0, 55 | score_thr=0.05, 56 | nms=dict(type='nms', iou_threshold=0.5), 57 | max_per_img=100)) 58 | 59 | # learning rate 60 | param_scheduler = [ 61 | dict( 62 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 63 | end=1000), 64 | dict( 65 | type='MultiStepLR', 66 | begin=0, 67 | end=12, 68 | by_epoch=True, 69 | milestones=[8, 11], 70 | gamma=0.1) 71 | ] 72 | 73 | # optimizer 74 | optim_wrapper = dict( 75 | optimizer=dict(lr=0.01), 76 | paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), 77 | clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad 78 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-B/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='FCOS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[102.9801, 115.9465, 122.7717], 12 | std=[1.0, 1.0, 1.0], 13 | bgr_to_rgb=False, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=False), 22 | norm_eval=True, 23 | style='caffe', 24 | init_cfg=dict( 25 | 
type='Pretrained', 26 | checkpoint='open-mmlab://detectron/resnet50_caffe')), 27 | neck=dict( 28 | type='FPN', 29 | in_channels=[256, 512, 1024, 2048], 30 | out_channels=256, 31 | start_level=1, 32 | add_extra_convs='on_output', # use P5 33 | num_outs=5, 34 | relu_before_extra_convs=True), 35 | bbox_head=dict( 36 | type='FCOSHead', 37 | num_classes=4, 38 | in_channels=256, 39 | stacked_convs=4, 40 | feat_channels=256, 41 | strides=[8, 16, 32, 64, 128], 42 | loss_cls=dict( 43 | type='FocalLoss', 44 | use_sigmoid=True, 45 | gamma=2.0, 46 | alpha=0.25, 47 | loss_weight=1.0), 48 | loss_bbox=dict(type='IoULoss', loss_weight=1.0), 49 | loss_centerness=dict( 50 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 51 | # testing settings 52 | test_cfg=dict( 53 | nms_pre=1000, 54 | min_bbox_size=0, 55 | score_thr=0.05, 56 | nms=dict(type='nms', iou_threshold=0.5), 57 | max_per_img=100)) 58 | 59 | # learning rate 60 | param_scheduler = [ 61 | dict( 62 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 63 | end=1000), 64 | dict( 65 | type='MultiStepLR', 66 | begin=0, 67 | end=12, 68 | by_epoch=True, 69 | milestones=[8, 11], 70 | gamma=0.1) 71 | ] 72 | 73 | # optimizer 74 | optim_wrapper = dict( 75 | optimizer=dict(lr=0.01), 76 | paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), 77 | clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad 78 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train_validation/fcos_r50-caffe_fpn_gn-head_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='FCOS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[102.9801, 115.9465, 122.7717], 12 | std=[1.0, 1.0, 1.0], 13 | bgr_to_rgb=False, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=False), 22 | norm_eval=True, 23 | style='caffe', 24 | init_cfg=dict( 25 | type='Pretrained', 26 | checkpoint='open-mmlab://detectron/resnet50_caffe')), 27 | neck=dict( 28 | type='FPN', 29 | in_channels=[256, 512, 1024, 2048], 30 | out_channels=256, 31 | start_level=1, 32 | add_extra_convs='on_output', # use P5 33 | num_outs=5, 34 | relu_before_extra_convs=True), 35 | bbox_head=dict( 36 | type='FCOSHead', 37 | num_classes=4, 38 | in_channels=256, 39 | stacked_convs=4, 40 | feat_channels=256, 41 | strides=[8, 16, 32, 64, 128], 42 | loss_cls=dict( 43 | type='FocalLoss', 44 | use_sigmoid=True, 45 | gamma=2.0, 46 | alpha=0.25, 47 | loss_weight=1.0), 48 | loss_bbox=dict(type='IoULoss', loss_weight=1.0), 49 | loss_centerness=dict( 50 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 51 | # testing settings 52 | test_cfg=dict( 53 | nms_pre=1000, 54 | min_bbox_size=0, 55 | score_thr=0.05, 56 | nms=dict(type='nms', iou_threshold=0.5), 57 | max_per_img=100)) 58 | 59 | # learning rate 60 | param_scheduler = [ 61 | dict( 62 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 63 | end=1000), 64 | dict( 65 | type='MultiStepLR', 66 | begin=0, 67 | end=12, 68 | by_epoch=True, 69 | milestones=[8, 11], 70 | gamma=0.1) 71 | ] 72 | 73 | # optimizer 74 | optim_wrapper = dict( 75 | 
optimizer=dict(lr=0.01), 76 | paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.), 77 | clip_grad=dict(max_norm=35, norm_type=2)) # loss may NaN without clip_grad 78 | -------------------------------------------------------------------------------- /tools/analysis_tools/analyze_tide.py: -------------------------------------------------------------------------------- 1 | """Analyze Errors based on TIDE. 2 | 3 | Examples: 4 | python tools/analysis_tools/analyze_tide.py \ 5 | ${ANNOTATIONS FILE} \ 6 | ${RESULT JSON FILE} \ 7 | --out ${OUT PATH} 8 | """ 9 | import argparse 10 | import os.path as osp 11 | 12 | from lqit.detection.evaluation.tide import COCO, TIDE, COCOResult 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser( 17 | description='Analyze Errors based on TIDE') 18 | parser.add_argument('ann_file', help='Annotation json file path') 19 | parser.add_argument('pred_file', help='Prediction json file path') 20 | parser.add_argument( 21 | '--pos-thr', 22 | default=0.5, 23 | type=float, 24 | help='Positive threshold in TIDE') 25 | parser.add_argument( 26 | '--bkg-thr', 27 | default=0.1, 28 | type=float, 29 | help='Background threshold in TIDE') 30 | parser.add_argument( 31 | '--mode', 32 | default='bbox', 33 | choices=['bbox', 'mask'], 34 | type=str, 35 | help='The mode of evaluation in TIDE') 36 | parser.add_argument( 37 | '--name', default=None, type=str, help='The run name') 38 | parser.add_argument( 39 | '--out', 40 | default=None, 41 | type=str, 42 | help='Path to save the TIDE result image') 43 | args = parser.parse_args() 44 | return args 45 | 46 | 47 | def get_tide_errors(args): 48 | tide = TIDE( 49 | pos_threshold=args.pos_thr, 50 | background_threshold=args.bkg_thr, 51 | mode=args.mode) 52 | assert args.pred_file.endswith('json'), \ 53 | 'TIDE analysis only supports the json format, please set ' \ 54 | '`CocoMetric.format_only=True` and `CocoMetric.outfile_prefix=xxx` ' \ 55 | 'to get the json result first.' 56 | assert osp.exists(args.pred_file), f'{args.pred_file} does not exist' 57 | gt = COCO(path=args.ann_file, name=args.name) 58 | preds = COCOResult(path=args.pred_file, name=args.name) 59 | tide.evaluate(gt=gt, preds=preds, name=args.name) 60 | tide.summarize() 61 | all_errors = tide.all_errors 62 | if args.out is not None: 63 | tide.plot(out_dir=args.out) 64 | print(f'Saved TIDE image to {args.out}') 65 | return all_errors 66 | 67 | 68 | def main(): 69 | args = parse_args() 70 | all_errors = get_tide_errors(args) 71 | error_str = 'TIDE Errors: \n' 72 | for k, v in all_errors.items(): 73 | error_name = f'{k}_Error'.ljust(11) 74 | error_str += f'{error_name}: {v:6.2f} \n' 75 | print(error_str) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /lqit/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/open-mmlab/mmediting/blob/main/mmedit/utils/logger.py # noqa: E501 2 | # Modified from https://github.com/open-mmlab/mmdetection/blob/main/mmdet/utils/logger.py # noqa: E501 3 | import inspect 4 | import logging 5 | 6 | from mmengine.logging import print_log 7 | from termcolor import colored 8 | 9 | 10 | def print_colored_log(msg: str, 11 | level: int = logging.INFO, 12 | color: str = 'magenta') -> None: 13 | """Print colored log with default logger. 14 | 15 | Args: 16 | msg (str): Message to log. 17 | level (int): The root logger level.
Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. Defaults to 20 | ``logging.INFO``. 21 | color (str, optional): Color of the log message. Defaults to 'magenta'. 22 | """ 23 | print_log(colored(msg, color), 'current', level) 24 | 25 | 26 | def get_caller_name() -> str: 27 | """Get name of caller method.""" 28 | # this_func_frame = inspect.stack()[0][0] # i.e., get_caller_name 29 | # callee_frame = inspect.stack()[1][0] # e.g., log_img_scale 30 | caller_frame = inspect.stack()[2][0] # e.g., caller of log_img_scale 31 | caller_method = caller_frame.f_code.co_name 32 | try: 33 | caller_class = caller_frame.f_locals['self'].__class__.__name__ 34 | return f'{caller_class}.{caller_method}' 35 | except KeyError: # caller is a function 36 | return caller_method 37 | 38 | 39 | def log_img_scale(img_scale: tuple, 40 | shape_order: str = 'hw', 41 | skip_square: bool = False) -> bool: 42 | """Log image size. 43 | 44 | Args: 45 | img_scale (tuple): Image size to be logged. 46 | shape_order (str, optional): The order of image shape. 47 | 'hw' for (height, width) and 'wh' for (width, height). 48 | Defaults to 'hw'. 49 | skip_square (bool, optional): Whether to skip logging for square 50 | img_scale. Defaults to False. 51 | 52 | Returns: 53 | bool: Whether logging has been performed. 54 | """ 55 | if shape_order == 'hw': 56 | height, width = img_scale 57 | elif shape_order == 'wh': 58 | width, height = img_scale 59 | else: 60 | raise ValueError(f'Invalid shape_order {shape_order}.') 61 | 62 | if skip_square and (height == width): 63 | return False 64 | 65 | caller = get_caller_name() 66 | print_log( 67 | f'image shape: height={height}, width={width} in {caller}', 68 | logger='current') 69 | 70 | return True 71 | -------------------------------------------------------------------------------- /configs/detection/ruod_dataset/ssd512_120e_ruod.py: -------------------------------------------------------------------------------- 1 | _base_ = 'ssd300_120e_ruod.py' 2 | 3 | # model settings 4 | input_size = 512 5 | model = dict( 6 | neck=dict( 7 | out_channels=(512, 1024, 512, 256, 256, 256, 256), 8 | level_strides=(2, 2, 2, 2, 1), 9 | level_paddings=(1, 1, 1, 1, 1), 10 | last_kernel_size=4), 11 | bbox_head=dict( 12 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 13 | anchor_generator=dict( 14 | type='SSDAnchorGenerator', 15 | scale_major=False, 16 | input_size=input_size, 17 | basesize_ratio_range=(0.1, 0.9), 18 | strides=[8, 16, 32, 64, 128, 256, 512], 19 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict( 26 | type='Expand', 27 | mean={{_base_.model.data_preprocessor.mean}}, 28 | to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, 29 | ratio_range=(1, 4)), 30 | dict( 31 | type='MinIoURandomCrop', 32 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 33 | min_crop_size=0.3), 34 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 35 | dict(type='RandomFlip', prob=0.5), 36 | dict( 37 | type='PhotoMetricDistortion', 38 | brightness_delta=32, 39 | contrast_range=(0.5, 1.5), 40 | saturation_range=(0.5, 1.5), 41 | hue_delta=18), 42 | dict(type='PackDetInputs') 43 | ] 44 | test_pipeline = [ 45 | dict(type='LoadImageFromFile'), 46 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 47 | dict(type='LoadAnnotations', with_bbox=True), 48 | dict( 49 | type='PackDetInputs', 50 |
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 51 | 'scale_factor')) 52 | ] 53 | train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) 54 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 55 | test_dataloader = val_dataloader 56 | 57 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 58 | # USER SHOULD NOT CHANGE ITS VALUES. 59 | # base_batch_size = (8 GPUs) x (8 samples per GPU) 60 | auto_scale_lr = dict(base_batch_size=64) 61 | 62 | # add WandbVisBackend 63 | # vis_backends = [ 64 | # dict(type='LocalVisBackend'), 65 | # dict(type='WandbVisBackend', 66 | # init_kwargs=dict( 67 | # project='RUOD_detection', 68 | # name='ssd512_120e_ruod', 69 | # entity='lqit', 70 | # ) 71 | # ) 72 | # ] 73 | # visualizer = dict( 74 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 75 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/ssd512_120e_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = 'ssd300_120e_duo-coco.py' 2 | 3 | # model settings 4 | input_size = 512 5 | model = dict( 6 | neck=dict( 7 | out_channels=(512, 1024, 512, 256, 256, 256, 256), 8 | level_strides=(2, 2, 2, 2, 1), 9 | level_paddings=(1, 1, 1, 1, 1), 10 | last_kernel_size=4), 11 | bbox_head=dict( 12 | in_channels=(512, 1024, 512, 256, 256, 256, 256), 13 | anchor_generator=dict( 14 | type='SSDAnchorGenerator', 15 | scale_major=False, 16 | input_size=input_size, 17 | basesize_ratio_range=(0.1, 0.9), 18 | strides=[8, 16, 32, 64, 128, 256, 512], 19 | ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]))) 20 | 21 | # dataset settings 22 | train_pipeline = [ 23 | dict(type='LoadImageFromFile'), 24 | dict(type='LoadAnnotations', with_bbox=True), 25 | dict( 26 | type='Expand', 27 | mean={{_base_.model.data_preprocessor.mean}}, 28 | to_rgb={{_base_.model.data_preprocessor.bgr_to_rgb}}, 29 | ratio_range=(1, 4)), 30 | dict( 31 | type='MinIoURandomCrop', 32 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), 33 | min_crop_size=0.3), 34 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 35 | dict(type='RandomFlip', prob=0.5), 36 | dict( 37 | type='PhotoMetricDistortion', 38 | brightness_delta=32, 39 | contrast_range=(0.5, 1.5), 40 | saturation_range=(0.5, 1.5), 41 | hue_delta=18), 42 | dict(type='PackDetInputs') 43 | ] 44 | test_pipeline = [ 45 | dict(type='LoadImageFromFile'), 46 | dict(type='Resize', scale=(input_size, input_size), keep_ratio=False), 47 | dict(type='LoadAnnotations', with_bbox=True), 48 | dict( 49 | type='PackDetInputs', 50 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 51 | 'scale_factor')) 52 | ] 53 | train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline))) 54 | val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) 55 | test_dataloader = val_dataloader 56 | 57 | # NOTE: `auto_scale_lr` is for automatically scaling LR, 58 | # USER SHOULD NOT CHANGE ITS VALUES. 
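# Background on the linear scaling rule assumed by the NOTE above: when
# auto-scale-lr is enabled at launch, mmengine rescales the configured lr by
# actual_batch_size / base_batch_size, so training at the original
# 8 GPUs x 8 samples per GPU (= 64) leaves the lr unchanged.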
59 | # base_batch_size = (8 GPUs) x (8 samples per GPU) 60 | auto_scale_lr = dict(base_batch_size=64) 61 | 62 | # add WandbVisBackend 63 | # vis_backends = [ 64 | # dict(type='LocalVisBackend'), 65 | # dict(type='WandbVisBackend', 66 | # init_kwargs=dict( 67 | # project='DUO_detection', 68 | # name='ssd512_120e_duo', 69 | # entity='lqit', 70 | # ) 71 | # ) 72 | # ] 73 | # visualizer = dict( 74 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 75 | -------------------------------------------------------------------------------- /configs/edit/_base_/datasets/underwater_enhancement.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'BasicImageDataset' 3 | data_root = 'data/uw_enhance/' 4 | 5 | file_client_args = dict(backend='disk') 6 | 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile', file_client_args=file_client_args), 9 | dict(type='LoadGTImageFromFile', file_client_args=file_client_args), 10 | dict( 11 | type='TransBroadcaster', 12 | src_key='img', 13 | dst_key='gt_img', 14 | transforms=[ 15 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 16 | dict(type='RandomFlip', prob=0.5), 17 | ]), 18 | dict(type='PackInputs') 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile', file_client_args=file_client_args), 22 | dict(type='LoadGTImageFromFile', file_client_args=file_client_args), 23 | dict( 24 | type='TransBroadcaster', 25 | src_key='img', 26 | dst_key='gt_img', 27 | transforms=[dict(type='Resize', scale=(512, 512), keep_ratio=True)]), 28 | dict( 29 | type='PackInputs', 30 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 31 | 'scale_factor')) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=2, 35 | num_workers=2, 36 | persistent_workers=True, 37 | sampler=dict(type='DefaultSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | metainfo=dict( 42 | dataset_type='underwater_enhancement', task_name='enhancement'), 43 | data_prefix=dict(img='input_train/', gt_img='gt_train/'), 44 | search_key='img', 45 | img_suffix=dict(img='png', gt_img='png'), 46 | file_client_args=file_client_args, 47 | pipeline=train_pipeline)) 48 | val_dataloader = dict( 49 | batch_size=1, 50 | num_workers=2, 51 | persistent_workers=True, 52 | drop_last=False, 53 | sampler=dict(type='DefaultSampler', shuffle=False), 54 | dataset=dict( 55 | type=dataset_type, 56 | data_root=data_root, 57 | test_mode=True, 58 | metainfo=dict( 59 | dataset_type='underwater_enhancement', task_name='enhancement'), 60 | data_prefix=dict(img='input_test/', gt_img='gt_test/'), 61 | search_key='img', 62 | img_suffix=dict(img='png', gt_img='png'), 63 | file_client_args=file_client_args, 64 | pipeline=test_pipeline)) 65 | test_dataloader = val_dataloader 66 | 67 | val_evaluator = [ 68 | dict(type='MAE', gt_key='img', pred_key='pred_img'), 69 | dict(type='MSE', gt_key='img', pred_key='pred_img'), 70 | dict(type='PSNR', gt_key='img', pred_key='pred_img'), 71 | dict(type='SSIM', gt_key='img', pred_key='pred_img'), 72 | ] 73 | test_evaluator = val_evaluator 74 | -------------------------------------------------------------------------------- /configs/detection/edffnet/atss_r50_fpn_1x_rtts-coco_lr002.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', 3 | '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | 
type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=5, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)) 73 | 74 | param_scheduler = [ 75 | dict( 76 | type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, 77 | end=1000), 78 | dict( 79 | type='MultiStepLR', 80 | begin=0, 81 | end=12, 82 | by_epoch=True, 83 | milestones=[8, 11], 84 | gamma=0.1) 85 | ] 86 | -------------------------------------------------------------------------------- /lqit/detection/evaluation/tide/errors/error.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/dbolya/tide 2 | # This work is licensed under MIT license. 3 | from typing import Union 4 | 5 | 6 | class Error: 7 | """A base class for all error types.""" 8 | 9 | def fix(self) -> Union[tuple, None]: 10 | """Returns a fixed version of the AP data point for this error or None 11 | if this error should be suppressed. 12 | 13 | Returns: 14 | tuple: (score:float, is_positive:bool, info:dict) 15 | """ 16 | raise NotImplementedError 17 | 18 | def unfix(self) -> Union[tuple, None]: 19 | """Returns the original version of this data point.""" 20 | 21 | if hasattr(self, 'pred'): 22 | # If an ignored instance is an error, it's not in the data 23 | # point list, so there's no "unfixed" entry 24 | if self.pred['used'] is None: 25 | return None 26 | else: 27 | return (self.pred['class'], (self.pred['score'], False, 28 | self.pred['info'])) 29 | else: 30 | return None 31 | 32 | def get_id(self) -> int: 33 | """Get index.""" 34 | if hasattr(self, 'pred'): 35 | return self.pred['_id'] 36 | elif hasattr(self, 'gt'): 37 | return self.gt['_id'] 38 | else: 39 | return -1 40 | 41 | 42 | class BestGTMatch: 43 | """Some errors are fixed by changing false positives to true positives. 
The 44 | issue with fixing these errors naively is that you might have multiple 45 | errors attempting to fix the same GT. In that case, we need to select which 46 | error actually gets fixed, and which others just get suppressed (since we 47 | can only fix one error per GT). 48 | 49 | To address this, this class finds the prediction with the highest score 50 | and then uses that as the error to fix, while suppressing all other errors 51 | caused by the same GT. 52 | """ 53 | 54 | def __init__(self, pred, gt) -> None: 55 | self.pred = pred 56 | self.gt = gt 57 | 58 | if self.gt['used']: 59 | self.suppress = True 60 | else: 61 | self.suppress = False 62 | self.gt['usable'] = True 63 | 64 | score = self.pred['score'] 65 | 66 | if 'best_score' not in self.gt: 67 | self.gt['best_score'] = -1 68 | 69 | if self.gt['best_score'] < score: 70 | self.gt['best_score'] = score 71 | self.gt['best_id'] = self.pred['_id'] 72 | 73 | def fix(self) -> Union[tuple, None]: 74 | if self.suppress or self.gt['best_id'] != self.pred['_id']: 75 | return None 76 | else: 77 | return self.pred['score'], True, self.pred['info'] 78 | -------------------------------------------------------------------------------- /lqit/edit/models/editor_heads/edge_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import ConvModule 4 | 5 | from lqit.registry import MODELS 6 | from .base_head import BaseEnhanceHead 7 | 8 | 9 | @MODELS.register_module() 10 | class EdgeHead(BaseEnhanceHead): 11 | """Edge head consisting of [conv + GN + ReLU] * 4 followed by a 1x1 conv.""" 12 | 13 | def __init__(self, 14 | in_channels=256, 15 | feat_channels=256, 16 | num_convs=5, 17 | conv_cfg=None, 18 | norm_cfg=dict(type='GN', num_groups=32, requires_grad=True), 19 | act_cfg=dict(type='ReLU'), 20 | gt_preprocessor=None, 21 | loss_enhance=dict(type='lqit.L1Loss', loss_weight=1.0), 22 | init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)): 23 | super().__init__( 24 | loss_enhance=loss_enhance, 25 | gt_preprocessor=gt_preprocessor, 26 | init_cfg=init_cfg) 27 | self.in_channels = in_channels 28 | self.feat_channels = feat_channels 29 | self.num_convs = num_convs 30 | self.conv_cfg = conv_cfg 31 | self.norm_cfg = norm_cfg 32 | self.act_cfg = act_cfg 33 | self._init_layers() 34 | 35 | def _init_layers(self): 36 | assert self.num_convs > 0 37 | enhance_conv = [] 38 | for i in range(self.num_convs): 39 | in_channels = self.in_channels if i == 0 \ 40 | else self.feat_channels 41 | if i < (self.num_convs - 1): 42 | enhance_conv.append( 43 | ConvModule( 44 | in_channels, 45 | self.feat_channels, 46 | 3, 47 | stride=1, 48 | padding=1, 49 | conv_cfg=self.conv_cfg, 50 | norm_cfg=self.norm_cfg, 51 | act_cfg=self.act_cfg)) 52 | else: 53 | enhance_conv.append( 54 | nn.Conv2d( 55 | in_channels=in_channels, 56 | out_channels=1, 57 | kernel_size=1, 58 | stride=1, 59 | padding=0))  # the 1x1 output conv needs no padding 60 | self.enhance_conv = nn.Sequential(*enhance_conv) 61 | 62 | def forward(self, x): 63 | if isinstance(x, (tuple, list)) and len(x) > 1: 64 | x = x[0] 65 | outs = self.enhance_conv(x) 66 | return outs 67 | 68 | def loss_by_feat(self, enhance_img, gt_imgs, img_metas): 69 | reshape_gt_imgs = F.interpolate( 70 | gt_imgs, size=enhance_img.shape[-2:], mode='bilinear') 71 | enhance_loss = self.loss_enhance(enhance_img, reshape_gt_imgs) 72 | return dict(loss_edge=enhance_loss) 73 | --------------------------------------------------------------------------------
/configs/detection/tienet/base_detector/tood_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='TOOD', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='TOODHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=6, 37 | feat_channels=256, 38 | anchor_type='anchor_free', 39 | anchor_generator=dict( 40 | type='AnchorGenerator', 41 | ratios=[1.0], 42 | octave_base_scale=8, 43 | scales_per_octave=1, 44 | strides=[8, 16, 32, 64, 128]), 45 | bbox_coder=dict( 46 | type='DeltaXYWHBBoxCoder', 47 | target_means=[.0, .0, .0, .0], 48 | target_stds=[0.1, 0.1, 0.2, 0.2]), 49 | initial_loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | activated=True, # use probability instead of logit as input 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_cls=dict( 57 | type='QualityFocalLoss', 58 | use_sigmoid=True, 59 | activated=True, # use probability instead of logit as input 60 | beta=2.0, 61 | loss_weight=1.0), 62 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), 63 | train_cfg=dict( 64 | initial_epoch=4, 65 | initial_assigner=dict(type='ATSSAssigner', topk=9), 66 | assigner=dict(type='TaskAlignedAssigner', topk=13), 67 | alpha=1, 68 | beta=6, 69 | allowed_border=-1, 70 | pos_weight=-1, 71 | debug=False), 72 | test_cfg=dict( 73 | nms_pre=1000, 74 | min_bbox_size=0, 75 | score_thr=0.05, 76 | nms=dict(type='nms', iou_threshold=0.6), 77 | max_per_img=100)) 78 | 79 | # optimizer 80 | optim_wrapper = dict( 81 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 82 | -------------------------------------------------------------------------------- /configs/edit/_base_/datasets/underwater_enhancement_with_ann.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'BasicImageDataset' 3 | data_root = 'data/uw_enhance/' 4 | 5 | file_client_args = dict(backend='disk') 6 | 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile', file_client_args=file_client_args), 9 | dict(type='LoadGTImageFromFile', file_client_args=file_client_args), 10 | dict( 11 | type='TransBroadcaster', 12 | src_key='img', 13 | dst_key='gt_img', 14 | transforms=[ 15 | dict(type='Resize', scale=(512, 512), keep_ratio=True), 16 | dict(type='RandomFlip', prob=0.5), 17 | ]), 18 | dict(type='PackInputs') 19 | ] 20 | test_pipeline = [ 21 | dict(type='LoadImageFromFile', file_client_args=file_client_args), 22 | dict(type='LoadGTImageFromFile', file_client_args=file_client_args), 23 | dict( 24 | type='TransBroadcaster', 25 | src_key='img', 26 | dst_key='gt_img', 27 | transforms=[dict(type='Resize', scale=(512, 512), 
keep_ratio=True)]), 28 | dict( 29 | type='PackInputs', 30 | meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 31 | 'scale_factor')) 32 | ] 33 | train_dataloader = dict( 34 | batch_size=2, 35 | num_workers=2, 36 | persistent_workers=True, 37 | sampler=dict(type='DefaultSampler', shuffle=True), 38 | dataset=dict( 39 | type=dataset_type, 40 | data_root=data_root, 41 | ann_file='annotations/train.txt', 42 | metainfo=dict( 43 | dataset_type='underwater_enhancement', task_name='enhancement'), 44 | data_prefix=dict(img='input_train/', gt_img='gt_train/'), 45 | search_key='img', 46 | img_suffix=dict(img='png', gt_img='png'), 47 | file_client_args=file_client_args, 48 | pipeline=train_pipeline)) 49 | val_dataloader = dict( 50 | batch_size=1, 51 | num_workers=2, 52 | persistent_workers=True, 53 | drop_last=False, 54 | sampler=dict(type='DefaultSampler', shuffle=False), 55 | dataset=dict( 56 | type=dataset_type, 57 | data_root=data_root, 58 | test_mode=True, 59 | ann_file='annotations/test.txt', 60 | metainfo=dict( 61 | dataset_type='underwater_enhancement', task_name='enhancement'), 62 | data_prefix=dict(img='input_test/', gt_img='gt_test/'), 63 | search_key='img', 64 | img_suffix=dict(img='png', gt_img='png'), 65 | file_client_args=file_client_args, 66 | pipeline=test_pipeline)) 67 | test_dataloader = val_dataloader 68 | 69 | val_evaluator = [ 70 | dict(type='MAE', gt_key='img', pred_key='pred_img'), 71 | dict(type='MSE', gt_key='img', pred_key='pred_img'), 72 | dict(type='PSNR', gt_key='img', pred_key='pred_img'), 73 | dict(type='SSIM', gt_key='img', pred_key='pred_img'), 74 | ] 75 | test_evaluator = val_evaluator 76 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-A/tood_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-A_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='TOOD', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='TOODHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=6, 37 | feat_channels=256, 38 | anchor_type='anchor_free', 39 | anchor_generator=dict( 40 | type='AnchorGenerator', 41 | ratios=[1.0], 42 | octave_base_scale=8, 43 | scales_per_octave=1, 44 | strides=[8, 16, 32, 64, 128]), 45 | bbox_coder=dict( 46 | type='DeltaXYWHBBoxCoder', 47 | target_means=[.0, .0, .0, .0], 48 | target_stds=[0.1, 0.1, 0.2, 0.2]), 49 | initial_loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | activated=True, # use probability instead of logit as input 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_cls=dict( 57 | type='QualityFocalLoss', 58 | use_sigmoid=True, 59 | activated=True, # use probability instead of logit 
as input 60 | beta=2.0, 61 | loss_weight=1.0), 62 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), 63 | train_cfg=dict( 64 | initial_epoch=4, 65 | initial_assigner=dict(type='ATSSAssigner', topk=9), 66 | assigner=dict(type='TaskAlignedAssigner', topk=13), 67 | alpha=1, 68 | beta=6, 69 | allowed_border=-1, 70 | pos_weight=-1, 71 | debug=False), 72 | test_cfg=dict( 73 | nms_pre=1000, 74 | min_bbox_size=0, 75 | score_thr=0.05, 76 | nms=dict(type='nms', iou_threshold=0.6), 77 | max_per_img=100)) 78 | 79 | # optimizer 80 | optim_wrapper = dict( 81 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 82 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train-all_test-B/tood_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-test-B_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='TOOD', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='TOODHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=6, 37 | feat_channels=256, 38 | anchor_type='anchor_free', 39 | anchor_generator=dict( 40 | type='AnchorGenerator', 41 | ratios=[1.0], 42 | octave_base_scale=8, 43 | scales_per_octave=1, 44 | strides=[8, 16, 32, 64, 128]), 45 | bbox_coder=dict( 46 | type='DeltaXYWHBBoxCoder', 47 | target_means=[.0, .0, .0, .0], 48 | target_stds=[0.1, 0.1, 0.2, 0.2]), 49 | initial_loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | activated=True, # use probability instead of logit as input 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_cls=dict( 57 | type='QualityFocalLoss', 58 | use_sigmoid=True, 59 | activated=True, # use probability instead of logit as input 60 | beta=2.0, 61 | loss_weight=1.0), 62 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), 63 | train_cfg=dict( 64 | initial_epoch=4, 65 | initial_assigner=dict(type='ATSSAssigner', topk=9), 66 | assigner=dict(type='TaskAlignedAssigner', topk=13), 67 | alpha=1, 68 | beta=6, 69 | allowed_border=-1, 70 | pos_weight=-1, 71 | debug=False), 72 | test_cfg=dict( 73 | nms_pre=1000, 74 | min_bbox_size=0, 75 | score_thr=0.05, 76 | nms=dict(type='nms', iou_threshold=0.6), 77 | max_per_img=100)) 78 | 79 | # optimizer 80 | optim_wrapper = dict( 81 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 82 | -------------------------------------------------------------------------------- /configs/detection/urpc2020_dataset/train_validation/tood_r50_fpn_1x_urpc-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/urpc2020/urpc2020-validation_coco_detection.py', 3 | '../../_base_/schedules/schedule_1x.py', 
'../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='TOOD', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='TOODHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=6, 37 | feat_channels=256, 38 | anchor_type='anchor_free', 39 | anchor_generator=dict( 40 | type='AnchorGenerator', 41 | ratios=[1.0], 42 | octave_base_scale=8, 43 | scales_per_octave=1, 44 | strides=[8, 16, 32, 64, 128]), 45 | bbox_coder=dict( 46 | type='DeltaXYWHBBoxCoder', 47 | target_means=[.0, .0, .0, .0], 48 | target_stds=[0.1, 0.1, 0.2, 0.2]), 49 | initial_loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | activated=True, # use probability instead of logit as input 53 | gamma=2.0, 54 | alpha=0.25, 55 | loss_weight=1.0), 56 | loss_cls=dict( 57 | type='QualityFocalLoss', 58 | use_sigmoid=True, 59 | activated=True, # use probability instead of logit as input 60 | beta=2.0, 61 | loss_weight=1.0), 62 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0)), 63 | train_cfg=dict( 64 | initial_epoch=4, 65 | initial_assigner=dict(type='ATSSAssigner', topk=9), 66 | assigner=dict(type='TaskAlignedAssigner', topk=13), 67 | alpha=1, 68 | beta=6, 69 | allowed_border=-1, 70 | pos_weight=-1, 71 | debug=False), 72 | test_cfg=dict( 73 | nms_pre=1000, 74 | min_bbox_size=0, 75 | score_thr=0.05, 76 | nms=dict(type='nms', iou_threshold=0.6), 77 | max_per_img=100)) 78 | 79 | # optimizer 80 | optim_wrapper = dict( 81 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 82 | -------------------------------------------------------------------------------- /lqit/edit/models/editors/zero_dce/zero_dce_generator.py: -------------------------------------------------------------------------------- 1 | # This work is licensed under Attribution-NonCommercial 4.0 International License. 
# noqa 2 | from typing import List 3 | 4 | from lqit.edit.models.base_models import BaseGenerator 5 | from lqit.edit.structures import BatchPixelData 6 | from lqit.registry import MODELS 7 | from lqit.utils import ConfigType, OptMultiConfig 8 | 9 | 10 | @MODELS.register_module() 11 | class ZeroDCEGenerator(BaseGenerator): 12 | """Zero-DCE generator for low-light image enhancement.""" 13 | 14 | def __init__(self, 15 | model: ConfigType, 16 | color_loss: ConfigType = dict( 17 | type='ColorLoss', loss_weight=5.0), 18 | spacial_loss: ConfigType = dict( 19 | type='SpatialLoss', loss_weight=1.0), 20 | tv_loss: ConfigType = dict( 21 | type='MaskedTVLoss', loss_mode='mse', loss_weight=200.0), 22 | exposure_loss: ConfigType = dict( 23 | type='ExposureLoss', 24 | patch_size=16, 25 | mean_val=0.6, 26 | loss_weight=10.), 27 | init_cfg: OptMultiConfig = None, 28 | **kwargs) -> None: 29 | super().__init__(model=model, init_cfg=init_cfg) 30 | 31 | # build losses 32 | self.color_loss = MODELS.build(color_loss) 33 | self.spacial_loss = MODELS.build(spacial_loss) 34 | self.tv_loss = MODELS.build(tv_loss) 35 | self.exposure_loss = MODELS.build(exposure_loss) 36 | 37 | def loss(self, loss_input: BatchPixelData, batch_img_metas: List[dict]): 38 | """Calculate the loss based on the outputs of the generator.""" 39 | batch_outputs = loss_input.output 40 | batch_inputs = loss_input.input 41 | 42 | # ZeroDCE returns the enhanced image and the curve maps at the same time. 43 | in_channels = self.model.in_channels 44 | batch_enhance_img = batch_outputs[:, :in_channels, :, :] 45 | batch_curve = batch_outputs[:, in_channels:, :, :] 46 | 47 | tv_loss = self.tv_loss(batch_curve) 48 | spacial_loss = self.spacial_loss(batch_enhance_img, batch_inputs) 49 | color_loss = self.color_loss(batch_enhance_img) 50 | exposure_loss = self.exposure_loss(batch_enhance_img) 51 | 52 | losses = dict( 53 | tv_loss=tv_loss, 54 | spacial_loss=spacial_loss, 55 | color_loss=color_loss, 56 | exposure_loss=exposure_loss) 57 | 58 | return losses 59 | 60 | def post_precess(self, outputs): 61 | # ZeroDCE returns the enhanced image and the curve maps at the same time.
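# For example, assuming in_channels=3 and a 4-D `outputs` of shape
# (N, 3 + K, H, W): the first 3 channels are the enhanced image and the
# remaining K channels are the predicted curve maps, mirroring the slicing
# in `loss` above; only the enhanced image is returned here.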
62 | assert outputs.dim() in [3, 4] 63 | in_channels = self.model.in_channels 64 | if outputs.dim() == 4: 65 | enhance_img = outputs[:, :in_channels, :, :] 66 | else: 67 | enhance_img = outputs[:in_channels, :, :] 68 | return enhance_img 69 | -------------------------------------------------------------------------------- /configs/detection/rtts_dataset/atss_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/rtts_coco.py', '../_base_/schedules/schedule_1x.py', 3 | '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=5, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | 74 | # add WandbVisBackend 75 | # vis_backends = [ 76 | # dict(type='LocalVisBackend'), 77 | # dict(type='WandbVisBackend', 78 | # init_kwargs=dict( 79 | # project='rtts_detection', 80 | # name='atss_r50_fpn_1x_rtts', 81 | # entity='lqit', 82 | # ) 83 | # ) 84 | # ] 85 | # visualizer = dict( 86 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 87 | -------------------------------------------------------------------------------- /configs/detection/duo_dataset/atss_r50_fpn_1x_duo-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/duo_coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | 
frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=4, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | 74 | # add WandbVisBackend 75 | # vis_backends = [ 76 | # dict(type='LocalVisBackend'), 77 | # dict(type='WandbVisBackend', 78 | # init_kwargs=dict( 79 | # project='DUO_detection', 80 | # name='atss_r50_fpn_1x_duo', 81 | # entity='lqit', 82 | # ) 83 | # ) 84 | # ] 85 | # visualizer = dict( 86 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 87 | -------------------------------------------------------------------------------- /configs/detection/ruod_dataset/atss_r50_fpn_1x_ruod.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../_base_/datasets/ruod_coco_detection.py', 3 | '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=10, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 
55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | 74 | # add WandbVisBackend 75 | # vis_backends = [ 76 | # dict(type='LocalVisBackend'), 77 | # dict(type='WandbVisBackend', 78 | # init_kwargs=dict( 79 | # project='RUOD_detection', 80 | # name='atss_r50_fpn_1x_ruod', 81 | # entity='lqit', 82 | # ) 83 | # ) 84 | # ] 85 | # visualizer = dict( 86 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 87 | -------------------------------------------------------------------------------- /configs/detection/tienet/base_detector/atss_r50_fpn_1x_rtts-coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/datasets/rtts_coco.py', 3 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 4 | ] 5 | 6 | # model settings 7 | model = dict( 8 | type='ATSS', 9 | data_preprocessor=dict( 10 | type='DetDataPreprocessor', 11 | mean=[123.675, 116.28, 103.53], 12 | std=[58.395, 57.12, 57.375], 13 | bgr_to_rgb=True, 14 | pad_size_divisor=32), 15 | backbone=dict( 16 | type='ResNet', 17 | depth=50, 18 | num_stages=4, 19 | out_indices=(0, 1, 2, 3), 20 | frozen_stages=1, 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | style='pytorch', 24 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 25 | neck=dict( 26 | type='FPN', 27 | in_channels=[256, 512, 1024, 2048], 28 | out_channels=256, 29 | start_level=1, 30 | add_extra_convs='on_output', 31 | num_outs=5), 32 | bbox_head=dict( 33 | type='ATSSHead', 34 | num_classes=5, 35 | in_channels=256, 36 | stacked_convs=4, 37 | feat_channels=256, 38 | anchor_generator=dict( 39 | type='AnchorGenerator', 40 | ratios=[1.0], 41 | octave_base_scale=8, 42 | scales_per_octave=1, 43 | strides=[8, 16, 32, 64, 128]), 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[.0, .0, .0, .0], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | loss_cls=dict( 49 | type='FocalLoss', 50 | use_sigmoid=True, 51 | gamma=2.0, 52 | alpha=0.25, 53 | loss_weight=1.0), 54 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)), 57 | # training and testing settings 58 | train_cfg=dict( 59 | assigner=dict(type='ATSSAssigner', topk=9), 60 | allowed_border=-1, 61 | pos_weight=-1, 62 | debug=False), 63 | test_cfg=dict( 64 | nms_pre=1000, 65 | min_bbox_size=0, 66 | score_thr=0.05, 67 | nms=dict(type='nms', iou_threshold=0.6), 68 | max_per_img=100)) 69 | 70 | # optimizer 71 | optim_wrapper = dict( 72 | optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)) 73 | 74 | # add WandbVisBackend 75 | # vis_backends = [ 76 | # dict(type='LocalVisBackend'), 77 | # dict(type='WandbVisBackend', 78 | # init_kwargs=dict( 79 | # project='rtts_detection', 80 | # name='atss_r50_fpn_1x_rtts', 81 | # entity='lqit', 82 | # ) 83 | # ) 84 | # ] 85 | # visualizer = dict( 86 | # type='DetLocalVisualizer', vis_backends=vis_backends, name='visualizer') 87 | 
--------------------------------------------------------------------------------
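A minimal sketch of driving the TIDE error analysis from Python rather than the CLI, using only the classes and calls that already appear in tools/analysis_tools/analyze_tide.py above; the two json paths and the run name are hypothetical placeholders.

from lqit.detection.evaluation.tide import COCO, TIDE, COCOResult

# Thresholds mirror the script defaults (--pos-thr 0.5, --bkg-thr 0.1).
tide = TIDE(pos_threshold=0.5, background_threshold=0.1, mode='bbox')
gt = COCO(path='annotations/val.json', name='demo')             # placeholder path
preds = COCOResult(path='results/pred.bbox.json', name='demo')  # placeholder path
tide.evaluate(gt=gt, preds=preds, name='demo')
tide.summarize()         # print the per-type error breakdown
print(tide.all_errors)   # dict mapping error type to its value, as consumed in main()
tide.plot(out_dir='./')  # optionally save the TIDE result image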