├── tools ├── __init__.py ├── datasets │ ├── __init__.py │ ├── create_dataset_link.sh │ ├── convert_coco_additional.py │ ├── preprocess_dataset.sh │ ├── convert_json_to_txt.py │ └── prepare_coco_standard.py ├── datasets_uda │ ├── __init__.py │ ├── preprocess_dataset.sh │ ├── create_dataset_link.sh │ └── convert_xml_to_json.py └── create_config_from_template.py ├── examples ├── __init__.py ├── demo │ ├── __init__.py │ └── image_demo.py ├── eval │ ├── __init__.py │ ├── eval.sh │ └── eval_uda.sh └── train │ ├── __init__.py │ ├── bash │ ├── train_baseline_ssod.sh │ ├── train_baseline_uda.sh │ ├── train_uda.sh │ └── train_ssod.sh │ ├── xonsh │ ├── train_gpu2.sh │ └── train_gpu8.sh │ └── train.py ├── mmdet_extension ├── core │ ├── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ ├── classes.py │ │ ├── colormap.py │ │ └── image.py │ ├── bbox │ │ ├── __init__.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── sampling_result_lm.py │ │ │ └── random_sampler_lm.py │ │ └── assigner │ │ │ ├── __init__.py │ │ │ ├── assign_result_lm.py │ │ │ └── max_iou_assigner_lm.py │ ├── __init__.py │ ├── runner │ │ ├── __init__.py │ │ └── semi_runner.py │ └── hooks │ │ ├── __init__.py │ │ └── semi_eval_hooks.py ├── models │ ├── backbones │ │ ├── __init__.py │ │ └── vgg.py │ ├── loss │ │ ├── __init__.py │ │ └── focal_loss.py │ ├── __init__.py │ ├── roi_head │ │ ├── bbox_heads │ │ │ ├── __init__.py │ │ │ ├── convfc_bbox_head_st.py │ │ │ └── convfc_bbox_head_lm.py │ │ ├── __init__.py │ │ ├── standard_roi_head_base.py │ │ ├── standard_roi_head_st.py │ │ └── standard_roi_head_lm.py │ └── detectors │ │ ├── __init__.py │ │ ├── semi_two_stage.py │ │ ├── stac.py │ │ ├── unbiased_teacher.py │ │ └── semi_base.py ├── __init__.py ├── apis │ ├── __init__.py │ ├── test.py │ └── train.py └── datasets │ ├── pipelines │ ├── __init__.py │ ├── transforms.py │ ├── semi_augment.py │ └── transforms_box.py │ ├── __init__.py │ ├── new_coco.py │ ├── semi_dataset.py │ └── txt_style.py ├── pretrained_model ├── baseline │ └── Where_To_Save_Source_Only_Models └── backbone │ └── Where_To_Save_ImageNet_Pretrained_Models ├── docs ├── png │ └── performance.png ├── performance.md ├── domain_adaption.md └── prepare_data.md ├── requirements.txt ├── configs ├── README.md └── baseline │ ├── ema_config │ ├── baseline_uda_cls1.py │ ├── baseline_uda_cls8.py │ ├── baseline_standard.py │ └── baseline_voc.py │ ├── baseline_uda.py │ ├── baseline_uda_test.py │ └── baseline_ssod.py └── README.md /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/demo/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tools/datasets_uda/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet_extension/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pretrained_model/baseline/Where_To_Save_Source_Only_Models: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet_extension/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import VGG -------------------------------------------------------------------------------- /pretrained_model/backbone/Where_To_Save_ImageNet_Pretrained_Models: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet_extension/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * 2 | from .datasets import * -------------------------------------------------------------------------------- /mmdet_extension/models/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .focal_loss import CEFocalLoss -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .samplers import * 2 | from .assigner import * -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .random_sampler_lm import RandomSamplerLM -------------------------------------------------------------------------------- /docs/png/performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Closed11/SSOD/HEAD/docs/png/performance.png -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/assigner/__init__.py: -------------------------------------------------------------------------------- 1 | from .max_iou_assigner_lm import MaxIoUAssignerLM -------------------------------------------------------------------------------- /mmdet_extension/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import * 2 | from .bbox import * 3 | from .runner import * 4 | -------------------------------------------------------------------------------- /mmdet_extension/core/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .semi_runner import SemiEpochBasedRunner, SemiIterBasedRunner -------------------------------------------------------------------------------- /mmdet_extension/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .test import single_gpu_test, multi_gpu_test 2 | from .train import train_detector 3 | 
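# A minimal usage sketch (assumption: paths mirror the defaults used in
# examples/demo/image_demo.py; adjust them to your checkout). Importing
# mmdet_extension is a side-effect import: its __init__.py runs
# `from .models import *` and `from .datasets import *`, which registers the
# custom backbones, roi_heads, losses, detectors and datasets with the mmdet
# registries, so the import must happen before building a model from a config
# that references them.
from mmdet.apis import inference_detector, init_detector
import mmdet_extension  # noqa: F401  # registration happens on import

model = init_detector(
    './configs/baseline/ema_config/baseline_standard.py',
    './pretrained_model/baseline/instances_train2017.1@1.pth',
    device='cuda:0')
result = inference_detector(model, './dataset/coco/val2017/000000186938.jpg')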
-------------------------------------------------------------------------------- /mmdet_extension/core/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .stac_hooks import STACHook 2 | from .labelmatch_hooks import LabelMatchHook 3 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .semi_augment import * 2 | from .transforms import * 3 | from .transforms_box import * 4 | -------------------------------------------------------------------------------- /mmdet_extension/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .roi_head import * 3 | from .loss import * 4 | from .detectors import * 5 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .convfc_bbox_head_lm import Shared2FCBBoxHeadLM 2 | from .convfc_bbox_head_st import Shared2FCBBoxHeadST -------------------------------------------------------------------------------- /mmdet_extension/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .new_coco import NewCocoDataset 2 | from .txt_style import TXTDataset 3 | from .semi_dataset import SemiDataset 4 | 5 | from .pipelines import * -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pytest-runner 2 | mmcv-full==1.2.7 3 | mmdet==2.10.0 4 | opencv-python==4.6.0.66 5 | xonsh 6 | tqdm 7 | albumentations 8 | pympler 9 | timm 10 | dataclasses 11 | seaborn 12 | imgaug 13 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_heads import * 2 | 3 | from .standard_roi_head_base import StandardRoIHeadBase 4 | from .standard_roi_head_st import StandardRoIHeadST 5 | from .standard_roi_head_lm import StandardRoIHeadLM -------------------------------------------------------------------------------- /mmdet_extension/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .semi_base import SemiBaseDetector 2 | from .semi_two_stage import SemiTwoStageDetector 3 | 4 | from .stac import STAC 5 | from .unbiased_teacher import UnbiasedTeacher 6 | from .soft_teacher import SoftTeacher 7 | 8 | from .labelmatch import LabelMatch 9 | from .labelmatch_online import LabelMatchOnline -------------------------------------------------------------------------------- /examples/train/bash/train_baseline_ssod.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | work_dir=$(dirname $0) 4 | cd $work_dir 5 | 6 | pip install xonsh 7 | 8 | cd ../ 9 | cd xonsh 10 | xonsh train_gpu2.sh ./configs/baseline/baseline_ssod.py 1 1 coco-standard # seed percent dataset 11 | 12 | # # -----------voc---------- 13 | # xonsh train_gpu2.sh ./configs/baseline/baseline_ssod.py 1 1 voc 14 | 15 | # # -----------coco-additional---------- 16 | # xonsh train_gpu8.sh 
./configs/baseline/baseline_ssod.py 1 1 coco-additional -------------------------------------------------------------------------------- /examples/train/bash/train_baseline_uda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | work_dir=$(dirname $0) 4 | cd $work_dir 5 | 6 | pip install xonsh 7 | 8 | # # ------C2F------- 9 | cd ../ 10 | cd xonsh 11 | xonsh train_gpu2.sh ./configs/baseline/baseline_uda.py C2F 12 | 13 | # # ------C2B------- 14 | # we use the same baseline with C2F 15 | 16 | # # ------K2C------- 17 | #xonsh train_gpu2.sh ./configs/baseline/baseline_uda.py K2C 18 | 19 | # # ------S2C------- 20 | #xonsh train_gpu2.sh ./configs/baseline/baseline_uda.py S2C -------------------------------------------------------------------------------- /mmdet_extension/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | @torch.no_grad() 5 | def concat_all_gather(tensor, dim=0): 6 | """Performs all_gather operation on the provided tensors. 7 | 8 | *** Warning ***: torch.distributed.all_gather has no gradient. 9 | """ 10 | tensors_gather = [ 11 | torch.ones_like(tensor) 12 | for _ in range(torch.distributed.get_world_size()) 13 | ] 14 | torch.distributed.all_gather(tensors_gather, tensor, async_op=False) 15 | 16 | output = torch.cat(tensors_gather, dim=dim) 17 | return output 18 | -------------------------------------------------------------------------------- /examples/train/bash/train_uda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | work_dir=$(dirname $0) 4 | cd $work_dir 5 | 6 | pip install xonsh 7 | 8 | cd ../ 9 | cd xonsh 10 | 11 | # # ------C2F------- 12 | xonsh train_gpu2.sh ./configs/labelmatch/labelmatch_uda.py C2F 13 | 14 | # # ------C2B------- 15 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_uda.py C2B 16 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_uda_prior.py C2B # with prior from unlabeled data 17 | 18 | # # ------K2C------- 19 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_uda.py K2C 20 | 21 | # # ------S2C------- 22 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_uda.py S2C -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/assigner/assign_result_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | from mmdet.core.bbox.assigners import AssignResult 5 | 6 | 7 | class AssignResultLM(AssignResult): 8 | def add_ig_(self, gt_labels): 9 | # assign as -1 for ignore 10 | self_inds = -1 * torch.ones(len(gt_labels), dtype=torch.long, device=gt_labels.device) 11 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 12 | self.max_overlaps = torch.cat( 13 | [self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps]) 14 | 15 | if self.labels is not None: 16 | self.labels = torch.cat([gt_labels, self.labels]) 17 | -------------------------------------------------------------------------------- /tools/datasets_uda/preprocess_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../.. 
6 | 7 | cur_path = os.path.abspath(os.path.dirname(__file__)) 8 | $PYTHONPATH=cur_path 9 | 10 | for dataset in ['C2F', 'C2B', 'K2C', 'S2C']: 11 | print(f'===============process {dataset}==============') 12 | data_root = f'./dataset/{dataset}' 13 | for name in ['labeled_data', 'unlabeled_data', 'test_data']: 14 | out_dir = f'./dataset/{dataset}/{name}.json' 15 | data_dir = os.path.join(data_root, name) 16 | if dataset in ['C2F', 'C2B']: 17 | python tools/datasets_uda/convert_xml_to_json.py --devkit_path @(data_dir) --out-name @(out_dir) --dataset city 18 | else: 19 | python tools/datasets_uda/convert_xml_to_json.py --devkit_path @(data_dir) --out-name @(out_dir) --dataset car 20 | 21 | -------------------------------------------------------------------------------- /mmdet_extension/models/backbones/vgg.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import VGG as VGGCV 2 | 3 | from mmdet.models.builder import BACKBONES 4 | 5 | 6 | @BACKBONES.register_module() 7 | class VGG(VGGCV): 8 | def __init__(self, 9 | depth, 10 | *args, 11 | **kwargs 12 | ): 13 | super().__init__(depth=depth, *args, **kwargs) 14 | 15 | def init_weights(self, pretrained=None): 16 | super().init_weights(pretrained) 17 | 18 | def forward(self, x): 19 | outs = [] 20 | vgg_layers = getattr(self, self.module_name) 21 | for i in range(len(self.stage_blocks)): 22 | for j in range(*self.range_sub_modules[i]): 23 | vgg_layer = vgg_layers[j] 24 | x = vgg_layer(x) 25 | if i in self.out_indices: 26 | outs.append(x) 27 | return tuple(outs) 28 | -------------------------------------------------------------------------------- /tools/datasets/create_dataset_link.sh: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # please change this to fit your environment 4 | prefix_coco = '/data' 5 | prefix_coco_ul = '/data' 6 | prefix_voc = '/data' 7 | 8 | def create_folder(file_root): 9 | if not os.path.exists(file_root): 10 | os.makedirs(file_root) 11 | 12 | cd ../.. 13 | create_folder('dataset') 14 | cd dataset 15 | 16 | # 1. coco 17 | print('create coco dataset symlink: ') 18 | create_folder('coco') 19 | cd coco 20 | ln -s @(prefix_coco)/coco/images/* . 21 | if prefix_coco != prefix_coco_ul: 22 | ln -s @(prefix_coco_ul)/coco/* . 23 | 24 | create_folder('annotations') 25 | cd annotations 26 | ln -s @(prefix_coco)/coco/annotations/* . 27 | 28 | cd ../.. 29 | print('finish coco dataset') 30 | 31 | # 2. voc 32 | print('create voc dataset symlink: ') 33 | create_folder('voc') 34 | cd voc 35 | ln -s @(prefix_voc)/voc/12/VOCdevkit/* . 36 | ln -s @(prefix_voc)/voc/07/VOCdevkit/* . 37 | cd ../.. 
38 | print('finish voc dataset') 39 | -------------------------------------------------------------------------------- /tools/datasets/convert_coco_additional.py: -------------------------------------------------------------------------------- 1 | """ 2 | add empty information to coco additional 3 | """ 4 | import argparse 5 | import mmcv 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser( 9 | description='Add empty information to coco additional') 10 | parser.add_argument('--additional-json', 11 | default='', 12 | help='coco additional json') 13 | parser.add_argument('--standard-json', default='', 14 | help='one of the labeled data json file') 15 | parser.add_argument('--output-json', default='', 16 | help='output json file') 17 | 18 | args = parser.parse_args() 19 | 20 | additional_info = mmcv.load(args.additional_json) 21 | standard_info = mmcv.load(args.standard_json) 22 | 23 | additional_info['annotations'] = [] 24 | additional_info['categories'] = standard_info['categories'] 25 | 26 | mmcv.dump(additional_info, args.output_json) 27 | -------------------------------------------------------------------------------- /examples/eval/eval.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../.. 6 | $LANG='zh_CN.UTF-8' 7 | $LANGUAGE='zh_CN:zh:en_US:en' 8 | $LC_ALL='C.UTF-8' 9 | 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | $PYTHONPATH=cur_path 12 | 13 | GPU = 2 14 | #config='./configs/baseline/ema_config/baseline_standard.py' # for coco-standard and coco-additional 15 | #checkpoint=f'./pretrained_model/baseline/instances_train2017.1@1.pth' 16 | 17 | config='./configs/baseline/ema_config/baseline_voc.py' # for coco-standard and coco-additional 18 | checkpoint=f'./pretrained_model/baseline/voc.pth' 19 | 20 | eval_type='bbox' 21 | 22 | if GPU>1: 23 | python -m torch.distributed.launch --nproc_per_node=@(GPU) --master_port=19005 \ 24 | examples/eval/eval.py --config @(config) --checkpoint @(checkpoint) --launcher pytorch \ 25 | --eval @(eval_type) 26 | else: 27 | python examples/eval/eval.py --config @(config) --checkpoint @(checkpoint) --eval @(eval_type) 28 | -------------------------------------------------------------------------------- /examples/eval/eval_uda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../.. 
6 | $LANG='zh_CN.UTF-8' 7 | $LANGUAGE='zh_CN:zh:en_US:en' 8 | $LC_ALL='C.UTF-8' 9 | 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | $PYTHONPATH=cur_path 12 | 13 | GPU = 2 14 | data_name='city' 15 | checkpoint=f'./pretrained_model/baseline/city.pth' 16 | 17 | config='./configs/baseline/baseline_uda_test.py' 18 | eval_type='bbox' 19 | 20 | new_config = config[:-3] + f'_{data_name}.py' 21 | python tools/create_config_from_template.py --org-config @(config) --new-config @(new_config) \ 22 | --data @(data_name) --gpu @(GPU) 23 | 24 | if GPU>1: 25 | python -m torch.distributed.launch --nproc_per_node=@(GPU) --master_port=19005 \ 26 | examples/eval/eval.py --config @(new_config) --checkpoint @(checkpoint) --launcher pytorch \ 27 | --eval @(eval_type) 28 | else: 29 | python examples/eval/eval.py --config @(new_config) --checkpoint @(checkpoint) --eval @(eval_type) 30 | 31 | os.remove(new_config) -------------------------------------------------------------------------------- /examples/train/bash/train_ssod.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | work_dir=$(dirname $0) 4 | cd $work_dir 5 | 6 | pip install xonsh 7 | 8 | cd ../ 9 | cd xonsh 10 | 11 | # # ==================================== 12 | # # fair comparison 13 | # # ==================================== 14 | # 1. labelmatch 15 | xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_standard.py 1 1 none # seed percent dataset 16 | 17 | # 2. stac 18 | #xonsh train_gpu8.sh ./configs/stac/stac_standard.py 1 1 none # seed percent dataset 19 | 20 | # 3. unbiased teacher 21 | #xonsh train_gpu8.sh ./configs/unbiased_teacher/unbiased_teacher_standard.py 1 1 none # seed percent dataset 22 | 23 | # 4. soft teacher 24 | #xonsh train_gpu8.sh ./configs/soft_teacher/soft_teacher_standard.py 1 1 none # seed percent dataset 25 | 26 | 27 | # # ==================================== 28 | # # hyper-parameter in paper 29 | # # ==================================== 30 | # 1. coco-standard 31 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_standard_paper.py 1 1 none # seed percent dataset 32 | 33 | # 2. coco-additional 34 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_additional.py 1 1 none # seed percent dataset 35 | 36 | # 3. voc 37 | #xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_voc.py 1 1 none # seed percent dataset -------------------------------------------------------------------------------- /examples/train/xonsh/train_gpu2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../../.. 
6 | $LANG='zh_CN.UTF-8' 7 | $LANGUAGE='zh_CN:zh:en_US:en' 8 | $LC_ALL='C.UTF-8' 9 | 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | par_path = os.path.join(cur_path, '../') 12 | $PYTHONPATH=cur_path 13 | 14 | 15 | # #------------------------------------template for 2GPU------------------------------------ 16 | GPU = 2 17 | config = $ARG1 18 | second_arg = $ARG2 19 | try: 20 | int(second_arg) 21 | seed = $ARG2 22 | percent = $ARG3 23 | name = $ARG4 24 | times = 1 25 | if name == 'coco-standard': 26 | times = 5 if percent == '1' else 2 27 | new_config = config[:-3] + f'_{seed}_{percent}_{GPU}.py' 28 | python tools/create_config_from_template.py --org-config @(config) --new-config @(new_config) \ 29 | --seed @(seed) --percent @(percent) --gpu @(GPU) --data @(name) --times @(times) 30 | except: 31 | name = $ARG2 32 | new_config = config[:-3] + f'_{name}.py' 33 | print(name) 34 | python tools/create_config_from_template.py --org-config @(config) --new-config @(new_config) \ 35 | --data @(name) --gpu @(GPU) 36 | 37 | if GPU > 1: 38 | python -m torch.distributed.launch --nproc_per_node=@(GPU) --master_port=19005 \ 39 | examples/train/train.py --config @(new_config) --launcher pytorch 40 | else: 41 | python examples/train/train.py --config @(new_config) 42 | os.remove(new_config) -------------------------------------------------------------------------------- /examples/train/xonsh/train_gpu8.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../../.. 6 | $LANG='zh_CN.UTF-8' 7 | $LANGUAGE='zh_CN:zh:en_US:en' 8 | $LC_ALL='C.UTF-8' 9 | 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | par_path = os.path.join(cur_path, '../') 12 | $PYTHONPATH=cur_path 13 | 14 | 15 | # #------------------------------------template for 2GPU------------------------------------ 16 | GPU = 8 17 | config = $ARG1 18 | second_arg = $ARG2 19 | try: 20 | int(second_arg) 21 | seed = $ARG2 22 | percent = $ARG3 23 | name = $ARG4 24 | times = 1 25 | if name == 'coco-standard': 26 | times = 5 if percent == '1' else 2 27 | new_config = config[:-3] + f'_{seed}_{percent}_{GPU}.py' 28 | python tools/create_config_from_template.py --org-config @(config) --new-config @(new_config) \ 29 | --seed @(seed) --percent @(percent) --gpu @(GPU) --data @(name) --times @(times) 30 | except: 31 | name = $ARG2 32 | new_config = config[:-3] + f'_{name}.py' 33 | print(name) 34 | python tools/create_config_from_template.py --org-config @(config) --new-config @(new_config) \ 35 | --data @(name) --gpu @(GPU) 36 | 37 | if GPU > 1: 38 | python -m torch.distributed.launch --nproc_per_node=@(GPU) --master_port=19005 \ 39 | examples/train/train.py --config @(new_config) --launcher pytorch 40 | else: 41 | python examples/train/train.py --config @(new_config) 42 | os.remove(new_config) -------------------------------------------------------------------------------- /mmdet_extension/datasets/new_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | New COCO Dataset: 5 | 1. 
add manual data-length 6 | """ 7 | import random 8 | 9 | from mmdet.datasets import CocoDataset 10 | from mmdet.datasets.builder import DATASETS 11 | from mmdet_extension.core.utils.classes import COCO_CLASSES 12 | 13 | 14 | @DATASETS.register_module() 15 | class NewCocoDataset(CocoDataset): 16 | CLASSES = COCO_CLASSES 17 | 18 | def __init__(self, 19 | ann_file, 20 | pipeline, 21 | classes=None, 22 | data_root=None, 23 | img_prefix='', 24 | seg_prefix=None, 25 | proposal_file=None, 26 | test_mode=False, 27 | filter_empty_gt=False, 28 | manual_length=None): 29 | super().__init__(ann_file=ann_file, pipeline=pipeline, classes=classes, 30 | data_root=data_root, img_prefix=img_prefix, seg_prefix=seg_prefix, 31 | proposal_file=proposal_file, test_mode=test_mode, filter_empty_gt=filter_empty_gt) 32 | self.length = min(manual_length, len(self.data_infos)) if manual_length else len(self.data_infos) 33 | 34 | def __len__(self): 35 | return self.length 36 | 37 | def shuffle_data_info(self): 38 | random.shuffle(self.data_infos) 39 | -------------------------------------------------------------------------------- /tools/create_config_from_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | replace "template" in config 3 | """ 4 | import argparse 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser(description='Convert template to specific') 8 | parser.add_argument('--org-config', default='', help='origin config file (.py)') 9 | parser.add_argument('--new-config', default='', help='new config file (.py)') 10 | parser.add_argument('--seed', default='0', type=str) 11 | parser.add_argument('--percent', default='1', type=str) 12 | parser.add_argument('--gpu', default='2', type=str) 13 | parser.add_argument('--times', default='1', type=str) 14 | parser.add_argument('--score', default='0.7', type=str) 15 | parser.add_argument('--data', default='coco-standard', type=str) 16 | 17 | args = parser.parse_args() 18 | 19 | with open(args.org_config, 'r', encoding='utf-8') as fr, open(args.new_config, 'w', encoding='utf-8') as fw: 20 | for line in fr: 21 | if 'seed_template' in line: 22 | line = line.replace('seed_template', args.seed) 23 | if 'percent_template' in line: 24 | line = line.replace('percent_template', args.percent) 25 | if 'gpu_template' in line: 26 | line = line.replace('gpu_template', args.gpu) 27 | if 'times_template' in line: 28 | line = line.replace('times_template', args.times) 29 | if 'score_template' in line: 30 | line = line.replace('score_template', args.score) 31 | if 'data_template' in line: 32 | value = f'\'{args.data}\'' 33 | line = line.replace('data_template', value) 34 | fw.write(line) 35 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/standard_roi_head_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | 4 | """ 5 | standard roi_head for LabelMatch 6 | """ 7 | import torch 8 | 9 | from mmdet.models.builder import HEADS 10 | from mmdet.core import bbox2roi 11 | from mmdet.models.roi_heads import StandardRoIHead 12 | 13 | 14 | @HEADS.register_module() 15 | class StandardRoIHeadBase(StandardRoIHead): 16 | def simple_test_bboxes_base(self, x, img_metas, proposals): 17 | rois = bbox2roi(proposals) 18 | bbox_results = self._bbox_forward(x, rois) 19 | img_shapes = tuple(meta['img_shape'] for meta in img_metas) 20 | 21 | cls_score = bbox_results['cls_score'] 22 | bbox_pred = bbox_results['bbox_pred'] 23 | num_proposals_per_img = tuple(len(p) for p in proposals) 24 | rois = rois.split(num_proposals_per_img, 0) 25 | cls_score = cls_score.split(num_proposals_per_img, 0) 26 | # the bbox prediction of some detectors like SABL is not Tensor 27 | if isinstance(bbox_pred, torch.Tensor): 28 | bbox_pred = bbox_pred.split(num_proposals_per_img, 0) 29 | else: 30 | bbox_pred = self.bbox_head.bbox_pred_split( 31 | bbox_pred, num_proposals_per_img) 32 | # apply bbox post-processing to each image individually 33 | det_bboxes = [] 34 | det_labels = [] 35 | for i in range(len(proposals)): 36 | bboxes = self.bbox_head.bbox_coder.decode( 37 | rois[i][:, 1:], bbox_pred[i], max_shape=img_shapes[i]) 38 | det_bboxes.append(bboxes) 39 | det_labels.append(cls_score[i]) 40 | return det_bboxes, det_labels 41 | -------------------------------------------------------------------------------- /docs/performance.md: -------------------------------------------------------------------------------- 1 | # Performance 2 | 3 | ## SSOD 4 | 5 | ### Fair Comparison 6 | 7 | For fair comparison, we reproduce some SSOD methods with the same setting (e.g., augmentation, training iterations, batch-size, and so on.). 8 | 9 | - Soft-Teacher: without box-jitter (we will add box-jitter in next version) 10 | - PASCAL-VOC: use 2-GPU 11 | 12 | #### COCO-standard 13 | 14 | ![](./png/performance.png) 15 | 16 | | method | batch-size | iterations | 1% | 5% | 10% | 17 | | ------------------------------ | ------------------------- | ---------- | ---- | ---- | ---- | 18 | | STAC (thr=0.9, CE) | 32 labeled + 32 unlabeled | 40K | 16.1 | 24.0 | 28.1 | 19 | | Unbiased-Teacher (thr=0.7, FL) | 32 labeled + 32 unlabeled | 40K | 22.0 | 28.6 | 32.1 | 20 | | Soft-Teacher (thr=0.9, CE) | 32 labeled + 32 unlabeled | 40K | 22.1 | 29.0 | 32.7 | 21 | | LabelMatch (original code) | 32 labeled + 32 unlabeled | 40K | 24.6 | 31.5 | 34.6 | 22 | | LabelMatch (here) | 32 labeled + 32 unlabeled | 40K | 24.6 | 31.6 | 34.4 | 23 | 24 | > NOTE: we use the **ablation training setting** here. (different training setting can be found in supplementary materials.) 
25 | 26 | #### PASCAL-VOC 27 | 28 | | method | batch-size | iterations | AP50:95 | AP50 | 29 | | ------------------------------ | ---------------------- | ---------- | ------- | ---- | 30 | | STAC (thr=0.9, CE) | 8 labeled +8 unlabeled | 80K | 46.5 | 78.6 | 31 | | Unbiased-Teacher (thr=0.7, FL) | 8 labeled +8 unlabeled | 80K | 53.3 | 84.2 | 32 | | Soft-Teacher (thr=0.9, CE) | 8 labeled +8 unlabeled | 80K | 52.8 | 84.3 | 33 | | LabelMatch | 8 labeled +8 unlabeled | 80K | 54.7 | 84.8 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /mmdet_extension/models/loss/focal_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/facebookresearch/unbiased-teacher 3 | """ 4 | CE version of Focal Loss 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from mmdet.models.builder import LOSSES 11 | from mmdet.models.losses.utils import weight_reduce_loss 12 | 13 | 14 | @LOSSES.register_module() 15 | class CEFocalLoss(nn.Module): 16 | def __init__(self, use_sigmoid=False, gamma=2.0, alpha=0.25, reduction='mean', 17 | class_weight=None, loss_weight=1.0): 18 | super().__init__() 19 | assert use_sigmoid is False, 'Only ce focal loss supported now.' 20 | self.use_sigmoid = use_sigmoid 21 | self.gamma = gamma 22 | self.alpha = alpha 23 | self.reduction = reduction 24 | self.class_weight = class_weight 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, cls_score, label, weight=None, avg_factor=None, reduction_override=None): 28 | assert reduction_override in (None, 'none', 'mean', 'sum') 29 | reduction = (reduction_override if reduction_override else self.reduction) 30 | if self.class_weight is not None: 31 | class_weight = cls_score.new_tensor( 32 | self.class_weight, device=cls_score.device) 33 | else: 34 | class_weight = None 35 | loss = F.cross_entropy(cls_score, label, weight=class_weight, reduction='none') 36 | p = torch.exp(-loss) 37 | loss = (1 - p) ** self.gamma * loss 38 | if weight is not None: 39 | weight = weight.float() 40 | loss = weight_reduce_loss( 41 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor) 42 | return loss 43 | -------------------------------------------------------------------------------- /examples/demo/image_demo.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from mmdet.apis import inference_detector, init_detector 3 | import numpy as np 4 | from mmdet_extension.core.utils.image import imshow_det_bboxes 5 | import mmdet_extension 6 | 7 | 8 | def main(): 9 | parser = ArgumentParser() 10 | parser.add_argument( 11 | '--img', default='./dataset/coco/val2017/000000186938.jpg', 12 | help='Image file') 13 | parser.add_argument( 14 | '--config', default='./configs/baseline/ema_config/baseline_standard.py', help='Config file') 15 | parser.add_argument( 16 | '--checkpoint', default='./pretrained_model/baseline/instances_train2017.1@1.pth', 17 | help='Checkpoint file') 18 | parser.add_argument( 19 | '--output', default=None, 20 | help='output image for this demo') 21 | 22 | parser.add_argument( 23 | '--device', default='cuda:0', help='Device used for inference') 24 | parser.add_argument( 25 | '--score-thr', type=float, default=0.6, help='bbox score threshold') 26 | args = parser.parse_args() 27 | 28 | # build the model from a 
config file and a checkpoint file 29 | model = init_detector(args.config, args.checkpoint, device=args.device) 30 | 31 | # test a single image 32 | result = inference_detector(model, args.img) 33 | 34 | # visualize or save the results 35 | bboxes, labels = [], [] 36 | for c, r in enumerate(result): 37 | if len(r) > 0: 38 | bboxes.append(r) 39 | labels.append(np.array([c] * len(r))) 40 | bboxes = np.concatenate(bboxes) 41 | labels = np.concatenate(labels) 42 | imshow_det_bboxes(args.img, bboxes, labels.astype(np.int), class_names=model.CLASSES, 43 | score_thr=args.score_thr, thickness=2, font_size=13, 44 | out_file=args.output 45 | ) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /mmdet_extension/models/detectors/semi_two_stage.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | semi-supervised two stage detector 5 | """ 6 | import torch 7 | 8 | from mmdet.core import bbox2result 9 | from mmdet.models.detectors import TwoStageDetector 10 | 11 | from mmdet_extension.models.detectors.semi_base import SemiBaseDetector 12 | 13 | 14 | class SemiTwoStageDetector(SemiBaseDetector, TwoStageDetector): 15 | def __init__(self, 16 | backbone, 17 | rpn_head, 18 | roi_head, 19 | train_cfg, 20 | test_cfg, 21 | neck=None, 22 | pretrained=None, 23 | # ema model 24 | ema_config=None, 25 | ema_ckpt=None, 26 | classes=None 27 | ): 28 | SemiBaseDetector.__init__(self, ema_config=ema_config, ema_ckpt=ema_ckpt, classes=classes) 29 | TwoStageDetector.__init__(self, backbone=backbone, rpn_head=rpn_head, roi_head=roi_head, 30 | train_cfg=train_cfg, test_cfg=test_cfg, neck=neck, pretrained=pretrained) 31 | 32 | @torch.no_grad() 33 | def inference_unlabeled(self, img, img_metas, rescale=True, return_feat=False): 34 | ema_model = self.ema_model.module 35 | # inference: create pseudo label 36 | x = ema_model.extract_feat(img) 37 | proposal_list = ema_model.rpn_head.simple_test_rpn(x, img_metas) 38 | # bboxes 39 | det_bboxes, det_labels = ema_model.roi_head.simple_test_bboxes( 40 | x, img_metas, proposal_list, ema_model.roi_head.test_cfg, rescale=rescale) 41 | bbox_results = [ 42 | bbox2result(det_bboxes[i], det_labels[i], self.num_classes) 43 | for i in range(len(det_bboxes))] 44 | if return_feat: # for soft teacher 45 | return x, bbox_results 46 | else: 47 | return bbox_results 48 | -------------------------------------------------------------------------------- /tools/datasets/preprocess_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # note: please use xonsh, instead of bash 3 | 4 | import os 5 | cd ../.. 
6 | 7 | cur_path = os.path.abspath(os.path.dirname(__file__)) 8 | $PYTHONPATH=cur_path 9 | 10 | # step1: create coco-standard (1%, 5%, 10%) 11 | coco_data_dir = './dataset/coco' 12 | for seed in [1, 2, 3, 4, 5]: 13 | for percent in [1, 5, 10]: 14 | python tools/datasets/prepare_coco_standard.py --data-dir @(coco_data_dir) --percent @(percent) --seed @(seed) 15 | 16 | # step2: create coco-additional 17 | additional_json = './dataset/coco/annotations/image_info_unlabeled2017.json' 18 | standard_json = './dataset/coco/annotations/instances_val2017.json' 19 | output_json = './dataset/coco/annotations/semi_supervised/unlabeled2017.json' 20 | python tools/datasets/convert_coco_additional.py --additional-json @(additional_json) \ 21 | --standard-json @(standard_json) --output-json @(output_json) 22 | 23 | # step3: create voc 24 | voc_data_dir = './dataset/voc' 25 | out_dir = './dataset/voc/annotations_json' 26 | python tools/datasets/convert_xml_to_json.py --devkit_path @(voc_data_dir) --out-dir @(out_dir) 27 | 28 | # step4: (optional) convert json file to txt file 29 | # coco 30 | json_dir = './dataset/coco/annotations/semi_supervised' 31 | txt_dir = './dataset/coco/annotations/semi_supervised_txt' 32 | unlabeled_json_list = [f for f in os.listdir(json_dir) if f.find('unlabeled')!=-1] 33 | if not os.path.exists(txt_dir): 34 | os.makedirs(txt_dir) 35 | for unlabeled_json in unlabeled_json_list: 36 | json_file = os.path.join(json_dir, unlabeled_json) 37 | txt_file = os.path.join(txt_dir, unlabeled_json.replace('.json', '.txt')) 38 | python tools/datasets/convert_json_to_txt.py --json-file @(json_file) --txt-file @(txt_file) 39 | 40 | # voc 41 | txt_dir = './dataset/voc/annotations_txt' 42 | json_file_list = os.listdir(out_dir) 43 | if not os.path.exists(txt_dir): 44 | os.makedirs(txt_dir) 45 | for json_name in json_file_list: 46 | json_file = os.path.join(out_dir, json_name) 47 | txt_file = os.path.join(txt_dir, json_name.replace('.json', '.txt')) 48 | python tools/datasets/convert_json_to_txt.py --json-file @(json_file) --txt-file @(txt_file) --dataset voc -------------------------------------------------------------------------------- /tools/datasets_uda/create_dataset_link.sh: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # please change this to your own environment 4 | prefix = '/data' 5 | 6 | def create_folder(file_root): 7 | if not os.path.exists(file_root): 8 | os.makedirs(file_root) 9 | 10 | cd ../.. 11 | create_folder('dataset') 12 | cd dataset 13 | 14 | # 1. C2F: Cityscapes as source, foggy as target, foggy as test 15 | print('create C2F dataset symlink: ') 16 | create_folder('C2F') 17 | cd C2F 18 | create_folder('labeled_data') 19 | cd labeled_data 20 | ln -s @(prefix)/city/VOC2007_citytrain/* . 21 | cd .. 22 | create_folder('unlabeled_data') 23 | cd unlabeled_data 24 | ln -s @(prefix)/foggycity/VOC2007_foggytrain/* . 25 | cd .. 26 | create_folder('test_data') 27 | cd test_data 28 | ln -s @(prefix)/foggycity/VOC2007_foggyval/* . 29 | cd ../.. 30 | 31 | # 2. C2B: Cityscapes as source, BDD100k as target, BDD100k as test 32 | print('create C2B dataset symlink: ') 33 | create_folder('C2B') 34 | cd C2B 35 | create_folder('labeled_data') 36 | cd labeled_data 37 | ln -s @(prefix)/city/VOC2007_citytrain/* . 38 | cd .. 39 | create_folder('unlabeled_data') 40 | cd unlabeled_data 41 | ln -s @(prefix)/BDD/VOC2007_bddtrain/* . 42 | cd .. 43 | create_folder('test_data') 44 | cd test_data 45 | ln -s @(prefix)/BDD/VOC2007_bddval/* . 46 | cd ../.. 
47 | 48 | # 3. K2C: KITTI as source, Cityscapes as target, Cityscapes as test 49 | print('create K2C dataset symlink: ') 50 | create_folder('K2C') 51 | cd K2C 52 | create_folder('labeled_data') 53 | cd labeled_data 54 | ln -s @(prefix)/kitti/* . 55 | cd .. 56 | create_folder('unlabeled_data') 57 | cd unlabeled_data 58 | ln -s @(prefix)/city-car/VOC2007_citytrain/* . 59 | cd .. 60 | create_folder('test_data') 61 | cd test_data 62 | ln -s @(prefix)/city-car/VOC2007_cityval/* . 63 | cd ../.. 64 | 65 | # 4. S2C: Sim10k as source, Cityscapes as target, Cityscapes as test 66 | print('create S2C dataset symlink: ') 67 | create_folder('S2C') 68 | cd S2C 69 | create_folder('labeled_data') 70 | cd labeled_data 71 | ln -s @(prefix)/sim/* . 72 | cd .. 73 | create_folder('unlabeled_data') 74 | cd unlabeled_data 75 | ln -s @(prefix)/city-car/VOC2007_citytrain/* . 76 | cd .. 77 | create_folder('test_data') 78 | cd test_data 79 | ln -s @(prefix)/city-car/VOC2007_cityval/* . 80 | cd ../.. 81 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/standard_roi_head_st.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | standard roi_head for Soft-Teacher 5 | """ 6 | from mmdet.models.builder import HEADS 7 | from mmdet.core import bbox2roi 8 | 9 | from mmdet_extension.models.roi_head import StandardRoIHeadBase 10 | 11 | 12 | @HEADS.register_module() 13 | class StandardRoIHeadST(StandardRoIHeadBase): 14 | def forward_train_step1(self, 15 | x, 16 | img_metas, 17 | proposal_list, 18 | gt_bboxes, 19 | gt_labels, 20 | ): 21 | num_imgs = len(img_metas) 22 | sampling_results = [] 23 | for i in range(num_imgs): 24 | assign_result = self.bbox_assigner.assign( 25 | proposal_list[i], gt_bboxes[i], None, gt_labels[i]) 26 | sampling_result = self.bbox_sampler.sample( 27 | assign_result, proposal_list[i], 28 | gt_bboxes[i], gt_labels[i], 29 | feats=[lvl_feat[i][None] for lvl_feat in x]) 30 | sampling_results.append(sampling_result) 31 | return sampling_results 32 | 33 | def forward_train_step2(self, 34 | x, 35 | sampling_results, 36 | gt_bboxes, 37 | gt_labels, 38 | soft_weight, 39 | ): 40 | losses = dict() 41 | rois = bbox2roi([res.bboxes for res in sampling_results]) 42 | bbox_results = self._bbox_forward(x, rois) 43 | 44 | bbox_targets = self.bbox_head.get_targets( 45 | sampling_results, gt_bboxes, gt_labels, self.train_cfg) 46 | # reset the negative label weight 47 | bbox_targets = list(bbox_targets) 48 | bbox_targets[1] = soft_weight 49 | loss_bbox = self.bbox_head.loss(bbox_results['cls_score'], 50 | bbox_results['bbox_pred'], rois, 51 | *bbox_targets) 52 | bbox_results.update(loss_bbox=loss_bbox) 53 | losses.update(bbox_results['loss_bbox']) 54 | return losses -------------------------------------------------------------------------------- /configs/README.md: -------------------------------------------------------------------------------- 1 | ## Configs 2 | 3 | > NOTE: more detail about different **training settings** can be found in supplementary materials. 
4 | 5 | ### baseline 6 | 7 | | file | stage | support data_name | 8 | | -------------------- | ------------------------------------------------------- | ------------------------------------ | 9 | | baseline_ssod.py | Training-1: use labeled data to train a baseline (SSOD) | voc, coco-standard, coco-additional | 10 | | baseline_uda.py | Training-1: use labeled data to train a baseline (DAOD) | C2F, K2C, S2C | 11 | | baseline_uda_test.py | Evaluation (DAOD) | C2F, C2B, K2C, S2C | 12 | 13 | ### labelmatch 14 | 15 | | file | stage | training setting | 16 | | ---------------------------- | ------------------------------------------------------------ | ---------------- | 17 | | labelmatch_voc.py | Training-2: use labeled data + unlabeled data to train detector (VOC) | VOC | 18 | | labelmatch_standard.py | Training-2: use labeled data + unlabeled data to train detector (COCO-standard) | Ablation | 19 | | labelmatch_standard_paper.py | Training-2: use labeled data + unlabeled data to train detector (COCO-standard) | COCO-standard | 20 | | labelmatch_additional.py | Training-2: use labeled data + unlabeled data to train detector (COCO-additional) | COCO-additional | 21 | | labelmatch_uda.py | Training-2: use labeled data + unlabeled data to train detector (DAOD) | DAOD | 22 | | labelmatch_uda_prior.py | Training-2: use labeled data + unlabeled data to train detector (DAOD, ideal setting) | DAOD | 23 | 24 | ### others 25 | 26 | | file | stage | training setting | 27 | | --------------------------------------------- | ------------------------------------------------------------ | ---------------- | 28 | | stac/stac_standard.py | Training-2: use labeled data + unlabeled data to train detector (COCO-standard) | Ablation | 29 | | unbiased_teacher/unbiased_teacher_standard.py | Training-2: use labeled data + unlabeled data to train detector (COCO-standard) | Ablation | 30 | | soft_teacher/soft_teacher_standard.py | Training-2: use labeled data + unlabeled data to train detector (COCO-standard) | Ablation | -------------------------------------------------------------------------------- /mmdet_extension/core/utils/classes.py: -------------------------------------------------------------------------------- 1 | COCO_CLASSES = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 2 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 3 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 4 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 5 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 6 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 7 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 8 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 9 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 10 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 11 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 12 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 13 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 14 | 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 15 | 16 | COCO_CLASSES_ORG = ('N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 17 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 18 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 19 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 20 | 'handbag', 'tie', 'suitcase', 
'frisbee', 'skis', 'snowboard', 'sports ball', 21 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 22 | 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 23 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 24 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 25 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 26 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 27 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush') 28 | 29 | VOC_CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 30 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 31 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 32 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/standard_roi_head_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | 4 | """ 5 | standard roi_head for LabelMatch 6 | """ 7 | import torch 8 | 9 | from mmdet.models.builder import HEADS 10 | from mmdet.core import bbox2roi 11 | 12 | from mmdet_extension.models.roi_head import StandardRoIHeadBase 13 | 14 | 15 | @HEADS.register_module() 16 | class StandardRoIHeadLM(StandardRoIHeadBase): 17 | def forward_train_step1(self, 18 | x, 19 | img_metas, 20 | proposal_list, 21 | gt_bboxes, 22 | gt_labels, 23 | gt_bboxes_ignore=None, 24 | gt_labels_ignore=None, 25 | ): 26 | num_imgs = len(img_metas) 27 | sampling_results = [] 28 | for i in range(num_imgs): 29 | assign_result = self.bbox_assigner.assign( 30 | proposal_list[i], gt_bboxes[i], None, gt_labels[i]) 31 | assign_result_ig = self.bbox_assigner.assign( 32 | proposal_list[i], gt_bboxes_ignore[i], None, gt_labels_ignore[i]) 33 | sampling_result = self.bbox_sampler.sample_pos_ig( 34 | assign_result, assign_result_ig, proposal_list[i], 35 | gt_bboxes[i], gt_labels[i], gt_bboxes_ignore[i], gt_labels_ignore[i], 36 | feats=[lvl_feat[i][None] for lvl_feat in x]) 37 | sampling_results.append(sampling_result) 38 | return sampling_results 39 | 40 | def forward_train_step2(self, 41 | x, 42 | sampling_results, 43 | gt_bboxes, 44 | gt_labels 45 | ): 46 | losses = dict() 47 | rois = bbox2roi([res.bboxes for res in sampling_results]) 48 | flag = torch.cat([res.ignore_flag for res in sampling_results]) 49 | bbox_results = self._bbox_forward(x, rois) 50 | 51 | bbox_targets = self.bbox_head.get_targets_lm( 52 | sampling_results, gt_bboxes, gt_labels, self.train_cfg) 53 | loss_bbox = self.bbox_head.loss(bbox_results['cls_score'], 54 | bbox_results['bbox_pred'], rois, 55 | *bbox_targets) 56 | bbox_results.update(loss_bbox=loss_bbox) 57 | losses.update(bbox_results['loss_bbox']) 58 | scores = bbox_results['cls_score'][flag] 59 | return losses, scores 60 | -------------------------------------------------------------------------------- /mmdet_extension/models/detectors/stac.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | Re-implementation: A Simple Semi-Supervised Learning Framework for Object Detection 5 | """ 6 | import torch 7 | 8 | from mmdet.models.builder import DETECTORS 9 | 10 | from mmdet_extension.models.detectors import SemiTwoStageDetector 11 | 12 | 13 | @DETECTORS.register_module() 14 | class STAC(SemiTwoStageDetector): 15 | def __init__(self, 16 | backbone, 17 | rpn_head, 18 | roi_head, 19 | train_cfg, 20 | test_cfg, 21 | neck=None, 22 | pretrained=None, 23 | # config 24 | cfg=dict(), 25 | ): 26 | super().__init__(backbone=backbone, rpn_head=rpn_head, roi_head=roi_head, train_cfg=train_cfg, 27 | test_cfg=test_cfg, neck=neck, pretrained=pretrained) 28 | self.debug = cfg.get('debug', False) 29 | self.num_classes = self.roi_head.bbox_head.num_classes 30 | 31 | # hyper-parameter 32 | self.weight_u = cfg.get('weight_u', 2.0) 33 | 34 | # analysis 35 | self.image_num = 0 36 | self.pseudo_num = 0 37 | 38 | def forward_train_semi( 39 | self, img, img_metas, gt_bboxes, gt_labels, 40 | img_unlabeled, img_metas_unlabeled, gt_bboxes_unlabeled, gt_labels_unlabeled): 41 | device = img.device 42 | self.image_num += len(img_metas_unlabeled) 43 | self.pseudo_num += sum([a.shape[0] for a in gt_labels_unlabeled]) 44 | # # ---------------------label data--------------------- 45 | losses = self.forward_train(img, img_metas, gt_bboxes, gt_labels) 46 | losses = self.parse_loss(losses) 47 | # # -------------------unlabeled data------------------- 48 | if self.debug: 49 | self.visual_online(img_unlabeled, gt_bboxes_unlabeled, gt_labels_unlabeled) 50 | losses_unlabeled = self.forward_train(img_unlabeled, img_metas_unlabeled, 51 | gt_bboxes_unlabeled, gt_labels_unlabeled) 52 | losses_unlabeled = self.parse_loss(losses_unlabeled) 53 | for key, val in losses_unlabeled.items(): 54 | if key.find('loss') == -1: 55 | continue 56 | losses_unlabeled[key] = self.weight_u * val 57 | losses.update({f'{key}_unlabeled': val for key, val in losses_unlabeled.items()}) 58 | extra_info = { 59 | 'pseudo_num': torch.Tensor([self.pseudo_num / self.image_num]).to(device), 60 | } 61 | losses.update(extra_info) 62 | return losses 63 | -------------------------------------------------------------------------------- /tools/datasets/convert_json_to_txt.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | """ 3 | convert coco json format to txt format: in order to save memory 4 | """ 5 | import os 6 | import argparse 7 | import mmcv 8 | 9 | from mmdet_extension.core.utils.classes import COCO_CLASSES_ORG, VOC_CLASSES 10 | 11 | COCO_MAP = {} 12 | idx = 1 13 | for cls in COCO_CLASSES_ORG: 14 | if cls == 'N/A': 15 | continue 16 | COCO_MAP[cls] = idx 17 | idx += 1 18 | 19 | VOC_MAP = {} 20 | idx = 1 21 | for cls in VOC_CLASSES: 22 | if cls == 'N/A': 23 | continue 24 | VOC_MAP[cls] = idx 25 | idx += 1 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser( 29 | description='Convert json format to txt format') 30 | parser.add_argument('--json-file', default='', 31 | help='json annotations') 32 | parser.add_argument('--txt-file', default='', 33 | help='text annotations') 34 | parser.add_argument('--dataset', choices=['coco', 'voc'], 35 | default='coco') 36 | 37 | args = parser.parse_args() 38 | 39 | json_file = args.json_file 40 | txt_file = args.txt_file 41 | 42 | txt_root = os.path.dirname(txt_file) 43 | if not os.path.exists(txt_root): 44 | os.makedirs(txt_root) 45 | 46 | print(f'------------process {json_file}------------') 47 | json_info = mmcv.load(json_file) 48 | 49 | # create dict(image_id: bbox) 50 | image2bbox = {} 51 | id2image = {} 52 | all_images = json_info['images'] 53 | for image in all_images: 54 | file_name = image['file_name'] 55 | height, width = image['height'], image['width'] 56 | id = image['id'] 57 | id2image[id] = (file_name, height, width) 58 | image2bbox[(file_name, height, width)] = [] 59 | if 'annotations' in json_info: 60 | all_annotations = json_info['annotations'] 61 | else: 62 | all_annotations = [] 63 | 64 | CLASSES = COCO_CLASSES_ORG if args.dataset == 'coco' else VOC_CLASSES 65 | CLASSES_MAP = COCO_MAP if args.dataset == 'coco' else VOC_MAP 66 | 67 | for annotation in all_annotations: 68 | id = annotation['image_id'] 69 | bbox = annotation['bbox'] 70 | ignore = annotation['iscrowd'] 71 | cls = CLASSES_MAP[CLASSES[annotation['category_id']]] 72 | info = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]] + [cls, ignore] 73 | image2bbox[id2image[id]].append(info) 74 | 75 | with open(txt_file, 'w', encoding='utf-8') as f: 76 | for key, bboxes in image2bbox.items(): 77 | line_info = [key[0], str(key[1]), str(key[2]), str(len(bboxes))] 78 | for bbox in bboxes: 79 | line_info += [str(int(b)) for b in bbox] 80 | f.write(' '.join(line_info) + '\n') 81 | -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/samplers/sampling_result_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | from mmdet.core.bbox.samplers import SamplingResult 5 | 6 | 7 | class SamplingResultLM(SamplingResult): 8 | def __init__(self, pos_inds, ig_inds, neg_inds, bboxes, gt_bboxes, gt_bboxes_ignore, assign_result, 9 | assign_result_ig, gt_flags): 10 | self.pos_inds = pos_inds 11 | self.ig_inds = ig_inds 12 | self.neg_inds = neg_inds 13 | self.pos_bboxes = bboxes[pos_inds] 14 | self.ig_bboxes = bboxes[ig_inds] 15 | self.neg_bboxes = bboxes[neg_inds] 16 | self.pos_is_gt = gt_flags[pos_inds] 17 | self.ig_is_gt = gt_flags[ig_inds] 18 | 19 | self.num_gts = gt_bboxes.shape[0] 20 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 21 | self.num_igs = gt_bboxes_ignore.shape[0] 22 | self.ig_assigned_gt_inds = assign_result_ig.gt_inds[ig_inds] - 1 23 | if gt_bboxes.numel() == 0: 24 | # hack for index error case 25 | assert self.pos_assigned_gt_inds.numel() == 0 26 | self.pos_gt_bboxes = torch.empty_like(gt_bboxes).view(-1, 4) 27 | else: 28 | if len(gt_bboxes.shape) < 2: 29 | gt_bboxes = gt_bboxes.view(-1, 4) 30 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 31 | if assign_result.labels is not None: 32 | self.pos_gt_labels = assign_result.labels[pos_inds] 33 | else: 34 | self.pos_gt_labels = None 35 | 36 | if gt_bboxes_ignore.numel() == 0: 37 | # hack for index error case 38 | assert self.ig_assigned_gt_inds.numel() == 0 39 | self.ig_gt_bboxes = torch.empty_like(gt_bboxes_ignore).view(-1, 4) 40 | else: 41 | if len(gt_bboxes_ignore.shape) < 2: 42 | gt_bboxes_ignore = gt_bboxes_ignore.view(-1, 4) 43 | self.ig_gt_bboxes = gt_bboxes_ignore[self.ig_assigned_gt_inds, :] 44 | if assign_result_ig.labels is not None: 45 | self.ig_gt_labels = assign_result_ig.labels[ig_inds] 46 | else: 47 | self.ig_gt_labels = None 48 | 49 | # for reliable pseudo label mining 50 | self.pos_reg_weight = torch.ones_like(self.pos_assigned_gt_inds) 51 | self.ig_reg_weight = torch.zeros_like(self.ig_assigned_gt_inds) 52 | self.neg_reg_weight = torch.ones_like(self.neg_bboxes[:, -1]) 53 | 54 | @property 55 | def bboxes(self): 56 | return torch.cat([self.pos_bboxes, self.ig_bboxes, self.neg_bboxes]) 57 | 58 | @property 59 | def ignore_flag(self): 60 | return torch.cat([torch.zeros_like(self.pos_bboxes[:, -1]), torch.ones_like(self.ig_bboxes[:, -1]), 61 | torch.zeros_like(self.neg_bboxes[:, -1])]).bool() 62 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/pipelines/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import math 4 | import random 5 | import warnings 6 | import numpy as np 7 | import numbers 8 | 9 | from mmdet.datasets.builder import PIPELINES 10 | 11 | 12 | @PIPELINES.register_module() 13 | class RandomErasing(object): 14 | def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): 15 | assert isinstance(value, (numbers.Number, str, tuple, list)) 16 | if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): 17 | warnings.warn("range should be of kind (min, max)") 18 | if scale[0] < 0 or scale[1] > 1: 19 | raise ValueError("range of scale should be between 0 and 1") 20 | if p < 0 or p > 1: 21 | raise ValueError("range of random erasing probability should be between 0 and 1") 22 | 23 | self.p = p 24 | self.scale = scale 25 | self.ratio = ratio 26 | self.value = value 27 | self.inplace = inplace 28 | 29 | @staticmethod 30 | def get_params(img, scale, ratio, value=0): 31 | """Get parameters for ``erase`` for a random erasing. 32 | 33 | Args: 34 | img (np.array): ndarray image of size (H, W, C) to be erased. 35 | scale: range of proportion of erased area against input image. 36 | ratio: range of aspect ratio of erased area. 37 | 38 | Returns: 39 | tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing. 40 | """ 41 | img_h, img_w, img_c = img.shape 42 | area = img_h * img_w 43 | 44 | for _ in range(10): 45 | erase_area = random.uniform(scale[0], scale[1]) * area 46 | aspect_ratio = random.uniform(ratio[0], ratio[1]) 47 | 48 | h = int(round(math.sqrt(erase_area * aspect_ratio))) 49 | w = int(round(math.sqrt(erase_area / aspect_ratio))) 50 | 51 | if h < img_h and w < img_w: 52 | i = random.randint(0, img_h - h) 53 | j = random.randint(0, img_w - w) 54 | if isinstance(value, numbers.Number): 55 | v = value 56 | elif value == 'random': 57 | v = np.random.randint(0, 256, size=(h, w, img_c)) 58 | else: 59 | raise NotImplementedError('Not implement') 60 | return i, j, h, w, v 61 | 62 | # Return original image 63 | return 0, 0, img_h, img_w, img 64 | 65 | def __call__(self, results): 66 | if random.uniform(0, 1) >= self.p: 67 | return results 68 | img = results['img'] 69 | y, x, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value) 70 | img[y:y + h, x:x + w] = v 71 | results['img'] = img 72 | return results 73 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/pipelines/semi_augment.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
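# Weak/strong augmentation pairs for semi-supervised training: AugmentationUTWeak
# applies mild ColorJitter / ToGray / GaussianBlur via Albu, while
# AugmentationUTStrong uses stronger photometric jitter plus (optionally
# box-aware) RandomErasing; AugmentationUT selects between the two.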
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | Augmentation in SSOD 5 | """ 6 | from mmdet.datasets.pipelines import Albu 7 | from mmdet.datasets import PIPELINES 8 | 9 | from mmdet_extension.datasets.pipelines.transforms_box import RandomErasing, RandomErasingBox 10 | 11 | 12 | # # -------------------------Unbiased Teacher augmentation------------------------- 13 | class RandomErase(object): 14 | def __init__(self, use_box=False): 15 | CLS = RandomErasingBox if use_box else RandomErasing 16 | self.transforms = [ 17 | CLS(p=0.7, scale=(0.05, 0.2), ratio=(0.3, 3.3), value="random"), 18 | CLS(p=0.5, scale=(0.02, 0.2), ratio=(0.1, 6), value="random"), 19 | CLS(p=0.3, scale=(0.02, 0.2), ratio=(0.05, 8), value="random") 20 | ] 21 | 22 | def __call__(self, results): 23 | for t in self.transforms: 24 | results = t(results) 25 | return results 26 | 27 | 28 | class AugmentationUTWeak(object): 29 | def __init__(self): 30 | self.transforms_1 = Albu(transforms=[ 31 | dict(type='ColorJitter', brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5), 32 | dict(type='ToGray', p=0.2), 33 | dict(type='GaussianBlur', sigma_limit=(0.1, 2.0), p=0.2), 34 | ], bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_labels']), 35 | keymap={'img': 'image', 'gt_bboxes': 'bboxes'} 36 | ) 37 | 38 | def __call__(self, results): 39 | results = self.transforms_1(results) 40 | return results 41 | 42 | 43 | class AugmentationUTStrong(object): 44 | def __init__(self, use_re=True, use_box=False): 45 | self.transforms_1 = Albu(transforms=[ 46 | dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1, p=0.8), 47 | dict(type='ToGray', p=0.2), 48 | dict(type='GaussianBlur', sigma_limit=(0.1, 2.0), p=0.5), 49 | ], bbox_params=dict(type='BboxParams', format='pascal_voc', label_fields=['gt_labels']), 50 | keymap={'img': 'image', 'gt_bboxes': 'bboxes'} 51 | ) 52 | self.transforms_2 = RandomErase(use_box) 53 | self.use_re = use_re 54 | 55 | def __call__(self, results): 56 | results = self.transforms_1(results) 57 | if self.use_re: 58 | results = self.transforms_2(results) 59 | return results 60 | 61 | 62 | @PIPELINES.register_module() 63 | class AugmentationUT(object): 64 | def __init__(self, use_weak=False, use_re=True, use_box=False): 65 | if use_weak: 66 | self.transforms = AugmentationUTWeak() 67 | else: 68 | self.transforms = AugmentationUTStrong(use_re=use_re, use_box=use_box) 69 | 70 | def __call__(self, results): 71 | results = self.transforms(results) 72 | return results 73 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/semi_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
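# SemiDataset pairs a labeled dataset with an unlabeled one: each __getitem__
# draws a random labeled sample and attaches the unlabeled sample (and, if
# pipeline_u_1 is set, a second augmented view) under '*_unlabeled' /
# '*_unlabeled_1' keys; __len__ follows the unlabeled set.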
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import copy 4 | import random 5 | from torch.utils.data import Dataset 6 | from mmdet.datasets.builder import DATASETS 7 | from mmdet.datasets.pipelines import Compose 8 | from mmdet.datasets import CocoDataset 9 | 10 | from mmdet_extension.datasets.txt_style import TXTDataset 11 | from mmdet_extension.core.utils.classes import COCO_CLASSES 12 | 13 | 14 | @DATASETS.register_module() 15 | class SemiDataset(Dataset): 16 | CLASSES = COCO_CLASSES 17 | 18 | def __init__(self, 19 | ann_file, 20 | pipeline, 21 | ann_file_u, 22 | pipeline_u_share, 23 | pipeline_u, 24 | pipeline_u_1, 25 | data_root=None, 26 | img_prefix='', 27 | seg_prefix=None, 28 | proposal_file=None, 29 | data_root_u=None, 30 | img_prefix_u='', 31 | seg_prefix_u=None, 32 | proposal_file_u=None, 33 | classes=None, 34 | filter_empty_gt=True, 35 | ): 36 | super().__init__() 37 | 38 | self.coco_labeled = self.get_data_cls(ann_file)( 39 | ann_file, pipeline, data_root=data_root, img_prefix=img_prefix, 40 | seg_prefix=seg_prefix, proposal_file=proposal_file, test_mode=False, 41 | filter_empty_gt=filter_empty_gt, classes=classes) 42 | self.coco_unlabeled = self.get_data_cls(ann_file_u)( 43 | ann_file_u, pipeline_u_share, data_root=data_root_u, img_prefix=img_prefix_u, 44 | seg_prefix=seg_prefix_u, proposal_file=proposal_file_u, test_mode=False, 45 | filter_empty_gt=False, classes=classes 46 | ) 47 | self.CLASSES = self.coco_labeled.get_classes(classes) 48 | self.pipeline_u = Compose(pipeline_u) 49 | self.pipeline_u_1 = Compose(pipeline_u_1) if pipeline_u_1 else None 50 | 51 | self.flag = self.coco_unlabeled.flag # not used 52 | 53 | def get_data_cls(self, ann_file): 54 | if ann_file.endswith('.json'): 55 | return CocoDataset 56 | elif ann_file.endswith('.txt'): 57 | return TXTDataset 58 | else: 59 | raise ValueError(f'please use json or text format annotations') 60 | 61 | def __len__(self): 62 | return len(self.coco_unlabeled) 63 | 64 | def __getitem__(self, idx): 65 | idx_label = random.randint(0, len(self.coco_labeled) - 1) 66 | results = self.coco_labeled[idx_label] 67 | 68 | results_u = self.coco_unlabeled[idx] 69 | if self.pipeline_u_1: 70 | results_u_1 = copy.deepcopy(results_u) 71 | results_u_1 = self.pipeline_u_1(results_u_1) 72 | results.update({f'{key}_unlabeled_1': val for key, val in results_u_1.items()}) 73 | results_u = self.pipeline_u(results_u) 74 | results.update({f'{key}_unlabeled': val for key, val in results_u.items()}) 75 | return results 76 | 77 | def update_ann_file(self, ann_file): 78 | self.coco_unlabeled.data_infos = self.coco_unlabeled.load_annotations(ann_file) 79 | -------------------------------------------------------------------------------- /mmdet_extension/core/utils/colormap.py: -------------------------------------------------------------------------------- 1 | """ 2 | colormap 3 | """ 4 | import numpy as np 5 | 6 | _COLORS = np.array( 7 | [ 8 | 0.000, 0.447, 0.741, 9 | 0.850, 0.325, 0.098, 10 | 0.929, 0.694, 0.125, 11 | 0.494, 0.184, 0.556, 12 | 0.466, 0.674, 0.188, 13 | 0.301, 0.745, 0.933, 14 | 0.635, 0.078, 0.184, 15 | 0.300, 0.300, 0.300, 16 | 0.600, 0.600, 0.600, 17 | 1.000, 0.000, 0.000, 18 | 1.000, 0.500, 0.000, 19 | 0.749, 0.749, 0.000, 20 | 0.000, 1.000, 0.000, 21 | 0.000, 0.000, 1.000, 22 | 0.667, 0.000, 1.000, 23 | 0.333, 0.333, 0.000, 24 | 0.333, 0.667, 0.000, 25 | 0.333, 1.000, 0.000, 26 | 0.667, 0.333, 0.000, 27 | 0.667, 0.667, 0.000, 28 | 0.667, 1.000, 0.000, 29 | 1.000, 0.333, 0.000, 30 | 1.000, 0.667, 0.000, 31 | 
1.000, 1.000, 0.000, 32 | 0.000, 0.333, 0.500, 33 | 0.000, 0.667, 0.500, 34 | 0.000, 1.000, 0.500, 35 | 0.333, 0.000, 0.500, 36 | 0.333, 0.333, 0.500, 37 | 0.333, 0.667, 0.500, 38 | 0.333, 1.000, 0.500, 39 | 0.667, 0.000, 0.500, 40 | 0.667, 0.333, 0.500, 41 | 0.667, 0.667, 0.500, 42 | 0.667, 1.000, 0.500, 43 | 1.000, 0.000, 0.500, 44 | 1.000, 0.333, 0.500, 45 | 1.000, 0.667, 0.500, 46 | 1.000, 1.000, 0.500, 47 | 0.000, 0.333, 1.000, 48 | 0.000, 0.667, 1.000, 49 | 0.000, 1.000, 1.000, 50 | 0.333, 0.000, 1.000, 51 | 0.333, 0.333, 1.000, 52 | 0.333, 0.667, 1.000, 53 | 0.333, 1.000, 1.000, 54 | 0.667, 0.000, 1.000, 55 | 0.667, 0.333, 1.000, 56 | 0.667, 0.667, 1.000, 57 | 0.667, 1.000, 1.000, 58 | 1.000, 0.000, 1.000, 59 | 1.000, 0.333, 1.000, 60 | 1.000, 0.667, 1.000, 61 | 0.333, 0.000, 0.000, 62 | 0.500, 0.000, 0.000, 63 | 0.667, 0.000, 0.000, 64 | 0.833, 0.000, 0.000, 65 | 1.000, 0.000, 0.000, 66 | 0.000, 0.167, 0.000, 67 | 0.000, 0.333, 0.000, 68 | 0.000, 0.500, 0.000, 69 | 0.000, 0.667, 0.000, 70 | 0.000, 0.833, 0.000, 71 | 0.000, 1.000, 0.000, 72 | 0.000, 0.000, 0.167, 73 | 0.000, 0.000, 0.333, 74 | 0.000, 0.000, 0.500, 75 | 0.000, 0.000, 0.667, 76 | 0.000, 0.000, 0.833, 77 | 0.000, 0.000, 1.000, 78 | 0.000, 0.000, 0.000, 79 | 0.143, 0.143, 0.143, 80 | 0.857, 0.857, 0.857, 81 | 1.000, 1.000, 1.000, 82 | ] 83 | ).astype(np.float32).reshape(-1, 3) 84 | 85 | np.random.seed(666) 86 | 87 | 88 | def colormap(rgb=True, maximum=1, num_classes=80): 89 | """ 90 | Args: 91 | rgb (bool): whether to return RGB colors or BGR colors. 92 | maximum (int): either 255 or 1 93 | 94 | Returns: 95 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 96 | """ 97 | assert maximum in [255, 1], maximum 98 | colors = [] 99 | for i in range(num_classes): 100 | idx = np.random.randint(0, len(_COLORS)) 101 | c = _COLORS[idx] * maximum 102 | if not rgb: 103 | c = c[::-1] 104 | colors.append(c) 105 | colors = np.array(colors) 106 | return colors 107 | -------------------------------------------------------------------------------- /docs/domain_adaption.md: -------------------------------------------------------------------------------- 1 | # DAOD 2 | 3 | In this part, we give the tutorial about domain adaptive object detection (DAOD). 4 | 5 | ## Dataset 6 | 7 | | Tasks |C2F |C2B |K2C |S2C | 8 | | ----------------- | ------------------- | ------------------ | ---------------- | --------------------- | 9 | | Source(Labeled) |Cityscapes |Cityscapes |KITTI |Sim10k | 10 | | Target(Unlabeled)|Foggy-Cityscapes |BDD100k-Daytime |Cityscapes |Cityscapes | 11 | 12 | ## Usage 13 | 14 | ### Training 15 | 16 | #### 1. Use labeled data to train a baseline (aka "source only" model) 17 | 18 | Before training,please download the pretrained backbone ([vgg](https://www.dropbox.com/s/s3brpk0bdq60nyb/vgg16_caffe.pth?dl=0)) to `pretrained_model/backbone`. 19 | 20 | ```shell 21 | # |---------------------|--------|---------| 22 | # | xonsh train_gpu2.sh | config | dataset | 23 | # |---------------------|--------|---------| 24 | # there are three dataset for DAOD baseline: C2F, K2C, S2C 25 | # Note that C2B share the same "source only" model with C2F 26 | cd examples/train/xonsh 27 | xonsh train_gpu2.sh ./configs/baseline/baseline_uda.py C2F 28 | ``` 29 | 30 | - In our implementation, we use 2-gpus to train. 
31 | 32 | - You can also run `bash train_baseline_uda.sh` in `examples/train/bash` 33 | 34 | - After training, we organize the pretrained baseline to `pretrained_model/baseline` as follows: 35 | 36 | ```shell 37 | pretrained_model/ 38 | └── baseline/ 39 | ├── C2F.pth 40 | ├── K2C.pth 41 | └── S2C.pth 42 | ``` 43 | 44 | #### 2. Use labeled data + unlabeled data to train detector 45 | 46 | ```shell 47 | ## there are four adaptation tasks: C2F, C2B, K2C, S2C 48 | ## C2F and C2B share the same "source only" model 49 | cd examples/train/xonsh 50 | xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_uda.py C2F 51 | ``` 52 | 53 | - In our implementation, we use 8-gpus to train. 54 | - You can also run `bash train_uda.sh` in `examples/train/bash` 55 | 56 | ### Evaluation 57 | 58 | ```shell 59 | # change "data_name" and "checkpoint" in scripts to support different dataset and trained model 60 | cd examples/eval 61 | xonsh eval_uda.sh 62 | ``` 63 | 64 | ## Performance 65 | 66 | - $\dagger$ is an ideal setting, using the label distribution from unlabeled data annotations. 67 | - mAP: AP50 68 | 69 | #### Normal-to-foggy weather adaptation 70 | 71 | | C2F | mAP | truck | car | rider | person | train | motor | bicycle | bus | 72 | | ----------- | ---- | ----- | ---- | ----- | ------ | ----- | ----- | ------- | ---- | 73 | | source only | 30.9 | 19.2 | 47.9 | 40.8 | 34.8 | 7.8 | 24.2 | 36.0 | 36.4 | 74 | | LabelMatch | 52.4 | 42.0 | 62.2 | 55.4 | 45.3 | 55.1 | 43.5 | 51.5 | 64.1 | 75 | 76 | #### Small-to-large scale dataset adaptation 77 | 78 | | C2B | mAP | truck | car | rider | person | train | motor | bicycle | bus | 79 | | -------------------- | ---- | ----- | ---- | ----- | ------ | ----- | ----- | ------- | ---- | 80 | | source only | 28.7 | 18.3 | 50.0 | 33.3 | 35.8 | / | 18.4 | 27.6 | 17.0 | 81 | | LabelMatch | 38.8 | 39.4 | 54.6 | 37.4 | 42.9 | / | 25.7 | 29.8 | 41.7 | 82 | | LabelMatch$^\dagger$ | 44.5 | 39.8 | 55.4 | 44.5 | 44.8 | / | 38.6 | 41.5 | 47.1 | 83 | 84 | #### Cross-Camera adaptation & Synthetic-to-Real adaptation 85 | 86 | | K2C | AP | S2C | AP | 87 | | ---------------------- | ---- | -------------------------- | ---- | 88 | | source only | 42.2 | source only | 36.5 | 89 | | LabelMatch | 51.0 | LabelMatch | 52.7 | 90 | | LabelMatch$^{\dagger}$ | 52.2 | LabelMatch$^{\dagger}$ | 53.8 | 91 | 92 | -------------------------------------------------------------------------------- /configs/baseline/ema_config/baseline_uda_cls1.py: -------------------------------------------------------------------------------- 1 | classes = ('car',) 2 | 3 | # # -------------------------model------------------------------ 4 | model = dict( 5 | type='FasterRCNN', 6 | pretrained='./pretrained_model/backbone/vgg16_caffe.pth', 7 | backbone=dict( 8 | type='VGG', 9 | depth=16, 10 | out_indices=(4,), # stride=16 11 | with_last_pool=False, 12 | ), 13 | neck=None, 14 | rpn_head=dict( 15 | type='RPNHead', 16 | in_channels=512, 17 | feat_channels=512, 18 | anchor_generator=dict( 19 | type='AnchorGenerator', 20 | scales=[4, 8, 16, 32], 21 | ratios=[0.5, 1.0, 2.0], 22 | strides=[16]), 23 | bbox_coder=dict( 24 | type='DeltaXYWHBBoxCoder', 25 | target_means=[.0, .0, .0, .0], 26 | target_stds=[1.0, 1.0, 1.0, 1.0]), 27 | loss_cls=dict( 28 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 29 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 30 | roi_head=dict( 31 | type='StandardRoIHeadBase', 32 | bbox_roi_extractor=dict( 33 | type='SingleRoIExtractor', 34 | roi_layer=dict(type='RoIAlign', output_size=7, 
sampling_ratio=0), 35 | out_channels=512, 36 | featmap_strides=[16]), 37 | bbox_head=dict( 38 | type='Shared2FCBBoxHead', 39 | in_channels=512, 40 | fc_out_channels=1024, 41 | roi_feat_size=7, 42 | num_classes=len(classes), 43 | bbox_coder=dict( 44 | type='DeltaXYWHBBoxCoder', 45 | target_means=[0., 0., 0., 0.], 46 | target_stds=[0.1, 0.1, 0.2, 0.2]), 47 | reg_class_agnostic=True, 48 | loss_cls=dict( 49 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 50 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 51 | # model training and testing settings 52 | train_cfg=dict( 53 | rpn=dict( 54 | assigner=dict( 55 | type='MaxIoUAssigner', 56 | pos_iou_thr=0.7, 57 | neg_iou_thr=0.3, 58 | min_pos_iou=0.3, 59 | match_low_quality=True, 60 | ignore_iof_thr=-1), 61 | sampler=dict( 62 | type='RandomSampler', 63 | num=256, 64 | pos_fraction=0.5, 65 | neg_pos_ub=-1, 66 | add_gt_as_proposals=False), 67 | allowed_border=-1, 68 | pos_weight=-1, 69 | debug=False), 70 | rpn_proposal=dict( 71 | nms_pre=2000, 72 | max_per_img=1000, 73 | nms=dict(type='nms', iou_threshold=0.7), 74 | min_bbox_size=0), 75 | rcnn=dict( 76 | assigner=dict( 77 | type='MaxIoUAssigner', 78 | pos_iou_thr=0.5, 79 | neg_iou_thr=0.5, 80 | min_pos_iou=0.5, 81 | match_low_quality=False, 82 | ignore_iof_thr=-1), 83 | sampler=dict( 84 | type='RandomSampler', 85 | num=512, 86 | pos_fraction=0.25, 87 | neg_pos_ub=-1, 88 | add_gt_as_proposals=True), 89 | pos_weight=-1, 90 | debug=False)), 91 | test_cfg=dict( 92 | rpn=dict( 93 | nms_pre=1000, 94 | max_per_img=1000, 95 | nms=dict(type='nms', iou_threshold=0.7), 96 | min_bbox_size=0), 97 | rcnn=dict( 98 | score_thr=0.001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=100) 101 | # soft-nms is also supported for rcnn testing 102 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 103 | )) 104 | -------------------------------------------------------------------------------- /configs/baseline/ema_config/baseline_uda_cls8.py: -------------------------------------------------------------------------------- 1 | classes = ('truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus') 2 | # # -------------------------model------------------------------ 3 | model = dict( 4 | type='FasterRCNN', 5 | pretrained='./pretrained_model/backbone/vgg16_caffe.pth', 6 | backbone=dict( 7 | type='VGG', 8 | depth=16, 9 | out_indices=(4,), # stride=16 10 | with_last_pool=False, 11 | ), 12 | neck=None, 13 | rpn_head=dict( 14 | type='RPNHead', 15 | in_channels=512, 16 | feat_channels=512, 17 | anchor_generator=dict( 18 | type='AnchorGenerator', 19 | scales=[4, 8, 16, 32], 20 | ratios=[0.5, 1.0, 2.0], 21 | strides=[16]), 22 | bbox_coder=dict( 23 | type='DeltaXYWHBBoxCoder', 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0]), 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 29 | roi_head=dict( 30 | type='StandardRoIHeadBase', 31 | bbox_roi_extractor=dict( 32 | type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 34 | out_channels=512, 35 | featmap_strides=[16]), 36 | bbox_head=dict( 37 | type='Shared2FCBBoxHead', 38 | in_channels=512, 39 | fc_out_channels=1024, 40 | roi_feat_size=7, 41 | num_classes=len(classes), 42 | bbox_coder=dict( 43 | type='DeltaXYWHBBoxCoder', 44 | target_means=[0., 0., 0., 0.], 45 | target_stds=[0.1, 0.1, 0.2, 0.2]), 46 | reg_class_agnostic=True, 47 | loss_cls=dict( 48 | 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 49 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 50 | # model training and testing settings 51 | train_cfg=dict( 52 | rpn=dict( 53 | assigner=dict( 54 | type='MaxIoUAssigner', 55 | pos_iou_thr=0.7, 56 | neg_iou_thr=0.3, 57 | min_pos_iou=0.3, 58 | match_low_quality=True, 59 | ignore_iof_thr=-1), 60 | sampler=dict( 61 | type='RandomSampler', 62 | num=256, 63 | pos_fraction=0.5, 64 | neg_pos_ub=-1, 65 | add_gt_as_proposals=False), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False), 69 | rpn_proposal=dict( 70 | nms_pre=2000, 71 | max_per_img=1000, 72 | nms=dict(type='nms', iou_threshold=0.7), 73 | min_bbox_size=0), 74 | rcnn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.5, 79 | min_pos_iou=0.5, 80 | match_low_quality=False, 81 | ignore_iof_thr=-1), 82 | sampler=dict( 83 | type='RandomSampler', 84 | num=512, 85 | pos_fraction=0.25, 86 | neg_pos_ub=-1, 87 | add_gt_as_proposals=True), 88 | pos_weight=-1, 89 | debug=False)), 90 | test_cfg=dict( 91 | rpn=dict( 92 | nms_pre=1000, 93 | max_per_img=1000, 94 | nms=dict(type='nms', iou_threshold=0.7), 95 | min_bbox_size=0), 96 | rcnn=dict( 97 | score_thr=0.001, 98 | nms=dict(type='nms', iou_threshold=0.5), 99 | max_per_img=100) 100 | # soft-nms is also supported for rcnn testing 101 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 102 | )) 103 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/bbox_heads/convfc_bbox_head_st.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | 5 | from mmdet.core import multi_apply 6 | from mmdet.models.roi_heads.bbox_heads import Shared2FCBBoxHead 7 | from mmdet.models.builder import HEADS 8 | 9 | 10 | @HEADS.register_module() 11 | class Shared2FCBBoxHeadST(Shared2FCBBoxHead): 12 | """ 13 | pos: only do classification 14 | ig: only do regression 15 | """ 16 | def _get_target_single_st( 17 | self, pos_bboxes, pos_gt_bboxes, pos_gt_labels, # positive 18 | ig_bboxes, ig_gt_bboxes, ig_gt_labels, # ignore 19 | neg_bboxes, cfg): 20 | num_pos = pos_bboxes.size(0) 21 | num_ig = ig_bboxes.size(0) 22 | num_neg = neg_bboxes.size(0) 23 | num_samples = num_pos + num_neg + num_ig 24 | 25 | # original implementation uses new_zeros since BG are set to be 0 26 | # now use empty & fill because BG cat_id = num_classes, 27 | # FG cat_id = [0, num_classes-1] 28 | labels = pos_bboxes.new_full((num_samples,), 29 | self.num_classes, 30 | dtype=torch.long) 31 | label_weights = pos_bboxes.new_zeros(num_samples) 32 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 33 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 34 | if num_pos > 0: 35 | labels[:num_pos] = pos_gt_labels 36 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 37 | label_weights[:num_pos] = pos_weight 38 | if not self.reg_decoded_bbox: 39 | pos_bbox_targets = self.bbox_coder.encode( 40 | pos_bboxes, pos_gt_bboxes) 41 | else: 42 | pos_bbox_targets = pos_gt_bboxes 43 | bbox_targets[:num_pos, :] = pos_bbox_targets 44 | bbox_weights[:num_pos, :] = 0 # not do box regression 45 | if num_ig > 0: 46 | # labels[num_pos:num_ig + num_pos] = ig_gt_labels 47 | label_weights[num_pos:num_ig + num_pos] = 1 # do classification as background 48 | if not 
self.reg_decoded_bbox: 49 | ig_bbox_targets = self.bbox_coder.encode( 50 | ig_bboxes, ig_gt_bboxes) 51 | else: 52 | ig_bbox_targets = ig_gt_bboxes 53 | bbox_targets[num_pos:num_pos + num_ig, :] = ig_bbox_targets 54 | bbox_weights[num_pos:num_pos + num_ig, :] = 1 55 | 56 | if num_neg > 0: 57 | label_weights[-num_neg:] = 1.0 58 | 59 | return labels, label_weights, bbox_targets, bbox_weights 60 | 61 | def get_targets_st(self, 62 | sampling_results, 63 | gt_bboxes, 64 | gt_labels, 65 | rcnn_train_cfg, 66 | concat=True 67 | ): 68 | # positive 69 | pos_bboxes_list = [res.pos_bboxes for res in sampling_results] 70 | pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results] 71 | pos_gt_labels_list = [res.pos_gt_labels for res in sampling_results] 72 | # ignore 73 | ig_bboxes_list = [res.ig_bboxes for res in sampling_results] 74 | ig_gt_bboxes_list = [res.ig_gt_bboxes for res in sampling_results] 75 | ig_gt_labels_list = [res.ig_gt_labels for res in sampling_results] 76 | # negative 77 | neg_bboxes_list = [res.neg_bboxes for res in sampling_results] 78 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 79 | self._get_target_single_st, 80 | pos_bboxes_list, pos_gt_bboxes_list, pos_gt_labels_list, 81 | ig_bboxes_list, ig_gt_bboxes_list, ig_gt_labels_list, 82 | neg_bboxes_list, cfg=rcnn_train_cfg) 83 | if concat: 84 | labels = torch.cat(labels, 0) 85 | label_weights = torch.cat(label_weights, 0) 86 | bbox_targets = torch.cat(bbox_targets, 0) 87 | bbox_weights = torch.cat(bbox_weights, 0) 88 | return labels, label_weights, bbox_targets, bbox_weights 89 | -------------------------------------------------------------------------------- /tools/datasets/prepare_coco_standard.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # !/bin/bash 16 | """Generate labeled and unlabeled data for coco train. 
17 | 18 | Example: 19 | python3 object_detection/prepare_coco_data.py 20 | """ 21 | 22 | import argparse 23 | import numpy as np 24 | import json 25 | import os 26 | 27 | 28 | def prepare_coco_data(data_dir, seed=1, percent=10.0, version=2017): 29 | """Prepare COCO data for Semi-supervised learning 30 | 31 | Args: 32 | seed: random seed for data split 33 | percent: percentage of labeled data 34 | version: COCO data version 35 | """ 36 | 37 | def _save_anno(name, images, annotations): 38 | """Save annotation 39 | """ 40 | print('>> Processing data {}.json saved ({} images {} annotations)'.format( 41 | name, len(images), len(annotations))) 42 | new_anno = {} 43 | new_anno['images'] = images 44 | new_anno['annotations'] = annotations 45 | new_anno['licenses'] = anno['licenses'] 46 | new_anno['categories'] = anno['categories'] 47 | new_anno['info'] = anno['info'] 48 | path = '{}/{}'.format(COCOANNODIR, 'semi_supervised') 49 | if not os.path.exists(path): 50 | os.mkdir(path) 51 | 52 | with open( 53 | '{root}/{folder}/{save_name}.json'.format( 54 | save_name=name, root=COCOANNODIR, folder='semi_supervised'), 55 | 'w') as f: 56 | json.dump(new_anno, f) 57 | print('>> Data {}.json saved ({} images {} annotations)'.format( 58 | name, len(images), len(annotations))) 59 | 60 | np.random.seed(seed) 61 | DATA_DIR = data_dir 62 | COCOANNODIR = os.path.join(DATA_DIR, 'annotations') 63 | 64 | anno = json.load(open(os.path.join(COCOANNODIR, 65 | 'instances_train{}.json'.format(version)))) 66 | 67 | image_list = anno['images'] 68 | labeled_tot = int(percent / 100. * len(image_list)) 69 | labeled_ind = np.random.choice(range(len(image_list)), size=labeled_tot) 70 | labeled_id = [] 71 | labeled_images = [] 72 | unlabeled_images = [] 73 | labeled_ind = set(labeled_ind) 74 | for i in range(len(image_list)): 75 | if i in labeled_ind: 76 | labeled_images.append(image_list[i]) 77 | labeled_id.append(image_list[i]['id']) 78 | else: 79 | unlabeled_images.append(image_list[i]) 80 | 81 | # get all annotations of labeled images 82 | labeled_id = set(labeled_id) 83 | labeled_annotations = [] 84 | unlabeled_annotations = [] 85 | for an in anno['annotations']: 86 | if an['image_id'] in labeled_id: 87 | labeled_annotations.append(an) 88 | else: 89 | unlabeled_annotations.append(an) 90 | 91 | # save labeled and unlabeled 92 | save_name = 'instances_train{version}.{seed}@{tot}'.format( 93 | version=version, seed=seed, tot=int(percent)) 94 | _save_anno(save_name, labeled_images, labeled_annotations) 95 | save_name = 'instances_train{version}.{seed}@{tot}-unlabeled'.format( 96 | version=version, seed=seed, tot=int(percent)) 97 | _save_anno(save_name, unlabeled_images, unlabeled_annotations) 98 | 99 | 100 | if __name__ == '__main__': 101 | parser = argparse.ArgumentParser() 102 | parser.add_argument('--percent', type=float, default=10) 103 | parser.add_argument('--version', type=int, default=2017) 104 | parser.add_argument('--data-dir', type=str, default='') 105 | parser.add_argument('--seed', type=int, help='seed', default=1) 106 | 107 | args = parser.parse_args() 108 | prepare_coco_data(args.data_dir, args.seed, args.percent, args.version) 109 | -------------------------------------------------------------------------------- /mmdet_extension/models/roi_head/bbox_heads/convfc_bbox_head_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
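# Bbox head used together with the LM sampler/assigner: get_targets_lm also
# consumes per-proposal weights (pos_reg_weight, ig_reg_weight), so reliable
# pseudo boxes contribute to both classification and regression while uncertain
# ones are down-weighted.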
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | 5 | from mmdet.core import multi_apply 6 | from mmdet.models.roi_heads.bbox_heads import Shared2FCBBoxHead 7 | from mmdet.models.builder import HEADS 8 | 9 | 10 | @HEADS.register_module() 11 | class Shared2FCBBoxHeadLM(Shared2FCBBoxHead): 12 | def _get_target_single_lm( 13 | self, pos_bboxes, pos_gt_bboxes, pos_gt_labels, pos_reg_weight, # positive 14 | ig_bboxes, ig_gt_bboxes, ig_gt_labels, ig_reg_weight, # ignore 15 | neg_bboxes, cfg): 16 | num_pos = pos_bboxes.size(0) 17 | num_ig = ig_bboxes.size(0) 18 | num_neg = neg_bboxes.size(0) 19 | num_samples = num_pos + num_neg + num_ig 20 | 21 | # original implementation uses new_zeros since BG are set to be 0 22 | # now use empty & fill because BG cat_id = num_classes, 23 | # FG cat_id = [0, num_classes-1] 24 | labels = pos_bboxes.new_full((num_samples,), 25 | self.num_classes, 26 | dtype=torch.long) 27 | label_weights = pos_bboxes.new_zeros(num_samples) 28 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 29 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 30 | # reliable pseudo labels 31 | if num_pos > 0: 32 | labels[:num_pos] = pos_gt_labels 33 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 34 | label_weights[:num_pos] = pos_weight 35 | if not self.reg_decoded_bbox: 36 | pos_bbox_targets = self.bbox_coder.encode( 37 | pos_bboxes, pos_gt_bboxes) 38 | else: 39 | pos_bbox_targets = pos_gt_bboxes 40 | bbox_targets[:num_pos, :] = pos_bbox_targets 41 | bbox_weights[:num_pos, :] = pos_reg_weight.unsqueeze(1) 42 | # uncertain pseudo labels 43 | if num_ig > 0: 44 | labels[num_pos:num_ig + num_pos] = ig_gt_labels 45 | label_weights[num_pos:num_ig + num_pos] = ig_reg_weight 46 | if not self.reg_decoded_bbox: 47 | ig_bbox_targets = self.bbox_coder.encode( 48 | ig_bboxes, ig_gt_bboxes) 49 | else: 50 | ig_bbox_targets = ig_gt_bboxes 51 | bbox_targets[num_pos:num_pos + num_ig, :] = ig_bbox_targets 52 | bbox_weights[num_pos:num_pos + num_ig, :] = ig_reg_weight.unsqueeze(1) 53 | 54 | if num_neg > 0: 55 | label_weights[-num_neg:] = 1.0 56 | 57 | return labels, label_weights, bbox_targets, bbox_weights 58 | 59 | def get_targets_lm(self, 60 | sampling_results, 61 | gt_bboxes, 62 | gt_labels, 63 | rcnn_train_cfg, 64 | concat=True 65 | ): 66 | # positive 67 | pos_bboxes_list = [res.pos_bboxes for res in sampling_results] 68 | pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results] 69 | pos_gt_labels_list = [res.pos_gt_labels for res in sampling_results] 70 | pos_reg_weight = [res.pos_reg_weight for res in sampling_results] 71 | # ignore 72 | ig_bboxes_list = [res.ig_bboxes for res in sampling_results] 73 | ig_gt_bboxes_list = [res.ig_gt_bboxes for res in sampling_results] 74 | ig_gt_labels_list = [res.ig_gt_labels for res in sampling_results] 75 | ig_reg_weight = [res.ig_reg_weight for res in sampling_results] 76 | # negative 77 | neg_bboxes_list = [res.neg_bboxes for res in sampling_results] 78 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 79 | self._get_target_single_lm, 80 | pos_bboxes_list, pos_gt_bboxes_list, pos_gt_labels_list, pos_reg_weight, 81 | ig_bboxes_list, ig_gt_bboxes_list, ig_gt_labels_list, ig_reg_weight, 82 | neg_bboxes_list, cfg=rcnn_train_cfg) 83 | if concat: 84 | labels = torch.cat(labels, 0) 85 | label_weights = torch.cat(label_weights, 0) 86 | bbox_targets = torch.cat(bbox_targets, 0) 87 | bbox_weights = torch.cat(bbox_weights, 0) 88 | return labels, label_weights, bbox_targets, bbox_weights 89 | 
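The target layout built by `get_targets_lm` above can be summarized with a minimal, self-contained sketch (plain PyTorch, illustrative sizes and class indices only, not repository code): samples are ordered as `[reliable positives | uncertain (ignored) | negatives]`, and the uncertain group is gated by `ig_reg_weight`, which `SamplingResultLM` initializes to zero.

```python
import torch

# Illustrative values only: 2 reliable pseudo boxes, 1 uncertain box, 3 negatives,
# 8 foreground classes (background index = num_classes).
num_pos, num_ig, num_neg, num_classes = 2, 1, 3, 8
num_samples = num_pos + num_ig + num_neg

labels = torch.full((num_samples,), num_classes, dtype=torch.long)  # background by default
label_weights = torch.zeros(num_samples)
bbox_weights = torch.zeros(num_samples, 4)

# Reliable pseudo labels: contribute to classification and regression.
labels[:num_pos] = torch.tensor([1, 4])
label_weights[:num_pos] = 1.0
bbox_weights[:num_pos] = 1.0  # pos_reg_weight

# Uncertain pseudo labels: keep their class but are gated by ig_reg_weight (0 here).
labels[num_pos:num_pos + num_ig] = torch.tensor([2])
label_weights[num_pos:num_pos + num_ig] = 0.0
bbox_weights[num_pos:num_pos + num_ig] = 0.0

# Negatives: classified as background only.
label_weights[-num_neg:] = 1.0

print(labels)         # tensor([1, 4, 2, 8, 8, 8])
print(label_weights)  # tensor([1., 1., 0., 1., 1., 1.])
```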
-------------------------------------------------------------------------------- /docs/prepare_data.md: -------------------------------------------------------------------------------- 1 | # Data Preparation 2 | 3 | ## 1. SSOD: Semi-Supervised Object Detection 4 | 5 | We support 5 popular settings in SSOD research as listed below: 6 | 7 | | Labeled Data | Unlabeled Data | Test Data | 8 | | ------------------ | ------------------ | ------------- | 9 | | COCO2017-train-1% | COCO2017-train-99% | COCO2017-test | 10 | | COCO2017-train-5% | COCO2017-train-95% | COCO2017-test | 11 | | COCO2017-train-10% | COCO2017-train-90% | COCO2017-test | 12 | | COCO2017-train | COCO2017-unlabeled | COCO2017-test | 13 | | VOC07-trainval | VOC12-trainval | VOC07-test | 14 | 15 | 1. Download [VOC](http://host.robots.ox.ac.uk/pascal/VOC/) and [COCO](https://cocodataset.org/#home) from the website and organize them as follows: 16 | 17 | ```shell 18 | # ====coco==== | # ====voc==== 19 | /data/coco/ | /data/voc/ 20 | - images | - 12 21 | - train2017 | - VOCdevkit 22 | - unlabeled2017 | - VOC2012 23 | - ... | - ... 24 | - annotations | - 07 25 | - instances_train2017.json | - VOCdevkit 26 | - image_info_unlabeled2017.json | - VOC2007 27 | - ... | - ... 28 | ``` 29 | 30 | 2. Run scripts to create the soft symlink: 31 | 32 | ```shell 33 | # * please change the "prefix_coco", "prefix_coco_ul", "prefix_voc" in the scripts to fit your environment. 34 | # * you can also create symlink by yourself. 35 | cd tools/datasets 36 | xonsh create_dataset_link.sh 37 | ``` 38 | 39 | 3. Create coco-standard, coco-additional, voc (it will cost several minutes): 40 | 41 | ```shell 42 | cd tools/datasets 43 | xonsh preprocess_dataset.sh 44 | ``` 45 | 46 | ## 2. DAOD: Domain Adaptive Object Detection 47 | 48 | We support 4 popular settings in DAOD research as listed below: 49 | 50 | | | Labeled Data | Unlabeled Data | Test Data | 51 | | ------------------------ | ------------------ | ------------------------ | ---------------------- | 52 | | normal$\to$foggy (C2F) | cityscapes (train) | cityscapes-foggy (train) | cityscapes-foggy (val) | 53 | | small$\to$large (C2B) | cityscapes (train) | BDD100K (train) | BDD100K (val) | 54 | | across cameras (K2C) | KITTI (train) | cityscapes (train) | cityscapes (val) | 55 | | synthetic$\to$real (S2C) | Sim10K | cityscapes (train) | cityscapes (val) | 56 | 57 | 1. 
Download [cityscapes](https://cityscapes-dataset.com), [cityscapes-foggy](https://cityscapes-dataset.com), [KITTI](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d), [Sim10K](https://fcav.engin.umich.edu/projects/driving-in-the-matrix) and [BDD100K](https://bdd-data.berkeley.edu) from the website and organize them as follows: 58 | 59 | ```shell 60 | # cityscapes | # cityscapes-foggy | # BDD 61 | /data/city | /data/foggycity | /data/BDD 62 | - VOC2007_citytrain | - VOC2007_foggytrain | - VOC2007_bddtrain 63 | - ImageSets | - ImageSets | - ImageSets 64 | - JPEGImages | - JPEGImages | - JPEGImages 65 | - Annotations | - Annotations | - Annotations 66 | - VOC2007_cityval | - VOC2007_foggyval | - VOC2007_bddval 67 | - ImageSets | - ImageSets | - ImageSets 68 | - JPEGImages | - JPEGImages | - JPEGImages 69 | - Annotations | - Annotations | - Annotations 70 | # ========================================================================= 71 | # KITTI | # Sim10K 72 | /data/kitti | /data/sim 73 | - ImageSets | - ImageSets 74 | - JPEGImages | - JPEGImages 75 | - Annotations | - Annotations 76 | ``` 77 | 78 | > PS: please refer to [ProbabilisticTeacher](https://github.com/HIK-LAB/ProbabilisticTeacher) for the detailed dataset pre-processing. 79 | 80 | 2. Run scripts to create the soft symlink: 81 | 82 | ```shell 83 | cd tools/datasets_uda 84 | xonsh create_dataset_link.sh 85 | ``` 86 | 87 | 3. Convert to coco format: 88 | 89 | ```bash 90 | cd tools/datasets_uda 91 | xonsh preprocess_dataset.sh 92 | ``` 93 | 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MMDetection-based Toolbox for Semi-Supervised Object Detection 2 | 3 | ## Supported algorithms 4 | 5 | - [x] STAC:[A Simple Semi-Supervised Learning Framework for Object Detection [Arxiv'20]](https://arxiv.org/abs/2005.04757v2) 6 | - [x] Unbiased Teacher:[Unbiased Teacher for Semi-Supervised Object Detection [ICLR'21]](https://arxiv.org/abs/2102.09480) 7 | - [x] Soft Teacher:[End-to-End Semi-Supervised Object Detection with Soft Teacher [ICCV'21]](https://arxiv.org/abs/2106.09018) 8 | - [x] LabelMatch:[Label Matching Semi-Supervised Object Detection [CVPR'22]](https://arxiv.org/pdf/2206.06608.pdf) 9 | 10 | ## Preparation 11 | 12 | #### Prerequisites 13 | 14 | ```bash 15 | pip install -r requirements.txt 16 | ``` 17 | 18 | - Linux with Python >= 3.6 19 | - We use mmdet=2.10.0, pytorch=1.6.0 20 | 21 | #### Data Preparation 22 | 23 | Please refer to [prepare_data.md](./docs/prepare_data.md). 24 | 25 | ## Usage 26 | 27 | ### Training 28 | 29 | #### 1. Use labeled data to train a baseline 30 | 31 | Before training,please download the pretrained backbone ([resnet50](https://download.pytorch.org/models/resnet50-19c8e357.pth)) to `pretrained_model/backbone`. 
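If you prefer to script this download, a minimal helper such as the following (run from the repository root; the URL and target folder are the ones given above) places the weights where the configs expect them:

```python
import os
import urllib.request

# Download the ImageNet-pretrained ResNet-50 backbone into the expected folder.
url = 'https://download.pytorch.org/models/resnet50-19c8e357.pth'
dst_dir = 'pretrained_model/backbone'
os.makedirs(dst_dir, exist_ok=True)
urllib.request.urlretrieve(url, os.path.join(dst_dir, 'resnet50-19c8e357.pth'))
```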
32 | 33 | ```shell 34 | # |---------------------|--------|------|---------|---------| 35 | # | xonsh train_gpu2.sh | config | seed | percent | dataset | 36 | # |---------------------|--------|------|---------|---------| 37 | cd examples/train/xonsh 38 | ## ---dataset: coco-standard--- 39 | xonsh train_gpu2.sh ./configs/baseline/baseline_ssod.py 1 1 coco-standard 40 | ## ---dataset: voc--- 41 | # xonsh train_gpu2.sh ./configs/baseline/baseline_ssod.py 1 1 voc 42 | ## ---dataset: coco-additional--- 43 | # xonsh train_gpu8.sh ./configs/baseline/baseline_ssod.py 1 1 coco-additional 44 | ``` 45 | 46 | - In our implementation, we use 2 GPUs for training, except for coco-additional (which uses 8 GPUs). 47 | 48 | - After training, we organize the pretrained baselines under `pretrained_model/baseline` as follows: 49 | 50 | ```shell 51 | pretrained_model/ 52 | └── baseline/ 53 | ├── instances_train2017.1@1.pth 54 | ├── instances_train2017.1@5.pth 55 | ├── ... 56 | ├── voc.pth 57 | └── coco.pth 58 | ``` 59 | 60 | - You can also change the `load_from` entry in the config file used in step 2. 61 | 62 | #### 2. Use labeled data + unlabeled data to train detector 63 | 64 | ```shell 65 | ## note: dataset is set to none in this step. 66 | cd examples/train/xonsh 67 | xonsh train_gpu8.sh ./configs/labelmatch/labelmatch_standard.py 1 1 none 68 | ``` 69 | 70 | - In our implementation, we use 8 GPUs for training. 71 | - You can also run `bash train_ssod.sh` in `examples/train/bash`. 72 | 73 | ### Evaluation 74 | 75 | ```shell 76 | # change "config" and "checkpoint" in the 'eval.sh' script to evaluate a different dataset or trained model 77 | cd examples/eval 78 | xonsh eval.sh 79 | ``` 80 | 81 | ## Performance 82 | 83 | #### LabelMatch 84 | 85 | | Model | Supervision | AP | Config | Model Weights | 86 | | :-------: | :-----------: | :--: | :-----------: | ------------- | 87 | | R50-FPN | 1% | 25.81±0.28 | [labelmatch_standard_paper](./configs/labelmatch/labelmatch_standard_paper.py) | [To-Be-Released]() | 88 | | R50-FPN | 5% | 32.70±0.18 | [labelmatch_standard_paper](./configs/labelmatch/labelmatch_standard_paper.py) | [To-Be-Released]() | 89 | | R50-FPN | 10% | 35.49±0.17 | [labelmatch_standard_paper](./configs/labelmatch/labelmatch_standard_paper.py) | [To-Be-Released]() | 90 | 91 | - Please refer to [performance.md](./docs/performance.md) for more performance results. 92 | 93 | ## Extension to Domain Adaptive Object Detection 94 | 95 | Please refer to [UDA](./docs/domain_adaption.md). 96 | 97 | ## Citation 98 | 99 | If you use LabelMatch in your research or wish to refer to the results published in the paper, please consider citing our paper. 100 | 101 | ```BibTeX 102 | @inproceedings{Chen2022LabelMatching, 103 | title={Label Matching Semi-Supervised Object Detection}, 104 | author={Binbin Chen and Weijie Chen and Shicai Yang and Yunyi Xuan and Jie Song and Di Xie and Shiliang Pu and Mingli Song and Yueting Zhuang}, 105 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 106 | year={2022}, 107 | } 108 | ``` 109 | 110 | ## License 111 | 112 | This project is released under the [Apache 2.0 license](./LICENSE). Code adapted from other open-source repositories follows its original license. 113 | 114 | ## Acknowledgement 115 | 116 | If you have any problems with this work, please feel free to contact Binbin Chen (chenbinbin8-at-hikvision.com) and Weijie Chen (chenweijie5-at-hikvision.com).
117 | 118 | -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/assigner/max_iou_assigner_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | 5 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 6 | from mmdet.core.bbox.assigners import MaxIoUAssigner 7 | from mmdet_extension.core.bbox.assigner.assign_result_lm import AssignResultLM 8 | 9 | 10 | @BBOX_ASSIGNERS.register_module() 11 | class MaxIoUAssignerLM(MaxIoUAssigner): 12 | def assign_wrt_overlaps(self, overlaps, gt_labels=None): 13 | """Assign w.r.t. the overlaps of bboxes with gts. 14 | 15 | Args: 16 | overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes, 17 | shape(k, n). 18 | gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ). 19 | 20 | Returns: 21 | :obj:`AssignResult`: The assign result. 22 | """ 23 | num_gts, num_bboxes = overlaps.size(0), overlaps.size(1) 24 | 25 | # 1. assign -1 by default 26 | assigned_gt_inds = overlaps.new_full((num_bboxes,), 27 | -1, 28 | dtype=torch.long) 29 | 30 | if num_gts == 0 or num_bboxes == 0: 31 | # No ground truth or boxes, return empty assignment 32 | max_overlaps = overlaps.new_zeros((num_bboxes,)) 33 | if num_gts == 0: 34 | # No truth, assign everything to background 35 | assigned_gt_inds[:] = 0 36 | if gt_labels is None: 37 | assigned_labels = None 38 | else: 39 | assigned_labels = overlaps.new_full((num_bboxes,), 40 | -1, 41 | dtype=torch.long) 42 | return AssignResultLM( 43 | num_gts, 44 | assigned_gt_inds, 45 | max_overlaps, 46 | labels=assigned_labels) 47 | 48 | # for each anchor, which gt best overlaps with it 49 | # for each anchor, the max iou of all gts 50 | max_overlaps, argmax_overlaps = overlaps.max(dim=0) 51 | # for each gt, which anchor best overlaps with it 52 | # for each gt, the max iou of all proposals 53 | gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1) 54 | 55 | # 2. assign negative: below 56 | # the negative inds are set to be 0 57 | if isinstance(self.neg_iou_thr, float): 58 | assigned_gt_inds[(max_overlaps >= 0) 59 | & (max_overlaps < self.neg_iou_thr)] = 0 60 | elif isinstance(self.neg_iou_thr, tuple): 61 | assert len(self.neg_iou_thr) == 2 62 | assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0]) 63 | & (max_overlaps < self.neg_iou_thr[1])] = 0 64 | 65 | # 3. assign positive: above positive IoU threshold 66 | pos_inds = max_overlaps >= self.pos_iou_thr 67 | assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 68 | 69 | if self.match_low_quality: 70 | # Low-quality matching will overwirte the assigned_gt_inds assigned 71 | # in Step 3. Thus, the assigned gt might not be the best one for 72 | # prediction. 73 | # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2, 74 | # bbox 1 will be assigned as the best target for bbox A in step 3. 75 | # However, if GT bbox 2's gt_argmax_overlaps = A, bbox A's 76 | # assigned_gt_inds will be overwritten to be bbox B. 77 | # This might be the reason that it is not used in ROI Heads. 
78 | for i in range(num_gts): 79 | if gt_max_overlaps[i] >= self.min_pos_iou: 80 | if self.gt_max_assign_all: 81 | max_iou_inds = overlaps[i, :] == gt_max_overlaps[i] 82 | assigned_gt_inds[max_iou_inds] = i + 1 83 | else: 84 | assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 85 | 86 | if gt_labels is not None: 87 | assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) 88 | pos_inds = torch.nonzero( 89 | assigned_gt_inds > 0, as_tuple=False).squeeze() 90 | if pos_inds.numel() > 0: 91 | assigned_labels[pos_inds] = gt_labels[ 92 | assigned_gt_inds[pos_inds] - 1] 93 | else: 94 | assigned_labels = None 95 | 96 | return AssignResultLM( 97 | num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels) 98 | -------------------------------------------------------------------------------- /mmdet_extension/core/runner/semi_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | Support save ema model 5 | """ 6 | import os.path as osp 7 | import platform 8 | import shutil 9 | 10 | import mmcv 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | 14 | from mmcv.parallel import is_module_wrapper 15 | from mmcv.runner import IterBasedRunner, EpochBasedRunner 16 | 17 | 18 | @RUNNERS.register_module() 19 | class SemiIterBasedRunner(IterBasedRunner): 20 | def train(self, data_loader, **kwargs): 21 | self.model.train() 22 | self.mode = 'train' 23 | self.data_loader = data_loader 24 | self._epoch = data_loader.epoch 25 | self.call_hook('before_train_iter') 26 | data_batch = next(data_loader) 27 | outputs = self.model.train_step(data_batch, self.optimizer, **kwargs) 28 | if not isinstance(outputs, dict): 29 | raise TypeError('model.train_step() must return a dict') 30 | if 'log_vars' in outputs: 31 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 32 | self.outputs = outputs 33 | self.call_hook('after_train_iter') 34 | self._inner_iter += 1 35 | self._iter += 1 36 | 37 | def save_checkpoint(self, 38 | out_dir, 39 | filename_tmpl='iter_{}.pth', 40 | meta=None, 41 | save_optimizer=True, 42 | create_symlink=True): 43 | if meta is None: 44 | meta = dict(iter=self.iter + 1, epoch=self.epoch + 1) 45 | elif isinstance(meta, dict): 46 | meta.update(iter=self.iter + 1, epoch=self.epoch + 1) 47 | else: 48 | raise TypeError( 49 | f'meta should be a dict or None, but got {type(meta)}') 50 | if self.meta is not None: 51 | meta.update(self.meta) 52 | 53 | filename = filename_tmpl.format(self.iter + 1) 54 | filepath = osp.join(out_dir, filename) 55 | optimizer = self.optimizer if save_optimizer else None 56 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 57 | filepath_ema = filepath[:-4] + '_ema.pth' 58 | if is_module_wrapper(self.model): 59 | use_ema = hasattr(self.model.module, 'ema_model') and self.model.module.ema_model is not None 60 | if use_ema: 61 | save_checkpoint(self.model.module.ema_model, filepath_ema, optimizer=optimizer, meta=meta) 62 | else: 63 | use_ema = hasattr(self.model, 'ema_model') and self.model.ema_model is not None 64 | if use_ema: 65 | save_checkpoint(self.model.ema_model, filepath_ema, optimizer=optimizer, meta=meta) 66 | # in some environments, `os.symlink` is not supported, you may need to 67 | # set `create_symlink` to False 68 | if create_symlink: 69 | dst_file = osp.join(out_dir, 
'latest.pth') 70 | if platform.system() != 'Windows': 71 | mmcv.symlink(filename, dst_file) 72 | else: 73 | shutil.copy(filepath, dst_file) 74 | 75 | 76 | @RUNNERS.register_module() 77 | class SemiEpochBasedRunner(EpochBasedRunner): 78 | def save_checkpoint(self, 79 | out_dir, 80 | filename_tmpl='iter_{}.pth', 81 | meta=None, 82 | save_optimizer=True, 83 | create_symlink=True): 84 | if meta is None: 85 | meta = dict(iter=self.iter + 1, epoch=self.epoch + 1) 86 | elif isinstance(meta, dict): 87 | meta.update(iter=self.iter + 1, epoch=self.epoch + 1) 88 | else: 89 | raise TypeError( 90 | f'meta should be a dict or None, but got {type(meta)}') 91 | if self.meta is not None: 92 | meta.update(self.meta) 93 | 94 | filename = filename_tmpl.format(self.iter + 1) 95 | filepath = osp.join(out_dir, filename) 96 | optimizer = self.optimizer if save_optimizer else None 97 | save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta) 98 | filepath_ema = filepath[:-4] + '_ema.pth' 99 | if is_module_wrapper(self.model): 100 | use_ema = hasattr(self.model.module, 'ema_model') and self.model.module.ema_model is not None 101 | if use_ema: 102 | save_checkpoint(self.model.module.ema_model, filepath_ema, optimizer=optimizer, meta=meta) 103 | else: 104 | use_ema = hasattr(self.model, 'ema_model') and self.model.ema_model is not None 105 | if use_ema: 106 | save_checkpoint(self.model.ema_model, filepath_ema, optimizer=optimizer, meta=meta) 107 | # in some environments, `os.symlink` is not supported, you may need to 108 | # set `create_symlink` to False 109 | if create_symlink: 110 | dst_file = osp.join(out_dir, 'latest.pth') 111 | if platform.system() != 'Windows': 112 | mmcv.symlink(filename, dst_file) 113 | else: 114 | shutil.copy(filepath, dst_file) 115 | -------------------------------------------------------------------------------- /mmdet_extension/core/hooks/semi_eval_hooks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | eval hook for semi-supervised: 5 | 1. without ema: same as normal evaluation 6 | 2. 
with ema: 1) only_ema=True: only do evaluation on ema_model 7 | 2) only_ema=False: do evaluation on model and ema_model 8 | """ 9 | import os.path as osp 10 | import torch.distributed as dist 11 | from torch.nn.modules.batchnorm import _BatchNorm 12 | 13 | from mmcv.parallel import is_module_wrapper 14 | from mmdet.core.evaluation import EvalHook 15 | 16 | 17 | class SemiEvalHook(EvalHook): 18 | def __init__(self, 19 | dataloader, 20 | start=None, 21 | interval=1, 22 | by_epoch=True, 23 | save_best=None, 24 | rule=None, 25 | only_ema=False, 26 | **eval_kwargs 27 | ): 28 | super().__init__(dataloader, start, interval, by_epoch, save_best, rule, 29 | **eval_kwargs) 30 | self.only_ema = only_ema 31 | 32 | def evaluation_once(self, runner): 33 | from mmdet_extension.apis import single_gpu_test 34 | if is_module_wrapper(runner.model): 35 | has_ema = hasattr(runner.model.module, 'ema_model') and runner.model.module.ema_model is not None 36 | else: 37 | has_ema = hasattr(runner.model, 'ema_model') and runner.model.ema_model is not None 38 | if (not has_ema) or (not self.only_ema): 39 | results = single_gpu_test(runner.model, self.dataloader, show=False) 40 | key_score = self.evaluate(runner, results) 41 | if has_ema: 42 | if is_module_wrapper(runner.model): 43 | results_ema = single_gpu_test(runner.model.module.ema_model, self.dataloader, show=False) 44 | else: 45 | results_ema = single_gpu_test(runner.model.ema_model, self.dataloader, show=False) 46 | key_score = self.evaluate(runner, results_ema) 47 | if self.save_best: 48 | self.save_best_checkpoint(runner, key_score) 49 | 50 | def after_train_epoch(self, runner): 51 | if not self.by_epoch or not self.evaluation_flag(runner): 52 | return 53 | self.evaluation_once(runner) 54 | 55 | def after_train_iter(self, runner): 56 | if self.by_epoch or not self.every_n_iters(runner, self.interval): 57 | return 58 | self.evaluation_once(runner) 59 | 60 | 61 | class SemiDistEvalHook(SemiEvalHook): 62 | def __init__(self, 63 | dataloader, 64 | start=None, 65 | interval=1, 66 | by_epoch=True, 67 | tmpdir=None, 68 | gpu_collect=False, 69 | save_best=None, 70 | rule=None, 71 | only_ema=False, 72 | broadcast_bn_buffer=True, 73 | **eval_kwargs 74 | ): 75 | super().__init__(dataloader, start=start, interval=interval, by_epoch=by_epoch, 76 | save_best=save_best, rule=rule, only_ema=only_ema, **eval_kwargs) 77 | self.broadcast_bn_buffer = broadcast_bn_buffer 78 | self.tmpdir = tmpdir 79 | self.gpu_collect = gpu_collect 80 | 81 | def _broadcast_bn_buffer(self, runner, has_ema): 82 | if self.broadcast_bn_buffer: 83 | model = runner.model 84 | for name, module in model.named_modules(): 85 | if isinstance(module, _BatchNorm) and module.track_running_stats: 86 | dist.broadcast(module.running_var, 0) 87 | dist.broadcast(module.running_mean, 0) 88 | if has_ema: 89 | for name, module in model.module.ema_model.named_modules(): 90 | if isinstance(module, _BatchNorm) and module.track_running_stats: 91 | dist.broadcast(module.running_var, 0) 92 | dist.broadcast(module.running_mean, 0) 93 | 94 | def evaluation_once(self, runner): 95 | has_ema = hasattr(runner.model.module, 'ema_model') and runner.model.module.ema_model is not None 96 | if self.broadcast_bn_buffer: 97 | self._broadcast_bn_buffer(runner, has_ema) 98 | tmpdir = self.tmpdir 99 | if tmpdir is None: 100 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 101 | from mmdet_extension.apis import multi_gpu_test 102 | if (not has_ema) or (not self.only_ema): 103 | results = multi_gpu_test( 104 | runner.model, 105 | 
self.dataloader, 106 | tmpdir=tmpdir, 107 | gpu_collect=self.gpu_collect) 108 | if runner.rank == 0: 109 | print('\n') 110 | key_score = self.evaluate(runner, results) 111 | dist.barrier() 112 | if has_ema: 113 | results_ema = multi_gpu_test( 114 | runner.model.module.ema_model, self.dataloader, 115 | tmpdir=tmpdir, gpu_collect=self.gpu_collect) 116 | if runner.rank == 0: 117 | print('\n') 118 | key_score = self.evaluate(runner, results_ema) 119 | if runner.rank == 0 and self.save_best: 120 | self.save_best_checkpoint(runner, key_score) 121 | -------------------------------------------------------------------------------- /configs/baseline/ema_config/baseline_standard.py: -------------------------------------------------------------------------------- 1 | samples_per_gpu = 4 2 | 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | 6 | test_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict( 9 | type='MultiScaleFlipAug', 10 | img_scale=(1333, 800), 11 | flip=False, 12 | transforms=[ 13 | dict(type='Resize', keep_ratio=True), 14 | dict(type='RandomFlip'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='Pad', size_divisor=32), 17 | dict(type='ImageToTensor', keys=['img']), 18 | dict(type='Collect', keys=['img']), 19 | ]) 20 | ] 21 | 22 | dataset_type = 'CocoDataset' 23 | data_root = './dataset/coco/' 24 | data = dict( 25 | samples_per_gpu=samples_per_gpu, 26 | workers_per_gpu=4, 27 | val=dict( 28 | type=dataset_type, 29 | ann_file=data_root + 'annotations/instances_val2017.json', 30 | img_prefix=data_root + 'val2017/', 31 | pipeline=test_pipeline), 32 | test=dict( 33 | type=dataset_type, 34 | ann_file=data_root + 'annotations/instances_val2017.json', 35 | img_prefix=data_root + 'val2017/', 36 | pipeline=test_pipeline)) 37 | evaluation = dict(interval=1, metric='bbox', classwise=True) 38 | 39 | dist_params = dict(backend='nccl') 40 | log_level = 'INFO' 41 | 42 | model = dict( 43 | type='FasterRCNN', 44 | pretrained='./pretrained_model/backbone/resnet50-19c8e357.pth', 45 | backbone=dict( 46 | type='ResNet', 47 | depth=50, 48 | num_stages=4, 49 | out_indices=(0, 1, 2, 3), 50 | frozen_stages=1, 51 | norm_cfg=dict(type='BN', requires_grad=True), 52 | norm_eval=True, 53 | style='pytorch'), 54 | neck=dict( 55 | type='FPN', 56 | in_channels=[256, 512, 1024, 2048], 57 | out_channels=256, 58 | num_outs=5), 59 | rpn_head=dict( 60 | type='RPNHead', 61 | in_channels=256, 62 | feat_channels=256, 63 | anchor_generator=dict( 64 | type='AnchorGenerator', 65 | scales=[8], 66 | ratios=[0.5, 1.0, 2.0], 67 | strides=[4, 8, 16, 32, 64]), 68 | bbox_coder=dict( 69 | type='DeltaXYWHBBoxCoder', 70 | target_means=[.0, .0, .0, .0], 71 | target_stds=[1.0, 1.0, 1.0, 1.0]), 72 | loss_cls=dict( 73 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 74 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 75 | roi_head=dict( 76 | type='StandardRoIHeadBase', 77 | bbox_roi_extractor=dict( 78 | type='SingleRoIExtractor', 79 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 80 | out_channels=256, 81 | featmap_strides=[4, 8, 16, 32]), 82 | bbox_head=dict( 83 | type='Shared2FCBBoxHead', 84 | in_channels=256, 85 | fc_out_channels=1024, 86 | roi_feat_size=7, 87 | num_classes=80, 88 | bbox_coder=dict( 89 | type='DeltaXYWHBBoxCoder', 90 | target_means=[0., 0., 0., 0.], 91 | target_stds=[0.1, 0.1, 0.2, 0.2]), 92 | reg_class_agnostic=True, 93 | loss_cls=dict( 94 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 95 | 
loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 96 | # model training and testing settings 97 | train_cfg=dict( 98 | rpn=dict( 99 | assigner=dict( 100 | type='MaxIoUAssigner', 101 | pos_iou_thr=0.7, 102 | neg_iou_thr=0.3, 103 | min_pos_iou=0.3, 104 | match_low_quality=True, 105 | ignore_iof_thr=-1), 106 | sampler=dict( 107 | type='RandomSampler', 108 | num=256, 109 | pos_fraction=0.5, 110 | neg_pos_ub=-1, 111 | add_gt_as_proposals=False), 112 | allowed_border=-1, 113 | pos_weight=-1, 114 | debug=False), 115 | rpn_proposal=dict( 116 | nms_pre=2000, 117 | max_per_img=1000, 118 | nms=dict(type='nms', iou_threshold=0.7), 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | assigner=dict( 122 | type='MaxIoUAssigner', 123 | pos_iou_thr=0.5, 124 | neg_iou_thr=0.5, 125 | min_pos_iou=0.5, 126 | match_low_quality=False, 127 | ignore_iof_thr=-1), 128 | sampler=dict( 129 | type='RandomSampler', 130 | num=512, 131 | pos_fraction=0.25, 132 | neg_pos_ub=-1, 133 | add_gt_as_proposals=True), 134 | pos_weight=-1, 135 | debug=False)), 136 | test_cfg=dict( 137 | rpn=dict( 138 | nms_pre=1000, 139 | max_per_img=1000, 140 | nms=dict(type='nms', iou_threshold=0.7), 141 | min_bbox_size=0), 142 | rcnn=dict( 143 | score_thr=0.001, 144 | nms=dict(type='nms', iou_threshold=0.5), 145 | max_per_img=100) 146 | # soft-nms is also supported for rcnn testing 147 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 148 | )) 149 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/txt_style.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import random 4 | import numpy as np 5 | import mmcv 6 | from mmdet.datasets.custom import CustomDataset 7 | from mmdet.datasets.builder import DATASETS 8 | from mmdet.utils import get_root_logger 9 | 10 | from mmdet_extension.core.utils.classes import COCO_CLASSES 11 | 12 | 13 | @DATASETS.register_module() 14 | class TXTDataset(CustomDataset): 15 | """support text format dataset 16 | each line: name h w bbox_num x1 y1 x2 y2 cls ignore ... 
17 | """ 18 | CLASSES = COCO_CLASSES 19 | 20 | def __init__(self, 21 | ann_file, 22 | pipeline, 23 | classes=None, 24 | data_root=None, 25 | img_prefix='', 26 | seg_prefix=None, 27 | proposal_file=None, 28 | test_mode=False, 29 | filter_empty_gt=False, 30 | flag_value=0, 31 | ignore_label=-2, 32 | with_box_id=False, 33 | manual_length=None 34 | ): 35 | self.flag_value = flag_value 36 | self.ignore_label = ignore_label # ignore = ignore_label + -1*cls 37 | self.with_box_id = with_box_id # whether return with box_id (used in instance consistency) 38 | super().__init__(ann_file, pipeline, classes, data_root, img_prefix, seg_prefix, proposal_file, 39 | test_mode, filter_empty_gt) 40 | self.length = min(manual_length, len(self.data_infos)) if manual_length else len(self.data_infos) 41 | 42 | # override-to avoid error, filter empty image 43 | def _filter_imgs(self, min_size=32): 44 | if self.filter_empty_gt: 45 | valid_inds = [] 46 | for i, line in enumerate(self.data_infos): 47 | num = int(line.decode().split(' ')[3]) 48 | if num > 0: 49 | valid_inds.append(i) 50 | else: 51 | valid_inds = list(range(len(self.data_infos))) 52 | return valid_inds 53 | 54 | # override: not use flag (this may add in the future) 55 | def _set_group_flag(self): 56 | self.flag = np.ones(len(self), dtype=np.uint8) * self.flag_value 57 | 58 | def load_annotations(self, ann_file): 59 | logger = get_root_logger() 60 | timer = mmcv.Timer() 61 | ann_list = mmcv.list_from_file(ann_file) 62 | data_infos = [] 63 | for ann_line in ann_list: 64 | data_infos.append(ann_line.encode()) 65 | logger.info(f'Loading {len(data_infos)} images, cost {timer.since_start()}') 66 | return data_infos 67 | 68 | def _parse_str_info(self, str_data): 69 | line_info = str_data.split() 70 | height, width = int(line_info[1]), int(line_info[2]) 71 | bbox_number = int(line_info[3]) 72 | bboxes, labels, box_ids = [], [], [] 73 | for idx in range(bbox_number): 74 | bbox = [float(ann) for ann in line_info[4 + 6 * idx:8 + 6 * idx]] 75 | if (bbox[3] - bbox[1] < 2.0) or (bbox[2] - bbox[0] < 2.0): 76 | continue 77 | if int(line_info[8 + 6 * idx]) > len(self.CLASSES): # set other cls to background 78 | continue 79 | bboxes.append(bbox) 80 | label, ignore = int(line_info[8 + 6 * idx]) - 1, int(line_info[9 + 6 * idx]) 81 | labels.append(self.ignore_label + label * -1 if ignore else label) 82 | box_ids.append(idx) 83 | bboxes = np.array(bboxes).astype(np.float32) if len(bboxes) != 0 else np.empty((0, 4), dtype=np.float32) 84 | labels = np.array(labels).astype(np.int64) if len(labels) != 0 else np.empty((0,), dtype=np.int64) 85 | box_ids = np.array(box_ids).astype(np.int64) if len(box_ids) != 0 else np.empty((0,), dtype=np.int64) 86 | img_info = dict( 87 | filename=line_info[0].replace('$SPACE', ' '), 88 | width=width, 89 | height=height, 90 | ) 91 | ann_info = dict( 92 | bboxes=bboxes, 93 | # it's inconvenience to move box_id as another ann_info (need to re-write pipeline), 94 | # so we use this format 95 | labels=np.c_[labels, box_ids] if self.with_box_id else labels, 96 | ) 97 | return img_info, ann_info 98 | 99 | def _parse_data_info(self, idx): 100 | line_info = self.data_infos[idx].decode() 101 | return self._parse_str_info(line_info) 102 | 103 | def get_ann_info(self, idx): 104 | _, ann_info = self._parse_data_info(idx) 105 | return ann_info 106 | 107 | def prepare_train_img(self, idx): 108 | img_info, ann_info = self._parse_data_info(idx) 109 | results = dict(img_info=img_info, ann_info=ann_info) 110 | if self.proposals is not None: 111 | 
results['proposals'] = self.proposals[idx] 112 | self.pre_pipeline(results) 113 | results = self.pipeline(results) 114 | return results 115 | 116 | def prepare_test_img(self, idx): 117 | img_info, _ = self._parse_data_info(idx) 118 | results = dict(img_info=img_info) 119 | if self.proposals is not None: 120 | results['proposals'] = self.proposals[idx] 121 | self.pre_pipeline(results) 122 | return self.pipeline(results) 123 | 124 | def __len__(self): 125 | return self.length if hasattr(self, 'length') else len(self.data_infos) 126 | 127 | def shuffle_data_info(self): 128 | random.shuffle(self.data_infos) 129 | -------------------------------------------------------------------------------- /configs/baseline/ema_config/baseline_voc.py: -------------------------------------------------------------------------------- 1 | classes = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 2 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 3 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 4 | samples_per_gpu = 4 5 | 6 | img_norm_cfg = dict( 7 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 8 | 9 | test_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict( 12 | type='MultiScaleFlipAug', 13 | img_scale=(1333, 800), 14 | flip=False, 15 | transforms=[ 16 | dict(type='Resize', keep_ratio=True), 17 | dict(type='RandomFlip'), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='ImageToTensor', keys=['img']), 21 | dict(type='Collect', keys=['img']), 22 | ]) 23 | ] 24 | 25 | dataset_type = 'CocoDataset' 26 | data_root = './dataset/voc/' 27 | data = dict( 28 | samples_per_gpu=samples_per_gpu, 29 | workers_per_gpu=4, 30 | val=dict( 31 | type=dataset_type, 32 | ann_file=data_root + f'annotations_json/voc07_test.json', 33 | img_prefix=data_root, 34 | classes=classes, 35 | pipeline=test_pipeline), 36 | test=dict( 37 | type=dataset_type, 38 | ann_file=data_root + f'annotations_json/voc07_test.json', 39 | img_prefix=data_root, 40 | classes=classes, 41 | pipeline=test_pipeline)) 42 | evaluation = dict(interval=1, metric='bbox', classwise=True) 43 | 44 | dist_params = dict(backend='nccl') 45 | log_level = 'INFO' 46 | 47 | model = dict( 48 | type='FasterRCNN', 49 | pretrained='./pretrained_model/backbone/resnet50-19c8e357.pth', 50 | backbone=dict( 51 | type='ResNet', 52 | depth=50, 53 | num_stages=4, 54 | out_indices=(0, 1, 2, 3), 55 | frozen_stages=1, 56 | norm_cfg=dict(type='BN', requires_grad=True), 57 | norm_eval=True, 58 | style='pytorch'), 59 | neck=dict( 60 | type='FPN', 61 | in_channels=[256, 512, 1024, 2048], 62 | out_channels=256, 63 | num_outs=5), 64 | rpn_head=dict( 65 | type='RPNHead', 66 | in_channels=256, 67 | feat_channels=256, 68 | anchor_generator=dict( 69 | type='AnchorGenerator', 70 | scales=[8], 71 | ratios=[0.5, 1.0, 2.0], 72 | strides=[4, 8, 16, 32, 64]), 73 | bbox_coder=dict( 74 | type='DeltaXYWHBBoxCoder', 75 | target_means=[.0, .0, .0, .0], 76 | target_stds=[1.0, 1.0, 1.0, 1.0]), 77 | loss_cls=dict( 78 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 79 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 80 | roi_head=dict( 81 | type='StandardRoIHeadBase', 82 | bbox_roi_extractor=dict( 83 | type='SingleRoIExtractor', 84 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 85 | out_channels=256, 86 | featmap_strides=[4, 8, 16, 32]), 87 | bbox_head=dict( 88 | type='Shared2FCBBoxHead', 89 | in_channels=256, 90 | fc_out_channels=1024, 91 | 
roi_feat_size=7, 92 | num_classes=len(classes), 93 | bbox_coder=dict( 94 | type='DeltaXYWHBBoxCoder', 95 | target_means=[0., 0., 0., 0.], 96 | target_stds=[0.1, 0.1, 0.2, 0.2]), 97 | reg_class_agnostic=True, 98 | loss_cls=dict( 99 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 100 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 101 | # model training and testing settings 102 | train_cfg=dict( 103 | rpn=dict( 104 | assigner=dict( 105 | type='MaxIoUAssigner', 106 | pos_iou_thr=0.7, 107 | neg_iou_thr=0.3, 108 | min_pos_iou=0.3, 109 | match_low_quality=True, 110 | ignore_iof_thr=-1), 111 | sampler=dict( 112 | type='RandomSampler', 113 | num=256, 114 | pos_fraction=0.5, 115 | neg_pos_ub=-1, 116 | add_gt_as_proposals=False), 117 | allowed_border=-1, 118 | pos_weight=-1, 119 | debug=False), 120 | rpn_proposal=dict( 121 | nms_pre=2000, 122 | max_per_img=1000, 123 | nms=dict(type='nms', iou_threshold=0.7), 124 | min_bbox_size=0), 125 | rcnn=dict( 126 | assigner=dict( 127 | type='MaxIoUAssigner', 128 | pos_iou_thr=0.5, 129 | neg_iou_thr=0.5, 130 | min_pos_iou=0.5, 131 | match_low_quality=False, 132 | ignore_iof_thr=-1), 133 | sampler=dict( 134 | type='RandomSampler', 135 | num=512, 136 | pos_fraction=0.25, 137 | neg_pos_ub=-1, 138 | add_gt_as_proposals=True), 139 | pos_weight=-1, 140 | debug=False)), 141 | test_cfg=dict( 142 | rpn=dict( 143 | nms_pre=1000, 144 | max_per_img=1000, 145 | nms=dict(type='nms', iou_threshold=0.7), 146 | min_bbox_size=0), 147 | rcnn=dict( 148 | score_thr=0.001, 149 | nms=dict(type='nms', iou_threshold=0.5), 150 | max_per_img=100) 151 | # soft-nms is also supported for rcnn testing 152 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 153 | )) 154 | -------------------------------------------------------------------------------- /mmdet_extension/apis/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | modify from mmdet.apis.test: use PrintBar rather than ProgressBar (for AI platform) 5 | """ 6 | import os.path as osp 7 | import time 8 | from shutil import get_terminal_size 9 | import torch 10 | 11 | import mmcv 12 | from mmcv import Timer 13 | from mmcv.image import tensor2imgs 14 | from mmcv.runner import get_dist_info 15 | 16 | from mmdet.apis.test import collect_results_cpu, collect_results_gpu 17 | from mmdet.core import encode_mask_results 18 | 19 | 20 | class PrintBar(object): 21 | def __init__(self, task_num, bar_width=50): 22 | self.task_num = task_num 23 | self.completed = 0 24 | self.bar_width = bar_width 25 | self.print_len = max(1, task_num // 3) 26 | self.start() 27 | 28 | @property 29 | def terminal_width(self): 30 | width, _ = get_terminal_size() 31 | return width 32 | 33 | def start(self): 34 | if self.task_num > 0: 35 | print(f'[{" " * self.bar_width}] 0/{self.task_num}, elapsed: 0s, ETA:') 36 | else: 37 | print('completed: 0, elapsed: 0s') 38 | self.timer = Timer() 39 | 40 | def update(self, num_tasks=1): 41 | self.completed += num_tasks 42 | elapsed = self.timer.since_start() 43 | if elapsed > 0: 44 | fps = self.completed / elapsed 45 | else: 46 | fps = float('inf') 47 | if self.completed % self.print_len == 0: 48 | percentage = self.completed / float(self.task_num) 49 | eta = int(elapsed * (1 - percentage) / percentage + 0.5) 50 | msg = f'\r[{{}}] {self.completed}/{self.task_num}, ' \ 51 | f'{fps:.1f} task/s, elapsed: {int(elapsed + 0.5)}s, ' \ 52 | f'ETA: {eta:5}s' 53 | bar_width = min(self.bar_width, 54 | int(self.terminal_width - len(msg)) + 2, 55 | int(self.terminal_width * 0.6)) 56 | bar_width = max(2, bar_width) 57 | mark_width = int(bar_width * percentage) 58 | bar_chars = '>' * mark_width + ' ' * (bar_width - mark_width) 59 | print(msg.format(bar_chars)) 60 | 61 | 62 | def single_gpu_test(model, 63 | data_loader, 64 | show=False, 65 | out_dir=None, 66 | show_score_thr=0.3): 67 | model.eval() 68 | results = [] 69 | dataset = data_loader.dataset 70 | prog_bar = PrintBar(len(dataset)) 71 | for i, data in enumerate(data_loader): 72 | with torch.no_grad(): 73 | result = model(return_loss=False, rescale=True, **data) 74 | 75 | batch_size = len(result) 76 | if show or out_dir: 77 | if batch_size == 1 and isinstance(data['img'][0], torch.Tensor): 78 | img_tensor = data['img'][0] 79 | else: 80 | img_tensor = data['img'][0].data[0] 81 | img_metas = data['img_metas'][0].data[0] 82 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 83 | assert len(imgs) == len(img_metas) 84 | 85 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 86 | h, w, _ = img_meta['img_shape'] 87 | img_show = img[:h, :w, :] 88 | 89 | ori_h, ori_w = img_meta['ori_shape'][:-1] 90 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 91 | 92 | if out_dir: 93 | out_file = osp.join(out_dir, img_meta['ori_filename']) 94 | else: 95 | out_file = None 96 | 97 | model.module.show_result( 98 | img_show, 99 | result[i], 100 | show=show, 101 | out_file=out_file, 102 | score_thr=show_score_thr) 103 | 104 | # encode mask results 105 | if isinstance(result[0], tuple): 106 | result = [(bbox_results, encode_mask_results(mask_results)) 107 | for bbox_results, mask_results in result] 108 | results.extend(result) 109 | prog_bar.update(batch_size) 110 | return results 111 | 112 | 113 | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): 114 | """Test model with multiple gpus. 
115 | 116 | This method tests model with multiple gpus and collects the results 117 | under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' 118 | it encodes results to gpu tensors and use gpu communication for results 119 | collection. On cpu mode it saves the results on different gpus to 'tmpdir' 120 | and collects them by the rank 0 worker. 121 | 122 | Args: 123 | model (nn.Module): Model to be tested. 124 | data_loader (nn.Dataloader): Pytorch data loader. 125 | tmpdir (str): Path of directory to save the temporary results from 126 | different gpus under cpu mode. 127 | gpu_collect (bool): Option to use either gpu or cpu to collect results. 128 | 129 | Returns: 130 | list: The prediction results. 131 | """ 132 | model.eval() 133 | results = [] 134 | dataset = data_loader.dataset 135 | rank, world_size = get_dist_info() 136 | if rank == 0: 137 | prog_bar = PrintBar(len(dataset)) 138 | time.sleep(2) # This line can prevent deadlock problem in some cases. 139 | for i, data in enumerate(data_loader): 140 | with torch.no_grad(): 141 | result = model(return_loss=False, rescale=True, **data) 142 | # encode mask results 143 | if isinstance(result[0], tuple): 144 | result = [(bbox_results, encode_mask_results(mask_results)) 145 | for bbox_results, mask_results in result] 146 | results.extend(result) 147 | 148 | if rank == 0: 149 | batch_size = len(result) 150 | prog_bar.update(batch_size * world_size) 151 | # collect results from all ranks 152 | if gpu_collect: 153 | results = collect_results_gpu(results, len(dataset)) 154 | else: 155 | results = collect_results_cpu(results, len(dataset), tmpdir) 156 | return results 157 | -------------------------------------------------------------------------------- /mmdet_extension/core/utils/image.py: -------------------------------------------------------------------------------- 1 | # Modified from https://github.com/open-mmlab/mmdetection 2 | """ 3 | support different colors for different classes 4 | """ 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from matplotlib.collections import PatchCollection 8 | from matplotlib.patches import Polygon 9 | import mmcv 10 | 11 | from mmdet_extension.core.utils.colormap import colormap 12 | 13 | EPS = 1e-2 14 | 15 | 16 | def imshow_det_bboxes(img, 17 | bboxes, 18 | labels, 19 | segms=None, 20 | class_names=None, 21 | score_thr=0, 22 | mask_color=None, 23 | thickness=2, 24 | font_size=13, 25 | win_name='', 26 | show=True, 27 | wait_time=0, 28 | out_file=None): 29 | """Draw bboxes and class labels (with scores) on an image. 30 | 31 | Args: 32 | img (str or ndarray): The image to be displayed. 33 | bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or 34 | (n, 5). 35 | labels (ndarray): Labels of bboxes. 36 | segms (ndarray or None): Masks, shaped (n,h,w) or None 37 | class_names (list[str]): Names of each classes. 38 | score_thr (float): Minimum score of bboxes to be shown. Default: 0 39 | bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines. 40 | The tuple of color should be in BGR order. Default: 'green' 41 | text_color (str or tuple(int) or :obj:`Color`):Color of texts. 42 | The tuple of color should be in BGR order. Default: 'green' 43 | mask_color (str or tuple(int) or :obj:`Color`, optional): 44 | Color of masks. The tuple of color should be in BGR order. 45 | Default: None 46 | thickness (int): Thickness of lines. Default: 2 47 | font_size (int): Font size of texts. Default: 13 48 | show (bool): Whether to show the image. 
Default: True 49 | win_name (str): The window name. Default: '' 50 | wait_time (float): Value of waitKey param. Default: 0. 51 | out_file (str, optional): The filename to write the image. 52 | Default: None 53 | 54 | Returns: 55 | ndarray: The image with bboxes drawn on it. 56 | """ 57 | assert bboxes.ndim == 2, \ 58 | f' bboxes ndim should be 2, but its ndim is {bboxes.ndim}.' 59 | assert labels.ndim == 1, \ 60 | f' labels ndim should be 1, but its ndim is {labels.ndim}.' 61 | assert bboxes.shape[0] == labels.shape[0], \ 62 | 'bboxes.shape[0] and labels.shape[0] should have the same length.' 63 | assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5, \ 64 | f' bboxes.shape[1] should be 4 or 5, but its {bboxes.shape[1]}.' 65 | img = mmcv.imread(img) 66 | 67 | if score_thr > 0: 68 | assert bboxes.shape[1] == 5 69 | scores = bboxes[:, -1] 70 | inds = scores > score_thr 71 | bboxes = bboxes[inds, :] 72 | labels = labels[inds] 73 | if segms is not None: 74 | segms = segms[inds, ...] 75 | 76 | color_list = colormap(maximum=1, num_classes=len(class_names)) 77 | 78 | mask_colors = [] 79 | if labels.shape[0] > 0: 80 | if mask_color is None: 81 | # random color 82 | np.random.seed(42) 83 | mask_colors = [ 84 | np.random.randint(0, 256, (1, 3), dtype=np.uint8) 85 | for _ in range(max(labels) + 1) 86 | ] 87 | else: 88 | # specify color 89 | mask_colors = [np.array(mmcv.color_val(mask_color)[::-1], dtype=np.uint8)] * (max(labels) + 1) 90 | 91 | img = mmcv.bgr2rgb(img) 92 | width, height = img.shape[1], img.shape[0] 93 | img = np.ascontiguousarray(img) 94 | 95 | fig = plt.figure(win_name, frameon=False) 96 | plt.title(win_name) 97 | canvas = fig.canvas 98 | dpi = fig.get_dpi() 99 | # add a small EPS to avoid precision lost due to matplotlib's truncation 100 | # (https://github.com/matplotlib/matplotlib/issues/15363) 101 | fig.set_size_inches((width + EPS) / dpi, (height + EPS) / dpi) 102 | 103 | # remove white edges by set subplot margin 104 | plt.subplots_adjust(left=0, right=1, bottom=0, top=1) 105 | ax = plt.gca() 106 | ax.axis('off') 107 | 108 | polygons = [] 109 | color = [] 110 | for i, (bbox, label) in enumerate(zip(bboxes, labels)): 111 | bbox_int = bbox.astype(np.int32) 112 | poly = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]], 113 | [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]] 114 | np_poly = np.array(poly).reshape((4, 2)) 115 | polygons.append(Polygon(np_poly)) 116 | color.append(color_list[label]) 117 | label_text = class_names[ 118 | label] if class_names is not None else f'class {label}' 119 | if len(bbox) > 4: 120 | label_text += f'|{bbox[-1]:.02f}' 121 | ax.text( 122 | bbox_int[0], 123 | bbox_int[1], 124 | f'{label_text}', 125 | bbox={ 126 | 'facecolor': 'black', 127 | 'alpha': 0.8, 128 | 'pad': 0.7, 129 | 'edgecolor': 'none' 130 | }, 131 | color=color_list[label], 132 | # color=(1, 1, 1), 133 | fontsize=font_size, 134 | verticalalignment='top', 135 | horizontalalignment='left') 136 | if segms is not None: 137 | color_mask = mask_colors[labels[i]] 138 | mask = segms[i].astype(bool) 139 | img[mask] = img[mask] * 0.5 + color_mask * 0.5 140 | 141 | plt.imshow(img) 142 | 143 | p = PatchCollection( 144 | polygons, facecolor='none', edgecolors=color, linewidths=thickness) 145 | ax.add_collection(p) 146 | 147 | stream, _ = canvas.print_to_buffer() 148 | buffer = np.frombuffer(stream, dtype='uint8') 149 | img_rgba = buffer.reshape(height, width, 4) 150 | rgb, alpha = np.split(img_rgba, [3], axis=2) 151 | img = rgb.astype('uint8') 152 | img = mmcv.rgb2bgr(img) 153 | 154 | if 
show: 155 | # We do not use cv2 for display because in some cases, opencv will 156 | # conflict with Qt, it will output a warning: Current thread 157 | # is not the object's thread. You can refer to 158 | # https://github.com/opencv/opencv-python/issues/46 for details 159 | if wait_time == 0: 160 | plt.show() 161 | else: 162 | plt.show(block=False) 163 | plt.pause(wait_time) 164 | if out_file is not None: 165 | mmcv.imwrite(img, out_file) 166 | 167 | plt.close() 168 | 169 | return img 170 | -------------------------------------------------------------------------------- /mmdet_extension/models/detectors/unbiased_teacher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | Re-implementation: Unbiased teacher for semi-supervised object detection 5 | 6 | There are several differences with official implementation: 7 | 1. we only use the strong-augmentation version of labeled data rather than \ 8 | the strong-augmentation and weak-augmentation version of labeled data. 9 | """ 10 | import numpy as np 11 | import torch 12 | 13 | from mmcv.runner.dist_utils import get_dist_info 14 | 15 | from mmdet.utils import get_root_logger 16 | from mmdet.models.builder import DETECTORS 17 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps 18 | 19 | from mmdet_extension.models.detectors import SemiTwoStageDetector 20 | 21 | 22 | @DETECTORS.register_module() 23 | class UnbiasedTeacher(SemiTwoStageDetector): 24 | def __init__(self, 25 | backbone, 26 | rpn_head, 27 | roi_head, 28 | train_cfg, 29 | test_cfg, 30 | neck=None, 31 | pretrained=None, 32 | # ema model 33 | ema_config=None, 34 | ema_ckpt=None, 35 | # ut config 36 | cfg=dict(), 37 | ): 38 | super().__init__(backbone=backbone, rpn_head=rpn_head, roi_head=roi_head, train_cfg=train_cfg, 39 | test_cfg=test_cfg, neck=neck, pretrained=pretrained, 40 | ema_config=ema_config, ema_ckpt=ema_ckpt) 41 | self.debug = cfg.get('debug', False) 42 | self.num_classes = self.roi_head.bbox_head.num_classes 43 | self.cur_iter = 0 44 | 45 | # hyper-parameter 46 | self.score_thr = cfg.get('score_thr', 0.7) 47 | self.weight_u = cfg.get('weight_u', 2.0) 48 | self.use_bbox_reg = cfg.get('use_bbox_reg', False) 49 | self.momentum = cfg.get('momentum', 0.996) 50 | 51 | # analysis 52 | self.image_num = 0 53 | self.pseudo_num = np.zeros(self.num_classes) 54 | self.pseudo_num_tp = np.zeros(self.num_classes) 55 | self.pseudo_num_gt = np.zeros(self.num_classes) 56 | 57 | def forward_train_semi( 58 | self, img, img_metas, gt_bboxes, gt_labels, 59 | img_unlabeled, img_metas_unlabeled, gt_bboxes_unlabeled, gt_labels_unlabeled, 60 | img_unlabeled_1, img_metas_unlabeled_1, gt_bboxes_unlabeled_1, gt_labels_unlabeled_1, 61 | ): 62 | device = img.device 63 | self.image_num += len(img_metas_unlabeled) 64 | self.update_ema_model(self.momentum) 65 | self.cur_iter += 1 66 | self.analysis() 67 | # # ---------------------label data--------------------- 68 | losses = self.forward_train(img, img_metas, gt_bboxes, gt_labels) 69 | losses = self.parse_loss(losses) 70 | # # -------------------unlabeled data------------------- 71 | bbox_transform = [] 72 | for img_meta in img_metas_unlabeled_1: 73 | bbox_transform.append(img_meta.pop('bbox_transform')) 74 | bbox_results = self.inference_unlabeled( 75 | img_unlabeled, img_metas_unlabeled, rescale=True 76 | ) 77 | gt_bboxes_pred, gt_labels_pred = 
self.create_pseudo_results( 78 | img_unlabeled_1, bbox_results, bbox_transform, device, 79 | gt_bboxes_unlabeled, gt_labels_unlabeled, img_metas_unlabeled # for analysis 80 | ) 81 | if self.debug: 82 | self.visual_online(img_unlabeled_1, gt_bboxes_pred, gt_labels_pred) 83 | losses_unlabeled = self.forward_train(img_unlabeled_1, img_metas_unlabeled_1, 84 | gt_bboxes_pred, gt_labels_pred) 85 | losses_unlabeled = self.parse_loss(losses_unlabeled) 86 | for key, val in losses_unlabeled.items(): 87 | if key.find('loss') == -1: 88 | continue 89 | if key.find('bbox') != -1: 90 | losses_unlabeled[key] = self.weight_u * val if self.use_bbox_reg else 0 * val 91 | else: 92 | losses_unlabeled[key] = self.weight_u * val 93 | losses.update({f'{key}_unlabeled': val for key, val in losses_unlabeled.items()}) 94 | extra_info = { 95 | 'pseudo_num': torch.Tensor([self.pseudo_num.sum() / self.image_num]).to(device), 96 | 'pseudo_num(acc)': torch.Tensor([self.pseudo_num_tp.sum() / self.pseudo_num.sum()]).to(device) 97 | } 98 | losses.update(extra_info) 99 | return losses 100 | 101 | def create_pseudo_results(self, img, bbox_results, box_transform, device, 102 | gt_bboxes=None, gt_labels=None, img_metas=None): 103 | """using dynamic score to create pseudo results""" 104 | gt_bboxes_pred, gt_labels_pred = [], [] 105 | _, _, h, w = img.shape 106 | use_gt = gt_bboxes is not None 107 | for b, result in enumerate(bbox_results): 108 | bboxes, labels = [], [] 109 | if use_gt: 110 | gt_bbox, gt_label = gt_bboxes[b].cpu().numpy(), gt_labels[b].cpu().numpy() 111 | scale_factor = img_metas[b]['scale_factor'] 112 | gt_bbox_scale = gt_bbox / scale_factor 113 | for cls, r in enumerate(result): 114 | label = cls * np.ones_like(r[:, 0], dtype=np.uint8) 115 | flag = r[:, -1] >= self.score_thr 116 | bboxes.append(r[flag][:, :4]) 117 | labels.append(label[flag]) 118 | if use_gt and (gt_label == cls).sum() > 0 and len(bboxes[-1]) > 0: 119 | overlap = bbox_overlaps(bboxes[-1], gt_bbox_scale[gt_label == cls]) 120 | iou = overlap.max(-1) 121 | self.pseudo_num_tp[cls] += (iou > 0.5).sum() 122 | self.pseudo_num_gt[cls] += (gt_label == cls).sum() 123 | self.pseudo_num[cls] += len(bboxes[-1]) 124 | bboxes = np.concatenate(bboxes) 125 | labels = np.concatenate(labels) 126 | for bf in box_transform[b]: 127 | bboxes, labels = bf(bboxes, labels) 128 | gt_bboxes_pred.append(torch.from_numpy(bboxes).float().to(device)) 129 | gt_labels_pred.append(torch.from_numpy(labels).long().to(device)) 130 | return gt_bboxes_pred, gt_labels_pred 131 | 132 | def analysis(self): 133 | if self.cur_iter % 500 == 0 and get_dist_info()[0] == 0: 134 | logger = get_root_logger() 135 | info = ' '.join([f'{b / (a + 1e-10):.2f}({a}-{cls})' for cls, a, b 136 | in zip(self.CLASSES, self.pseudo_num, self.pseudo_num_tp)]) 137 | info_gt = ' '.join([f'{a}' for a in self.pseudo_num_gt]) 138 | logger.info(f'pseudo pos: {info}') 139 | logger.info(f'pseudo gt: {info_gt}') 140 | -------------------------------------------------------------------------------- /mmdet_extension/core/bbox/samplers/random_sampler_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 
2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | import torch 4 | from mmdet.core.bbox.builder import BBOX_SAMPLERS 5 | from mmdet.core.bbox.samplers import RandomSampler, SamplingResult 6 | from mmdet_extension.core.bbox.samplers.sampling_result_lm import SamplingResultLM 7 | 8 | 9 | @BBOX_SAMPLERS.register_module() 10 | class RandomSamplerLM(RandomSampler): 11 | def _sample_neg_ig(self, assign_result, assign_result_ig, num_expected, **kwargs): 12 | pos_inds, ig_inds = assign_result.gt_inds, assign_result_ig.gt_inds 13 | if len(pos_inds) != len(ig_inds): 14 | ig_inds = torch.cat([pos_inds.new_ones(len(pos_inds) - len(ig_inds)), ig_inds]) 15 | neg_inds = torch.nonzero((pos_inds == 0) & (ig_inds == 0), as_tuple=False) 16 | if neg_inds.numel() != 0: 17 | neg_inds = neg_inds.squeeze(1) 18 | if len(neg_inds) <= num_expected: 19 | return neg_inds 20 | else: 21 | return self.random_choice(neg_inds, num_expected) 22 | 23 | # NOTE: here the start index is not same as pos and neg 24 | def _sample_ignore(self, assign_result, assign_result_ig, num_expected, **kwargs): 25 | pos_inds, ig_inds = assign_result.gt_inds, assign_result_ig.gt_inds 26 | if len(pos_inds) != len(ig_inds): 27 | pos_inds = pos_inds[-len(ig_inds):] 28 | select_inds = torch.nonzero((pos_inds <= 0) & (ig_inds > 0), as_tuple=False) 29 | if select_inds.numel() != 0: 30 | select_inds = select_inds.squeeze(1) 31 | if len(select_inds) <= num_expected: 32 | return select_inds 33 | else: 34 | return self.random_choice(select_inds, num_expected) 35 | 36 | def sample_ig(self, 37 | assign_result, 38 | assign_result_ig, 39 | bboxes, 40 | gt_bboxes, 41 | gt_labels=None, 42 | **kwargs): 43 | if len(bboxes.shape) < 2: 44 | bboxes = bboxes[None, :] 45 | bboxes = bboxes[:, :4] 46 | gt_flags = bboxes.new_zeros((bboxes.shape[0],), dtype=torch.uint8) 47 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 48 | if gt_labels is None: 49 | raise ValueError( 50 | 'gt_labels must be given when add_gt_as_proposals is True') 51 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 52 | assign_result.add_gt_(gt_labels) 53 | assign_result_ig.add_gt_(gt_labels) 54 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 55 | gt_flags = torch.cat([gt_ones, gt_flags]) 56 | num_expected_pos = int(self.num * self.pos_fraction) 57 | pos_inds = self.pos_sampler._sample_pos( 58 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 59 | pos_inds = pos_inds.unique() 60 | num_sampled_pos = pos_inds.numel() 61 | num_expected_ig = num_expected_pos - num_sampled_pos 62 | ig_inds = self.pos_sampler._sample_ignore( 63 | assign_result, assign_result_ig, num_expected_ig, bboxes=bboxes, **kwargs 64 | ) 65 | ig_inds = ig_inds.unique() 66 | num_sampled_ig = ig_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos - num_sampled_ig 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg_ig( 74 | assign_result, assign_result_ig, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 77 | assign_result, gt_flags) 78 | if kwargs.get('with_ignore', False): 79 | return sampling_result, ig_inds 80 | else: 81 | return sampling_result 82 | 83 | def sample_pos_ig(self, 84 | assign_result, 85 | assign_result_ig, 86 | bboxes, 87 | gt_bboxes, 88 | gt_labels=None, 89 | 
gt_bboxes_ignore=None, 90 | gt_labels_ignore=None, 91 | **kwargs): 92 | if len(bboxes.shape) < 2: 93 | bboxes = bboxes[None, :] 94 | bboxes = bboxes[:, :4] 95 | gt_flags = bboxes.new_zeros((bboxes.shape[0],), dtype=torch.uint8) 96 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 97 | if gt_labels is None: 98 | raise ValueError('gt_labels must be given when add_gt_as_proposals is True') 99 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 100 | assign_result.add_gt_(gt_labels) 101 | assign_result_ig.add_ig_(gt_labels) 102 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 103 | gt_flags = torch.cat([gt_ones, gt_flags]) 104 | if self.add_gt_as_proposals and len(gt_bboxes_ignore) > 0: 105 | bboxes = torch.cat([gt_bboxes_ignore, bboxes], dim=0) 106 | assign_result.add_ig_(gt_labels_ignore) 107 | assign_result_ig.add_gt_(gt_labels_ignore) 108 | gt_ones = bboxes.new_ones(gt_bboxes_ignore.shape[0], dtype=torch.uint8) 109 | gt_flags = torch.cat([gt_ones, gt_flags]) 110 | num_expected_pos = int(self.num * self.pos_fraction) 111 | # sample pos 112 | pos_inds = self.pos_sampler._sample_pos( 113 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 114 | pos_inds = pos_inds.unique() 115 | num_sampled_pos = pos_inds.numel() 116 | # sample ignore 117 | num_expected_ig = num_expected_pos - num_sampled_pos 118 | ig_inds = self.pos_sampler._sample_pos( 119 | assign_result_ig, num_expected_ig, bboxes=bboxes, **kwargs) 120 | ig_inds = ig_inds.unique() 121 | num_sampled_ig = ig_inds.numel() 122 | # sample negative 123 | num_expected_neg = self.num - num_sampled_pos - num_sampled_ig 124 | if self.neg_pos_ub >= 0: 125 | _pos = max(1, num_sampled_pos) 126 | neg_upper_bound = int(self.neg_pos_ub * _pos) 127 | if num_expected_neg > neg_upper_bound: 128 | num_expected_neg = neg_upper_bound 129 | neg_inds = self.neg_sampler._sample_neg_ig( 130 | assign_result, assign_result_ig, num_expected_neg, bboxes=bboxes, **kwargs) 131 | neg_inds = neg_inds.unique() 132 | sampling_result = SamplingResultLM( 133 | pos_inds, ig_inds, neg_inds, bboxes, gt_bboxes, 134 | gt_bboxes_ignore, assign_result, assign_result_ig, gt_flags) 135 | return sampling_result 136 | -------------------------------------------------------------------------------- /mmdet_extension/apis/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | modify from mmdet.apis.train: 5 | 1. support ema model 6 | """ 7 | import warnings 8 | 9 | import torch 10 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 11 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 12 | Fp16OptimizerHook, OptimizerHook, build_optimizer, 13 | build_runner) 14 | from mmcv.utils import build_from_cfg 15 | 16 | from mmdet.datasets import (build_dataloader, build_dataset, 17 | replace_ImageToTensor) 18 | from mmdet.utils import get_root_logger 19 | from mmdet_extension.core.hooks.semi_eval_hooks import SemiEvalHook, SemiDistEvalHook 20 | 21 | 22 | def train_detector(model, 23 | dataset, 24 | cfg, 25 | distributed=False, 26 | validate=False, 27 | timestamp=None, 28 | meta=None): 29 | logger = get_root_logger(cfg.log_level) 30 | 31 | # prepare data loaders 32 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 33 | if 'imgs_per_gpu' in cfg.data: 34 | logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. 
' 35 | 'Please use "samples_per_gpu" instead') 36 | if 'samples_per_gpu' in cfg.data: 37 | logger.warning( 38 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' 39 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' 40 | f'={cfg.data.imgs_per_gpu} is used in this experiments') 41 | else: 42 | logger.warning( 43 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' 44 | f'{cfg.data.imgs_per_gpu} in this experiments') 45 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu 46 | 47 | data_loaders = [ 48 | build_dataloader( 49 | ds, 50 | cfg.data.samples_per_gpu, 51 | cfg.data.workers_per_gpu, 52 | # cfg.gpus will be ignored if distributed 53 | len(cfg.gpu_ids), 54 | dist=distributed, 55 | seed=cfg.seed) for ds in dataset 56 | ] 57 | 58 | use_ema = hasattr(model, 'ema_model') and model.ema_model is not None 59 | # put model on gpus 60 | if distributed: 61 | find_unused_parameters = cfg.get('find_unused_parameters', False) 62 | # Sets the `find_unused_parameters` parameter in 63 | # torch.nn.parallel.DistributedDataParallel 64 | model = MMDistributedDataParallel( 65 | model.cuda(), 66 | device_ids=[torch.cuda.current_device()], 67 | broadcast_buffers=False, 68 | find_unused_parameters=find_unused_parameters) 69 | if use_ema: 70 | model.module.ema_model = MMDistributedDataParallel( 71 | model.module.ema_model.cuda(), 72 | device_ids=[torch.cuda.current_device()], 73 | broadcast_buffers=False, 74 | find_unused_parameters=find_unused_parameters) 75 | else: 76 | model = MMDataParallel( 77 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 78 | if use_ema: 79 | model.module.ema_model = MMDataParallel( 80 | model.module.ema_model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 81 | 82 | # build runner 83 | optimizer = build_optimizer(model, cfg.optimizer) 84 | 85 | if 'runner' not in cfg: 86 | cfg.runner = { 87 | 'type': 'EpochBasedRunner', 88 | 'max_epochs': cfg.total_epochs 89 | } 90 | warnings.warn( 91 | 'config is now expected to have a `runner` section, ' 92 | 'please set `runner` in your config.', UserWarning) 93 | else: 94 | if 'total_epochs' in cfg: 95 | assert cfg.total_epochs == cfg.runner.max_epochs 96 | 97 | runner = build_runner( 98 | cfg.runner, 99 | default_args=dict( 100 | model=model, 101 | optimizer=optimizer, 102 | work_dir=cfg.work_dir, 103 | logger=logger, 104 | meta=meta)) 105 | 106 | # an ugly workaround to make .log and .log.json filenames the same 107 | runner.timestamp = timestamp 108 | 109 | # fp16 setting 110 | fp16_cfg = cfg.get('fp16', None) 111 | if fp16_cfg is not None: 112 | optimizer_config = Fp16OptimizerHook( 113 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed) 114 | elif distributed and 'type' not in cfg.optimizer_config: 115 | optimizer_config = OptimizerHook(**cfg.optimizer_config) 116 | else: 117 | optimizer_config = cfg.optimizer_config 118 | 119 | # register hooks 120 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 121 | cfg.checkpoint_config, cfg.log_config, 122 | cfg.get('momentum_config', None)) 123 | if distributed: 124 | if isinstance(runner, EpochBasedRunner): 125 | runner.register_hook(DistSamplerSeedHook()) 126 | 127 | # register eval hooks 128 | if validate: 129 | # Support batch_size > 1 in validation 130 | val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) 131 | if val_samples_per_gpu > 1: 132 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 133 | cfg.data.val.pipeline = replace_ImageToTensor( 134 | cfg.data.val.pipeline) 135 | val_dataset = build_dataset(cfg.data.val, 
dict(test_mode=True)) 136 | val_dataloader = build_dataloader( 137 | val_dataset, 138 | samples_per_gpu=val_samples_per_gpu, 139 | workers_per_gpu=cfg.data.workers_per_gpu, 140 | dist=distributed, 141 | shuffle=False) 142 | eval_cfg = cfg.get('evaluation', {}) 143 | eval_hook = SemiDistEvalHook if distributed else SemiEvalHook 144 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 145 | 146 | # user-defined hooks 147 | if cfg.get('custom_hooks', None): 148 | custom_hooks = cfg.custom_hooks 149 | assert isinstance(custom_hooks, list), \ 150 | f'custom_hooks expect list type, but got {type(custom_hooks)}' 151 | for hook_cfg in cfg.custom_hooks: 152 | assert isinstance(hook_cfg, dict), \ 153 | 'Each item in custom_hooks expects dict type, but got ' \ 154 | f'{type(hook_cfg)}' 155 | hook_cfg = hook_cfg.copy() 156 | priority = hook_cfg.pop('priority', 'NORMAL') 157 | hook = build_from_cfg(hook_cfg, HOOKS) 158 | runner.register_hook(hook, priority=priority) 159 | 160 | if cfg.resume_from: 161 | runner.resume(cfg.resume_from) 162 | elif cfg.load_from: 163 | runner.load_checkpoint(cfg.load_from) 164 | runner.run(data_loaders, cfg.workflow) 165 | -------------------------------------------------------------------------------- /tools/datasets_uda/convert_xml_to_json.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert VOC-format dataset to COCO format 3 | """ 4 | import argparse 5 | import os 6 | import os.path as osp 7 | import xml.etree.ElementTree as ET 8 | 9 | import mmcv 10 | import numpy as np 11 | 12 | city = ['truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus'] 13 | car = ['car'] 14 | 15 | dataset_dict = { 16 | 'city': city, 17 | 'car': car 18 | } 19 | 20 | label_ids = None 21 | 22 | 23 | def parse_xml(args): 24 | xml_path, img_path = args 25 | tree = ET.parse(xml_path) 26 | root = tree.getroot() 27 | size = root.find('size') 28 | w = int(size.find('width').text) 29 | h = int(size.find('height').text) 30 | bboxes = [] 31 | labels = [] 32 | bboxes_ignore = [] 33 | labels_ignore = [] 34 | for obj in root.findall('object'): 35 | name = obj.find('name').text 36 | label = label_ids[name] 37 | difficult = int(obj.find('difficult').text) 38 | bnd_box = obj.find('bndbox') 39 | bbox = [ 40 | int(bnd_box.find('xmin').text), 41 | int(bnd_box.find('ymin').text), 42 | int(bnd_box.find('xmax').text), 43 | int(bnd_box.find('ymax').text) 44 | ] 45 | if difficult: 46 | bboxes_ignore.append(bbox) 47 | labels_ignore.append(label) 48 | else: 49 | bboxes.append(bbox) 50 | labels.append(label) 51 | if not bboxes: 52 | bboxes = np.zeros((0, 4)) 53 | labels = np.zeros((0,)) 54 | else: 55 | bboxes = np.array(bboxes, ndmin=2) - 1 56 | labels = np.array(labels) 57 | if not bboxes_ignore: 58 | bboxes_ignore = np.zeros((0, 4)) 59 | labels_ignore = np.zeros((0,)) 60 | else: 61 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 62 | labels_ignore = np.array(labels_ignore) 63 | annotation = { 64 | 'filename': img_path, 65 | 'width': w, 66 | 'height': h, 67 | 'ann': { 68 | 'bboxes': bboxes.astype(np.float32), 69 | 'labels': labels.astype(np.int64), 70 | 'bboxes_ignore': bboxes_ignore.astype(np.float32), 71 | 'labels_ignore': labels_ignore.astype(np.int64) 72 | } 73 | } 74 | return annotation 75 | 76 | 77 | def cvt_annotations(devkit_path, out_file, classes): 78 | annotations = [] 79 | xml_root = os.path.join(devkit_path, 'Annotations') 80 | img_names = [a[:-4] for a in os.listdir(xml_root) if a.endswith('.xml')] 81 | img_paths = [ 82 | 
f'JPEGImages/{img_name}.jpg' for img_name in img_names 83 | ] 84 | xml_paths = [osp.join(devkit_path, f'Annotations/{img_name}.xml') for img_name in img_names] 85 | global label_ids 86 | label_ids = {name: i for i, name in enumerate(classes)} 87 | part_annotations = mmcv.track_progress(parse_xml, list(zip(xml_paths, img_paths))) 88 | annotations.extend(part_annotations) 89 | if out_file.endswith('json'): 90 | annotations = cvt_to_coco_json(annotations, classes) 91 | mmcv.dump(annotations, out_file) 92 | return annotations 93 | 94 | 95 | def cvt_to_coco_json(annotations, classes): 96 | image_id = 0 97 | annotation_id = 0 98 | coco = dict() 99 | coco['images'] = [] 100 | coco['type'] = 'instance' 101 | coco['categories'] = [] 102 | coco['annotations'] = [] 103 | image_set = set() 104 | 105 | def addAnnItem(annotation_id, image_id, category_id, bbox, difficult_flag): 106 | annotation_item = dict() 107 | annotation_item['segmentation'] = [] 108 | 109 | seg = [] 110 | # bbox[] is x1,y1,x2,y2 111 | # left_top 112 | seg.append(int(bbox[0])) 113 | seg.append(int(bbox[1])) 114 | # left_bottom 115 | seg.append(int(bbox[0])) 116 | seg.append(int(bbox[3])) 117 | # right_bottom 118 | seg.append(int(bbox[2])) 119 | seg.append(int(bbox[3])) 120 | # right_top 121 | seg.append(int(bbox[2])) 122 | seg.append(int(bbox[1])) 123 | 124 | annotation_item['segmentation'].append(seg) 125 | 126 | xywh = np.array( 127 | [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]) 128 | annotation_item['area'] = int(xywh[2] * xywh[3]) 129 | if difficult_flag == 1: 130 | annotation_item['ignore'] = 0 131 | annotation_item['iscrowd'] = 1 132 | else: 133 | annotation_item['ignore'] = 0 134 | annotation_item['iscrowd'] = 0 135 | annotation_item['image_id'] = int(image_id) 136 | annotation_item['bbox'] = xywh.astype(int).tolist() 137 | annotation_item['category_id'] = int(category_id) 138 | annotation_item['id'] = int(annotation_id) 139 | coco['annotations'].append(annotation_item) 140 | return annotation_id + 1 141 | 142 | for category_id, name in enumerate(classes): 143 | category_item = dict() 144 | category_item['supercategory'] = str('none') 145 | category_item['id'] = int(category_id) 146 | category_item['name'] = str(name) 147 | coco['categories'].append(category_item) 148 | 149 | for ann_dict in annotations: 150 | file_name = ann_dict['filename'] 151 | ann = ann_dict['ann'] 152 | assert file_name not in image_set 153 | image_item = dict() 154 | image_item['id'] = int(image_id) 155 | image_item['file_name'] = str(file_name) 156 | image_item['height'] = int(ann_dict['height']) 157 | image_item['width'] = int(ann_dict['width']) 158 | coco['images'].append(image_item) 159 | image_set.add(file_name) 160 | 161 | bboxes = ann['bboxes'][:, :4] 162 | labels = ann['labels'] 163 | for bbox_id in range(len(bboxes)): 164 | bbox = bboxes[bbox_id] 165 | label = labels[bbox_id] 166 | annotation_id = addAnnItem( 167 | annotation_id, image_id, label, bbox, difficult_flag=0) 168 | 169 | bboxes_ignore = ann['bboxes_ignore'][:, :4] 170 | labels_ignore = ann['labels_ignore'] 171 | for bbox_id in range(len(bboxes_ignore)): 172 | bbox = bboxes_ignore[bbox_id] 173 | label = labels_ignore[bbox_id] 174 | annotation_id = addAnnItem( 175 | annotation_id, image_id, label, bbox, difficult_flag=1) 176 | image_id += 1 177 | return coco 178 | 179 | 180 | def parse_args(): 181 | parser = argparse.ArgumentParser( 182 | description='Convert XML annotations to mmdetection format') 183 | parser.add_argument('--devkit_path', default='', help='devkit_path') 
184 | parser.add_argument('--out-name', default='', help='output file name') 185 | parser.add_argument('--dataset', default='city') 186 | args = parser.parse_args() 187 | return args 188 | 189 | 190 | def main(): 191 | args = parse_args() 192 | devkit_path = args.devkit_path 193 | out_name = args.out_name 194 | dataset = args.dataset 195 | 196 | out_dir = osp.dirname(out_name) 197 | mmcv.mkdir_or_exist(out_dir) 198 | 199 | classes = dataset_dict[dataset] 200 | cvt_annotations(devkit_path, out_name, classes) 201 | 202 | print('Done!') 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- /configs/baseline/baseline_uda.py: -------------------------------------------------------------------------------- 1 | # hyper-parameter: replace in "bash" 2 | dataset_name = data_template 3 | gpu = gpu_template 4 | total_num = 64000 5 | samples_per_gpu = 4 6 | total_iter = int(total_num / (samples_per_gpu * gpu)) 7 | test_interval = 500 8 | save_interval = 2000 9 | # # -------------------------dataset------------------------------ 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 12 | 13 | train_pipeline = [ 14 | dict(type='LoadImageFromFile'), 15 | dict(type='LoadAnnotations', with_bbox=True, with_mask=False), 16 | dict(type='Resize', img_scale=[(1333, 500), (1333, 800)], keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 22 | ] 23 | 24 | test_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(1333, 800), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']), 37 | ]) 38 | ] 39 | if dataset_name in ['C2F', 'C2B']: 40 | classes = ('truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus') 41 | else: 42 | classes = ('car',) 43 | 44 | dataset_type = 'CocoDataset' 45 | data_root = f'./dataset/{dataset_name}/' 46 | data = dict( 47 | samples_per_gpu=samples_per_gpu, 48 | workers_per_gpu=4, 49 | train=dict( 50 | type=dataset_type, 51 | ann_file=data_root + f'labeled_data.json', 52 | img_prefix=data_root + 'labeled_data/', 53 | classes=classes, 54 | pipeline=train_pipeline), 55 | val=dict( 56 | type=dataset_type, 57 | ann_file=data_root + f'test_data.json', 58 | img_prefix=data_root + 'test_data/', 59 | classes=classes, 60 | pipeline=test_pipeline), 61 | test=dict( 62 | type=dataset_type, 63 | ann_file=data_root + f'test_data.json', 64 | img_prefix=data_root + 'test_data/', 65 | classes=classes, 66 | pipeline=test_pipeline)) 67 | evaluation = dict(interval=test_interval, metric='bbox', by_epoch=False, classwise=True) 68 | 69 | # # -------------------------schedule------------------------------ 70 | # learning in faster rcnn: total-batch-size/8*0.01 71 | # epoch_num = 12 / percent: to add 72 | learning_rate = 0.001 * samples_per_gpu * gpu 73 | optimizer = dict(type='SGD', lr=learning_rate, momentum=0.9, weight_decay=0.0001) 74 | optimizer_config = dict(grad_clip=None) 75 | # learning policy 76 | lr_config = dict( 77 | policy='step', 78 | warmup='linear', 79 | warmup_iters=200, 
80 | warmup_ratio=0.001, 81 | step=[total_iter]) 82 | runner = dict(type='SemiIterBasedRunner', max_iters=total_iter) 83 | 84 | checkpoint_config = dict(interval=save_interval) 85 | # yapf:disable 86 | log_config = dict( 87 | interval=50, 88 | hooks=[ 89 | dict(type='TextLoggerHook'), 90 | ]) 91 | # yapf:enable 92 | custom_hooks = [dict(type='NumClassCheckHook')] 93 | 94 | dist_params = dict(backend='nccl') 95 | log_level = 'INFO' 96 | load_from = None 97 | resume_from = None 98 | workflow = [('train', 1)] 99 | 100 | # # -------------------------model------------------------------ 101 | model = dict( 102 | type='FasterRCNN', 103 | pretrained='./pretrained_model/backbone/vgg16_caffe.pth', 104 | backbone=dict( 105 | type='VGG', 106 | depth=16, 107 | out_indices=(4,), # stride=16 108 | with_last_pool=False, 109 | ), 110 | neck=None, 111 | rpn_head=dict( 112 | type='RPNHead', 113 | in_channels=512, 114 | feat_channels=512, 115 | anchor_generator=dict( 116 | type='AnchorGenerator', 117 | scales=[4, 8, 16, 32], 118 | ratios=[0.5, 1.0, 2.0], 119 | strides=[16]), 120 | bbox_coder=dict( 121 | type='DeltaXYWHBBoxCoder', 122 | target_means=[.0, .0, .0, .0], 123 | target_stds=[1.0, 1.0, 1.0, 1.0]), 124 | loss_cls=dict( 125 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 126 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 127 | roi_head=dict( 128 | type='StandardRoIHeadBase', 129 | bbox_roi_extractor=dict( 130 | type='SingleRoIExtractor', 131 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 132 | out_channels=512, 133 | featmap_strides=[16]), 134 | bbox_head=dict( 135 | type='Shared2FCBBoxHead', 136 | in_channels=512, 137 | fc_out_channels=1024, 138 | roi_feat_size=7, 139 | num_classes=len(classes), 140 | bbox_coder=dict( 141 | type='DeltaXYWHBBoxCoder', 142 | target_means=[0., 0., 0., 0.], 143 | target_stds=[0.1, 0.1, 0.2, 0.2]), 144 | reg_class_agnostic=True, 145 | loss_cls=dict( 146 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 147 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 148 | # model training and testing settings 149 | train_cfg=dict( 150 | rpn=dict( 151 | assigner=dict( 152 | type='MaxIoUAssigner', 153 | pos_iou_thr=0.7, 154 | neg_iou_thr=0.3, 155 | min_pos_iou=0.3, 156 | match_low_quality=True, 157 | ignore_iof_thr=-1), 158 | sampler=dict( 159 | type='RandomSampler', 160 | num=256, 161 | pos_fraction=0.5, 162 | neg_pos_ub=-1, 163 | add_gt_as_proposals=False), 164 | allowed_border=-1, 165 | pos_weight=-1, 166 | debug=False), 167 | rpn_proposal=dict( 168 | nms_pre=2000, 169 | max_per_img=1000, 170 | nms=dict(type='nms', iou_threshold=0.7), 171 | min_bbox_size=0), 172 | rcnn=dict( 173 | assigner=dict( 174 | type='MaxIoUAssigner', 175 | pos_iou_thr=0.5, 176 | neg_iou_thr=0.5, 177 | min_pos_iou=0.5, 178 | match_low_quality=False, 179 | ignore_iof_thr=-1), 180 | sampler=dict( 181 | type='RandomSampler', 182 | num=512, 183 | pos_fraction=0.25, 184 | neg_pos_ub=-1, 185 | add_gt_as_proposals=True), 186 | pos_weight=-1, 187 | debug=False)), 188 | test_cfg=dict( 189 | rpn=dict( 190 | nms_pre=1000, 191 | max_per_img=1000, 192 | nms=dict(type='nms', iou_threshold=0.7), 193 | min_bbox_size=0), 194 | rcnn=dict( 195 | score_thr=0.001, 196 | nms=dict(type='nms', iou_threshold=0.5), 197 | max_per_img=100) 198 | # soft-nms is also supported for rcnn testing 199 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 200 | )) 201 | -------------------------------------------------------------------------------- 
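Note on the UDA baseline configs above and below: they are templates rather than directly loadable Python. The tokens data_template and gpu_template are placeholders that the launch step fills in before training (the header comment '# hyper-parameter: replace in "bash"' refers to this). The sketch below shows one way such a substitution could be done; the function name render_config, the output path, and the exact substitution rules are illustrative assumptions, not the repository's actual tooling.

    import re

    def render_config(template_path, out_path, dataset_name, gpu):
        # Read the template config that still contains the literal tokens
        # data_template and gpu_template (assumed placeholder names, taken
        # from the config text above).
        with open(template_path) as f:
            text = f.read()
        # Quote the dataset name so "dataset_name = data_template" becomes
        # valid Python, and insert the GPU count as a bare integer.
        text = re.sub(r'\bdata_template\b', repr(dataset_name), text)
        text = re.sub(r'\bgpu_template\b', str(gpu), text)
        with open(out_path, 'w') as f:
            f.write(text)
        return out_path

    # Example (paths and values are illustrative):
    # cfg_file = render_config('configs/baseline/baseline_uda.py',
    #                          'work_dirs/baseline_uda_C2F.py', 'C2F', 8)

The generated file can then be passed to examples/train/train.py through its --config argument; total_iter is computed from the substituted gpu value when the config is imported.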
/configs/baseline/baseline_uda_test.py: -------------------------------------------------------------------------------- 1 | # hyper-parameter: replace in "bash" 2 | dataset_name = data_template 3 | gpu = gpu_template 4 | total_num = 64000 5 | samples_per_gpu = 4 6 | total_iter = int(total_num / (samples_per_gpu * gpu)) 7 | test_interval = 500 8 | save_interval = 2000 9 | # # -------------------------dataset------------------------------ 10 | img_norm_cfg = dict( 11 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 12 | 13 | train_pipeline = [ 14 | dict(type='LoadImageFromFile'), 15 | dict(type='LoadAnnotations', with_bbox=True, with_mask=False), 16 | dict(type='Resize', img_scale=[(1333, 500), (1333, 800)], keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 22 | ] 23 | 24 | test_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=(1333, 800), 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img']), 36 | dict(type='Collect', keys=['img']), 37 | ]) 38 | ] 39 | if dataset_name in ['C2F', 'C2B']: 40 | classes = ('truck', 'car', 'rider', 'person', 'train', 'motorcycle', 'bicycle', 'bus') 41 | else: 42 | classes = ('car',) 43 | 44 | dataset_type = 'CocoDataset' 45 | data_root = f'./dataset/{dataset_name}/' 46 | data = dict( 47 | samples_per_gpu=samples_per_gpu, 48 | workers_per_gpu=4, 49 | train=dict( 50 | type=dataset_type, 51 | ann_file=data_root + f'labeled_data.json', 52 | img_prefix=data_root + 'labeled_data/', 53 | classes=classes, 54 | pipeline=train_pipeline), 55 | val=dict( 56 | type=dataset_type, 57 | ann_file=data_root + f'test_data.json', 58 | img_prefix=data_root + 'test_data/', 59 | classes=classes, 60 | pipeline=test_pipeline), 61 | test=dict( 62 | type=dataset_type, 63 | ann_file=data_root + f'test_data.json', 64 | img_prefix=data_root + 'test_data/', 65 | classes=classes, 66 | pipeline=test_pipeline)) 67 | evaluation = dict(interval=test_interval, metric='bbox', classwise=True, iou_thrs=[0.5]) 68 | 69 | # # -------------------------schedule------------------------------ 70 | # learning in faster rcnn: total-batch-size/8*0.01 71 | # epoch_num = 12 / percent: to add 72 | learning_rate = 0.001 * samples_per_gpu * gpu 73 | optimizer = dict(type='SGD', lr=learning_rate, momentum=0.9, weight_decay=0.0001) 74 | optimizer_config = dict(grad_clip=None) 75 | # learning policy 76 | lr_config = dict( 77 | policy='step', 78 | warmup='linear', 79 | warmup_iters=200, 80 | warmup_ratio=0.001, 81 | step=[total_iter]) 82 | runner = dict(type='SemiIterBasedRunner', max_iters=total_iter) 83 | 84 | checkpoint_config = dict(interval=save_interval) 85 | # yapf:disable 86 | log_config = dict( 87 | interval=50, 88 | hooks=[ 89 | dict(type='TextLoggerHook'), 90 | ]) 91 | # yapf:enable 92 | custom_hooks = [dict(type='NumClassCheckHook')] 93 | 94 | dist_params = dict(backend='nccl') 95 | log_level = 'INFO' 96 | load_from = None 97 | resume_from = None 98 | workflow = [('train', 1)] 99 | 100 | # # -------------------------model------------------------------ 101 | model = dict( 102 | type='FasterRCNN', 103 | 
pretrained='./pretrained_model/backbone/vgg16_caffe.pth', 104 | backbone=dict( 105 | type='VGG', 106 | depth=16, 107 | out_indices=(4,), # stride=16 108 | with_last_pool=False, 109 | ), 110 | neck=None, 111 | rpn_head=dict( 112 | type='RPNHead', 113 | in_channels=512, 114 | feat_channels=512, 115 | anchor_generator=dict( 116 | type='AnchorGenerator', 117 | scales=[4, 8, 16, 32], 118 | ratios=[0.5, 1.0, 2.0], 119 | strides=[16]), 120 | bbox_coder=dict( 121 | type='DeltaXYWHBBoxCoder', 122 | target_means=[.0, .0, .0, .0], 123 | target_stds=[1.0, 1.0, 1.0, 1.0]), 124 | loss_cls=dict( 125 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 126 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 127 | roi_head=dict( 128 | type='StandardRoIHeadBase', 129 | bbox_roi_extractor=dict( 130 | type='SingleRoIExtractor', 131 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 132 | out_channels=512, 133 | featmap_strides=[16]), 134 | bbox_head=dict( 135 | type='Shared2FCBBoxHead', 136 | in_channels=512, 137 | fc_out_channels=1024, 138 | roi_feat_size=7, 139 | num_classes=len(classes), 140 | bbox_coder=dict( 141 | type='DeltaXYWHBBoxCoder', 142 | target_means=[0., 0., 0., 0.], 143 | target_stds=[0.1, 0.1, 0.2, 0.2]), 144 | reg_class_agnostic=True, 145 | loss_cls=dict( 146 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 147 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 148 | # model training and testing settings 149 | train_cfg=dict( 150 | rpn=dict( 151 | assigner=dict( 152 | type='MaxIoUAssigner', 153 | pos_iou_thr=0.7, 154 | neg_iou_thr=0.3, 155 | min_pos_iou=0.3, 156 | match_low_quality=True, 157 | ignore_iof_thr=-1), 158 | sampler=dict( 159 | type='RandomSampler', 160 | num=256, 161 | pos_fraction=0.5, 162 | neg_pos_ub=-1, 163 | add_gt_as_proposals=False), 164 | allowed_border=-1, 165 | pos_weight=-1, 166 | debug=False), 167 | rpn_proposal=dict( 168 | nms_pre=2000, 169 | max_per_img=1000, 170 | nms=dict(type='nms', iou_threshold=0.7), 171 | min_bbox_size=0), 172 | rcnn=dict( 173 | assigner=dict( 174 | type='MaxIoUAssigner', 175 | pos_iou_thr=0.5, 176 | neg_iou_thr=0.5, 177 | min_pos_iou=0.5, 178 | match_low_quality=False, 179 | ignore_iof_thr=-1), 180 | sampler=dict( 181 | type='RandomSampler', 182 | num=512, 183 | pos_fraction=0.25, 184 | neg_pos_ub=-1, 185 | add_gt_as_proposals=True), 186 | pos_weight=-1, 187 | debug=False)), 188 | test_cfg=dict( 189 | rpn=dict( 190 | nms_pre=1000, 191 | max_per_img=1000, 192 | nms=dict(type='nms', iou_threshold=0.7), 193 | min_bbox_size=0), 194 | rcnn=dict( 195 | score_thr=0.001, 196 | nms=dict(type='nms', iou_threshold=0.5), 197 | max_per_img=100) 198 | # soft-nms is also supported for rcnn testing 199 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 200 | )) 201 | -------------------------------------------------------------------------------- /examples/train/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | import warnings 7 | 8 | import mmcv 9 | import torch 10 | from mmcv import Config, DictAction 11 | from mmcv.runner import get_dist_info, init_dist 12 | from mmcv.utils import get_git_hash 13 | 14 | from mmdet import __version__ 15 | from mmdet.models import build_detector 16 | from mmdet.datasets import build_dataset 17 | from mmdet.utils import collect_env, get_root_logger 18 | from mmdet.apis import set_random_seed 19 | 20 | from mmdet_extension.apis 
import train_detector 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser(description='Train a detector') 25 | parser.add_argument('--config', default='', 26 | help='train config file path') 27 | parser.add_argument('--work-dir', help='the dir to save logs and models') 28 | parser.add_argument( 29 | '--resume-from', help='the checkpoint file to resume from') 30 | parser.add_argument( 31 | '--no-validate', 32 | action='store_true', 33 | help='whether not to evaluate the checkpoint during training') 34 | group_gpus = parser.add_mutually_exclusive_group() 35 | group_gpus.add_argument( 36 | '--gpus', 37 | type=int, 38 | help='number of gpus to use ' 39 | '(only applicable to non-distributed training)') 40 | group_gpus.add_argument( 41 | '--gpu-ids', 42 | type=int, 43 | nargs='+', 44 | help='ids of gpus to use ' 45 | '(only applicable to non-distributed training)') 46 | parser.add_argument('--seed', type=int, default=None, help='random seed') 47 | parser.add_argument( 48 | '--deterministic', 49 | action='store_true', 50 | help='whether to set deterministic options for CUDNN backend.') 51 | parser.add_argument( 52 | '--options', 53 | nargs='+', 54 | action=DictAction, 55 | help='override some settings in the used config, the key-value pair ' 56 | 'in xxx=yyy format will be merged into config file (deprecate), ' 57 | 'change to --cfg-options instead.') 58 | parser.add_argument( 59 | '--cfg-options', 60 | nargs='+', 61 | action=DictAction, 62 | help='override some settings in the used config, the key-value pair ' 63 | 'in xxx=yyy format will be merged into config file. If the value to ' 64 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 65 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 66 | 'Note that the quotation marks are necessary and that no white space ' 67 | 'is allowed.') 68 | parser.add_argument( 69 | '--launcher', 70 | choices=['none', 'pytorch', 'slurm', 'mpi'], 71 | default='none', 72 | help='job launcher') 73 | parser.add_argument('--local_rank', type=int, default=0) 74 | args = parser.parse_args() 75 | if 'LOCAL_RANK' not in os.environ: 76 | os.environ['LOCAL_RANK'] = str(args.local_rank) 77 | 78 | if args.options and args.cfg_options: 79 | raise ValueError( 80 | '--options and --cfg-options cannot be both ' 81 | 'specified, --options is deprecated in favor of --cfg-options') 82 | if args.options: 83 | warnings.warn('--options is deprecated in favor of --cfg-options') 84 | args.cfg_options = args.options 85 | 86 | return args 87 | 88 | 89 | def main(): 90 | args = parse_args() 91 | 92 | cfg = Config.fromfile(args.config) 93 | if args.cfg_options is not None: 94 | cfg.merge_from_dict(args.cfg_options) 95 | # import modules from string list. 
96 | if cfg.get('custom_imports', None): 97 | from mmcv.utils import import_modules_from_strings 98 | import_modules_from_strings(**cfg['custom_imports']) 99 | # set cudnn_benchmark 100 | if cfg.get('cudnn_benchmark', False): 101 | torch.backends.cudnn.benchmark = True 102 | 103 | # work_dir is determined in this priority: CLI > segment in file > filename 104 | if args.work_dir is not None: 105 | # update configs according to CLI args if args.work_dir is not None 106 | cfg.work_dir = args.work_dir 107 | elif cfg.get('work_dir', None) is None: 108 | # use config filename as default work_dir if cfg.work_dir is None 109 | cfg.work_dir = osp.join('./work_dirs', 110 | osp.splitext(osp.basename(args.config))[0]) 111 | if args.resume_from is not None: 112 | cfg.resume_from = args.resume_from 113 | if args.gpu_ids is not None: 114 | cfg.gpu_ids = args.gpu_ids 115 | else: 116 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 117 | 118 | # init distributed env first, since logger depends on the dist info. 119 | if args.launcher == 'none': 120 | distributed = False 121 | else: 122 | distributed = True 123 | init_dist(args.launcher, **cfg.dist_params) 124 | # re-set gpu_ids with distributed training mode 125 | _, world_size = get_dist_info() 126 | cfg.gpu_ids = range(world_size) 127 | 128 | # create work_dir 129 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 130 | # dump config 131 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 132 | # init the logger before other steps 133 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 134 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 135 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 136 | 137 | # init the meta dict to record some important information such as 138 | # environment info and seed, which will be logged 139 | meta = dict() 140 | # log env info 141 | env_info_dict = collect_env() 142 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 143 | dash_line = '-' * 60 + '\n' 144 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 145 | dash_line) 146 | meta['env_info'] = env_info 147 | meta['config'] = cfg.pretty_text 148 | # log some basic info 149 | logger.info(f'Distributed training: {distributed}') 150 | logger.info(f'Config:\n{cfg.pretty_text}') 151 | 152 | # set random seeds 153 | if args.seed is not None: 154 | logger.info(f'Set random seed to {args.seed}, ' 155 | f'deterministic: {args.deterministic}') 156 | set_random_seed(args.seed, deterministic=args.deterministic) 157 | cfg.seed = args.seed 158 | meta['seed'] = args.seed 159 | meta['exp_name'] = osp.basename(args.config) 160 | 161 | model = build_detector( 162 | cfg.model, 163 | train_cfg=cfg.get('train_cfg'), 164 | test_cfg=cfg.get('test_cfg')) 165 | 166 | datasets = [build_dataset(cfg.data.train)] 167 | if len(cfg.workflow) == 2: 168 | val_dataset = copy.deepcopy(cfg.data.val) 169 | val_dataset.pipeline = cfg.data.train.pipeline 170 | datasets.append(build_dataset(val_dataset)) 171 | if cfg.checkpoint_config is not None: 172 | # save mmdet version, config file content and class names in 173 | # checkpoints as meta data 174 | cfg.checkpoint_config.meta = dict( 175 | mmdet_version=__version__ + get_git_hash()[:7], 176 | CLASSES=datasets[0].CLASSES) 177 | # not to validate if the val is None 178 | if cfg.data.val is None: 179 | args.no_validate = True 180 | # add an attribute for visualization convenience 181 | model.CLASSES = datasets[0].CLASSES 182 | train_detector( 183 | model, 
184 | datasets, 185 | cfg, 186 | distributed=distributed, 187 | validate=(not args.no_validate), 188 | timestamp=timestamp, 189 | meta=meta) 190 | 191 | 192 | if __name__ == '__main__': 193 | main() 194 | -------------------------------------------------------------------------------- /mmdet_extension/models/detectors/semi_base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | """ 3 | Base-Detector for semi-supervised learning 4 | """ 5 | import cv2 6 | import os 7 | from collections import OrderedDict 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | 11 | import torch 12 | from torch import nn 13 | 14 | import mmcv 15 | from mmcv.runner import load_checkpoint 16 | from mmdet.models.builder import build_detector 17 | 18 | from mmdet_extension.core.utils.classes import COCO_CLASSES 19 | 20 | 21 | class SemiBaseDetector(nn.Module): 22 | CLASSES = COCO_CLASSES 23 | 24 | def __init__(self, 25 | ema_config=None, 26 | ema_ckpt=None, 27 | classes=None 28 | ): 29 | if ema_config is not None: 30 | if isinstance(ema_config, str): 31 | ema_config = mmcv.Config.fromfile(ema_config) 32 | self.ema_model = build_detector(ema_config['model']) 33 | if ema_ckpt is not None: 34 | load_checkpoint(self.ema_model, ema_ckpt, map_location='cpu') 35 | self.ema_model.eval() 36 | else: 37 | self.ema_model = None 38 | if classes is not None: 39 | self.CLASSES = classes 40 | 41 | def forward(self, img, img_metas, return_loss=True, **kwargs): 42 | if return_loss: 43 | if 'img_unlabeled' in kwargs or 'is_label_data' in kwargs: 44 | return self.forward_train_semi(img, img_metas, **kwargs) 45 | else: 46 | return self.forward_train(img, img_metas, **kwargs) 47 | else: 48 | return self.forward_test(img, img_metas, **kwargs) 49 | 50 | @staticmethod 51 | def parse_loss(losses): 52 | for loss_name, loss_value in losses.items(): 53 | if isinstance(loss_value, torch.Tensor): 54 | losses[loss_name] = loss_value.mean() 55 | elif isinstance(loss_value, list): 56 | losses[loss_name] = sum(_loss.mean() for _loss in loss_value) 57 | return losses 58 | 59 | @staticmethod 60 | def split_pos_ig(gt_bboxes, gt_labels, with_ig_label=False): 61 | gt_bboxes_pos, gt_bboxes_ig = [], [] 62 | gt_labels_pos, gt_labels_ig = [], [] 63 | for i, (bboxes, labels) in enumerate(zip(gt_bboxes, gt_labels)): 64 | ig_idx = labels < 0 65 | gt_bboxes_ig.append(bboxes[ig_idx]) 66 | gt_bboxes_pos.append(bboxes[~ig_idx]) 67 | gt_labels_ig.append(-2 - labels[ig_idx]) 68 | gt_labels_pos.append(labels[~ig_idx]) 69 | if with_ig_label: 70 | return gt_bboxes_pos, gt_labels_pos, gt_bboxes_ig, gt_labels_ig 71 | else: 72 | return gt_bboxes_pos, gt_labels_pos, gt_bboxes_ig 73 | 74 | def update_ema_model(self, momentum=0.99): 75 | model_dict = self.state_dict() 76 | new_dict = OrderedDict() 77 | for key, value in self.ema_model.state_dict().items(): 78 | if key[7:] in model_dict.keys(): 79 | new_dict[key] = ( 80 | model_dict[key[7:]] * (1 - momentum) + value * momentum 81 | ) 82 | else: 83 | raise Exception("{} is not found in student model".format(key)) 84 | self.ema_model.load_state_dict(new_dict) 85 | 86 | def cuda(self, device=None): 87 | """Since ema_model is registered as a plain object, it is necessary 88 | to put the ema model to cuda when calling cuda function.""" 89 | if self.ema_model: 90 | self.ema_model.cuda(device=device) 91 | return super().cuda(device=device) 92 | 93 | def __setattr__(self, name, value): 94 | # not 
update ema_model in optimizer 95 | if name == 'ema_model': 96 | object.__setattr__(self, name, value) 97 | else: 98 | super().__setattr__(name, value) 99 | 100 | # # -----------------------debug function (visualization)----------------------- 101 | def visual_online(self, img, boxes_list, labels_list, img_id=0, 102 | boxes_ignore_list=None, proposal_list=None): 103 | img_norm_cfg = dict( 104 | mean=np.array([123.675, 116.28, 103.53]), std=np.array([58.395, 57.12, 57.375]) 105 | ) 106 | img_np = img[img_id].permute(1, 2, 0).cpu().numpy() 107 | img_np = mmcv.imdenormalize(img_np, **img_norm_cfg) 108 | img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2RGB) 109 | boxes, labels = boxes_list[img_id], labels_list[img_id] 110 | # proposal 111 | if proposal_list: 112 | proposal = proposal_list[img_id] 113 | for idx, box in enumerate(proposal[:, :4]): 114 | x1, y1, x2, y2 = [int(a.cpu().item()) for a in box] 115 | img_np = cv2.rectangle(img_np, (x1, y1), (x2, y2), (214, 39, 40), 2) 116 | cv2.putText(img_np, f'{idx}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, 117 | (214, 39, 40), 2) 118 | # ignore 119 | if boxes_ignore_list: 120 | boxes_ignore = boxes_ignore_list[img_id] 121 | for idx, box in enumerate(boxes_ignore): 122 | x1, y1, x2, y2 = [int(a.cpu().item()) for a in box] 123 | img_np = cv2.rectangle(img_np, (x1, y1), (x2, y2), (44, 160, 44), 2) 124 | cv2.putText(img_np, f'{idx}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, 125 | (44, 160, 44), 2) 126 | # pseudo gt 127 | for idx, (box, label) in enumerate(zip(boxes, labels)): 128 | x1, y1, x2, y2 = [int(a.cpu().item()) for a in box] 129 | img_np = cv2.rectangle(img_np, (x1, y1), (x2, y2), (157, 80, 136), 2) 130 | cv2.putText(img_np, f'{idx}, {self.CLASSES[label]}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, 131 | (157, 80, 136), 2) 132 | plt.imshow(img_np.astype(np.uint8)) 133 | plt.show() 134 | 135 | def visual_offline(self, img, boxes_list, labels_list, img_metas, img_id=[], 136 | boxes_ignore_list=None): 137 | img_norm_cfg = dict( 138 | mean=np.array([123.675, 116.28, 103.53]), std=np.array([58.395, 57.12, 57.375]) 139 | ) 140 | out_root = './visual_offline' 141 | if not os.path.exists(out_root): 142 | os.makedirs(out_root) 143 | if len(img_id) == 0: 144 | img_id = list(range(len(img))) 145 | for id in img_id: 146 | img_np = img[id].permute(1, 2, 0).cpu().numpy() 147 | img_np = mmcv.imdenormalize(img_np, **img_norm_cfg) 148 | boxes, labels = boxes_list[id], labels_list[id] 149 | for box, label in zip(boxes, labels): 150 | x1, y1, x2, y2 = [int(a.cpu().item()) for a in box] 151 | img_np = cv2.rectangle(img_np, (x1, y1), (x2, y2), (157, 80, 136), 2) 152 | cv2.putText(img_np, self.CLASSES[label], (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, 153 | (157, 80, 136), 2) 154 | if boxes_ignore_list: 155 | boxes_ignore = boxes_ignore_list[id] 156 | for box in boxes_ignore: 157 | x1, y1, x2, y2 = [int(a.cpu().item()) for a in box] 158 | img_np = cv2.rectangle(img_np, (x1, y1), (x2, y2), (44, 160, 44), 2) 159 | img_name = img_metas[id]['filename'].split('/')[-1] 160 | mmcv.imwrite(img_np, os.path.join(out_root, img_name)) 161 | 162 | def rescale_bboxes(self, bboxes, meta, bbox_transform): 163 | device = bboxes.device 164 | scale_factor = meta['scale_factor'] 165 | if bboxes.size(0) > 0: 166 | if isinstance(scale_factor, float): 167 | bboxes /= scale_factor 168 | else: 169 | scale_factor = bboxes.new_tensor(scale_factor) 170 | bboxes = (bboxes.view(bboxes.size(0), -1, 4) / 171 | scale_factor).view(bboxes.size()[0], -1) 172 | bboxes = bboxes.cpu().numpy() 
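# Note: each entry in bbox_transform is one of the callables recorded by the *Box
# pipelines in transforms_box.py (e.g. BboxResize, BboxFlip); replaying them below
# presumably re-expresses the rescaled boxes in the coordinates of the target
# (strongly augmented) view, matching that file's "weak to strong" docstring.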
173 | for bf in bbox_transform: 174 | bboxes, _ = bf(bboxes, None) 175 | bboxes = torch.from_numpy(bboxes).float().to(device) 176 | return bboxes 177 | -------------------------------------------------------------------------------- /configs/baseline/baseline_ssod.py: -------------------------------------------------------------------------------- 1 | # hyper-parameter: replace in "bash" 2 | dataset_name = data_template # coco-standard, coco-additional, voc 3 | seed = seed_template 4 | percent = percent_template 5 | gpu = gpu_template 6 | times = times_template 7 | samples_per_gpu = 8 8 | # # -------------------------dataset------------------------------ 9 | img_norm_cfg = dict( 10 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 11 | 12 | train_pipeline = [ 13 | dict(type='LoadImageFromFile'), 14 | dict(type='LoadAnnotations', with_bbox=True), 15 | dict(type='Resize', img_scale=[(1333, 500), (1333, 800)], keep_ratio=True), 16 | dict(type='RandomFlip', flip_ratio=0.5), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='DefaultFormatBundle'), 20 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 21 | ] 22 | 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1333, 800), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | 39 | if dataset_name == 'voc': 40 | classes = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 41 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 42 | 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor') 43 | data_root = './dataset/voc/' 44 | ann_file = data_root + f'annotations_json/voc07_trainval.json' 45 | ann_file_val = data_root + f'annotations_json/voc07_test.json' 46 | img_prefix = data_root 47 | img_prefix_val = data_root 48 | num_classes = len(classes) 49 | elif dataset_name in ['coco-standard', 'coco-additional']: 50 | classes = None 51 | data_root = './dataset/coco/' 52 | if dataset_name == 'coco-standard': 53 | ann_file = data_root + f'annotations/semi_supervised/instances_train2017.{seed}@{percent}.json', 54 | else: 55 | ann_file = data_root + f'annotations/instances_train2017.json' 56 | ann_file_val = data_root + f'annotations/instances_val2017.json' 57 | img_prefix = data_root + 'train2017/' 58 | img_prefix_val = data_root + 'val2017/' 59 | num_classes = 80 60 | else: 61 | raise ValueError('Not support dataset') 62 | 63 | dataset_type = 'CocoDataset' 64 | data = dict( 65 | samples_per_gpu=samples_per_gpu, 66 | workers_per_gpu=4, 67 | train=dict( 68 | type=dataset_type, 69 | ann_file=ann_file, 70 | img_prefix=img_prefix, 71 | pipeline=train_pipeline, 72 | classes=classes 73 | ), 74 | val=dict( 75 | type=dataset_type, 76 | ann_file=ann_file_val, 77 | img_prefix=img_prefix_val, 78 | pipeline=test_pipeline, 79 | classes=classes 80 | ), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=ann_file_val, 84 | img_prefix=img_prefix_val, 85 | pipeline=test_pipeline, 86 | classes=classes 87 | )) 88 | evaluation = dict(interval=times, metric='bbox', classwise=True) 89 | 90 | # # -------------------------schedule------------------------------ 91 | # learning in faster rcnn: total-batch-size/8*0.01 92 | learning_rate = 
samples_per_gpu * gpu * 0.01 / 8 93 | optimizer = dict(type='SGD', lr=learning_rate, momentum=0.9, weight_decay=0.0001) 94 | optimizer_config = dict(grad_clip=None) 95 | # learning policy 96 | lr_config = dict( 97 | policy='step', 98 | warmup='linear', 99 | warmup_iters=500, 100 | warmup_ratio=0.001, 101 | step=[int(8 * times), int(11 * times)]) 102 | runner = dict(type='EpochBasedRunner', max_epochs=12 * times) 103 | 104 | checkpoint_config = dict(interval=times) 105 | # yapf:disable 106 | log_config = dict( 107 | interval=50, 108 | hooks=[ 109 | dict(type='TextLoggerHook'), 110 | ]) 111 | # yapf:enable 112 | custom_hooks = [dict(type='NumClassCheckHook')] 113 | 114 | dist_params = dict(backend='nccl') 115 | log_level = 'INFO' 116 | load_from = None 117 | resume_from = None 118 | workflow = [('train', 1)] 119 | 120 | # # -------------------------model------------------------------ 121 | model = dict( 122 | type='FasterRCNN', 123 | pretrained='./pretrained_model/backbone/resnet50-19c8e357.pth', 124 | backbone=dict( 125 | type='ResNet', 126 | depth=50, 127 | num_stages=4, 128 | out_indices=(0, 1, 2, 3), 129 | frozen_stages=1, 130 | norm_cfg=dict(type='BN', requires_grad=True), 131 | norm_eval=True, 132 | style='pytorch'), 133 | neck=dict( 134 | type='FPN', 135 | in_channels=[256, 512, 1024, 2048], 136 | out_channels=256, 137 | num_outs=5), 138 | rpn_head=dict( 139 | type='RPNHead', 140 | in_channels=256, 141 | feat_channels=256, 142 | anchor_generator=dict( 143 | type='AnchorGenerator', 144 | scales=[8], 145 | ratios=[0.5, 1.0, 2.0], 146 | strides=[4, 8, 16, 32, 64]), 147 | bbox_coder=dict( 148 | type='DeltaXYWHBBoxCoder', 149 | target_means=[.0, .0, .0, .0], 150 | target_stds=[1.0, 1.0, 1.0, 1.0]), 151 | loss_cls=dict( 152 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 153 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 154 | roi_head=dict( 155 | type='StandardRoIHead', 156 | bbox_roi_extractor=dict( 157 | type='SingleRoIExtractor', 158 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 159 | out_channels=256, 160 | featmap_strides=[4, 8, 16, 32]), 161 | bbox_head=dict( 162 | type='Shared2FCBBoxHead', 163 | in_channels=256, 164 | fc_out_channels=1024, 165 | roi_feat_size=7, 166 | num_classes=num_classes, 167 | bbox_coder=dict( 168 | type='DeltaXYWHBBoxCoder', 169 | target_means=[0., 0., 0., 0.], 170 | target_stds=[0.1, 0.1, 0.2, 0.2]), 171 | reg_class_agnostic=True, 172 | loss_cls=dict( 173 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 174 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 175 | # model training and testing settings 176 | train_cfg=dict( 177 | rpn=dict( 178 | assigner=dict( 179 | type='MaxIoUAssigner', 180 | pos_iou_thr=0.7, 181 | neg_iou_thr=0.3, 182 | min_pos_iou=0.3, 183 | match_low_quality=True, 184 | ignore_iof_thr=-1), 185 | sampler=dict( 186 | type='RandomSampler', 187 | num=256, 188 | pos_fraction=0.5, 189 | neg_pos_ub=-1, 190 | add_gt_as_proposals=False), 191 | allowed_border=-1, 192 | pos_weight=-1, 193 | debug=False), 194 | rpn_proposal=dict( 195 | nms_pre=2000, 196 | max_per_img=1000, 197 | nms=dict(type='nms', iou_threshold=0.7), 198 | min_bbox_size=0), 199 | rcnn=dict( 200 | assigner=dict( 201 | type='MaxIoUAssigner', 202 | pos_iou_thr=0.5, 203 | neg_iou_thr=0.5, 204 | min_pos_iou=0.5, 205 | match_low_quality=False, 206 | ignore_iof_thr=-1), 207 | sampler=dict( 208 | type='RandomSampler', 209 | num=512, 210 | pos_fraction=0.25, 211 | neg_pos_ub=-1, 212 | add_gt_as_proposals=True), 213 | pos_weight=-1, 214 
| debug=False)), 215 | test_cfg=dict( 216 | rpn=dict( 217 | nms_pre=1000, 218 | max_per_img=1000, 219 | nms=dict(type='nms', iou_threshold=0.7), 220 | min_bbox_size=0), 221 | rcnn=dict( 222 | score_thr=0.001, 223 | nms=dict(type='nms', iou_threshold=0.5), 224 | max_per_img=100) 225 | # soft-nms is also supported for rcnn testing 226 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 227 | )) 228 | -------------------------------------------------------------------------------- /mmdet_extension/datasets/pipelines/transforms_box.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Hangzhou Hikvision Digital Technology Co., Ltd. All rights reserved. 2 | # Modified from https://github.com/open-mmlab/mmdetection 3 | """ 4 | augmentation with "box transform": convert pseudo labels from weak to strong 5 | """ 6 | import random 7 | import numpy as np 8 | 9 | import mmcv 10 | from mmdet.datasets import PIPELINES 11 | from mmdet.datasets.pipelines import RandomFlip, Resize 12 | from mmdet_extension.datasets.pipelines.transforms import RandomErasing 13 | 14 | 15 | # support bbox transform 16 | @PIPELINES.register_module() 17 | class AddBBoxTransform(object): 18 | def __call__(self, results): 19 | results['bbox_transform'] = [] 20 | return results 21 | 22 | 23 | @PIPELINES.register_module() 24 | class ResizeBox(Resize): 25 | class BboxResize(object): 26 | def __init__(self, img_shape, scale_factor, bbox_clip_border, scale=None, keep_ratio=True): 27 | self.img_shape = img_shape 28 | self.scale = scale 29 | self.scale_factor = scale_factor 30 | self.keep_ratio = keep_ratio 31 | self.bbox_clip_border = bbox_clip_border 32 | 33 | def __call__(self, bboxes, labels, masks=None): 34 | bboxes = bboxes * self.scale_factor 35 | if self.bbox_clip_border: 36 | img_shape = self.img_shape 37 | bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, img_shape[1]) 38 | bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, img_shape[0]) 39 | if masks is None: 40 | return bboxes, labels 41 | if self.keep_ratio: 42 | masks = masks.rescale(self.scale, interpolation='bilinear') 43 | else: 44 | masks = masks.resize(self.img_shape[:2], interpolation='bilinear') 45 | return bboxes, labels, masks 46 | 47 | def __call__(self, results): 48 | """Call function to resize images, bounding boxes, masks, semantic 49 | segmentation map. 50 | 51 | Args: 52 | results (dict): Result dict from loading pipeline. 53 | 54 | Returns: 55 | dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', \ 56 | 'keep_ratio' keys are added into result dict. 
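Compared with the parent Resize, this subclass also appends a BboxResize callable to results['bbox_transform'] so that the same resize can later be replayed on box coordinates.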
57 | """ 58 | 59 | if 'scale' not in results: 60 | if 'scale_factor' in results: 61 | img_shape = results['img'].shape[:2] 62 | scale_factor = results['scale_factor'] 63 | assert isinstance(scale_factor, float) 64 | results['scale'] = tuple( 65 | [int(x * scale_factor) for x in img_shape][::-1]) 66 | else: 67 | self._random_scale(results) 68 | else: 69 | if not self.override: 70 | assert 'scale_factor' not in results, ( 71 | 'scale and scale_factor cannot be both set.') 72 | else: 73 | results.pop('scale') 74 | if 'scale_factor' in results: 75 | results.pop('scale_factor') 76 | self._random_scale(results) 77 | 78 | self._resize_img(results) 79 | self._resize_bboxes(results) 80 | self._resize_masks(results) 81 | self._resize_seg(results) 82 | results['bbox_transform'].append(self.BboxResize(results['img_shape'], 83 | results['scale_factor'], 84 | self.bbox_clip_border, 85 | results['scale'], 86 | self.keep_ratio)) 87 | return results 88 | 89 | 90 | @PIPELINES.register_module() 91 | class RandomFlipBox(RandomFlip): 92 | class BboxFlip(object): 93 | def __init__(self, img_shape, direction): 94 | self.img_shape = img_shape 95 | self.direction = direction 96 | 97 | def __call__(self, bboxes, labels, masks=None): 98 | assert bboxes.shape[-1] % 4 == 0 99 | flipped = bboxes.copy() 100 | if self.direction == 'horizontal': 101 | w = self.img_shape[1] 102 | flipped[..., 0::4] = w - bboxes[..., 2::4] 103 | flipped[..., 2::4] = w - bboxes[..., 0::4] 104 | elif self.direction == 'vertical': 105 | h = self.img_shape[0] 106 | flipped[..., 1::4] = h - bboxes[..., 3::4] 107 | flipped[..., 3::4] = h - bboxes[..., 1::4] 108 | elif self.direction == 'diagonal': 109 | w = self.img_shape[1] 110 | h = self.img_shape[0] 111 | flipped[..., 0::4] = w - bboxes[..., 2::4] 112 | flipped[..., 1::4] = h - bboxes[..., 3::4] 113 | flipped[..., 2::4] = w - bboxes[..., 0::4] 114 | flipped[..., 3::4] = h - bboxes[..., 1::4] 115 | else: 116 | raise ValueError(f"Invalid flipping direction '{self.direction}'") 117 | if masks is None: 118 | return flipped, labels 119 | else: 120 | masks = masks.flip(self.direction) 121 | return flipped, labels, masks 122 | 123 | def __call__(self, results): 124 | if 'flip' not in results: 125 | if isinstance(self.direction, list): 126 | # None means non-flip 127 | direction_list = self.direction + [None] 128 | else: 129 | # None means non-flip 130 | direction_list = [self.direction, None] 131 | 132 | if isinstance(self.flip_ratio, list): 133 | non_flip_ratio = 1 - sum(self.flip_ratio) 134 | flip_ratio_list = self.flip_ratio + [non_flip_ratio] 135 | else: 136 | non_flip_ratio = 1 - self.flip_ratio 137 | # exclude non-flip 138 | single_ratio = self.flip_ratio / (len(direction_list) - 1) 139 | flip_ratio_list = [single_ratio] * (len(direction_list) - 140 | 1) + [non_flip_ratio] 141 | 142 | cur_dir = np.random.choice(direction_list, p=flip_ratio_list) 143 | 144 | results['flip'] = cur_dir is not None 145 | if 'flip_direction' not in results: 146 | results['flip_direction'] = cur_dir 147 | if results['flip']: 148 | # flip image 149 | for key in results.get('img_fields', ['img']): 150 | results[key] = mmcv.imflip( 151 | results[key], direction=results['flip_direction']) 152 | # flip bboxes 153 | for key in results.get('bbox_fields', []): 154 | results[key] = self.bbox_flip(results[key], 155 | results['img_shape'], 156 | results['flip_direction']) 157 | # flip masks 158 | for key in results.get('mask_fields', []): 159 | results[key] = results[key].flip(results['flip_direction']) 160 | 161 | # flip segs 
162 | for key in results.get('seg_fields', []): 163 | results[key] = mmcv.imflip( 164 | results[key], direction=results['flip_direction']) 165 | results['bbox_transform'].append(self.BboxFlip(results['img_shape'], 166 | results['flip_direction'])) 167 | return results 168 | 169 | 170 | @PIPELINES.register_module() 171 | class RandomErasingBox(RandomErasing): 172 | class BboxRandomErasing(object): 173 | def __init__(self, x, y, w, h): 174 | self.xywh = [x, y, w, h] 175 | 176 | def __call__(self, bboxes, labels, masks=None): 177 | if masks is None: 178 | return bboxes, labels 179 | x, y, w, h = self.xywh 180 | for i in range(len(masks.masks)): 181 | masks.masks[i][y:y + h, x:x + w] = 0 182 | return bboxes, labels, masks 183 | 184 | def __call__(self, results): 185 | if random.uniform(0, 1) >= self.p: 186 | return results 187 | img = results['img'] 188 | y, x, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value) 189 | img[y:y + h, x:x + w] = v 190 | results['img'] = img 191 | results['bbox_transform'].append(self.BboxRandomErasing(x, y, w, h)) 192 | return results 193 | --------------------------------------------------------------------------------
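Taken together, AddBBoxTransform starts an empty results['bbox_transform'] list, each *Box pipeline appends one callable describing its geometric effect, and SemiBaseDetector.rescale_bboxes replays that list over detector outputs. A minimal numpy-only sketch of the replay step, with ScaleDemo, HFlipDemo and all coordinates invented purely for illustration (they mirror the math of BboxResize and BboxFlip above):

import numpy as np

class ScaleDemo:
    # same math as ResizeBox.BboxResize, minus the optional border clipping
    def __init__(self, scale_factor):
        self.scale_factor = scale_factor  # [w_scale, h_scale, w_scale, h_scale]
    def __call__(self, bboxes, labels=None):
        return bboxes * self.scale_factor, labels

class HFlipDemo:
    # same math as RandomFlipBox.BboxFlip for direction='horizontal'
    def __init__(self, img_shape):
        self.img_shape = img_shape  # (h, w) of the augmented image
    def __call__(self, bboxes, labels=None):
        flipped = bboxes.copy()
        w = self.img_shape[1]
        flipped[..., 0::4] = w - bboxes[..., 2::4]
        flipped[..., 2::4] = w - bboxes[..., 0::4]
        return flipped, labels

# boxes in original-image coordinates, then the ops recorded for the augmented view
bbox_transform = [ScaleDemo(np.array([0.5, 0.5, 0.5, 0.5])),
                  HFlipDemo(img_shape=(200, 300))]
boxes = np.array([[40.0, 60.0, 240.0, 180.0]])
for bf in bbox_transform:          # the same loop as in rescale_bboxes
    boxes, _ = bf(boxes, None)
print(boxes)                       # [[180., 30., 280., 90.]] (xyxy) in the augmented view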