├── ObjectDetection_TransferLearning_with_mmdetection ├── configs │ ├── faster_rcnn_r50_fpn_1x.py │ ├── faster_rcnn_r50_fpn_1x_our_version.py │ └── faster_rcnn_x101_64x4d_fpn_1x.py ├── object_detection_transfer_learning_mmdetection.ipynb └── utils │ ├── make_annot.py │ └── viz.py └── README.md /ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 29 | bbox_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | bbox_head=dict( 35 | type='SharedFCBBoxHead', 36 | num_fcs=2, 37 | in_channels=256, 38 | fc_out_channels=1024, 39 | roi_feat_size=7, 40 | num_classes=81, 41 | target_means=[0., 0., 0., 0.], 42 | target_stds=[0.1, 0.1, 0.2, 0.2], 43 | reg_class_agnostic=False, 44 | loss_cls=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 46 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) 47 | # model training and testing settings 48 | train_cfg = dict( 49 | rpn=dict( 50 | assigner=dict( 51 | type='MaxIoUAssigner', 52 | 
pos_iou_thr=0.7, 53 | neg_iou_thr=0.3, 54 | min_pos_iou=0.3, 55 | ignore_iof_thr=-1), 56 | sampler=dict( 57 | type='RandomSampler', 58 | num=256, 59 | pos_fraction=0.5, 60 | neg_pos_ub=-1, 61 | add_gt_as_proposals=False), 62 | allowed_border=0, 63 | pos_weight=-1, 64 | debug=False), 65 | rpn_proposal=dict( 66 | nms_across_levels=False, 67 | nms_pre=2000, 68 | nms_post=2000, 69 | max_num=2000, 70 | nms_thr=0.7, 71 | min_bbox_size=0), 72 | rcnn=dict( 73 | assigner=dict( 74 | type='MaxIoUAssigner', 75 | pos_iou_thr=0.5, 76 | neg_iou_thr=0.5, 77 | min_pos_iou=0.5, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=512, 82 | pos_fraction=0.25, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=True), 85 | pos_weight=-1, 86 | debug=False)) 87 | test_cfg = dict( 88 | rpn=dict( 89 | nms_across_levels=False, 90 | nms_pre=1000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) 97 | # soft-nms is also supported for rcnn testing 98 | # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) 99 | ) 100 | # dataset settings 101 | dataset_type = 'CocoDataset' 102 | data_root = 'data/coco/' 103 | img_norm_cfg = dict( 104 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 105 | data = dict( 106 | imgs_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | ann_file=data_root + 'annotations/instances_train2017.json', 111 | img_prefix=data_root + 'train2017/', 112 | img_scale=(1333, 800), 113 | img_norm_cfg=img_norm_cfg, 114 | size_divisor=32, 115 | flip_ratio=0.5, 116 | with_mask=False, 117 | with_crowd=True, 118 | with_label=True), 119 | val=dict( 120 | type=dataset_type, 121 | ann_file=data_root + 'annotations/instances_val2017.json', 122 | img_prefix=data_root + 'val2017/', 123 | img_scale=(1333, 800), 124 | img_norm_cfg=img_norm_cfg, 125 | size_divisor=32, 126 | flip_ratio=0, 127 | 
with_mask=False, 128 | with_crowd=True, 129 | with_label=True), 130 | test=dict( 131 | type=dataset_type, 132 | ann_file=data_root + 'annotations/instances_val2017.json', 133 | img_prefix=data_root + 'val2017/', 134 | img_scale=(1333, 800), 135 | img_norm_cfg=img_norm_cfg, 136 | size_divisor=32, 137 | flip_ratio=0, 138 | with_mask=False, 139 | with_label=False, 140 | test_mode=True)) 141 | # optimizer 142 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 143 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 144 | # learning policy 145 | lr_config = dict( 146 | policy='step', 147 | warmup='linear', 148 | warmup_iters=500, 149 | warmup_ratio=1.0 / 3, 150 | step=[8, 11]) 151 | checkpoint_config = dict(interval=1) 152 | # yapf:disable 153 | log_config = dict( 154 | interval=50, 155 | hooks=[ 156 | dict(type='TextLoggerHook'), 157 | # dict(type='TensorboardLoggerHook') 158 | ]) 159 | # yapf:enable 160 | # runtime settings 161 | total_epochs = 12 162 | dist_params = dict(backend='nccl') 163 | log_level = 'INFO' 164 | work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' 165 | load_from = None 166 | resume_from = None 167 | workflow = [('train', 1)] 168 | -------------------------------------------------------------------------------- /ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_r50_fpn_1x_our_version.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 
8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | loss_cls=dict( 27 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 28 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 29 | bbox_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | bbox_head=dict( 35 | type='SharedFCBBoxHead', 36 | num_fcs=2, 37 | in_channels=256, 38 | fc_out_channels=1024, 39 | roi_feat_size=7, 40 | num_classes=5, 41 | target_means=[0., 0., 0., 0.], 42 | target_stds=[0.1, 0.1, 0.2, 0.2], 43 | reg_class_agnostic=False, 44 | loss_cls=dict( 45 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 46 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) 47 | # model training and testing settings 48 | train_cfg = dict( 49 | rpn=dict( 50 | assigner=dict( 51 | type='MaxIoUAssigner', 52 | pos_iou_thr=0.7, 53 | neg_iou_thr=0.3, 54 | min_pos_iou=0.3, 55 | ignore_iof_thr=-1), 56 | sampler=dict( 57 | type='RandomSampler', 58 | num=256, 59 | pos_fraction=0.5, 60 | neg_pos_ub=-1, 61 | add_gt_as_proposals=False), 62 | allowed_border=0, 63 | pos_weight=-1, 64 | debug=False), 65 | rpn_proposal=dict( 66 | nms_across_levels=False, 67 | nms_pre=2000, 68 | nms_post=2000, 69 | max_num=2000, 70 | nms_thr=0.7, 71 | min_bbox_size=0), 72 | rcnn=dict( 73 | assigner=dict( 74 | type='MaxIoUAssigner', 75 | pos_iou_thr=0.5, 76 | neg_iou_thr=0.5, 77 | min_pos_iou=0.5, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=512, 82 | pos_fraction=0.25, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=True), 85 | pos_weight=-1, 86 | debug=False)) 87 | test_cfg = dict( 88 | rpn=dict( 89 | nms_across_levels=False, 90 | nms_pre=1000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), 
max_per_img=100) 97 | # soft-nms is also supported for rcnn testing 98 | # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) 99 | ) 100 | # dataset settings 101 | dataset_type = 'CustomDataset' 102 | data_root = '' 103 | img_norm_cfg = dict( 104 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 105 | data = dict( 106 | imgs_per_gpu=6, 107 | workers_per_gpu=6, 108 | train=dict( 109 | type=dataset_type, 110 | ann_file='annotations_train.pkl', 111 | img_prefix='', 112 | img_scale=(1000, 650), 113 | img_norm_cfg=img_norm_cfg, 114 | size_divisor=32, 115 | flip_ratio=0.5, 116 | with_mask=False, 117 | with_crowd=True, 118 | with_label=True), 119 | val=dict( 120 | type=dataset_type, 121 | ann_file='annotations_val.pkl', 122 | img_prefix='', 123 | img_scale=(1000, 650), 124 | img_norm_cfg=img_norm_cfg, 125 | size_divisor=32, 126 | flip_ratio=0, 127 | with_mask=False, 128 | with_crowd=True, 129 | with_label=True), 130 | test=dict( 131 | type=dataset_type, 132 | ann_file=None, 133 | img_prefix='', 134 | img_scale=(1000, 650), 135 | img_norm_cfg=img_norm_cfg, 136 | size_divisor=32, 137 | flip_ratio=0, 138 | with_mask=False, 139 | with_label=False, 140 | test_mode=True)) 141 | # optimizer 142 | optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001) 143 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 144 | # learning policy 145 | lr_config = dict( 146 | policy='step', 147 | warmup='linear', 148 | warmup_iters=500, 149 | warmup_ratio=1.0 / 3, 150 | step=[6, 9]) 151 | checkpoint_config = dict(interval=1) 152 | # yapf:disable 153 | log_config = dict( 154 | interval=60, 155 | hooks=[ 156 | dict(type='TextLoggerHook'), 157 | # dict(type='TensorboardLoggerHook') 158 | ]) 159 | # yapf:enable 160 | # runtime settings 161 | total_epochs = 12 162 | dist_params = dict(backend='nccl') 163 | log_level = 'INFO' 164 | work_dir = './work_dirs/faster_rcnn_r50_fpn_1x' 165 | load_from = None 166 | resume_from = None 167 | 
workflow = [('train', 1)] 168 | -------------------------------------------------------------------------------- /ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | loss_cls=dict( 29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 30 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 31 | bbox_roi_extractor=dict( 32 | type='SingleRoIExtractor', 33 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 34 | out_channels=256, 35 | featmap_strides=[4, 8, 16, 32]), 36 | bbox_head=dict( 37 | type='SharedFCBBoxHead', 38 | num_fcs=2, 39 | in_channels=256, 40 | fc_out_channels=1024, 41 | roi_feat_size=7, 42 | num_classes=81, 43 | target_means=[0., 0., 0., 0.], 44 | target_stds=[0.1, 0.1, 0.2, 0.2], 45 | reg_class_agnostic=False, 46 | loss_cls=dict( 47 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 48 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))) 49 | # model training and testing settings 50 | train_cfg = dict( 51 | rpn=dict( 52 | assigner=dict( 53 | type='MaxIoUAssigner', 54 | pos_iou_thr=0.7, 55 | neg_iou_thr=0.3, 56 | min_pos_iou=0.3, 57 | ignore_iof_thr=-1), 58 | sampler=dict( 59 | type='RandomSampler', 60 | 
num=256, 61 | pos_fraction=0.5, 62 | neg_pos_ub=-1, 63 | add_gt_as_proposals=False), 64 | allowed_border=0, 65 | pos_weight=-1, 66 | debug=False), 67 | rpn_proposal=dict( 68 | nms_across_levels=False, 69 | nms_pre=2000, 70 | nms_post=2000, 71 | max_num=2000, 72 | nms_thr=0.7, 73 | min_bbox_size=0), 74 | rcnn=dict( 75 | assigner=dict( 76 | type='MaxIoUAssigner', 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.5, 79 | min_pos_iou=0.5, 80 | ignore_iof_thr=-1), 81 | sampler=dict( 82 | type='RandomSampler', 83 | num=512, 84 | pos_fraction=0.25, 85 | neg_pos_ub=-1, 86 | add_gt_as_proposals=True), 87 | pos_weight=-1, 88 | debug=False)) 89 | test_cfg = dict( 90 | rpn=dict( 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_post=1000, 94 | max_num=1000, 95 | nms_thr=0.7, 96 | min_bbox_size=0), 97 | rcnn=dict( 98 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100) 99 | # soft-nms is also supported for rcnn testing 100 | # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) 101 | ) 102 | # dataset settings 103 | dataset_type = 'CocoDataset' 104 | data_root = 'data/coco/' 105 | img_norm_cfg = dict( 106 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 107 | data = dict( 108 | imgs_per_gpu=2, 109 | workers_per_gpu=2, 110 | train=dict( 111 | type=dataset_type, 112 | ann_file=data_root + 'annotations/instances_train2017.json', 113 | img_prefix=data_root + 'train2017/', 114 | img_scale=(1333, 800), 115 | img_norm_cfg=img_norm_cfg, 116 | size_divisor=32, 117 | flip_ratio=0.5, 118 | with_mask=False, 119 | with_crowd=True, 120 | with_label=True), 121 | val=dict( 122 | type=dataset_type, 123 | ann_file=data_root + 'annotations/instances_val2017.json', 124 | img_prefix=data_root + 'val2017/', 125 | img_scale=(1333, 800), 126 | img_norm_cfg=img_norm_cfg, 127 | size_divisor=32, 128 | flip_ratio=0, 129 | with_mask=False, 130 | with_crowd=True, 131 | with_label=True), 132 | test=dict( 133 | type=dataset_type, 134 | ann_file=data_root + 
'annotations/instances_val2017.json', 135 | img_prefix=data_root + 'val2017/', 136 | img_scale=(1333, 800), 137 | img_norm_cfg=img_norm_cfg, 138 | size_divisor=32, 139 | flip_ratio=0, 140 | with_mask=False, 141 | with_label=False, 142 | test_mode=True)) 143 | # optimizer 144 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 145 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 146 | # learning policy 147 | lr_config = dict( 148 | policy='step', 149 | warmup='linear', 150 | warmup_iters=500, 151 | warmup_ratio=1.0 / 3, 152 | step=[8, 11]) 153 | checkpoint_config = dict(interval=1) 154 | # yapf:disable 155 | log_config = dict( 156 | interval=50, 157 | hooks=[ 158 | dict(type='TextLoggerHook'), 159 | # dict(type='TensorboardLoggerHook') 160 | ]) 161 | # yapf:enable 162 | # runtime settings 163 | total_epochs = 12 164 | dist_params = dict(backend='nccl') 165 | log_level = 'INFO' 166 | work_dir = './work_dirs/faster_rcnn_x101_64x4d_fpn_1x' 167 | load_from = None 168 | resume_from = None 169 | workflow = [('train', 1)] 170 | -------------------------------------------------------------------------------- /ObjectDetection_TransferLearning_with_mmdetection/utils/make_annot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import json 4 | import glob 5 | import numpy as np 6 | import pickle 7 | import cv2 8 | import mmcv 9 | import random 10 | from scipy import io 11 | 12 | def annot(images_path, label_dict, train_data=True): 13 | """Make annotations for mmdetection library from Open Image Dataset Source. 14 | Args: 15 | images_path (str): path to images. 16 | label_dict (dict (str: int)): dictionary with matching class names to class labels. 17 | train_data (boolean): if True - train data, else: validation data. 
18 | """ 19 | label_path = images_path+'Label/*.txt' 20 | save_val_path = './annotations_val.pkl' 21 | save_train_path = './annotations_train.pkl' 22 | annotations_train = [] 23 | annotations_val = [] 24 | 25 | # Read labels from .txt files 26 | for name in glob.glob(label_path): 27 | with open(name) as f: 28 | # Create annot dict 29 | annot_instance = {} 30 | # Filename is the path to img 31 | annot_instance['filename'] = images_path+name[:-3].split('/')[-1]+'jpg' 32 | image = cv2.imread(annot_instance['filename']) 33 | h, w = image.shape[:2] 34 | # Height and width of the img 35 | annot_instance['height'] = h 36 | annot_instance['width'] = w 37 | # ann dict inside first dict with boxes and labels for use and boxes and labels for ignore 38 | annot_instance['ann'] = {} 39 | f = [x.rstrip('\n').split() for x in f.readlines()] 40 | kk = [[float(y) for y in x[1:]] for x in f] 41 | ll = [label_dict[x[0]] for x in f] 42 | if len(ll)>0: 43 | annot_instance['ann']['bboxes'] = (np.array(kk)).astype(np.float32) 44 | annot_instance['ann']['labels'] = (np.array(ll)).astype(np.int64) 45 | # If there are no boxes to ignore, this is need to be a zero coordinate vector and empty label vector 46 | annot_instance['ann']['bboxes_ignore'] = np.zeros((0, 4), dtype=np.float32) 47 | annot_instance['ann']['labels_ignore'] = (np.array([])).astype(np.int64) 48 | if train_data: 49 | annotations_train.append(annot_instance) 50 | else: 51 | annotations_val.append(annot_instance) 52 | if train_data: 53 | mmcv.dump(annotations_train, save_train_path) 54 | print('Annot train ready: {}, len {}'.format(save_train_path, len(annotations_train))) 55 | else: 56 | mmcv.dump(annotations_val, save_val_path) 57 | print('Annot val ready: {}, len {}'.format(save_val_path, len(annotations_val))) 58 | 59 | def look_at_imgs_shapes(images_path): 60 | """Collect sizes of images. 
61 | """ 62 | label_path = images_path+'Label/*.txt' 63 | hh = [] 64 | ww = [] 65 | for name in glob.glob(label_path): 66 | with open(name) as f: 67 | annot_instance = {} 68 | annot_instance['filename'] = images_path+name[:-3].split('/')[-1]+'jpg' 69 | image = cv2.imread(annot_instance['filename']) 70 | h, w = image.shape[:2] 71 | annot_instance['height'] = h 72 | annot_instance['width'] = w 73 | hh.append(h) 74 | ww.append(w) 75 | return hh, ww -------------------------------------------------------------------------------- /ObjectDetection_TransferLearning_with_mmdetection/utils/viz.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | import pycocotools.mask as maskUtils 6 | 7 | def imshow_det_bboxes(img, 8 | bboxes, 9 | labels, 10 | class_names=None, 11 | score_thr=0, 12 | bbox_color='green', 13 | text_color='green', 14 | thickness=2, 15 | font_scale=2, 16 | show=True, 17 | win_name='', 18 | wait_time=0, 19 | out_file=None): 20 | """Draw bboxes and class labels (with scores) on an image. 21 | Args: 22 | img (str or ndarray): The image to be displayed. 23 | bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or 24 | (n, 5). 25 | labels (ndarray): Labels of bboxes. 26 | class_names (list[str]): Names of each classes. 27 | score_thr (float): Minimum score of bboxes to be shown. 28 | bbox_color (str or tuple or :obj:`Color`): Color of bbox lines. 29 | text_color (str or tuple or :obj:`Color`): Color of texts. 30 | thickness (int): Thickness of lines. 31 | font_scale (float): Font scales of texts. 32 | show (bool): Whether to show the image. 33 | win_name (str): The window name. 34 | wait_time (int): Value of waitKey param. 35 | out_file (str or None): The filename to write the image. 
36 | """ 37 | assert bboxes.ndim == 2 38 | assert labels.ndim == 1 39 | assert bboxes.shape[0] == labels.shape[0] 40 | assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5 41 | img = mmcv.imread(img) 42 | 43 | if score_thr > 0: 44 | assert bboxes.shape[1] == 5 45 | scores = bboxes[:, -1] 46 | inds = scores > score_thr 47 | bboxes = bboxes[inds, :] 48 | labels = labels[inds] 49 | 50 | bbox_color = mmcv.color_val(bbox_color) 51 | text_color = mmcv.color_val(text_color) 52 | 53 | for bbox, label in zip(bboxes, labels): 54 | bbox_int = bbox.astype(np.int32) 55 | left_top = (bbox_int[0], bbox_int[1]) 56 | right_bottom = (bbox_int[2], bbox_int[3]) 57 | cv2.rectangle( 58 | img, left_top, right_bottom, bbox_color, thickness=thickness) 59 | label_text = class_names[ 60 | label] if class_names is not None else 'cls {}'.format(label) 61 | if len(bbox) > 4: 62 | label_text += '|{:.02f}'.format(bbox[-1]) 63 | cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2), 64 | cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color, thickness) 65 | 66 | if show: 67 | plt.figure(figsize=(20,20)) 68 | plt.axis('off') 69 | plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) 70 | plt.show() 71 | if out_file is not None: 72 | imwrite(img, out_file) 73 | 74 | def show_result(img, 75 | result, 76 | class_names, 77 | score_thr=0.7, 78 | wait_time=0, 79 | out_file=None, 80 | font_scale=2, 81 | thickness=8, 82 | show_mask=True): 83 | """Visualize the detection results on the image. 84 | Args: 85 | img (str or np.ndarray): Image filename or loaded image. 86 | result (tuple[list] or list): The detection result, can be either 87 | (bbox, segm) or just bbox. 88 | class_names (list[str] or tuple[str]): A list of class names. 89 | score_thr (float): The threshold to visualize the bboxes and masks. 90 | wait_time (int): Value of waitKey param. 91 | out_file (str, optional): If specified, the visualization result will 92 | be written to the out file instead of shown in a window. 
93 | """ 94 | assert isinstance(class_names, (tuple, list)) 95 | img = mmcv.imread(img) 96 | if isinstance(result, tuple): 97 | bbox_result, segm_result = result 98 | else: 99 | bbox_result, segm_result = result, None 100 | bboxes = np.vstack(bbox_result) 101 | # draw segmentation masks 102 | if show_mask and segm_result is not None: 103 | print(len(segm_result[0])) 104 | segms = mmcv.concat_list(segm_result) 105 | inds = np.where(bboxes[:, -1] > score_thr)[0] 106 | for i in inds: 107 | color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8) 108 | mask = maskUtils.decode(segms[i]).astype(np.bool) 109 | img[mask] = img[mask] * 0.5 + color_mask * 0.5 110 | # draw bounding boxes 111 | labels = [ 112 | np.full(bbox.shape[0], i, dtype=np.int32) 113 | for i, bbox in enumerate(bbox_result) 114 | ] 115 | labels = np.concatenate(labels) 116 | imshow_det_bboxes( 117 | img.copy(), 118 | bboxes, 119 | labels, 120 | class_names=class_names, 121 | score_thr=score_thr, 122 | show=out_file is None, 123 | wait_time=wait_time, 124 | out_file=out_file, 125 | font_scale=font_scale, 126 | thickness=thickness) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComputerVision_Tutorials_in_Russian 2 | 3 |

4 | 5 |

6 | 7 | 1. В папке [ObjectDetection_TransferLearning_with_mmdetection](ObjectDetection_TransferLearning_with_mmdetection/ 8 | 'ObjectDetection_TransferLearning_with_mmdetection') 9 | находится туториал по применению 10 | transfer learning для обучения моделей на собственных классах данных с использованием предобученных хребтов 11 | на примере библиотеки [mmdetection](https://github.com/open-mmlab/mmdetection "mmdetection"). 12 | 13 | В этом туториале сначала показывается, как можно детектить объекты из классов MS-COCO, например, кота, человека, машину или кухонную утварь. 14 | А потом используются открытые размеченные данные четырех классов, чтобы обучить модель различать выдр, белок, панд и енотов. 15 | 16 |

17 | Pretrained on MS-COCO 19 | Trained on own dataset 21 |

22 | --------------------------------------------------------------------------------