├── ObjectDetection_TransferLearning_with_mmdetection
    ├── configs
    │   ├── faster_rcnn_r50_fpn_1x.py
    │   ├── faster_rcnn_r50_fpn_1x_our_version.py
    │   └── faster_rcnn_x101_64x4d_fpn_1x.py
    ├── object_detection_transfer_learning_mmdetection.ipynb
    └── utils
    │   ├── make_annot.py
    │   └── viz.py
└── README.md


/ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_r50_fpn_1x.py:
--------------------------------------------------------------------------------
  1 | # model settings: Faster R-CNN detector with a ResNet-50 backbone and an FPN neck
  2 | model = dict(
  3 |     type='FasterRCNN',
  4 |     pretrained='torchvision://resnet50',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
 10 |         frozen_stages=1,  # NOTE(review): presumably keeps the earliest backbone layers fixed - confirm
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four out_indices
 15 |         out_channels=256,
 16 |         num_outs=5),  # same count as rpn_head.anchor_strides
 17 |     rpn_head=dict(
 18 |         type='RPNHead',
 19 |         in_channels=256,  # equals neck.out_channels
 20 |         feat_channels=256,
 21 |         anchor_scales=[8],
 22 |         anchor_ratios=[0.5, 1.0, 2.0],
 23 |         anchor_strides=[4, 8, 16, 32, 64],
 24 |         target_means=[.0, .0, .0, .0],
 25 |         target_stds=[1.0, 1.0, 1.0, 1.0],
 26 |         loss_cls=dict(
 27 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 28 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
 29 |     bbox_roi_extractor=dict(
 30 |         type='SingleRoIExtractor',
 31 |         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
 32 |         out_channels=256,
 33 |         featmap_strides=[4, 8, 16, 32]),  # the first four anchor_strides; 64 is not listed
 34 |     bbox_head=dict(
 35 |         type='SharedFCBBoxHead',
 36 |         num_fcs=2,
 37 |         in_channels=256,  # equals bbox_roi_extractor.out_channels
 38 |         fc_out_channels=1024,
 39 |         roi_feat_size=7,  # same value as roi_layer out_size
 40 |         num_classes=81,  # NOTE(review): presumably 80 COCO classes + 1 background - confirm
 41 |         target_means=[0., 0., 0., 0.],
 42 |         target_stds=[0.1, 0.1, 0.2, 0.2],
 43 |         reg_class_agnostic=False,
 44 |         loss_cls=dict(
 45 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 46 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))
 47 | # model training and testing settings: train_cfg covers the rpn, rpn_proposal and rcnn stages
 48 | train_cfg = dict(
 49 |     rpn=dict(
 50 |         assigner=dict(
 51 |             type='MaxIoUAssigner',
 52 |             pos_iou_thr=0.7,
 53 |             neg_iou_thr=0.3,  # lower than pos_iou_thr, unlike the rcnn assigner where both are 0.5
 54 |             min_pos_iou=0.3,
 55 |             ignore_iof_thr=-1),
 56 |         sampler=dict(
 57 |             type='RandomSampler',
 58 |             num=256,
 59 |             pos_fraction=0.5,
 60 |             neg_pos_ub=-1,
 61 |             add_gt_as_proposals=False),  # the rcnn sampler below sets this to True
 62 |         allowed_border=0,
 63 |         pos_weight=-1,
 64 |         debug=False),
 65 |     rpn_proposal=dict(
 66 |         nms_across_levels=False,
 67 |         nms_pre=2000,  # test_cfg.rpn uses 1000 for nms_pre / nms_post / max_num
 68 |         nms_post=2000,
 69 |         max_num=2000,
 70 |         nms_thr=0.7,
 71 |         min_bbox_size=0),
 72 |     rcnn=dict(
 73 |         assigner=dict(
 74 |             type='MaxIoUAssigner',
 75 |             pos_iou_thr=0.5,
 76 |             neg_iou_thr=0.5,
 77 |             min_pos_iou=0.5,
 78 |             ignore_iof_thr=-1),
 79 |         sampler=dict(
 80 |             type='RandomSampler',
 81 |             num=512,  # twice the rpn sampler's num
 82 |             pos_fraction=0.25,
 83 |             neg_pos_ub=-1,
 84 |             add_gt_as_proposals=True),
 85 |         pos_weight=-1,
 86 |         debug=False))
 87 | test_cfg = dict(  # test-time counterpart of train_cfg
 88 |     rpn=dict(
 89 |         nms_across_levels=False,
 90 |         nms_pre=1000,  # train_cfg.rpn_proposal uses 2000 for these three limits
 91 |         nms_post=1000,
 92 |         max_num=1000,
 93 |         nms_thr=0.7,
 94 |         min_bbox_size=0),
 95 |     rcnn=dict(
 96 |         score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
 97 |     # soft-nms is also supported for rcnn testing
 98 |     # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
 99 | )
100 | # dataset settings: COCO 2017 annotation files and image folders under data_root
101 | dataset_type = 'CocoDataset'
102 | data_root = 'data/coco/'
103 | img_norm_cfg = dict(
104 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # shared by train, val and test
105 | data = dict(
106 |     imgs_per_gpu=2,
107 |     workers_per_gpu=2,
108 |     train=dict(
109 |         type=dataset_type,
110 |         ann_file=data_root + 'annotations/instances_train2017.json',
111 |         img_prefix=data_root + 'train2017/',
112 |         img_scale=(1333, 800),
113 |         img_norm_cfg=img_norm_cfg,
114 |         size_divisor=32,
115 |         flip_ratio=0.5,  # val and test use flip_ratio=0
116 |         with_mask=False,
117 |         with_crowd=True,
118 |         with_label=True),
119 |     val=dict(
120 |         type=dataset_type,
121 |         ann_file=data_root + 'annotations/instances_val2017.json',
122 |         img_prefix=data_root + 'val2017/',
123 |         img_scale=(1333, 800),
124 |         img_norm_cfg=img_norm_cfg,
125 |         size_divisor=32,
126 |         flip_ratio=0,
127 |         with_mask=False,
128 |         with_crowd=True,
129 |         with_label=True),
130 |     test=dict(
131 |         type=dataset_type,
132 |         ann_file=data_root + 'annotations/instances_val2017.json',  # same file and image folder as val
133 |         img_prefix=data_root + 'val2017/',
134 |         img_scale=(1333, 800),
135 |         img_norm_cfg=img_norm_cfg,
136 |         size_divisor=32,
137 |         flip_ratio=0,
138 |         with_mask=False,
139 |         with_label=False,  # unlike train/val, labels are switched off and test_mode is set
140 |         test_mode=True))
141 | # optimizer: SGD with momentum and weight decay, gradients clipped by L2 norm
142 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
143 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
144 | # learning policy: step schedule with a linear warmup
145 | lr_config = dict(
146 |     policy='step',
147 |     warmup='linear',
148 |     warmup_iters=500,
149 |     warmup_ratio=1.0 / 3,
150 |     step=[8, 11])  # NOTE(review): presumably epoch numbers within total_epochs=12 - confirm
151 | checkpoint_config = dict(interval=1)
152 | # yapf:disable
153 | log_config = dict(
154 |     interval=50,
155 |     hooks=[
156 |         dict(type='TextLoggerHook'),
157 |         # dict(type='TensorboardLoggerHook')
158 |     ])
159 | # yapf:enable
160 | # runtime settings
161 | total_epochs = 12
162 | dist_params = dict(backend='nccl')
163 | log_level = 'INFO'
164 | work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'  # directory named after this config file
165 | load_from = None
166 | resume_from = None
167 | workflow = [('train', 1)]
168 | 


--------------------------------------------------------------------------------
/ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_r50_fpn_1x_our_version.py:
--------------------------------------------------------------------------------
  1 | # model settings: same Faster R-CNN / ResNet-50 / FPN model as faster_rcnn_r50_fpn_1x.py, only num_classes differs
  2 | model = dict(
  3 |     type='FasterRCNN',
  4 |     pretrained='torchvision://resnet50',
  5 |     backbone=dict(
  6 |         type='ResNet',
  7 |         depth=50,
  8 |         num_stages=4,
  9 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
 10 |         frozen_stages=1,  # NOTE(review): presumably keeps the earliest backbone layers fixed - confirm
 11 |         style='pytorch'),
 12 |     neck=dict(
 13 |         type='FPN',
 14 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four out_indices
 15 |         out_channels=256,
 16 |         num_outs=5),  # same count as rpn_head.anchor_strides
 17 |     rpn_head=dict(
 18 |         type='RPNHead',
 19 |         in_channels=256,  # equals neck.out_channels
 20 |         feat_channels=256,
 21 |         anchor_scales=[8],
 22 |         anchor_ratios=[0.5, 1.0, 2.0],
 23 |         anchor_strides=[4, 8, 16, 32, 64],
 24 |         target_means=[.0, .0, .0, .0],
 25 |         target_stds=[1.0, 1.0, 1.0, 1.0],
 26 |         loss_cls=dict(
 27 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 28 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
 29 |     bbox_roi_extractor=dict(
 30 |         type='SingleRoIExtractor',
 31 |         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
 32 |         out_channels=256,
 33 |         featmap_strides=[4, 8, 16, 32]),  # the first four anchor_strides; 64 is not listed
 34 |     bbox_head=dict(
 35 |         type='SharedFCBBoxHead',
 36 |         num_fcs=2,
 37 |         in_channels=256,  # equals bbox_roi_extractor.out_channels
 38 |         fc_out_channels=1024,
 39 |         roi_feat_size=7,  # same value as roi_layer out_size
 40 |         num_classes=5,  # NOTE(review): presumably the 4 tutorial classes + 1 background - confirm
 41 |         target_means=[0., 0., 0., 0.],
 42 |         target_stds=[0.1, 0.1, 0.2, 0.2],
 43 |         reg_class_agnostic=False,
 44 |         loss_cls=dict(
 45 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 46 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))
 47 | # model training and testing settings: train_cfg covers the rpn, rpn_proposal and rcnn stages
 48 | train_cfg = dict(
 49 |     rpn=dict(
 50 |         assigner=dict(
 51 |             type='MaxIoUAssigner',
 52 |             pos_iou_thr=0.7,
 53 |             neg_iou_thr=0.3,  # lower than pos_iou_thr, unlike the rcnn assigner where both are 0.5
 54 |             min_pos_iou=0.3,
 55 |             ignore_iof_thr=-1),
 56 |         sampler=dict(
 57 |             type='RandomSampler',
 58 |             num=256,
 59 |             pos_fraction=0.5,
 60 |             neg_pos_ub=-1,
 61 |             add_gt_as_proposals=False),  # the rcnn sampler below sets this to True
 62 |         allowed_border=0,
 63 |         pos_weight=-1,
 64 |         debug=False),
 65 |     rpn_proposal=dict(
 66 |         nms_across_levels=False,
 67 |         nms_pre=2000,  # test_cfg.rpn uses 1000 for nms_pre / nms_post / max_num
 68 |         nms_post=2000,
 69 |         max_num=2000,
 70 |         nms_thr=0.7,
 71 |         min_bbox_size=0),
 72 |     rcnn=dict(
 73 |         assigner=dict(
 74 |             type='MaxIoUAssigner',
 75 |             pos_iou_thr=0.5,
 76 |             neg_iou_thr=0.5,
 77 |             min_pos_iou=0.5,
 78 |             ignore_iof_thr=-1),
 79 |         sampler=dict(
 80 |             type='RandomSampler',
 81 |             num=512,  # twice the rpn sampler's num
 82 |             pos_fraction=0.25,
 83 |             neg_pos_ub=-1,
 84 |             add_gt_as_proposals=True),
 85 |         pos_weight=-1,
 86 |         debug=False))
 87 | test_cfg = dict(  # test-time counterpart of train_cfg
 88 |     rpn=dict(
 89 |         nms_across_levels=False,
 90 |         nms_pre=1000,  # train_cfg.rpn_proposal uses 2000 for these three limits
 91 |         nms_post=1000,
 92 |         max_num=1000,
 93 |         nms_thr=0.7,
 94 |         min_bbox_size=0),
 95 |     rcnn=dict(
 96 |         score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
 97 |     # soft-nms is also supported for rcnn testing
 98 |     # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
 99 | )
100 | # dataset settings: pickled annotation lists instead of the COCO json files of the base config
101 | dataset_type = 'CustomDataset'
102 | data_root = ''  # not referenced by the data dict below
103 | img_norm_cfg = dict(
104 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # shared by train, val and test
105 | data = dict(
106 |     imgs_per_gpu=6,
107 |     workers_per_gpu=6,
108 |     train=dict(
109 |         type=dataset_type,
110 |         ann_file='annotations_train.pkl',  # file name written by annot(train_data=True) in utils/make_annot.py
111 |         img_prefix='',  # NOTE(review): presumably empty because annot() stores the image path in 'filename' - confirm
112 |         img_scale=(1000, 650),
113 |         img_norm_cfg=img_norm_cfg,
114 |         size_divisor=32,
115 |         flip_ratio=0.5,  # val and test use flip_ratio=0
116 |         with_mask=False,
117 |         with_crowd=True,
118 |         with_label=True),
119 |     val=dict(
120 |         type=dataset_type,
121 |         ann_file='annotations_val.pkl',  # file name written by annot(train_data=False) in utils/make_annot.py
122 |         img_prefix='',
123 |         img_scale=(1000, 650),
124 |         img_norm_cfg=img_norm_cfg,
125 |         size_divisor=32,
126 |         flip_ratio=0,
127 |         with_mask=False,
128 |         with_crowd=True,
129 |         with_label=True),
130 |     test=dict(
131 |         type=dataset_type,
132 |         ann_file=None,  # NOTE(review): no annotation file for test - confirm how the dataset class handles None
133 |         img_prefix='',
134 |         img_scale=(1000, 650),
135 |         img_norm_cfg=img_norm_cfg,
136 |         size_divisor=32,
137 |         flip_ratio=0,
138 |         with_mask=False,
139 |         with_label=False,  # unlike train/val, labels are switched off and test_mode is set
140 |         test_mode=True))
141 | # optimizer: SGD as in faster_rcnn_r50_fpn_1x.py, but with lr=0.0025 instead of 0.02
142 | optimizer = dict(type='SGD', lr=0.0025, momentum=0.9, weight_decay=0.0001)
143 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
144 | # learning policy: step schedule with a linear warmup
145 | lr_config = dict(
146 |     policy='step',
147 |     warmup='linear',
148 |     warmup_iters=500,
149 |     warmup_ratio=1.0 / 3,
150 |     step=[6, 9])  # base config uses [8, 11]; NOTE(review): presumably epoch numbers - confirm
151 | checkpoint_config = dict(interval=1)
152 | # yapf:disable
153 | log_config = dict(
154 |     interval=60,
155 |     hooks=[
156 |         dict(type='TextLoggerHook'),
157 |         # dict(type='TensorboardLoggerHook')
158 |     ])
159 | # yapf:enable
160 | # runtime settings
161 | total_epochs = 12
162 | dist_params = dict(backend='nccl')
163 | log_level = 'INFO'
164 | work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'  # NOTE(review): same directory as faster_rcnn_r50_fpn_1x.py uses - runs of both configs share it
165 | load_from = None
166 | resume_from = None
167 | workflow = [('train', 1)]
168 | 


--------------------------------------------------------------------------------
/ObjectDetection_TransferLearning_with_mmdetection/configs/faster_rcnn_x101_64x4d_fpn_1x.py:
--------------------------------------------------------------------------------
  1 | # model settings: Faster R-CNN detector with a ResNeXt-101 (64 groups, base width 4) backbone and an FPN neck
  2 | model = dict(
  3 |     type='FasterRCNN',
  4 |     pretrained='open-mmlab://resnext101_64x4d',
  5 |     backbone=dict(
  6 |         type='ResNeXt',
  7 |         depth=101,
  8 |         groups=64,  # the "64x4d" in the file name: groups=64, base_width=4
  9 |         base_width=4,
 10 |         num_stages=4,
 11 |         out_indices=(0, 1, 2, 3),  # one index per stage (num_stages=4)
 12 |         frozen_stages=1,  # NOTE(review): presumably keeps the earliest backbone layers fixed - confirm
 13 |         style='pytorch'),
 14 |     neck=dict(
 15 |         type='FPN',
 16 |         in_channels=[256, 512, 1024, 2048],  # four entries, matching the four out_indices
 17 |         out_channels=256,
 18 |         num_outs=5),  # same count as rpn_head.anchor_strides
 19 |     rpn_head=dict(
 20 |         type='RPNHead',
 21 |         in_channels=256,  # equals neck.out_channels
 22 |         feat_channels=256,
 23 |         anchor_scales=[8],
 24 |         anchor_ratios=[0.5, 1.0, 2.0],
 25 |         anchor_strides=[4, 8, 16, 32, 64],
 26 |         target_means=[.0, .0, .0, .0],
 27 |         target_stds=[1.0, 1.0, 1.0, 1.0],
 28 |         loss_cls=dict(
 29 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
 30 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
 31 |     bbox_roi_extractor=dict(
 32 |         type='SingleRoIExtractor',
 33 |         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
 34 |         out_channels=256,
 35 |         featmap_strides=[4, 8, 16, 32]),  # the first four anchor_strides; 64 is not listed
 36 |     bbox_head=dict(
 37 |         type='SharedFCBBoxHead',
 38 |         num_fcs=2,
 39 |         in_channels=256,  # equals bbox_roi_extractor.out_channels
 40 |         fc_out_channels=1024,
 41 |         roi_feat_size=7,  # same value as roi_layer out_size
 42 |         num_classes=81,  # NOTE(review): presumably 80 COCO classes + 1 background - confirm
 43 |         target_means=[0., 0., 0., 0.],
 44 |         target_stds=[0.1, 0.1, 0.2, 0.2],
 45 |         reg_class_agnostic=False,
 46 |         loss_cls=dict(
 47 |             type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
 48 |         loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)))
 49 | # model training and testing settings: train_cfg covers the rpn, rpn_proposal and rcnn stages
 50 | train_cfg = dict(
 51 |     rpn=dict(
 52 |         assigner=dict(
 53 |             type='MaxIoUAssigner',
 54 |             pos_iou_thr=0.7,
 55 |             neg_iou_thr=0.3,  # lower than pos_iou_thr, unlike the rcnn assigner where both are 0.5
 56 |             min_pos_iou=0.3,
 57 |             ignore_iof_thr=-1),
 58 |         sampler=dict(
 59 |             type='RandomSampler',
 60 |             num=256,
 61 |             pos_fraction=0.5,
 62 |             neg_pos_ub=-1,
 63 |             add_gt_as_proposals=False),  # the rcnn sampler below sets this to True
 64 |         allowed_border=0,
 65 |         pos_weight=-1,
 66 |         debug=False),
 67 |     rpn_proposal=dict(
 68 |         nms_across_levels=False,
 69 |         nms_pre=2000,  # test_cfg.rpn uses 1000 for nms_pre / nms_post / max_num
 70 |         nms_post=2000,
 71 |         max_num=2000,
 72 |         nms_thr=0.7,
 73 |         min_bbox_size=0),
 74 |     rcnn=dict(
 75 |         assigner=dict(
 76 |             type='MaxIoUAssigner',
 77 |             pos_iou_thr=0.5,
 78 |             neg_iou_thr=0.5,
 79 |             min_pos_iou=0.5,
 80 |             ignore_iof_thr=-1),
 81 |         sampler=dict(
 82 |             type='RandomSampler',
 83 |             num=512,  # twice the rpn sampler's num
 84 |             pos_fraction=0.25,
 85 |             neg_pos_ub=-1,
 86 |             add_gt_as_proposals=True),
 87 |         pos_weight=-1,
 88 |         debug=False))
 89 | test_cfg = dict(  # test-time counterpart of train_cfg
 90 |     rpn=dict(
 91 |         nms_across_levels=False,
 92 |         nms_pre=1000,  # train_cfg.rpn_proposal uses 2000 for these three limits
 93 |         nms_post=1000,
 94 |         max_num=1000,
 95 |         nms_thr=0.7,
 96 |         min_bbox_size=0),
 97 |     rcnn=dict(
 98 |         score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
 99 |     # soft-nms is also supported for rcnn testing
100 |     # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
101 | )
102 | # dataset settings: COCO 2017 annotation files and image folders under data_root
103 | dataset_type = 'CocoDataset'
104 | data_root = 'data/coco/'
105 | img_norm_cfg = dict(
106 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)  # shared by train, val and test
107 | data = dict(
108 |     imgs_per_gpu=2,
109 |     workers_per_gpu=2,
110 |     train=dict(
111 |         type=dataset_type,
112 |         ann_file=data_root + 'annotations/instances_train2017.json',
113 |         img_prefix=data_root + 'train2017/',
114 |         img_scale=(1333, 800),
115 |         img_norm_cfg=img_norm_cfg,
116 |         size_divisor=32,
117 |         flip_ratio=0.5,  # val and test use flip_ratio=0
118 |         with_mask=False,
119 |         with_crowd=True,
120 |         with_label=True),
121 |     val=dict(
122 |         type=dataset_type,
123 |         ann_file=data_root + 'annotations/instances_val2017.json',
124 |         img_prefix=data_root + 'val2017/',
125 |         img_scale=(1333, 800),
126 |         img_norm_cfg=img_norm_cfg,
127 |         size_divisor=32,
128 |         flip_ratio=0,
129 |         with_mask=False,
130 |         with_crowd=True,
131 |         with_label=True),
132 |     test=dict(
133 |         type=dataset_type,
134 |         ann_file=data_root + 'annotations/instances_val2017.json',  # same file and image folder as val
135 |         img_prefix=data_root + 'val2017/',
136 |         img_scale=(1333, 800),
137 |         img_norm_cfg=img_norm_cfg,
138 |         size_divisor=32,
139 |         flip_ratio=0,
140 |         with_mask=False,
141 |         with_label=False,  # unlike train/val, labels are switched off and test_mode is set
142 |         test_mode=True))
143 | # optimizer: SGD with momentum and weight decay, gradients clipped by L2 norm
144 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
145 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
146 | # learning policy: step schedule with a linear warmup
147 | lr_config = dict(
148 |     policy='step',
149 |     warmup='linear',
150 |     warmup_iters=500,
151 |     warmup_ratio=1.0 / 3,
152 |     step=[8, 11])  # NOTE(review): presumably epoch numbers within total_epochs=12 - confirm
153 | checkpoint_config = dict(interval=1)
154 | # yapf:disable
155 | log_config = dict(
156 |     interval=50,
157 |     hooks=[
158 |         dict(type='TextLoggerHook'),
159 |         # dict(type='TensorboardLoggerHook')
160 |     ])
161 | # yapf:enable
162 | # runtime settings
163 | total_epochs = 12
164 | dist_params = dict(backend='nccl')
165 | log_level = 'INFO'
166 | work_dir = './work_dirs/faster_rcnn_x101_64x4d_fpn_1x'  # directory named after this config file
167 | load_from = None
168 | resume_from = None
169 | workflow = [('train', 1)]
170 | 


--------------------------------------------------------------------------------
/ObjectDetection_TransferLearning_with_mmdetection/utils/make_annot.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | import json
 4 | import glob
 5 | import numpy as np
 6 | import pickle
 7 | import cv2
 8 | import mmcv
 9 | import random
10 | from scipy import io
11 | 
def annot(images_path, label_dict, train_data=True):
    """Make annotations for mmdetection library from Open Image Dataset Source.

    Every ``*.txt`` file matched by ``<images_path>Label/*.txt`` is paired
    with the ``.jpg`` of the same base name directly under ``images_path``.
    Each non-blank label line is read as a class name followed by four box
    coordinates; the class name may consist of several words.  Images whose
    label file holds no boxes, or which OpenCV can not read, are left out.
    The resulting list is written with ``mmcv.dump`` to
    ``./annotations_train.pkl`` or ``./annotations_val.pkl``.

    Args:
        images_path (str): path to images. It is joined by plain string
            concatenation, so it has to end with a path separator.
        label_dict (dict (str: int)): dictionary with matching class names to class labels.
        train_data (boolean): if True - train data, else: validation data.

    Raises:
        ValueError: a label line has fewer than five fields, or a
            coordinate is not a number.
        KeyError: a class name is missing from ``label_dict``.
    """
    if train_data:
        split_name, save_path = 'train', './annotations_train.pkl'
    else:
        split_name, save_path = 'val', './annotations_val.pkl'
    annotations = []

    for label_file in glob.glob(images_path + 'Label/*.txt'):
        # Read labels from the .txt file; blank lines carry no box.
        with open(label_file) as f:
            rows = [line.split() for line in f if line.strip()]
        if not rows:
            continue

        bboxes, labels = [], []
        for fields in rows:
            if len(fields) < 5:
                raise ValueError('Malformed label line in {}: {!r}'.format(
                    label_file, ' '.join(fields)))
            # The last four fields are the box; everything before them is
            # the class name, which keeps multi-word names intact.
            labels.append(label_dict[' '.join(fields[:-4])])
            bboxes.append([float(value) for value in fields[-4:]])

        # basename/splitext instead of split('/') so that paths returned
        # with another separator still yield the right image name.
        stem = os.path.splitext(os.path.basename(label_file))[0]
        filename = images_path + stem + '.jpg'
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread returns None instead of raising for a missing or
            # unreadable file.
            print('Skip {}: can not read image {}'.format(label_file, filename))
            continue
        height, width = image.shape[:2]

        annotations.append({
            # Filename is the path to img
            'filename': filename,
            'height': height,
            'width': width,
            'ann': {
                'bboxes': np.array(bboxes).astype(np.float32),
                'labels': np.array(labels).astype(np.int64),
                # Nothing is ignored: zero-row box array and empty label vector.
                'bboxes_ignore': np.zeros((0, 4), dtype=np.float32),
                'labels_ignore': np.array([]).astype(np.int64),
            },
        })

    mmcv.dump(annotations, save_path)
    print('Annot {} ready: {}, len {}'.format(split_name, save_path, len(annotations)))
58 |         
def look_at_imgs_shapes(images_path):
    """Collect sizes of images.

    For every ``*.txt`` file matched by ``<images_path>Label/*.txt`` the
    ``.jpg`` of the same base name directly under ``images_path`` is read
    and its height and width are recorded.  The label files themselves are
    only used to find the image names.  Images that OpenCV can not read are
    reported and skipped.

    Args:
        images_path (str): path to images. It is joined by plain string
            concatenation, so it has to end with a path separator.

    Returns:
        tuple (list, list): heights and widths, in matching order.
    """
    heights = []
    widths = []
    for label_file in glob.glob(images_path + 'Label/*.txt'):
        # basename/splitext instead of split('/') so that paths returned
        # with another separator still yield the right image name.
        stem = os.path.splitext(os.path.basename(label_file))[0]
        filename = images_path + stem + '.jpg'
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread returns None instead of raising for a missing or
            # unreadable file.
            print('Skip {}: can not read image {}'.format(label_file, filename))
            continue
        h, w = image.shape[:2]
        heights.append(h)
        widths.append(w)
    return heights, widths


--------------------------------------------------------------------------------
/ObjectDetection_TransferLearning_with_mmdetection/utils/viz.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | import numpy as np
  3 | import cv2
  4 | import matplotlib.pyplot as plt
  5 | import pycocotools.mask as maskUtils
  6 | 
  7 | def imshow_det_bboxes(img,
  8 |                       bboxes,
  9 |                       labels,
 10 |                       class_names=None,
 11 |                       score_thr=0,
 12 |                       bbox_color='green',
 13 |                       text_color='green',
 14 |                       thickness=2,
 15 |                       font_scale=2,
 16 |                       show=True,
 17 |                       win_name='',
 18 |                       wait_time=0,
 19 |                       out_file=None):
 20 |     """Draw bboxes and class labels (with scores) on an image.
 21 |     Args:
 22 |         img (str or ndarray): The image to be displayed.
 23 |         bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
 24 |             (n, 5).
 25 |         labels (ndarray): Labels of bboxes.
 26 |         class_names (list[str]): Names of each classes.
 27 |         score_thr (float): Minimum score of bboxes to be shown.
 28 |         bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
 29 |         text_color (str or tuple or :obj:`Color`): Color of texts.
 30 |         thickness (int): Thickness of lines.
 31 |         font_scale (float): Font scales of texts.
 32 |         show (bool): Whether to show the image.
 33 |         win_name (str): The window name.
 34 |         wait_time (int): Value of waitKey param.
 35 |         out_file (str or None): The filename to write the image.
 36 |     """
 37 |     assert bboxes.ndim == 2
 38 |     assert labels.ndim == 1
 39 |     assert bboxes.shape[0] == labels.shape[0]
 40 |     assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5
 41 |     img = mmcv.imread(img)
 42 | 
 43 |     if score_thr > 0:
 44 |         assert bboxes.shape[1] == 5
 45 |         scores = bboxes[:, -1]
 46 |         inds = scores > score_thr
 47 |         bboxes = bboxes[inds, :]
 48 |         labels = labels[inds]
 49 | 
 50 |     bbox_color = mmcv.color_val(bbox_color)
 51 |     text_color = mmcv.color_val(text_color)
 52 | 
 53 |     for bbox, label in zip(bboxes, labels):
 54 |         bbox_int = bbox.astype(np.int32)
 55 |         left_top = (bbox_int[0], bbox_int[1])
 56 |         right_bottom = (bbox_int[2], bbox_int[3])
 57 |         cv2.rectangle(
 58 |             img, left_top, right_bottom, bbox_color, thickness=thickness)
 59 |         label_text = class_names[
 60 |             label] if class_names is not None else 'cls {}'.format(label)
 61 |         if len(bbox) > 4:
 62 |             label_text += '|{:.02f}'.format(bbox[-1])
 63 |         cv2.putText(img, label_text, (bbox_int[0], bbox_int[1] - 2),
 64 |                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color, thickness)
 65 | 
 66 |     if show:
 67 |         plt.figure(figsize=(20,20))
 68 |         plt.axis('off')
 69 |         plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 70 |         plt.show()
 71 |     if out_file is not None:
 72 |         imwrite(img, out_file)
 73 |         
 74 | def show_result(img,
 75 |                 result,
 76 |                 class_names,
 77 |                 score_thr=0.7,
 78 |                 wait_time=0,
 79 |                 out_file=None,
 80 |                 font_scale=2,
 81 |                 thickness=8,
 82 |                 show_mask=True):
 83 |     """Visualize the detection results on the image.
 84 |     Args:
 85 |         img (str or np.ndarray): Image filename or loaded image.
 86 |         result (tuple[list] or list): The detection result, can be either
 87 |             (bbox, segm) or just bbox.
 88 |         class_names (list[str] or tuple[str]): A list of class names.
 89 |         score_thr (float): The threshold to visualize the bboxes and masks.
 90 |         wait_time (int): Value of waitKey param.
 91 |         out_file (str, optional): If specified, the visualization result will
 92 |             be written to the out file instead of shown in a window.
 93 |     """
 94 |     assert isinstance(class_names, (tuple, list))
 95 |     img = mmcv.imread(img)
 96 |     if isinstance(result, tuple):
 97 |         bbox_result, segm_result = result
 98 |     else:
 99 |         bbox_result, segm_result = result, None
100 |     bboxes = np.vstack(bbox_result)
101 |     # draw segmentation masks
102 |     if show_mask and segm_result is not None:
103 |         print(len(segm_result[0]))
104 |         segms = mmcv.concat_list(segm_result)
105 |         inds = np.where(bboxes[:, -1] > score_thr)[0]
106 |         for i in inds:
107 |             color_mask = np.random.randint(0, 256, (1, 3), dtype=np.uint8)
108 |             mask = maskUtils.decode(segms[i]).astype(np.bool)
109 |             img[mask] = img[mask] * 0.5 + color_mask * 0.5
110 |     # draw bounding boxes
111 |     labels = [
112 |         np.full(bbox.shape[0], i, dtype=np.int32)
113 |         for i, bbox in enumerate(bbox_result)
114 |     ]
115 |     labels = np.concatenate(labels)
116 |     imshow_det_bboxes(
117 |         img.copy(),
118 |         bboxes,
119 |         labels,
120 |         class_names=class_names,
121 |         score_thr=score_thr,
122 |         show=out_file is None,
123 |         wait_time=wait_time,
124 |         out_file=out_file,
125 |         font_scale=font_scale,
126 |         thickness=thickness)


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 |  # ComputerVision_Tutorials_in_Russian
 2 | 
 3 | <p align='center'>
 4 |   <img src='https://drive.google.com/uc?export=view&id=12fRN9LIkdzQChTXBfbCiePK6IGV0USRx' width='900'>
 5 | </p>
 6 | 
 7 | 1. В папке [ObjectDetection_TransferLearning_with_mmdetection](ObjectDetection_TransferLearning_with_mmdetection/ 
 8 | 'ObjectDetection_TransferLearning_with_mmdetection')
 9 | находится туториал по применению
10 | transfer learning для обучения моделей на собственных классах данных с использованием предобученных хребтов 
11 | на примере библиотеки [mmdetection](https://github.com/open-mmlab/mmdetection "mmdetection"). 
12 | 
13 | В этом туториале сначала показывается, как можно детектить объекты из классов MS-COCO, например, кота, человека, машину или кухонную утварь.
14 | А потом используются открытые размеченные данные четырех классов, чтобы обучить модель различать выдр, белок, панд и енотов.
15 | 
16 | <p align='center'>
17 |   <img src='https://drive.google.com/uc?export=view&id=1VlpZDdcOMclmrSTT9Lms79lva5mvSD0-' height='500'
18 |   alt='Pretrained on MS-COCO'>
19 |   <img src='https://drive.google.com/uc?export=view&id=1omu5KC9-ZEduvbcgfJZk8ArrdeWrkdMW' 
20 |   alt='Trained on own dataset' height='500'>
21 | </p>
22 | 


--------------------------------------------------------------------------------