├── core ├── __init__.py ├── utils │ ├── __init__.py │ ├── misc.py │ ├── measurements.py │ └── model_book.py ├── backbone │ ├── __init__.py │ ├── get_backbone.py │ ├── backbone11.py │ ├── backbone12.py │ ├── backbone4.py │ ├── backbone3.py │ ├── backbone1.py │ ├── backbone13.py │ ├── backbone2.py │ ├── backbone5.py │ └── backbone10.py ├── datasets │ ├── __init__.py │ ├── collation.py │ ├── generate_vocdata.py │ ├── generate_vocdata_for_pure_background.py │ ├── open_images.py │ ├── voc_dataset.py │ ├── voc_dataset_9.py │ └── voc_dataset_x.py ├── headers │ ├── __init__.py │ ├── headers12.py │ ├── headers1.py │ ├── headers3.py │ ├── headers2.py │ ├── headers4.py │ ├── headers11.py │ ├── get_headers.py │ ├── headers13.py │ └── headers10.py ├── ssd_fpn │ ├── __init__.py │ ├── ssd_fpn_zq3.py │ ├── ssd_fpn_zq1.py │ ├── ssd_fpn_zq4.py │ └── ssd_fpn_zq2.py ├── ssd_fpn_x │ ├── __init__.py │ └── ssd_fpn_x_base.py ├── ssd_fpn_x_creator.py ├── ssd_creator.py ├── ssd_fpn_creator.py ├── data_preprocessing.py ├── predictor.py └── predictor_x.py ├── models └── keep.keep ├── models_fpn └── keep.keep ├── configs_fpn ├── ssd_fpn_zq1.cfg ├── ssd_fpn_zq2.cfg ├── ssd_fpn_zq3.cfg ├── ssd_fpn_zq4.cfg └── ssd_fpn_zq5.cfg ├── configs ├── zq1.cfg ├── zq11.cfg ├── zq12.cfg ├── zq2.cfg ├── zq3.cfg ├── zq4.cfg ├── zq5.cfg ├── zq6.cfg ├── zq7.cfg ├── zq8.cfg ├── model-face.cfg ├── zq13.cfg └── zq9.cfg ├── README.md ├── example_fpn ├── count_ops_fpn.py └── pth2onnx_fpn.py ├── example_fpn_x ├── count_ops_fpn_x.py └── pth2onnx_fpn_x.py └── example ├── count_ops.py ├── pth2onnx.py └── run_ssd_example.py /core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/keep.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models_fpn/keep.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/headers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/ssd_fpn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/ssd_fpn_x/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /configs_fpn/ssd_fpn_zq1.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | net_type: 'ssd_fpn_zq1' 3 | image_mean: 0.0, 0.0, 0.0 4 | image_std: 1.0 5 | iou_thresh: 0.45 6 | 
center_variance: 0.1 7 | size_variance: 0.2 8 | aspect_ratios: 2, 3 9 | use_gray: True 10 | image_size_x: 256 11 | image_size_y: 256 12 | spec1: 32, 32, 8, 8, 20, 40 13 | spec2: 16, 16, 16, 16, 40, 72 14 | spec3: 8, 8, 32, 32, 72, 124 15 | spec4: 4, 4, 64, 64, 124, 174 16 | spec5: 2, 2, 128, 128, 174, 226 17 | 18 | -------------------------------------------------------------------------------- /configs_fpn/ssd_fpn_zq2.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | net_type: 'ssd_fpn_zq2' 3 | image_mean: 0.0, 0.0, 0.0 4 | image_std: 1.0 5 | iou_thresh: 0.45 6 | center_variance: 0.1 7 | size_variance: 0.2 8 | aspect_ratios: 2, 3 9 | use_gray: True 10 | image_size_x: 256 11 | image_size_y: 256 12 | spec1: 32, 32, 8, 8, 20, 40 13 | spec2: 16, 16, 16, 16, 40, 72 14 | spec3: 8, 8, 32, 32, 72, 124 15 | spec4: 4, 4, 64, 64, 124, 174 16 | spec5: 2, 2, 128, 128, 174, 226 17 | 18 | -------------------------------------------------------------------------------- /configs_fpn/ssd_fpn_zq3.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | net_type: 'ssd_fpn_zq3' 3 | image_mean: 0.0, 0.0, 0.0 4 | image_std: 1.0 5 | iou_thresh: 0.45 6 | center_variance: 0.1 7 | size_variance: 0.2 8 | aspect_ratios: 2, 3 9 | use_gray: True 10 | image_size_x: 256 11 | image_size_y: 256 12 | spec1: 32, 32, 8, 8, 20, 40 13 | spec2: 16, 16, 16, 16, 40, 72 14 | spec3: 8, 8, 32, 32, 72, 124 15 | spec4: 4, 4, 64, 64, 124, 174 16 | spec5: 2, 2, 128, 128, 174, 226 17 | 18 | -------------------------------------------------------------------------------- /configs_fpn/ssd_fpn_zq4.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | net_type: 'ssd_fpn_zq4' 3 | image_mean: 0.0, 0.0, 0.0 4 | image_std: 1.0 5 | iou_thresh: 0.45 6 | center_variance: 0.1 7 | size_variance: 0.2 8 | aspect_ratios: 2, 3 9 | use_gray: True 10 | image_size_x: 384 11 | image_size_y: 256 12 | spec1: 48, 32, 8, 8, 20, 40 13 | spec2: 24, 16, 16, 16, 40, 72 14 | spec3: 12, 8, 32, 32, 72, 124 15 | spec4: 6, 4, 64, 64, 124, 174 16 | spec5: 3, 2, 128, 128, 174, 226 17 | 18 | -------------------------------------------------------------------------------- /configs_fpn/ssd_fpn_zq5.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | net_type: 'ssd_fpn_zq5' 3 | image_mean: 0.0, 0.0, 0.0 4 | image_std: 1.0 5 | iou_thresh: 0.45 6 | center_variance: 0.1 7 | size_variance: 0.2 8 | aspect_ratios: 2, 3 9 | use_gray: True 10 | image_size_x: 384 11 | image_size_y: 384 12 | spec1: 48, 48, 8, 8, 20, 40 13 | spec2: 24, 24, 16, 16, 40, 72 14 | spec3: 12, 12, 32, 32, 72, 124 15 | spec4: 6, 6, 64, 64, 124, 174 16 | spec5: 3, 3, 128, 128, 174, 226 17 | 18 | -------------------------------------------------------------------------------- /configs/zq1.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone1' 3 | header_type: 'headers1' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: False 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 16, 12, 16, 16, 40, 72 14 | spec2: 8, 6, 32, 32, 72, 124 15 | spec3: 4, 3, 64, 64, 124, 174 16 | spec4: 2, 2, 128, 128, 174, 226 17 | spec5: 1, 1, 256, 256, 226, 278 18 | 19 | -------------------------------------------------------------------------------- 
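Note: the example scripts parse these .cfg files with configparser and pass every value through eval() (see example/count_ops.py later in this repo), so numbers become int/float, comma-separated lists become tuples, and quoted names become strings. A minimal sketch of that parsing, using configs/zq1.cfg below as input:

    import configparser

    # Every option under [Train] is eval()'d, mirroring example/count_ops.py:
    # '0.45' -> 0.45, "'backbone1'" -> 'backbone1', '2, 3' -> (2, 3), 'True' -> True
    params = {}
    config = configparser.ConfigParser()
    config.read('configs/zq1.cfg')
    for key in config.options('Train'):
        params[key] = eval(config.get('Train', key))

    image_size_x = int(params['image_size_x'])  # 256
    aspect_ratios = params['aspect_ratios']     # (2, 3)
    spec1 = params['spec1']                     # (16, 12, 16, 16, 40, 72)
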
/configs/zq11.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone11' 3 | header_type: 'headers11' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3, 4.5, 6 10 | use_gray: True 11 | image_size_x: 384 12 | image_size_y: 384 13 | spec1: 48, 48, 8, 8, 20, 40 14 | spec2: 24, 24, 16, 16, 40, 72 15 | spec3: 12, 12, 32, 32, 72, 124 16 | spec4: 6, 6, 64, 64, 124, 174 17 | spec5: 3, 3, 128, 128, 174, 226 18 | 19 | -------------------------------------------------------------------------------- /configs/zq12.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone12' 3 | header_type: 'headers12' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3, 4.5, 6 10 | use_gray: True 11 | image_size_x: 384 12 | image_size_y: 384 13 | spec1: 48, 48, 8, 8, 20, 40 14 | spec2: 24, 24, 16, 16, 40, 72 15 | spec3: 12, 12, 32, 32, 72, 124 16 | spec4: 6, 6, 64, 64, 124, 174 17 | spec5: 3, 3, 128, 128, 174, 226 18 | 19 | -------------------------------------------------------------------------------- /configs/zq2.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone2' 3 | header_type: 'headers2' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq3.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone3' 3 | header_type: 'headers3' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq4.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone3' 3 | header_type: 'headers3' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | image_size_x: 192 12 | image_size_y: 192 13 | spec1: 24, 24, 8, 8, 20, 40 14 | spec2: 12, 12, 16, 16, 40, 72 15 | spec3: 6, 6, 32, 32, 72, 106 16 | spec4: 3, 3, 64, 64, 106, 140 17 | spec5: 2, 2, 96, 96, 140, 174 18 | spec6: 1, 1, 192, 192, 174, 210 19 | 20 | -------------------------------------------------------------------------------- /configs/zq5.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone4' 3 | header_type: 'headers3' 4 | image_mean: 0.0, 0.0, 0.0 5 | 
image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: None 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq6.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone5' 3 | header_type: 'headers4' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: None 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq7.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone5' 3 | header_type: 'headers13' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq8.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone5' 3 | header_type: 'headers13' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | image_size_x: 384 12 | image_size_y: 288 13 | spec1: 48, 36, 8, 8, 20, 40 14 | spec2: 24, 18, 16, 16, 40, 72 15 | spec3: 12, 9, 32, 32, 72, 124 16 | spec4: 6, 5, 64, 58, 124, 174 17 | spec5: 3, 3, 128, 96, 174, 226 18 | spec6: 2, 2, 192, 144, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/model-face.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone3' 3 | header_type: 'headers3' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 1.5 10 | use_gray: True 11 | image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq13.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone13' 3 | header_type: 'headers13' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3 10 | use_gray: True 11 | 
image_size_x: 256 12 | image_size_y: 192 13 | spec1: 32, 24, 8, 8, 20, 40 14 | spec2: 16, 12, 16, 16, 40, 72 15 | spec3: 8, 6, 32, 32, 72, 124 16 | spec4: 4, 3, 64, 64, 124, 174 17 | spec5: 2, 2, 128, 128, 174, 226 18 | spec6: 1, 1, 256, 256, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /configs/zq9.cfg: -------------------------------------------------------------------------------- 1 | [Train] 2 | backbone_type: 'backbone5' 3 | header_type: 'headers13' 4 | image_mean: 0.0, 0.0, 0.0 5 | image_std: 1.0 6 | iou_thresh: 0.45 7 | center_variance: 0.1 8 | size_variance: 0.2 9 | aspect_ratios: 2, 3, 4.5 10 | use_gray: True 11 | image_size_x: 320 12 | image_size_y: 320 13 | spec1: 40, 40, 8, 8, 20, 40 14 | spec2: 20, 20, 16, 16, 40, 72 15 | spec3: 10, 10, 32, 32, 72, 124 16 | spec4: 5, 5, 64, 64, 124, 174 17 | spec5: 3, 3, 108, 108, 174, 226 18 | spec6: 2, 2, 160, 160, 226, 278 19 | 20 | -------------------------------------------------------------------------------- /core/ssd_fpn_x_creator.py: -------------------------------------------------------------------------------- 1 | from .ssd_fpn_x.ssd_fpn_x_zq16 import SSD_FPN_X_ZQ16 2 | 3 | from .predictor_x import Predictor 4 | 5 | 6 | def create_ssd(net_type, use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test=False, with_softmax=False, device=None): 7 | if net_type == 'ssd_fpn_zq16': 8 | return SSD_FPN_X_ZQ16(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 9 | 10 | 11 | def create_ssd_predictor(net, image_size_x, image_size_y, image_mean, image_std, iou_thresh, candidate_size=200, nms_method=None, sigma=0.5, device=None): 12 | predictor = Predictor(net, image_size_x, image_size_y, image_mean, 13 | image_std, 14 | nms_method=nms_method, 15 | iou_threshold=iou_thresh, 16 | candidate_size=candidate_size, 17 | sigma=sigma, 18 | device=device) 19 | return predictor 20 | -------------------------------------------------------------------------------- /core/datasets/collation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def object_detection_collate(batch): 6 | images = [] 7 | gt_boxes = [] 8 | gt_labels = [] 9 | image_type = type(batch[0][0]) 10 | box_type = type(batch[0][1]) 11 | label_type = type(batch[0][2]) 12 | for image, boxes, labels in batch: 13 | if image_type is np.ndarray: 14 | images.append(torch.from_numpy(image)) 15 | elif image_type is torch.Tensor: 16 | images.append(image) 17 | else: 18 | raise TypeError(f"Image should be tensor or np.ndarray, but got {image_type}.") 19 | if box_type is np.ndarray: 20 | gt_boxes.append(torch.from_numpy(boxes)) 21 | elif box_type is torch.Tensor: 22 | gt_boxes.append(boxes) 23 | else: 24 | raise TypeError(f"Boxes should be tensor or np.ndarray, but got {box_type}.") 25 | if label_type is np.ndarray: 26 | gt_labels.append(torch.from_numpy(labels)) 27 | elif label_type is torch.Tensor: 28 | gt_labels.append(labels) 29 | else: 30 | raise TypeError(f"Labels should be tensor or np.ndarray, but got {label_type}.") 31 | return torch.stack(images), gt_boxes, gt_labels -------------------------------------------------------------------------------- /core/utils/misc.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | 4 | 5 | def str2bool(s): 6 | return s.lower() in ('true', '1') 7 | 8 | 9 | class 
Timer: 10 | def __init__(self): 11 | self.clock = {} 12 | 13 | def start(self, key="default"): 14 | self.clock[key] = time.time() 15 | 16 | def end(self, key="default"): 17 | if key not in self.clock: 18 | raise Exception(f"{key} is not in the clock.") 19 | interval = time.time() - self.clock[key] 20 | del self.clock[key] 21 | return interval 22 | 23 | 24 | def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): 25 | torch.save({ 26 | 'epoch': epoch, 27 | 'model': net_state_dict, 28 | 'optimizer': optimizer_state_dict, 29 | 'best_score': best_score 30 | }, checkpoint_path) 31 | torch.save(net_state_dict, model_path) 32 | 33 | 34 | def load_checkpoint(checkpoint_path): 35 | return torch.load(checkpoint_path) 36 | 37 | 38 | def freeze_net_layers(net): 39 | for param in net.parameters(): 40 | param.requires_grad = False 41 | 42 | 43 | def store_labels(path, labels): 44 | with open(path, "w") as f: 45 | f.write("\n".join(labels)) 46 | -------------------------------------------------------------------------------- /core/utils/measurements.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def compute_average_precision(precision, recall): 5 | """ 6 | Computes average precision as defined by the PASCAL VOC competition: the area under 7 | the precision-recall curve. Recall follows the normal definition; precision is a variant: 8 | pascal_precision[i] = typical_precision[i:].max() 9 | """ 10 | # identical but faster version of new_precision[i] = old_precision[i:].max() 11 | precision = np.concatenate([[0.0], precision, [0.0]]) 12 | for i in range(len(precision) - 1, 0, -1): 13 | precision[i - 1] = np.maximum(precision[i - 1], precision[i]) 14 | 15 | # find the index where the value changes 16 | recall = np.concatenate([[0.0], recall, [1.0]]) 17 | changing_points = np.where(recall[1:] != recall[:-1])[0] 18 | 19 | # compute the area under the curve 20 | areas = (recall[changing_points + 1] - recall[changing_points]) * precision[changing_points + 1] 21 | return areas.sum() 22 | 23 | 24 | def compute_voc2007_average_precision(precision, recall): 25 | ap = 0. 26 | for t in np.arange(0., 1.1, 0.1): 27 | if np.sum(recall >= t) == 0: 28 | p = 0 29 | else: 30 | p = np.max(precision[recall >= t]) 31 | ap = ap + p / 11. 
32 | return ap 33 | -------------------------------------------------------------------------------- /core/backbone/get_backbone.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .backbone1 import get_net as get_net1 4 | from .backbone2 import get_net as get_net2 5 | from .backbone3 import get_net as get_net3 6 | from .backbone4 import get_net as get_net4 7 | from .backbone5 import get_net as get_net5 8 | from .backbone10 import get_net as get_net10 9 | from .backbone11 import get_net as get_net11 10 | from .backbone12 import get_net as get_net12 11 | from .backbone13 import get_net as get_net13 12 | 13 | 14 | def get_backbone(type, use_gray, num_classes): 15 | if type == "backbone1": 16 | net = get_net1(use_gray, num_classes) 17 | elif type == "backbone2": 18 | net = get_net2(use_gray, num_classes) 19 | elif type == "backbone3": 20 | net = get_net3(use_gray, num_classes) 21 | elif type == "backbone4": 22 | net = get_net4(use_gray, num_classes) 23 | elif type == "backbone5": 24 | net = get_net5(use_gray, num_classes) 25 | elif type == "backbone10": 26 | net = get_net10(use_gray, num_classes) 27 | elif type == "backbone11": 28 | net = get_net11(use_gray, num_classes) 29 | elif type == "backbone12": 30 | net = get_net12(use_gray, num_classes) 31 | elif type == "backbone13": 32 | net = get_net13(use_gray, num_classes) 33 | 34 | return net 35 | 36 | -------------------------------------------------------------------------------- /core/ssd_creator.py: -------------------------------------------------------------------------------- 1 | from .ssd import SSD 2 | from .predictor import Predictor 3 | from .backbone import get_backbone as get_backbone 4 | from .headers import get_headers as get_headers 5 | 6 | 7 | def create_ssd(backbone_type, header_type, use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test=False, with_softmax=False, device=None, fp16=False): 8 | #print(backbone_type) 9 | base_net = get_backbone.get_backbone(backbone_type, use_gray, num_classes) 10 | #print(header_type) 11 | source_layer_indexes, extras, classification_headers, regression_headers = get_headers.get_headers(header_type, base_net, num_classes, aspect_ratios) 12 | 13 | return SSD(num_classes, base_net.model, source_layer_indexes, 14 | extras, classification_headers, regression_headers, priors, center_variance, size_variance, is_test=is_test, with_softmax=with_softmax, device=device, fp16=fp16) 15 | 16 | 17 | def create_ssd_predictor(net, image_size_x, image_size_y, image_mean, image_std, iou_thresh, candidate_size=200, nms_method=None, sigma=0.5, device=None): 18 | predictor = Predictor(net, image_size_x, image_size_y, image_mean, 19 | image_std, 20 | nms_method=nms_method, 21 | iou_threshold=iou_thresh, 22 | candidate_size=candidate_size, 23 | sigma=sigma, 24 | device=device) 25 | return predictor 26 | -------------------------------------------------------------------------------- /core/ssd_fpn_creator.py: -------------------------------------------------------------------------------- 1 | from .ssd_fpn.ssd_fpn_zq1 import SSD_FPN_ZQ1 2 | from .ssd_fpn.ssd_fpn_zq2 import SSD_FPN_ZQ2 3 | from .ssd_fpn.ssd_fpn_zq3 import SSD_FPN_ZQ3 4 | from .ssd_fpn.ssd_fpn_zq4 import SSD_FPN_ZQ4 5 | from .ssd_fpn.ssd_fpn_zq5 import SSD_FPN_ZQ5 6 | 7 | from .predictor import Predictor 8 | 9 | 10 | def create_ssd(net_type, use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test=False, 
with_softmax=False, device=None): 11 | if net_type == 'ssd_fpn_zq1': 12 | return SSD_FPN_ZQ1(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 13 | elif net_type == 'ssd_fpn_zq2': 14 | return SSD_FPN_ZQ2(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 15 | elif net_type == 'ssd_fpn_zq3': 16 | return SSD_FPN_ZQ3(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 17 | elif net_type == 'ssd_fpn_zq4': 18 | return SSD_FPN_ZQ4(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 19 | elif net_type == 'ssd_fpn_zq5': 20 | return SSD_FPN_ZQ5(use_gray, num_classes, aspect_ratios, priors, center_variance, size_variance, is_test, with_softmax, device=device) 21 | 22 | 23 | def create_ssd_predictor(net, image_size_x, image_size_y, image_mean, image_std, iou_thresh, candidate_size=200, nms_method=None, sigma=0.5, device=None): 24 | predictor = Predictor(net, image_size_x, image_size_y, image_mean, 25 | image_std, 26 | nms_method=nms_method, 27 | iou_threshold=iou_thresh, 28 | candidate_size=candidate_size, 29 | sigma=sigma, 30 | device=device) 31 | return predictor 32 | -------------------------------------------------------------------------------- /core/data_preprocessing.py: -------------------------------------------------------------------------------- 1 | from .transforms import * 2 | 3 | 4 | class TrainAugmentation: 5 | def __init__(self, size_x, size_y, mean=0, std=1.0): 6 | """ 7 | Args: 8 | size_x, size_y: the size of the final image. 9 | mean: mean pixel value per channel. 10 | """ 11 | self.mean = mean 12 | self.size_x = size_x 13 | self.size_y = size_y 14 | self.augment = Compose([ 15 | ConvertFromInts(), 16 | PhotometricDistort(), 17 | Expand(self.mean), 18 | RandomSampleCrop(self.size_x,self.size_y), 19 | RandomMirror(), 20 | ToPercentCoords(), 21 | Resize(self.size_x, self.size_y), 22 | #SubtractMeans(self.mean), 23 | #lambda img, boxes=None, labels=None: (img / std, boxes, labels), 24 | ToTensor(), 25 | ]) 26 | 27 | def __call__(self, img, boxes, labels): 28 | """ 29 | 30 | Args: 31 | img: the output of cv.imread in RGB layout. 32 | boxes: bounding boxes in the form of (x1, y1, x2, y2). 33 | labels: labels of boxes. 
34 | """ 35 | #print(boxes) 36 | return self.augment(img, boxes, labels) 37 | 38 | 39 | class TestTransform: 40 | def __init__(self, size_x, size_y, mean=0.0, std=1.0): 41 | self.transform = Compose([ 42 | ToPercentCoords(), 43 | Resize(size_x, size_y), 44 | #SubtractMeans(mean), 45 | #lambda img, boxes=None, labels=None: (img / std, boxes, labels), 46 | ToTensor(), 47 | ]) 48 | 49 | def __call__(self, image, boxes, labels): 50 | return self.transform(image, boxes, labels) 51 | 52 | 53 | class PredictionTransform: 54 | def __init__(self, size_x, size_y, mean=0.0, std=1.0): 55 | self.transform = Compose([ 56 | Resize(size_x, size_y), 57 | #SubtractMeans(mean), 58 | #lambda img, boxes=None, labels=None: (img / std, boxes, labels), 59 | ToTensor() 60 | ]) 61 | 62 | def __call__(self, image): 63 | image, _, _ = self.transform(image) 64 | return image 65 | -------------------------------------------------------------------------------- /core/utils/model_book.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch.nn as nn 3 | 4 | 5 | class ModelBook: 6 | """Maintain the mapping between modules and their paths. 7 | 8 | Example: 9 | book = ModelBook(model_ft) 10 | for p, m in book.conv2d_modules(): 11 | print('path:', p, 'num of filters:', m.out_channels) 12 | assert m is book.get_module(p) 13 | """ 14 | 15 | def __init__(self, model): 16 | self._model = model 17 | self._modules = OrderedDict() 18 | self._paths = OrderedDict() 19 | path = [] 20 | self._construct(self._model, path) 21 | 22 | def _construct(self, module, path): 23 | if not module._modules: 24 | return 25 | for name, m in module._modules.items(): 26 | cur_path = tuple(path + [name]) 27 | self._paths[m] = cur_path 28 | self._modules[cur_path] = m 29 | self._construct(m, path + [name]) 30 | 31 | def conv2d_modules(self): 32 | return self.modules(nn.Conv2d) 33 | 34 | def linear_modules(self): 35 | return self.modules(nn.Linear) 36 | 37 | def modules(self, module_type=None): 38 | for p, m in self._modules.items(): 39 | if not module_type or isinstance(m, module_type): 40 | yield p, m 41 | 42 | def num_of_conv2d_modules(self): 43 | return self.num_of_modules(nn.Conv2d) 44 | 45 | def num_of_conv2d_filters(self): 46 | """Return the sum of out_channels of all conv2d layers. 47 | 48 | Here we treat the sub weight with size of [in_channels, h, w] as a single filter. 
49 | """ 50 | num_filters = 0 51 | for _, m in self.conv2d_modules(): 52 | num_filters += m.out_channels 53 | return num_filters 54 | 55 | def num_of_linear_modules(self): 56 | return self.num_of_modules(nn.Linear) 57 | 58 | def num_of_linear_filters(self): 59 | num_filters = 0 60 | for _, m in self.linear_modules(): 61 | num_filters += m.out_features 62 | return num_filters 63 | 64 | def num_of_modules(self, module_type=None): 65 | num = 0 66 | for p, m in self._modules.items(): 67 | if not module_type or isinstance(m, module_type): 68 | num += 1 69 | return num 70 | 71 | def get_module(self, path): 72 | return self._modules.get(path) 73 | 74 | def get_path(self, module): 75 | return self._paths.get(module) 76 | 77 | def update(self, path, module): 78 | old_module = self._modules[path] 79 | del self._paths[old_module] 80 | self._paths[module] = path 81 | self._modules[path] = module 82 | -------------------------------------------------------------------------------- /core/backbone/backbone11.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone11(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone11, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':8, 'stride':1}, 33 | {'in_c':8, 'out_c':16, 'stride':2}, 34 | {'in_c':16, 'out_c':16, 'stride':1}, 35 | {'in_c':16, 'out_c':32, 'stride':2}, 36 | {'in_c':32, 'out_c':32, 'stride':1}, 37 | {'in_c':32, 'out_c':32, 'stride':1}, 38 | {'in_c':32, 'out_c':32, 'stride':1}, 39 | {'in_c':32, 'out_c':64, 'stride':2}, 40 | {'in_c':64, 'out_c':64, 'stride':1}, 41 | {'in_c':64, 'out_c':64, 'stride':1}, 42 | {'in_c':64, 'out_c':64, 'stride':1}, 43 | {'in_c':64, 'out_c':128, 'stride':2}, 44 | {'in_c':128, 'out_c':128, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone11(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone12.py: 
-------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone12(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone12, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':8, 'stride':1}, 33 | {'in_c':8, 'out_c':16, 'stride':2}, 34 | {'in_c':16, 'out_c':16, 'stride':1}, 35 | {'in_c':16, 'out_c':32, 'stride':2}, 36 | {'in_c':32, 'out_c':32, 'stride':1}, 37 | {'in_c':32, 'out_c':32, 'stride':1}, 38 | {'in_c':32, 'out_c':32, 'stride':1}, 39 | {'in_c':32, 'out_c':64, 'stride':2}, 40 | {'in_c':64, 'out_c':64, 'stride':1}, 41 | {'in_c':64, 'out_c':64, 'stride':1}, 42 | {'in_c':64, 'out_c':64, 'stride':1}, 43 | {'in_c':64, 'out_c':128, 'stride':2}, 44 | {'in_c':128, 'out_c':128, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone12(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone4.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone4(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone4, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = 
use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':16, 'stride':1}, 33 | {'in_c':16, 'out_c':24, 'stride':2}, 34 | {'in_c':24, 'out_c':24, 'stride':1}, 35 | {'in_c':24, 'out_c':48, 'stride':2}, 36 | {'in_c':48, 'out_c':48, 'stride':1}, 37 | {'in_c':48, 'out_c':48, 'stride':1}, 38 | {'in_c':48, 'out_c':48, 'stride':1}, 39 | {'in_c':48, 'out_c':96, 'stride':2}, 40 | {'in_c':96, 'out_c':96, 'stride':1}, 41 | {'in_c':96, 'out_c':96, 'stride':1}, 42 | {'in_c':96, 'out_c':96, 'stride':1}, 43 | {'in_c':96, 'out_c':192, 'stride':2}, 44 | {'in_c':192, 'out_c':192, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone4(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone3.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone3(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone3, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':16, 'stride':1}, 33 | {'in_c':16, 'out_c':32, 'stride':2}, 34 | {'in_c':32, 'out_c':32, 'stride':1}, 35 | {'in_c':32, 'out_c':64, 'stride':2}, 36 | {'in_c':64, 'out_c':64, 'stride':1}, 37 | {'in_c':64, 'out_c':64, 'stride':1}, 38 | {'in_c':64, 'out_c':64, 'stride':1}, 39 | {'in_c':64, 'out_c':128, 'stride':2}, 40 | {'in_c':128, 'out_c':128, 'stride':1}, 41 | {'in_c':128, 'out_c':128, 'stride':1}, 42 | {'in_c':128, 'out_c':128, 'stride':1}, 43 | {'in_c':128, 'out_c':256, 'stride':2}, 44 | {'in_c':256, 'out_c':256, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | 
self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone3(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone1.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone1(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone1, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':16, 'stride':1}, 33 | {'in_c':16, 'out_c':32, 'stride':2}, 34 | {'in_c':32, 'out_c':32, 'stride':1}, 35 | {'in_c':32, 'out_c':64, 'stride':2}, 36 | {'in_c':64, 'out_c':64, 'stride':1}, 37 | {'in_c':64, 'out_c':128, 'stride':2}, 38 | {'in_c':128, 'out_c':128, 'stride':1}, 39 | {'in_c':128, 'out_c':128, 'stride':1}, 40 | {'in_c':128, 'out_c':128, 'stride':1}, 41 | {'in_c':128, 'out_c':128, 'stride':1}, 42 | {'in_c':128, 'out_c':128, 'stride':1}, 43 | {'in_c':128, 'out_c':256, 'stride':2}, 44 | {'in_c':256, 'out_c':256, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone1(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone13.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone13(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone13, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', 
nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':8, 'stride':2}, 32 | {'in_c':8, 'out_c':16, 'stride':1}, 33 | {'in_c':16, 'out_c':32, 'stride':2}, 34 | {'in_c':32, 'out_c':32, 'stride':1}, 35 | {'in_c':32, 'out_c':64, 'stride':2}, 36 | {'in_c':64, 'out_c':64, 'stride':1}, 37 | {'in_c':64, 'out_c':64, 'stride':1}, 38 | {'in_c':64, 'out_c':64, 'stride':1}, 39 | {'in_c':64, 'out_c':128, 'stride':2}, 40 | {'in_c':128, 'out_c':128, 'stride':1}, 41 | {'in_c':128, 'out_c':128, 'stride':1}, 42 | {'in_c':128, 'out_c':128, 'stride':1}, 43 | {'in_c':128, 'out_c':256, 'stride':2}, 44 | {'in_c':256, 'out_c':256, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone13(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone2.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | 7 | class backbone2(nn.Module): 8 | def __init__(self, use_gray, num_classes=1024): 9 | super(backbone2, self).__init__() 10 | 11 | def conv_bn(inp, oup, stride, name): 12 | return nn.Sequential(OrderedDict([ 13 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 14 | (name+'/bn', nn.BatchNorm2d(oup)), 15 | (name+'/relu', nn.ReLU(inplace=True)) 16 | ]) 17 | ) 18 | 19 | def conv_dw(inp, oup, stride, name): 20 | return nn.Sequential(OrderedDict([ 21 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 22 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 23 | (name+'/dw/relu', nn.ReLU(inplace=True)), 24 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 25 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 26 | (name+'/sep/relu', nn.ReLU(inplace=True)) 27 | ]) 28 | ) 29 | 30 | self.use_gray = use_gray 31 | in_c = 1 if use_gray else 3 32 | self.layers = [{'in_c':in_c, 'out_c':16, 'stride':2}, 33 | {'in_c':16, 'out_c':32, 'stride':1}, 34 | {'in_c':32, 'out_c':64, 'stride':2}, 35 | {'in_c':64, 'out_c':64, 'stride':1}, 36 | {'in_c':64, 'out_c':128, 'stride':2}, 37 | {'in_c':128, 'out_c':128, 'stride':1}, 38 | {'in_c':128, 'out_c':128, 'stride':1}, 39 | {'in_c':128, 'out_c':128, 'stride':1}, 40 | 
{'in_c':128, 'out_c':256, 'stride':2}, 41 | {'in_c':256, 'out_c':256, 'stride':1}, 42 | {'in_c':256, 'out_c':256, 'stride':1}, 43 | {'in_c':256, 'out_c':256, 'stride':1}, 44 | {'in_c':256, 'out_c':512, 'stride':2}, 45 | {'in_c':512, 'out_c':512, 'stride':1}] 46 | 47 | self.num_layers = len(self.layers) 48 | self.model = nn.Sequential() 49 | for i in range(self.num_layers): 50 | cur_layer = self.layers[i] 51 | if i == 0: 52 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 53 | else: 54 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 55 | 56 | 57 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 58 | 59 | def forward(self, x): 60 | x = self.model(x) 61 | #x = F.avg_pool2d(x, 7) 62 | x = x.view(-1, self.layers[-1]['out_c']) 63 | x = self.fc(x) 64 | return x 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone2(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone5.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone5(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone5, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':32, 'stride':2}, 32 | {'in_c':32, 'out_c':32, 'stride':1}, 33 | {'in_c':32, 'out_c':64, 'stride':2}, 34 | {'in_c':64, 'out_c':64, 'stride':1}, 35 | {'in_c':64, 'out_c':128, 'stride':2}, 36 | {'in_c':128, 'out_c':128, 'stride':1}, 37 | {'in_c':128, 'out_c':128, 'stride':1}, 38 | {'in_c':128, 'out_c':128, 'stride':1}, 39 | {'in_c':128, 'out_c':256, 'stride':2}, 40 | {'in_c':256, 'out_c':256, 'stride':1}, 41 | {'in_c':256, 'out_c':256, 'stride':1}, 42 | {'in_c':256, 'out_c':256, 'stride':1}, 43 | {'in_c':256, 'out_c':512, 'stride':2}, 44 | {'in_c':512, 'out_c':512, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return 
backbone5(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /core/backbone/backbone10.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | class backbone10(nn.Module): 7 | def __init__(self, use_gray, num_classes=1024): 8 | super(backbone10, self).__init__() 9 | 10 | def conv_bn(inp, oup, stride, name): 11 | return nn.Sequential(OrderedDict([ 12 | (name+'/conv', nn.Conv2d(inp, oup, 3, stride, 1, bias=False)), 13 | (name+'/bn', nn.BatchNorm2d(oup)), 14 | (name+'/relu', nn.ReLU(inplace=True)) 15 | ]) 16 | ) 17 | 18 | def conv_dw(inp, oup, stride, name): 19 | return nn.Sequential(OrderedDict([ 20 | (name+'/dw', nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False)), 21 | (name+'/dw/bn', nn.BatchNorm2d(inp)), 22 | (name+'/dw/relu', nn.ReLU(inplace=True)), 23 | (name+'/sep', nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), 24 | (name+'/sep/bn', nn.BatchNorm2d(oup)), 25 | (name+'/sep/relu', nn.ReLU(inplace=True)) 26 | ]) 27 | ) 28 | 29 | self.use_gray = use_gray 30 | in_c = 1 if use_gray else 3 31 | self.layers = [{'in_c':in_c, 'out_c':128, 'stride':2}, 32 | {'in_c':128, 'out_c':128, 'stride':1}, 33 | {'in_c':128, 'out_c':256, 'stride':2}, 34 | {'in_c':256, 'out_c':256, 'stride':1}, 35 | {'in_c':256, 'out_c':512, 'stride':2}, 36 | {'in_c':512, 'out_c':512, 'stride':1}, 37 | {'in_c':512, 'out_c':512, 'stride':1}, 38 | {'in_c':512, 'out_c':512, 'stride':1}, 39 | {'in_c':512, 'out_c':1024, 'stride':2}, 40 | {'in_c':1024, 'out_c':1024, 'stride':1}, 41 | {'in_c':1024, 'out_c':1024, 'stride':1}, 42 | {'in_c':1024, 'out_c':1024, 'stride':1}, 43 | {'in_c':1024, 'out_c':2048, 'stride':2}, 44 | {'in_c':2048, 'out_c':2048, 'stride':1}] 45 | 46 | self.num_layers = len(self.layers) 47 | self.model = nn.Sequential() 48 | for i in range(self.num_layers): 49 | cur_layer = self.layers[i] 50 | if i == 0: 51 | self.model.add_module('conv_%d'%(i+1),conv_bn(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 52 | else: 53 | self.model.add_module('conv_%d'%(i+1),conv_dw(cur_layer['in_c'],cur_layer['out_c'],cur_layer['stride'],'conv_%d'%(i+1))) 54 | 55 | 56 | self.fc = nn.Linear(self.layers[-1]['out_c'], num_classes) 57 | 58 | def forward(self, x): 59 | x = self.model(x) 60 | #x = F.avg_pool2d(x, 7) 61 | x = x.view(-1, self.layers[-1]['out_c']) 62 | x = self.fc(x) 63 | return x 64 | 65 | 66 | def get_net(use_gray, num_classes): 67 | return backbone10(use_gray, num_classes) 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pytorch-ssd-for-ZQCNN 2 | 3 | Training SSD with PyTorch 4 | 5 | The reference code is https://github.com/qfgaohao/pytorch-ssd, but it has been modified so heavily that the two are no longer compatible 6 | 7 | # Requirements 8 | 9 | PyTorch 1.6.0 10 | 11 | Install anything else that turns out to be missing 12 | 13 | # Dataset 14 | 15 | VOC format 16 | 17 | For example, the WIDER dataset in VOC format 18 | 19 | Link: https://pan.baidu.com/s/1vKPyPBVoCEDiKhUd_eakZg 20 | Extraction code: hw6m 21 | 22 | # Data preparation 23 | 24 | Go into the VOC-format dataset directory (you should see the directories below); this directory is referred to as VOC_ROOT from here on 25 | 26 | annotations 27 | ImageSets 28 | JPEGImages 29 | 30 | 31 | Run the following command 32 | 33 | python /path/to/pytorch-ssd-zq/core/datasets/generate_vocdata.py label_file 34 | 35 | where label_file is a file whose content is the class names (excluding __BACKGROUND__), separated by ',' 36 | 37 | My label_file looks like this: with 3 classes, dog, cat, person, I write 3 lines 38 | 39 | dog, 40 | cat, 41 | person 42 | 43 | 44 | For training, put label_file inside the VOC dataset, in the same directory as JPEGImages 45 | 46 | 47 | 
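The training and example scripts prepend the __BACKGROUND__ class themselves, so num_classes = len(class_names) + 1 (see example/count_ops.py). A minimal sketch of reading the label file back, assuming the comma-separated one-name-per-line format shown above (a hypothetical helper, not a script shipped in this repo):

    # Read class names from label_file (format shown above).
    with open('label_file') as f:
        class_names = [line.strip().rstrip(',') for line in f if line.strip()]
    num_classes = len(class_names) + 1  # +1 for __BACKGROUND__
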
# Training 48 | 49 | 50 | From the project root, run 51 | 52 | python example/train_ssd9.py \ 53 | --config_file configs/model-face.cfg \ 54 | --datasets VOC_ROOT \ 55 | --validation_dataset VOC_ROOT \ 56 | --batch_size 128 \ 57 | --num_epochs 200 \ 58 | --lr 0.01 \ 59 | --gpus_id 0 60 | 61 | To train with fp16, add the argument 62 | 63 | --fp16 True 64 | 65 | For a model with an FPN, use the following command 66 | 67 | python example_fpn/train_ssd_fpn9.py \ 68 | --config_file configs_fpn/ssd_fpn_zq14.cfg \ 69 | --datasets VOC_ROOT \ 70 | --validation_dataset VOC_ROOT \ 71 | --batch_size 128 \ 72 | --num_epochs 200 \ 73 | --lr 0.01 \ 74 | --gpus_id 0 75 | 76 | **Training on multiple VOC datasets** 77 | 78 | Pass several VOC datasets to --datasets, separated by commas 79 | 80 | python example/train_ssd9.py \ 81 | --config_file configs/model-face.cfg \ 82 | --datasets VOC_ROOT1,VOC_ROOT2,VOC_ROOT3,VOC_ROOT4 \ 83 | --validation_dataset VOC_ROOT1 \ 84 | --batch_size 128 \ 85 | --num_epochs 200 \ 86 | --lr 0.01 \ 87 | --gpus_id 0 88 | 89 | # Evaluating model accuracy 90 | 91 | From the project root, run 92 | 93 | python example/eval_ssd.py \ 94 | --config_file configs/zq3.cfg \ 95 | --trained_model YOUR_MODEL \ 96 | --dataset VOC_ROOT \ 97 | --use_cuda True \ 98 | --gpus_id 0 \ 99 | --label_file YOUR_LABEL_FILE 100 | 101 | # Testing a single image 102 | 103 | From the project root, run 104 | 105 | python example/run_ssd_example.py config_file model_file label_file image_file 106 | 107 | 108 | # Exporting an ONNX model 109 | 110 | From the project root, run 111 | 112 | python example/pth2onnx.py config_file in_file out_file num_valid_classes withsoftmax 113 | 114 | # Simplifying the ONNX model 115 | 116 | Use https://github.com/daquexian/onnx-simplifier 117 | 118 | pip install onnx-simplifier 119 | 120 | python -m onnxsim in_file out_file 121 | 122 | # Inference 123 | 124 | **ZQCNN** 125 | 126 | ZQCNN contains code that can load these models; convert a model with the scripts in https://github.com/zuoqing1988/ZQCNN/tree/master/onnx_to_ZQCNN 127 | 128 | Sample code is at https://github.com/zuoqing1988/ZQCNN/tree/master/SamplesZQCNN/SampleSSDDetectorPytorch 129 | 130 | 131 | -------------------------------------------------------------------------------- /example_fpn/count_ops_fpn.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_fpn_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | 12 | from thop import profile 13 | from thop import clever_format 14 | from torchstat import stat 15 | 16 | 17 | if len(sys.argv) < 2: 18 | print('Usage: python count_ops_fpn.py <config_file>') 19 | sys.exit(0) 20 | else: 21 | 22 | config_file = sys.argv[1] 23 | class_names = ['bottle','phone','cigar'] 24 | 25 | # load config file and setup 26 | params = {} 27 | config = configparser.ConfigParser() 28 | config.read(config_file) 29 | 30 | #print(config) 31 | 32 | for _ in config.options("Train"): 33 | params[_] = eval(config.get("Train",_)) 34 | 35 | # image_size_x, image_size_y, image_mean, image_std, use_gray 36 | image_size_x = int(params['image_size_x']) 37 | image_size_y = int(params['image_size_y']) 38 | image_std = float(params['image_std']) 39 | image_mean = params['image_mean'] 40 | use_gray = bool(params['use_gray']) 41 | 42 | mean_values = list() 43 | for i in range(len(image_mean)): 44 | mean_values.append(float(image_mean[i])) 45 | image_mean = np.array(mean_values,dtype=np.float64) 46 | 47 | # iou_thresh, center_variance, size_variance 48 | iou_thresh = float(params['iou_thresh']) 49 | center_variance = float(params['center_variance']) 50 | 
size_variance = float(params['size_variance']) 51 | 52 | # net type 53 | net_type = params['net_type'] 54 | 55 | # aspect_ratios 56 | aspect_ratios = params['aspect_ratios'] 57 | if aspect_ratios is None: 58 | aspect_ratios = list() 59 | else: 60 | ratios = list() 61 | if type(aspect_ratios) == tuple: 62 | for j in range(len(aspect_ratios)): 63 | ratios.append(float(aspect_ratios[j])) 64 | else: 65 | ratios.append(float(aspect_ratios)) 66 | aspect_ratios = ratios 67 | 68 | # specs 69 | specs = list() 70 | for i in range(1,100): 71 | name = 'spec%d'%i 72 | if not name in params: 73 | break 74 | line = params[name] 75 | feat_map_x = int(line[0]) 76 | feat_map_y = int(line[1]) 77 | shrinkage_x = int(line[2]) 78 | shrinkage_y = int(line[3]) 79 | bbox_min = float(line[4]) 80 | bbox_max = float(line[5]) 81 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 82 | 83 | # priors 84 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 85 | 86 | # create ssd net 87 | net = create_ssd(net_type, use_gray, len(class_names)+1, aspect_ratios, priors, center_variance, size_variance, is_test=False, device=torch.device("cpu")) 88 | in_c = 1 if use_gray else 3 89 | stat(net,(in_c,image_size_y,image_size_x)) 90 | 91 | 92 | -------------------------------------------------------------------------------- /example_fpn_x/count_ops_fpn_x.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_fpn_x_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | 12 | from thop import profile 13 | from thop import clever_format 14 | from torchstat import stat 15 | 16 | 17 | if len(sys.argv) < 2: 18 | print('Usage: python count_ops_fpn_x.py <config_file>') 19 | sys.exit(0) 20 | else: 21 | 22 | config_file = sys.argv[1] 23 | class_names = ['bottle','phone','cigar'] 24 | 25 | # load config file and setup 26 | params = {} 27 | config = configparser.ConfigParser() 28 | config.read(config_file) 29 | 30 | #print(config) 31 | 32 | for _ in config.options("Train"): 33 | params[_] = eval(config.get("Train",_)) 34 | 35 | # image_size_x, image_size_y, image_mean, image_std, use_gray 36 | image_size_x = int(params['image_size_x']) 37 | image_size_y = int(params['image_size_y']) 38 | image_std = float(params['image_std']) 39 | image_mean = params['image_mean'] 40 | use_gray = bool(params['use_gray']) 41 | 42 | mean_values = list() 43 | for i in range(len(image_mean)): 44 | mean_values.append(float(image_mean[i])) 45 | image_mean = np.array(mean_values,dtype=np.float64) 46 | 47 | # iou_thresh, center_variance, size_variance 48 | iou_thresh = float(params['iou_thresh']) 49 | center_variance = float(params['center_variance']) 50 | size_variance = float(params['size_variance']) 51 | 52 | # net type 53 | net_type = params['net_type'] 54 | 55 | # aspect_ratios 56 | aspect_ratios = params['aspect_ratios'] 57 | if aspect_ratios is None: 58 | aspect_ratios = list() 59 | else: 60 | ratios = list() 61 | if type(aspect_ratios) == tuple: 62 | for j in range(len(aspect_ratios)): 63 | ratios.append(float(aspect_ratios[j])) 64 | else: 65 | ratios.append(float(aspect_ratios)) 66 | aspect_ratios = ratios 67 | 68 | # specs 69 | specs = list() 70 | for i in range(1,100): 71 | name = 'spec%d'%i 
69 | specs = list() 70 | for i in range(1,100): 71 | name = 'spec%d'%i 72 | if not name in params: 73 | break 74 | line = params[name] 75 | feat_map_x = int(line[0]) 76 | feat_map_y = int(line[1]) 77 | shrinkage_x = int(line[2]) 78 | shrinkage_y = int(line[3]) 79 | bbox_min = float(line[4]) 80 | bbox_max = float(line[5]) 81 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 82 | 83 | # priors 84 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 85 | 86 | # create ssd net 87 | net = create_ssd(net_type, use_gray, len(class_names)+1, aspect_ratios, priors, center_variance, size_variance, is_test=False, device=torch.device("cpu")) 88 | in_c = 1 if use_gray else 3 89 | stat(net,(in_c,image_size_y,image_size_x)) 90 | 91 | 92 | -------------------------------------------------------------------------------- /example/count_ops.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | 12 | from thop import profile 13 | from thop import clever_format 14 | from torchstat import stat 15 | 16 | 17 | if len(sys.argv) < 2: 18 | print('Usage: python count_ops.py <config_file>') 19 | sys.exit(0) 20 | else: 21 | 22 | config_file = sys.argv[1] 23 | class_names = ['bottle','phone','cigar'] 24 | 25 | # load config file and setup 26 | params = {} 27 | config = configparser.ConfigParser() 28 | config.read(config_file) 29 | 30 | #print(config) 31 | 32 | for _ in config.options("Train"): 33 | params[_] = eval(config.get("Train",_)) 34 | 35 | # image_size_x, image_size_y, image_mean, image_std, use_gray 36 | image_size_x = int(params['image_size_x']) 37 | image_size_y = int(params['image_size_y']) 38 | image_std = float(params['image_std']) 39 | image_mean = params['image_mean'] 40 | use_gray = bool(params['use_gray']) 41 | 42 | mean_values = list() 43 | for i in range(len(image_mean)): 44 | mean_values.append(float(image_mean[i])) 45 | image_mean = np.array(mean_values,dtype=np.float64) 46 | 47 | # iou_thresh, center_variance, size_variance 48 | iou_thresh = float(params['iou_thresh']) 49 | center_variance = float(params['center_variance']) 50 | size_variance = float(params['size_variance']) 51 | 52 | # backbone type, header type 53 | backbone_type = params['backbone_type'] 54 | header_type = params['header_type'] 55 | 56 | # aspect_ratios 57 | aspect_ratios = params['aspect_ratios'] 58 | if aspect_ratios is None: 59 | aspect_ratios = list() 60 | else: 61 | ratios = list() 62 | if type(aspect_ratios) == tuple: 63 | for j in range(len(aspect_ratios)): 64 | ratios.append(float(aspect_ratios[j])) 65 | else: 66 | ratios.append(float(aspect_ratios)) 67 | aspect_ratios = ratios 68 | 69 | # specs 70 | specs = list() 71 | for i in range(1,100): 72 | name = 'spec%d'%i 73 | if not name in params: 74 | break 75 | line = params[name] 76 | feat_map_x = int(line[0]) 77 | feat_map_y = int(line[1]) 78 | shrinkage_x = int(line[2]) 79 | shrinkage_y = int(line[3]) 80 | bbox_min = float(line[4]) 81 | bbox_max = float(line[5]) 82 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 83 | 84 | # priors 85 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 86 | 87 | # create ssd net 88 | net = 
create_ssd(backbone_type, header_type, use_gray, len(class_names)+1, aspect_ratios, priors, center_variance, size_variance, is_test=False, device=torch.device("cpu")) 89 | in_c = 1 if use_gray else 3 90 | stat(net,(in_c,image_size_y,image_size_x)) 91 | 92 | 93 | -------------------------------------------------------------------------------- /core/headers/headers12.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 18 | return Sequential(OrderedDict([ 19 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 20 | ]) 21 | ) 22 | 23 | 24 | def headers12(backbone, num_classes, aspect_ratios): 25 | 26 | source_layer_indexes = [ 27 | 8, 28 | 12, 29 | 14, 30 | ] 31 | 32 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 33 | 34 | extras = ModuleList([ 35 | Sequential(OrderedDict([ 36 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=32, kernel_size=1)), 37 | ('conv15_1x1/relu', ReLU()), 38 | ('conv15_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 39 | ]) 40 | ), 41 | Sequential(OrderedDict([ 42 | ('conv16_1x1', Conv2d(in_channels=64, out_channels=32, kernel_size=1)), 43 | ('conv16_1x1/relu', ReLU()), 44 | ('conv16_3x3', SeperableConv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 45 | ]) 46 | ) 47 | ]) 48 | 49 | regression_headers = ModuleList([ 50 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 51 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 52 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 53 | SeperableConv2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 54 | Conv_2d(in_channels=32, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_5'), 55 | ]) 56 | 57 | classification_headers = ModuleList([ 58 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 59 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 60 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, 
kernel_size=3, stride=1, padding=1, name='cls_3'), 61 | SeperableConv2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 62 | Conv_2d(in_channels=32, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_5'), 63 | ]) 64 | 65 | return source_layer_indexes, extras, classification_headers, regression_headers 66 | 67 | -------------------------------------------------------------------------------- /core/headers/headers1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 18 | return Sequential(OrderedDict([ 19 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 20 | ]) 21 | ) 22 | 23 | 24 | def headers1(backbone, num_classes, aspect_ratios): 25 | 26 | source_layer_indexes = [ 27 | 12, 28 | 14, 29 | ] 30 | 31 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 32 | 33 | extras = ModuleList([ 34 | Sequential(OrderedDict([ 35 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=64, kernel_size=1)), 36 | ('conv15_1x1/relu', ReLU()), 37 | ('conv15_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 38 | ]) 39 | ), 40 | Sequential(OrderedDict([ 41 | ('conv16_1x1', Conv2d(in_channels=128, out_channels=32, kernel_size=1)), 42 | ('conv16_1x1/relu', ReLU()), 43 | ('conv16_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 44 | ]) 45 | ), 46 | Sequential(OrderedDict([ 47 | ('conv17_1x1', Conv2d(in_channels=64, out_channels=32, kernel_size=1)), 48 | ('conv17_1x1/relu', ReLU()), 49 | ('conv17_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 50 | ]) 51 | ) 52 | ]) 53 | 54 | regression_headers = ModuleList([ 55 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 56 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 57 | SeperableConv2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 58 | SeperableConv2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 59 | Conv_2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_5'), 60 | ]) 61 | 62 | classification_headers = ModuleList([ 63 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 
num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 64 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 65 | SeperableConv2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 66 | SeperableConv2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 67 | Conv_2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_5'), 68 | ]) 69 | 70 | return source_layer_indexes, extras, classification_headers, regression_headers 71 | 72 | -------------------------------------------------------------------------------- /core/predictor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .utils import box_utils_zq as box_utils 3 | from .data_preprocessing import PredictionTransform 4 | from .utils.misc import Timer 5 | import numpy as np 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size_x, size_y, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.45, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size_x, size_y, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 23 | #print('init predictor') 24 | #print(self.device) 25 | 26 | self.net.to(self.device) 27 | self.net.eval() 28 | 29 | self.timer = Timer() 30 | 31 | def predict(self, image, top_k=-1, prob_threshold=None, print_score_and_box=False): 32 | #print('run predictor') 33 | #print(self.device) 34 | cpu_device = torch.device("cpu") 35 | height = image.shape[0] 36 | width = image.shape[1] 37 | if image.ndim == 2: 38 | image = image[:,:,np.newaxis] 39 | image = self.transform(image) 40 | images = image.unsqueeze(0) 41 | #print(images) 42 | images = images.to(self.device) 43 | with torch.no_grad(): 44 | self.timer.start() 45 | scores, boxes = self.net.forward(images, print_score_and_box) 46 | #print("Inference time: ", self.timer.end()) 47 | boxes = boxes[0] 48 | scores = scores[0] 49 | if not prob_threshold: 50 | prob_threshold = self.filter_threshold 51 | #print(prob_threshold) 52 | # this version of nms is slower on GPU, so we move data to CPU. 
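# Sketch of the per-class selection performed below (assuming box_utils.nms
# applies standard hard NMS unless nms_method/sigma select soft-NMS): for every
# non-background class, candidates with score > prob_threshold are sorted by
# confidence; the best box is kept and any remaining box whose IoU with a kept
# box exceeds iou_threshold is dropped, considering at most candidate_size
# boxes. Surviving boxes are finally rescaled from normalized [0,1]
# coordinates back to pixel coordinates using the input width/height.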
53 | boxes = boxes.to(cpu_device) 54 | scores = scores.to(cpu_device) 55 | 56 | if print_score_and_box: 57 | print(scores.shape) 58 | score_h, score_w = scores.shape 59 | for i in range(score_h): 60 | line = '' 61 | for j in range(score_w): 62 | line = line + '%12.5f '%(scores[i][j]) 63 | print(line) 64 | 65 | print(boxes.shape) 66 | box_h, box_w = boxes.shape 67 | for i in range(box_h): 68 | print('%12.5f %12.5f %12.5f %12.5f'%(boxes[i][0],boxes[i][1],boxes[i][2],boxes[i][3])) 69 | 70 | 71 | picked_box_probs = [] 72 | picked_labels = [] 73 | for class_index in range(1, scores.size(1)): 74 | probs = scores[:, class_index] 75 | mask = probs > prob_threshold 76 | probs = probs[mask] 77 | if probs.size(0) == 0: 78 | continue 79 | subset_boxes = boxes[mask, :] 80 | box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) 81 | box_probs = box_utils.nms(box_probs, self.nms_method, 82 | score_threshold=prob_threshold, 83 | iou_threshold=self.iou_threshold, 84 | sigma=self.sigma, 85 | top_k=top_k, 86 | candidate_size=self.candidate_size) 87 | picked_box_probs.append(box_probs) 88 | picked_labels.extend([class_index] * box_probs.size(0)) 89 | if not picked_box_probs: 90 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 91 | picked_box_probs = torch.cat(picked_box_probs) 92 | picked_box_probs[:, 0] *= width 93 | picked_box_probs[:, 1] *= height 94 | picked_box_probs[:, 2] *= width 95 | picked_box_probs[:, 3] *= height 96 | return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4] 97 | -------------------------------------------------------------------------------- /example_fpn/pth2onnx_fpn.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_fpn_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | import io 12 | import torch.onnx 13 | 14 | 15 | def pth2onnx(config_file, in_file, out, num_valid_classes, with_softmax): 16 | #class_names = ['bottle','phone','cigar'] 17 | 18 | # load config file and setup 19 | params = {} 20 | config = configparser.ConfigParser() 21 | config.read(config_file) 22 | 23 | #print(config) 24 | 25 | for _ in config.options("Train"): 26 | params[_] = eval(config.get("Train",_)) 27 | 28 | # image_size_x, image_size_y, image_mean, image_std, use_gray 29 | image_size_x = int(params['image_size_x']) 30 | image_size_y = int(params['image_size_y']) 31 | image_std = float(params['image_std']) 32 | image_mean = params['image_mean'] 33 | use_gray = bool(params['use_gray']) 34 | 35 | mean_values = list() 36 | for i in range(len(image_mean)): 37 | mean_values.append(float(image_mean[i])) 38 | image_mean = np.array(mean_values,dtype=np.float) 39 | 40 | # iou_thresh, center_varaiance, size_variance 41 | iou_thresh = float(params['iou_thresh']) 42 | center_variance = float(params['center_variance']) 43 | size_variance = float(params['size_variance']) 44 | 45 | # net type 46 | net_type = params['net_type'] 47 | 48 | # aspect_ratios 49 | aspect_ratios = params['aspect_ratios'] 50 | if aspect_ratios is None: 51 | aspect_ratios = list() 52 | else: 53 | ratios = list() 54 | if type(aspect_ratios) == tuple: 55 | for j in range(len(aspect_ratios)): 56 | ratios.append(float(aspect_ratios[j])) 57 | else: 58 | 
ratios.append(float(aspect_ratios)) 59 | aspect_ratios = ratios 60 | 61 | # specs 62 | specs = list() 63 | for i in range(1,100): 64 | name = 'spec%d'%i 65 | if not name in params: 66 | break 67 | line = params[name] 68 | feat_map_x = int(line[0]) 69 | feat_map_y = int(line[1]) 70 | shrinkage_x = int(line[2]) 71 | shrinkage_y = int(line[3]) 72 | bbox_min = float(line[4]) 73 | bbox_max = float(line[5]) 74 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 75 | 76 | # priors 77 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 78 | 79 | # create ssd net 80 | model = create_ssd(net_type, use_gray, num_valid_classes+1, aspect_ratios, priors, center_variance, size_variance, is_test=False, with_softmax=with_softmax, device=torch.device("cpu")) 81 | 82 | 83 | loaded_model = torch.load(in_file,map_location='cpu') 84 | #print(loaded_model) 85 | model.load_state_dict(loaded_model) 86 | in_c = 1 if use_gray else 3 87 | dummy_input = torch.randn(1,in_c,image_size_y,image_size_x) 88 | input_name=['data'] 89 | output_name=['cls','loc'] 90 | torch.onnx.export(model,dummy_input,out,verbose=True,input_names=input_name,output_names=output_name) 91 | 92 | if __name__=='__main__': 93 | 94 | 95 | config_file = 'configs/zq1.cfg' 96 | in_file = 'models/backbone1-headers1-256x192/Epoch-5-Loss-5.812523530079768.pth' 97 | out_file = 'Epoch-5-Loss-5.812523530079768.onnx' 98 | with_softmax = False 99 | if len(sys.argv) < 5: 100 | print('pth2onnx.py config_file in_file out_file num_valid_classes [with_softmax]') 101 | else: 102 | config_file = sys.argv[1] 103 | in_file = sys.argv[2] 104 | out_file = sys.argv[3] 105 | num_valid_classes = int(sys.argv[4]) 106 | if len(sys.argv) >=6: 107 | with_softmax = (int(sys.argv[5]) != 0) 108 | pth2onnx(config_file, in_file, out_file, num_valid_classes, with_softmax) 109 | --------------------------------------------------------------------------------
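A quick sanity check after running the converter above can confirm that the exported graph exposes the declared 'data' input and 'cls'/'loc' outputs. This is an illustrative sketch, not part of the repo; it assumes the onnx package is installed, and the file names are hypothetical:

# e.g.: python example_fpn/pth2onnx_fpn.py configs/zq1.cfg model.pth model.onnx 3
import onnx
m = onnx.load('model.onnx')              # hypothetical output path from the run above
onnx.checker.check_model(m)              # raises if the exported graph is malformed
print([i.name for i in m.graph.input])   # expected: ['data']
print([o.name for o in m.graph.output])  # expected: ['cls', 'loc']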
/example_fpn_x/pth2onnx_fpn_x.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_fpn_x_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | import io 12 | import torch.onnx 13 | 14 | 15 | def pth2onnx(config_file, in_file, out_file, num_valid_classes, with_softmax): 16 | #class_names = ['bottle','phone','cigar'] 17 | 18 | # load config file and setup 19 | params = {} 20 | config = configparser.ConfigParser() 21 | config.read(config_file) 22 | 23 | #print(config) 24 | 25 | for _ in config.options("Train"): 26 | params[_] = eval(config.get("Train",_)) 27 | 28 | # image_size_x, image_size_y, image_mean, image_std, use_gray 29 | image_size_x = int(params['image_size_x']) 30 | image_size_y = int(params['image_size_y']) 31 | image_std = float(params['image_std']) 32 | image_mean = params['image_mean'] 33 | use_gray = bool(params['use_gray']) 34 | 35 | mean_values = list() 36 | for i in range(len(image_mean)): 37 | mean_values.append(float(image_mean[i])) 38 | image_mean = np.array(mean_values,dtype=np.float64) 39 | 40 | # iou_thresh, center_variance, size_variance 41 | iou_thresh = float(params['iou_thresh']) 42 | center_variance = float(params['center_variance']) 43 | size_variance = float(params['size_variance']) 44 | 45 | # net type 46 | net_type = params['net_type'] 47 | 48 | # aspect_ratios 49 | aspect_ratios = params['aspect_ratios'] 50 | if aspect_ratios is None: 51 | aspect_ratios = list() 52 | else: 53 | ratios = list() 54 | if type(aspect_ratios) == tuple: 55 | for j in range(len(aspect_ratios)): 56 | ratios.append(float(aspect_ratios[j])) 57 | else: 58 | ratios.append(float(aspect_ratios)) 59 | aspect_ratios = ratios 60 | 61 | # specs 62 | specs = list() 63 | for i in range(1,100): 64 | name = 'spec%d'%i 65 | if not name in params: 66 | break 67 | line = params[name] 68 | feat_map_x = int(line[0]) 69 | feat_map_y = int(line[1]) 70 | shrinkage_x = int(line[2]) 71 | shrinkage_y = int(line[3]) 72 | bbox_min = float(line[4]) 73 | bbox_max = float(line[5]) 74 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 75 | 76 | # priors 77 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 78 | 79 | # create ssd net 80 | model = create_ssd(net_type, use_gray, num_valid_classes, aspect_ratios, priors, center_variance, size_variance, is_test=False, with_softmax=with_softmax, device=torch.device("cpu")) 81 | 82 | 83 | loaded_model = torch.load(in_file,map_location='cpu') 84 | #print(loaded_model) 85 | model.load_state_dict(loaded_model) 86 | in_c = 1 if use_gray else 3 87 | dummy_input = torch.randn(1,in_c,image_size_y,image_size_x) 88 | input_name=['data'] 89 | output_name=['cls','loc'] 90 | torch.onnx.export(model,dummy_input,out_file,verbose=True,input_names=input_name,output_names=output_name) 91 | 92 | if __name__=='__main__': 93 | 94 | 95 | config_file = 'configs/zq1.cfg' 96 | in_file = 'models/backbone1-headers1-256x192/Epoch-5-Loss-5.812523530079768.pth' 97 | out_file = 'Epoch-5-Loss-5.812523530079768.onnx' 98 | with_softmax = False 99 | if len(sys.argv) < 5: 100 | print('pth2onnx.py config_file in_file out_file num_valid_classes [with_softmax]') 101 | else: 102 | config_file = sys.argv[1] 103 | in_file = sys.argv[2] 104 | out_file = sys.argv[3] 105 | num_valid_classes = int(sys.argv[4]) 106 | if len(sys.argv) >=6: 107 | with_softmax = (int(sys.argv[5]) != 0) 108 | pth2onnx(config_file, in_file, out_file, num_valid_classes, with_softmax) 109 | -------------------------------------------------------------------------------- /example/pth2onnx.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import os,sys 3 | sys.path.append(os.getcwd()) 4 | from core.ssd_creator import create_ssd,create_ssd_predictor 5 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 6 | from core.utils.misc import Timer 7 | import cv2 8 | import sys 9 | import torch 10 | import numpy as np 11 | import io 12 | import torch.onnx 13 | 14 | 15 | def pth2onnx(config_file, in_file, out_file, num_valid_classes, with_softmax): 16 | #class_names = ['bottle','phone','cigar'] 17 | 18 | # load config file and setup 19 | params = {} 20 | config = configparser.ConfigParser() 21 | config.read(config_file) 22 | 23 | #print(config) 24 | 25 | for _ in config.options("Train"): 26 | params[_] = eval(config.get("Train",_)) 27 | 28 | # image_size_x, image_size_y, image_mean, image_std, use_gray 29 | image_size_x = int(params['image_size_x']) 30 | image_size_y = int(params['image_size_y']) 31 | image_std = float(params['image_std']) 32 | image_mean = params['image_mean'] 33 | use_gray = bool(params['use_gray']) 34 | 35 | mean_values = list() 36 | for i in range(len(image_mean)): 37 | 
mean_values.append(float(image_mean[i])) 38 | image_mean = np.array(mean_values,dtype=np.float) 39 | 40 | # iou_thresh, center_varaiance, size_variance 41 | iou_thresh = float(params['iou_thresh']) 42 | center_variance = float(params['center_variance']) 43 | size_variance = float(params['size_variance']) 44 | 45 | # backbone type, header type 46 | backbone_type = params['backbone_type'] 47 | header_type = params['header_type'] 48 | 49 | # aspect_ratios 50 | aspect_ratios = params['aspect_ratios'] 51 | if aspect_ratios is None: 52 | aspect_ratios = list() 53 | else: 54 | ratios = list() 55 | if type(aspect_ratios) == tuple: 56 | for j in range(len(aspect_ratios)): 57 | ratios.append(float(aspect_ratios[j])) 58 | else: 59 | ratios.append(float(aspect_ratios)) 60 | aspect_ratios = ratios 61 | 62 | # specs 63 | specs = list() 64 | for i in range(1,100): 65 | name = 'spec%d'%i 66 | if not name in params: 67 | break 68 | line = params[name] 69 | feat_map_x = int(line[0]) 70 | feat_map_y = int(line[1]) 71 | shrinkage_x = int(line[2]) 72 | shrinkage_y = int(line[3]) 73 | bbox_min = float(line[4]) 74 | bbox_max = float(line[5]) 75 | specs.append(SSDSpec(feat_map_x, feat_map_y, shrinkage_x, shrinkage_y, SSDBoxSizes(bbox_min, bbox_max), aspect_ratios)) 76 | 77 | # priors 78 | priors = generate_ssd_priors(specs, image_size_x, image_size_y) 79 | 80 | # create ssd net 81 | model = create_ssd(backbone_type, header_type, use_gray, num_valid_classes+1, aspect_ratios, priors, center_variance, size_variance, is_test=False, with_softmax=with_softmax, device=torch.device("cpu")) 82 | 83 | 84 | loaded_model = torch.load(in_file,map_location='cpu') 85 | #print(loaded_model) 86 | model.load_state_dict(loaded_model) 87 | in_c = 1 if use_gray else 3 88 | dummy_input = torch.randn(1,in_c,image_size_y,image_size_x) 89 | input_name=['data'] 90 | output_name=['cls','loc'] 91 | torch.onnx.export(model,dummy_input,out_file,verbose=True,input_names=input_name,output_names=output_name) 92 | 93 | if __name__=='__main__': 94 | 95 | 96 | config_file = 'configs/zq1.cfg' 97 | in_file = 'models/backbone1-headers1-256x192/Epoch-5-Loss-5.812523530079768.pth' 98 | out_file = 'Epoch-5-Loss-5.812523530079768.onnx' 99 | with_softmax = False 100 | if len(sys.argv) < 5: 101 | print('pth2onnx.py config_file in_file out_file num_valid_classes [with_softmax]') 102 | else: 103 | config_file = sys.argv[1] 104 | in_file = sys.argv[2] 105 | out_file = sys.argv[3] 106 | num_valid_classes = int(sys.argv[4]) 107 | if len(sys.argv) >=6: 108 | with_softmax = (int(sys.argv[5]) != 0) 109 | pth2onnx(config_file, in_file, out_file, num_valid_classes, with_softmax) 110 | -------------------------------------------------------------------------------- /core/headers/headers3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 18 | return Sequential(OrderedDict([ 19 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 20 | ]) 21 | ) 22 | 23 | 24 | def headers3(backbone, num_classes, aspect_ratios): 25 | 26 | source_layer_indexes = [ 27 | 8, 28 | 12, 29 | 14, 30 | ] 31 | 32 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 33 | 34 | extras = ModuleList([ 35 | Sequential(OrderedDict([ 36 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=64, kernel_size=1)), 37 | ('conv15_1x1/relu', ReLU()), 38 | ('conv15_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 39 | ]) 40 | ), 41 | Sequential(OrderedDict([ 42 | ('conv16_1x1', Conv2d(in_channels=128, out_channels=32, kernel_size=1)), 43 | ('conv16_1x1/relu', ReLU()), 44 | ('conv16_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 45 | ]) 46 | ), 47 | Sequential(OrderedDict([ 48 | ('conv17_1x1', Conv2d(in_channels=64, out_channels=32, kernel_size=1)), 49 | ('conv17_1x1/relu', ReLU()), 50 | ('conv17_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 51 | ]) 52 | ) 53 | ]) 54 | 55 | regression_headers = ModuleList([ 56 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 57 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 58 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 59 | SeperableConv2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 60 | SeperableConv2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_5'), 61 | Conv_2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_6'), 62 | ]) 63 | 64 | classification_headers = ModuleList([ 65 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 66 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 67 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 68 | SeperableConv2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 69 | SeperableConv2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_5'), 70 | Conv_2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, 
padding=0, name='cls_6'), 71 | ]) 72 | 73 | return source_layer_indexes, extras, classification_headers, regression_headers 74 | 75 | -------------------------------------------------------------------------------- /core/headers/headers2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 18 | return Sequential(OrderedDict([ 19 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 20 | ]) 21 | ) 22 | 23 | 24 | def headers2(backbone, num_classes, aspect_ratios): 25 | 26 | source_layer_indexes = [ 27 | 8, 28 | 12, 29 | 14, 30 | ] 31 | 32 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 33 | 34 | extras = ModuleList([ 35 | Sequential(OrderedDict([ 36 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=128, kernel_size=1)), 37 | ('conv15_1x1/relu', ReLU()), 38 | ('conv15_3x3', SeperableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 39 | ]) 40 | ), 41 | Sequential(OrderedDict([ 42 | ('conv16_1x1', Conv2d(in_channels=256, out_channels=64, kernel_size=1)), 43 | ('conv16_1x1/relu', ReLU()), 44 | ('conv16_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 45 | ]) 46 | ), 47 | Sequential(OrderedDict([ 48 | ('conv17_1x1', Conv2d(in_channels=128, out_channels=64, kernel_size=1)), 49 | ('conv17_1x1/relu', ReLU()), 50 | ('conv17_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 51 | ]) 52 | ) 53 | ]) 54 | 55 | regression_headers = ModuleList([ 56 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 57 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 58 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 59 | SeperableConv2d(in_channels=256, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 60 | SeperableConv2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_5'), 61 | Conv_2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_6'), 62 | ]) 63 | 64 | classification_headers = ModuleList([ 65 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 66 | 
SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 67 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 68 | SeperableConv2d(in_channels=256, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 69 | SeperableConv2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_5'), 70 | Conv_2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_6'), 71 | ]) 72 | 73 | return source_layer_indexes, extras, classification_headers, regression_headers 74 | 75 | -------------------------------------------------------------------------------- /core/headers/headers4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 18 | return Sequential(OrderedDict([ 19 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 20 | ]) 21 | ) 22 | 23 | 24 | def headers4(backbone, num_classes, aspect_ratios): 25 | 26 | source_layer_indexes = [ 27 | 8, 28 | 12, 29 | 14, 30 | ] 31 | 32 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 33 | 34 | extras = ModuleList([ 35 | Sequential(OrderedDict([ 36 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=128, kernel_size=1)), 37 | ('conv15_1x1/relu', ReLU()), 38 | ('conv15_3x3', SeperableConv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 39 | ]) 40 | ), 41 | Sequential(OrderedDict([ 42 | ('conv16_1x1', Conv2d(in_channels=256, out_channels=64, kernel_size=1)), 43 | ('conv16_1x1/relu', ReLU()), 44 | ('conv16_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 45 | ]) 46 | ), 47 | Sequential(OrderedDict([ 48 | ('conv17_1x1', Conv2d(in_channels=128, out_channels=64, kernel_size=1)), 49 | ('conv17_1x1/relu', ReLU()), 50 | ('conv17_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 51 | ]) 52 | ) 53 | ]) 54 | 55 | regression_headers = ModuleList([ 56 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 57 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 58 | 
SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 59 | SeperableConv2d(in_channels=256, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 60 | SeperableConv2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_5'), 61 | Conv_2d(in_channels=128, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_6'), 62 | ]) 63 | 64 | classification_headers = ModuleList([ 65 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 66 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 67 | SeperableConv2d(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 68 | SeperableConv2d(in_channels=256, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 69 | SeperableConv2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_5'), 70 | Conv_2d(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_6'), 71 | ]) 72 | 73 | return source_layer_indexes, extras, classification_headers, regression_headers 74 | 75 | -------------------------------------------------------------------------------- /core/headers/headers11.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def SeperableConv2d_2(in_channels, out_channels, kernel_size, stride, padding, name): 18 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 
19 | """ 20 | return Sequential(OrderedDict([ 21 | (name+'/dw1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 22 | groups=in_channels, stride=stride, padding=padding)), 23 | (name+'/relu1', ReLU()), 24 | (name+'/sep1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1)), 25 | (name+'/relu2', ReLU()), 26 | (name+'/sep2', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 27 | ]) 28 | ) 29 | 30 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 31 | return Sequential(OrderedDict([ 32 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 33 | ]) 34 | ) 35 | 36 | 37 | def headers11(backbone, num_classes, aspect_ratios): 38 | 39 | source_layer_indexes = [ 40 | 8, 41 | 12, 42 | 14, 43 | ] 44 | 45 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 46 | 47 | extras = ModuleList([ 48 | Sequential(OrderedDict([ 49 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=32, kernel_size=1)), 50 | ('conv15_1x1/relu', ReLU()), 51 | ('conv15_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 52 | ]) 53 | ), 54 | Sequential(OrderedDict([ 55 | ('conv16_1x1', Conv2d(in_channels=64, out_channels=32, kernel_size=1)), 56 | ('conv16_1x1/relu', ReLU()), 57 | ('conv16_3x3', SeperableConv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 58 | ]) 59 | ) 60 | ]) 61 | 62 | regression_headers = ModuleList([ 63 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 64 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 65 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 66 | SeperableConv2d_2(in_channels=64, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 67 | Conv_2d(in_channels=32, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_5'), 68 | ]) 69 | 70 | classification_headers = ModuleList([ 71 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 72 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 73 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 74 | SeperableConv2d_2(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 75 | Conv_2d(in_channels=32, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_5'), 76 | ]) 77 | 78 | return source_layer_indexes, extras, classification_headers, regression_headers 79 | 80 | -------------------------------------------------------------------------------- /core/datasets/generate_vocdata.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import sys 3 | import os 4 | 
import xml.etree.ElementTree as ET 5 | from random import random 6 | 7 | def main(filename): 8 | # ratio to divide up the images 9 | train = 0.7 10 | val = 0.2 11 | test = 1.0 - train - val 12 | #if (train + test + val) != 1.0: 13 | # print("probabilities must equal 1") 14 | # exit() 15 | 16 | # get the labels 17 | labels = [] 18 | imgnames = [] 19 | annotations = {} 20 | 21 | with open(filename, 'r') as labelfile: 22 | label_string = "" 23 | for line in labelfile: 24 | label_string += line.rstrip() 25 | 26 | labels = label_string.split(',') 27 | labels = [elem.replace(" ", "") for elem in labels] 28 | 29 | # get image names 30 | for filename in os.listdir("./JPEGImages"): 31 | if filename.endswith(".jpg"): 32 | img = filename.rstrip('.jpg') 33 | imgnames.append(img) 34 | 35 | print("Labels:", labels, "imgcnt:", len(imgnames)) 36 | 37 | # initialise annotation list 38 | for label in labels: 39 | annotations[label] = [] 40 | 41 | # Scan the annotations for the labels 42 | for img in imgnames: 43 | annote = "Annotations/" + img + '.xml' 44 | if os.path.isfile(annote): 45 | tree = ET.parse(annote) 46 | root = tree.getroot() 47 | annote_labels = [] 48 | for labelname in root.findall('*/name'): 49 | labelname = labelname.text 50 | annote_labels.append(labelname) 51 | if labelname in labels: 52 | annotations[labelname].append(img) 53 | annotations[img] = annote_labels 54 | else: 55 | print("Missing annotation for ", annote) 56 | exit() 57 | 58 | # divvy up the images to the different sets 59 | sampler = imgnames.copy() 60 | train_list = [] 61 | val_list = [] 62 | test_list = [] 63 | 64 | while len(sampler) > 0: 65 | dice = random() 66 | elem = sampler.pop() 67 | 68 | if dice <= test: 69 | test_list.append(elem) 70 | elif dice <= (test + val): 71 | val_list.append(elem) 72 | else: 73 | train_list.append(elem) 74 | 75 | print("Training set:", len(train_list), "validation set:", len(val_list), "test set:", len(test_list)) 76 | 77 | 78 | # create the dataset files 79 | create_folder("./ImageSets/Main/") 80 | with open("./ImageSets/Main/train.txt", 'w') as outfile: 81 | for name in train_list: 82 | outfile.write(name + "\n") 83 | with open("./ImageSets/Main/val.txt", 'w') as outfile: 84 | for name in val_list: 85 | outfile.write(name + "\n") 86 | with open("./ImageSets/Main/trainval.txt", 'w') as outfile: 87 | for name in train_list: 88 | outfile.write(name + "\n") 89 | for name in val_list: 90 | outfile.write(name + "\n") 91 | 92 | with open("./ImageSets/Main/test.txt", 'w') as outfile: 93 | for name in test_list: 94 | outfile.write(name + "\n") 95 | 96 | # create the individiual files for each label 97 | for label in labels: 98 | with open("./ImageSets/Main/"+ label +"_train.txt", 'w') as outfile: 99 | for name in train_list: 100 | if label in annotations[name]: 101 | outfile.write(name + " 1\n") 102 | else: 103 | outfile.write(name + " -1\n") 104 | with open("./ImageSets/Main/"+ label +"_val.txt", 'w') as outfile: 105 | for name in val_list: 106 | if label in annotations[name]: 107 | outfile.write(name + " 1\n") 108 | else: 109 | outfile.write(name + " -1\n") 110 | with open("./ImageSets/Main/"+ label +"_test.txt", 'w') as outfile: 111 | for name in test_list: 112 | if label in annotations[name]: 113 | outfile.write(name + " 1\n") 114 | else: 115 | outfile.write(name + " -1\n") 116 | 117 | def create_folder(foldername): 118 | if os.path.exists(foldername): 119 | print('folder already exists:', foldername) 120 | else: 121 | os.makedirs(foldername) 122 | 123 | if __name__=='__main__': 124 | if 
len(sys.argv) < 2: 125 | print("usage: python generate_vocdata.py ") 126 | exit() 127 | main(sys.argv[1]) 128 | -------------------------------------------------------------------------------- /core/datasets/generate_vocdata_for_pure_background.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import sys 3 | import os 4 | import xml.etree.ElementTree as ET 5 | from random import random 6 | 7 | def main(filename): 8 | # ratio to divide up the images 9 | train = 1.0 10 | val = 0.0 11 | test = 1.0 - train - val 12 | #if (train + test + val) != 1.0: 13 | # print("probabilities must equal 1") 14 | # exit() 15 | 16 | # get the labels 17 | labels = [] 18 | imgnames = [] 19 | annotations = {} 20 | 21 | with open(filename, 'r') as labelfile: 22 | label_string = "" 23 | for line in labelfile: 24 | label_string += line.rstrip() 25 | 26 | labels = label_string.split(',') 27 | labels = [elem.replace(" ", "") for elem in labels] 28 | 29 | # get image names 30 | for filename in os.listdir("./JPEGImages"): 31 | if filename.endswith(".jpg"): 32 | img = filename.rstrip('.jpg') 33 | imgnames.append(img) 34 | 35 | print("Labels:", labels, "imgcnt:", len(imgnames)) 36 | 37 | # initialise annotation list 38 | for label in labels: 39 | annotations[label] = [] 40 | 41 | # Scan the annotations for the labels 42 | for img in imgnames: 43 | annote = "Annotations/" + img + '.xml' 44 | if os.path.isfile(annote): 45 | tree = ET.parse(annote) 46 | root = tree.getroot() 47 | annote_labels = [] 48 | for labelname in root.findall('*/name'): 49 | labelname = labelname.text 50 | annote_labels.append(labelname) 51 | if labelname in labels: 52 | annotations[labelname].append(img) 53 | annotations[img] = annote_labels 54 | else: 55 | print("Missing annotation for ", annote) 56 | exit() 57 | 58 | # divvy up the images to the different sets 59 | sampler = imgnames.copy() 60 | train_list = [] 61 | val_list = [] 62 | test_list = [] 63 | 64 | while len(sampler) > 0: 65 | dice = random() 66 | elem = sampler.pop() 67 | 68 | if dice <= test: 69 | test_list.append(elem) 70 | elif dice <= (test + val): 71 | val_list.append(elem) 72 | else: 73 | train_list.append(elem) 74 | 75 | print("Training set:", len(train_list), "validation set:", len(val_list), "test set:", len(test_list)) 76 | 77 | 78 | # create the dataset files 79 | create_folder("./ImageSets/Main/") 80 | with open("./ImageSets/Main/train.txt", 'w') as outfile: 81 | for name in train_list: 82 | outfile.write(name + "\n") 83 | with open("./ImageSets/Main/val.txt", 'w') as outfile: 84 | for name in val_list: 85 | outfile.write(name + "\n") 86 | with open("./ImageSets/Main/trainval.txt", 'w') as outfile: 87 | for name in train_list: 88 | outfile.write(name + "\n") 89 | for name in val_list: 90 | outfile.write(name + "\n") 91 | 92 | with open("./ImageSets/Main/test.txt", 'w') as outfile: 93 | for name in test_list: 94 | outfile.write(name + "\n") 95 | 96 | # create the individiual files for each label 97 | for label in labels: 98 | with open("./ImageSets/Main/"+ label +"_train.txt", 'w') as outfile: 99 | for name in train_list: 100 | if label in annotations[name]: 101 | outfile.write(name + " 1\n") 102 | else: 103 | outfile.write(name + " -1\n") 104 | with open("./ImageSets/Main/"+ label +"_val.txt", 'w') as outfile: 105 | for name in val_list: 106 | if label in annotations[name]: 107 | outfile.write(name + " 1\n") 108 | else: 109 | outfile.write(name + " -1\n") 110 | with open("./ImageSets/Main/"+ label +"_test.txt", 'w') as 
outfile: 111 | for name in test_list: 112 | if label in annotations[name]: 113 | outfile.write(name + " 1\n") 114 | else: 115 | outfile.write(name + " -1\n") 116 | 117 | def create_folder(foldername): 118 | if os.path.exists(foldername): 119 | print('folder already exists:', foldername) 120 | else: 121 | os.makedirs(foldername) 122 | 123 | if __name__=='__main__': 124 | if len(sys.argv) < 2: 125 | print("usage: python generate_vocdata.py ") 126 | exit() 127 | main(sys.argv[1]) 128 | -------------------------------------------------------------------------------- /core/headers/get_headers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from . import headers1 4 | from . import headers2 5 | from . import headers3 6 | from . import headers4 7 | from . import headers10 8 | from . import headers11 9 | from . import headers12 10 | from . import headers13 11 | from . import headers14 12 | from . import headers15 13 | from . import headers16 14 | from . import headers17 15 | from . import headers18 16 | from . import headers20 17 | from . import headers21 18 | from . import headers22 19 | from . import headers23 20 | from . import headers24 21 | from . import headers25 22 | from . import headers26 23 | from . import headers27 24 | from . import headers28 25 | 26 | 27 | def get_headers(type, backbone, num_classes, aspect_ratios): 28 | if type == "headers1": 29 | source_layer_indexes, extras, classification_headers, regreession_headers = headers1.headers1(backbone, num_classes, aspect_ratios) 30 | elif type == "headers2": 31 | source_layer_indexes, extras, classification_headers, regreession_headers = headers2.headers2(backbone, num_classes, aspect_ratios) 32 | elif type == "headers3": 33 | source_layer_indexes, extras, classification_headers, regreession_headers = headers3.headers3(backbone, num_classes, aspect_ratios) 34 | elif type == "headers4": 35 | source_layer_indexes, extras, classification_headers, regreession_headers = headers4.headers4(backbone, num_classes, aspect_ratios) 36 | elif type == "headers10": 37 | source_layer_indexes, extras, classification_headers, regreession_headers = headers10.headers10(backbone, num_classes, aspect_ratios) 38 | elif type == "headers11": 39 | source_layer_indexes, extras, classification_headers, regreession_headers = headers11.headers11(backbone, num_classes, aspect_ratios) 40 | elif type == "headers12": 41 | source_layer_indexes, extras, classification_headers, regreession_headers = headers12.headers12(backbone, num_classes, aspect_ratios) 42 | elif type == "headers13": 43 | source_layer_indexes, extras, classification_headers, regreession_headers = headers13.headers13(backbone, num_classes, aspect_ratios) 44 | elif type == "headers14": 45 | source_layer_indexes, extras, classification_headers, regreession_headers = headers14.headers14(backbone, num_classes, aspect_ratios) 46 | elif type == "headers15": 47 | source_layer_indexes, extras, classification_headers, regreession_headers = headers15.headers15(backbone, num_classes, aspect_ratios) 48 | elif type == "headers16": 49 | source_layer_indexes, extras, classification_headers, regreession_headers = headers16.headers16(backbone, num_classes, aspect_ratios) 50 | elif type == "headers17": 51 | source_layer_indexes, extras, classification_headers, regreession_headers = headers17.headers17(backbone, num_classes, aspect_ratios) 52 | elif type == "headers18": 53 | source_layer_indexes, extras, classification_headers, regreession_headers = 
headers18.headers18(backbone, num_classes, aspect_ratios) 54 | elif type == "headers20": 55 | source_layer_indexes, extras, classification_headers, regreession_headers = headers20.headers20(backbone, num_classes, aspect_ratios) 56 | elif type == "headers21": 57 | source_layer_indexes, extras, classification_headers, regreession_headers = headers21.headers21(backbone, num_classes, aspect_ratios) 58 | elif type == "headers22": 59 | source_layer_indexes, extras, classification_headers, regreession_headers = headers22.headers22(backbone, num_classes, aspect_ratios) 60 | elif type == "headers23": 61 | source_layer_indexes, extras, classification_headers, regreession_headers = headers23.headers23(backbone, num_classes, aspect_ratios) 62 | elif type == "headers24": 63 | source_layer_indexes, extras, classification_headers, regreession_headers = headers24.headers24(backbone, num_classes, aspect_ratios) 64 | elif type == "headers25": 65 | source_layer_indexes, extras, classification_headers, regreession_headers = headers25.headers25(backbone, num_classes, aspect_ratios) 66 | elif type == "headers26": 67 | source_layer_indexes, extras, classification_headers, regreession_headers = headers26.headers26(backbone, num_classes, aspect_ratios) 68 | elif type == "headers27": 69 | source_layer_indexes, extras, classification_headers, regreession_headers = headers27.headers27(backbone, num_classes, aspect_ratios) 70 | elif type == "headers28": 71 | source_layer_indexes, extras, classification_headers, regreession_headers = headers28.headers28(backbone, num_classes, aspect_ratios) 72 | 73 | 74 | return source_layer_indexes, extras, classification_headers, regreession_headers 75 | 76 | -------------------------------------------------------------------------------- /core/predictor_x.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .utils import box_utils_zq as box_utils 3 | from .data_preprocessing import PredictionTransform 4 | from .utils.misc import Timer 5 | import numpy as np 6 | 7 | 8 | class Predictor: 9 | def __init__(self, net, size_x, size_y, mean=0.0, std=1.0, nms_method=None, 10 | iou_threshold=0.45, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None): 11 | self.net = net 12 | self.transform = PredictionTransform(size_x, size_y, mean, std) 13 | self.iou_threshold = iou_threshold 14 | self.filter_threshold = filter_threshold 15 | self.candidate_size = candidate_size 16 | self.nms_method = nms_method 17 | 18 | self.sigma = sigma 19 | if device: 20 | self.device = device 21 | else: 22 | self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 23 | #print('init predictor') 24 | #print(self.device) 25 | 26 | self.net.to(self.device) 27 | self.net.eval() 28 | 29 | self.timer = Timer() 30 | 31 | def predict(self, image, top_k=-1, prob_threshold=None, print_score_and_box=False): 32 | #print('run predictor') 33 | #print(self.device) 34 | cpu_device = torch.device("cpu") 35 | height = image.shape[0] 36 | width = image.shape[1] 37 | if image.ndim == 2: 38 | image = image[:,:,np.newaxis] 39 | image = self.transform(image) 40 | images = image.unsqueeze(0) 41 | #print(images) 42 | images = images.to(self.device) 43 | with torch.no_grad(): 44 | self.timer.start() 45 | objs, scores, boxes = self.net.forward(images, print_score_and_box) 46 | #print("Inference time: ", self.timer.end()) 47 | objs = objs[0] 48 | scores = scores[0] 49 | boxes = boxes[0] 50 | if not prob_threshold: 51 | prob_threshold = 
55 | #print(prob_threshold) 56 | # this version of nms is slower on GPU, so we move data to CPU. 57 | objs = objs.to(cpu_device) 58 | scores = scores.to(cpu_device) 59 | boxes = boxes.to(cpu_device) 60 | if print_score_and_box: 61 | print(objs.shape) 62 | obj_h, obj_w = objs.shape 63 | for i in range(obj_h): 64 | line = '' 65 | for j in range(obj_w): 66 | line = line + '%12.5f '%(objs[i][j]) 67 | print(line) 68 | 69 | print(scores.shape) 70 | score_h, score_w = scores.shape 71 | for i in range(score_h): 72 | line = '' 73 | for j in range(score_w): 74 | line = line + '%12.5f '%(scores[i][j]) 75 | print(line) 76 | 77 | print(boxes.shape) 78 | box_h, box_w = boxes.shape 79 | for i in range(box_h): 80 | print('%12.5f %12.5f %12.5f %12.5f'%(boxes[i][0],boxes[i][1],boxes[i][2],boxes[i][3])) 81 | 82 | 83 | probs = objs[:, 1] # column 1 is the foreground (objectness) probability 84 | mask = probs > 0.5 85 | probs = probs[mask] 86 | if probs.size(0) == 0: 87 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 88 | 89 | 90 | indices = np.arange(0,objs.size(0)) 91 | indices = indices[mask] 92 | subset_boxes = boxes[mask, :] 93 | box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) 94 | box_probs, indices = box_utils.nms_with_id(box_probs, indices, self.nms_method, 95 | score_threshold=prob_threshold, 96 | iou_threshold=self.iou_threshold, 97 | sigma=self.sigma, 98 | top_k=top_k, 99 | candidate_size=self.candidate_size) 100 | if indices.shape[0] == 0: 101 | return torch.tensor([]), torch.tensor([]), torch.tensor([]) 102 | 103 | cls_scores = scores[indices,:] 104 | #labels = torch.argmax(cls_scores,dim=1) + 1 105 | max_cls_probs,labels = torch.max(cls_scores, dim=1) 106 | labels += 1 # class 0 is background, so shift predicted ids by one 107 | #print(max_cls_probs) 108 | picked_mask = max_cls_probs > prob_threshold 109 | picked_boxes = box_probs[picked_mask] 110 | picked_cls_probs = max_cls_probs[picked_mask] 111 | picked_labels = labels[picked_mask] 112 | 113 | picked_boxes[:, 0] *= width # boxes are normalized to [0, 1]; scale back to pixels 114 | picked_boxes[:, 1] *= height 115 | picked_boxes[:, 2] *= width 116 | picked_boxes[:, 3] *= height 117 | return picked_boxes[:,0:4], picked_labels, picked_cls_probs 118 |
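119 | # Example (hypothetical; a minimal sketch): `net` is a trained SSD-style model, `image` an HxW or HxWxC numpy array. 120 | # predictor = Predictor(net, size_x=256, size_y=256) 121 | # boxes, labels, probs = predictor.predict(image, top_k=10, prob_threshold=0.5)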
19 | """ 20 | return Sequential(OrderedDict([ 21 | (name+'/dw1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 22 | groups=in_channels, stride=stride, padding=padding)), 23 | (name+'/relu1', ReLU()), 24 | (name+'/sep1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1)), 25 | (name+'/relu2', ReLU()), 26 | (name+'/sep2', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 27 | ]) 28 | ) 29 | 30 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 31 | return Sequential(OrderedDict([ 32 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 33 | ]) 34 | ) 35 | 36 | 37 | def headers13(backbone, num_classes, aspect_ratios): 38 | 39 | source_layer_indexes = [ 40 | 8, 41 | 12, 42 | 14, 43 | ] 44 | 45 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 46 | 47 | extras = ModuleList([ 48 | Sequential(OrderedDict([ 49 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=64, kernel_size=1)), 50 | ('conv15_1x1/relu', ReLU()), 51 | ('conv15_3x3', SeperableConv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 52 | ]) 53 | ), 54 | Sequential(OrderedDict([ 55 | ('conv16_1x1', Conv2d(in_channels=128, out_channels=32, kernel_size=1)), 56 | ('conv16_1x1/relu', ReLU()), 57 | ('conv16_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 58 | ]) 59 | ), 60 | Sequential(OrderedDict([ 61 | ('conv17_1x1', Conv2d(in_channels=64, out_channels=32, kernel_size=1)), 62 | ('conv17_1x1/relu', ReLU()), 63 | ('conv17_3x3', SeperableConv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 64 | ]) 65 | ) 66 | ]) 67 | 68 | regression_headers = ModuleList([ 69 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 70 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_2'), 71 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 72 | SeperableConv2d_2(in_channels=128, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 73 | SeperableConv2d_2(in_channels=64, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_5'), 74 | Conv_2d(in_channels=64, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_6'), 75 | ]) 76 | 77 | classification_headers = ModuleList([ 78 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 79 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 80 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 81 | SeperableConv2d_2(in_channels=128, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 82 | SeperableConv2d_2(in_channels=64, out_channels=num_boxes * 
num_classes, kernel_size=3, stride=1, padding=1, name='cls_5'), 83 | Conv_2d(in_channels=64, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_6'), 84 | ]) 85 | 86 | return source_layer_indexes, extras, classification_headers, regression_headers 87 | 88 | -------------------------------------------------------------------------------- /core/headers/headers10.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Conv2d, Sequential, ModuleList, ReLU, BatchNorm2d 3 | from collections import OrderedDict 4 | from ..utils import box_utils_zq as box_utils 5 | 6 | def SeperableConv2d(in_channels, out_channels, kernel_size, stride, padding, name): 7 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 8 | """ 9 | return Sequential(OrderedDict([ 10 | (name+'/dw', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 11 | groups=in_channels, stride=stride, padding=padding)), 12 | (name+'/relu', ReLU()), 13 | (name+'/sep', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 14 | ]) 15 | ) 16 | 17 | def SeperableConv2d_2(in_channels, out_channels, kernel_size, stride, padding, name): 18 | """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d. 19 | """ 20 | return Sequential(OrderedDict([ 21 | (name+'/dw1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, 22 | groups=in_channels, stride=stride, padding=padding)), 23 | (name+'/relu1', ReLU()), 24 | (name+'/sep1', Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=1)), 25 | (name+'/relu2', ReLU()), 26 | (name+'/sep2', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1)) 27 | ]) 28 | ) 29 | 30 | def Conv_2d(in_channels, out_channels, kernel_size, stride, padding, name): 31 | return Sequential(OrderedDict([ 32 | (name+'/conv', Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride, padding=padding)) 33 | ]) 34 | ) 35 | 36 | 37 | def headers10(backbone, num_classes, aspect_ratios): 38 | 39 | source_layer_indexes = [ 40 | 8, 41 | 12, 42 | 14, 43 | ] 44 | 45 | num_boxes = box_utils.get_num_boxes_of_aspect_ratios(aspect_ratios) 46 | 47 | extras = ModuleList([ 48 | Sequential(OrderedDict([ 49 | ('conv15_1x1', Conv2d(in_channels=backbone.layers[-1]['out_c'], out_channels=512, kernel_size=1)), 50 | ('conv15_1x1/relu', ReLU()), 51 | ('conv15_3x3', SeperableConv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=2, padding=1, name='conv15_3x3')) 52 | ]) 53 | ), 54 | Sequential(OrderedDict([ 55 | ('conv16_1x1', Conv2d(in_channels=1024, out_channels=256, kernel_size=1)), 56 | ('conv16_1x1/relu', ReLU()), 57 | ('conv16_3x3', SeperableConv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1, name='conv16_3x3')) 58 | ]) 59 | ), 60 | Sequential(OrderedDict([ 61 | ('conv17_1x1', Conv2d(in_channels=512, out_channels=256, kernel_size=1)), 62 | ('conv17_1x1/relu', ReLU()), 63 | ('conv17_3x3', SeperableConv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, padding=1, name='conv17_3x3')) 64 | ]) 65 | ) 66 | ]) 67 | 68 | regression_headers = ModuleList([ 69 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_1'), 70 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * 4, 
kernel_size=3, stride=1, padding=1, name='loc_2'), 71 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_3'), 72 | SeperableConv2d_2(in_channels=1024, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_4'), 73 | SeperableConv2d_2(in_channels=512, out_channels=num_boxes * 4, kernel_size=3, stride=1, padding=1, name='loc_5'), 74 | Conv_2d(in_channels=512, out_channels=num_boxes * 4, kernel_size=1, stride=1, padding=0, name='loc_6'), 75 | ]) 76 | 77 | classification_headers = ModuleList([ 78 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[0]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_1'), 79 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[1]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_2'), 80 | SeperableConv2d_2(in_channels=backbone.layers[source_layer_indexes[2]-1]['out_c'], out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_3'), 81 | SeperableConv2d_2(in_channels=1024, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_4'), 82 | SeperableConv2d_2(in_channels=512, out_channels=num_boxes * num_classes, kernel_size=3, stride=1, padding=1, name='cls_5'), 83 | Conv_2d(in_channels=512, out_channels=num_boxes * num_classes, kernel_size=1, stride=1, padding=0, name='cls_6'), 84 | ]) 85 | 86 | return source_layer_indexes, extras, classification_headers, regression_headers 87 | 88 | -------------------------------------------------------------------------------- /core/datasets/open_images.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pathlib 3 | import cv2 4 | import pandas as pd 5 | import copy 6 | 7 | class OpenImagesDataset: 8 | 9 | def __init__(self, root, 10 | transform=None, target_transform=None, 11 | dataset_type="train", balance_data=False): 12 | self.root = pathlib.Path(root) 13 | self.transform = transform 14 | self.target_transform = target_transform 15 | self.dataset_type = dataset_type.lower() 16 | 17 | self.data, self.class_names, self.class_dict = self._read_data() 18 | self.balance_data = balance_data 19 | self.min_image_num = -1 20 | if self.balance_data: 21 | self.data = self._balance_data() 22 | self.ids = [info['image_id'] for info in self.data] 23 | 24 | self.class_stat = None 25 | 26 | def _getitem(self, index): 27 | image_info = self.data[index] 28 | image = self._read_image(image_info['image_id']) 29 | # duplicate boxes to prevent corruption of dataset 30 | boxes = copy.copy(image_info['boxes']) 31 | boxes[:, 0] *= image.shape[1] 32 | boxes[:, 1] *= image.shape[0] 33 | boxes[:, 2] *= image.shape[1] 34 | boxes[:, 3] *= image.shape[0] 35 | # duplicate labels to prevent corruption of dataset 36 | labels = copy.copy(image_info['labels']) 37 | if self.transform: 38 | image, boxes, labels = self.transform(image, boxes, labels) 39 | if self.target_transform: 40 | boxes, labels = self.target_transform(boxes, labels) 41 | return image_info['image_id'], image, boxes, labels 42 | 43 | def __getitem__(self, index): 44 | _, image, boxes, labels = self._getitem(index) 45 | return image, boxes, labels 46 | 47 | def get_annotation(self, index): 48 | """To conform the eval_ssd implementation that is based on the VOC dataset.""" 49 | image_id, image, boxes, labels = 
self._getitem(index) 50 | is_difficult = np.zeros(boxes.shape[0], dtype=np.uint8) 51 | return image_id, (boxes, labels, is_difficult) 52 | 53 | def get_image(self, index): 54 | image_info = self.data[index] 55 | image = self._read_image(image_info['image_id']) 56 | if self.transform: 57 | image, _ = self.transform(image) 58 | return image 59 | 60 | def _read_data(self): 61 | annotation_file = f"{self.root}/sub-{self.dataset_type}-annotations-bbox.csv" 62 | annotations = pd.read_csv(annotation_file) 63 | class_names = ['BACKGROUND'] + sorted(list(annotations['ClassName'].unique())) 64 | class_dict = {class_name: i for i, class_name in enumerate(class_names)} 65 | data = [] 66 | for image_id, group in annotations.groupby("ImageID"): 67 | boxes = group.loc[:, ["XMin", "YMin", "XMax", "YMax"]].values.astype(np.float32) 68 | # make labels 64 bits to satisfy the cross_entropy function 69 | labels = np.array([class_dict[name] for name in group["ClassName"]], dtype='int64') 70 | data.append({ 71 | 'image_id': image_id, 72 | 'boxes': boxes, 73 | 'labels': labels 74 | }) 75 | return data, class_names, class_dict 76 | 77 | def __len__(self): 78 | return len(self.data) 79 | 80 | def __repr__(self): 81 | if self.class_stat is None: 82 | self.class_stat = {name: 0 for name in self.class_names[1:]} 83 | for example in self.data: 84 | for class_index in example['labels']: 85 | class_name = self.class_names[class_index] 86 | self.class_stat[class_name] += 1 87 | content = ["Dataset Summary:", 88 | f"Number of Images: {len(self.data)}", 89 | f"Minimum Number of Images for a Class: {self.min_image_num}", 90 | "Label Distribution:"] 91 | for class_name, num in self.class_stat.items(): 92 | content.append(f"\t{class_name}: {num}") 93 | return "\n".join(content) 94 | 95 | def _read_image(self, image_id): 96 | image_file = self.root / self.dataset_type / f"{image_id}.jpg" 97 | image = cv2.imread(str(image_file)) 98 | if image.shape[2] == 1: 99 | image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB) 100 | else: 101 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 102 | return image 103 | 104 | def _balance_data(self): 105 | label_image_indexes = [set() for _ in range(len(self.class_names))] 106 | for i, image in enumerate(self.data): 107 | for label_id in image['labels']: 108 | label_image_indexes[label_id].add(i) 109 | label_stat = [len(s) for s in label_image_indexes] 110 | self.min_image_num = min(label_stat[1:]) 111 | sample_image_indexes = set() 112 | for image_indexes in label_image_indexes[1:]: 113 | image_indexes = np.array(list(image_indexes)) 114 | sub = np.random.permutation(image_indexes)[:self.min_image_num] 115 | sample_image_indexes.update(sub) 116 | sample_data = [self.data[i] for i in sample_image_indexes] 117 | return sample_data 118 |
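119 | # Example (hypothetical root path; a sketch only): 120 | # dataset = OpenImagesDataset('/data/open_images', dataset_type='train') 121 | # image, boxes, labels = dataset[0] # boxes come back scaled to pixel coordinates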
122 | 123 | -------------------------------------------------------------------------------- /example/run_ssd_example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os,sys 3 | import pathlib 4 | import logging 5 | import itertools 6 | import configparser 7 | import torch 8 | import numpy as np 9 | sys.path.append(os.getcwd()) 10 | 11 | from torch.utils.data import DataLoader, ConcatDataset 12 | from torch.optim.lr_scheduler import CosineAnnealingLR, MultiStepLR 13 | from core.utils.box_utils_zq import SSDSpec, SSDBoxSizes, generate_ssd_priors 14 | import core.utils.box_utils_zq as box_utils 15 | from core.utils import measurements 16 | from core.utils.misc import str2bool, Timer, freeze_net_layers, store_labels 17 | from core.ssd import MatchPrior 18 | from core.ssd_creator import create_ssd, create_ssd_predictor 19 | from core.datasets.voc_dataset import VOCDataset 20 | from core.datasets.open_images import OpenImagesDataset 21 | from core.multibox_loss import MultiboxLoss 22 | from core.data_preprocessing import TrainAugmentation, TestTransform 23 | import cv2 24 | 25 | 26 | if len(sys.argv) < 5: 27 | print('Usage: python run_ssd_example.py