├── vision
│   ├── utils
│   │   ├── __init__.py
│   │   ├── misc.py
│   │   ├── measurements.py
│   │   ├── model_book.py
│   │   ├── box_utils_numpy.py
│   │   └── box_utils.py
│   ├── test
│   │   ├── assets
│   │   │   └── 000138.jpg
│   │   └── test_vgg_ssd.py
│   ├── ssd
│   │   ├── config
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-36.pyc
│   │   │   │   ├── vgg_ssd_config.cpython-36.pyc
│   │   │   │   ├── squeezenet_ssd_config.cpython-36.pyc
│   │   │   │   └── mobilenetv1_ssd_config.cpython-36.pyc
│   │   │   ├── vgg_ssd_config.py
│   │   │   ├── mobilenetv1_ssd_config.py
│   │   │   ├── mobilenetv3_ssd_config.py
│   │   │   └── squeezenet_ssd_config.py
│   │   ├── data_preprocessing.py
│   │   ├── predictor.py
│   │   ├── mobilenetv1_ssd.py
│   │   ├── vgg_ssd.py
│   │   ├── fpn_mobilenetv1_ssd.py
│   │   ├── mobilenet_v2_ssd_lite.py
│   │   ├── mobilenetv1_ssd_lite.py
│   │   ├── squeezenet_ssd_lite.py
│   │   ├── mobilenet_v3_ssd_lite.py
│   │   ├── fpn_ssd.py
│   │   └── ssd.py
│   ├── nn
│   │   ├── scaled_l2_norm.py
│   │   ├── vgg.py
│   │   ├── mobilenet.py
│   │   ├── multibox_loss.py
│   │   ├── alexnet.py
│   │   ├── squeezenet.py
│   │   ├── mobilenet_v2.py
│   │   └── mobilenet_v3.py
│   ├── datasets
│   │   ├── collation.py
│   │   ├── generate_vocdata.py
│   │   ├── open_images.py
│   │   └── voc_dataset.py
│   ├── prunning
│   │   └── prunner.py
│   └── transforms
│       └── transforms.py
├── models
│   └── open-images-model-labels.txt
├── visual_tf_models.py
├── extract_tf_weights.py
├── draw_eval_results.py
├── convert_to_caffe2_models.py
├── README.md
├── run_ssd_example.py
├── translate_tf_mobilenetv1.py
├── run_ssd_live_demo.py
├── run_ssd_live_caffe2.py
├── open_images_downloader.py
├── eval_ssd.py
├── prune_alexnet.py
└── train_ssd.py

/vision/utils/__init__.py:
--------------------------------------------------------------------------------
from .misc import *
--------------------------------------------------------------------------------
/models/open-images-model-labels.txt:
--------------------------------------------------------------------------------
BACKGROUND
Bread
Cake
--------------------------------------------------------------------------------
/vision/test/assets/000138.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoshengsong/MobileNetV3-SSD/HEAD/vision/test/assets/000138.jpg
--------------------------------------------------------------------------------
/vision/ssd/config/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoshengsong/MobileNetV3-SSD/HEAD/vision/ssd/config/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/vision/ssd/config/__pycache__/vgg_ssd_config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoshengsong/MobileNetV3-SSD/HEAD/vision/ssd/config/__pycache__/vgg_ssd_config.cpython-36.pyc
--------------------------------------------------------------------------------
/vision/ssd/config/__pycache__/squeezenet_ssd_config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoshengsong/MobileNetV3-SSD/HEAD/vision/ssd/config/__pycache__/squeezenet_ssd_config.cpython-36.pyc
--------------------------------------------------------------------------------
/vision/ssd/config/__pycache__/mobilenetv1_ssd_config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoshengsong/MobileNetV3-SSD/HEAD/vision/ssd/config/__pycache__/mobilenetv1_ssd_config.cpython-36.pyc
--------------------------------------------------------------------------------
/visual_tf_models.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.platform import gfile
import sys
import time

if len(sys.argv) < 2:
    print("Usage: python visual_tf_models.py <frozen_graph.pb>")
    sys.exit(0)

model_file_name = sys.argv[1]
with tf.Session() as sess:
    with gfile.FastGFile(model_file_name, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        g_in = tf.import_graph_def(graph_def)
    LOGDIR = 'log'
    train_writer = tf.summary.FileWriter(LOGDIR)
    train_writer.add_graph(sess.graph)

while True:
    time.sleep(1000)
--------------------------------------------------------------------------------
/vision/nn/scaled_l2_norm.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch
import torch.nn.functional as F


class ScaledL2Norm(nn.Module):
    def __init__(self, in_channels, initial_scale):
        super(ScaledL2Norm, self).__init__()
        self.in_channels = in_channels
        self.scale = nn.Parameter(torch.Tensor(in_channels))
        self.initial_scale = initial_scale
        self.reset_parameters()

    def forward(self, x):
        return (F.normalize(x, p=2, dim=1)
                * self.scale.unsqueeze(0).unsqueeze(2).unsqueeze(3))

    def reset_parameters(self):
        self.scale.data.fill_(self.initial_scale)
--------------------------------------------------------------------------------
/vision/ssd/config/vgg_ssd_config.py:
--------------------------------------------------------------------------------
import numpy as np

from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors


image_size = 300
image_mean = np.array([123, 117, 104])  # RGB layout
image_std = 1.0

iou_threshold = 0.45
center_variance = 0.1
size_variance = 0.2

specs = [
    SSDSpec(38, 8, SSDBoxSizes(30, 60), [2]),
    SSDSpec(19, 16, SSDBoxSizes(60, 111), [2, 3]),
    SSDSpec(10, 32, SSDBoxSizes(111, 162), [2, 3]),
    SSDSpec(5, 64, SSDBoxSizes(162, 213), [2, 3]),
    SSDSpec(3, 100, SSDBoxSizes(213, 264), [2]),
    SSDSpec(1, 300, SSDBoxSizes(264, 315), [2])
]


priors = generate_ssd_priors(specs, image_size)
--------------------------------------------------------------------------------
/vision/ssd/config/mobilenetv1_ssd_config.py:
--------------------------------------------------------------------------------
import numpy as np

from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors


image_size = 300
image_mean = np.array([127, 127, 127])  # RGB layout
image_std = 128.0
iou_threshold = 0.45
center_variance = 0.1
size_variance = 0.2

specs = [
    SSDSpec(19, 16, SSDBoxSizes(60, 105), [2, 3]),
    SSDSpec(10, 32, SSDBoxSizes(105, 150), [2, 3]),
    SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]),
    SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]),
    SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]),
    SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3])
]


priors = generate_ssd_priors(specs, image_size)
--------------------------------------------------------------------------------
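A quick way to sanity-check these config files is to count the priors they generate. Assuming the upstream pytorch-ssd convention that SSDSpec is (feature_map_size, shrinkage, box_sizes, aspect_ratios) and that generate_ssd_priors emits two square priors per cell plus one pair per aspect ratio — an assumption about its internals, since box_utils.py is not shown in this listing — the totals can be reproduced in a few lines:

from collections import namedtuple

SSDSpec = namedtuple('SSDSpec', ['feature_map_size', 'shrinkage', 'box_sizes', 'aspect_ratios'])


def num_priors(specs):
    # two square priors per cell, plus one pair (ratio, 1/ratio) per aspect ratio
    return sum(s.feature_map_size ** 2 * (2 + 2 * len(s.aspect_ratios)) for s in specs)

For the MobileNet specs above this gives (19² + 10² + 5² + 3² + 2² + 1²) × 6 = 3000 priors; the VGG specs give 8732, which matches the shape asserted in /vision/test/test_vgg_ssd.py further down.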
/vision/ssd/config/mobilenetv3_ssd_config.py:
--------------------------------------------------------------------------------
import numpy as np

from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors


image_size = 300
image_mean = np.array([127, 127, 127])  # RGB layout
image_std = 128.0
iou_threshold = 0.45
center_variance = 0.1
size_variance = 0.2

specs = [
    SSDSpec(19, 16, SSDBoxSizes(60, 105), [2, 3]),
    SSDSpec(10, 32, SSDBoxSizes(105, 150), [2, 3]),
    SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]),
    SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]),
    SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]),
    SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3])
]


priors = generate_ssd_priors(specs, image_size)
--------------------------------------------------------------------------------
/vision/ssd/config/squeezenet_ssd_config.py:
--------------------------------------------------------------------------------
import numpy as np

from vision.utils.box_utils import SSDSpec, SSDBoxSizes, generate_ssd_priors


image_size = 300
image_mean = np.array([127, 127, 127])  # RGB layout
image_std = 128.0
iou_threshold = 0.45
center_variance = 0.1
size_variance = 0.2

specs = [
    SSDSpec(17, 16, SSDBoxSizes(60, 105), [2, 3]),
    SSDSpec(10, 32, SSDBoxSizes(105, 150), [2, 3]),
    SSDSpec(5, 64, SSDBoxSizes(150, 195), [2, 3]),
    SSDSpec(3, 100, SSDBoxSizes(195, 240), [2, 3]),
    SSDSpec(2, 150, SSDBoxSizes(240, 285), [2, 3]),
    SSDSpec(1, 300, SSDBoxSizes(285, 330), [2, 3])
]


priors = generate_ssd_priors(specs, image_size)
--------------------------------------------------------------------------------
/vision/nn/vgg.py:
--------------------------------------------------------------------------------
import torch.nn as nn


# borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py
def vgg(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    layers += [pool5, conv6,
               nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
    return layers
--------------------------------------------------------------------------------
/extract_tf_weights.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.platform import gfile
from tensorflow.python.framework import tensor_util
import sys
import pickle


def read_weights(frozen_model):
    weights = {}
    with tf.Session() as sess:
        with gfile.FastGFile(frozen_model, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def)
            for n in graph_def.node:
                if n.op == 'Const':
                    weights[n.name] = tensor_util.MakeNdarray(n.attr['value'].tensor)
                    print("Name:", n.name, "Shape:", weights[n.name].shape)
    return weights


if len(sys.argv) < 3:
    print("Usage: python extract_tf_weights.py <frozen_model.pb> <weights_file.pkl>")
    sys.exit(0)

frozen_model = sys.argv[1]
weights_file = sys.argv[2]

weights = read_weights(frozen_model)
with open(weights_file, "wb") as f:
    pickle.dump(weights, f)
print(f"Saved weights to {weights_file}.")
--------------------------------------------------------------------------------
/vision/datasets/collation.py:
--------------------------------------------------------------------------------
import torch
import numpy as np


def object_detection_collate(batch):
    images = []
    gt_boxes = []
    gt_labels = []
    image_type = type(batch[0][0])
    box_type = type(batch[0][1])
    label_type = type(batch[0][2])
    for image, boxes, labels in batch:
        if image_type is np.ndarray:
            images.append(torch.from_numpy(image))
        elif image_type is torch.Tensor:
            images.append(image)
        else:
            raise TypeError(f"Image should be tensor or np.ndarray, but got {image_type}.")
        if box_type is np.ndarray:
            gt_boxes.append(torch.from_numpy(boxes))
        elif box_type is torch.Tensor:
            gt_boxes.append(boxes)
        else:
            raise TypeError(f"Boxes should be tensor or np.ndarray, but got {box_type}.")
        if label_type is np.ndarray:
            gt_labels.append(torch.from_numpy(labels))
        elif label_type is torch.Tensor:
            gt_labels.append(labels)
        else:
            raise TypeError(f"Labels should be tensor or np.ndarray, but got {label_type}.")
    return torch.stack(images), gt_boxes, gt_labels
--------------------------------------------------------------------------------
/draw_eval_results.py:
--------------------------------------------------------------------------------
import sys
import cv2
import pandas as pd
import os

eval_result_file = sys.argv[1]
image_dir = sys.argv[2]
output_dir = sys.argv[3]
threshold = float(sys.argv[4])

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

r = pd.read_csv(eval_result_file, delimiter=" ", names=["ImageID", "Prob", "x1", "y1", "x2", "y2"])
r['x1'] = r['x1'].astype(int)
r['y1'] = r['y1'].astype(int)
r['x2'] = r['x2'].astype(int)
r['y2'] = r['y2'].astype(int)


for image_id, g in r.groupby('ImageID'):
    image = cv2.imread(os.path.join(image_dir, image_id + ".jpg"))
    for row in g.itertuples():
        if row.Prob < threshold:
            continue
        cv2.rectangle(image, (row.x1, row.y1), (row.x2, row.y2), (255, 255, 0), 4)
        label = f"{row.Prob:.2f}"
        cv2.putText(image, label,
                    (row.x1 + 20, row.y1 + 40),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,  # font scale
                    (255, 0, 255),
                    2)  # line type
    cv2.imwrite(os.path.join(output_dir, image_id + ".jpg"), image)
print(f"Task Done. Processed {r.shape[0]} bounding boxes.")
--------------------------------------------------------------------------------
/vision/utils/misc.py:
--------------------------------------------------------------------------------
import time
import torch


def str2bool(s):
    return s.lower() in ('true', '1')


class Timer:
    def __init__(self):
        self.clock = {}

    def start(self, key="default"):
        self.clock[key] = time.time()

    def end(self, key="default"):
        if key not in self.clock:
            raise Exception(f"{key} is not in the clock.")
        interval = time.time() - self.clock[key]
        del self.clock[key]
        return interval


def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path):
    torch.save({
        'epoch': epoch,
        'model': net_state_dict,
        'optimizer': optimizer_state_dict,
        'best_score': best_score
    }, checkpoint_path)
    torch.save(net_state_dict, model_path)


def load_checkpoint(checkpoint_path):
    return torch.load(checkpoint_path)


def freeze_net_layers(net):
    for param in net.parameters():
        param.requires_grad = False


def store_labels(path, labels):
    with open(path, "w") as f:
        f.write("\n".join(labels))
--------------------------------------------------------------------------------
/vision/utils/measurements.py:
--------------------------------------------------------------------------------
import numpy as np


def compute_average_precision(precision, recall):
    """Compute average precision as defined by the Pascal VOC competition:
    the area under the precision-recall curve. Recall follows the usual
    definition; precision is replaced by its running maximum from the right:
    pascal_precision[i] = typical_precision[i:].max()
    """
    # identical but faster version of new_precision[i] = old_precision[i:].max()
    precision = np.concatenate([[0.0], precision, [0.0]])
    for i in range(len(precision) - 1, 0, -1):
        precision[i - 1] = np.maximum(precision[i - 1], precision[i])

    # find the indexes where the recall value changes
    recall = np.concatenate([[0.0], recall, [1.0]])
    changing_points = np.where(recall[1:] != recall[:-1])[0]

    # compute the area under the curve
    areas = (recall[changing_points + 1] - recall[changing_points]) * precision[changing_points + 1]
    return areas.sum()


def compute_voc2007_average_precision(precision, recall):
    ap = 0.
    for t in np.arange(0., 1.1, 0.1):
        if np.sum(recall >= t) == 0:
            p = 0
        else:
            p = np.max(precision[recall >= t])
        ap = ap + p / 11.
    return ap
--------------------------------------------------------------------------------
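compute_voc2007_average_precision implements the classic 11-point interpolation: precision is sampled at recalls 0.0, 0.1, ..., 1.0 and averaged. A tiny worked example (a sketch for illustration, not part of the repo):

import numpy as np
from vision.utils.measurements import compute_voc2007_average_precision

# A detector that holds precision 1.0 up to recall 0.5, then drops to 0.5.
precision = np.array([1.0, 0.5])
recall = np.array([0.5, 1.0])
# p = 1.0 for t in {0.0, ..., 0.5} (6 points), p = 0.5 for t in {0.6, ..., 1.0} (5 points)
ap = compute_voc2007_average_precision(precision, recall)  # (6 * 1.0 + 5 * 0.5) / 11 ≈ 0.7727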
/vision/test/test_vgg_ssd.py:
--------------------------------------------------------------------------------
from ..ssd.vgg_ssd import create_vgg_ssd

import torch
import tempfile


def test_create_vgg_ssd():
    for num_classes in [2, 10, 21, 100]:
        _ = create_vgg_ssd(num_classes)


def test_forward():
    for num_classes in [2]:
        net = create_vgg_ssd(num_classes)
        net.init()
        net.eval()
        x = torch.randn(2, 3, 300, 300)
        confidences, locations = net.forward(x)
        assert confidences.size() == torch.Size([2, 8732, num_classes])
        assert locations.size() == torch.Size([2, 8732, 4])
        assert confidences.nonzero().size(0) != 0
        assert locations.nonzero().size(0) != 0


def test_save_model():
    net = create_vgg_ssd(10)
    net.init()
    with tempfile.TemporaryFile() as f:
        net.save(f)


def test_save_load_model_consistency():
    net = create_vgg_ssd(20)
    net.init()
    model_path = tempfile.NamedTemporaryFile().name
    net.save(model_path)
    net_copy = create_vgg_ssd(20)
    net_copy.load(model_path)

    net.eval()
    net_copy.eval()

    for _ in range(1):
        x = torch.randn(1, 3, 300, 300)
        confidences1, locations1 = net.forward(x)
        confidences2, locations2 = net_copy.forward(x)
        assert (confidences1 == confidences2).long().sum() == confidences2.numel()
        assert (locations1 == locations2).long().sum() == locations2.numel()
--------------------------------------------------------------------------------
/vision/nn/mobilenet.py:
--------------------------------------------------------------------------------
# borrowed from "https://github.com/marvis/pytorch-mobilenet"

import torch.nn as nn
import torch.nn.functional as F


class MobileNetV1(nn.Module):
    def __init__(self, num_classes=1024):
        super(MobileNetV1, self).__init__()

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, 32, 2),
            conv_dw(32, 64, 1),
            conv_dw(64, 128, 2),
            conv_dw(128, 128, 1),
            conv_dw(128, 256, 2),
            conv_dw(256, 256, 1),
            conv_dw(256, 512, 2),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 512, 1),
            conv_dw(512, 1024, 2),
            conv_dw(1024, 1024, 1),
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
--------------------------------------------------------------------------------
/vision/ssd/data_preprocessing.py:
--------------------------------------------------------------------------------
from ..transforms.transforms import *


class TrainAugmentation:
    def __init__(self, size, mean=0, std=1.0):
        """
        Args:
            size: the size of the final image.
            mean: mean pixel value per channel.
        """
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),
            PhotometricDistort(),
            Expand(self.mean),
            RandomSampleCrop(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            SubtractMeans(self.mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, img, boxes, labels):
        """
        Args:
            img: the output of cv.imread in RGB layout.
            boxes: bounding boxes in the form of (x1, y1, x2, y2).
            labels: labels of boxes.
        """
        return self.augment(img, boxes, labels)


class TestTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            ToPercentCoords(),
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, image, boxes, labels):
        return self.transform(image, boxes, labels)


class PredictionTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor()
        ])

    def __call__(self, image):
        image, _, _ = self.transform(image)
        return image
--------------------------------------------------------------------------------
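The three pipelines differ only in how much augmentation they apply. Wiring TrainAugmentation to the MobileNet config above looks roughly like this (an untested sketch with made-up inputs, not code from the repo):

import numpy as np
from vision.ssd.data_preprocessing import TrainAugmentation

augment = TrainAugmentation(size=300, mean=np.array([127, 127, 127]), std=128.0)
image = np.random.randint(0, 256, (480, 640, 3)).astype(np.uint8)  # HWC image, RGB order
boxes = np.array([[50.0, 60.0, 300.0, 280.0]], dtype=np.float32)   # absolute (x1, y1, x2, y2)
labels = np.array([1], dtype=np.int64)
tensor, boxes, labels = augment(image, boxes, labels)  # CHW float tensor, boxes in percent coords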
/vision/nn/multibox_loss.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch.nn.functional as F
import torch


from ..utils import box_utils


class MultiboxLoss(nn.Module):
    def __init__(self, priors, iou_threshold, neg_pos_ratio,
                 center_variance, size_variance, device):
        """Implement SSD Multibox Loss.

        Basically, Multibox loss combines classification loss
        and Smooth L1 regression loss.
        """
        super(MultiboxLoss, self).__init__()
        self.iou_threshold = iou_threshold
        self.neg_pos_ratio = neg_pos_ratio
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.priors = priors.to(device)  # .to() is not in-place; keep the returned tensor

    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            predicted_locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
        """
        num_classes = confidence.size(2)
        with torch.no_grad():
            # derived from cross_entropy = -sum(log(p))
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], size_average=False)
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, size_average=False)
        num_pos = gt_locations.size(0)
        return smooth_l1_loss / num_pos, classification_loss / num_pos
--------------------------------------------------------------------------------
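The loss keeps only the hardest negatives (neg_pos_ratio background priors per positive prior) so the 3000 mostly-background priors do not swamp training. A rough usage sketch — neg_pos_ratio=3 and iou_threshold=0.5 are the conventional SSD settings, assumed here since train_ssd.py is not shown in this listing, and the random tensors only illustrate shapes:

import torch
from vision.nn.multibox_loss import MultiboxLoss
from vision.ssd.config import mobilenetv1_ssd_config as config

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
criterion = MultiboxLoss(config.priors, iou_threshold=0.5, neg_pos_ratio=3,
                         center_variance=0.1, size_variance=0.2, device=device)

num_priors = config.priors.size(0)  # 3000 for the MobileNet configs
confidence = torch.randn(2, num_priors, 3, device=device)   # e.g. BACKGROUND, Bread, Cake
locations = torch.randn(2, num_priors, 4, device=device)
labels = torch.randint(0, 3, (2, num_priors), device=device)
gt_locations = torch.randn(2, num_priors, 4, device=device)

regression_loss, classification_loss = criterion(confidence, locations, labels, gt_locations)
loss = regression_loss + classification_loss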
/vision/nn/alexnet.py:
--------------------------------------------------------------------------------
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

# copied from torchvision (https://github.com/pytorch/vision/blob/master/torchvision/models/alexnet.py).
# The forward function is modified for model pruning.

__all__ = ['AlexNet', 'alexnet']


model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
}


class AlexNet(nn.Module):

    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


def alexnet(pretrained=False, **kwargs):
    r"""AlexNet model architecture from the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = AlexNet(**kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['alexnet']))
    return model
--------------------------------------------------------------------------------
/convert_to_caffe2_models.py:
--------------------------------------------------------------------------------
from vision.ssd.vgg_ssd import create_vgg_ssd
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd
from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite
from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite
from vision.ssd.mobilenet_v3_ssd_lite import create_mobilenetv3_ssd_lite
import sys
import torch.onnx
# from caffe2.python.onnx.backend import Caffe2Backend as c2
# import onnx


if len(sys.argv) < 4:
    print('Usage: python convert_to_caffe2_models.py <net type> <model path> <label path>')
    sys.exit(0)
net_type = sys.argv[1]
model_path = sys.argv[2]

label_path = sys.argv[3]

class_names = [name.strip() for name in open(label_path).readlines()]
num_classes = len(class_names)

if net_type == 'vgg16-ssd':
    net = create_vgg_ssd(len(class_names), is_test=True)
elif net_type == 'mb1-ssd':
    net = create_mobilenetv1_ssd(len(class_names), is_test=True)
elif net_type == 'mb1-ssd-lite':
    net = create_mobilenetv1_ssd_lite(len(class_names), is_test=True)
elif net_type == 'sq-ssd-lite':
    net = create_squeezenet_ssd_lite(len(class_names), is_test=True)
elif net_type == 'mb3-ssd-lite':
    net = create_mobilenetv3_ssd_lite(len(class_names), is_test=True)
else:
    print("The net type is wrong. It should be one of vgg16-ssd, mb1-ssd, mb1-ssd-lite, sq-ssd-lite and mb3-ssd-lite.")
    sys.exit(1)
net.load(model_path)
net.eval()

model_path = f"models/{net_type}.onnx"
init_net_path = f"models/{net_type}_init_net.pb"
init_net_txt_path = f"models/{net_type}_init_net.pbtxt"
predict_net_path = f"models/{net_type}_predict_net.pb"
predict_net_txt_path = f"models/{net_type}_predict_net.pbtxt"

dummy_input = torch.randn(1, 3, 300, 300)
torch.onnx.export(net, dummy_input, model_path, verbose=False, output_names=['scores', 'boxes'])
'''
model = onnx.load(model_path)
init_net, predict_net = c2.onnx_graph_to_caffe2_net(model)

print(f"Save the model in binary format to the files {init_net_path} and {predict_net_path}.")

with open(init_net_path, "wb") as fopen:
    fopen.write(init_net.SerializeToString())
with open(predict_net_path, "wb") as fopen:
    fopen.write(predict_net.SerializeToString())

print(f"Save the model in txt format to the files {init_net_txt_path} and {predict_net_txt_path}.")
with open(init_net_txt_path, 'w') as f:
    f.write(str(init_net))

with open(predict_net_txt_path, 'w') as f:
    f.write(str(predict_net))
'''
--------------------------------------------------------------------------------
/vision/utils/model_book.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
import torch.nn as nn


class ModelBook:
    """Maintain the mapping between modules and their paths.

    Example:
        book = ModelBook(model_ft)
        for p, m in book.conv2d_modules():
            print('path:', p, 'num of filters:', m.out_channels)
            assert m is book.get_module(p)
    """

    def __init__(self, model):
        self._model = model
        self._modules = OrderedDict()
        self._paths = OrderedDict()
        path = []
        self._construct(self._model, path)

    def _construct(self, module, path):
        if not module._modules:
            return
        for name, m in module._modules.items():
            cur_path = tuple(path + [name])
            self._paths[m] = cur_path
            self._modules[cur_path] = m
            self._construct(m, path + [name])

    def conv2d_modules(self):
        return self.modules(nn.Conv2d)

    def linear_modules(self):
        return self.modules(nn.Linear)

    def modules(self, module_type=None):
        for p, m in self._modules.items():
            if not module_type or isinstance(m, module_type):
                yield p, m

    def num_of_conv2d_modules(self):
        return self.num_of_modules(nn.Conv2d)

    def num_of_conv2d_filters(self):
        """Return the sum of out_channels of all conv2d layers.

        Here we treat the sub weight with size of [in_channels, h, w] as a single filter.
        """
        num_filters = 0
        for _, m in self.conv2d_modules():
            num_filters += m.out_channels
        return num_filters

    def num_of_linear_modules(self):
        return self.num_of_modules(nn.Linear)

    def num_of_linear_filters(self):
        num_filters = 0
        for _, m in self.linear_modules():
            num_filters += m.out_features
        return num_filters

    def num_of_modules(self, module_type=None):
        num = 0
        for p, m in self._modules.items():
            if not module_type or isinstance(m, module_type):
                num += 1
        return num

    def get_module(self, path):
        return self._modules.get(path)

    def get_path(self, module):
        return self._paths.get(module)

    def update(self, path, module):
        old_module = self._modules[path]
        del self._paths[old_module]
        self._paths[module] = path
        self._modules[path] = module
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MobileNetV3-SSD


MobileNetV3-SSD implementation in PyTorch

For the second version, which includes test results, see https://github.com/shaoshengsong/MobileNetV3-SSD-Compact-Version

If you want to try newer techniques, see https://github.com/shaoshengsong/quarkdet, a lightweight object-detection project covering multiple models.

**Purpose**

Object Detection

Applying MobileNetV3-SSD to object detection.

Environment

OS: Ubuntu 18.04

Python: 3.6

PyTorch: 1.1.0


**Object detection with MobileNetV3-SSD**

**Supports ONNX export**

Code references (this project borrows heavily from the following):


**1. SSD part**


[A PyTorch Implementation of Single Shot MultiBox Detector](https://github.com/amdegroot/ssd.pytorch)

**2. MobileNetV3 part**



[1 mobilenetv3 with pytorch, provides pre-trained model](https://github.com/xiaolai-sqlai/mobilenetv3)


[2 MobileNetV3 in pytorch and ImageNet pretrained models](https://github.com/kuan-wang/pytorch-mobilenet-v3)


[3 Implementing Searching for MobileNetV3 paper using Pytorch](https://github.com/leaderj1001/MobileNetV3-Pytorch)


[4 MobileNetV1, MobileNetV2, VGG based SSD/SSD-lite implementation in Pytorch 1.0 / Pytorch 0.4. Out-of-box support for retraining on Open Images dataset. ONNX and Caffe2 support. Experiment Ideas like CoordConv.
no discernible latency cost](https://github.com/qfgaohao/pytorch-ssd).


Regarding 4: this repo does not keep code compatibility with MobileNetV1, MobileNetV2, and so on; only MobileNetV3 is usable.

**Download the data**

This example uses the Cake and Bread classes because the download is small:
all classes together total 561 GB, while Cake and Bread are only 3.2 GB.

python3 open_images_downloader.py --root /media/santiago/a/data/open_images --class_names "Cake,Bread" --num_workers 20


**Training**

**First training run**

python3 train_ssd.py --dataset_type open_images --datasets /media/santiago/data/open_images --net mb3-ssd-lite --scheduler cosine --lr 0.01 --t_max 100 --validation_epochs 5 --num_epochs 100 --base_net_lr 0.001 --batch_size 5


**Resuming from a previously trained model**

python3 train_ssd.py --dataset_type open_images --datasets /media/santiago/data/open_images --net mb3-ssd-lite --pretrained_ssd models/mb3-ssd-lite-Epoch-99-Loss-2.5194434596402613.pth --scheduler cosine --lr 0.01 --t_max 100 --validation_epochs 5 --num_epochs 200 --base_net_lr 0.001 --batch_size 5



**Testing a single image**

python run_ssd_example.py mb3-ssd-lite models/mb3-ssd-lite-Epoch-99-Loss-2.5194434596402613.pth models/open-images-model-labels.txt /home/santiago/picture/test.jpg

**Live video detection**

python3 run_ssd_live_demo.py mb3-ssd-lite models/mb3-ssd-lite-Epoch-99-Loss-2.5194434596402613.pth models/open-images-model-labels.txt


**Cake and Bread pretrained model**


Link: https://pan.baidu.com/s/1byY1eJk3Hm3CTp-29KirxA

Extraction code: qxwv

**VOC dataset pretrained model**

Link: https://pan.baidu.com/s/1yt_IRY0RcgSxB-YwywoHuA

Extraction code: 2sta
--------------------------------------------------------------------------------
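run_ssd_example.py (below, truncated in this listing) essentially wraps the Predictor class defined next. A minimal inference sketch — assuming the factory signature create_mobilenetv3_ssd_lite_predictor(net, candidate_size=...) carried over from upstream pytorch-ssd, which this dump does not show:

import cv2
from vision.ssd.mobilenet_v3_ssd_lite import create_mobilenetv3_ssd_lite, create_mobilenetv3_ssd_lite_predictor

class_names = [name.strip() for name in open("models/open-images-model-labels.txt").readlines()]
net = create_mobilenetv3_ssd_lite(len(class_names), is_test=True)
net.load("models/mb3-ssd-lite-Epoch-99-Loss-2.5194434596402613.pth")
predictor = create_mobilenetv3_ssd_lite_predictor(net, candidate_size=200)

orig_image = cv2.imread("test.jpg")
image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)  # the transforms expect RGB layout
boxes, labels, probs = predictor.predict(image, 10, 0.4)  # top 10 boxes above probability 0.4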
/vision/ssd/predictor.py:
--------------------------------------------------------------------------------
import torch

from ..utils import box_utils
from .data_preprocessing import PredictionTransform
from ..utils.misc import Timer


class Predictor:
    def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None,
                 iou_threshold=0.45, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None):
        self.net = net
        self.transform = PredictionTransform(size, mean, std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.nms_method = nms_method

        self.sigma = sigma
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.net.to(self.device)
        self.net.eval()

        self.timer = Timer()

    def predict(self, image, top_k=-1, prob_threshold=None):
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            self.timer.start()
            scores, boxes = self.net.forward(images)
            print("Inference time: ", self.timer.end())
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
--------------------------------------------------------------------------------
/run_ssd_example.py:
--------------------------------------------------------------------------------
from vision.ssd.vgg_ssd import create_vgg_ssd, create_vgg_ssd_predictor
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor
from vision.ssd.mobilenetv1_ssd_lite import create_mobilenetv1_ssd_lite, create_mobilenetv1_ssd_lite_predictor
from vision.ssd.squeezenet_ssd_lite import create_squeezenet_ssd_lite, create_squeezenet_ssd_lite_predictor
from vision.ssd.mobilenet_v2_ssd_lite import create_mobilenetv2_ssd_lite, create_mobilenetv2_ssd_lite_predictor
from vision.utils.misc import Timer
import cv2
import sys

from vision.ssd.mobilenet_v3_ssd_lite import create_mobilenetv3_ssd_lite, create_mobilenetv3_ssd_lite_predictor

if len(sys.argv) < 5:
    print('Usage: python run_ssd_example.py