├── CustomBatchSampler.py ├── LICENSE ├── README.md ├── config.yml ├── data ├── bdd100k.yaml ├── od_dataset_from_file.py └── voc_data.yaml ├── docker └── Dockerfile ├── folder2lmdb.py ├── images ├── 000166.jpg ├── 001852.jpg ├── 002597.jpg ├── 004030.jpg ├── 00690c26-e4bbbd72.jpg └── show.gif ├── inference.py ├── models ├── __init__.py ├── bdd100k │ └── config.yaml ├── mbv2_yolo.py ├── mbv3_yolo.py ├── mbv3_yolo_macc.py ├── mobilenetv2.py ├── mobilenetv3.py ├── seg_loss.py ├── voc │ └── config.yaml └── yolo_loss.py ├── requirements.txt ├── save └── 00690c26-e4bbbd72_result.jpg ├── scripts ├── VOC2007.sh ├── VOC2012.sh ├── create.sh ├── inference.sh └── train.sh ├── search_space.json ├── train.py └── utils ├── __init__.py ├── box.py ├── eval_mAP.py ├── image_augmentation.py ├── iou.py ├── logger.py └── misc.py /CustomBatchSampler.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | 5 | from torch.utils.data.sampler import Sampler 6 | from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized 7 | import random 8 | 9 | class GreedyBatchSampler(Sampler[List[int]]): 10 | r"""Wraps another sampler to yield a mini-batch of indices. 11 | Args: 12 | sampler (Sampler or Iterable): Base sampler. Can be any iterable object 13 | batch_size (int): Size of mini-batch. 14 | drop_last (bool): If ``True``, the sampler will drop the last batch if 15 | its size would be less than ``batch_size`` 16 | Example: 17 | >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) 18 | [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 19 | >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) 20 | [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 21 | """ 22 | 23 | def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool,sample:list) -> None: 24 | # Since collections.abc.Iterable does not check for `__getitem__`, which 25 | # is one way for an object to be an iterable, we don't do an `isinstance` 26 | # check here. 
27 | if not isinstance(batch_size, int) or isinstance(batch_size, bool) or \ 28 | batch_size <= 0: 29 | raise ValueError("batch_size should be a positive integer value, " 30 | "but got batch_size={}".format(batch_size)) 31 | if not isinstance(drop_last, bool): 32 | raise ValueError("drop_last should be a boolean value, but got " 33 | "drop_last={}".format(drop_last)) 34 | self.sampler = sampler 35 | self.batch_size = batch_size 36 | self.drop_last = drop_last 37 | self.sample = sample 38 | #print('self.drop_last',self.drop_last) 39 | #self.mosaic_array = list() 40 | 41 | ''' 42 | def generate_mosaic_array(self): 43 | mosaic_array = [] 44 | for i in range(self.batch_size): 45 | mosaic_array.append(random.choice([1,2,4])) 46 | return sum(self.mosaic_array) 47 | ''' 48 | def get_random(self,sample): 49 | if random.random() < 0.5: 50 | num = random.choice(sample) 51 | else: 52 | num = 1 53 | return num 54 | def __iter__(self): 55 | batch = [] 56 | sample = [1,4] 57 | num = self.get_random(self.sample) 58 | 59 | buckets = [] 60 | for idx in self.sampler: 61 | buckets.append(idx) 62 | if len(buckets) == num : 63 | batch.append(buckets) 64 | num = self.get_random(self.sample) 65 | buckets = [] 66 | if len(batch) == self.batch_size: 67 | yield batch 68 | #r,batch_size = self.get_random() 69 | #print('\n0-',batch_size) 70 | batch = [] 71 | 72 | if len(batch) > 0 and not self.drop_last: 73 | yield batch 74 | #def get_mosaic_array(self) : 75 | # return self.mosaic_array.pop(0) 76 | def __len__(self): 77 | # Can only be called if self.sampler has __len__ implemented 78 | # We cannot enforce this condition, so we turn off typechecking for the 79 | # implementation below. 80 | # Somewhat related: see NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ] 81 | return len(self.sampler) 82 | #if self.drop_last: 83 | # return len(self.sampler) // self.batch_size # type: ignore 84 | #else: 85 | # return (len(self.sampler) + self.batch_size - 1) // self.batch_size # type: ignore 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Eric Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet-YOLO-Pytorch 2 | 3 | ![result](/save/00690c26-e4bbbd72_result.jpg) 4 | 5 | ## Model 6 | 7 | A pytorch implementation of MobileNet-YOLO detection network , train on 07+12 , test on VOC2007 (imagenet pretrained , not coco) 8 | 9 | Network|mAP|Resolution|download| 10 | :---:|:---:|:---:|:---:| 11 | MobileNetV2|72.1|352|[checkpoint](https://drive.google.com/drive/folders/11iNLZA5sOZP2tiTQB6pz6TAA2u5xyYCa?usp=sharing)| 12 | 13 | 14 | ## Training steps 15 | 16 | 1. Download dataset VOCdevkit/ , if already have , please skip this step 17 | ``` 18 | sh scripts/VOC2007.sh 19 | sh scripts/VOC2012.sh 20 | ``` 21 | 2. Create lmdb 22 | ``` 23 | sh scripts/create.sh 24 | ``` 25 | 3. Start training 26 | ``` 27 | sh scripts/train.sh 28 | ``` 29 | ## yolov3 training 30 | 31 | see [branch](https://github.com/eric612/Mobilenet-YOLO-Pytorch/tree/yolov3) 32 | 33 | ## Hyper parameter optimization 34 | 35 | ``` 36 | nnictl create --config config.yml 37 | ``` 38 | 39 | ## Demo 40 | 41 | Download [checkpoint](https://drive.google.com/file/d/1eNIHaZGQHyb6WfOUmBuBU3K5urKFoL27/view?usp=sharing), and save at $Mobilenet-YOLO-Pytorch/checkpoints/bdd100k/model_best.pth.tar 42 | 43 | ``` 44 | sh scripts/inference.sh 45 | ``` 46 | 47 | ## Under construction 48 | 49 | - [ ] A new detector 50 | - [x] yolov4 51 | - [x] Multi-Task 52 | - [x] Hyper Parameter Tuning 53 | - [ ] Pruning 54 | - [x] Porting KL720 55 | 56 | ## Acknowledgements 57 | 58 | [AlexeyAB](https://github.com/AlexeyAB/darknet) 59 | 60 | [diggerdu](https://github.com/diggerdu/Generalized-Intersection-over-Union) 61 | 62 | [BobLiu20](https://github.com/BobLiu20/YOLOv3_PyTorch) 63 | 64 | [bubbliiiing](https://github.com/bubbliiiing/yolov4-tiny-pytorch) 65 | 66 | [aleju](https://github.com/aleju/imgaug) 67 | 68 | [rmccorm4](https://github.com/rmccorm4/PyTorch-LMDB) 69 | 70 | [hysts](https://github.com/hysts/pytorch_image_classification) 71 | 72 | [utkuozbulak](https://github.com/utkuozbulak/pytorch-custom-dataset-examples) 73 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | authorName: default 2 | experimentName: example_yolo_pytorch 3 | trialConcurrency: 1 4 | maxExecDuration: 30d 5 | maxTrialNum: 20 6 | #choice: local, remote, pai 7 | trainingServicePlatform: local 8 | searchSpacePath: search_space.json 9 | #choice: true, false 10 | useAnnotation: false 11 | tuner: 12 | #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner 13 | #SMAC (SMAC should be installed through nnictl) 14 | builtinTunerName: TPE 15 | classArgs: 16 | #choice: maximize, minimize 17 | optimize_mode: maximize 18 | 19 | trial: 20 | command: python train.py 21 | codeDir: . 
22 | gpuNum: 1 23 | localConfig: 24 | useActiveGpu: true 25 | -------------------------------------------------------------------------------- /data/bdd100k.yaml: -------------------------------------------------------------------------------- 1 | classes: 2 | map: 3 | - person 4 | - rider 5 | - car 6 | - bus 7 | - truck 8 | - bike 9 | - motor 10 | original: 11 | - person 12 | - rider 13 | - car 14 | - bus 15 | - truck 16 | - bike 17 | - motor 18 | - traffic light 19 | - traffic sign 20 | - train 21 | extention_names: 22 | annotation: 23 | - json 24 | image: 25 | - jpg 26 | segmentation: 27 | - png 28 | model_config_path: models/bdd100k/config.yaml 29 | segmentation_anno_keywords: id 30 | segmentation_enable: true 31 | segmentation_num_classes: 2 32 | test_dataset_path: 33 | annos: 34 | - /media/eric/Data/bdd100k/annotations/val 35 | imgs: 36 | - /media/eric/Data/bdd100k/images/val 37 | lists: 38 | - /media/eric/Data/bdd100k/ImageSets/val.txt 39 | lmdb: bdd100k-test-lmdb 40 | name: bdd100k_test 41 | segs: 42 | - /media/eric/Data/bdd100k/drivable_maps/labels/val/ 43 | trainval_dataset_path: 44 | annos: 45 | - /media/eric/Data/bdd100k/annotations/train 46 | imgs: 47 | - /media/eric/Data/bdd100k/images/train 48 | lists: 49 | - /media/eric/Data/bdd100k/ImageSets/train.txt 50 | lmdb: bdd100k-train-lmdb 51 | name: bdd100k_train 52 | segs: 53 | - /media/eric/Data/bdd100k/drivable_maps/labels/train/ 54 | -------------------------------------------------------------------------------- /data/od_dataset_from_file.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import glob 4 | import os 5 | import torch 6 | from torch.utils.data.dataset import Dataset # For custom datasets 7 | import json 8 | 9 | from tqdm import tqdm 10 | import pickle 11 | import xml.etree.ElementTree as ET 12 | #import image_augmentation as img_aug 13 | import cv2 14 | 15 | ''' 16 | CLASSES = ('__background__', 17 | 'aeroplane', 'bicycle', 'bird', 'boat', 18 | 'bottle', 'bus', 'car', 'cat', 'chair', 19 | 'cow', 'diningtable', 'dog', 'horse', 20 | 'motorbike', 'person', 'pottedplant', 21 | 'sheep', 'sofa', 'train', 'tvmonitor') 22 | ''' 23 | 24 | #classes_map['background'] = 0 25 | 26 | class DatasetFromFile(Dataset): 27 | def __init__(self, image_path,anno_path,seg_path,imageset_list,classes,dataset_name,phase='train',has_seg = False,difficultie = True,ext_img = ['jpg','bmp'],ext_anno = ['xml','json'],ext_seg=['png'],ori_classes_name=None): 28 | 29 | # Get image list 30 | #self.img_folder_list = glob.glob(folder_path+'*') 31 | 32 | self.item_list = list() 33 | self.phase = phase 34 | self.difficultie = difficultie 35 | self.classes = classes 36 | self.classes_map = {k: v for v, k in enumerate(classes)} 37 | self.ext_img = ext_img 38 | self.ext_anno = ext_anno 39 | self.has_seg = has_seg 40 | self.ext_seg = ext_seg 41 | self.seg_path = seg_path 42 | im_list = list() 43 | if ori_classes_name!=None: 44 | self.ori_classes_name = ori_classes_name 45 | else: 46 | self.ori_classes_name = classes 47 | #print(type(image_path)) 48 | self.list_name = 'data/%s.txt'%dataset_name 49 | 50 | if os.path.isfile(self.list_name): 51 | print(self.list_name) 52 | with open(self.list_name, "rb") as fp: # Unpickling 53 | self.item_list = pickle.load(fp) 54 | else: 55 | 56 | if type(imageset_list) is str and type(image_path) is str and type(anno_path) is str: 57 | with open(imageset_list,'r') as f: 58 | for line in f: 59 | for word in line.split(): 60 | 
im_list.append(word) 61 | if self.has_seg: 62 | self.parse_list(image_path,anno_path,im_list,seg_path) 63 | else: 64 | self.parse_list(image_path,anno_path,im_list) 65 | elif type(imageset_list) is list : 66 | assert len(imageset_list) == len(image_path) == len(anno_path) 67 | for idx in range(len(imageset_list)) : 68 | set = imageset_list[idx] 69 | im_list.clear() 70 | with open(set,'r') as f: 71 | for line in f: 72 | for word in line.split(): 73 | im_list.append(word) 74 | if self.has_seg: 75 | self.parse_list(image_path[idx],anno_path[idx],im_list,seg_path[idx]) 76 | else: 77 | self.parse_list(image_path[idx],anno_path[idx],im_list) 78 | 79 | with open(self.list_name, "wb") as fp: #Pickling 80 | pickle.dump(self.item_list, fp) 81 | self.data_len = len(self.item_list) 82 | print('total files of %s : %d'%(dataset_name,self.data_len)) 83 | #print(self.item_list) 84 | def __getitem__(self, index): 85 | # Get image name from the pandas df 86 | if self.has_seg : 87 | single_image_path, single_anno_path, single_seg_path = self.item_list[index] 88 | else: 89 | single_image_path, single_anno_path = self.item_list[index] 90 | # Open image 91 | im = cv2.imread(single_image_path) 92 | boxes, labels, difficulties = self.parse_annotation(single_anno_path) 93 | yolo_labels = list() 94 | height, width, channels = im.shape 95 | im = cv2.imencode('.jpg', im,[int(cv2.IMWRITE_JPEG_QUALITY), 98]) 96 | yolo_labels = self.to_yolo_label(boxes,labels,difficulties,width,height) 97 | if self.has_seg : 98 | im2 = cv2.imread(single_seg_path) 99 | im2 = cv2.imencode('.png', im2,[int(cv2.IMWRITE_PNG_COMPRESSION),1]) 100 | return (im, yolo_labels, im2) 101 | else : 102 | return (im, yolo_labels) 103 | 104 | def __len__(self): 105 | return self.data_len 106 | def to_yolo_label(self,boxes,labels,difficulties,width = 0,height = 0): 107 | yolo_labels = list() 108 | float = width == 0 and height == 0 109 | 110 | for index,box in enumerate(boxes): 111 | if self.difficultie or not difficulties[index]: 112 | #print(box) 113 | yolo_label = list() 114 | yolo_label.clear() 115 | #print(box,labels[index]) 116 | x = (box[0] + box[2])/2 117 | y = (box[1] + box[3])/2 118 | w = box[2] - box[0] 119 | h = box[3] - box[1] 120 | if not float : 121 | x = x / width 122 | y = y / height 123 | w = w / width 124 | h = h / height 125 | yolo_label.append(labels[index]) 126 | yolo_label.append(x) 127 | yolo_label.append(y) 128 | yolo_label.append(w) 129 | yolo_label.append(h) 130 | yolo_labels.append(yolo_label) 131 | return yolo_labels 132 | 133 | def parse_list(self,image_path,anno_path,im_list,seg_path=None): 134 | image_list = list() 135 | image_list.clear() 136 | seg_list = list() 137 | seg_list.clear() 138 | im_lists = tqdm(im_list) 139 | seg_files = list() 140 | if self.has_seg: 141 | for i in self.ext_seg : 142 | seg_files = seg_files + glob.glob(seg_path+'/*.%s'%i) 143 | 144 | 145 | for s in im_lists : 146 | img_file = None 147 | for i in self.ext_img : 148 | filepath = "{}/{}.{}".format(image_path,s,i) 149 | if os.path.isfile(filepath): 150 | img_file = filepath 151 | anno_file = None 152 | for i in self.ext_anno : 153 | filepath = "{}/{}.{}".format(anno_path,s,i) 154 | if os.path.isfile(filepath): 155 | anno_file = filepath 156 | if self.has_seg: 157 | for seg in seg_files: 158 | if s in seg : 159 | if img_file!=None and anno_file!=None : 160 | self.item_list.append([img_file,anno_file,seg]) 161 | im_lists.set_description("Processing %s" % img_file) 162 | else: 163 | im_lists.set_description("Not find file %s" % s) 164 | break 165 | 
elif img_file!=None and anno_file!=None : 166 | self.item_list.append([img_file,anno_file]) 167 | im_lists.set_description("Processing %s" % img_file) 168 | else: 169 | im_lists.set_description("Not find file %s" % s) 170 | 171 | def bound(low, high, value): 172 | return max(low, min(high, value)) 173 | def parse_annotation(self,annotation_path): 174 | filename, file_extension = os.path.splitext(annotation_path) 175 | boxes = list() 176 | labels = list() 177 | difficulties = list() 178 | # VOC format xml 179 | if file_extension == '.xml': 180 | source = open(annotation_path) 181 | tree = ET.parse(source) 182 | root = tree.getroot() 183 | 184 | for object in root.iter('object'): 185 | difficult = int(object.find('difficult').text == '1') 186 | label = object.find('name').text.lower().strip() 187 | 188 | if label not in self.classes: 189 | continue 190 | bbox = object.find('bndbox') 191 | xmin = int(bbox.find('xmin').text) - 1 192 | ymin = int(bbox.find('ymin').text) - 1 193 | xmax = int(bbox.find('xmax').text) - 1 194 | ymax = int(bbox.find('ymax').text) - 1 195 | boxes.append([xmin, ymin, xmax, ymax]) 196 | #print(label) 197 | labels.append(self.classes_map[label]) 198 | difficulties.append(difficult) 199 | source.close() 200 | return boxes, labels, difficulties 201 | # COCO format json 202 | elif file_extension == '.json': 203 | with open(annotation_path, 'r') as f: 204 | data=json.load(f) 205 | width = int(data['image']['width'])-1 206 | height = int(data['image']['height'])-1 207 | object_number = len(data['annotation']) 208 | for j in range(object_number): 209 | class_id = int(data['annotation'][j]['category_id'])-1 210 | category_name = self.ori_classes_name[class_id] 211 | if category_name in self.classes: 212 | new_class_id = self.classes.index(category_name) 213 | xmin = int(float(data['annotation'][j]['bbox'][0])+0.5) 214 | ymin = int(float(data['annotation'][j]['bbox'][1])+0.5) 215 | if xmin<0: 216 | xmin = 0 217 | if ymin<0: 218 | ymin = 0 219 | xmax = int(float(data['annotation'][j]['bbox'][0])+float(data['annotation'][j]['bbox'][2])+0.5) 220 | ymax = int(float(data['annotation'][j]['bbox'][1])+float(data['annotation'][j]['bbox'][3])+0.5) 221 | if xmax>width: 222 | xmax = width 223 | if ymax>height: 224 | ymax = height 225 | boxes.append([xmin, ymin, xmax, ymax]) 226 | labels.append(new_class_id) 227 | difficulties.append(0) 228 | #print(xmin,ymin,class_id) 229 | return boxes, labels, difficulties 230 | def collate_fn(self, batch): 231 | 232 | images = list() 233 | boxes = list() 234 | labels = list() 235 | difficulties = list() 236 | 237 | for b in batch: 238 | images.append(b[0]) 239 | boxes.append(b[1]) 240 | labels.append(b[2]) 241 | difficulties.append(b[3]) 242 | 243 | images = torch.stack(images, dim=0) 244 | 245 | return images, boxes, labels, difficulties # tensor (N, 3, H, W), 3 lists of N tensors each 246 | -------------------------------------------------------------------------------- /data/voc_data.yaml: -------------------------------------------------------------------------------- 1 | test_dataset_path: 2 | annos: [data/VOCdevkit/VOC2007/Annotations/] 3 | imgs: [data/VOCdevkit/VOC2007/JPEGImages/] 4 | lists: [data/VOCdevkit/VOC2007/ImageSets/Main/test.txt] 5 | lmdb: test-lmdb 6 | name: voc_test 7 | trainval_dataset_path: 8 | annos: [data/VOCdevkit/VOC2007/Annotations/, data/VOCdevkit/VOC2012/Annotations/] 9 | imgs: [data/VOCdevkit/VOC2007/JPEGImages/, data/VOCdevkit/VOC2012/JPEGImages/] 10 | lists: [data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt, 
data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt] 11 | lmdb: train-lmdb 12 | name: voc_trainval 13 | segmentation_enable: false 14 | classes: 15 | original: ["aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat", "chair","cow", "diningtable", "dog", "horse","motorbike", "person", "pottedplant","sheep", "sofa", "train", "tvmonitor"] 16 | map: ["aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat", "chair","cow", "diningtable", "dog", "horse","motorbike", "person", "pottedplant","sheep", "sofa", "train", "tvmonitor"] 17 | extention_names: 18 | image: ["jpg"] 19 | annotation: ["xml"] 20 | model_config_path: "models/voc/config.yaml" -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://www.learnopencv.com/install-opencv3-on-ubuntu 2 | # https://docs.opencv.org/3.4/d6/d15/tutorial_building_tegra_cuda.html 3 | 4 | ARG CUDA_VERSION=10.1 5 | ARG CUDNN_VERSION=7 6 | 7 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 8 | 9 | ARG PYTHON_VERSION=3.6 10 | ARG OPENCV_VERSION=4.1.1 11 | 12 | # Needed for string substitution 13 | SHELL ["/bin/bash", "-c"] 14 | 15 | # Add CUDA libs paths 16 | RUN export DEBIAN_FRONTEND=noninteractive \ 17 | && apt-get update && \ 18 | CUDA_PATH=(/usr/local/cuda-*) && \ 19 | CUDA=`basename $CUDA_PATH` && \ 20 | echo "$CUDA_PATH/compat" >> /etc/ld.so.conf.d/${CUDA/./-}.conf && \ 21 | ldconfig && \ 22 | # Install all dependencies for OpenCV and Caffe 23 | apt-get -y update --fix-missing && \ 24 | apt-get -y install --no-install-recommends \ 25 | python${PYTHON_VERSION} \ 26 | python${PYTHON_VERSION}-dev \ 27 | $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \ 28 | build-essential \ 29 | wget \ 30 | unzip \ 31 | git \ 32 | python-scipy \ 33 | python-skimage \ 34 | libopencv-dev \ 35 | && \ 36 | # install python dependencies 37 | sysctl -w net.ipv4.ip_forward=1 && \ 38 | wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \ 39 | python${PYTHON_VERSION} get-pip.py && \ 40 | rm get-pip.py && \ 41 | pip${PYTHON_VERSION} install numpy && \ 42 | pip${PYTHON_VERSION} install torch && \ 43 | pip${PYTHON_VERSION} install torchvision && \ 44 | pip${PYTHON_VERSION} install lmdb && \ 45 | pip${PYTHON_VERSION} install six && \ 46 | pip${PYTHON_VERSION} install matplotlib && \ 47 | pip${PYTHON_VERSION} install tqdm && \ 48 | pip${PYTHON_VERSION} install nni && \ 49 | pip${PYTHON_VERSION} install progress && \ 50 | pip${PYTHON_VERSION} install filetype && \ 51 | pip${PYTHON_VERSION} install msgpack_python && \ 52 | pip${PYTHON_VERSION} install Pillow && \ 53 | pip${PYTHON_VERSION} install PyYAML && \ 54 | pip${PYTHON_VERSION} install imgaug && \ 55 | pip${PYTHON_VERSION} install tensorboard && \ 56 | # Set the default python and install PIP packages 57 | update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ 58 | update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 59 | 60 | # connect 8080 for nni 61 | EXPOSE 8080 62 | 63 | ENV MobileNetYOLO_ROOT=/workspace/Mobilenet-YOLO-Pytorch 64 | WORKDIR $MobileNetYOLO_ROOT 65 | 66 | RUN cd /workspace && \ 67 | git clone --depth 1 https://github.com/eric612/Mobilenet-YOLO-Pytorch.git && \ 68 | #unzip caffe.zip && \ 69 | cd $MobileNetYOLO_ROOT 
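#
# Hypothetical usage note (not part of the original Dockerfile): once the image
# is built, it is typically used roughly as below. The image tag is an
# assumption, --gpus all requires the NVIDIA container toolkit on the host, and
# port 8080 matches the nni port exposed above.
#
#   docker build -t mobilenet-yolo-pytorch docker/
#   docker run --gpus all -it -p 8080:8080 mobilenet-yolo-pytorch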
-------------------------------------------------------------------------------- /folder2lmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import six 4 | import string 5 | import argparse 6 | 7 | import lmdb 8 | import pickle 9 | import msgpack 10 | import tqdm 11 | from PIL import Image 12 | 13 | import torch 14 | import torch.utils.data as data 15 | from utils.image_augmentation import Image_Augmentation 16 | from torch.utils.data import DataLoader 17 | from torchvision.transforms import transforms 18 | from torchvision import transforms, datasets 19 | # This segfaults when imported before torch: https://github.com/apache/arrow/issues/2637 20 | from data.od_dataset_from_file import DatasetFromFile 21 | import cv2 22 | import numpy as np 23 | import shutil 24 | import random 25 | import yaml 26 | from utils.box import wh_to_x2y2 27 | import imgaug.augmenters as iaa 28 | sometimes = lambda aug: iaa.Sometimes(0.5, aug) 29 | seq = iaa.Sequential([ 30 | sometimes(iaa.SomeOf((1, 2), 31 | [ 32 | #sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation 33 | iaa.OneOf([ 34 | iaa.GaussianBlur((0, 1.0)), # blur images with a sigma between 0 and 3.0 35 | iaa.MedianBlur(k=(3,5)), # blur image using local medians with kernel sizes between 2 and 7 36 | ]), 37 | iaa.Sharpen(alpha=(0, 0.1), lightness=(0.9, 1.1)), # sharpen images 38 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.03*255), per_channel=0.3), # add gaussian noise to images 39 | ], 40 | random_order=True 41 | )) 42 | ]) 43 | 44 | if torch.__version__> '1.8': 45 | from torchvision.transforms import InterpolationMode 46 | interp = InterpolationMode.BILINEAR 47 | else : 48 | interp = 2 49 | CLASSES = (#'__background__', 50 | 'aeroplane', 'bicycle', 'bird', 'boat', 51 | 'bottle', 'bus', 'car', 'cat', 'chair', 52 | 'cow', 'diningtable', 'dog', 'horse', 53 | 'motorbike', 'person', 'pottedplant', 54 | 'sheep', 'sofa', 'train', 'tvmonitor') 55 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 56 | class ImageFolderLMDB(data.Dataset): 57 | def __init__(self, db_path,batch_size,transform_size = [[352,352]], phase=None,expand_scale=1.5,mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225],has_seg = False, classes_name = CLASSES, seg_num_classes = 0): 58 | self.db_path = db_path 59 | self.env = lmdb.open(db_path, subdir=os.path.isdir(db_path), 60 | readonly=True, lock=False, 61 | readahead=False, meminit=False) 62 | with self.env.begin(write=False) as txn: 63 | self.length = pickle.loads(txn.get(b'__len__')) 64 | self.keys = pickle.loads(txn.get(b'__keys__')) 65 | self.normalize = transforms.Normalize(mean=mean,std=std) 66 | self.mean = mean 67 | self.std = std 68 | self.transform_size = transform_size 69 | self.phase = phase 70 | self.img_aug = Image_Augmentation() 71 | self.batch_size = batch_size 72 | self.count = 0 73 | self.expand_scale = expand_scale 74 | self.has_seg = has_seg 75 | self.classes_name = classes_name 76 | self.seg_num_classes = seg_num_classes 77 | 78 | def get_single_image(self,index,expand=False,expand_scale=1.5): 79 | 80 | img, target,img2 = None, None, None 81 | env = self.env 82 | 83 | with env.begin(write=False) as txn: 84 | byteflow = txn.get(self.keys[index]) 85 | unpacked = pickle.loads(byteflow) 86 | #unpacked = pa.deserialize(byteflow) 87 | 88 | # load image 89 | imgbuf = unpacked[0] 90 | buf = six.BytesIO() 91 | buf.write(imgbuf[1]) 92 | buf.seek(0) 93 | X_str= 
np.fromstring(buf.read(), dtype=np.uint8) 94 | img = cv2.imdecode(X_str, cv2.IMREAD_COLOR) 95 | 96 | # load label 97 | target = unpacked[1] 98 | 99 | if self.has_seg: 100 | # load segmentation id 101 | imgbuf = unpacked[2] 102 | buf = six.BytesIO() 103 | buf.write(imgbuf[1]) 104 | buf.seek(0) 105 | X_str= np.fromstring(buf.read(), dtype=np.uint8) 106 | img2 = cv2.imdecode(X_str, cv2.IMREAD_COLOR) 107 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 108 | seg_id = Image.fromarray(img2) 109 | else : 110 | seg_id = None 111 | 112 | #if self.phase == 'train': 113 | target2 = torch.Tensor(target) 114 | boxes = target2[...,1:5] 115 | if boxes.shape[0] == 0 : 116 | #print(target2.shape) 117 | boxes2 = torch.zeros(0,4) 118 | labels = torch.zeros(0) 119 | else : 120 | x1 = (boxes[...,0] - boxes[...,2]/2).unsqueeze(1) 121 | y1 = (boxes[...,1] - boxes[...,3]/2).unsqueeze(1) 122 | x2 = (boxes[...,0] + boxes[...,2]/2).unsqueeze(1) 123 | y2 = (boxes[...,1] + boxes[...,3]/2).unsqueeze(1) 124 | boxes2 = torch.cat((x1*img.shape[1],y1*img.shape[0],x2*img.shape[1],y2*img.shape[0]),1) 125 | #if boxes.size(0) : 126 | labels = target2[...,0] 127 | #print(boxes2) 128 | #if labels == 7 : 129 | 130 | difficulties = torch.zeros_like(labels) 131 | img = seq(image=img) # done by the library 132 | image = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB)) 133 | #print(seg_id) 134 | 135 | new_img, new_boxes, new_labels, new_difficulties, new_seg_id = self.img_aug.transform_od(image, boxes2, labels, difficulties,seg_id=seg_id, mean = [0.5, 0.5, 0.5],std = [1, 1, 1],phase = self.phase,expand = expand,expand_scale = self.expand_scale) 136 | 137 | array = np.array(new_seg_id) 138 | maps = list() 139 | if self.has_seg: 140 | for c in range(1,self.seg_num_classes+1): 141 | maps.append(Image.fromarray(array==c)) 142 | old_dims = torch.FloatTensor([new_img.width, new_img.height, new_img.width, new_img.height]).unsqueeze(0) 143 | new_boxes2 = new_boxes / old_dims # percent coordinates 144 | 145 | w = (new_boxes2[...,2] - new_boxes2[...,0]) 146 | h = (new_boxes2[...,3] - new_boxes2[...,1]) 147 | x = (new_boxes2[...,0] + w/2).unsqueeze(1) 148 | y = (new_boxes2[...,1] + h/2).unsqueeze(1) 149 | #print(x.shape,y.shape,w.shape,h.shape,new_boxes.shape) 150 | new_boxes2 = torch.cat((x,y,w.unsqueeze(1),h.unsqueeze(1)),1) 151 | new_target = torch.cat((new_labels.unsqueeze(1),new_boxes2),1) 152 | 153 | 154 | return (new_img,new_target,maps) 155 | def __getitem__(self, index): 156 | #print(index) 157 | 158 | 159 | if type(index) == list: 160 | 161 | group = [] 162 | s = len(index) 163 | 164 | for idx in index: 165 | img,tar,seg_id = self.get_single_image(idx,s==1) 166 | group.append([img,tar,seg_id]) 167 | 168 | if s == 1 : 169 | #self.show_image(img,tar[...,1:5],tar[...,0],convert=True) 170 | return group[0][0],group[0][1],1,group[0][2] 171 | else : 172 | b = self.img_aug.Mosaic(group,[1000,1000]) 173 | #self.show_image(b[0],b[1][...,1:5].clone(),b[1][...,0].clone(),convert=True) 174 | return b[0],b[1],len(index) 175 | else: 176 | img,tar,_ = self.get_single_image(index) 177 | return img,tar,1 178 | 179 | def show_image(self,image,boxes=None,labels=None,convert=False,seg_id = False,gray_img_only = False,resize = None): 180 | if gray_img_only == True : 181 | #print(image) 182 | cv_img = np.array(image.convert('L')) 183 | print(cv_img.shape) 184 | if resize!=None : 185 | cv_img = cv2.resize(cv_img, (resize[0], resize[1]), interpolation=cv2.INTER_AREA) 186 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 187 | cv2.resizeWindow('frame', 640, 
480) 188 | cv2.imshow('frame', cv_img) 189 | key = cv2.waitKey(3) 190 | else : 191 | cv_img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) 192 | seg_id = (np.asarray(seg_id)!=0)*0.5 193 | #print(seg_id) 194 | #print(cv_img.shape,seg_id.shape) 195 | #cv_img = cv2.bitwise_and(cv_img,cv_img,mask = seg_id) 196 | cv_img[...,0] = cv_img[...,0]*seg_id + cv_img[...,0]*(seg_id==0) 197 | cv_img[...,2] = cv_img[...,2]*seg_id + cv_img[...,2]*(seg_id==0) 198 | for idx,box in enumerate(boxes) : 199 | if convert : 200 | #print(box,cv_img.shape) 201 | wh_to_x2y2(box) 202 | #print(box,cv_img.shape) 203 | box[0],box[2] = box[0]*cv_img.shape[1],box[2]*cv_img.shape[1] 204 | box[1],box[3] = box[1]*cv_img.shape[0],box[3]*cv_img.shape[0] 205 | 206 | cv2.rectangle(cv_img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,255,0), 2) 207 | text=self.classes_name[int(labels[idx])].lower() 208 | cv2.putText(cv_img, text, (int(box[0]),int(box[1]-5)), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 255, 255), 1, cv2.LINE_AA) 209 | 210 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 211 | cv2.resizeWindow('frame', 480, 480) 212 | cv2.imshow('frame', cv_img) 213 | key = cv2.waitKey(0) 214 | #cv2.imwrite('images//frame%04d.jpg'%self.count, cv_img) 215 | 216 | def __len__(self): 217 | return self.length 218 | 219 | def __repr__(self): 220 | return self.__class__.__name__ + ' (' + self.db_path + ')' 221 | def set_transform(self,transform): 222 | self.transform = transform 223 | def collate_fn(self, batch): 224 | images = list() 225 | labels = list() 226 | seg_maps = list() 227 | random_size = random.choice(self.transform_size) 228 | seg_random_size = [int(number / 16) for number in random_size] 229 | #print(seg_random_size) 230 | self.transform = transforms.Compose([ 231 | transforms.Resize(size=random_size, interpolation=interp), 232 | transforms.ToTensor(), 233 | self.normalize, 234 | ]) 235 | self.transform_seg = transforms.Compose([ 236 | transforms.Resize(size=seg_random_size, interpolation=interp), 237 | transforms.ToTensor(), 238 | ]) 239 | 240 | count = 0 241 | 242 | for b in batch: 243 | if self.has_seg: 244 | maps = torch.zeros(seg_random_size[0],seg_random_size[1],self.seg_num_classes) 245 | for i,m in enumerate(b[3]): 246 | cv_img = np.array(m.convert('L')) 247 | cv_img = cv2.resize(cv_img, (seg_random_size[0], seg_random_size[1]), interpolation=cv2.INTER_AREA) 248 | maps[...,i] = torch.Tensor(cv_img)/255.0 249 | #self.show_image(m,gray_img_only=True,resize=seg_random_size) 250 | seg_maps.append(maps) 251 | 252 | images.append(self.transform(b[0])) 253 | labels.append(b[1]) 254 | count = b[2] + count 255 | images = torch.stack(images, dim=0) 256 | 257 | 258 | if self.phase == 'train': 259 | if self.has_seg: 260 | seg_maps = torch.stack(seg_maps, dim=0) 261 | return images, labels, count, seg_maps 262 | else: 263 | return images, labels, count, None 264 | else : 265 | return images, labels 266 | def raw_reader(path): 267 | with open(path, 'rb') as f: 268 | bin_data = f.read() 269 | return bin_data 270 | 271 | 272 | def folder2lmdb(dataset_path, write_frequency=5000): 273 | directory = os.path.expanduser(dataset_path) 274 | print("Loading dataset from %s" % directory) 275 | 276 | with open(dataset_path, 'r') as stream: 277 | data = yaml.load(stream) 278 | print(data) 279 | classes_name = data["classes"]["map"] 280 | classes_name.insert(0, 'background') 281 | ori_classes_name = data["classes"]["original"] 282 | trainval_dataset_path = data["trainval_dataset_path"] 283 | test_dataset_path = data["test_dataset_path"] 284 
| ext_img = data["extention_names"]["image"] 285 | ext_anno = data["extention_names"]["annotation"] 286 | segmentation_enable = data["segmentation_enable"] 287 | if segmentation_enable: 288 | ext_seg = data["extention_names"]["segmentation"] 289 | 290 | 291 | #print(classes_name) 292 | if segmentation_enable: 293 | trainval_dataset = \ 294 | DatasetFromFile(trainval_dataset_path['imgs'],trainval_dataset_path['annos'],trainval_dataset_path['segs'],trainval_dataset_path['lists'],classes_name, \ 295 | dataset_name=trainval_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ext_seg=ext_seg,ori_classes_name=ori_classes_name) 296 | 297 | test_dataset = \ 298 | DatasetFromFile(test_dataset_path['imgs'],test_dataset_path['annos'],test_dataset_path['segs'],test_dataset_path['lists'],classes_name, \ 299 | dataset_name=test_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ext_seg=ext_seg,ori_classes_name=ori_classes_name) 300 | else : 301 | trainval_dataset = \ 302 | DatasetFromFile(trainval_dataset_path['imgs'],trainval_dataset_path['annos'],None,trainval_dataset_path['lists'],classes_name, \ 303 | dataset_name=trainval_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ori_classes_name=ori_classes_name) 304 | 305 | test_dataset = \ 306 | DatasetFromFile(test_dataset_path['imgs'],test_dataset_path['annos'],None,test_dataset_path['lists'],classes_name, \ 307 | dataset_name=test_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ori_classes_name=ori_classes_name) 308 | outpath = trainval_dataset_path['lmdb'],test_dataset_path['lmdb'] 309 | total_set = trainval_dataset,test_dataset 310 | for i in range(len(total_set)) : 311 | data_loader = DataLoader(total_set[i], num_workers=4, collate_fn=lambda x: x) 312 | lmdb_path = os.path.expanduser(outpath[i]) 313 | 314 | if os.path.exists(lmdb_path) and os.path.isdir(lmdb_path): 315 | shutil.rmtree(lmdb_path) 316 | #print(lmdb_path) 317 | os.mkdir(lmdb_path) 318 | print("Generate LMDB to %s" % lmdb_path) 319 | db = lmdb.open(lmdb_path, subdir=True, 320 | map_size=1099511627776 * 2, readonly=False, 321 | meminit=False, map_async=True) 322 | 323 | txn = db.begin(write=True) 324 | sum = 0 325 | 326 | for idx, data in enumerate(data_loader): 327 | if segmentation_enable: 328 | image,label,seg = data[0][0],data[0][1],data[0][2] 329 | txn.put(u'{}'.format(idx).encode('ascii'), pickle.dumps((image, label, seg))) 330 | else: 331 | image,label = data[0][0],data[0][1] 332 | txn.put(u'{}'.format(idx).encode('ascii'), pickle.dumps((image, label))) 333 | sum += len(label) 334 | 335 | #txn.put(u'{}'.format(idx).encode('ascii'), pa.serialize((image, label)).to_buffer()) 336 | if idx % write_frequency == 0: 337 | print("[%d/%d]" % (idx, len(data_loader))) 338 | txn.commit() 339 | txn = db.begin(write=True) 340 | 341 | print('total box : %d'%sum) 342 | # finish iterating through dataset 343 | txn.commit() 344 | keys = [u'{}'.format(k).encode('ascii') for k in range(idx + 1)] 345 | with db.begin(write=True) as txn: 346 | txn.put(b'__keys__', pickle.dumps(keys)) 347 | txn.put(b'__len__', pickle.dumps(len(keys))) 348 | #txn.put(b'__keys__', pa.serialize(keys).to_buffer()) 349 | #txn.put(b'__len__', pa.serialize(len(keys)).to_buffer()) 350 | 351 | print("Flushing database ...") 352 | db.sync() 353 | db.close() 354 | 
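# Hypothetical usage sketch, not taken from train.py (which is not shown here):
# it illustrates how ImageFolderLMDB, GreedyBatchSampler and collate_fn appear
# intended to fit together. The lmdb path, batch size and mosaic group sizes
# [1, 4] below are assumptions based on the yaml/config files above.
def _example_train_loader(db_path='train-lmdb', batch_size=32):
    from torch.utils.data import RandomSampler
    from CustomBatchSampler import GreedyBatchSampler

    dataset = ImageFolderLMDB(db_path, batch_size,
                              transform_size=[[352, 352]], phase='train')
    # GreedyBatchSampler yields batches of index *groups*: a group of 4 indices
    # makes __getitem__ build one mosaic image, a group of 1 a plain image.
    sampler = GreedyBatchSampler(RandomSampler(dataset), batch_size,
                                 drop_last=True, sample=[1, 4])
    # collate_fn resizes every image to one randomly chosen size from
    # transform_size and stacks the batch into a single tensor.
    return DataLoader(dataset, batch_sampler=sampler,
                      collate_fn=dataset.collate_fn, num_workers=4)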
355 | 356 | if __name__ == "__main__": 357 | parser = argparse.ArgumentParser() 358 | parser.add_argument("-d", "--dataset", help="Path to original image dataset folder", default = 'data/voc_data.yaml') 359 | args = parser.parse_args() 360 | folder2lmdb(args.dataset) 361 | -------------------------------------------------------------------------------- /images/000166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/000166.jpg -------------------------------------------------------------------------------- /images/001852.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/001852.jpg -------------------------------------------------------------------------------- /images/002597.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/002597.jpg -------------------------------------------------------------------------------- /images/004030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/004030.jpg -------------------------------------------------------------------------------- /images/00690c26-e4bbbd72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/00690c26-e4bbbd72.jpg -------------------------------------------------------------------------------- /images/show.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/show.gif -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import torch 5 | from models.mbv2_yolo import yolo 6 | import filetype 7 | from PIL import Image, ImageDraw, ImageFont 8 | import cv2 9 | import numpy as np 10 | import torchvision.transforms as transforms 11 | 12 | from datetime import datetime 13 | parser = argparse.ArgumentParser(description='YOLO Inference') 14 | parser.add_argument('-c', '--checkpoint', default='checkpoint/checkpoint.pth.tar', type=str, metavar='PATH', 15 | help='path to load checkpoint (default: checkpoint/checkpoint.pth.tar)') 16 | parser.add_argument('-e', '--export', default='', type=str, metavar='PATH', 17 | help='path to export model') 18 | parser.add_argument('-y', '--data_yaml', dest='data_yaml', default='data/voc_data.yaml', type=str, metavar='PATH', 19 | help='path to data_yaml') 20 | parser.add_argument('-i', '--input', default='images/000166.jpg', type=str, metavar='PATH', 21 | help='path to load input file') 22 | distinct_colors = ['#e6194b', '#3cb44b', '#ffe119', '#0082c8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', 23 | '#d2f53c', '#fabebe', '#008080'] 24 | 25 | def main(args): 26 | 27 | assert os.path.isfile(args.data_yaml), 'Error: no config yaml file found!' 
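    # NOTE: yaml.load(f) without an explicit Loader argument is deprecated in
    # newer PyYAML releases; yaml.safe_load(f) is the usual drop-in replacement
    # for plain config files like these.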
28 | with open(args.data_yaml, 'r') as f: 29 | dataset_path = yaml.load(f) 30 | CLASSES = dataset_path["classes"]["map"] 31 | with open(dataset_path["model_config_path"], 'r') as f: 32 | config = yaml.load(f) 33 | 34 | print(config) 35 | assert os.path.isfile(args.checkpoint), 'Error: no checkpoint found!' 36 | #checkpoint = torch.load(args.checkpoint) 37 | model = yolo(config=config) 38 | model = load_model(model, args.checkpoint) 39 | #model.load_state_dict(checkpoint['model']) 40 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 41 | #model = model.cuda() 42 | model = model.to(device) 43 | 44 | model.eval() 45 | 46 | model.yolo_losses[0].val_conf = 0.3 47 | model.yolo_losses[1].val_conf = 0.3 48 | #filename = os.path.basename(args.input) 49 | filename = os.path.basename(args.input).split('.')[0] 50 | kind = filetype.guess(args.input) 51 | if kind is None: 52 | print('Cannot guess file type!') 53 | return 54 | #print('File extension: %s' % kind.extension) 55 | #print('File MIME type: %s' % kind.mime) 56 | if kind.extension in ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif'] : 57 | 58 | original_image = Image.open(args.input, mode='r') 59 | original_image = original_image.convert('RGB') 60 | annotated_image_ = cv2.cvtColor(np.asarray(original_image), cv2.COLOR_RGB2BGR) 61 | height,width = annotated_image_.shape[0],annotated_image_.shape[1] 62 | #im_pil = Image.fromarray(annotated_image_) 63 | 64 | det_boxes,seg_map = inference_image(model,original_image,device) 65 | seg_maps = list() 66 | for cls in range(seg_map.shape[0]): 67 | seg_maps.append(cv2.resize(seg_map[cls,...], (width, height), interpolation=cv2.INTER_LINEAR)) 68 | 69 | # Annotate 70 | annotated_image = original_image 71 | draw = ImageDraw.Draw(annotated_image) 72 | font = ImageFont.load_default().font 73 | # Suppress specific classes, if needed 74 | #box_location = [None]*4 75 | if det_boxes is not None : 76 | for bbox in det_boxes[0]: 77 | # print(bbox) 78 | 79 | box_location = bbox[:4].tolist() 80 | conf = bbox[4].item() 81 | cls_conf = bbox[5].item() 82 | cls_index = int(bbox[6].item()) 83 | if conf*cls_conf>0.15: 84 | box_location[0] = box_location[0]*width 85 | box_location[1] = box_location[1]*height 86 | box_location[2] = box_location[2]*width 87 | box_location[3] = box_location[3]*height 88 | draw.rectangle(xy=box_location,outline=distinct_colors[0]) 89 | # Text 90 | text_size = font.getsize(CLASSES[cls_index].upper()) 91 | text_location = [box_location[0] + 3., box_location[1] - text_size[1]] 92 | textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4., 93 | box_location[1]] 94 | draw.text(xy=text_location, text=CLASSES[cls_index].lower(), fill='white', 95 | font=font) 96 | print('save/%s_result.jpg'%filename) 97 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 98 | cv2.resizeWindow('frame', width, height) 99 | annotated_image = cv2.cvtColor(np.asarray(annotated_image), cv2.COLOR_RGB2BGR) 100 | color_channel = [1,2] 101 | for idx,map in enumerate(seg_maps): 102 | mask = map>0.5 103 | annotated_image[...,color_channel[idx]][mask] = annotated_image[...,color_channel[idx]][mask]*(1.0 - map[mask]) 104 | cv2.imwrite('save/%s_result.jpg'%filename,annotated_image) 105 | cv2.imshow('frame',annotated_image) 106 | key = cv2.waitKey(0) 107 | 108 | 109 | def inference_image(model, original_image,device): 110 | # Transforms 111 | transform_test = transforms.Compose([ 112 | transforms.Resize(size=(416,416), interpolation=2), 113 | transforms.ToTensor(), 114 | 
transforms.Normalize((0.5, 0.5, 0.5), (1, 1, 1)), 115 | ]) 116 | # Transform 117 | image = transform_test(original_image) 118 | image = image.to(device) 119 | # Move to default device 120 | start = datetime.now().timestamp() 121 | detections = model(image.unsqueeze(0)) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 122 | end =datetime.now().timestamp() 123 | c3 = (end - start) 124 | print("model inference time : ", c3*1000, "ms") 125 | 126 | return detections 127 | def load_model(model, path_trained_weight): 128 | checkpoint_backbone = torch.load(path_trained_weight) 129 | 130 | pretrained_dict = checkpoint_backbone.state_dict() 131 | 132 | model_dict = model.state_dict() 133 | #for k, v in model_dict.items() : 134 | #if k[9:] in model_dict : 135 | # print (k) 136 | # 1. filter out unnecessary keys 137 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 138 | if len(pretrained_dict.keys()) == 0: 139 | print('loading pretrain weight fail:{} '.format(path_trained_weight)) 140 | input("Cont?") 141 | #print(pretrained_dict.keys()) 142 | #print(model_dict.keys()) 143 | # 2. overwrite entries in the existing state dict 144 | model_dict.update(pretrained_dict) 145 | # 3. load the new state dict 146 | model.load_state_dict(model_dict) 147 | print("loaded the trained weights from {}".format(path_trained_weight)) 148 | return model 149 | if __name__ == '__main__': 150 | args = parser.parse_args() 151 | main(args) -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/models/__init__.py -------------------------------------------------------------------------------- /models/bdd100k/config.yaml: -------------------------------------------------------------------------------- 1 | img_h: 416 2 | img_w: 416 3 | batch_size: 32 4 | train_img_size: 5 | - [416, 416] 6 | expand_scale: 1.3 7 | mosaic_num: [1] 8 | iou_weighting: 0.02 9 | normalize: 10 | mean: [0.5, 0.5, 0.5] 11 | std: [1,1,1] 12 | yolo: 13 | num_classes: 7 14 | num_anchors: 3 15 | ignore_thresh: [0.6, 0.55] 16 | iou_thresh: 0.6 17 | anchors: 18 | - [34, 47] 19 | - [66, 93] 20 | - [122, 182] 21 | - [6, 11] 22 | - [11, 43] 23 | - [16, 22] 24 | classes: 7 25 | mask: 26 | - [0, 1, 2] 27 | - [3, 4, 5] 28 | seg: 29 | num_classes: 2 -------------------------------------------------------------------------------- /models/mbv2_yolo.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.mobilenetv2 import mobilenetv2 6 | from models.yolo_loss import * 7 | from models.seg_loss import SegLoss 8 | from torch.nn import init 9 | import yaml 10 | from utils.box import nms 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = 
in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | 46 | class Upsample(nn.Module): 47 | def __init__(self): 48 | super(Upsample, self).__init__() 49 | 50 | self.upsample = nn.Sequential( 51 | #BasicConv(in_channels, out_channels, 1), 52 | nn.Upsample(scale_factor=2, mode='nearest') 53 | ) 54 | 55 | def forward(self, x,): 56 | x = self.upsample(x) 57 | return x 58 | def PartAdd(x,y): 59 | if x.size(1) == y.size(1): 60 | return x+y 61 | len = min(x.size(1),y.size(1)) 62 | new_1 = x[:,:len,...] + y[:,:len,...] 63 | if y.size(1) > x.size(1): 64 | new_2 = y[:,len:,...] 65 | else: 66 | new_2 = x[:,len:,...] 67 | new = torch.cat((new_1,new_2),1) 68 | 69 | return new 70 | def DepthwiseConvolution(in_filters,out_filters): 71 | m = nn.Sequential( 72 | BasicConv(in_filters, in_filters, 3,depthwise=True), 73 | BasicConv(in_filters, in_filters, 1), 74 | BasicConv(in_filters, out_filters, 1 ), 75 | ) 76 | return m 77 | def yolo_head(filters_list, in_filters): 78 | m = nn.Sequential( 79 | BasicConv(in_filters, in_filters, 3,depthwise=True), 80 | BasicConv(in_filters, in_filters, 1), 81 | BasicConv(in_filters, filters_list[0], 1), 82 | nn.Conv2d(filters_list[0], filters_list[1], 1), 83 | ) 84 | return m 85 | def seg_head(filters_list, in_filters): 86 | m = nn.Sequential( 87 | BasicConv(in_filters, in_filters, 3,depthwise=True), 88 | BasicConv(in_filters, in_filters, 1), 89 | BasicConv(in_filters, filters_list[0], 1), 90 | nn.Conv2d(filters_list[0], filters_list[1], 1), 91 | ) 92 | return m 93 | class Connect(nn.Module): 94 | def __init__(self, channels): 95 | super(Connect, self).__init__() 96 | 97 | self.conv = nn.Sequential( 98 | BasicConv(channels, channels, 3,depthwise=True), 99 | BasicConv(channels, channels, 1 ), 100 | ) 101 | def forward(self, x,): 102 | x2 = self.conv(x) 103 | x = torch.add(x,x2) 104 | return x 105 | class yolo(nn.Module): 106 | def __init__(self,config): 107 | super(yolo, self).__init__() 108 | self.num_classes = config["yolo"]["num_classes"] 109 | self.num_anchors = config["yolo"]["num_anchors"] 110 | self.seg_loss = None 111 | if "seg" in config: 112 | self.seg_num_classes = config["seg"]["num_classes"] 113 | self.seg_headS16 = seg_head([32, self.seg_num_classes], 32) 114 | self.seg_loss = SegLoss(self.seg_num_classes) 115 | # backbone 116 | model_url = 'https://raw.githubusercontent.com/d-li14/mobilenetv2.pytorch/master/pretrained/mobilenetv2-c5e733a8.pth' 117 | self.backbone = mobilenetv2(model_url) 118 | 119 | self.conv_for_S32 = BasicConv(1280,512,1) 120 | #print(num_anchors * (5 + num_classes)) 121 | self.connect_for_S32 = Connect(512) 122 | self.yolo_headS32 = yolo_head([1024, self.num_anchors * (5 + self.num_classes)],512) 123 | 124 | 125 | self.upsample = Upsample() 126 | self.conv_for_S16 = DepthwiseConvolution(96,512) 127 | self.seg_conv_for_S16 = DepthwiseConvolution(96,32) 128 | self.connect_for_S16 = Connect(512) 129 | 
self.seg_connect_for_S16 = Connect(32) 130 | self.yolo_headS16 = yolo_head([512, self.num_anchors * (5 + self.num_classes)],512) 131 | 132 | self.yolo_losses = [] 133 | for i in range(2): 134 | self.yolo_losses.append(YOLOLoss(config["yolo"]["anchors"],config["yolo"]["mask"][i] \ 135 | ,self.num_classes,[config["img_w"],config["img_h"]],config["yolo"]["ignore_thresh"][i],config["yolo"]["iou_thresh"],iou_weighting=config["iou_weighting"])) 136 | 137 | def forward(self, x, targets=None, seg_maps=None): 138 | 139 | for i in range(2): 140 | self.yolo_losses[i].img_size = [x.size(2),x.size(3)] 141 | feature1, feature2 = self.backbone(x) 142 | S32 = self.conv_for_S32(feature2) 143 | S32 = self.connect_for_S32(S32) 144 | out0 = self.yolo_headS32(S32) 145 | S32_Upsample = self.upsample(S32) 146 | S16 = self.conv_for_S16(feature1) 147 | S16 = self.connect_for_S16(S16) 148 | #S16 = self.blending(S16,S32_Upsample) 149 | #S16 = PartAdd(S16,S32_Upsample) 150 | #print(S16.shape) 151 | S16 = torch.add(S16,S32_Upsample) 152 | 153 | out1 = self.yolo_headS16(S16) 154 | 155 | S16_branch = self.seg_conv_for_S16(feature1) 156 | S16_branch = self.seg_connect_for_S16(S16_branch) 157 | 158 | output = self.yolo_losses[0](out0,targets),self.yolo_losses[1](out1,targets) 159 | if targets == None : 160 | output = nms(output,self.num_classes) 161 | if self.seg_loss!=None: 162 | out2 = self.seg_headS16(S16_branch) 163 | seg_out = self.seg_loss(out2) 164 | return output,seg_out 165 | else: 166 | return output 167 | else: 168 | if self.seg_loss!=None: 169 | out2 = self.seg_headS16(S16_branch) 170 | seg_out = self.seg_loss(out2,seg_maps) 171 | return output,seg_out 172 | else: 173 | return output 174 | 175 | 176 | 177 | #def test(): 178 | # net = yolo(3,20) 179 | # print(net) 180 | 181 | #test() -------------------------------------------------------------------------------- /models/mbv3_yolo.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.voc.mobilenetv3 import MobileNetV3 6 | from models.voc.yolo_loss import * 7 | from torch.nn import init 8 | from utils.box import nms 9 | 10 | import yaml 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | class Upsample(nn.Module): 46 | 
def __init__(self): 47 | super(Upsample, self).__init__() 48 | 49 | self.upsample = nn.Sequential( 50 | #BasicConv(in_channels, out_channels, 1), 51 | nn.Upsample(scale_factor=2, mode='nearest') 52 | ) 53 | 54 | def forward(self, x,): 55 | x = self.upsample(x) 56 | return x 57 | def DepthwiseConvolution(in_filters,out_filters): 58 | m = nn.Sequential( 59 | BasicConv(in_filters, in_filters, 3,depthwise=True), 60 | BasicConv(in_filters, in_filters, 1), 61 | BasicConv(in_filters, out_filters, 1), 62 | ) 63 | return m 64 | def yolo_head(filters_list, in_filters): 65 | m = nn.Sequential( 66 | BasicConv(in_filters, in_filters, 3,depthwise=True), 67 | BasicConv(in_filters, in_filters, 1), 68 | BasicConv(in_filters, filters_list[0], 1), 69 | nn.Conv2d(filters_list[0], filters_list[1], 1), 70 | ) 71 | return m 72 | class Connect(nn.Module): 73 | def __init__(self, channels): 74 | super(Connect, self).__init__() 75 | 76 | self.conv = nn.Sequential( 77 | BasicConv(channels, channels, 3,depthwise=True), 78 | BasicConv(channels, channels, 1), 79 | ) 80 | 81 | def forward(self, x,): 82 | x2 = self.conv(x) 83 | x = torch.add(x,x2) 84 | return x 85 | def PartAdd(x,y): 86 | if x.size(1) == y.size(1): 87 | return x+y 88 | len = min(x.size(1),y.size(1)) 89 | new_1 = x[:,:len,...] + y[:,:len,...] 90 | if y.size(1) > x.size(1): 91 | new_2 = y[:,len:,...] 92 | else: 93 | new_2 = x[:,len:,...] 94 | new = torch.cat((new_1,new_2),1) 95 | 96 | return new 97 | class yolo(nn.Module): 98 | def __init__(self,config): 99 | super(yolo, self).__init__() 100 | self.num_classes = config["yolo"]["num_classes"] 101 | self.num_anchors = config["yolo"]["num_anchors"] 102 | # backbone 103 | # https://drive.google.com/file/d/1HYPqCM1t8GDj9HnImKitM-QqdR8InxGB/view?usp=sharing 104 | self.backbone = MobileNetV3('mbv3_large.old.pth.tar') 105 | 106 | self.conv_for_S32 = DepthwiseConvolution(960,320) 107 | #print(num_anchors * (5 + num_classes)) 108 | self.connect_for_S32 = Connect(320) 109 | self.yolo_headS32 = yolo_head([960, self.num_anchors * (5 + self.num_classes)],320) 110 | 111 | 112 | self.upsample = Upsample() 113 | #self.conv_for_S16 = Connect(160) 114 | self.connect_for_S16 = Connect(160) 115 | self.yolo_headS16 = yolo_head([640, self.num_anchors * (5 + self.num_classes)],320) 116 | 117 | self.yolo_losses = [] 118 | for i in range(2): 119 | self.yolo_losses.append(YOLOLoss(config["yolo"]["anchors"],config["yolo"]["mask"][i] \ 120 | ,self.num_classes,[config["img_w"],config["img_h"]],config["yolo"]["ignore_thresh"][i],config["yolo"]["iou_thresh"],iou_weighting=config["iou_weighting"])) 121 | 122 | 123 | def forward(self, x, targets=None): 124 | 125 | for i in range(2): 126 | self.yolo_losses[i].img_size = [x.size(2),x.size(3)] 127 | feature1, feature2 = self.backbone(x) 128 | S32 = self.conv_for_S32(feature2) 129 | S32 = self.connect_for_S32(S32) 130 | out0 = self.yolo_headS32(S32) 131 | S32_Upsample = self.upsample(S32) 132 | #S16 = self.conv_for_S16(feature1) 133 | S16 = self.connect_for_S16(feature1) 134 | S16 = self.connect_for_S16(S16) 135 | S16 = PartAdd(S16,S32_Upsample) 136 | #S16 = torch.add(S16,S32_Upsample) 137 | 138 | out1 = self.yolo_headS16(S16) 139 | 140 | output = self.yolo_losses[0](out0,targets),self.yolo_losses[1](out1,targets) 141 | if targets == None : 142 | output = nms(output,self.num_classes) 143 | 144 | 145 | return output 146 | 147 | #def test(): 148 | # net = yolo(3,20) 149 | # print(net) 150 | 151 | #test() -------------------------------------------------------------------------------- 
/models/mbv3_yolo_macc.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.voc.mobilenetv3 import MobileNetV3 6 | from models.voc.yolo_loss import * 7 | from torch.nn import init 8 | from utils.box import nms 9 | 10 | import yaml 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | class Upsample(nn.Module): 46 | def __init__(self, in_channels, out_channels): 47 | super(Upsample, self).__init__() 48 | 49 | self.upsample = nn.Sequential( 50 | BasicConv(in_channels, out_channels, 1), 51 | nn.Upsample(scale_factor=2, mode='nearest') 52 | ) 53 | 54 | def forward(self, x,): 55 | x = self.upsample(x) 56 | return x 57 | def DepthwiseConvolution(in_filters,out_filters): 58 | m = nn.Sequential( 59 | BasicConv(in_filters, in_filters, 3,depthwise=True), 60 | BasicConv(in_filters, in_filters, 1), 61 | BasicConv(in_filters, out_filters, 1), 62 | ) 63 | return m 64 | def yolo_head(filters_list, in_filters): 65 | m = nn.Sequential( 66 | BasicConv(in_filters, in_filters, 3,depthwise=True), 67 | BasicConv(in_filters, in_filters, 1), 68 | BasicConv(in_filters, filters_list[0], 1), 69 | nn.Conv2d(filters_list[0], filters_list[1], 1), 70 | ) 71 | return m 72 | class Connect(nn.Module): 73 | def __init__(self, channels): 74 | super(Connect, self).__init__() 75 | 76 | self.conv = nn.Sequential( 77 | BasicConv(channels, channels, 3,depthwise=True), 78 | BasicConv(channels, channels, 1), 79 | ) 80 | 81 | def forward(self, x,): 82 | x2 = self.conv(x) 83 | x = torch.add(x,x2) 84 | return x 85 | def PartAdd(x,y): 86 | if x.size(1) == y.size(1): 87 | return x+y 88 | len = min(x.size(1),y.size(1)) 89 | new_1 = x[:,:len,...] + y[:,:len,...] 90 | if y.size(1) > x.size(1): 91 | new_2 = y[:,len:,...] 92 | else: 93 | new_2 = x[:,len:,...] 
94 | new = torch.cat((new_1,new_2),1) 95 | 96 | return new 97 | class yolo_graph(nn.Module): 98 | def __init__(self,config): 99 | super(yolo_graph, self).__init__() 100 | self.num_classes = config["yolo"]["num_classes"] 101 | self.num_anchors = config["yolo"]["num_anchors"] 102 | # backbone 103 | # https://drive.google.com/file/d/1HYPqCM1t8GDj9HnImKitM-QqdR8InxGB/view?usp=sharing 104 | self.backbone = MobileNetV3('mbv3_large.old.pth.tar') 105 | 106 | self.conv_for_S32 = BasicConv(960,512,1) 107 | #print(num_anchors * (5 + num_classes)) 108 | self.connect_for_S32 = Connect(512) 109 | self.yolo_headS32 = yolo_head([1024, self.num_anchors * (5 + self.num_classes)],512) 110 | 111 | 112 | self.upsample = Upsample(512,256) 113 | self.conv_for_S16 = DepthwiseConvolution(160,256) 114 | self.connect_for_S16 = Connect(256) 115 | self.yolo_headS16 = yolo_head([512, self.num_anchors * (5 + self.num_classes)],256) 116 | 117 | 118 | 119 | 120 | def forward(self, x, targets=None): 121 | 122 | 123 | feature1, feature2 = self.backbone(x) 124 | S32 = self.conv_for_S32(feature2) 125 | S32 = self.connect_for_S32(S32) 126 | out0 = self.yolo_headS32(S32) 127 | S32_Upsample = self.upsample(S32) 128 | S16 = self.conv_for_S16(feature1) 129 | #S16 = PartAdd(S16,S32_Upsample) 130 | S16 = torch.add(S16,S32_Upsample) 131 | S16 = self.connect_for_S16(S16) 132 | out1 = self.yolo_headS16(S16) 133 | 134 | 135 | 136 | 137 | return out0,out1 138 | 139 | #def test(): 140 | # net = yolo(3,20) 141 | # print(net) 142 | 143 | #test() -------------------------------------------------------------------------------- /models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a MobileNetV2 Model as defined in: 3 | Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. (2018). 4 | MobileNetV2: Inverted Residuals and Linear Bottlenecks 5 | arXiv preprint arXiv:1801.04381. 6 | import from https://github.com/tonylins/pytorch-mobilenet-v2 7 | """ 8 | 9 | import torch.nn as nn 10 | import math 11 | import torch 12 | __all__ = ['mobilenetv2'] 13 | try: 14 | from torch.hub import load_state_dict_from_url 15 | except ImportError: 16 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 17 | 18 | def _make_divisible(v, divisor, min_value=None): 19 | """ 20 | This function is taken from the original tf repo. 21 | It ensures that all layers have a channel number that is divisible by 8 22 | It can be seen here: 23 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 24 | :param v: 25 | :param divisor: 26 | :param min_value: 27 | :return: 28 | """ 29 | if min_value is None: 30 | min_value = divisor 31 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 32 | # Make sure that round down does not go down by more than 10%. 
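    # Worked examples: _make_divisible(37, 8) -> 40 (nearest multiple of 8, already >= 0.9*37);
    # _make_divisible(22, 16) first rounds to 16, but 16 < 0.9*22 = 19.8, so it is bumped up to 32.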
33 | if new_v < 0.9 * v: 34 | new_v += divisor 35 | return new_v 36 | 37 | 38 | def conv_3x3_bn(inp, oup, stride): 39 | return nn.Sequential( 40 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 41 | nn.BatchNorm2d(oup), 42 | nn.ReLU6(inplace=True) 43 | ) 44 | 45 | 46 | def conv_1x1_bn(inp, oup): 47 | return nn.Sequential( 48 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 49 | nn.BatchNorm2d(oup), 50 | nn.ReLU6(inplace=True) 51 | ) 52 | 53 | 54 | class InvertedResidual(nn.Module): 55 | def __init__(self, inp, oup, stride, expand_ratio): 56 | super(InvertedResidual, self).__init__() 57 | assert stride in [1, 2] 58 | 59 | hidden_dim = round(inp * expand_ratio) 60 | self.identity = stride == 1 and inp == oup 61 | 62 | if expand_ratio == 1: 63 | self.conv = nn.Sequential( 64 | # dw 65 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 66 | nn.BatchNorm2d(hidden_dim), 67 | nn.ReLU6(inplace=True), 68 | # pw-linear 69 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 70 | nn.BatchNorm2d(oup), 71 | ) 72 | else: 73 | self.conv = nn.Sequential( 74 | # pw 75 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 76 | nn.BatchNorm2d(hidden_dim), 77 | nn.ReLU6(inplace=True), 78 | # dw 79 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 80 | nn.BatchNorm2d(hidden_dim), 81 | nn.ReLU6(inplace=True), 82 | # pw-linear 83 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 84 | nn.BatchNorm2d(oup), 85 | ) 86 | 87 | def forward(self, x): 88 | if self.identity: 89 | return x + self.conv(x) 90 | else: 91 | return self.conv(x) 92 | 93 | 94 | class MobileNetV2(nn.Module): 95 | def __init__(self, num_classes=1000, width_mult=1.): 96 | super(MobileNetV2, self).__init__() 97 | # setting of inverted residual blocks 98 | self.cfgs1 = [ 99 | # t, c, n, s 100 | [1, 16, 1, 1], 101 | [6, 24, 2, 2], 102 | [6, 32, 3, 2], 103 | [6, 64, 4, 2], 104 | [6, 96, 3, 1], 105 | ] 106 | self.cfgs2 = [ 107 | # t, c, n, s 108 | [6, 160, 3, 2], 109 | [6, 320, 1, 1], 110 | ] 111 | # building first layer 112 | input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) 113 | layers = [conv_3x3_bn(3, input_channel, 2)] 114 | # building inverted residual blocks 115 | block = InvertedResidual 116 | for t, c, n, s in self.cfgs1: 117 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) 118 | for i in range(n): 119 | layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) 120 | input_channel = output_channel 121 | self.features = nn.Sequential(*layers) 122 | layers2 = list() 123 | for t, c, n, s in self.cfgs2: 124 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) 125 | for i in range(n): 126 | layers2.append(block(input_channel, output_channel, s if i == 0 else 1, t)) 127 | input_channel = output_channel 128 | self.features2 = nn.Sequential(*layers2) 129 | # building last several layers 130 | output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 131 | self.conv = conv_1x1_bn(input_channel, output_channel) 132 | #self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 133 | #self.classifier = nn.Linear(output_channel, num_classes) 134 | 135 | self._initialize_weights() 136 | 137 | def forward(self, x): 138 | x1 = self.features(x) 139 | x2 = self.features2(x1) 140 | x2 = self.conv(x2) 141 | #x2 = self.avgpool(x2) 142 | #x2 = x.view(x2.size(0), -1) 143 | #x2 = self.classifier(x2) 144 | return x1,x2 145 | 146 | def _initialize_weights(self): 147 | 
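        # He-style initialisation: conv weights ~ N(0, sqrt(2/fan_out)) with zeroed biases,
        # BatchNorm scale 1 / shift 0, and Linear weights ~ N(0, 0.01).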
for m in self.modules(): 148 | if isinstance(m, nn.Conv2d): 149 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 150 | m.weight.data.normal_(0, math.sqrt(2. / n)) 151 | if m.bias is not None: 152 | m.bias.data.zero_() 153 | elif isinstance(m, nn.BatchNorm2d): 154 | m.weight.data.fill_(1) 155 | m.bias.data.zero_() 156 | elif isinstance(m, nn.Linear): 157 | m.weight.data.normal_(0, 0.01) 158 | m.bias.data.zero_() 159 | 160 | def mobilenetv2(pretrained, **kwargs): 161 | model = MobileNetV2() 162 | if pretrained: 163 | model_dict = model.state_dict() 164 | checkpoint = load_state_dict_from_url(pretrained,progress=True) 165 | #pretrained_dict = torch.load(pretrained)['state_dict'] 166 | 167 | for k1, v1 in checkpoint.items() : 168 | n1 = k1.replace('module.', '') 169 | #print(k1) 170 | 171 | for k2, v2 in model_dict.items() : 172 | n2 = k2.replace('features2.0.','features.14.') 173 | n2 = n2.replace('features2.1.','features.15.') 174 | n2 = n2.replace('features2.2.','features.16.') 175 | n2 = n2.replace('features2.3.','features.17.') 176 | #print(n1,' , ',n2) 177 | if n1 == n2 : 178 | #print(k1,' , ',k2) 179 | model_dict[k2]=v1 180 | 181 | model.load_state_dict(model_dict) 182 | #torch.save(model, 'test.pth.tar') 183 | else: 184 | raise Exception("darknet request a pretrained path. got [{}]".format(pretrained)) 185 | return model 186 | def test(): 187 | model_url = 'https://raw.githubusercontent.com/d-li14/mobilenetv2.pytorch/master/pretrained/mobilenetv2-c5e733a8.pth' 188 | net = mobilenetv2(model_url) 189 | #print(net) 190 | x = torch.randn(2,3,224,224) 191 | y1,y2 = net(x) 192 | print(y2.shape) 193 | #test() -------------------------------------------------------------------------------- /models/mobilenetv3.py: -------------------------------------------------------------------------------- 1 | '''MobileNetV3 in PyTorch. 2 | See the paper "Inverted Residuals and Linear Bottlenecks: 3 | Mobile Networks for Classification, Detection and Segmentation" for more details. 
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn import init 9 | 10 | try: 11 | from torch.hub import load_state_dict_from_url 12 | except ImportError: 13 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 14 | class hswish(nn.Module): 15 | def forward(self, x): 16 | out = x * F.relu6(x + 3, inplace=True) / 6 17 | return out 18 | 19 | 20 | class hsigmoid(nn.Module): 21 | def forward(self, x): 22 | out = F.relu6(x + 3, inplace=True) / 6 23 | return out 24 | 25 | 26 | class SeModule(nn.Module): 27 | def __init__(self, in_size, reduction=4): 28 | super(SeModule, self).__init__() 29 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 30 | 31 | self.se = nn.Sequential( 32 | nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False), 33 | nn.BatchNorm2d(in_size // reduction), 34 | nn.ReLU(inplace=True), 35 | nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False), 36 | nn.BatchNorm2d(in_size), 37 | hsigmoid() 38 | ) 39 | 40 | def forward(self, x): 41 | return x * self.se(x) 42 | 43 | 44 | class Block(nn.Module): 45 | '''expand + depthwise + pointwise''' 46 | def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride): 47 | super(Block, self).__init__() 48 | self.stride = stride 49 | self.se = semodule 50 | 51 | self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False) 52 | self.bn1 = nn.BatchNorm2d(expand_size) 53 | self.nolinear1 = nolinear 54 | self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False) 55 | self.bn2 = nn.BatchNorm2d(expand_size) 56 | self.nolinear2 = nolinear 57 | self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False) 58 | self.bn3 = nn.BatchNorm2d(out_size) 59 | 60 | self.shortcut = nn.Sequential() 61 | if stride == 1 and in_size != out_size: 62 | self.shortcut = nn.Sequential( 63 | nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False), 64 | nn.BatchNorm2d(out_size), 65 | ) 66 | 67 | def forward(self, x): 68 | out = self.nolinear1(self.bn1(self.conv1(x))) 69 | out = self.nolinear2(self.bn2(self.conv2(out))) 70 | out = self.bn3(self.conv3(out)) 71 | if self.se != None: 72 | out = self.se(out) 73 | out = out + self.shortcut(x) if self.stride==1 else out 74 | return out 75 | 76 | 77 | class MobileNetV3_Large(nn.Module): 78 | def __init__(self, num_classes=1000): 79 | super(MobileNetV3_Large, self).__init__() 80 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 81 | self.bn1 = nn.BatchNorm2d(16) 82 | self.hs1 = hswish() 83 | 84 | self.bneck = nn.Sequential( 85 | Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1), 86 | Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2), 87 | Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1), 88 | Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2), 89 | Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), 90 | Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), 91 | Block(3, 40, 240, 80, hswish(), None, 2), 92 | Block(3, 80, 200, 80, hswish(), None, 1), 93 | Block(3, 80, 184, 80, hswish(), None, 1), 94 | Block(3, 80, 184, 80, hswish(), None, 1), 95 | Block(3, 80, 480, 112, hswish(), SeModule(112), 1), 96 | Block(3, 112, 672, 112, hswish(), SeModule(112), 1), 97 | Block(5, 112, 672, 160, hswish(), SeModule(160), 1), 98 | ) 99 | self.bneck2 = 
nn.Sequential( 100 | Block(5, 160, 672, 160, hswish(), SeModule(160), 2), 101 | Block(5, 160, 960, 160, hswish(), SeModule(160), 1), 102 | ) 103 | 104 | self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False) 105 | self.bn2 = nn.BatchNorm2d(960) 106 | self.hs2 = hswish() 107 | #self.linear3 = nn.Linear(960, 1280) 108 | #self.bn3 = nn.BatchNorm1d(1280) 109 | #self.hs3 = hswish() 110 | #self.linear4 = nn.Linear(1280, num_classes) 111 | self.init_params() 112 | 113 | def init_params(self): 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | init.kaiming_normal_(m.weight, mode='fan_out') 117 | if m.bias is not None: 118 | init.constant_(m.bias, 0) 119 | elif isinstance(m, nn.BatchNorm2d): 120 | init.constant_(m.weight, 1) 121 | init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.Linear): 123 | init.normal_(m.weight, std=0.001) 124 | if m.bias is not None: 125 | init.constant_(m.bias, 0) 126 | 127 | def forward(self, x): 128 | out = self.hs1(self.bn1(self.conv1(x))) 129 | out0 = self.bneck(out) 130 | out1 = self.bneck2(out0) 131 | out1 = self.hs2(self.bn2(self.conv2(out1))) 132 | #out1 = F.avg_pool2d(out1, 7) 133 | #out1 = out1.view(out1.size(0), -1) 134 | #out1 = self.hs3(self.bn3(self.linear3(out1))) 135 | #out1 = self.linear4(out1) 136 | return out0,out1 137 | 138 | 139 | 140 | class MobileNetV3_Small(nn.Module): 141 | def __init__(self, num_classes=1000): 142 | super(MobileNetV3_Small, self).__init__() 143 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 144 | self.bn1 = nn.BatchNorm2d(16) 145 | self.hs1 = hswish() 146 | 147 | self.bneck1 = nn.Sequential( 148 | Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2), 149 | Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2), 150 | Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1), 151 | Block(5, 24, 96, 40, hswish(), SeModule(40), 2), 152 | Block(5, 40, 240, 40, hswish(), SeModule(40), 1), 153 | Block(5, 40, 240, 40, hswish(), SeModule(40), 1), 154 | Block(5, 40, 120, 48, hswish(), SeModule(48), 1), 155 | Block(5, 48, 144, 48, hswish(), SeModule(48), 1), 156 | 157 | ) 158 | self.bneck2 = nn.Sequential( 159 | Block(5, 48, 288, 96, hswish(), SeModule(96), 2), 160 | Block(5, 96, 576, 96, hswish(), SeModule(96), 1), 161 | Block(5, 96, 576, 96, hswish(), SeModule(96), 1), 162 | ) 163 | 164 | self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False) 165 | self.bn2 = nn.BatchNorm2d(576) 166 | self.hs2 = hswish() 167 | #self.linear3 = nn.Linear(576, 1280) 168 | #self.bn3 = nn.BatchNorm1d(1280) 169 | #self.hs3 = hswish() 170 | #self.linear4 = nn.Linear(1280, num_classes) 171 | self.init_params() 172 | 173 | def init_params(self): 174 | for m in self.modules(): 175 | if isinstance(m, nn.Conv2d): 176 | init.kaiming_normal_(m.weight, mode='fan_out') 177 | if m.bias is not None: 178 | init.constant_(m.bias, 0) 179 | elif isinstance(m, nn.BatchNorm2d): 180 | init.constant_(m.weight, 1) 181 | init.constant_(m.bias, 0) 182 | elif isinstance(m, nn.Linear): 183 | init.normal_(m.weight, std=0.001) 184 | if m.bias is not None: 185 | init.constant_(m.bias, 0) 186 | 187 | def forward(self, x): 188 | out = self.hs1(self.bn1(self.conv1(x))) 189 | out = self.bneck1(out) 190 | out = self.bneck2(out) 191 | out = self.hs2(self.bn2(self.conv2(out))) 192 | #out = F.avg_pool2d(out, 7) 193 | #out = out.view(out.size(0), -1) 194 | #out = self.hs3(self.bn3(self.linear3(out))) 195 | #out = self.linear4(out) 196 | return out 197 | 198 | 199 | def 
MobileNetV3(pretrained, **kwargs): 200 | model = MobileNetV3_Large() 201 | if pretrained: 202 | if isinstance(pretrained, str): 203 | model_dict = model.state_dict() 204 | #model.load_state_dict(torch.load(pretrained)['state_dict']) 205 | pretrained_dict = torch.load(pretrained)['state_dict'] 206 | 207 | 208 | for k1, v1 in pretrained_dict.items() : 209 | n1 = k1.replace('module.', '') 210 | #print(k1) 211 | 212 | for k2, v2 in model_dict.items() : 213 | n2 = k2.replace('bneck2.0.', 'bneck.13.') 214 | n2 = n2.replace('bneck2.1.', 'bneck.14.') 215 | if n1 == n2 : 216 | #print(k1,k2) 217 | model_dict[k2]=v1 218 | 219 | model.load_state_dict(model_dict) 220 | #torch.save(model, 'test.pth.tar') 221 | #for name,param in model.named_parameters(): 222 | else: 223 | raise Exception("darknet request a pretrained path. got [{}]".format(pretrained)) 224 | return model 225 | 226 | def test(): 227 | net = MobileNetV3_Small() 228 | x = torch.randn(2,3,224,224) 229 | y = net(x) 230 | print(y.size()) 231 | 232 | # test() -------------------------------------------------------------------------------- /models/seg_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from utils import AverageMeter 6 | from utils.iou import * 7 | from torch.autograd import Function 8 | import gc 9 | use_cuda = torch.cuda.is_available() 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | import torchvision 12 | import cv2 13 | 14 | class SegLoss(nn.Module): 15 | class sigmoid(Function): 16 | @staticmethod 17 | def forward(ctx, input): 18 | #ctx.save_for_backward(input) 19 | sigmoid_eval = 1.0/(1.0 + torch.exp(-input)) 20 | #input = sigmoid_eval 21 | return sigmoid_eval 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | #input, = ctx.saved_tensors 26 | #print(grad_output) 27 | # Maximum likelihood and gradient descent demonstration 28 | # https://blog.csdn.net/yanzi6969/article/details/80505421 29 | # https://xmfbit.github.io/2018/03/21/cs229-supervised-learning/ 30 | # https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent 31 | grad_input = grad_output.clone() 32 | return grad_input 33 | def __init__(self,num_classes): 34 | super(SegLoss, self).__init__() 35 | self.num_classes = num_classes 36 | self.threshold = nn.Threshold(0.5, 0.) 37 | return 38 | 39 | 40 | def weighted_mse_loss(self,input, target, weights): 41 | out = (input - target)**2 42 | total = torch.sum(weights) 43 | out = out * weights / total 44 | # expand_as because weights are prob not defined for mini-batch 45 | loss = torch.sum(out) 46 | #print(loss) 47 | return loss 48 | 49 | 50 | 51 | def forward(self, input, targets=None): 52 | if targets is not None: 53 | truth = targets.clone().to(device) 54 | truth = truth.permute(0,3,1,2) 55 | #print(input.shape,truth.shape) 56 | #.to(device) 57 | #print(truth) 58 | #print(truth>0.1) 59 | output = self.sigmoid.apply(input) 60 | #result = output[0,0,...] 
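            # The custom sigmoid above returns grad_output unchanged in backward(), so the
            # gradient of the squared error w.r.t. the pre-sigmoid logits is proportional to
            # (sigmoid(x) - target), matching the binary cross-entropy gradient (cf. the MLE
            # links in sigmoid.backward) rather than being damped by sigmoid'(x).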
61 | #print(result.shape) 62 | #cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 63 | #cv2.resizeWindow('frame', 640, 480) 64 | #cv2.imshow('frame', result.cpu().detach().numpy()) 65 | #key = cv2.waitKey(1) 66 | obj = torch.masked_select(output, truth>=0.5) 67 | no_obj = torch.masked_select(output, truth<0.5) 68 | #mask_truth = torch.masked_select(truth, truth>=0.3) 69 | #threshold = torch.tensor([0.3]).to(device) 70 | #results = (truth>threshold).float()*1 71 | #results = obj + no_obj*truth 72 | #print(results) 73 | #print(torch.mean(output)) 74 | weights = torch.ones_like(input).to(device) 75 | loss = self.weighted_mse_loss(output , truth , weights) 76 | #print(loss) 77 | return loss*0.05,torch.mean(obj).item(),torch.mean(no_obj).item() 78 | else: 79 | output = self.sigmoid.apply(input) 80 | result = output[0,...].cpu().detach().numpy() 81 | return result 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /models/voc/config.yaml: -------------------------------------------------------------------------------- 1 | img_h: 352 2 | img_w: 352 3 | batch_size: 32 4 | train_img_size: 5 | - [352, 352] 6 | - [320, 320] 7 | - [288, 288] 8 | - [384, 384] 9 | - [416, 416] 10 | expand_scale: 2.1610954191879452 11 | mosaic_num: [1,4] 12 | iou_weighting: 0.021830872589525777 13 | normalize: 14 | mean: [0.485, 0.456, 0.406] 15 | std: [0.229, 0.224, 0.225] 16 | yolo: 17 | num_classes: 20 18 | num_anchors: 3 19 | ignore_thresh: [0.6076333316652263, 0.5623606200028424] 20 | iou_thresh: 0.5497280113447018 21 | anchors: 22 | - [143, 265] 23 | - [153, 121] 24 | - [280, 279] 25 | - [20, 37] 26 | - [49, 94] 27 | - [73, 201] 28 | classes: 20 29 | mask: 30 | - [0, 1, 2] 31 | - [3, 4, 5] 32 | -------------------------------------------------------------------------------- /models/yolo_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from utils import AverageMeter 6 | from utils.iou import * 7 | from torch.autograd import Function 8 | import gc 9 | use_cuda = torch.cuda.is_available() 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | import torchvision 12 | 13 | 14 | class YOLOLoss(nn.Module): 15 | class sigmoid(Function): 16 | @staticmethod 17 | def forward(ctx, input): 18 | #ctx.save_for_backward(input) 19 | sigmoid_eval = 1.0/(1.0 + torch.exp(-input)) 20 | #input = sigmoid_eval 21 | return sigmoid_eval 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | #input, = ctx.saved_tensors 26 | #print(grad_output) 27 | # Maximum likelihood and gradient descent demonstration 28 | # https://blog.csdn.net/yanzi6969/article/details/80505421 29 | # https://xmfbit.github.io/2018/03/21/cs229-supervised-learning/ 30 | # https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent 31 | grad_input = grad_output.clone() 32 | return grad_input 33 | def __init__(self, anchors, mask, num_classes, img_size,ignore_threshold,iou_thresh,val_conf = 0.1,iou_weighting = 0.01): 34 | super(YOLOLoss, self).__init__() 35 | self.anchors = anchors 36 | self.mask = mask; 37 | self.num_mask = len(mask) 38 | self.num_anchors = len(anchors) 39 | self.num_classes = num_classes 40 | self.bbox_attrs = 5 + num_classes 41 | self.img_size = img_size 42 | self.ignore_threshold = ignore_threshold 43 | #self.sigmoid = self.MSigmoid() 44 | self.nn_sigmoid = torch.nn.Sigmoid() 45 | self.val_conf = val_conf 46 | self.mse_loss = 
nn.MSELoss() 47 | self.bce_loss = nn.BCELoss() 48 | self.label_smooth_eps = 0.1 49 | self.iou_thresh = iou_thresh 50 | self.iou_weighting = iou_weighting 51 | 52 | 53 | def weighted_mse_loss(self,input, target, weights): 54 | out = (input - target)**2 55 | total = torch.sum(weights) 56 | out = out * weights / total 57 | # expand_as because weights are prob not defined for mini-batch 58 | loss = torch.sum(out) 59 | #print(loss) 60 | return loss 61 | 62 | def pre_maps(self,bs,is_cuda,anchors, in_w, in_h): 63 | 64 | FloatTensor = torch.cuda.FloatTensor if is_cuda else torch.FloatTensor 65 | LongTensor = torch.cuda.LongTensor if is_cuda else torch.LongTensor 66 | this_anchors = np.array(anchors)[self.mask] 67 | anchor_w = FloatTensor(this_anchors).index_select(1, LongTensor([0])) 68 | anchor_h = FloatTensor(this_anchors).index_select(1, LongTensor([1])) 69 | anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(bs,self.num_mask,in_h,in_w,1).to(device) 70 | anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(bs,self.num_mask,in_h,in_w,1).to(device) 71 | grid_x = torch.linspace(0, in_w-1, in_w).repeat(in_w, 1).repeat(bs * self.num_mask, 1, 1).view(bs,self.num_mask,in_h,in_w,1).type(FloatTensor) 72 | grid_y = torch.linspace(0, in_h-1, in_h).repeat(in_h, 1).t().repeat(bs * self.num_mask, 1, 1).view(bs,self.num_mask,in_h,in_w,1).type(FloatTensor) 73 | grid_xy = torch.cat((grid_x,grid_y),4) 74 | anchor_wh = torch.cat((anchor_w,anchor_h),4) 75 | return grid_xy,anchor_wh 76 | 77 | def get_target(self, target,input, anchors, in_w, in_h, ignore_threshold,iou_thresh=0.5): 78 | 79 | bs = input.size(0) 80 | this_anchors = np.array(anchors)[self.mask] 81 | FloatTensor = torch.cuda.FloatTensor if input.is_cuda else torch.FloatTensor 82 | targets_weight = torch.zeros(bs, self.num_mask, in_h, in_w,self.num_classes+1, requires_grad=False).to(device) 83 | pred_boxes = torch.zeros(bs,self.num_mask,in_h, in_w,0, requires_grad=False).to(device) 84 | prediction = input.view(bs, self.num_mask,self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() 85 | xy = self.sigmoid.apply(prediction[..., 0:2]) 86 | wh = torch.exp(prediction[..., 2:4]) 87 | output = self.sigmoid.apply(prediction[..., 4:]) 88 | 89 | grid_xy,anchor_wh = self.pre_maps(bs,input.is_cuda,anchors, in_w, in_h) 90 | pred_boxes = torch.cat((pred_boxes,(xy + grid_xy)/FloatTensor([in_w,in_h])),4) 91 | pred_boxes = torch.cat((pred_boxes,wh * anchor_wh),4) 92 | self.wh_to_x2y2(pred_boxes) 93 | 94 | 95 | count = recall = ious = obj = cls_score = 0 96 | #output = torch.cat((xy,prediction[..., 2:4],conf_cls),4).to(device) 97 | targets = output.clone().to(device) 98 | no_obj = torch.sum(output[...,0]) 99 | no_cnt = output[...,0].numel() 100 | targets_weight_parts = targets_weight[...,0] 101 | targets_parts = targets[...,0] 102 | anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),np.array(anchors)), 1)) 103 | iou_loss = torch.FloatTensor(0).to(device) 104 | iou_weight = torch.FloatTensor(0).to(device) 105 | in_dim = torch.Tensor([in_w,in_h]) 106 | #print(need_grad_tensor.view(,self.num_classes+1).shape) 107 | for b in range(bs): 108 | if len(target[b]) == 0 : 109 | targets_weight_parts[b] = 1 110 | targets_parts[b] = 0 111 | continue 112 | gt_boxes = target[b][...,1:].clone().detach().to(device) 113 | self.wh_to_x2y2(gt_boxes) 114 | 115 | pred_boxes2 = pred_boxes[b].view((in_w*in_h*self.num_mask, 4)).to(device) 116 | pred_iou = find_jaccard_overlap(gt_boxes,pred_boxes2).to(device) 117 | 
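            # pred_iou: pairwise IoU between this image's ground-truth boxes (rows) and all
            # num_mask*in_h*in_w predicted boxes; the max over dim 0 below keeps, for every
            # anchor cell, its best overlap with any ground truth.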
#print(pred_iou.shape) 118 | pred_iou,_ = torch.max(pred_iou,0) 119 | 120 | pred_iou = pred_iou.view((self.num_mask,in_h,in_w)) 121 | 122 | #for i in range(self.num_mask): 123 | m = pred_iouiou_thresh).tolist() 140 | bn = self.num_anchors + 1 141 | if best_n[t] in self.mask : 142 | bn = self.mask.index(best_n[t]) 143 | #k = bn 144 | for k in range(self.num_mask): 145 | if k == bn or iou_thresh_list[k] == True : 146 | count+= 1 147 | cls_index = int(gt[t,0]) 148 | 149 | targets_parts[b,k,gj,gi] = 1 150 | targets_weight_parts[b,k,gj,gi] = 1 151 | conf = output[b,k,gj,gi,0].item() 152 | obj = obj + conf 153 | no_obj = no_obj - conf 154 | gt_box_xy = gt_boxes[t].unsqueeze(0) 155 | pred = pred_boxes[b, k, gj, gi].unsqueeze(0) 156 | 157 | giou,iou = self.box_ciou(gt_box_xy,pred) 158 | 159 | iou_loss = torch.cat((iou_loss,giou.to(device))) 160 | area = 2.0 - self.get_area(gt_box_xy) 161 | 162 | iou_weight = torch.cat((iou_weight,(area).to(device))) 163 | if iou>ignore_threshold : 164 | recall = recall + 1 165 | ious = ious + iou.item() 166 | cls_tensor = targets[b, k, gj, gi,1:] 167 | cls_weight = targets_weight[b, k, gj, gi,1:] 168 | self.class_loss(cls_tensor,cls_weight,cls_index) 169 | cls_score = cls_score + output[b,k,gj,gi,1+cls_index].item() 170 | if count > 0: 171 | obj_avg = obj/count 172 | cls_avg = cls_score/count 173 | no_obj = no_obj/(no_cnt-count) 174 | avg_iou = ious/count 175 | recall = recall/count 176 | else : 177 | recall = obj_avg = cls_avg = no_obj = avg_iou = 0 178 | return targets,targets_weight,output,recall,avg_iou,obj_avg,no_obj,cls_avg,count/bs,iou_loss,iou_weight 179 | 180 | def get_pred_boxes(self,input, anchors, in_w, in_h): 181 | 182 | bs = input.size(0) 183 | pred_boxes = torch.zeros(bs,self.num_mask,in_h, in_w,0, requires_grad=False).to(device) 184 | #pred_boxes = torch.zeros(in_h, in_w,4, requires_grad=False) 185 | outputs=list() 186 | prediction = input.view(bs, self.num_mask,self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() 187 | xy = torch.sigmoid(prediction[..., 0:2]) 188 | wh = torch.exp(prediction[..., 2:4]) 189 | conf_cls = torch.sigmoid(prediction[..., 4:]) # Conf 190 | 191 | FloatTensor = torch.cuda.FloatTensor if input.is_cuda else torch.FloatTensor 192 | grid_xy,anchor_wh = self.pre_maps(bs,input.is_cuda,anchors, in_w, in_h) 193 | 194 | pred_boxes = torch.cat((pred_boxes,(xy + grid_xy)/FloatTensor([in_w,in_h])),4) 195 | pred_boxes = torch.cat((pred_boxes,wh * anchor_wh),4) 196 | self.wh_to_x2y2(pred_boxes) 197 | pred_boxes = torch.cat((pred_boxes,conf_cls[...,0].unsqueeze(4)),4) 198 | score,cls_idx = torch.max(conf_cls[...,1:self.bbox_attrs],dim=4) 199 | pred_boxes = torch.cat((pred_boxes,score.unsqueeze(4),cls_idx.float().unsqueeze(4)),4) 200 | pred_boxes = pred_boxes.to(device) 201 | mask = pred_boxes[...,4]>self.val_conf 202 | for b in range(bs): 203 | outputs.append(pred_boxes[b,mask[b]]) 204 | return outputs 205 | 206 | def forward(self, input, targets=None): 207 | bs = input.size(0) 208 | in_h = input.size(2) 209 | in_w = input.size(3) 210 | stride_h = self.img_size[1] / in_h 211 | stride_w = self.img_size[0] / in_w 212 | #print(self.img_size) 213 | #print(input.shape) 214 | scaled_anchors = [(a_w/self.img_size[0] , a_h/self.img_size[1] ) for a_w, a_h in self.anchors] 215 | 216 | if targets is not None: 217 | #print(self.ignore_threshold) 218 | target,weights,output,recall,avg_iou,obj,no_obj,cls_score,count,iou_losses,iou_weights = self.get_target(targets,input, scaled_anchors,in_w, in_h,self.ignore_threshold,self.iou_thresh) 219 | 
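            # The detection loss below is a weighted MSE over the objectness/class maps from
            # get_target, plus a CIoU-based box term (iou_losses regressed towards 1, weighted
            # by 2 - gt box area so small objects count more) scaled by self.iou_weighting.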
loss = self.weighted_mse_loss(output , target , weights) 220 | iou_target = torch.ones_like(iou_losses) 221 | #iou_loss= torch.sum(iou_target-iou_losses) 222 | iou_loss = torch.Tensor([0]).to(device) 223 | if iou_losses.size(0)>0: 224 | iou_loss = self.weighted_mse_loss(iou_losses,iou_target,iou_weights)/iou_losses.numel() 225 | #iou_loss = self.mse_loss(iou_losses,iou_target)/iou_losses.numel() 226 | #print(iou_loss) 227 | #iou_loss = torch.Tensor(iou_loss) 228 | #print(loss,iou_loss) 229 | #loss = torch.cat((loss.unsqueeze(0) ,iou_loss.unsqueeze(0))) 230 | 231 | if torch.isnan(iou_loss)==True or torch.isnan(loss)==True: 232 | print('\n',loss,iou_loss,bs) 233 | 234 | loss = loss + iou_loss*self.iou_weighting 235 | 236 | return loss, recall,avg_iou,obj,no_obj,cls_score,count 237 | 238 | else: 239 | preds = self.get_pred_boxes(input, scaled_anchors,in_w, in_h) 240 | 241 | return preds 242 | 243 | def wh_to_x2y2(self,bbox): 244 | bbox[...,0] = bbox[...,0] - bbox[...,2]/2 245 | bbox[...,1] = bbox[...,1] - bbox[...,3]/2 246 | bbox[...,2] = bbox[...,2] + bbox[...,0] 247 | bbox[...,3] = bbox[...,3] + bbox[...,1] 248 | # minimum convex box 249 | def box_c(self,box1,box2) : 250 | l = torch.min(box1[...,0],box2[...,0]).unsqueeze(0) 251 | t = torch.min(box1[...,1],box2[...,1]).unsqueeze(0) 252 | r = torch.max(box1[...,2],box2[...,2]).unsqueeze(0) 253 | b = torch.max(box1[...,3],box2[...,3]).unsqueeze(0) 254 | #print(t.shape) 255 | box_c = torch.cat((l,t,r,b)) 256 | return box_c.permute(1,0) 257 | def box_ciou(self,box1,box2): 258 | ciou = torch.zeros(0,1).to(device) 259 | iou = torch.zeros(0,1).to(device) 260 | #if box2.size(0) == 0 : 261 | # return ciou,iou 262 | box_c = self.box_c(box1,box2) 263 | #print(box_c.shape) 264 | c = self.get_area(box_c).unsqueeze(1) 265 | iou = find_jaccard_overlap(box1, box2) 266 | 267 | w1,h1 = (box1[...,2] - box1[...,0]).unsqueeze(1),(box1[...,3] - box1[...,1]).unsqueeze(1) 268 | w2,h2 = (box2[...,2] - box2[...,0]).unsqueeze(1),(box2[...,3] - box2[...,1]).unsqueeze(1) 269 | x1,y1 = (box1[...,2] + box1[...,0]).unsqueeze(1)/2,(box1[...,1] + box1[...,3]).unsqueeze(1)/2 270 | x2,y2 = (box2[...,2] + box2[...,0]).unsqueeze(1)/2,(box2[...,1] + box2[...,3]).unsqueeze(1)/2 271 | 272 | u = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2); 273 | #if c==0 : 274 | # ciou_term = iou 275 | #else : 276 | #print(c.shape,u.shape) 277 | d = u/c 278 | #print(d.shape) 279 | ar_gt = w2/h2 280 | ar_pred = w1/h1 281 | 282 | ar_loss = 4 / (math.pi * math.pi) * (torch.atan(ar_gt) - torch.atan(ar_pred)) * (torch.atan(ar_gt) - torch.atan(ar_pred)); 283 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001); 284 | ciou_term = d + alpha * ar_loss; 285 | #print(ar_gt.shape,ar_pred.shape,ar_loss.shape,alpha.shape,torch.atan(ar_pred).shape) 286 | mask = (c == 0) 287 | ciou_term = ciou_term * (~mask) + iou*mask 288 | #print(ciou_term.shape,ciou.shape,iou.shape,box1.shape,box2.shape) 289 | ciou = torch.cat((ciou,ciou_term)) 290 | 291 | #print(iou,iou-giou_term) 292 | #print(c,u) 293 | return iou-ciou,iou 294 | 295 | def box_giou(self,box1,box2): 296 | box_c = self.box_c(box1,box2) 297 | c = self.get_area(box_c).unsqueeze(1) 298 | 299 | #iou = find_jaccard_overlap(box1, box2) 300 | u = find_union(box1,box2) 301 | i = find_intersection(box1,box2) 302 | iou = i/u 303 | #print('iou.shape',iou.shape) 304 | #giou_term = [iou if (k1 == 0) else (k1 - k2)/k1 for k1,k2 in zip(c, u)] 305 | #if c==0 : 306 | # giou_term = iou 307 | #else : 308 | # giou_term = (c-u)/c 309 | #print('c.shape',c.shape) 310 | 
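        # GIoU = IoU - (|C| - |U|) / |C|, where C is the smallest enclosing box of the two inputs
        # and U is their union; the first returned value (iou - giou_term) is therefore the GIoU score.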
#print('u.shape',u.shape) 311 | giou_term = (c-u)/c 312 | #print('giou_term.shape',giou_term.shape) 313 | mask = (c == 0) 314 | giou_term = giou_term * (~mask) + iou*mask 315 | #print(iou,iou-giou_term) 316 | #print(c,u) 317 | return iou-giou_term,iou 318 | def get_area(self,box): 319 | return (box[...,2] - box[...,0]) * (box[...,3] - box[...,1]) 320 | def get_aspect_ratio(self,box): 321 | return (box[...,2] - box[...,0]) / (box[...,3] - box[...,1]) 322 | def IOU_Loss(self,gt_box,pred_box,input,output,accumulate): 323 | 324 | X = self.get_area(pred_box) 325 | Xhat = self.get_area(gt_box) 326 | 327 | pred_l,pred_t,pred_r,pred_b = pred_box[...,0],pred_box[...,1],pred_box[...,2],pred_box[...,3] 328 | gt_l,gt_t,gt_r,gt_b = gt_box[...,0],gt_box[...,1],gt_box[...,2],gt_box[...,3] 329 | 330 | Ih = torch.min(pred_b, gt_b) - torch.max(pred_t, gt_t) 331 | Iw = torch.min(pred_r, gt_r) - torch.max(pred_l, gt_l) 332 | I = Iw*Ih # intersection area 333 | #print(Iw,Ih,I) 334 | 335 | #m = I > 0 336 | #if m == False: 337 | # print(Iw,Ih,I) 338 | U = X + Xhat - I; # Union area 339 | Cw = torch.max(pred_r, gt_r) - torch.min(pred_l, gt_l); 340 | Ch = torch.max(pred_b, gt_b) - torch.min(pred_t, gt_t); 341 | C = Cw * Ch; 342 | #iou = find_jaccard_overlap(gt_box, pred_box) 343 | #print(pred_box,gt_box) 344 | #if I<0 : 345 | # I = 0 346 | #print((I/U)==iou) 347 | 348 | dX_wrt_t = -1 * (pred_r - pred_l); 349 | dX_wrt_b = -dX_wrt_t; 350 | dX_wrt_l = -1 * (pred_b - pred_t); 351 | dX_wrt_r = -dX_wrt_l; 352 | 353 | dI_wrt_t = (pred_t > gt_t)*(-Iw) 354 | dI_wrt_b = (pred_b > gt_b)*(Iw) 355 | dI_wrt_l = (pred_l > gt_l)*(-Ih) 356 | dI_wrt_r = (pred_r > gt_r)*(Ih) 357 | 358 | # derivative of U with regard to x 359 | dU_wrt_t = dX_wrt_t - dI_wrt_t 360 | dU_wrt_b = dX_wrt_b - dI_wrt_b 361 | dU_wrt_l = dX_wrt_l - dI_wrt_l 362 | dU_wrt_r = dX_wrt_r - dI_wrt_r 363 | 364 | dC_wrt_t = (pred_t < gt_t)*(-1 * Cw) 365 | dC_wrt_b = (pred_b > gt_b)*Cw 366 | dC_wrt_l = (pred_l < gt_l)*(-1 * Ch) 367 | dC_wrt_r = (pred_r > gt_r)*Ch 368 | 369 | p_dt = p_db = p_dl = p_dr = 0 370 | if U > 0 : 371 | p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U) 372 | p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U) 373 | p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U) 374 | p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U) 375 | #p_dt = ((U+I) * dI_wrt_t)/ (U*I ) - (dX_wrt_t) / U 376 | #p_db = ((U+I) * dI_wrt_b)/ (U*I ) - (dX_wrt_t) / U 377 | #p_dl = ((U+I) * dI_wrt_l)/ (U*I ) - (dX_wrt_t) / U 378 | #p_dr = ((U+I) * dI_wrt_r)/ (U*I ) - (dX_wrt_t) / U 379 | if C > 0 : 380 | # apply "C" term from gIOU 381 | p_dt += ((C * dU_wrt_t) - (U * dC_wrt_t)) / (C * C); 382 | p_db += ((C * dU_wrt_b) - (U * dC_wrt_b)) / (C * C); 383 | p_dl += ((C * dU_wrt_l) - (U * dC_wrt_l)) / (C * C); 384 | p_dr += ((C * dU_wrt_r) - (U * dC_wrt_r)) / (C * C); 385 | 386 | delta_x = ((p_dl + p_dr)) 387 | delta_y = ((p_dt + p_db)) 388 | delta_w = ((-0.5 * p_dl) + (0.5 * p_dr)) 389 | delta_h = ((-0.5 * p_dt) + (0.5 * p_db)) 390 | #tx,ty,tw,th,_ = self.DenseBoxLoss(gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h) 391 | #print(output[...,0]-tx,delta_x) 392 | if accumulate: 393 | tx = (output[...,0] + delta_x*0.5).item() 394 | ty = (output[...,1] + delta_y*0.5).item() 395 | tw = (output[...,2] + (delta_w*torch.exp(input[...,2]))*0.5).item() 396 | th = (output[...,3] + (delta_h*torch.exp(input[...,3]))*0.5).item() 397 | else : 398 | tx = (input[...,0] + delta_x*0.5).item() 399 | ty = (input[...,1] + delta_y*0.5).item() 400 | tw = (input[...,2] + 
(delta_w*torch.exp(input[...,2]))*0.5).item() 401 | th = (input[...,3] + (delta_h*torch.exp(input[...,3]))*0.5).item() 402 | #print(tw,th) 403 | #delta_w = delta_w*torch.exp(delta_w); 404 | #delta_h = delta_h*torch.exp(delta_h); 405 | #print(p_dt,p_db,p_dl,p_dr) 406 | #else : 407 | # tx,ty,tw,th,_ = self.DenseBoxLoss(gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h) 408 | target = torch.Tensor([tx,ty,tw,th]).to(device) 409 | return target,(2.0-Xhat),I/U 410 | 411 | def DenseBoxLoss(self,gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h): 412 | w = gt_box[...,2] - gt_box[...,0] 413 | h = gt_box[...,3] - gt_box[...,1] 414 | x = gt_box[...,0] + w / 2 415 | y = gt_box[...,1] + h / 2 416 | tx = x * in_w - grid_x 417 | ty = y * in_h - grid_y 418 | tw = torch.log(w/anchors[0]) 419 | th = torch.log(h/anchors[1]) 420 | #giou = self.box_giou(gt_box,pred_box) 421 | weight = 2.0 - (w*h) 422 | target = torch.Tensor([tx,ty,tw,th]).to(device) 423 | iou = find_jaccard_overlap(gt_box, pred_box) 424 | return target,weight,iou 425 | def class_loss(self,target_cls,target_weight,cls_idx): 426 | y_true = (1 - self.label_smooth_eps) + 0.5*self.label_smooth_eps; 427 | y_false = 0.5*self.label_smooth_eps; 428 | if target_weight[...,cls_idx]>0: 429 | target_cls[...,cls_idx] = y_true 430 | target_weight[...,cls_idx] = 1 431 | else : 432 | target_cls[...,0:self.num_classes] = y_false 433 | target_weight[...,0:self.num_classes] = 1 434 | target_cls[...,cls_idx] = y_true 435 | #target_weight[cls_idx] = 1 436 | 437 | 438 | 439 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | torchvision 4 | imgaug 5 | lmdb 6 | six 7 | matplotlib 8 | tqdm 9 | nni 10 | opencv_python 11 | progress 12 | filetype 13 | msgpack_python 14 | Pillow 15 | PyYAML 16 | tensorboard 17 | -------------------------------------------------------------------------------- /save/00690c26-e4bbbd72_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/save/00690c26-e4bbbd72_result.jpg -------------------------------------------------------------------------------- /scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to data/ ..." 11 | mkdir -p data 12 | cd data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 
36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p data 12 | cd data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" 39 | -------------------------------------------------------------------------------- /scripts/create.sh: -------------------------------------------------------------------------------- 1 | python3 folder2lmdb.py -d data/bdd100k.yaml 2 | -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | python3 inference.py --checkpoint checkpoints/bdd100k/model_best.pth.tar -y data/bdd100k.yaml -i images/00690c26-e4bbbd72.jpg -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | python train.py --checkpoint checkpoints/voc/mobilenetv2/ -y data/voc_data.yaml -------------------------------------------------------------------------------- /search_space.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate":{"_type":"choice","_value":[0.0004,0.0005,0.0006,0.0007]}, 3 | "ignore_thresh_1":{"_type":"uniform","_value":[0.6, 0.75]}, 4 | "ignore_thresh_2":{"_type":"uniform","_value":[0.5, 0.65]}, 5 | "iou_thresh":{"_type":"uniform","_value":[0.4, 0.6]}, 6 | "expand_scale":{"_type":"uniform","_value":[1.0, 2.5]}, 7 | "mosaic_num":{"_type":"choice", "_value": [[1,4],[2,3,4]]}, 8 | "weight_decay":{"_type":"choice","_value":[1e-2,4e-3,4e-4,4e-5]}, 9 | "iou_weighting":{"_type":"uniform","_value":[0.005, 0.1]} 10 | } -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | import numpy as np 8 | from progress.bar import (Bar, IncrementalBar) 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.distributed as dist 15 | import torch.optim 16 | import torch.multiprocessing as mp 17 | import torch.utils.data 18 | import torch.utils.data.distributed 19 | import torchvision.transforms as transforms 20 | import torchvision.datasets as datasets 21 | import 
torchvision.models as models 22 | import folder2lmdb 23 | import CustomBatchSampler 24 | import cv2 25 | #from models.voc.mbv2_yolo import yolo 26 | #from models.voc.yolo_loss import * 27 | from models.mbv2_yolo import yolo 28 | from models.yolo_loss import * 29 | from utils import Bar, Logger, AverageMeter 30 | from utils.eval_mAP import * 31 | from pprint import PrettyPrinter 32 | import yaml 33 | import nni 34 | from nni.utils import merge_parameter 35 | from nni.trial import get_sequence_id 36 | from nni.trial import get_trial_id 37 | pp = PrettyPrinter() 38 | from torch.utils.tensorboard import SummaryWriter 39 | 40 | def seed_worker(worker_id): 41 | worker_seed = torch.initial_seed() % 2**32 42 | np.random.seed(worker_seed) 43 | random.seed(worker_seed) 44 | 45 | def main(args): 46 | #print('NNI_OUTPUT_DIR',os.environ["NNI_OUTPUT_DIR"]) 47 | #writer = SummaryWriter(os.environ["NNI_OUTPUT_DIR"]+'/tensorboard/') 48 | if 'NNI_OUTPUT_DIR' not in os.environ: 49 | writer = SummaryWriter('tensorboard/') 50 | else: 51 | writer = SummaryWriter(os.environ["NNI_OUTPUT_DIR"]+'/tensorboard/') 52 | #with open('models/voc/config.yaml', 'r') as f: 53 | 54 | #with open('data/voc_data.yaml', 'r') as f: 55 | with open(args.data_yaml, 'r') as f: 56 | dataset_path = yaml.load(f) 57 | classes_name = dataset_path["classes"]["map"] 58 | classes_name.insert(0, 'background') 59 | segmentation_enable = False 60 | segmentation_num_classes = 0 61 | print(dataset_path) 62 | if "segmentation_enable" in dataset_path: 63 | segmentation_enable = dataset_path["segmentation_enable"] 64 | if "segmentation_num_classes" in dataset_path: 65 | segmentation_num_classes = dataset_path["segmentation_num_classes"] 66 | 67 | with open(dataset_path["model_config_path"], 'r') as f: 68 | config = yaml.load(f) 69 | if args.ignore_thresh_1 != None : 70 | config["yolo"]["ignore_thresh"][0] = args.ignore_thresh_1 71 | if args.ignore_thresh_2 != None : 72 | config["yolo"]["ignore_thresh"][1] = args.ignore_thresh_2 73 | if args.iou_thresh != None : 74 | config["yolo"]["iou_thresh"] = args.iou_thresh 75 | if args.expand_scale != None : 76 | config["expand_scale"] = args.expand_scale 77 | if args.mosaic_num != None : 78 | config["mosaic_num"] = args.mosaic_num 79 | if args.iou_weighting != None : 80 | config["iou_weighting"] = args.iou_weighting 81 | print(config) 82 | best_acc = 0 # best test accuracy 83 | #args = parser.parse_args() 84 | start_epoch = 0 85 | 86 | image_folder = folder2lmdb.ImageFolderLMDB 87 | 88 | train_dataset = image_folder( 89 | db_path=dataset_path["trainval_dataset_path"]["lmdb"], 90 | transform_size=config["train_img_size"], 91 | phase='train',batch_size = config["batch_size"], 92 | expand_scale=config["expand_scale"], 93 | mean = config["normalize"]["mean"], 94 | std = config["normalize"]["std"], 95 | has_seg = segmentation_enable, 96 | classes_name = classes_name, 97 | seg_num_classes = segmentation_num_classes 98 | ) 99 | 100 | test_dataset = image_folder( 101 | db_path=dataset_path["test_dataset_path"]["lmdb"], 102 | transform_size=[[config["img_w"],config["img_h"]]], 103 | phase='test',batch_size = config["batch_size"], 104 | mean = config["normalize"]["mean"], 105 | std = config["normalize"]["std"], 106 | has_seg = False, 107 | classes_name = classes_name, 108 | seg_num_classes = segmentation_num_classes 109 | ) 110 | BatchSampler = CustomBatchSampler.GreedyBatchSampler 111 | sampler = BatchSampler ( 112 | torch.utils.data.sampler.RandomSampler(train_dataset), 113 | batch_size=config["batch_size"], 114 | 
drop_last=False,sample=config["mosaic_num"]) 115 | train_loader = torch.utils.data.DataLoader( 116 | train_dataset,batch_sampler = sampler, 117 | num_workers=4, pin_memory=False,collate_fn=train_dataset.collate_fn, 118 | worker_init_fn=seed_worker) 119 | test_loader = torch.utils.data.DataLoader( 120 | test_dataset, config["batch_size"], shuffle=False, 121 | num_workers=4, pin_memory=False,collate_fn=test_dataset.collate_fn) 122 | model = yolo(config=config) 123 | #model_for_graph = yolo_graph(config=config) 124 | #input = torch.randn(1, 3, 352, 352) 125 | #writer.add_graph(model_for_graph,input) 126 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 127 | 128 | model = model.cuda() 129 | # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo 130 | biases = list() 131 | not_biases = list() 132 | 133 | params = model.parameters() 134 | optimizer = optim.AdamW(params=params,lr = args.learning_rate,weight_decay= args.weight_decay) 135 | if not os.path.exists(args.checkpoint): 136 | os.makedirs(args.checkpoint) 137 | title = 'voc-training-process' 138 | if args.resume: 139 | # Load checkpoint. 140 | print('==> Resuming from checkpoint..') 141 | print(args.resume) 142 | assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' 143 | args.checkpoint = os.path.dirname(args.resume) 144 | checkpoint = torch.load(args.resume) 145 | best_acc = checkpoint['best_acc'] 146 | start_epoch = checkpoint['epoch'] 147 | model.load_state_dict(checkpoint['model']) 148 | optimizer.load_state_dict(checkpoint['optimizer']) 149 | model.yolo_losses[0].val_conf = checkpoint['conf'] 150 | model.yolo_losses[1].val_conf = checkpoint['conf'] 151 | logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True) 152 | #for param_group in optimizer.param_groups: 153 | # param_group['lr'] = args.lr 154 | else: 155 | logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title) 156 | logger.set_names(['Epoch ', 'Loss ', 'Precision ', 'Time ', 'IOU ', 'Learning Rate']) 157 | test_acc = 0 158 | if args.evaluate: 159 | for epoch in range(1): 160 | test_acc = test(test_loader, model, optimizer, epoch , config, classes_name) 161 | return 162 | 163 | #ls = len(args.warm_up) 164 | for epoch in range(start_epoch, args.epochs): 165 | if epoch in args.warm_up: 166 | adjust_learning_rate(optimizer, 0.5) 167 | st = time.time() 168 | for epoch in range(start_epoch, args.epochs): 169 | # train for one epoch 170 | if epoch in args.warm_up: 171 | adjust_learning_rate(optimizer, 2) 172 | if epoch in args.schedule: 173 | #load_best_checkpoint(model=model, save_path=args.save_path) 174 | 175 | save_checkpoint({ 176 | 'epoch': epoch , 177 | 'model': model.state_dict(), 178 | 'acc': test_acc, 179 | 'best_acc': best_acc, 180 | 'optimizer' : optimizer.state_dict(), 181 | 'conf' : model.yolo_losses[0].val_conf, 182 | }, False,model,config, checkpoint=args.checkpoint,filename='epoch%d_checkpoint.pth.tar'%epoch,export_path = args.export) 183 | adjust_learning_rate(optimizer, 0.5) 184 | print('adjusted to current lr: ' 185 | '{}'.format([param_group['lr'] for param_group in optimizer.param_groups])) 186 | 187 | log = False 188 | 189 | if epoch%2 == 0 : 190 | log = True 191 | st = time.time() 192 | if segmentation_enable: 193 | print('\nEpoch: [%3d | %3d] LR: %f | loss | cnt | iou | obj | no_obj | class | recall | s_obj | s_no_obj |' \ 194 | % (epoch, args.epochs, optimizer.param_groups[0]['lr'])) 195 | else: 196 | print('\nEpoch: [%3d 
| %3d] LR: %f | loss | cnt | iou | obj | no_obj | class | recall | cnt2 | iou2 | obj2 | no_obj2 | class2 | recall2 |' \ 197 | % (epoch, args.epochs, optimizer.param_groups[0]['lr'])) 198 | 199 | train_loss,iou = train(train_loader, model, optimizer, epoch,sampler,segmentation_enable) 200 | writer.add_scalar('Loss/train', train_loss, epoch) 201 | writer.add_scalar('iou/train', iou, epoch) 202 | if not log : 203 | test_acc = test(test_loader, model, optimizer, epoch , config, classes_name,segmentation_enable) 204 | nni.report_intermediate_result(test_acc) 205 | logger.append([epoch + 1, train_loss , test_acc, time.time()-st,iou, optimizer.param_groups[0]['lr']]) 206 | # save model 207 | is_best = test_acc > best_acc 208 | best_acc = max(test_acc, best_acc) 209 | save_checkpoint({ 210 | 'epoch': epoch + 1, 211 | 'model': model.state_dict(), 212 | 'acc': test_acc, 213 | 'best_acc': best_acc, 214 | 'optimizer' : optimizer.state_dict(), 215 | 'conf' : model.yolo_losses[0].val_conf, 216 | }, is_best,model,config, checkpoint=args.checkpoint,export_path = args.export) 217 | writer.add_scalar('Accuracy/test', test_acc, epoch+ 1) 218 | else : 219 | save_checkpoint({ 220 | 'epoch': epoch + 1, 221 | 'model': model.state_dict(), 222 | 'acc': test_acc, 223 | 'best_acc': best_acc, 224 | 'optimizer' : optimizer.state_dict(), 225 | 'conf' : model.yolo_losses[0].val_conf, 226 | }, False,model,config, checkpoint=args.checkpoint,export_path = args.export) 227 | 228 | nni.report_final_result(best_acc) 229 | def train(train_loader, model, optimizer,epoch,sampler,segmentation_enable): 230 | model.train() 231 | bar = IncrementalBar('Training', max=len(sampler),width=12) 232 | #batch_time = AverageMeter() 233 | #data_time = AverageMeter() 234 | losses = AverageMeter() 235 | recall = [AverageMeter(),AverageMeter()] 236 | iou = [AverageMeter(),AverageMeter()] 237 | obj = [AverageMeter(),AverageMeter()] 238 | no_obj = [AverageMeter(),AverageMeter()] 239 | conf_loss = [AverageMeter(),AverageMeter()] 240 | cls_loss = [AverageMeter(),AverageMeter()] 241 | cls_score = [AverageMeter(),AverageMeter()] 242 | count = [AverageMeter(),AverageMeter()] 243 | seg_obj = AverageMeter() 244 | seg_no_obj = AverageMeter() 245 | #end = time.time() 246 | for batch_idx, (images,targets,total_num,seg_maps) in enumerate(train_loader): 247 | #print('\n1-',sum(sampler.get_mosaic_array()),'\n') 248 | #print('1-',sampler.mosaic_array,'\n') 249 | #print(targets) 250 | #data_time.update(time.time() - end) 251 | bs = images.size(0) 252 | #print(images.shape) 253 | #print(i,targets[0]) 254 | optimizer.zero_grad() 255 | images = images.to(device) # (batch_size (N), 3, H, W) 256 | if segmentation_enable: 257 | seg_maps = seg_maps.to(device) # (batch_size (N), H, W, num seg class) 258 | outputs,seg_out = model(images,targets,seg_maps) 259 | else: 260 | outputs = model(images,targets,seg_maps) 261 | #losses0 = yolo_losses[0](outputs[0],targets) 262 | #losses1 = yolo_losses[1](outputs[1],targets) 263 | t_loss = list() 264 | 265 | for i,l in enumerate(outputs): 266 | #print(l[0]) 267 | t_loss.append(l[0]) 268 | recall[i].update(l[1]) 269 | iou[i].update(l[2]) 270 | obj[i].update(l[3]) 271 | no_obj[i].update(l[4]) 272 | cls_score[i].update(l[5]) 273 | count[i].update(l[6]) 274 | #conf_loss.update(l[5]) 275 | #cls_loss.update(l[6]) 276 | loss = sum(t_loss) 277 | if segmentation_enable: 278 | seg_obj.update(seg_out[1]) 279 | seg_no_obj.update(seg_out[2]) 280 | loss += seg_out[0] 281 | losses.update(loss.item(),bs) 282 | loss.backward() 283 | 
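        # Gradients from both YOLO heads (and the segmentation branch, when enabled) have been
        # accumulated by loss.backward(); the optimizer step below (AdamW in main()) applies the update.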
optimizer.step() 284 | # measure elapsed time 285 | #batch_time.update(time.time() - end) 286 | #end = time.time() 287 | if segmentation_enable: 288 | bar.suffix = \ 289 | '%(percent)3d%% | {total:} | {loss:.4f} | {cnt:2.1f} | {iou:.3f} | {obj:.3f} | {no_obj:.4f} | {cls:.3f} | {rec:.4f} | {seg_obj:.3f} | {seg_no_obj:.6f} |'\ 290 | .format( 291 | total=bar.elapsed_td, 292 | loss=losses.avg, 293 | cnt=(count[0].avg+count[1].avg), 294 | iou=(iou[0].avg+iou[1].avg)/2., 295 | obj=(obj[0].avg+obj[1].avg)/2., 296 | no_obj=(no_obj[0].avg+no_obj[1].avg)/2., 297 | cls=(cls_score[0].avg+cls_score[1].avg)/2., 298 | rec=(recall[0].avg+recall[1].avg)/2., 299 | seg_obj=seg_obj.avg, 300 | seg_no_obj = seg_no_obj.avg 301 | ) 302 | else: 303 | bar.suffix = \ 304 | '%(percent)3d%% | {total:} | {loss:.4f} | {cnt1:2.1f} | {iou1:.3f} | {obj1:.3f} | {no_obj1:.4f} | {cls1:.3f} | {rec1:.3f} | {cnt2:2.1f} | {iou2:.3f} | {obj2:.3f} | {no_obj2:.4f} | {cls2:.3f} | {rec2:.3f} |'\ 305 | .format( 306 | #batch=batch_idx + 1, 307 | #size=len(train_loader), 308 | #data=data_time.avg, 309 | #bt=batch_time.avg, 310 | total=bar.elapsed_td, 311 | loss=losses.avg, 312 | #loss1=losses[0].avg, 313 | #loss2=losses[1].avg, 314 | cnt1=(count[0].avg), 315 | cnt2=(count[1].avg), 316 | #recall=recall.avg, 317 | iou1=iou[0].avg, 318 | iou2=iou[1].avg, 319 | obj1=obj[0].avg, 320 | no_obj1=no_obj[0].avg, 321 | cls1=cls_score[0].avg, 322 | obj2=obj[1].avg, 323 | no_obj2=no_obj[1].avg, 324 | cls2=cls_score[1].avg, 325 | rec1=recall[0].avg, 326 | rec2=recall[1].avg, 327 | #cls=cls_loss.avg, 328 | ) 329 | bar.next(total_num) 330 | bar.finish() 331 | return losses.avg,(iou[0].avg+iou[1].avg)/2 332 | 333 | def test(test_loader, model, optimizer,epoch , config, classes_name,segmentation_enable): 334 | 335 | # switch to evaluate mode 336 | model.eval() 337 | n_classes = config['yolo']['classes']; 338 | 339 | end = time.time() 340 | #bar = Bar('Validating', max=len(test_loader)) 341 | bar = IncrementalBar('Validating', max=len(test_loader),width=32) 342 | #for batch_idx, (inputs, targets) in enumerate(testloader): 343 | n_gt = [0]*n_classes 344 | correct = [0]*n_classes 345 | n_pred = [0]*n_classes 346 | n_iou = [0]*n_classes 347 | n_images = 0 348 | det_boxes = list() 349 | det_labels = list() 350 | det_scores = list() 351 | true_boxes = list() 352 | true_labels = list() 353 | true_difficulties = list() 354 | gt_box = 0 355 | pred_box = 0 356 | 357 | for batch_idx, (images,targets) in enumerate(test_loader): 358 | images = images.to(device) # (batch_size (N), 3, H, W) 359 | labels = [torch.Tensor(l).to(device) for l in targets] 360 | bs = len(labels) 361 | # compute output 362 | with torch.no_grad(): 363 | if segmentation_enable: 364 | detections,_ = model(images) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 365 | else: 366 | detections = model(images) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 367 | for sample_i in range(bs): 368 | 369 | # Get labels for sample where width is not zero (dummies) 370 | # print(len(labels[0]),labels[sample_i]) 371 | target_sample = labels[sample_i] 372 | gt_box = gt_box + len(target_sample) 373 | tx1, tx2 = torch.unsqueeze((target_sample[...,1] - target_sample[...,3] / 2),1), torch.unsqueeze((target_sample[...,1] + target_sample[...,3] / 2),1) 374 | ty1, ty2 = torch.unsqueeze((target_sample[...,2] - target_sample[...,4] / 2),1), torch.unsqueeze((target_sample[...,2] + target_sample[...,4] / 2),1) 375 | box = torch.cat((tx1,ty1,tx2,ty2),1) 376 | size = target_sample.size(0) 377 | 378 
| true_boxes.append(box) 379 | true_labels.append(target_sample[...,0]) 380 | true_difficulties.append(torch.zeros(size, requires_grad=False)) 381 | #print(detections[0][sample_i].shape,detections[1][sample_i].shape) 382 | preds = detections[sample_i] 383 | pred_box = pred_box + len(preds) 384 | if preds is not None: 385 | det_boxes.append(preds[...,:4]) 386 | det_labels.append((preds[...,6]+1).to(device)) 387 | conf = (preds[...,4] * preds[...,5]).to(device) 388 | det_scores.append(conf) 389 | else : 390 | empty = torch.empty(0).to(device) 391 | det_boxes.append(empty) 392 | det_labels.append(empty) 393 | det_scores.append(empty) 394 | 395 | n_images = n_images + 1 396 | 397 | 398 | # measure elapsed time 399 | sum_gt = sum(n_gt) 400 | sum_n_pred= sum(n_pred) 401 | # plot progress 402 | bar.suffix = '({batch}/{size}) | Total: {total:} | ETA: {eta:}| n_img: {n_img:} | gt_box: {gt_box:} | pred_box: {pred_box:}'.format( 403 | batch=batch_idx + 1, 404 | size=len(test_loader), 405 | 406 | total=bar.elapsed_td, 407 | eta=bar.eta_td, 408 | n_img=n_images, 409 | gt_box=gt_box, 410 | pred_box=pred_box 411 | ) 412 | bar.next() 413 | #if batch_idx == 50: 414 | # break 415 | bar.finish() 416 | print("\nVal conf. is %f\n" % (model.yolo_losses[0].val_conf)) 417 | model.yolo_losses[0].val_conf = adjust_confidence(gt_box,pred_box,model.yolo_losses[0].val_conf) 418 | model.yolo_losses[1].val_conf = adjust_confidence(gt_box,pred_box,model.yolo_losses[1].val_conf) 419 | 420 | # Calculate mAP 421 | APs, mAP, TP, FP = calculate_mAP(det_boxes, det_labels, det_scores, true_boxes, true_labels, true_difficulties, classes_name) 422 | pp.pprint(APs) 423 | print('\nMean Average Precision (mAP): %.3f' % mAP) 424 | return mAP 425 | def save_checkpoint(state, is_best,model,config, checkpoint='checkpoint', filename='checkpoint.pth.tar',export_path = 'checkpoint'): 426 | 427 | filepath = os.path.join(checkpoint, filename) 428 | torch.save(state, filepath) 429 | #save_onnx(filepath,model) 430 | if is_best: 431 | torch.save(model, os.path.join(checkpoint, 'model_best.pth.tar')) 432 | #dummy_input = torch.randn(1, 3, config["img_w"], config["img_h"]) # 433 | #torch.onnx.export(model, dummy_input,os.path.join(export_path, 'model_best.onnx')) 434 | def adjust_confidence(gt_box_num,pred_box_num,conf): 435 | if pred_box_num>gt_box_num*3 : 436 | conf = conf + 0.01 437 | elif pred_box_num < gt_box_num and conf > 0.01: 438 | conf = conf - 0.01 439 | 440 | return conf 441 | def adjust_learning_rate(optimizer, scale): 442 | """ 443 | Scale learning rate by a specified factor. 444 | 445 | :param optimizer: optimizer whose learning rate must be shrunk. 446 | :param scale: factor to multiply learning rate with. 
447 | """ 448 | for param_group in optimizer.param_groups: 449 | param_group['lr'] = param_group['lr'] * scale 450 | print("Change learning rate.\n The new LR is %f\n" % (optimizer.param_groups[0]['lr'])) 451 | 452 | def get_params(): 453 | # Training settings 454 | parser = argparse.ArgumentParser(description='PyTorch Training') 455 | parser.add_argument('-y', '--data_yaml', dest='data_yaml', default='data/voc_data.yaml', type=str, metavar='PATH', 456 | help='path to data_yaml') 457 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 458 | help='momentum') 459 | parser.add_argument('--weight-decay', '--wd', default=0.0004, type=float, 460 | metavar='W', help='weight decay (default: 1e-4)') 461 | parser.add_argument('--learning_rate', default=0.0007, type=float, 462 | metavar='LR', help='initial learning rate') 463 | parser.add_argument('--warm-up', '--warmup', default=[], type=float, 464 | metavar='warmup', help='warm up learning rate') 465 | parser.add_argument('--epochs', default=300, type=int, metavar='N', 466 | help='number of total epochs to run') 467 | parser.add_argument('--schedule', type=int, nargs='+', default=[100,170,240], 468 | help='Decrease learning rate at these epochs.') 469 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 470 | help='path to latest checkpoint (default: none)') 471 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 472 | help='path to save checkpoint (default: checkpoint)') 473 | #parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 474 | # help='evaluate model on validation set') 475 | parser.add_argument('-o', '--export', dest='export', default='checkpoint', type=str, metavar='PATH', 476 | help='path to export checkpoint (default: checkpoint)') 477 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='Evaluate mAP? 
default=False') 478 | parser.add_argument('--mosaic_num', default=None, type=int, help='mosaic number in image augmentation') 479 | parser.add_argument('--ignore_thresh_1', default=None, type=float, help='ignore layer 1') 480 | parser.add_argument('--ignore_thresh_2', default=None, type=float, help='ignore layer 2') 481 | parser.add_argument('--iou_thresh', default=None, type=float, help='ignore iou thresh') 482 | parser.add_argument('--expand_scale', default=None, type=float, help='image augmentation expand scale') 483 | parser.add_argument('--iou_weighting', default=None, type=float, help='iou loss weighting') 484 | args = parser.parse_args() 485 | return args 486 | 487 | if __name__ == '__main__': 488 | try: 489 | # get parameters form tuner 490 | tuner_params = nni.get_next_parameter() 491 | #logger.debug(tuner_params) 492 | print(tuner_params) 493 | 494 | params = merge_parameter(get_params(), tuner_params) 495 | id = get_sequence_id() 496 | #params.checkpoint = 'checkpoints/%d' % id 497 | #print(params) 498 | 499 | main(params) 500 | except Exception as exception: 501 | #logger.exception(exception) 502 | raise 503 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Useful utils 2 | """ 3 | from .misc import * 4 | from .logger import * 5 | 6 | # progress bar 7 | import os, sys 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "progress")) 9 | from progress.bar import Bar as Bar -------------------------------------------------------------------------------- /utils/box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 5 | 6 | def wh_to_x2y2(bbox): 7 | bbox[...,0] = bbox[...,0] - bbox[...,2]/2 8 | bbox[...,1] = bbox[...,1] - bbox[...,3]/2 9 | bbox[...,2] = bbox[...,2] + bbox[...,0] 10 | bbox[...,3] = bbox[...,3] + bbox[...,1] 11 | def nms(preds,num_classes) : 12 | nms_preds = list() 13 | assert len(preds) == 2 #only do two layers yolo 14 | assert len(preds[0]) == len(preds[1]) 15 | bs = len(preds[0]) 16 | for b in range(bs): 17 | pred_per_img = torch.cat((preds[0][b],preds[1][b]),0) 18 | pred_boxes = torch.zeros(0,7, requires_grad=False).to(device) 19 | if pred_per_img.size(0): 20 | for i in range(num_classes) : 21 | mask = (pred_per_img[...,6] == i) 22 | pred_this_cls = pred_per_img[mask] 23 | 24 | if pred_this_cls.size(0): 25 | #print(pred_this_cls.shape,pred_per_img.shape) 26 | boxes = pred_this_cls[...,:4] 27 | scores = pred_this_cls[...,5]*pred_this_cls[...,4] 28 | index = torchvision.ops.nms(boxes,scores,0.45) 29 | pred_boxes = torch.cat((pred_boxes,pred_this_cls[index]),0) 30 | nms_preds.append(pred_boxes) 31 | return nms_preds -------------------------------------------------------------------------------- /utils/eval_mAP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from utils.iou import * 3 | import torch.multiprocessing as mp 4 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 5 | from itertools import product 6 | import time 7 | 8 | def eval_single_image_recall(this_true_labels,this_det_labels,true_box,true_difficultie,det_box,det_score): 9 | #print(true_boxes[num].shape) 10 | n_easy_object = 0 11 | #this_true_labels = (true_label == c) 12 | #this_det_labels = (det_label == c) 
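# this_true_labels / this_det_labels are boolean masks selecting this class's ground-truth and detected boxes for one image; the caller passes (true_labels[num] == c) and (det_labels[num] == c).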
13 | #print(this_true_labels) 14 | true_class_boxes = true_box[this_true_labels] 15 | 16 | true_class_difficulties = true_difficultie[this_true_labels] 17 | n_easy_object += (1 - true_class_difficulties).sum() # ignore difficult objects 18 | 19 | 20 | det_class_boxes = det_box[this_det_labels] # (n_class_detections, 4) 21 | det_class_scores = det_score[this_det_labels] # (n_class_detections) 22 | n_class_detections = det_class_boxes.size(0) 23 | 24 | true_positive = torch.zeros((n_class_detections), dtype=torch.float).to(device) # (n_class_detections) 25 | false_positive = torch.zeros((n_class_detections), dtype=torch.float).to(device) # (n_class_detections) 26 | if n_class_detections == 0: 27 | #sharedlist.append([true_positive,false_positive,n_easy_object,det_class_scores]) 28 | return (true_positive,false_positive,n_easy_object,det_class_scores) 29 | #print(true_positive,false_positive,n_easy_object) 30 | #return true_positive,false_positive,n_easy_object,det_class_scores 31 | true_class_boxes_detected = torch.zeros((true_class_difficulties.size(0)), dtype=torch.uint8).to(device) # (n_class_objects) 32 | for d in range(n_class_detections): 33 | this_detection_box = det_class_boxes[d].unsqueeze(0) # (1, 4) 34 | object_boxes = true_class_boxes 35 | 36 | object_difficulties = true_class_difficulties 37 | if object_boxes.size(0) == 0: 38 | false_positive[d] = 1 39 | continue 40 | # Find maximum overlap of this detection with objects in this image of this class 41 | overlaps = find_jaccard_overlap(this_detection_box, object_boxes) # (1, n_class_objects_in_img) 42 | max_overlap, ind = torch.max(overlaps.squeeze(0), dim=0) # (), () - scalars 43 | 44 | 45 | # 'ind' is the index of the object in these image-level tensors 'object_boxes', 'object_difficulties' 46 | # In the original class-level tensors 'true_class_boxes', etc., 'ind' corresponds to object with index... 
47 | original_ind = torch.LongTensor(range(true_class_boxes.size(0)))[ind] 48 | # We need 'original_ind' to update 'true_class_boxes_detected' 49 | 50 | # If the maximum overlap is greater than the threshold of 0.5, it's a match 51 | if max_overlap.item() > 0.5: 52 | # If the object it matched with is 'difficult', ignore it 53 | if object_difficulties[ind] == 0: 54 | # If this object has already not been detected, it's a true positive 55 | if true_class_boxes_detected[original_ind] == 0: 56 | true_positive[d] = 1 57 | true_class_boxes_detected[original_ind] = 1 # this object has now been detected/accounted for 58 | # Otherwise, it's a false positive (since this object is already accounted for) 59 | else: 60 | false_positive[d] = 1 61 | # Otherwise, the detection occurs in a different location than the actual object, and is a false positive 62 | else: 63 | false_positive[d] = 1 64 | #sharedlist.append([true_positive,false_positive,n_easy_object,det_class_scores]) 65 | return (true_positive,false_positive,n_easy_object,det_class_scores) 66 | #print(true_positive,false_positive,n_easy_object) 67 | #return true_positive,false_positive,n_easy_object,det_class_scores 68 | 69 | def eval_class_ap(c,num_of_imgs,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores): 70 | n_easy_class_objects = 0 71 | true_positives = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 72 | false_positives = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 73 | det_class_scores_all = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 74 | #ctx = mp.get_context('spawn') 75 | #pool = ctx.Pool(processes=4) 76 | #class_labels = [c] * num_of_imgs 77 | #manager = ctx.Manager() 78 | #sharedlist= manager.list() 79 | ''' 80 | data = list() 81 | for class_label,true_label,det_label,true_boxe,true_difficultie,det_boxe,det_score in zip(class_labels,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores): 82 | data.append([c,class_label,true_label,det_label,true_boxe,true_difficultie,det_boxe,det_score]) 83 | results = pool.map(eval_single_image_recall,data) 84 | pool.close() 85 | pool.join() 86 | for result in results: 87 | true_positives = torch.cat((true_positives,result[0]),0) 88 | false_positives = torch.cat((false_positives,result[1]),0) 89 | n_easy_class_objects += result[2] 90 | det_class_scores_all = torch.cat((det_class_scores_all,result[3]),0) 91 | ''' 92 | 93 | for num in range(num_of_imgs): 94 | #print(true_boxes[num].shape) 95 | #eval_single_image_recall(sharedlist,c,true_labels[num],det_labels[num],true_boxes[num],true_difficulties[num],det_boxes[num],det_scores[num]) 96 | true_positive,false_positive,n_easy_object,det_class_scores = eval_single_image_recall((true_labels[num] == c) ,(det_labels[num] == c) ,true_boxes[num],true_difficulties[num],det_boxes[num],det_scores[num]) 97 | true_positives = torch.cat((true_positives,true_positive),0) 98 | false_positives = torch.cat((false_positives,false_positive),0) 99 | n_easy_class_objects += n_easy_object 100 | det_class_scores_all = torch.cat((det_class_scores_all,det_class_scores),0) 101 | ''' 102 | for idx,(true_positive,false_positive,n_easy_object,det_class_scores) in enumerate(sharedlist): 103 | true_positives = torch.cat((true_positives,true_positive),0) 104 | false_positives = torch.cat((false_positives,false_positive),0) 105 | n_easy_class_objects += n_easy_object 106 | det_class_scores_all = torch.cat((det_class_scores_all,det_class_scores),0) 107 | ''' 108 | # Compute 
cumulative precision and recall at each detection in the order of decreasing scores 109 | #print(true_positives.shape) 110 | det_class_scores_all, sort_ind = torch.sort(det_class_scores_all, dim=0, descending=True) # (n_class_detections) 111 | 112 | true_positives = true_positives[sort_ind] # (n_class_detections) 113 | false_positives = false_positives[sort_ind] # (n_class_detections, 4) 114 | n_sum_true_positive = torch.sum(true_positives) 115 | n_sum_false_positive = torch.sum(false_positives) 116 | cumul_true_positives = torch.cumsum(true_positives, dim=0) # (n_class_detections) 117 | cumul_false_positives = torch.cumsum(false_positives, dim=0) # (n_class_detections) 118 | cumul_precision = cumul_true_positives / ( 119 | cumul_true_positives + cumul_false_positives + 1e-10) # (n_class_detections) 120 | cumul_recall = cumul_true_positives / n_easy_class_objects # (n_class_detections) 121 | 122 | # Find the mean of the maximum of the precisions corresponding to recalls above the threshold 't' 123 | recall_thresholds = torch.arange(start=0, end=1.1, step=.1).tolist() # (11) 124 | precisions = torch.zeros((len(recall_thresholds)), dtype=torch.float).to(device) # (11) 125 | for i, t in enumerate(recall_thresholds): 126 | recalls_above_t = cumul_recall >= t 127 | if recalls_above_t.any(): 128 | precisions[i] = cumul_precision[recalls_above_t].max() 129 | else: 130 | precisions[i] = 0. 131 | 132 | return precisions.mean().item(),n_sum_true_positive,n_sum_false_positive 133 | 134 | def calculate_mAP(det_boxes, det_labels, det_scores, true_boxes, true_labels, true_difficulties,classes_name): 135 | start_time = time.process_time() 136 | n_classes = len(classes_name) 137 | #print(n_classes) 138 | classes_map = {k: v for v, k in enumerate(classes_name)} 139 | #classes_map['background'] = 0 140 | od_classes_map = {v: k for k, v in classes_map.items()} # Inverse mapping 141 | 142 | """ 143 | Calculate the Mean Average Precision (mAP) of detected objects. 144 | See https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173 for an explanation 145 | :param det_boxes: list of tensors, one tensor for each image containing detected objects' bounding boxes 146 | :param det_labels: list of tensors, one tensor for each image containing detected objects' labels 147 | :param det_scores: list of tensors, one tensor for each image containing detected objects' labels' scores 148 | :param true_boxes: list of tensors, one tensor for each image containing actual objects' bounding boxes 149 | :param true_labels: list of tensors, one tensor for each image containing actual objects' labels 150 | :param true_difficulties: list of tensors, one tensor for each image containing actual objects' difficulty (0 or 1) 151 | :return: list of average precisions for all classes, mean average precision (mAP) 152 | """ 153 | #print(len(det_boxes),len(det_labels),len(det_scores),len(true_boxes),len(true_labels),len(true_difficulties)) 154 | assert len(det_boxes) == len(det_labels) == len(det_scores) == len(true_boxes) == len( 155 | true_labels) == len( 156 | true_difficulties) # these are all lists of tensors of the same length, i.e. 
number of images 157 | num_of_imgs = len(det_boxes) 158 | # print(len(det_boxes), len(det_labels), len(det_scores), len(true_boxes), len(true_labels), len(true_difficulties)) 159 | 160 | # Store all (true) objects in a single continuous tensor while keeping track of the image it is from 161 | 162 | # Calculate APs for each class (except background) 163 | average_precisions = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 164 | class_true_positive = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 165 | class_false_positive = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 166 | 167 | for c in range(1, n_classes): 168 | precision,n_sum_true_positive,n_sum_false_positive = eval_class_ap(c,num_of_imgs,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores) 169 | 170 | average_precisions[c - 1] = precision 171 | class_true_positive[c - 1] = n_sum_true_positive 172 | class_false_positive[c - 1] = n_sum_false_positive 173 | 174 | #n_easy_class_objects = int(n_easy_class_objects) 175 | # Calculate Mean Average Precision (mAP) 176 | 177 | mean_average_precision = average_precisions.mean().item() 178 | 179 | # Keep class-wise average precisions in a dictionary 180 | average_precisions = {od_classes_map[c + 1]: v for c, v in enumerate(average_precisions.tolist())} 181 | class_true_positive = {od_classes_map[c + 1]: v for c, v in enumerate(class_true_positive.tolist())} 182 | class_false_positive = {od_classes_map[c + 1]: v for c, v in enumerate(class_false_positive.tolist())} 183 | print("The time used to execute this is given below") 184 | 185 | end_time = time.process_time() 186 | 187 | print(end_time - start_time ) 188 | return average_precisions, mean_average_precision, class_true_positive, class_false_positive -------------------------------------------------------------------------------- /utils/image_augmentation.py: -------------------------------------------------------------------------------- 1 | # Some augmentation functions below have been adapted from 2 | # From https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 3 | import numpy as np 4 | import torch 5 | import random 6 | import torchvision.transforms.functional as FT 7 | from torchvision import transforms 8 | from PIL import Image, ImageDraw, ImageFont 9 | import cv2 10 | from utils.iou import* 11 | 12 | class Image_Augmentation(): 13 | 14 | def expand_od(self,image, boxes, filler,expand_scale, seg_id = None): 15 | """ 16 | Perform a zooming out operation by placing the image in a larger canvas of filler material. 17 | 18 | Helps to learn to detect smaller objects. 
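For example, with expand_scale = 1.5 a 300x300 image may be placed at a random (left, top) offset on a canvas of up to 450x450 pixels filled with the filler colour; every box is then shifted by that same (left, top) offset.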
19 | 20 | :param image: image, a tensor of dimensions (3, original_h, original_w) 21 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 22 | :param filler: RBG values of the filler material, a list like [R, G, B] 23 | :return: expanded image, updated bounding box coordinates 24 | """ 25 | # Calculate dimensions of proposed expanded (zoomed-out) image 26 | original_h = image.size(1) 27 | original_w = image.size(2) 28 | max_scale = expand_scale 29 | scale = random.uniform(1, max_scale) 30 | new_h = int(scale * original_h) 31 | new_w = int(scale * original_w) 32 | 33 | # Create such an image with the filler 34 | filler = torch.FloatTensor(filler) # (3) 35 | new_image = torch.ones((3, new_h, new_w), dtype=torch.float) * filler.unsqueeze(1).unsqueeze(1) # (3, new_h, new_w) 36 | new_seg_id = torch.zeros((1, new_h, new_w), dtype=torch.float) 37 | # Note - do not use expand() like new_image = filler.unsqueeze(1).unsqueeze(1).expand(3, new_h, new_w) 38 | # because all expanded values will share the same memory, so changing one pixel will change all 39 | 40 | # Place the original image at random coordinates in this new image (origin at top-left of image) 41 | left = random.randint(0, new_w - original_w) 42 | right = left + original_w 43 | top = random.randint(0, new_h - original_h) 44 | bottom = top + original_h 45 | new_image[:, top:bottom, left:right] = image 46 | if seg_id!=None: 47 | new_seg_id[:, top:bottom, left:right] = seg_id 48 | #print('\n',image.shape) 49 | # Adjust bounding boxes' coordinates accordingly 50 | new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(0) # (n_objects, 4), n_objects is the no. of objects in this image 51 | 52 | return new_image, new_boxes, new_seg_id 53 | 54 | def random_crop_od(self,image, boxes, labels, difficulties, seg_id=None): 55 | """ 56 | Performs a random crop in the manner stated in the paper. Helps to learn to detect larger and partial objects. 57 | 58 | Note that some objects may be cut out entirely. 
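The minimum Jaccard overlap is drawn from {0, .1, .2, .3, .4, .5, None} (None means no crop); up to 50 candidate crops with side scales in [0.5, 1] and an aspect ratio in (0.5, 2) are tried, and only boxes whose centers lie inside the accepted crop are kept.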
59 | 60 | Adapted from https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 61 | 62 | :param image: image, a tensor of dimensions (3, original_h, original_w) 63 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 64 | :param labels: labels of objects, a tensor of dimensions (n_objects) 65 | :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects) 66 | :return: cropped image, updated bounding box coordinates, updated labels, updated difficulties 67 | """ 68 | original_h = image.size(1) 69 | original_w = image.size(2) 70 | # Keep choosing a minimum overlap until a successful crop is made 71 | while True: 72 | # Randomly draw the value for minimum overlap 73 | min_overlap = random.choice([0., .1, .2, .3, .4, .5, None]) # 'None' refers to no cropping 74 | 75 | # If not cropping 76 | if min_overlap is None: 77 | return image, boxes, labels, difficulties, seg_id 78 | 79 | # Try up to 50 times for this choice of minimum overlap 80 | # This isn't mentioned in the paper, of course, but 50 is chosen in paper authors' original Caffe repo 81 | max_trials = 50 82 | for _ in range(max_trials): 83 | # Crop dimensions must be in [0.3, 1] of original dimensions 84 | # Note - it's [0.1, 1] in the paper, but actually [0.3, 1] in the authors' repo 85 | min_scale = 0.5 86 | scale_h = random.uniform(min_scale, 1) 87 | scale_w = random.uniform(min_scale, 1) 88 | new_h = int(scale_h * original_h) 89 | new_w = int(scale_w * original_w) 90 | 91 | # Aspect ratio has to be in [0.5, 2] 92 | aspect_ratio = new_h / new_w 93 | if not 0.5 < aspect_ratio < 2: 94 | continue 95 | 96 | # Crop coordinates (origin at top-left of image) 97 | left = random.randint(0, original_w - new_w) 98 | right = left + new_w 99 | top = random.randint(0, original_h - new_h) 100 | bottom = top + new_h 101 | crop = torch.FloatTensor([left, top, right, bottom]) # (4) 102 | if boxes.shape[0]>0: 103 | # Calculate Jaccard overlap between the crop and the bounding boxes 104 | overlap = find_jaccard_overlap(crop.unsqueeze(0),boxes) # (1, n_objects), n_objects is the no. of objects in this image 105 | overlap = overlap.squeeze(0) # (n_objects) 106 | 107 | # If not a single bounding box has a Jaccard overlap of greater than the minimum, try again 108 | 109 | if overlap.max().item() < min_overlap: 110 | continue 111 | 112 | # Crop image 113 | new_image = image[:, top:bottom, left:right] # (3, new_h, new_w) 114 | new_seg_id = None 115 | if seg_id!=None: 116 | new_seg_id = seg_id[:, top:bottom, left:right] # (3, new_h, new_w) 117 | if boxes.shape[0]>0: 118 | # Find centers of original bounding boxes 119 | bb_centers = (boxes[:, :2] + boxes[:, 2:]) / 2. 
# (n_objects, 2) 120 | 121 | # Find bounding boxes whose centers are in the crop 122 | centers_in_crop = (bb_centers[:, 0] > left) * (bb_centers[:, 0] < right) * (bb_centers[:, 1] > top) * ( 123 | bb_centers[:, 1] < bottom) # (n_objects), a Torch uInt8/Byte tensor, can be used as a boolean index 124 | 125 | # If not a single bounding box has its center in the crop, try again 126 | if not centers_in_crop.any(): 127 | continue 128 | 129 | # Discard bounding boxes that don't meet this criterion 130 | 131 | new_boxes = boxes[centers_in_crop, :] 132 | new_labels = labels[centers_in_crop] 133 | new_difficulties = difficulties[centers_in_crop] 134 | 135 | # Calculate bounding boxes' new coordinates in the crop 136 | new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2]) # crop[:2] is [left, top] 137 | new_boxes[:, :2] -= crop[:2] 138 | new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:]) # crop[2:] is [right, bottom] 139 | new_boxes[:, 2:] -= crop[:2] 140 | else : 141 | new_boxes = boxes 142 | new_labels = labels 143 | new_difficulties = difficulties 144 | 145 | return new_image, new_boxes, new_labels, new_difficulties, new_seg_id 146 | 147 | def flip_od(self,image, boxes, seg_id=None): 148 | """ 149 | Flip image horizontally. 150 | 151 | :param image: image, a PIL Image 152 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 153 | :return: flipped image, updated bounding box coordinates 154 | """ 155 | # Flip image 156 | new_image = FT.hflip(image) 157 | new_seg_id = None 158 | if seg_id!=None: 159 | new_seg_id = FT.hflip(seg_id) 160 | # Flip boxes 161 | new_boxes = boxes 162 | new_boxes[:, 0] = image.width - boxes[:, 0] - 1 163 | new_boxes[:, 2] = image.width - boxes[:, 2] - 1 164 | new_boxes = new_boxes[:, [2, 1, 0, 3]] 165 | 166 | return new_image, new_boxes, new_seg_id 167 | 168 | 169 | def photometric_distort(self,image): 170 | """ 171 | Distort brightness, contrast, saturation, and hue, each with a 50% chance, in random order. 172 | 173 | :param image: image, a PIL Image 174 | :return: distorted image 175 | """ 176 | new_image = image 177 | 178 | distortions = [FT.adjust_brightness, 179 | FT.adjust_contrast, 180 | FT.adjust_saturation, 181 | FT.adjust_hue, 182 | FT.adjust_gamma] 183 | 184 | random.shuffle(distortions) 185 | 186 | for d in distortions: 187 | if random.random() < 0.5: 188 | if d.__name__ is 'adjust_hue': 189 | # Caffe repo uses a 'hue_delta' of 18 - we divide by 255 because PyTorch needs a normalized value 190 | adjust_factor = random.uniform(-18 / 255., 18 / 255.) 
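# Note: FT.adjust_hue expects a hue factor in [-0.5, 0.5]; 18/255 is roughly 0.07, i.e. a small hue shift.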
191 | else: 192 | # Caffe repo uses 'lower' and 'upper' values of 0.5 and 1.5 for brightness, contrast, and saturation 193 | adjust_factor = random.uniform(0.5, 1.5) 194 | 195 | # Apply this distortion 196 | new_image = d(new_image, adjust_factor) 197 | 198 | return new_image 199 | def generate_mosaic_mask(self,num,size): 200 | mosaic_mask = [[0,0,size[0],size[1]]] 201 | x_center = int(random.uniform(.25,.75)*size[0]) 202 | y_center = int(random.uniform(.25,.75)*size[1]) 203 | if num == 2 : 204 | mosaic_mask1 = [[0,0,x_center,size[1]],[x_center,0,size[0],size[1]]] 205 | mosaic_mask2 = [[0,0,size[0],y_center],[0,y_center,size[0],size[1]]] 206 | mosaic_mask = random.choice([mosaic_mask1,mosaic_mask2]) 207 | elif num == 3 : 208 | mosaic_mask1 = [[0,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 209 | mosaic_mask2 = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,size[0],size[1]]] 210 | mosaic_mask3 = [[0,0,x_center,size[1]],[x_center,0,size[0],y_center],[x_center,y_center,size[0],size[1]]] 211 | mosaic_mask4 = [[0,0,x_center,y_center],[x_center,0,size[0],size[1]],[0,y_center,x_center,size[1]]] 212 | mosaic_mask = random.choice([mosaic_mask1,mosaic_mask2,mosaic_mask3,mosaic_mask4]) 213 | elif num == 4 : 214 | mosaic_mask = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 215 | return mosaic_mask 216 | def Mosaic(self,source,size): 217 | #print(size) 218 | #print(len(source)) 219 | new_data = list() 220 | 221 | background = np.zeros((size[0],size[1],3)) 222 | #print(background.shape) 223 | counter = 0 224 | #x_center = int(random.uniform(.25,.75)*size[0]) 225 | #y_center = int(random.uniform(.25,.75)*size[1]) 226 | #mosaic_mask = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 227 | num = len(source) 228 | mosaic_mask = self.generate_mosaic_mask(num,size) 229 | new_labels = torch.Tensor(0,5) 230 | for img,label,_ in source : 231 | 232 | width, height = (mosaic_mask[counter][2]-mosaic_mask[counter][0]),(mosaic_mask[counter][3]-mosaic_mask[counter][1]) 233 | aspect_ratio_src = img.height/img.width 234 | min_ratio,max_ratio = aspect_ratio_src*0.5 , aspect_ratio_src*2 235 | 236 | aspect_ratio_tar = height/width 237 | offset_x = 0 238 | offset_y = 0 239 | if aspect_ratio_tar < min_ratio : 240 | offset_x = random.randint(0, int(width-height/min_ratio)) 241 | width = int(height/min_ratio) 242 | 243 | 244 | elif aspect_ratio_tar > max_ratio : 245 | offset_y = random.randint(0, int(height-width*max_ratio)) 246 | height = int(width*max_ratio) 247 | 248 | new_img = img.resize((width,height)) 249 | new_img = np.array(new_img) 250 | #print(np.mean(new_img, axis=tuple(range(new_img.ndim-1)))) 251 | mean = np.mean(new_img, axis=tuple(range(new_img.ndim-1))) 252 | x1 = mosaic_mask[counter][0]+offset_x 253 | y1 = mosaic_mask[counter][1]+offset_y 254 | x2 = min(mosaic_mask[counter][2],x1+width) 255 | y2 = min(mosaic_mask[counter][3],y1+height) 256 | 257 | #print(offset_x,offset_y,x1,y1,x2,y2,width,height) 258 | background[mosaic_mask[counter][1]:mosaic_mask[counter][3],mosaic_mask[counter][0]:mosaic_mask[counter][2]] = mean 259 | background[y1:y2,x1:x2] = new_img 260 | #new_label = list() 261 | if label.size(0): 262 | new_box = label[...,1:5] 263 | #print(width,height) 264 | w_scale = (size[0]/width) 265 | h_scale = (size[1]/height) 266 | new_box[...,0],new_box[...,2] = new_box[...,0]/w_scale,new_box[...,2]/w_scale 267 | new_box[...,1],new_box[...,3] = new_box[...,1]/h_scale,new_box[...,3]/h_scale 268 | #print(new_box.shape,x1,y1) 269 | new_box[...,0] = 
new_box[...,0] + (mosaic_mask[counter][0]+offset_x)/size[0] 270 | new_box[...,1] = new_box[...,1] + (mosaic_mask[counter][1]+offset_y)/size[1] 271 | new_label = torch.cat((label[...,0].unsqueeze(1),new_box),1) 272 | #print(new_label.shape,new_labels.shape) 273 | new_labels = torch.cat((new_labels,new_label)) 274 | counter = counter + 1 275 | 276 | new_img = Image.fromarray(background.astype(np.uint8)) 277 | new_data = [new_img,new_labels] 278 | return new_data 279 | def transform_od(self,image, boxes, labels, difficulties,seg_id = None, mean = [0.485, 0.456, 0.406],std = [0.229, 0.224, 0.225],phase = 'train',expand = True,expand_scale = 1.5): 280 | """ 281 | Apply the transformations above. 282 | 283 | :param image: image, a PIL Image 284 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 285 | :param labels: labels of objects, a tensor of dimensions (n_objects) 286 | :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects) 287 | :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied 288 | :param dims: (H, W) 289 | :return: transformed image, transformed bounding box coordinates, transformed labels, transformed difficulties 290 | """ 291 | assert phase in {'train', 'test'} 292 | 293 | # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on 294 | # see: https://pytorch.org/docs/stable/torchvision/models.html 295 | # mean = [0.485, 0.456, 0.406] 296 | # std = [0.229, 0.224, 0.225] 297 | 298 | new_image = image 299 | new_boxes = boxes 300 | new_labels = labels 301 | new_seg_id = seg_id 302 | new_difficulties = difficulties 303 | 304 | # Skip the following operations if validation/evaluation 305 | if phase == 'train': 306 | # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo 307 | new_image = self.photometric_distort(new_image) 308 | 309 | # Convert PIL image to Torch tensor 310 | #print(new_image) 311 | new_image = FT.to_tensor(new_image) 312 | if new_seg_id!=None: 313 | new_seg_id = FT.to_tensor(new_seg_id) 314 | # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects 315 | # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on 316 | #print(new_seg_id) 317 | if random.random() < 0.5 and expand==True: 318 | new_image, new_boxes, new_seg_id = self.expand_od(new_image, boxes, filler=mean,expand_scale=expand_scale,seg_id = new_seg_id) 319 | #print(new_seg_id) 320 | # Randomly crop image (zoom in) 321 | 322 | new_image, new_boxes, new_labels, new_difficulties, new_seg_id = self.random_crop_od(new_image, new_boxes, new_labels,new_difficulties, new_seg_id) 323 | 324 | # Convert Torch tensor to PIL image 325 | new_image = FT.to_pil_image(new_image) 326 | if new_seg_id!=None: 327 | new_seg_id = FT.to_pil_image(new_seg_id) 328 | # Flip image with a 50% chance 329 | if random.random() < 0.5: 330 | new_image, new_boxes, new_seg_id = self.flip_od(new_image, new_boxes, new_seg_id) 331 | 332 | #new_image, new_boxes, new_labels = self.mosaic_mix(new_image,new_boxes,new_labels) 333 | 334 | return new_image, new_boxes, new_labels, new_difficulties, new_seg_id -------------------------------------------------------------------------------- /utils/iou.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def find_intersection(set_1, set_2): 5 | 6 | #print(set_1[:, 
:2].unsqueeze(1).shape, set_2[:, :2].unsqueeze(0).shape) 7 | # PyTorch auto-broadcasts singleton dimensions 8 | lower_bounds = torch.max(set_1[:, :2].unsqueeze(1), set_2[:, :2].unsqueeze(0)) # (n1, n2, 2) 9 | upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1), set_2[:, 2:].unsqueeze(0)) # (n1, n2, 2) 10 | #print(upper_bounds.shape, lower_bounds.shape) 11 | intersection_dims = torch.clamp(upper_bounds - lower_bounds, min=0) # (n1, n2, 2) 12 | 13 | return intersection_dims[:, :, 0] * intersection_dims[:, :, 1] # (n1, n2) 14 | def find_union(set_1, set_2): 15 | 16 | #print(set_1.shape, set_2.shape) 17 | # Find intersections 18 | intersection = find_intersection(set_1, set_2) # (n1, n2) 19 | 20 | # Find areas of each box in both sets 21 | areas_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 22 | areas_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n2) 23 | 24 | # Find the union 25 | # PyTorch auto-broadcasts singleton dimensions 26 | union = areas_set_1.unsqueeze(1) + areas_set_2.unsqueeze(0) - intersection # (n1, n2) 27 | 28 | # #box iou 29 | # output = intersection/ areas_set_2 30 | 31 | return union # (n1, n2) 32 | def find_jaccard_overlap(set_1, set_2): 33 | 34 | #print(set_1.shape, set_2.shape) 35 | # Find intersections 36 | intersection = find_intersection(set_1, set_2) # (n1, n2) 37 | 38 | # Find areas of each box in both sets 39 | areas_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 40 | areas_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n2) 41 | 42 | # Find the union 43 | # PyTorch auto-broadcasts singleton dimensions 44 | union = areas_set_1.unsqueeze(1) + areas_set_2.unsqueeze(0) - intersection # (n1, n2) 45 | 46 | # #box iou 47 | # output = intersection/ areas_set_2 48 | 49 | return intersection / union # (n1, n2) 50 | 51 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | # A simple torch style logger 2 | # (C) Wei YANG 2017 3 | from __future__ import absolute_import 4 | import matplotlib 5 | import os 6 | if os.name == 'posix' and "DISPLAY" not in os.environ: 7 | matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
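# The non-interactive Agg backend can render and save figures on headless machines (no X display).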
8 | import matplotlib.pyplot as plt 9 | import os 10 | import sys 11 | import numpy as np 12 | import numbers 13 | 14 | __all__ = ['Logger', 'LoggerMonitor', 'savefig'] 15 | 16 | def savefig(fname, dpi=None): 17 | dpi = 150 if dpi == None else dpi 18 | plt.savefig(fname, dpi=dpi) 19 | 20 | def plot_overlap(logger, names=None): 21 | names = logger.names if names == None else names 22 | nums_d = logger.nums_d 23 | for _, name in enumerate(names): 24 | x = np.arange(len(nums_d[name])) 25 | plt.plot(x, np.asarray(nums_d[name])) 26 | return [logger.title + '(' + name + ')' for name in names] 27 | 28 | class Logger(object): 29 | '''Save training process to log file with simple plot function.''' 30 | def __init__(self, fpath, title=None, resume=False): 31 | self.file = None 32 | self.resume = resume 33 | self.title = '' if title == None else title 34 | if fpath is not None: 35 | if resume: 36 | self.file = open(fpath, 'r') 37 | name = self.file.readline() 38 | self.names = name.rstrip().split('\t') 39 | self.nums_d = {} 40 | for _, name in enumerate(self.names): 41 | self.nums_d[name] = [] 42 | 43 | for nums_d in self.file: 44 | nums_d = nums_d.rstrip().split('\t') 45 | for i in range(0, len(nums_d)): 46 | self.nums_d[self.names[i]].append(nums_d[i]) 47 | self.file.close() 48 | self.file = open(fpath, 'a') 49 | else: 50 | self.file = open(fpath, 'w') 51 | 52 | def set_names(self, names): 53 | if self.resume: 54 | pass 55 | # initialize nums_d as empty list 56 | self.nums_d = {} 57 | self.names = names 58 | for _, name in enumerate(self.names): 59 | self.file.write(name) 60 | self.file.write('\t') 61 | self.nums_d[name] = [] 62 | self.file.write('\n') 63 | self.file.flush() 64 | 65 | 66 | def append(self, nums_d): 67 | assert len(self.names) == len(nums_d), 'nums_d do not match names' 68 | for index, num in enumerate(nums_d): 69 | if isinstance(num, numbers.Number): 70 | self.file.write("{0:.6f}".format(num)) 71 | self.nums_d[self.names[index]].append(num) 72 | else: 73 | self.file.write(str(num)) 74 | self.file.write('\t') 75 | self.file.write('\n') 76 | self.file.flush() 77 | 78 | def plot(self, names=None): 79 | names = self.names if names == None else names 80 | nums_d = self.nums_d 81 | for _, name in enumerate(names): 82 | if len(nums_d[name]) > 0: 83 | x = np.arange(len(nums_d[name])) 84 | plt.plot(x, np.asarray(nums_d[name])) 85 | plt.legend([self.title + '(' + name + ')' for name in names]) 86 | plt.grid(True) 87 | 88 | def close(self): 89 | if self.file is not None: 90 | self.file.close() 91 | 92 | class LoggerMonitor(object): 93 | '''Load and visualize multiple logs.''' 94 | def __init__ (self, paths): 95 | '''paths is a distionary with {name:filepath} pair''' 96 | self.loggers = [] 97 | for title, path in paths.items(): 98 | logger = Logger(path, title=title, resume=True) 99 | self.loggers.append(logger) 100 | 101 | def plot(self, names=None): 102 | plt.figure() 103 | plt.subplot(121) 104 | legend_text = [] 105 | for logger in self.loggers: 106 | legend_text += plot_overlap(logger, names) 107 | plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 
108 | plt.grid(True) 109 | 110 | if __name__ == '__main__': 111 | # # Example 112 | # logger = Logger('test.txt') 113 | # logger.set_names(['Train loss', 'Valid loss','Test loss']) 114 | 115 | # length = 100 116 | # t = np.arange(length) 117 | # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 118 | # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 119 | # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 120 | 121 | # for i in range(0, length): 122 | # logger.append([train_loss[i], valid_loss[i], test_loss[i]]) 123 | # logger.plot() 124 | 125 | # Example: logger monitor 126 | paths = { 127 | 'resadvnet20':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet20/log.txt', 128 | 'resadvnet32':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet32/log.txt', 129 | 'resadvnet44':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet44/log.txt', 130 | } 131 | 132 | field = ['Valid Acc.'] 133 | 134 | monitor = LoggerMonitor(paths) 135 | monitor.plot(names=field) 136 | savefig('test.eps') -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | '''Some helper functions for PyTorch, including: 2 | - get_mean_and_std: calculate the mean and std value of dataset. 3 | - msr_init: net parameter initialization. 4 | - progress_bar: progress bar mimic xlua.progress. 5 | ''' 6 | import errno 7 | import os 8 | import sys 9 | import time 10 | import math 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.init as init 14 | from torch.autograd import Variable 15 | 16 | __all__ = ['get_mean_and_std', 'init_params', 'mkdir_p', 'AverageMeter'] 17 | 18 | 19 | def get_mean_and_std(dataset): 20 | '''Compute the mean and std value of dataset.''' 21 | dataloader = trainloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2) 22 | 23 | mean = torch.zeros(3) 24 | std = torch.zeros(3) 25 | print('==> Computing mean and std..') 26 | for inputs, targets in dataloader: 27 | for i in range(3): 28 | mean[i] += inputs[:,i,:,:].mean() 29 | std[i] += inputs[:,i,:,:].std() 30 | mean.div_(len(dataset)) 31 | std.div_(len(dataset)) 32 | return mean, std 33 | 34 | def init_params(net): 35 | '''Init layer parameters.''' 36 | for m in net.modules(): 37 | if isinstance(m, nn.Conv2d): 38 | init.kaiming_normal_(m.weight, mode='fan_out') 39 | if m.bias is not None: 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.BatchNorm2d): 42 | init.constant_(m.weight, 1) 43 | init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.Linear): 45 | init.normal_(m.weight, std=1e-3) 46 | if m.bias is not None: 47 | init.constant_(m.bias, 0) 48 | 49 | def mkdir_p(path): 50 | '''make dir if not exist''' 51 | try: 52 | os.makedirs(path) 53 | except OSError as exc: # Python >2.5 54 | if exc.errno == errno.EEXIST and os.path.isdir(path): 55 | pass 56 | else: 57 | raise 58 | 59 | class AverageMeter(object): 60 | """Computes and stores the average and current value 61 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 62 | """ 63 | def __init__(self): 64 | self.reset() 65 | 66 | def reset(self): 67 | self.val = 0 68 | self.avg = 0 69 | self.sum = 0 70 | self.count = 0 71 | 72 | def update(self, val, n=1): 73 | self.val = val 74 | self.sum += val * n 75 | self.count += n 76 | self.avg = self.sum / self.count --------------------------------------------------------------------------------
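Usage sketch (illustrative, not one of the repository files above): a minimal example of AverageMeter from utils/misc.py and find_jaccard_overlap from utils/iou.py, assuming the repository root is on PYTHONPATH and requirements.txt is installed; all numeric values are made up for illustration.

import torch

from utils.iou import find_jaccard_overlap
from utils.misc import AverageMeter

# Running average of a per-batch loss, weighted by batch size.
loss_meter = AverageMeter()
loss_meter.update(0.8, n=16)   # batch of 16 images, mean loss 0.8
loss_meter.update(0.6, n=8)    # batch of 8 images, mean loss 0.6
print(loss_meter.avg)          # (0.8*16 + 0.6*8) / 24 = 0.733...

# Pairwise IoU between two sets of boxes in (x1, y1, x2, y2) format.
set_1 = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
set_2 = torch.tensor([[1.0, 1.0, 3.0, 3.0],
                      [0.0, 0.0, 2.0, 2.0]])
print(find_jaccard_overlap(set_1, set_2))   # approx. tensor([[0.1429, 1.0000]])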