├── CustomBatchSampler.py ├── LICENSE ├── README.md ├── config.yml ├── data ├── bdd100k.yaml ├── od_dataset_from_file.py └── voc_data.yaml ├── docker └── Dockerfile ├── folder2lmdb.py ├── images ├── 000166.jpg ├── 001852.jpg ├── 002597.jpg ├── 004030.jpg ├── 00690c26-e4bbbd72.jpg └── show.gif ├── inference.py ├── models ├── __init__.py ├── bdd100k │ └── config.yaml ├── mbv2_yolo.py ├── mbv3_yolo.py ├── mbv3_yolo_macc.py ├── mobilenetv2.py ├── mobilenetv3.py ├── seg_loss.py ├── voc │ └── config.yaml └── yolo_loss.py ├── requirements.txt ├── save └── 00690c26-e4bbbd72_result.jpg ├── scripts ├── VOC2007.sh ├── VOC2012.sh ├── create.sh ├── inference.sh └── train.sh ├── search_space.json ├── train.py └── utils ├── __init__.py ├── box.py ├── eval_mAP.py ├── image_augmentation.py ├── iou.py ├── logger.py └── misc.py /CustomBatchSampler.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | 5 | from torch.utils.data.sampler import Sampler 6 | from typing import Iterator, Optional, Sequence, List, TypeVar, Generic, Sized 7 | import random 8 | 9 | class GreedyBatchSampler(Sampler[List[int]]): 10 | r"""Wraps another sampler to yield a mini-batch of indices. 11 | Args: 12 | sampler (Sampler or Iterable): Base sampler. Can be any iterable object 13 | batch_size (int): Size of mini-batch. 14 | drop_last (bool): If ``True``, the sampler will drop the last batch if 15 | its size would be less than ``batch_size`` 16 | Example: 17 | >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=False)) 18 | [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 19 | >>> list(BatchSampler(SequentialSampler(range(10)), batch_size=3, drop_last=True)) 20 | [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 21 | """ 22 | 23 | def __init__(self, sampler: Sampler[int], batch_size: int, drop_last: bool,sample:list) -> None: 24 | # Since collections.abc.Iterable does not check for `__getitem__`, which 25 | # is one way for an object to be an iterable, we don't do an `isinstance` 26 | # check here. 
27 | if not isinstance(batch_size, int) or isinstance(batch_size, bool) or \ 28 | batch_size <= 0: 29 | raise ValueError("batch_size should be a positive integer value, " 30 | "but got batch_size={}".format(batch_size)) 31 | if not isinstance(drop_last, bool): 32 | raise ValueError("drop_last should be a boolean value, but got " 33 | "drop_last={}".format(drop_last)) 34 | self.sampler = sampler 35 | self.batch_size = batch_size 36 | self.drop_last = drop_last 37 | self.sample = sample 38 | #print('self.drop_last',self.drop_last) 39 | #self.mosaic_array = list() 40 | 41 | ''' 42 | def generate_mosaic_array(self): 43 | mosaic_array = [] 44 | for i in range(self.batch_size): 45 | mosaic_array.append(random.choice([1,2,4])) 46 | return sum(self.mosaic_array) 47 | ''' 48 | def get_random(self,sample): 49 | if random.random() < 0.5: 50 | num = random.choice(sample) 51 | else: 52 | num = 1 53 | return num 54 | def __iter__(self): 55 | batch = [] 56 | sample = [1,4] 57 | num = self.get_random(self.sample) 58 | 59 | buckets = [] 60 | for idx in self.sampler: 61 | buckets.append(idx) 62 | if len(buckets) == num : 63 | batch.append(buckets) 64 | num = self.get_random(self.sample) 65 | buckets = [] 66 | if len(batch) == self.batch_size: 67 | yield batch 68 | #r,batch_size = self.get_random() 69 | #print('\n0-',batch_size) 70 | batch = [] 71 | 72 | if len(batch) > 0 and not self.drop_last: 73 | yield batch 74 | #def get_mosaic_array(self) : 75 | # return self.mosaic_array.pop(0) 76 | def __len__(self): 77 | # Can only be called if self.sampler has __len__ implemented 78 | # We cannot enforce this condition, so we turn off typechecking for the 79 | # implementation below. 80 | # Somewhat related: see NOTE [ Lack of Default `__len__` in Python Abstract Base Classes ] 81 | return len(self.sampler) 82 | #if self.drop_last: 83 | # return len(self.sampler) // self.batch_size # type: ignore 84 | #else: 85 | # return (len(self.sampler) + self.batch_size - 1) // self.batch_size # type: ignore 86 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Eric Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mobilenet-YOLO-Pytorch 2 | 3 | ![result](/save/00690c26-e4bbbd72_result.jpg) 4 | 5 | ## Model 6 | 7 | A pytorch implementation of MobileNet-YOLO detection network , train on 07+12 , test on VOC2007 (imagenet pretrained , not coco) 8 | 9 | Network|mAP|Resolution|download| 10 | :---:|:---:|:---:|:---:| 11 | MobileNetV2|72.1|352|[checkpoint](https://drive.google.com/drive/folders/11iNLZA5sOZP2tiTQB6pz6TAA2u5xyYCa?usp=sharing)| 12 | 13 | 14 | ## Training steps 15 | 16 | 1. Download dataset VOCdevkit/ , if already have , please skip this step 17 | ``` 18 | sh scripts/VOC2007.sh 19 | sh scripts/VOC2012.sh 20 | ``` 21 | 2. Create lmdb 22 | ``` 23 | sh scripts/create.sh 24 | ``` 25 | 3. Start training 26 | ``` 27 | sh scripts/train.sh 28 | ``` 29 | ## yolov3 training 30 | 31 | see [branch](https://github.com/eric612/Mobilenet-YOLO-Pytorch/tree/yolov3) 32 | 33 | ## Hyper parameter optimization 34 | 35 | ``` 36 | nnictl create --config config.yml 37 | ``` 38 | 39 | ## Demo 40 | 41 | Download [checkpoint](https://drive.google.com/file/d/1eNIHaZGQHyb6WfOUmBuBU3K5urKFoL27/view?usp=sharing), and save at $Mobilenet-YOLO-Pytorch/checkpoints/bdd100k/model_best.pth.tar 42 | 43 | ``` 44 | sh scripts/inference.sh 45 | ``` 46 | 47 | ## Under construction 48 | 49 | - [ ] A new detector 50 | - [x] yolov4 51 | - [x] Multi-Task 52 | - [x] Hyper Parameter Tuning 53 | - [ ] Pruning 54 | - [x] Porting KL720 55 | 56 | ## Acknowledgements 57 | 58 | [AlexeyAB](https://github.com/AlexeyAB/darknet) 59 | 60 | [diggerdu](https://github.com/diggerdu/Generalized-Intersection-over-Union) 61 | 62 | [BobLiu20](https://github.com/BobLiu20/YOLOv3_PyTorch) 63 | 64 | [bubbliiiing](https://github.com/bubbliiiing/yolov4-tiny-pytorch) 65 | 66 | [aleju](https://github.com/aleju/imgaug) 67 | 68 | [rmccorm4](https://github.com/rmccorm4/PyTorch-LMDB) 69 | 70 | [hysts](https://github.com/hysts/pytorch_image_classification) 71 | 72 | [utkuozbulak](https://github.com/utkuozbulak/pytorch-custom-dataset-examples) 73 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | authorName: default 2 | experimentName: example_yolo_pytorch 3 | trialConcurrency: 1 4 | maxExecDuration: 30d 5 | maxTrialNum: 20 6 | #choice: local, remote, pai 7 | trainingServicePlatform: local 8 | searchSpacePath: search_space.json 9 | #choice: true, false 10 | useAnnotation: false 11 | tuner: 12 | #choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner, GPTuner 13 | #SMAC (SMAC should be installed through nnictl) 14 | builtinTunerName: TPE 15 | classArgs: 16 | #choice: maximize, minimize 17 | optimize_mode: maximize 18 | 19 | trial: 20 | command: python train.py 21 | codeDir: . 
22 | gpuNum: 1 23 | localConfig: 24 | useActiveGpu: true 25 | -------------------------------------------------------------------------------- /data/bdd100k.yaml: -------------------------------------------------------------------------------- 1 | classes: 2 | map: 3 | - person 4 | - rider 5 | - car 6 | - bus 7 | - truck 8 | - bike 9 | - motor 10 | original: 11 | - person 12 | - rider 13 | - car 14 | - bus 15 | - truck 16 | - bike 17 | - motor 18 | - traffic light 19 | - traffic sign 20 | - train 21 | extention_names: 22 | annotation: 23 | - json 24 | image: 25 | - jpg 26 | segmentation: 27 | - png 28 | model_config_path: models/bdd100k/config.yaml 29 | segmentation_anno_keywords: id 30 | segmentation_enable: true 31 | segmentation_num_classes: 2 32 | test_dataset_path: 33 | annos: 34 | - /media/eric/Data/bdd100k/annotations/val 35 | imgs: 36 | - /media/eric/Data/bdd100k/images/val 37 | lists: 38 | - /media/eric/Data/bdd100k/ImageSets/val.txt 39 | lmdb: bdd100k-test-lmdb 40 | name: bdd100k_test 41 | segs: 42 | - /media/eric/Data/bdd100k/drivable_maps/labels/val/ 43 | trainval_dataset_path: 44 | annos: 45 | - /media/eric/Data/bdd100k/annotations/train 46 | imgs: 47 | - /media/eric/Data/bdd100k/images/train 48 | lists: 49 | - /media/eric/Data/bdd100k/ImageSets/train.txt 50 | lmdb: bdd100k-train-lmdb 51 | name: bdd100k_train 52 | segs: 53 | - /media/eric/Data/bdd100k/drivable_maps/labels/train/ 54 | -------------------------------------------------------------------------------- /data/od_dataset_from_file.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import glob 4 | import os 5 | import torch 6 | from torch.utils.data.dataset import Dataset # For custom datasets 7 | import json 8 | 9 | from tqdm import tqdm 10 | import pickle 11 | import xml.etree.ElementTree as ET 12 | #import image_augmentation as img_aug 13 | import cv2 14 | 15 | ''' 16 | CLASSES = ('__background__', 17 | 'aeroplane', 'bicycle', 'bird', 'boat', 18 | 'bottle', 'bus', 'car', 'cat', 'chair', 19 | 'cow', 'diningtable', 'dog', 'horse', 20 | 'motorbike', 'person', 'pottedplant', 21 | 'sheep', 'sofa', 'train', 'tvmonitor') 22 | ''' 23 | 24 | #classes_map['background'] = 0 25 | 26 | class DatasetFromFile(Dataset): 27 | def __init__(self, image_path,anno_path,seg_path,imageset_list,classes,dataset_name,phase='train',has_seg = False,difficultie = True,ext_img = ['jpg','bmp'],ext_anno = ['xml','json'],ext_seg=['png'],ori_classes_name=None): 28 | 29 | # Get image list 30 | #self.img_folder_list = glob.glob(folder_path+'*') 31 | 32 | self.item_list = list() 33 | self.phase = phase 34 | self.difficultie = difficultie 35 | self.classes = classes 36 | self.classes_map = {k: v for v, k in enumerate(classes)} 37 | self.ext_img = ext_img 38 | self.ext_anno = ext_anno 39 | self.has_seg = has_seg 40 | self.ext_seg = ext_seg 41 | self.seg_path = seg_path 42 | im_list = list() 43 | if ori_classes_name!=None: 44 | self.ori_classes_name = ori_classes_name 45 | else: 46 | self.ori_classes_name = classes 47 | #print(type(image_path)) 48 | self.list_name = 'data/%s.txt'%dataset_name 49 | 50 | if os.path.isfile(self.list_name): 51 | print(self.list_name) 52 | with open(self.list_name, "rb") as fp: # Unpickling 53 | self.item_list = pickle.load(fp) 54 | else: 55 | 56 | if type(imageset_list) is str and type(image_path) is str and type(anno_path) is str: 57 | with open(imageset_list,'r') as f: 58 | for line in f: 59 | for word in line.split(): 60 | 
im_list.append(word) 61 | if self.has_seg: 62 | self.parse_list(image_path,anno_path,im_list,seg_path) 63 | else: 64 | self.parse_list(image_path,anno_path,im_list) 65 | elif type(imageset_list) is list : 66 | assert len(imageset_list) == len(image_path) == len(anno_path) 67 | for idx in range(len(imageset_list)) : 68 | set = imageset_list[idx] 69 | im_list.clear() 70 | with open(set,'r') as f: 71 | for line in f: 72 | for word in line.split(): 73 | im_list.append(word) 74 | if self.has_seg: 75 | self.parse_list(image_path[idx],anno_path[idx],im_list,seg_path[idx]) 76 | else: 77 | self.parse_list(image_path[idx],anno_path[idx],im_list) 78 | 79 | with open(self.list_name, "wb") as fp: #Pickling 80 | pickle.dump(self.item_list, fp) 81 | self.data_len = len(self.item_list) 82 | print('total files of %s : %d'%(dataset_name,self.data_len)) 83 | #print(self.item_list) 84 | def __getitem__(self, index): 85 | # Get image name from the pandas df 86 | if self.has_seg : 87 | single_image_path, single_anno_path, single_seg_path = self.item_list[index] 88 | else: 89 | single_image_path, single_anno_path = self.item_list[index] 90 | # Open image 91 | im = cv2.imread(single_image_path) 92 | boxes, labels, difficulties = self.parse_annotation(single_anno_path) 93 | yolo_labels = list() 94 | height, width, channels = im.shape 95 | im = cv2.imencode('.jpg', im,[int(cv2.IMWRITE_JPEG_QUALITY), 98]) 96 | yolo_labels = self.to_yolo_label(boxes,labels,difficulties,width,height) 97 | if self.has_seg : 98 | im2 = cv2.imread(single_seg_path) 99 | im2 = cv2.imencode('.png', im2,[int(cv2.IMWRITE_PNG_COMPRESSION),1]) 100 | return (im, yolo_labels, im2) 101 | else : 102 | return (im, yolo_labels) 103 | 104 | def __len__(self): 105 | return self.data_len 106 | def to_yolo_label(self,boxes,labels,difficulties,width = 0,height = 0): 107 | yolo_labels = list() 108 | float = width == 0 and height == 0 109 | 110 | for index,box in enumerate(boxes): 111 | if self.difficultie or not difficulties[index]: 112 | #print(box) 113 | yolo_label = list() 114 | yolo_label.clear() 115 | #print(box,labels[index]) 116 | x = (box[0] + box[2])/2 117 | y = (box[1] + box[3])/2 118 | w = box[2] - box[0] 119 | h = box[3] - box[1] 120 | if not float : 121 | x = x / width 122 | y = y / height 123 | w = w / width 124 | h = h / height 125 | yolo_label.append(labels[index]) 126 | yolo_label.append(x) 127 | yolo_label.append(y) 128 | yolo_label.append(w) 129 | yolo_label.append(h) 130 | yolo_labels.append(yolo_label) 131 | return yolo_labels 132 | 133 | def parse_list(self,image_path,anno_path,im_list,seg_path=None): 134 | image_list = list() 135 | image_list.clear() 136 | seg_list = list() 137 | seg_list.clear() 138 | im_lists = tqdm(im_list) 139 | seg_files = list() 140 | if self.has_seg: 141 | for i in self.ext_seg : 142 | seg_files = seg_files + glob.glob(seg_path+'/*.%s'%i) 143 | 144 | 145 | for s in im_lists : 146 | img_file = None 147 | for i in self.ext_img : 148 | filepath = "{}/{}.{}".format(image_path,s,i) 149 | if os.path.isfile(filepath): 150 | img_file = filepath 151 | anno_file = None 152 | for i in self.ext_anno : 153 | filepath = "{}/{}.{}".format(anno_path,s,i) 154 | if os.path.isfile(filepath): 155 | anno_file = filepath 156 | if self.has_seg: 157 | for seg in seg_files: 158 | if s in seg : 159 | if img_file!=None and anno_file!=None : 160 | self.item_list.append([img_file,anno_file,seg]) 161 | im_lists.set_description("Processing %s" % img_file) 162 | else: 163 | im_lists.set_description("Not find file %s" % s) 164 | break 165 | 
elif img_file!=None and anno_file!=None : 166 | self.item_list.append([img_file,anno_file]) 167 | im_lists.set_description("Processing %s" % img_file) 168 | else: 169 | im_lists.set_description("Not find file %s" % s) 170 | 171 | def bound(low, high, value): 172 | return max(low, min(high, value)) 173 | def parse_annotation(self,annotation_path): 174 | filename, file_extension = os.path.splitext(annotation_path) 175 | boxes = list() 176 | labels = list() 177 | difficulties = list() 178 | # VOC format xml 179 | if file_extension == '.xml': 180 | source = open(annotation_path) 181 | tree = ET.parse(source) 182 | root = tree.getroot() 183 | 184 | for object in root.iter('object'): 185 | difficult = int(object.find('difficult').text == '1') 186 | label = object.find('name').text.lower().strip() 187 | 188 | if label not in self.classes: 189 | continue 190 | bbox = object.find('bndbox') 191 | xmin = int(bbox.find('xmin').text) - 1 192 | ymin = int(bbox.find('ymin').text) - 1 193 | xmax = int(bbox.find('xmax').text) - 1 194 | ymax = int(bbox.find('ymax').text) - 1 195 | boxes.append([xmin, ymin, xmax, ymax]) 196 | #print(label) 197 | labels.append(self.classes_map[label]) 198 | difficulties.append(difficult) 199 | source.close() 200 | return boxes, labels, difficulties 201 | # COCO format json 202 | elif file_extension == '.json': 203 | with open(annotation_path, 'r') as f: 204 | data=json.load(f) 205 | width = int(data['image']['width'])-1 206 | height = int(data['image']['height'])-1 207 | object_number = len(data['annotation']) 208 | for j in range(object_number): 209 | class_id = int(data['annotation'][j]['category_id'])-1 210 | category_name = self.ori_classes_name[class_id] 211 | if category_name in self.classes: 212 | new_class_id = self.classes.index(category_name) 213 | xmin = int(float(data['annotation'][j]['bbox'][0])+0.5) 214 | ymin = int(float(data['annotation'][j]['bbox'][1])+0.5) 215 | if xmin<0: 216 | xmin = 0 217 | if ymin<0: 218 | ymin = 0 219 | xmax = int(float(data['annotation'][j]['bbox'][0])+float(data['annotation'][j]['bbox'][2])+0.5) 220 | ymax = int(float(data['annotation'][j]['bbox'][1])+float(data['annotation'][j]['bbox'][3])+0.5) 221 | if xmax>width: 222 | xmax = width 223 | if ymax>height: 224 | ymax = height 225 | boxes.append([xmin, ymin, xmax, ymax]) 226 | labels.append(new_class_id) 227 | difficulties.append(0) 228 | #print(xmin,ymin,class_id) 229 | return boxes, labels, difficulties 230 | def collate_fn(self, batch): 231 | 232 | images = list() 233 | boxes = list() 234 | labels = list() 235 | difficulties = list() 236 | 237 | for b in batch: 238 | images.append(b[0]) 239 | boxes.append(b[1]) 240 | labels.append(b[2]) 241 | difficulties.append(b[3]) 242 | 243 | images = torch.stack(images, dim=0) 244 | 245 | return images, boxes, labels, difficulties # tensor (N, 3, H, W), 3 lists of N tensors each 246 | -------------------------------------------------------------------------------- /data/voc_data.yaml: -------------------------------------------------------------------------------- 1 | test_dataset_path: 2 | annos: [data/VOCdevkit/VOC2007/Annotations/] 3 | imgs: [data/VOCdevkit/VOC2007/JPEGImages/] 4 | lists: [data/VOCdevkit/VOC2007/ImageSets/Main/test.txt] 5 | lmdb: test-lmdb 6 | name: voc_test 7 | trainval_dataset_path: 8 | annos: [data/VOCdevkit/VOC2007/Annotations/, data/VOCdevkit/VOC2012/Annotations/] 9 | imgs: [data/VOCdevkit/VOC2007/JPEGImages/, data/VOCdevkit/VOC2012/JPEGImages/] 10 | lists: [data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt, 
data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt] 11 | lmdb: train-lmdb 12 | name: voc_trainval 13 | segmentation_enable: false 14 | classes: 15 | original: ["aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat", "chair","cow", "diningtable", "dog", "horse","motorbike", "person", "pottedplant","sheep", "sofa", "train", "tvmonitor"] 16 | map: ["aeroplane", "bicycle", "bird", "boat","bottle", "bus", "car", "cat", "chair","cow", "diningtable", "dog", "horse","motorbike", "person", "pottedplant","sheep", "sofa", "train", "tvmonitor"] 17 | extention_names: 18 | image: ["jpg"] 19 | annotation: ["xml"] 20 | model_config_path: "models/voc/config.yaml" -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # https://www.learnopencv.com/install-opencv3-on-ubuntu 2 | # https://docs.opencv.org/3.4/d6/d15/tutorial_building_tegra_cuda.html 3 | 4 | ARG CUDA_VERSION=10.1 5 | ARG CUDNN_VERSION=7 6 | 7 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04 8 | 9 | ARG PYTHON_VERSION=3.6 10 | ARG OPENCV_VERSION=4.1.1 11 | 12 | # Needed for string substitution 13 | SHELL ["/bin/bash", "-c"] 14 | 15 | # Add CUDA libs paths 16 | RUN export DEBIAN_FRONTEND=noninteractive \ 17 | && apt-get update && \ 18 | CUDA_PATH=(/usr/local/cuda-*) && \ 19 | CUDA=`basename $CUDA_PATH` && \ 20 | echo "$CUDA_PATH/compat" >> /etc/ld.so.conf.d/${CUDA/./-}.conf && \ 21 | ldconfig && \ 22 | # Install all dependencies for OpenCV and Caffe 23 | apt-get -y update --fix-missing && \ 24 | apt-get -y install --no-install-recommends \ 25 | python${PYTHON_VERSION} \ 26 | python${PYTHON_VERSION}-dev \ 27 | $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \ 28 | build-essential \ 29 | wget \ 30 | unzip \ 31 | git \ 32 | python-scipy \ 33 | python-skimage \ 34 | libopencv-dev \ 35 | && \ 36 | # install python dependencies 37 | sysctl -w net.ipv4.ip_forward=1 && \ 38 | wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \ 39 | python${PYTHON_VERSION} get-pip.py && \ 40 | rm get-pip.py && \ 41 | pip${PYTHON_VERSION} install numpy && \ 42 | pip${PYTHON_VERSION} install torch && \ 43 | pip${PYTHON_VERSION} install torchvision && \ 44 | pip${PYTHON_VERSION} install lmdb && \ 45 | pip${PYTHON_VERSION} install six && \ 46 | pip${PYTHON_VERSION} install matplotlib && \ 47 | pip${PYTHON_VERSION} install tqdm && \ 48 | pip${PYTHON_VERSION} install nni && \ 49 | pip${PYTHON_VERSION} install progress && \ 50 | pip${PYTHON_VERSION} install filetype && \ 51 | pip${PYTHON_VERSION} install msgpack_python && \ 52 | pip${PYTHON_VERSION} install Pillow && \ 53 | pip${PYTHON_VERSION} install PyYAML && \ 54 | pip${PYTHON_VERSION} install imgaug && \ 55 | pip${PYTHON_VERSION} install tensorboard && \ 56 | # Set the default python and install PIP packages 57 | update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ 58 | update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 59 | 60 | # connect 8080 for nni 61 | EXPOSE 8080 62 | 63 | ENV MobileNetYOLO_ROOT=/workspace/Mobilenet-YOLO-Pytorch 64 | WORKDIR $MobileNetYOLO_ROOT 65 | 66 | RUN cd /workspace && \ 67 | git clone --depth 1 https://github.com/eric612/Mobilenet-YOLO-Pytorch.git && \ 68 | #unzip caffe.zip && \ 69 | cd $MobileNetYOLO_ROOT 
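#
# Hypothetical usage note (not part of the original Dockerfile): once the image
# is built, it is typically used roughly as below. The image tag is an
# assumption, --gpus all requires the NVIDIA container toolkit on the host, and
# port 8080 matches the nni port exposed above.
#
#   docker build -t mobilenet-yolo-pytorch docker/
#   docker run --gpus all -it -p 8080:8080 mobilenet-yolo-pytorch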
-------------------------------------------------------------------------------- /folder2lmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import six 4 | import string 5 | import argparse 6 | 7 | import lmdb 8 | import pickle 9 | import msgpack 10 | import tqdm 11 | from PIL import Image 12 | 13 | import torch 14 | import torch.utils.data as data 15 | from utils.image_augmentation import Image_Augmentation 16 | from torch.utils.data import DataLoader 17 | from torchvision.transforms import transforms 18 | from torchvision import transforms, datasets 19 | # This segfaults when imported before torch: https://github.com/apache/arrow/issues/2637 20 | from data.od_dataset_from_file import DatasetFromFile 21 | import cv2 22 | import numpy as np 23 | import shutil 24 | import random 25 | import yaml 26 | from utils.box import wh_to_x2y2 27 | import imgaug.augmenters as iaa 28 | sometimes = lambda aug: iaa.Sometimes(0.5, aug) 29 | seq = iaa.Sequential([ 30 | sometimes(iaa.SomeOf((1, 2), 31 | [ 32 | #sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation 33 | iaa.OneOf([ 34 | iaa.GaussianBlur((0, 1.0)), # blur images with a sigma between 0 and 3.0 35 | iaa.MedianBlur(k=(3,5)), # blur image using local medians with kernel sizes between 2 and 7 36 | ]), 37 | iaa.Sharpen(alpha=(0, 0.1), lightness=(0.9, 1.1)), # sharpen images 38 | iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.03*255), per_channel=0.3), # add gaussian noise to images 39 | ], 40 | random_order=True 41 | )) 42 | ]) 43 | 44 | if torch.__version__> '1.8': 45 | from torchvision.transforms import InterpolationMode 46 | interp = InterpolationMode.BILINEAR 47 | else : 48 | interp = 2 49 | CLASSES = (#'__background__', 50 | 'aeroplane', 'bicycle', 'bird', 'boat', 51 | 'bottle', 'bus', 'car', 'cat', 'chair', 52 | 'cow', 'diningtable', 'dog', 'horse', 53 | 'motorbike', 'person', 'pottedplant', 54 | 'sheep', 'sofa', 'train', 'tvmonitor') 55 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 56 | class ImageFolderLMDB(data.Dataset): 57 | def __init__(self, db_path,batch_size,transform_size = [[352,352]], phase=None,expand_scale=1.5,mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225],has_seg = False, classes_name = CLASSES, seg_num_classes = 0): 58 | self.db_path = db_path 59 | self.env = lmdb.open(db_path, subdir=os.path.isdir(db_path), 60 | readonly=True, lock=False, 61 | readahead=False, meminit=False) 62 | with self.env.begin(write=False) as txn: 63 | self.length = pickle.loads(txn.get(b'__len__')) 64 | self.keys = pickle.loads(txn.get(b'__keys__')) 65 | self.normalize = transforms.Normalize(mean=mean,std=std) 66 | self.mean = mean 67 | self.std = std 68 | self.transform_size = transform_size 69 | self.phase = phase 70 | self.img_aug = Image_Augmentation() 71 | self.batch_size = batch_size 72 | self.count = 0 73 | self.expand_scale = expand_scale 74 | self.has_seg = has_seg 75 | self.classes_name = classes_name 76 | self.seg_num_classes = seg_num_classes 77 | 78 | def get_single_image(self,index,expand=False,expand_scale=1.5): 79 | 80 | img, target,img2 = None, None, None 81 | env = self.env 82 | 83 | with env.begin(write=False) as txn: 84 | byteflow = txn.get(self.keys[index]) 85 | unpacked = pickle.loads(byteflow) 86 | #unpacked = pa.deserialize(byteflow) 87 | 88 | # load image 89 | imgbuf = unpacked[0] 90 | buf = six.BytesIO() 91 | buf.write(imgbuf[1]) 92 | buf.seek(0) 93 | X_str= 
np.fromstring(buf.read(), dtype=np.uint8) 94 | img = cv2.imdecode(X_str, cv2.IMREAD_COLOR) 95 | 96 | # load label 97 | target = unpacked[1] 98 | 99 | if self.has_seg: 100 | # load segmentation id 101 | imgbuf = unpacked[2] 102 | buf = six.BytesIO() 103 | buf.write(imgbuf[1]) 104 | buf.seek(0) 105 | X_str= np.fromstring(buf.read(), dtype=np.uint8) 106 | img2 = cv2.imdecode(X_str, cv2.IMREAD_COLOR) 107 | img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) 108 | seg_id = Image.fromarray(img2) 109 | else : 110 | seg_id = None 111 | 112 | #if self.phase == 'train': 113 | target2 = torch.Tensor(target) 114 | boxes = target2[...,1:5] 115 | if boxes.shape[0] == 0 : 116 | #print(target2.shape) 117 | boxes2 = torch.zeros(0,4) 118 | labels = torch.zeros(0) 119 | else : 120 | x1 = (boxes[...,0] - boxes[...,2]/2).unsqueeze(1) 121 | y1 = (boxes[...,1] - boxes[...,3]/2).unsqueeze(1) 122 | x2 = (boxes[...,0] + boxes[...,2]/2).unsqueeze(1) 123 | y2 = (boxes[...,1] + boxes[...,3]/2).unsqueeze(1) 124 | boxes2 = torch.cat((x1*img.shape[1],y1*img.shape[0],x2*img.shape[1],y2*img.shape[0]),1) 125 | #if boxes.size(0) : 126 | labels = target2[...,0] 127 | #print(boxes2) 128 | #if labels == 7 : 129 | 130 | difficulties = torch.zeros_like(labels) 131 | img = seq(image=img) # done by the library 132 | image = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB)) 133 | #print(seg_id) 134 | 135 | new_img, new_boxes, new_labels, new_difficulties, new_seg_id = self.img_aug.transform_od(image, boxes2, labels, difficulties,seg_id=seg_id, mean = [0.5, 0.5, 0.5],std = [1, 1, 1],phase = self.phase,expand = expand,expand_scale = self.expand_scale) 136 | 137 | array = np.array(new_seg_id) 138 | maps = list() 139 | if self.has_seg: 140 | for c in range(1,self.seg_num_classes+1): 141 | maps.append(Image.fromarray(array==c)) 142 | old_dims = torch.FloatTensor([new_img.width, new_img.height, new_img.width, new_img.height]).unsqueeze(0) 143 | new_boxes2 = new_boxes / old_dims # percent coordinates 144 | 145 | w = (new_boxes2[...,2] - new_boxes2[...,0]) 146 | h = (new_boxes2[...,3] - new_boxes2[...,1]) 147 | x = (new_boxes2[...,0] + w/2).unsqueeze(1) 148 | y = (new_boxes2[...,1] + h/2).unsqueeze(1) 149 | #print(x.shape,y.shape,w.shape,h.shape,new_boxes.shape) 150 | new_boxes2 = torch.cat((x,y,w.unsqueeze(1),h.unsqueeze(1)),1) 151 | new_target = torch.cat((new_labels.unsqueeze(1),new_boxes2),1) 152 | 153 | 154 | return (new_img,new_target,maps) 155 | def __getitem__(self, index): 156 | #print(index) 157 | 158 | 159 | if type(index) == list: 160 | 161 | group = [] 162 | s = len(index) 163 | 164 | for idx in index: 165 | img,tar,seg_id = self.get_single_image(idx,s==1) 166 | group.append([img,tar,seg_id]) 167 | 168 | if s == 1 : 169 | #self.show_image(img,tar[...,1:5],tar[...,0],convert=True) 170 | return group[0][0],group[0][1],1,group[0][2] 171 | else : 172 | b = self.img_aug.Mosaic(group,[1000,1000]) 173 | #self.show_image(b[0],b[1][...,1:5].clone(),b[1][...,0].clone(),convert=True) 174 | return b[0],b[1],len(index) 175 | else: 176 | img,tar,_ = self.get_single_image(index) 177 | return img,tar,1 178 | 179 | def show_image(self,image,boxes=None,labels=None,convert=False,seg_id = False,gray_img_only = False,resize = None): 180 | if gray_img_only == True : 181 | #print(image) 182 | cv_img = np.array(image.convert('L')) 183 | print(cv_img.shape) 184 | if resize!=None : 185 | cv_img = cv2.resize(cv_img, (resize[0], resize[1]), interpolation=cv2.INTER_AREA) 186 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 187 | cv2.resizeWindow('frame', 640, 
480) 188 | cv2.imshow('frame', cv_img) 189 | key = cv2.waitKey(3) 190 | else : 191 | cv_img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) 192 | seg_id = (np.asarray(seg_id)!=0)*0.5 193 | #print(seg_id) 194 | #print(cv_img.shape,seg_id.shape) 195 | #cv_img = cv2.bitwise_and(cv_img,cv_img,mask = seg_id) 196 | cv_img[...,0] = cv_img[...,0]*seg_id + cv_img[...,0]*(seg_id==0) 197 | cv_img[...,2] = cv_img[...,2]*seg_id + cv_img[...,2]*(seg_id==0) 198 | for idx,box in enumerate(boxes) : 199 | if convert : 200 | #print(box,cv_img.shape) 201 | wh_to_x2y2(box) 202 | #print(box,cv_img.shape) 203 | box[0],box[2] = box[0]*cv_img.shape[1],box[2]*cv_img.shape[1] 204 | box[1],box[3] = box[1]*cv_img.shape[0],box[3]*cv_img.shape[0] 205 | 206 | cv2.rectangle(cv_img, (int(box[0]),int(box[1])), (int(box[2]),int(box[3])), (0,255,0), 2) 207 | text=self.classes_name[int(labels[idx])].lower() 208 | cv2.putText(cv_img, text, (int(box[0]),int(box[1]-5)), cv2.FONT_HERSHEY_SIMPLEX,0.5, (0, 255, 255), 1, cv2.LINE_AA) 209 | 210 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 211 | cv2.resizeWindow('frame', 480, 480) 212 | cv2.imshow('frame', cv_img) 213 | key = cv2.waitKey(0) 214 | #cv2.imwrite('images//frame%04d.jpg'%self.count, cv_img) 215 | 216 | def __len__(self): 217 | return self.length 218 | 219 | def __repr__(self): 220 | return self.__class__.__name__ + ' (' + self.db_path + ')' 221 | def set_transform(self,transform): 222 | self.transform = transform 223 | def collate_fn(self, batch): 224 | images = list() 225 | labels = list() 226 | seg_maps = list() 227 | random_size = random.choice(self.transform_size) 228 | seg_random_size = [int(number / 16) for number in random_size] 229 | #print(seg_random_size) 230 | self.transform = transforms.Compose([ 231 | transforms.Resize(size=random_size, interpolation=interp), 232 | transforms.ToTensor(), 233 | self.normalize, 234 | ]) 235 | self.transform_seg = transforms.Compose([ 236 | transforms.Resize(size=seg_random_size, interpolation=interp), 237 | transforms.ToTensor(), 238 | ]) 239 | 240 | count = 0 241 | 242 | for b in batch: 243 | if self.has_seg: 244 | maps = torch.zeros(seg_random_size[0],seg_random_size[1],self.seg_num_classes) 245 | for i,m in enumerate(b[3]): 246 | cv_img = np.array(m.convert('L')) 247 | cv_img = cv2.resize(cv_img, (seg_random_size[0], seg_random_size[1]), interpolation=cv2.INTER_AREA) 248 | maps[...,i] = torch.Tensor(cv_img)/255.0 249 | #self.show_image(m,gray_img_only=True,resize=seg_random_size) 250 | seg_maps.append(maps) 251 | 252 | images.append(self.transform(b[0])) 253 | labels.append(b[1]) 254 | count = b[2] + count 255 | images = torch.stack(images, dim=0) 256 | 257 | 258 | if self.phase == 'train': 259 | if self.has_seg: 260 | seg_maps = torch.stack(seg_maps, dim=0) 261 | return images, labels, count, seg_maps 262 | else: 263 | return images, labels, count, None 264 | else : 265 | return images, labels 266 | def raw_reader(path): 267 | with open(path, 'rb') as f: 268 | bin_data = f.read() 269 | return bin_data 270 | 271 | 272 | def folder2lmdb(dataset_path, write_frequency=5000): 273 | directory = os.path.expanduser(dataset_path) 274 | print("Loading dataset from %s" % directory) 275 | 276 | with open(dataset_path, 'r') as stream: 277 | data = yaml.load(stream) 278 | print(data) 279 | classes_name = data["classes"]["map"] 280 | classes_name.insert(0, 'background') 281 | ori_classes_name = data["classes"]["original"] 282 | trainval_dataset_path = data["trainval_dataset_path"] 283 | test_dataset_path = data["test_dataset_path"] 284 
| ext_img = data["extention_names"]["image"] 285 | ext_anno = data["extention_names"]["annotation"] 286 | segmentation_enable = data["segmentation_enable"] 287 | if segmentation_enable: 288 | ext_seg = data["extention_names"]["segmentation"] 289 | 290 | 291 | #print(classes_name) 292 | if segmentation_enable: 293 | trainval_dataset = \ 294 | DatasetFromFile(trainval_dataset_path['imgs'],trainval_dataset_path['annos'],trainval_dataset_path['segs'],trainval_dataset_path['lists'],classes_name, \ 295 | dataset_name=trainval_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ext_seg=ext_seg,ori_classes_name=ori_classes_name) 296 | 297 | test_dataset = \ 298 | DatasetFromFile(test_dataset_path['imgs'],test_dataset_path['annos'],test_dataset_path['segs'],test_dataset_path['lists'],classes_name, \ 299 | dataset_name=test_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ext_seg=ext_seg,ori_classes_name=ori_classes_name) 300 | else : 301 | trainval_dataset = \ 302 | DatasetFromFile(trainval_dataset_path['imgs'],trainval_dataset_path['annos'],None,trainval_dataset_path['lists'],classes_name, \ 303 | dataset_name=trainval_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ori_classes_name=ori_classes_name) 304 | 305 | test_dataset = \ 306 | DatasetFromFile(test_dataset_path['imgs'],test_dataset_path['annos'],None,test_dataset_path['lists'],classes_name, \ 307 | dataset_name=test_dataset_path['name'],phase = 'test',has_seg = segmentation_enable,difficultie=False,ext_img=ext_img,ext_anno=ext_anno,ori_classes_name=ori_classes_name) 308 | outpath = trainval_dataset_path['lmdb'],test_dataset_path['lmdb'] 309 | total_set = trainval_dataset,test_dataset 310 | for i in range(len(total_set)) : 311 | data_loader = DataLoader(total_set[i], num_workers=4, collate_fn=lambda x: x) 312 | lmdb_path = os.path.expanduser(outpath[i]) 313 | 314 | if os.path.exists(lmdb_path) and os.path.isdir(lmdb_path): 315 | shutil.rmtree(lmdb_path) 316 | #print(lmdb_path) 317 | os.mkdir(lmdb_path) 318 | print("Generate LMDB to %s" % lmdb_path) 319 | db = lmdb.open(lmdb_path, subdir=True, 320 | map_size=1099511627776 * 2, readonly=False, 321 | meminit=False, map_async=True) 322 | 323 | txn = db.begin(write=True) 324 | sum = 0 325 | 326 | for idx, data in enumerate(data_loader): 327 | if segmentation_enable: 328 | image,label,seg = data[0][0],data[0][1],data[0][2] 329 | txn.put(u'{}'.format(idx).encode('ascii'), pickle.dumps((image, label, seg))) 330 | else: 331 | image,label = data[0][0],data[0][1] 332 | txn.put(u'{}'.format(idx).encode('ascii'), pickle.dumps((image, label))) 333 | sum += len(label) 334 | 335 | #txn.put(u'{}'.format(idx).encode('ascii'), pa.serialize((image, label)).to_buffer()) 336 | if idx % write_frequency == 0: 337 | print("[%d/%d]" % (idx, len(data_loader))) 338 | txn.commit() 339 | txn = db.begin(write=True) 340 | 341 | print('total box : %d'%sum) 342 | # finish iterating through dataset 343 | txn.commit() 344 | keys = [u'{}'.format(k).encode('ascii') for k in range(idx + 1)] 345 | with db.begin(write=True) as txn: 346 | txn.put(b'__keys__', pickle.dumps(keys)) 347 | txn.put(b'__len__', pickle.dumps(len(keys))) 348 | #txn.put(b'__keys__', pa.serialize(keys).to_buffer()) 349 | #txn.put(b'__len__', pa.serialize(len(keys)).to_buffer()) 350 | 351 | print("Flushing database ...") 352 | db.sync() 353 | db.close() 354 | 
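# Hypothetical usage sketch, not taken from train.py (which is not shown here):
# it illustrates how ImageFolderLMDB, GreedyBatchSampler and collate_fn appear
# intended to fit together. The lmdb path, batch size and mosaic group sizes
# [1, 4] below are assumptions based on the yaml/config files above.
def _example_train_loader(db_path='train-lmdb', batch_size=32):
    from torch.utils.data import RandomSampler
    from CustomBatchSampler import GreedyBatchSampler

    dataset = ImageFolderLMDB(db_path, batch_size,
                              transform_size=[[352, 352]], phase='train')
    # GreedyBatchSampler yields batches of index *groups*: a group of 4 indices
    # makes __getitem__ build one mosaic image, a group of 1 a plain image.
    sampler = GreedyBatchSampler(RandomSampler(dataset), batch_size,
                                 drop_last=True, sample=[1, 4])
    # collate_fn resizes every image to one randomly chosen size from
    # transform_size and stacks the batch into a single tensor.
    return DataLoader(dataset, batch_sampler=sampler,
                      collate_fn=dataset.collate_fn, num_workers=4)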
355 | 356 | if __name__ == "__main__": 357 | parser = argparse.ArgumentParser() 358 | parser.add_argument("-d", "--dataset", help="Path to original image dataset folder", default = 'data/voc_data.yaml') 359 | args = parser.parse_args() 360 | folder2lmdb(args.dataset) 361 | -------------------------------------------------------------------------------- /images/000166.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/000166.jpg -------------------------------------------------------------------------------- /images/001852.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/001852.jpg -------------------------------------------------------------------------------- /images/002597.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/002597.jpg -------------------------------------------------------------------------------- /images/004030.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/004030.jpg -------------------------------------------------------------------------------- /images/00690c26-e4bbbd72.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/00690c26-e4bbbd72.jpg -------------------------------------------------------------------------------- /images/show.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/images/show.gif -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import yaml 4 | import torch 5 | from models.mbv2_yolo import yolo 6 | import filetype 7 | from PIL import Image, ImageDraw, ImageFont 8 | import cv2 9 | import numpy as np 10 | import torchvision.transforms as transforms 11 | 12 | from datetime import datetime 13 | parser = argparse.ArgumentParser(description='YOLO Inference') 14 | parser.add_argument('-c', '--checkpoint', default='checkpoint/checkpoint.pth.tar', type=str, metavar='PATH', 15 | help='path to load checkpoint (default: checkpoint/checkpoint.pth.tar)') 16 | parser.add_argument('-e', '--export', default='', type=str, metavar='PATH', 17 | help='path to export model') 18 | parser.add_argument('-y', '--data_yaml', dest='data_yaml', default='data/voc_data.yaml', type=str, metavar='PATH', 19 | help='path to data_yaml') 20 | parser.add_argument('-i', '--input', default='images/000166.jpg', type=str, metavar='PATH', 21 | help='path to load input file') 22 | distinct_colors = ['#e6194b', '#3cb44b', '#ffe119', '#0082c8', '#f58231', '#911eb4', '#46f0f0', '#f032e6', 23 | '#d2f53c', '#fabebe', '#008080'] 24 | 25 | def main(args): 26 | 27 | assert os.path.isfile(args.data_yaml), 'Error: no config yaml file found!' 
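    # NOTE: yaml.load(f) without an explicit Loader argument is deprecated in
    # newer PyYAML releases; yaml.safe_load(f) is the usual drop-in replacement
    # for plain config files like these.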
28 | with open(args.data_yaml, 'r') as f: 29 | dataset_path = yaml.load(f) 30 | CLASSES = dataset_path["classes"]["map"] 31 | with open(dataset_path["model_config_path"], 'r') as f: 32 | config = yaml.load(f) 33 | 34 | print(config) 35 | assert os.path.isfile(args.checkpoint), 'Error: no checkpoint found!' 36 | #checkpoint = torch.load(args.checkpoint) 37 | model = yolo(config=config) 38 | model = load_model(model, args.checkpoint) 39 | #model.load_state_dict(checkpoint['model']) 40 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 41 | #model = model.cuda() 42 | model = model.to(device) 43 | 44 | model.eval() 45 | 46 | model.yolo_losses[0].val_conf = 0.3 47 | model.yolo_losses[1].val_conf = 0.3 48 | #filename = os.path.basename(args.input) 49 | filename = os.path.basename(args.input).split('.')[0] 50 | kind = filetype.guess(args.input) 51 | if kind is None: 52 | print('Cannot guess file type!') 53 | return 54 | #print('File extension: %s' % kind.extension) 55 | #print('File MIME type: %s' % kind.mime) 56 | if kind.extension in ['png', 'jpg', 'jpeg', 'tiff', 'bmp', 'gif'] : 57 | 58 | original_image = Image.open(args.input, mode='r') 59 | original_image = original_image.convert('RGB') 60 | annotated_image_ = cv2.cvtColor(np.asarray(original_image), cv2.COLOR_RGB2BGR) 61 | height,width = annotated_image_.shape[0],annotated_image_.shape[1] 62 | #im_pil = Image.fromarray(annotated_image_) 63 | 64 | det_boxes,seg_map = inference_image(model,original_image,device) 65 | seg_maps = list() 66 | for cls in range(seg_map.shape[0]): 67 | seg_maps.append(cv2.resize(seg_map[cls,...], (width, height), interpolation=cv2.INTER_LINEAR)) 68 | 69 | # Annotate 70 | annotated_image = original_image 71 | draw = ImageDraw.Draw(annotated_image) 72 | font = ImageFont.load_default().font 73 | # Suppress specific classes, if needed 74 | #box_location = [None]*4 75 | if det_boxes is not None : 76 | for bbox in det_boxes[0]: 77 | # print(bbox) 78 | 79 | box_location = bbox[:4].tolist() 80 | conf = bbox[4].item() 81 | cls_conf = bbox[5].item() 82 | cls_index = int(bbox[6].item()) 83 | if conf*cls_conf>0.15: 84 | box_location[0] = box_location[0]*width 85 | box_location[1] = box_location[1]*height 86 | box_location[2] = box_location[2]*width 87 | box_location[3] = box_location[3]*height 88 | draw.rectangle(xy=box_location,outline=distinct_colors[0]) 89 | # Text 90 | text_size = font.getsize(CLASSES[cls_index].upper()) 91 | text_location = [box_location[0] + 3., box_location[1] - text_size[1]] 92 | textbox_location = [box_location[0], box_location[1] - text_size[1], box_location[0] + text_size[0] + 4., 93 | box_location[1]] 94 | draw.text(xy=text_location, text=CLASSES[cls_index].lower(), fill='white', 95 | font=font) 96 | print('save/%s_result.jpg'%filename) 97 | cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 98 | cv2.resizeWindow('frame', width, height) 99 | annotated_image = cv2.cvtColor(np.asarray(annotated_image), cv2.COLOR_RGB2BGR) 100 | color_channel = [1,2] 101 | for idx,map in enumerate(seg_maps): 102 | mask = map>0.5 103 | annotated_image[...,color_channel[idx]][mask] = annotated_image[...,color_channel[idx]][mask]*(1.0 - map[mask]) 104 | cv2.imwrite('save/%s_result.jpg'%filename,annotated_image) 105 | cv2.imshow('frame',annotated_image) 106 | key = cv2.waitKey(0) 107 | 108 | 109 | def inference_image(model, original_image,device): 110 | # Transforms 111 | transform_test = transforms.Compose([ 112 | transforms.Resize(size=(416,416), interpolation=2), 113 | transforms.ToTensor(), 114 | 
transforms.Normalize((0.5, 0.5, 0.5), (1, 1, 1)), 115 | ]) 116 | # Transform 117 | image = transform_test(original_image) 118 | image = image.to(device) 119 | # Move to default device 120 | start = datetime.now().timestamp() 121 | detections = model(image.unsqueeze(0)) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 122 | end =datetime.now().timestamp() 123 | c3 = (end - start) 124 | print("model inference time : ", c3*1000, "ms") 125 | 126 | return detections 127 | def load_model(model, path_trained_weight): 128 | checkpoint_backbone = torch.load(path_trained_weight) 129 | 130 | pretrained_dict = checkpoint_backbone.state_dict() 131 | 132 | model_dict = model.state_dict() 133 | #for k, v in model_dict.items() : 134 | #if k[9:] in model_dict : 135 | # print (k) 136 | # 1. filter out unnecessary keys 137 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 138 | if len(pretrained_dict.keys()) == 0: 139 | print('loading pretrain weight fail:{} '.format(path_trained_weight)) 140 | input("Cont?") 141 | #print(pretrained_dict.keys()) 142 | #print(model_dict.keys()) 143 | # 2. overwrite entries in the existing state dict 144 | model_dict.update(pretrained_dict) 145 | # 3. load the new state dict 146 | model.load_state_dict(model_dict) 147 | print("loaded the trained weights from {}".format(path_trained_weight)) 148 | return model 149 | if __name__ == '__main__': 150 | args = parser.parse_args() 151 | main(args) -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/models/__init__.py -------------------------------------------------------------------------------- /models/bdd100k/config.yaml: -------------------------------------------------------------------------------- 1 | img_h: 416 2 | img_w: 416 3 | batch_size: 32 4 | train_img_size: 5 | - [416, 416] 6 | expand_scale: 1.3 7 | mosaic_num: [1] 8 | iou_weighting: 0.02 9 | normalize: 10 | mean: [0.5, 0.5, 0.5] 11 | std: [1,1,1] 12 | yolo: 13 | num_classes: 7 14 | num_anchors: 3 15 | ignore_thresh: [0.6, 0.55] 16 | iou_thresh: 0.6 17 | anchors: 18 | - [34, 47] 19 | - [66, 93] 20 | - [122, 182] 21 | - [6, 11] 22 | - [11, 43] 23 | - [16, 22] 24 | classes: 7 25 | mask: 26 | - [0, 1, 2] 27 | - [3, 4, 5] 28 | seg: 29 | num_classes: 2 -------------------------------------------------------------------------------- /models/mbv2_yolo.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.mobilenetv2 import mobilenetv2 6 | from models.yolo_loss import * 7 | from models.seg_loss import SegLoss 8 | from torch.nn import init 9 | import yaml 10 | from utils.box import nms 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = 
in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | 46 | class Upsample(nn.Module): 47 | def __init__(self): 48 | super(Upsample, self).__init__() 49 | 50 | self.upsample = nn.Sequential( 51 | #BasicConv(in_channels, out_channels, 1), 52 | nn.Upsample(scale_factor=2, mode='nearest') 53 | ) 54 | 55 | def forward(self, x,): 56 | x = self.upsample(x) 57 | return x 58 | def PartAdd(x,y): 59 | if x.size(1) == y.size(1): 60 | return x+y 61 | len = min(x.size(1),y.size(1)) 62 | new_1 = x[:,:len,...] + y[:,:len,...] 63 | if y.size(1) > x.size(1): 64 | new_2 = y[:,len:,...] 65 | else: 66 | new_2 = x[:,len:,...] 67 | new = torch.cat((new_1,new_2),1) 68 | 69 | return new 70 | def DepthwiseConvolution(in_filters,out_filters): 71 | m = nn.Sequential( 72 | BasicConv(in_filters, in_filters, 3,depthwise=True), 73 | BasicConv(in_filters, in_filters, 1), 74 | BasicConv(in_filters, out_filters, 1 ), 75 | ) 76 | return m 77 | def yolo_head(filters_list, in_filters): 78 | m = nn.Sequential( 79 | BasicConv(in_filters, in_filters, 3,depthwise=True), 80 | BasicConv(in_filters, in_filters, 1), 81 | BasicConv(in_filters, filters_list[0], 1), 82 | nn.Conv2d(filters_list[0], filters_list[1], 1), 83 | ) 84 | return m 85 | def seg_head(filters_list, in_filters): 86 | m = nn.Sequential( 87 | BasicConv(in_filters, in_filters, 3,depthwise=True), 88 | BasicConv(in_filters, in_filters, 1), 89 | BasicConv(in_filters, filters_list[0], 1), 90 | nn.Conv2d(filters_list[0], filters_list[1], 1), 91 | ) 92 | return m 93 | class Connect(nn.Module): 94 | def __init__(self, channels): 95 | super(Connect, self).__init__() 96 | 97 | self.conv = nn.Sequential( 98 | BasicConv(channels, channels, 3,depthwise=True), 99 | BasicConv(channels, channels, 1 ), 100 | ) 101 | def forward(self, x,): 102 | x2 = self.conv(x) 103 | x = torch.add(x,x2) 104 | return x 105 | class yolo(nn.Module): 106 | def __init__(self,config): 107 | super(yolo, self).__init__() 108 | self.num_classes = config["yolo"]["num_classes"] 109 | self.num_anchors = config["yolo"]["num_anchors"] 110 | self.seg_loss = None 111 | if "seg" in config: 112 | self.seg_num_classes = config["seg"]["num_classes"] 113 | self.seg_headS16 = seg_head([32, self.seg_num_classes], 32) 114 | self.seg_loss = SegLoss(self.seg_num_classes) 115 | # backbone 116 | model_url = 'https://raw.githubusercontent.com/d-li14/mobilenetv2.pytorch/master/pretrained/mobilenetv2-c5e733a8.pth' 117 | self.backbone = mobilenetv2(model_url) 118 | 119 | self.conv_for_S32 = BasicConv(1280,512,1) 120 | #print(num_anchors * (5 + num_classes)) 121 | self.connect_for_S32 = Connect(512) 122 | self.yolo_headS32 = yolo_head([1024, self.num_anchors * (5 + self.num_classes)],512) 123 | 124 | 125 | self.upsample = Upsample() 126 | self.conv_for_S16 = DepthwiseConvolution(96,512) 127 | self.seg_conv_for_S16 = DepthwiseConvolution(96,32) 128 | self.connect_for_S16 = Connect(512) 129 | 
self.seg_connect_for_S16 = Connect(32) 130 | self.yolo_headS16 = yolo_head([512, self.num_anchors * (5 + self.num_classes)],512) 131 | 132 | self.yolo_losses = [] 133 | for i in range(2): 134 | self.yolo_losses.append(YOLOLoss(config["yolo"]["anchors"],config["yolo"]["mask"][i] \ 135 | ,self.num_classes,[config["img_w"],config["img_h"]],config["yolo"]["ignore_thresh"][i],config["yolo"]["iou_thresh"],iou_weighting=config["iou_weighting"])) 136 | 137 | def forward(self, x, targets=None, seg_maps=None): 138 | 139 | for i in range(2): 140 | self.yolo_losses[i].img_size = [x.size(2),x.size(3)] 141 | feature1, feature2 = self.backbone(x) 142 | S32 = self.conv_for_S32(feature2) 143 | S32 = self.connect_for_S32(S32) 144 | out0 = self.yolo_headS32(S32) 145 | S32_Upsample = self.upsample(S32) 146 | S16 = self.conv_for_S16(feature1) 147 | S16 = self.connect_for_S16(S16) 148 | #S16 = self.blending(S16,S32_Upsample) 149 | #S16 = PartAdd(S16,S32_Upsample) 150 | #print(S16.shape) 151 | S16 = torch.add(S16,S32_Upsample) 152 | 153 | out1 = self.yolo_headS16(S16) 154 | 155 | S16_branch = self.seg_conv_for_S16(feature1) 156 | S16_branch = self.seg_connect_for_S16(S16_branch) 157 | 158 | output = self.yolo_losses[0](out0,targets),self.yolo_losses[1](out1,targets) 159 | if targets == None : 160 | output = nms(output,self.num_classes) 161 | if self.seg_loss!=None: 162 | out2 = self.seg_headS16(S16_branch) 163 | seg_out = self.seg_loss(out2) 164 | return output,seg_out 165 | else: 166 | return output 167 | else: 168 | if self.seg_loss!=None: 169 | out2 = self.seg_headS16(S16_branch) 170 | seg_out = self.seg_loss(out2,seg_maps) 171 | return output,seg_out 172 | else: 173 | return output 174 | 175 | 176 | 177 | #def test(): 178 | # net = yolo(3,20) 179 | # print(net) 180 | 181 | #test() -------------------------------------------------------------------------------- /models/mbv3_yolo.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.voc.mobilenetv3 import MobileNetV3 6 | from models.voc.yolo_loss import * 7 | from torch.nn import init 8 | from utils.box import nms 9 | 10 | import yaml 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | class Upsample(nn.Module): 46 | 
def __init__(self): 47 | super(Upsample, self).__init__() 48 | 49 | self.upsample = nn.Sequential( 50 | #BasicConv(in_channels, out_channels, 1), 51 | nn.Upsample(scale_factor=2, mode='nearest') 52 | ) 53 | 54 | def forward(self, x,): 55 | x = self.upsample(x) 56 | return x 57 | def DepthwiseConvolution(in_filters,out_filters): 58 | m = nn.Sequential( 59 | BasicConv(in_filters, in_filters, 3,depthwise=True), 60 | BasicConv(in_filters, in_filters, 1), 61 | BasicConv(in_filters, out_filters, 1), 62 | ) 63 | return m 64 | def yolo_head(filters_list, in_filters): 65 | m = nn.Sequential( 66 | BasicConv(in_filters, in_filters, 3,depthwise=True), 67 | BasicConv(in_filters, in_filters, 1), 68 | BasicConv(in_filters, filters_list[0], 1), 69 | nn.Conv2d(filters_list[0], filters_list[1], 1), 70 | ) 71 | return m 72 | class Connect(nn.Module): 73 | def __init__(self, channels): 74 | super(Connect, self).__init__() 75 | 76 | self.conv = nn.Sequential( 77 | BasicConv(channels, channels, 3,depthwise=True), 78 | BasicConv(channels, channels, 1), 79 | ) 80 | 81 | def forward(self, x,): 82 | x2 = self.conv(x) 83 | x = torch.add(x,x2) 84 | return x 85 | def PartAdd(x,y): 86 | if x.size(1) == y.size(1): 87 | return x+y 88 | len = min(x.size(1),y.size(1)) 89 | new_1 = x[:,:len,...] + y[:,:len,...] 90 | if y.size(1) > x.size(1): 91 | new_2 = y[:,len:,...] 92 | else: 93 | new_2 = x[:,len:,...] 94 | new = torch.cat((new_1,new_2),1) 95 | 96 | return new 97 | class yolo(nn.Module): 98 | def __init__(self,config): 99 | super(yolo, self).__init__() 100 | self.num_classes = config["yolo"]["num_classes"] 101 | self.num_anchors = config["yolo"]["num_anchors"] 102 | # backbone 103 | # https://drive.google.com/file/d/1HYPqCM1t8GDj9HnImKitM-QqdR8InxGB/view?usp=sharing 104 | self.backbone = MobileNetV3('mbv3_large.old.pth.tar') 105 | 106 | self.conv_for_S32 = DepthwiseConvolution(960,320) 107 | #print(num_anchors * (5 + num_classes)) 108 | self.connect_for_S32 = Connect(320) 109 | self.yolo_headS32 = yolo_head([960, self.num_anchors * (5 + self.num_classes)],320) 110 | 111 | 112 | self.upsample = Upsample() 113 | #self.conv_for_S16 = Connect(160) 114 | self.connect_for_S16 = Connect(160) 115 | self.yolo_headS16 = yolo_head([640, self.num_anchors * (5 + self.num_classes)],320) 116 | 117 | self.yolo_losses = [] 118 | for i in range(2): 119 | self.yolo_losses.append(YOLOLoss(config["yolo"]["anchors"],config["yolo"]["mask"][i] \ 120 | ,self.num_classes,[config["img_w"],config["img_h"]],config["yolo"]["ignore_thresh"][i],config["yolo"]["iou_thresh"],iou_weighting=config["iou_weighting"])) 121 | 122 | 123 | def forward(self, x, targets=None): 124 | 125 | for i in range(2): 126 | self.yolo_losses[i].img_size = [x.size(2),x.size(3)] 127 | feature1, feature2 = self.backbone(x) 128 | S32 = self.conv_for_S32(feature2) 129 | S32 = self.connect_for_S32(S32) 130 | out0 = self.yolo_headS32(S32) 131 | S32_Upsample = self.upsample(S32) 132 | #S16 = self.conv_for_S16(feature1) 133 | S16 = self.connect_for_S16(feature1) 134 | S16 = self.connect_for_S16(S16) 135 | S16 = PartAdd(S16,S32_Upsample) 136 | #S16 = torch.add(S16,S32_Upsample) 137 | 138 | out1 = self.yolo_headS16(S16) 139 | 140 | output = self.yolo_losses[0](out0,targets),self.yolo_losses[1](out1,targets) 141 | if targets == None : 142 | output = nms(output,self.num_classes) 143 | 144 | 145 | return output 146 | 147 | #def test(): 148 | # net = yolo(3,20) 149 | # print(net) 150 | 151 | #test() -------------------------------------------------------------------------------- 
/models/mbv3_yolo_macc.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | from models.voc.mobilenetv3 import MobileNetV3 6 | from models.voc.yolo_loss import * 7 | from torch.nn import init 8 | from utils.box import nms 9 | 10 | import yaml 11 | try: 12 | from torch.hub import load_state_dict_from_url 13 | except ImportError: 14 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 15 | 16 | class BasicConv(nn.Module): 17 | def __init__(self, in_channels, out_channels, kernel_size, stride=1,depthwise=False): 18 | super(BasicConv, self).__init__() 19 | if depthwise == False : 20 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False) 21 | else : 22 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, kernel_size//2, bias=False,groups = in_channels) 23 | self.bn = nn.BatchNorm2d(out_channels) 24 | self.activation = nn.LeakyReLU(0.1) 25 | self._initialize_weights() 26 | 27 | def forward(self, x): 28 | x = self.conv(x) 29 | x = self.bn(x) 30 | x = self.activation(x) 31 | return x 32 | def _initialize_weights(self): 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv2d): 35 | init.kaiming_normal_(m.weight, mode='fan_out') 36 | if m.bias is not None: 37 | init.constant_(m.bias, 0) 38 | elif isinstance(m, nn.BatchNorm2d): 39 | init.constant_(m.weight, 1) 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.Linear): 42 | init.normal_(m.weight, std=0.001) 43 | if m.bias is not None: 44 | init.constant_(m.bias, 0) 45 | class Upsample(nn.Module): 46 | def __init__(self, in_channels, out_channels): 47 | super(Upsample, self).__init__() 48 | 49 | self.upsample = nn.Sequential( 50 | BasicConv(in_channels, out_channels, 1), 51 | nn.Upsample(scale_factor=2, mode='nearest') 52 | ) 53 | 54 | def forward(self, x,): 55 | x = self.upsample(x) 56 | return x 57 | def DepthwiseConvolution(in_filters,out_filters): 58 | m = nn.Sequential( 59 | BasicConv(in_filters, in_filters, 3,depthwise=True), 60 | BasicConv(in_filters, in_filters, 1), 61 | BasicConv(in_filters, out_filters, 1), 62 | ) 63 | return m 64 | def yolo_head(filters_list, in_filters): 65 | m = nn.Sequential( 66 | BasicConv(in_filters, in_filters, 3,depthwise=True), 67 | BasicConv(in_filters, in_filters, 1), 68 | BasicConv(in_filters, filters_list[0], 1), 69 | nn.Conv2d(filters_list[0], filters_list[1], 1), 70 | ) 71 | return m 72 | class Connect(nn.Module): 73 | def __init__(self, channels): 74 | super(Connect, self).__init__() 75 | 76 | self.conv = nn.Sequential( 77 | BasicConv(channels, channels, 3,depthwise=True), 78 | BasicConv(channels, channels, 1), 79 | ) 80 | 81 | def forward(self, x,): 82 | x2 = self.conv(x) 83 | x = torch.add(x,x2) 84 | return x 85 | def PartAdd(x,y): 86 | if x.size(1) == y.size(1): 87 | return x+y 88 | len = min(x.size(1),y.size(1)) 89 | new_1 = x[:,:len,...] + y[:,:len,...] 90 | if y.size(1) > x.size(1): 91 | new_2 = y[:,len:,...] 92 | else: 93 | new_2 = x[:,len:,...] 
94 | new = torch.cat((new_1,new_2),1) 95 | 96 | return new 97 | class yolo_graph(nn.Module): 98 | def __init__(self,config): 99 | super(yolo_graph, self).__init__() 100 | self.num_classes = config["yolo"]["num_classes"] 101 | self.num_anchors = config["yolo"]["num_anchors"] 102 | # backbone 103 | # https://drive.google.com/file/d/1HYPqCM1t8GDj9HnImKitM-QqdR8InxGB/view?usp=sharing 104 | self.backbone = MobileNetV3('mbv3_large.old.pth.tar') 105 | 106 | self.conv_for_S32 = BasicConv(960,512,1) 107 | #print(num_anchors * (5 + num_classes)) 108 | self.connect_for_S32 = Connect(512) 109 | self.yolo_headS32 = yolo_head([1024, self.num_anchors * (5 + self.num_classes)],512) 110 | 111 | 112 | self.upsample = Upsample(512,256) 113 | self.conv_for_S16 = DepthwiseConvolution(160,256) 114 | self.connect_for_S16 = Connect(256) 115 | self.yolo_headS16 = yolo_head([512, self.num_anchors * (5 + self.num_classes)],256) 116 | 117 | 118 | 119 | 120 | def forward(self, x, targets=None): 121 | 122 | 123 | feature1, feature2 = self.backbone(x) 124 | S32 = self.conv_for_S32(feature2) 125 | S32 = self.connect_for_S32(S32) 126 | out0 = self.yolo_headS32(S32) 127 | S32_Upsample = self.upsample(S32) 128 | S16 = self.conv_for_S16(feature1) 129 | #S16 = PartAdd(S16,S32_Upsample) 130 | S16 = torch.add(S16,S32_Upsample) 131 | S16 = self.connect_for_S16(S16) 132 | out1 = self.yolo_headS16(S16) 133 | 134 | 135 | 136 | 137 | return out0,out1 138 | 139 | #def test(): 140 | # net = yolo(3,20) 141 | # print(net) 142 | 143 | #test() -------------------------------------------------------------------------------- /models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creates a MobileNetV2 Model as defined in: 3 | Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen. (2018). 4 | MobileNetV2: Inverted Residuals and Linear Bottlenecks 5 | arXiv preprint arXiv:1801.04381. 6 | import from https://github.com/tonylins/pytorch-mobilenet-v2 7 | """ 8 | 9 | import torch.nn as nn 10 | import math 11 | import torch 12 | __all__ = ['mobilenetv2'] 13 | try: 14 | from torch.hub import load_state_dict_from_url 15 | except ImportError: 16 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 17 | 18 | def _make_divisible(v, divisor, min_value=None): 19 | """ 20 | This function is taken from the original tf repo. 21 | It ensures that all layers have a channel number that is divisible by 8 22 | It can be seen here: 23 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 24 | :param v: 25 | :param divisor: 26 | :param min_value: 27 | :return: 28 | """ 29 | if min_value is None: 30 | min_value = divisor 31 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 32 | # Make sure that round down does not go down by more than 10%. 
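    # Worked examples: _make_divisible(37, 8) -> 40 (nearest multiple of 8, already >= 0.9*37);
    # _make_divisible(22, 16) first rounds to 16, but 16 < 0.9*22 = 19.8, so it is bumped up to 32.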
33 | if new_v < 0.9 * v: 34 | new_v += divisor 35 | return new_v 36 | 37 | 38 | def conv_3x3_bn(inp, oup, stride): 39 | return nn.Sequential( 40 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 41 | nn.BatchNorm2d(oup), 42 | nn.ReLU6(inplace=True) 43 | ) 44 | 45 | 46 | def conv_1x1_bn(inp, oup): 47 | return nn.Sequential( 48 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 49 | nn.BatchNorm2d(oup), 50 | nn.ReLU6(inplace=True) 51 | ) 52 | 53 | 54 | class InvertedResidual(nn.Module): 55 | def __init__(self, inp, oup, stride, expand_ratio): 56 | super(InvertedResidual, self).__init__() 57 | assert stride in [1, 2] 58 | 59 | hidden_dim = round(inp * expand_ratio) 60 | self.identity = stride == 1 and inp == oup 61 | 62 | if expand_ratio == 1: 63 | self.conv = nn.Sequential( 64 | # dw 65 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 66 | nn.BatchNorm2d(hidden_dim), 67 | nn.ReLU6(inplace=True), 68 | # pw-linear 69 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 70 | nn.BatchNorm2d(oup), 71 | ) 72 | else: 73 | self.conv = nn.Sequential( 74 | # pw 75 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 76 | nn.BatchNorm2d(hidden_dim), 77 | nn.ReLU6(inplace=True), 78 | # dw 79 | nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False), 80 | nn.BatchNorm2d(hidden_dim), 81 | nn.ReLU6(inplace=True), 82 | # pw-linear 83 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 84 | nn.BatchNorm2d(oup), 85 | ) 86 | 87 | def forward(self, x): 88 | if self.identity: 89 | return x + self.conv(x) 90 | else: 91 | return self.conv(x) 92 | 93 | 94 | class MobileNetV2(nn.Module): 95 | def __init__(self, num_classes=1000, width_mult=1.): 96 | super(MobileNetV2, self).__init__() 97 | # setting of inverted residual blocks 98 | self.cfgs1 = [ 99 | # t, c, n, s 100 | [1, 16, 1, 1], 101 | [6, 24, 2, 2], 102 | [6, 32, 3, 2], 103 | [6, 64, 4, 2], 104 | [6, 96, 3, 1], 105 | ] 106 | self.cfgs2 = [ 107 | # t, c, n, s 108 | [6, 160, 3, 2], 109 | [6, 320, 1, 1], 110 | ] 111 | # building first layer 112 | input_channel = _make_divisible(32 * width_mult, 4 if width_mult == 0.1 else 8) 113 | layers = [conv_3x3_bn(3, input_channel, 2)] 114 | # building inverted residual blocks 115 | block = InvertedResidual 116 | for t, c, n, s in self.cfgs1: 117 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) 118 | for i in range(n): 119 | layers.append(block(input_channel, output_channel, s if i == 0 else 1, t)) 120 | input_channel = output_channel 121 | self.features = nn.Sequential(*layers) 122 | layers2 = list() 123 | for t, c, n, s in self.cfgs2: 124 | output_channel = _make_divisible(c * width_mult, 4 if width_mult == 0.1 else 8) 125 | for i in range(n): 126 | layers2.append(block(input_channel, output_channel, s if i == 0 else 1, t)) 127 | input_channel = output_channel 128 | self.features2 = nn.Sequential(*layers2) 129 | # building last several layers 130 | output_channel = _make_divisible(1280 * width_mult, 4 if width_mult == 0.1 else 8) if width_mult > 1.0 else 1280 131 | self.conv = conv_1x1_bn(input_channel, output_channel) 132 | #self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 133 | #self.classifier = nn.Linear(output_channel, num_classes) 134 | 135 | self._initialize_weights() 136 | 137 | def forward(self, x): 138 | x1 = self.features(x) 139 | x2 = self.features2(x1) 140 | x2 = self.conv(x2) 141 | #x2 = self.avgpool(x2) 142 | #x2 = x.view(x2.size(0), -1) 143 | #x2 = self.classifier(x2) 144 | return x1,x2 145 | 146 | def _initialize_weights(self): 147 | 
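        # He-style initialisation: conv weights ~ N(0, sqrt(2/fan_out)) with zeroed biases,
        # BatchNorm scale 1 / shift 0, and Linear weights ~ N(0, 0.01).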
for m in self.modules(): 148 | if isinstance(m, nn.Conv2d): 149 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 150 | m.weight.data.normal_(0, math.sqrt(2. / n)) 151 | if m.bias is not None: 152 | m.bias.data.zero_() 153 | elif isinstance(m, nn.BatchNorm2d): 154 | m.weight.data.fill_(1) 155 | m.bias.data.zero_() 156 | elif isinstance(m, nn.Linear): 157 | m.weight.data.normal_(0, 0.01) 158 | m.bias.data.zero_() 159 | 160 | def mobilenetv2(pretrained, **kwargs): 161 | model = MobileNetV2() 162 | if pretrained: 163 | model_dict = model.state_dict() 164 | checkpoint = load_state_dict_from_url(pretrained,progress=True) 165 | #pretrained_dict = torch.load(pretrained)['state_dict'] 166 | 167 | for k1, v1 in checkpoint.items() : 168 | n1 = k1.replace('module.', '') 169 | #print(k1) 170 | 171 | for k2, v2 in model_dict.items() : 172 | n2 = k2.replace('features2.0.','features.14.') 173 | n2 = n2.replace('features2.1.','features.15.') 174 | n2 = n2.replace('features2.2.','features.16.') 175 | n2 = n2.replace('features2.3.','features.17.') 176 | #print(n1,' , ',n2) 177 | if n1 == n2 : 178 | #print(k1,' , ',k2) 179 | model_dict[k2]=v1 180 | 181 | model.load_state_dict(model_dict) 182 | #torch.save(model, 'test.pth.tar') 183 | else: 184 | raise Exception("darknet request a pretrained path. got [{}]".format(pretrained)) 185 | return model 186 | def test(): 187 | model_url = 'https://raw.githubusercontent.com/d-li14/mobilenetv2.pytorch/master/pretrained/mobilenetv2-c5e733a8.pth' 188 | net = mobilenetv2(model_url) 189 | #print(net) 190 | x = torch.randn(2,3,224,224) 191 | y1,y2 = net(x) 192 | print(y2.shape) 193 | #test() -------------------------------------------------------------------------------- /models/mobilenetv3.py: -------------------------------------------------------------------------------- 1 | '''MobileNetV3 in PyTorch. 2 | See the paper "Inverted Residuals and Linear Bottlenecks: 3 | Mobile Networks for Classification, Detection and Segmentation" for more details. 
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.nn import init 9 | 10 | try: 11 | from torch.hub import load_state_dict_from_url 12 | except ImportError: 13 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 14 | class hswish(nn.Module): 15 | def forward(self, x): 16 | out = x * F.relu6(x + 3, inplace=True) / 6 17 | return out 18 | 19 | 20 | class hsigmoid(nn.Module): 21 | def forward(self, x): 22 | out = F.relu6(x + 3, inplace=True) / 6 23 | return out 24 | 25 | 26 | class SeModule(nn.Module): 27 | def __init__(self, in_size, reduction=4): 28 | super(SeModule, self).__init__() 29 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 30 | 31 | self.se = nn.Sequential( 32 | nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False), 33 | nn.BatchNorm2d(in_size // reduction), 34 | nn.ReLU(inplace=True), 35 | nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False), 36 | nn.BatchNorm2d(in_size), 37 | hsigmoid() 38 | ) 39 | 40 | def forward(self, x): 41 | return x * self.se(x) 42 | 43 | 44 | class Block(nn.Module): 45 | '''expand + depthwise + pointwise''' 46 | def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride): 47 | super(Block, self).__init__() 48 | self.stride = stride 49 | self.se = semodule 50 | 51 | self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False) 52 | self.bn1 = nn.BatchNorm2d(expand_size) 53 | self.nolinear1 = nolinear 54 | self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride, padding=kernel_size//2, groups=expand_size, bias=False) 55 | self.bn2 = nn.BatchNorm2d(expand_size) 56 | self.nolinear2 = nolinear 57 | self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False) 58 | self.bn3 = nn.BatchNorm2d(out_size) 59 | 60 | self.shortcut = nn.Sequential() 61 | if stride == 1 and in_size != out_size: 62 | self.shortcut = nn.Sequential( 63 | nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False), 64 | nn.BatchNorm2d(out_size), 65 | ) 66 | 67 | def forward(self, x): 68 | out = self.nolinear1(self.bn1(self.conv1(x))) 69 | out = self.nolinear2(self.bn2(self.conv2(out))) 70 | out = self.bn3(self.conv3(out)) 71 | if self.se != None: 72 | out = self.se(out) 73 | out = out + self.shortcut(x) if self.stride==1 else out 74 | return out 75 | 76 | 77 | class MobileNetV3_Large(nn.Module): 78 | def __init__(self, num_classes=1000): 79 | super(MobileNetV3_Large, self).__init__() 80 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 81 | self.bn1 = nn.BatchNorm2d(16) 82 | self.hs1 = hswish() 83 | 84 | self.bneck = nn.Sequential( 85 | Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1), 86 | Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2), 87 | Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1), 88 | Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2), 89 | Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), 90 | Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1), 91 | Block(3, 40, 240, 80, hswish(), None, 2), 92 | Block(3, 80, 200, 80, hswish(), None, 1), 93 | Block(3, 80, 184, 80, hswish(), None, 1), 94 | Block(3, 80, 184, 80, hswish(), None, 1), 95 | Block(3, 80, 480, 112, hswish(), SeModule(112), 1), 96 | Block(3, 112, 672, 112, hswish(), SeModule(112), 1), 97 | Block(5, 112, 672, 160, hswish(), SeModule(160), 1), 98 | ) 99 | self.bneck2 = 
nn.Sequential( 100 | Block(5, 160, 672, 160, hswish(), SeModule(160), 2), 101 | Block(5, 160, 960, 160, hswish(), SeModule(160), 1), 102 | ) 103 | 104 | self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False) 105 | self.bn2 = nn.BatchNorm2d(960) 106 | self.hs2 = hswish() 107 | #self.linear3 = nn.Linear(960, 1280) 108 | #self.bn3 = nn.BatchNorm1d(1280) 109 | #self.hs3 = hswish() 110 | #self.linear4 = nn.Linear(1280, num_classes) 111 | self.init_params() 112 | 113 | def init_params(self): 114 | for m in self.modules(): 115 | if isinstance(m, nn.Conv2d): 116 | init.kaiming_normal_(m.weight, mode='fan_out') 117 | if m.bias is not None: 118 | init.constant_(m.bias, 0) 119 | elif isinstance(m, nn.BatchNorm2d): 120 | init.constant_(m.weight, 1) 121 | init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.Linear): 123 | init.normal_(m.weight, std=0.001) 124 | if m.bias is not None: 125 | init.constant_(m.bias, 0) 126 | 127 | def forward(self, x): 128 | out = self.hs1(self.bn1(self.conv1(x))) 129 | out0 = self.bneck(out) 130 | out1 = self.bneck2(out0) 131 | out1 = self.hs2(self.bn2(self.conv2(out1))) 132 | #out1 = F.avg_pool2d(out1, 7) 133 | #out1 = out1.view(out1.size(0), -1) 134 | #out1 = self.hs3(self.bn3(self.linear3(out1))) 135 | #out1 = self.linear4(out1) 136 | return out0,out1 137 | 138 | 139 | 140 | class MobileNetV3_Small(nn.Module): 141 | def __init__(self, num_classes=1000): 142 | super(MobileNetV3_Small, self).__init__() 143 | self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False) 144 | self.bn1 = nn.BatchNorm2d(16) 145 | self.hs1 = hswish() 146 | 147 | self.bneck1 = nn.Sequential( 148 | Block(3, 16, 16, 16, nn.ReLU(inplace=True), SeModule(16), 2), 149 | Block(3, 16, 72, 24, nn.ReLU(inplace=True), None, 2), 150 | Block(3, 24, 88, 24, nn.ReLU(inplace=True), None, 1), 151 | Block(5, 24, 96, 40, hswish(), SeModule(40), 2), 152 | Block(5, 40, 240, 40, hswish(), SeModule(40), 1), 153 | Block(5, 40, 240, 40, hswish(), SeModule(40), 1), 154 | Block(5, 40, 120, 48, hswish(), SeModule(48), 1), 155 | Block(5, 48, 144, 48, hswish(), SeModule(48), 1), 156 | 157 | ) 158 | self.bneck2 = nn.Sequential( 159 | Block(5, 48, 288, 96, hswish(), SeModule(96), 2), 160 | Block(5, 96, 576, 96, hswish(), SeModule(96), 1), 161 | Block(5, 96, 576, 96, hswish(), SeModule(96), 1), 162 | ) 163 | 164 | self.conv2 = nn.Conv2d(96, 576, kernel_size=1, stride=1, padding=0, bias=False) 165 | self.bn2 = nn.BatchNorm2d(576) 166 | self.hs2 = hswish() 167 | #self.linear3 = nn.Linear(576, 1280) 168 | #self.bn3 = nn.BatchNorm1d(1280) 169 | #self.hs3 = hswish() 170 | #self.linear4 = nn.Linear(1280, num_classes) 171 | self.init_params() 172 | 173 | def init_params(self): 174 | for m in self.modules(): 175 | if isinstance(m, nn.Conv2d): 176 | init.kaiming_normal_(m.weight, mode='fan_out') 177 | if m.bias is not None: 178 | init.constant_(m.bias, 0) 179 | elif isinstance(m, nn.BatchNorm2d): 180 | init.constant_(m.weight, 1) 181 | init.constant_(m.bias, 0) 182 | elif isinstance(m, nn.Linear): 183 | init.normal_(m.weight, std=0.001) 184 | if m.bias is not None: 185 | init.constant_(m.bias, 0) 186 | 187 | def forward(self, x): 188 | out = self.hs1(self.bn1(self.conv1(x))) 189 | out = self.bneck1(out) 190 | out = self.bneck2(out) 191 | out = self.hs2(self.bn2(self.conv2(out))) 192 | #out = F.avg_pool2d(out, 7) 193 | #out = out.view(out.size(0), -1) 194 | #out = self.hs3(self.bn3(self.linear3(out))) 195 | #out = self.linear4(out) 196 | return out 197 | 198 | 199 | def 
MobileNetV3(pretrained, **kwargs): 200 | model = MobileNetV3_Large() 201 | if pretrained: 202 | if isinstance(pretrained, str): 203 | model_dict = model.state_dict() 204 | #model.load_state_dict(torch.load(pretrained)['state_dict']) 205 | pretrained_dict = torch.load(pretrained)['state_dict'] 206 | 207 | 208 | for k1, v1 in pretrained_dict.items() : 209 | n1 = k1.replace('module.', '') 210 | #print(k1) 211 | 212 | for k2, v2 in model_dict.items() : 213 | n2 = k2.replace('bneck2.0.', 'bneck.13.') 214 | n2 = n2.replace('bneck2.1.', 'bneck.14.') 215 | if n1 == n2 : 216 | #print(k1,k2) 217 | model_dict[k2]=v1 218 | 219 | model.load_state_dict(model_dict) 220 | #torch.save(model, 'test.pth.tar') 221 | #for name,param in model.named_parameters(): 222 | else: 223 | raise Exception("darknet request a pretrained path. got [{}]".format(pretrained)) 224 | return model 225 | 226 | def test(): 227 | net = MobileNetV3_Small() 228 | x = torch.randn(2,3,224,224) 229 | y = net(x) 230 | print(y.size()) 231 | 232 | # test() -------------------------------------------------------------------------------- /models/seg_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from utils import AverageMeter 6 | from utils.iou import * 7 | from torch.autograd import Function 8 | import gc 9 | use_cuda = torch.cuda.is_available() 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | import torchvision 12 | import cv2 13 | 14 | class SegLoss(nn.Module): 15 | class sigmoid(Function): 16 | @staticmethod 17 | def forward(ctx, input): 18 | #ctx.save_for_backward(input) 19 | sigmoid_eval = 1.0/(1.0 + torch.exp(-input)) 20 | #input = sigmoid_eval 21 | return sigmoid_eval 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | #input, = ctx.saved_tensors 26 | #print(grad_output) 27 | # Maximum likelihood and gradient descent demonstration 28 | # https://blog.csdn.net/yanzi6969/article/details/80505421 29 | # https://xmfbit.github.io/2018/03/21/cs229-supervised-learning/ 30 | # https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent 31 | grad_input = grad_output.clone() 32 | return grad_input 33 | def __init__(self,num_classes): 34 | super(SegLoss, self).__init__() 35 | self.num_classes = num_classes 36 | self.threshold = nn.Threshold(0.5, 0.) 37 | return 38 | 39 | 40 | def weighted_mse_loss(self,input, target, weights): 41 | out = (input - target)**2 42 | total = torch.sum(weights) 43 | out = out * weights / total 44 | # expand_as because weights are prob not defined for mini-batch 45 | loss = torch.sum(out) 46 | #print(loss) 47 | return loss 48 | 49 | 50 | 51 | def forward(self, input, targets=None): 52 | if targets is not None: 53 | truth = targets.clone().to(device) 54 | truth = truth.permute(0,3,1,2) 55 | #print(input.shape,truth.shape) 56 | #.to(device) 57 | #print(truth) 58 | #print(truth>0.1) 59 | output = self.sigmoid.apply(input) 60 | #result = output[0,0,...] 
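            # The custom sigmoid above returns grad_output unchanged in backward(), so the
            # gradient of the squared error w.r.t. the pre-sigmoid logits is proportional to
            # (sigmoid(x) - target), matching the binary cross-entropy gradient (cf. the MLE
            # links in sigmoid.backward) rather than being damped by sigmoid'(x).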
61 | #print(result.shape) 62 | #cv2.namedWindow('frame',cv2.WINDOW_NORMAL) 63 | #cv2.resizeWindow('frame', 640, 480) 64 | #cv2.imshow('frame', result.cpu().detach().numpy()) 65 | #key = cv2.waitKey(1) 66 | obj = torch.masked_select(output, truth>=0.5) 67 | no_obj = torch.masked_select(output, truth<0.5) 68 | #mask_truth = torch.masked_select(truth, truth>=0.3) 69 | #threshold = torch.tensor([0.3]).to(device) 70 | #results = (truth>threshold).float()*1 71 | #results = obj + no_obj*truth 72 | #print(results) 73 | #print(torch.mean(output)) 74 | weights = torch.ones_like(input).to(device) 75 | loss = self.weighted_mse_loss(output , truth , weights) 76 | #print(loss) 77 | return loss*0.05,torch.mean(obj).item(),torch.mean(no_obj).item() 78 | else: 79 | output = self.sigmoid.apply(input) 80 | result = output[0,...].cpu().detach().numpy() 81 | return result 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /models/voc/config.yaml: -------------------------------------------------------------------------------- 1 | img_h: 352 2 | img_w: 352 3 | batch_size: 32 4 | train_img_size: 5 | - [352, 352] 6 | - [320, 320] 7 | - [288, 288] 8 | - [384, 384] 9 | - [416, 416] 10 | expand_scale: 2.1610954191879452 11 | mosaic_num: [1,4] 12 | iou_weighting: 0.021830872589525777 13 | normalize: 14 | mean: [0.485, 0.456, 0.406] 15 | std: [0.229, 0.224, 0.225] 16 | yolo: 17 | num_classes: 20 18 | num_anchors: 3 19 | ignore_thresh: [0.6076333316652263, 0.5623606200028424] 20 | iou_thresh: 0.5497280113447018 21 | anchors: 22 | - [143, 265] 23 | - [153, 121] 24 | - [280, 279] 25 | - [20, 37] 26 | - [49, 94] 27 | - [73, 201] 28 | classes: 20 29 | mask: 30 | - [0, 1, 2] 31 | - [3, 4, 5] 32 | -------------------------------------------------------------------------------- /models/yolo_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from utils import AverageMeter 6 | from utils.iou import * 7 | from torch.autograd import Function 8 | import gc 9 | use_cuda = torch.cuda.is_available() 10 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 11 | import torchvision 12 | 13 | 14 | class YOLOLoss(nn.Module): 15 | class sigmoid(Function): 16 | @staticmethod 17 | def forward(ctx, input): 18 | #ctx.save_for_backward(input) 19 | sigmoid_eval = 1.0/(1.0 + torch.exp(-input)) 20 | #input = sigmoid_eval 21 | return sigmoid_eval 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | #input, = ctx.saved_tensors 26 | #print(grad_output) 27 | # Maximum likelihood and gradient descent demonstration 28 | # https://blog.csdn.net/yanzi6969/article/details/80505421 29 | # https://xmfbit.github.io/2018/03/21/cs229-supervised-learning/ 30 | # https://zlatankr.github.io/posts/2017/03/06/mle-gradient-descent 31 | grad_input = grad_output.clone() 32 | return grad_input 33 | def __init__(self, anchors, mask, num_classes, img_size,ignore_threshold,iou_thresh,val_conf = 0.1,iou_weighting = 0.01): 34 | super(YOLOLoss, self).__init__() 35 | self.anchors = anchors 36 | self.mask = mask; 37 | self.num_mask = len(mask) 38 | self.num_anchors = len(anchors) 39 | self.num_classes = num_classes 40 | self.bbox_attrs = 5 + num_classes 41 | self.img_size = img_size 42 | self.ignore_threshold = ignore_threshold 43 | #self.sigmoid = self.MSigmoid() 44 | self.nn_sigmoid = torch.nn.Sigmoid() 45 | self.val_conf = val_conf 46 | self.mse_loss = 
nn.MSELoss() 47 | self.bce_loss = nn.BCELoss() 48 | self.label_smooth_eps = 0.1 49 | self.iou_thresh = iou_thresh 50 | self.iou_weighting = iou_weighting 51 | 52 | 53 | def weighted_mse_loss(self,input, target, weights): 54 | out = (input - target)**2 55 | total = torch.sum(weights) 56 | out = out * weights / total 57 | # expand_as because weights are prob not defined for mini-batch 58 | loss = torch.sum(out) 59 | #print(loss) 60 | return loss 61 | 62 | def pre_maps(self,bs,is_cuda,anchors, in_w, in_h): 63 | 64 | FloatTensor = torch.cuda.FloatTensor if is_cuda else torch.FloatTensor 65 | LongTensor = torch.cuda.LongTensor if is_cuda else torch.LongTensor 66 | this_anchors = np.array(anchors)[self.mask] 67 | anchor_w = FloatTensor(this_anchors).index_select(1, LongTensor([0])) 68 | anchor_h = FloatTensor(this_anchors).index_select(1, LongTensor([1])) 69 | anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(bs,self.num_mask,in_h,in_w,1).to(device) 70 | anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(bs,self.num_mask,in_h,in_w,1).to(device) 71 | grid_x = torch.linspace(0, in_w-1, in_w).repeat(in_w, 1).repeat(bs * self.num_mask, 1, 1).view(bs,self.num_mask,in_h,in_w,1).type(FloatTensor) 72 | grid_y = torch.linspace(0, in_h-1, in_h).repeat(in_h, 1).t().repeat(bs * self.num_mask, 1, 1).view(bs,self.num_mask,in_h,in_w,1).type(FloatTensor) 73 | grid_xy = torch.cat((grid_x,grid_y),4) 74 | anchor_wh = torch.cat((anchor_w,anchor_h),4) 75 | return grid_xy,anchor_wh 76 | 77 | def get_target(self, target,input, anchors, in_w, in_h, ignore_threshold,iou_thresh=0.5): 78 | 79 | bs = input.size(0) 80 | this_anchors = np.array(anchors)[self.mask] 81 | FloatTensor = torch.cuda.FloatTensor if input.is_cuda else torch.FloatTensor 82 | targets_weight = torch.zeros(bs, self.num_mask, in_h, in_w,self.num_classes+1, requires_grad=False).to(device) 83 | pred_boxes = torch.zeros(bs,self.num_mask,in_h, in_w,0, requires_grad=False).to(device) 84 | prediction = input.view(bs, self.num_mask,self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() 85 | xy = self.sigmoid.apply(prediction[..., 0:2]) 86 | wh = torch.exp(prediction[..., 2:4]) 87 | output = self.sigmoid.apply(prediction[..., 4:]) 88 | 89 | grid_xy,anchor_wh = self.pre_maps(bs,input.is_cuda,anchors, in_w, in_h) 90 | pred_boxes = torch.cat((pred_boxes,(xy + grid_xy)/FloatTensor([in_w,in_h])),4) 91 | pred_boxes = torch.cat((pred_boxes,wh * anchor_wh),4) 92 | self.wh_to_x2y2(pred_boxes) 93 | 94 | 95 | count = recall = ious = obj = cls_score = 0 96 | #output = torch.cat((xy,prediction[..., 2:4],conf_cls),4).to(device) 97 | targets = output.clone().to(device) 98 | no_obj = torch.sum(output[...,0]) 99 | no_cnt = output[...,0].numel() 100 | targets_weight_parts = targets_weight[...,0] 101 | targets_parts = targets[...,0] 102 | anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((self.num_anchors, 2)),np.array(anchors)), 1)) 103 | iou_loss = torch.FloatTensor(0).to(device) 104 | iou_weight = torch.FloatTensor(0).to(device) 105 | in_dim = torch.Tensor([in_w,in_h]) 106 | #print(need_grad_tensor.view(,self.num_classes+1).shape) 107 | for b in range(bs): 108 | if len(target[b]) == 0 : 109 | targets_weight_parts[b] = 1 110 | targets_parts[b] = 0 111 | continue 112 | gt_boxes = target[b][...,1:].clone().detach().to(device) 113 | self.wh_to_x2y2(gt_boxes) 114 | 115 | pred_boxes2 = pred_boxes[b].view((in_w*in_h*self.num_mask, 4)).to(device) 116 | pred_iou = find_jaccard_overlap(gt_boxes,pred_boxes2).to(device) 117 | 
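            # pred_iou: pairwise IoU between this image's ground-truth boxes (rows) and all
            # num_mask*in_h*in_w predicted boxes; the max over dim 0 below keeps, for every
            # anchor cell, its best overlap with any ground truth.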
#print(pred_iou.shape) 118 | pred_iou,_ = torch.max(pred_iou,0) 119 | 120 | pred_iou = pred_iou.view((self.num_mask,in_h,in_w)) 121 | 122 | #for i in range(self.num_mask): 123 | m = pred_iouiou_thresh).tolist() 140 | bn = self.num_anchors + 1 141 | if best_n[t] in self.mask : 142 | bn = self.mask.index(best_n[t]) 143 | #k = bn 144 | for k in range(self.num_mask): 145 | if k == bn or iou_thresh_list[k] == True : 146 | count+= 1 147 | cls_index = int(gt[t,0]) 148 | 149 | targets_parts[b,k,gj,gi] = 1 150 | targets_weight_parts[b,k,gj,gi] = 1 151 | conf = output[b,k,gj,gi,0].item() 152 | obj = obj + conf 153 | no_obj = no_obj - conf 154 | gt_box_xy = gt_boxes[t].unsqueeze(0) 155 | pred = pred_boxes[b, k, gj, gi].unsqueeze(0) 156 | 157 | giou,iou = self.box_ciou(gt_box_xy,pred) 158 | 159 | iou_loss = torch.cat((iou_loss,giou.to(device))) 160 | area = 2.0 - self.get_area(gt_box_xy) 161 | 162 | iou_weight = torch.cat((iou_weight,(area).to(device))) 163 | if iou>ignore_threshold : 164 | recall = recall + 1 165 | ious = ious + iou.item() 166 | cls_tensor = targets[b, k, gj, gi,1:] 167 | cls_weight = targets_weight[b, k, gj, gi,1:] 168 | self.class_loss(cls_tensor,cls_weight,cls_index) 169 | cls_score = cls_score + output[b,k,gj,gi,1+cls_index].item() 170 | if count > 0: 171 | obj_avg = obj/count 172 | cls_avg = cls_score/count 173 | no_obj = no_obj/(no_cnt-count) 174 | avg_iou = ious/count 175 | recall = recall/count 176 | else : 177 | recall = obj_avg = cls_avg = no_obj = avg_iou = 0 178 | return targets,targets_weight,output,recall,avg_iou,obj_avg,no_obj,cls_avg,count/bs,iou_loss,iou_weight 179 | 180 | def get_pred_boxes(self,input, anchors, in_w, in_h): 181 | 182 | bs = input.size(0) 183 | pred_boxes = torch.zeros(bs,self.num_mask,in_h, in_w,0, requires_grad=False).to(device) 184 | #pred_boxes = torch.zeros(in_h, in_w,4, requires_grad=False) 185 | outputs=list() 186 | prediction = input.view(bs, self.num_mask,self.bbox_attrs, in_h, in_w).permute(0, 1, 3, 4, 2).contiguous() 187 | xy = torch.sigmoid(prediction[..., 0:2]) 188 | wh = torch.exp(prediction[..., 2:4]) 189 | conf_cls = torch.sigmoid(prediction[..., 4:]) # Conf 190 | 191 | FloatTensor = torch.cuda.FloatTensor if input.is_cuda else torch.FloatTensor 192 | grid_xy,anchor_wh = self.pre_maps(bs,input.is_cuda,anchors, in_w, in_h) 193 | 194 | pred_boxes = torch.cat((pred_boxes,(xy + grid_xy)/FloatTensor([in_w,in_h])),4) 195 | pred_boxes = torch.cat((pred_boxes,wh * anchor_wh),4) 196 | self.wh_to_x2y2(pred_boxes) 197 | pred_boxes = torch.cat((pred_boxes,conf_cls[...,0].unsqueeze(4)),4) 198 | score,cls_idx = torch.max(conf_cls[...,1:self.bbox_attrs],dim=4) 199 | pred_boxes = torch.cat((pred_boxes,score.unsqueeze(4),cls_idx.float().unsqueeze(4)),4) 200 | pred_boxes = pred_boxes.to(device) 201 | mask = pred_boxes[...,4]>self.val_conf 202 | for b in range(bs): 203 | outputs.append(pred_boxes[b,mask[b]]) 204 | return outputs 205 | 206 | def forward(self, input, targets=None): 207 | bs = input.size(0) 208 | in_h = input.size(2) 209 | in_w = input.size(3) 210 | stride_h = self.img_size[1] / in_h 211 | stride_w = self.img_size[0] / in_w 212 | #print(self.img_size) 213 | #print(input.shape) 214 | scaled_anchors = [(a_w/self.img_size[0] , a_h/self.img_size[1] ) for a_w, a_h in self.anchors] 215 | 216 | if targets is not None: 217 | #print(self.ignore_threshold) 218 | target,weights,output,recall,avg_iou,obj,no_obj,cls_score,count,iou_losses,iou_weights = self.get_target(targets,input, scaled_anchors,in_w, in_h,self.ignore_threshold,self.iou_thresh) 219 | 
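            # The detection loss below is a weighted MSE over the objectness/class maps from
            # get_target, plus a CIoU-based box term (iou_losses regressed towards 1, weighted
            # by 2 - gt box area so small objects count more) scaled by self.iou_weighting.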
loss = self.weighted_mse_loss(output , target , weights) 220 | iou_target = torch.ones_like(iou_losses) 221 | #iou_loss= torch.sum(iou_target-iou_losses) 222 | iou_loss = torch.Tensor([0]).to(device) 223 | if iou_losses.size(0)>0: 224 | iou_loss = self.weighted_mse_loss(iou_losses,iou_target,iou_weights)/iou_losses.numel() 225 | #iou_loss = self.mse_loss(iou_losses,iou_target)/iou_losses.numel() 226 | #print(iou_loss) 227 | #iou_loss = torch.Tensor(iou_loss) 228 | #print(loss,iou_loss) 229 | #loss = torch.cat((loss.unsqueeze(0) ,iou_loss.unsqueeze(0))) 230 | 231 | if torch.isnan(iou_loss)==True or torch.isnan(loss)==True: 232 | print('\n',loss,iou_loss,bs) 233 | 234 | loss = loss + iou_loss*self.iou_weighting 235 | 236 | return loss, recall,avg_iou,obj,no_obj,cls_score,count 237 | 238 | else: 239 | preds = self.get_pred_boxes(input, scaled_anchors,in_w, in_h) 240 | 241 | return preds 242 | 243 | def wh_to_x2y2(self,bbox): 244 | bbox[...,0] = bbox[...,0] - bbox[...,2]/2 245 | bbox[...,1] = bbox[...,1] - bbox[...,3]/2 246 | bbox[...,2] = bbox[...,2] + bbox[...,0] 247 | bbox[...,3] = bbox[...,3] + bbox[...,1] 248 | # minimum convex box 249 | def box_c(self,box1,box2) : 250 | l = torch.min(box1[...,0],box2[...,0]).unsqueeze(0) 251 | t = torch.min(box1[...,1],box2[...,1]).unsqueeze(0) 252 | r = torch.max(box1[...,2],box2[...,2]).unsqueeze(0) 253 | b = torch.max(box1[...,3],box2[...,3]).unsqueeze(0) 254 | #print(t.shape) 255 | box_c = torch.cat((l,t,r,b)) 256 | return box_c.permute(1,0) 257 | def box_ciou(self,box1,box2): 258 | ciou = torch.zeros(0,1).to(device) 259 | iou = torch.zeros(0,1).to(device) 260 | #if box2.size(0) == 0 : 261 | # return ciou,iou 262 | box_c = self.box_c(box1,box2) 263 | #print(box_c.shape) 264 | c = self.get_area(box_c).unsqueeze(1) 265 | iou = find_jaccard_overlap(box1, box2) 266 | 267 | w1,h1 = (box1[...,2] - box1[...,0]).unsqueeze(1),(box1[...,3] - box1[...,1]).unsqueeze(1) 268 | w2,h2 = (box2[...,2] - box2[...,0]).unsqueeze(1),(box2[...,3] - box2[...,1]).unsqueeze(1) 269 | x1,y1 = (box1[...,2] + box1[...,0]).unsqueeze(1)/2,(box1[...,1] + box1[...,3]).unsqueeze(1)/2 270 | x2,y2 = (box2[...,2] + box2[...,0]).unsqueeze(1)/2,(box2[...,1] + box2[...,3]).unsqueeze(1)/2 271 | 272 | u = (x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2); 273 | #if c==0 : 274 | # ciou_term = iou 275 | #else : 276 | #print(c.shape,u.shape) 277 | d = u/c 278 | #print(d.shape) 279 | ar_gt = w2/h2 280 | ar_pred = w1/h1 281 | 282 | ar_loss = 4 / (math.pi * math.pi) * (torch.atan(ar_gt) - torch.atan(ar_pred)) * (torch.atan(ar_gt) - torch.atan(ar_pred)); 283 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001); 284 | ciou_term = d + alpha * ar_loss; 285 | #print(ar_gt.shape,ar_pred.shape,ar_loss.shape,alpha.shape,torch.atan(ar_pred).shape) 286 | mask = (c == 0) 287 | ciou_term = ciou_term * (~mask) + iou*mask 288 | #print(ciou_term.shape,ciou.shape,iou.shape,box1.shape,box2.shape) 289 | ciou = torch.cat((ciou,ciou_term)) 290 | 291 | #print(iou,iou-giou_term) 292 | #print(c,u) 293 | return iou-ciou,iou 294 | 295 | def box_giou(self,box1,box2): 296 | box_c = self.box_c(box1,box2) 297 | c = self.get_area(box_c).unsqueeze(1) 298 | 299 | #iou = find_jaccard_overlap(box1, box2) 300 | u = find_union(box1,box2) 301 | i = find_intersection(box1,box2) 302 | iou = i/u 303 | #print('iou.shape',iou.shape) 304 | #giou_term = [iou if (k1 == 0) else (k1 - k2)/k1 for k1,k2 in zip(c, u)] 305 | #if c==0 : 306 | # giou_term = iou 307 | #else : 308 | # giou_term = (c-u)/c 309 | #print('c.shape',c.shape) 310 | 
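        # GIoU = IoU - (|C| - |U|) / |C|, where C is the smallest enclosing box of the two inputs
        # and U is their union; the first returned value (iou - giou_term) is therefore the GIoU score.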
#print('u.shape',u.shape) 311 | giou_term = (c-u)/c 312 | #print('giou_term.shape',giou_term.shape) 313 | mask = (c == 0) 314 | giou_term = giou_term * (~mask) + iou*mask 315 | #print(iou,iou-giou_term) 316 | #print(c,u) 317 | return iou-giou_term,iou 318 | def get_area(self,box): 319 | return (box[...,2] - box[...,0]) * (box[...,3] - box[...,1]) 320 | def get_aspect_ratio(self,box): 321 | return (box[...,2] - box[...,0]) / (box[...,3] - box[...,1]) 322 | def IOU_Loss(self,gt_box,pred_box,input,output,accumulate): 323 | 324 | X = self.get_area(pred_box) 325 | Xhat = self.get_area(gt_box) 326 | 327 | pred_l,pred_t,pred_r,pred_b = pred_box[...,0],pred_box[...,1],pred_box[...,2],pred_box[...,3] 328 | gt_l,gt_t,gt_r,gt_b = gt_box[...,0],gt_box[...,1],gt_box[...,2],gt_box[...,3] 329 | 330 | Ih = torch.min(pred_b, gt_b) - torch.max(pred_t, gt_t) 331 | Iw = torch.min(pred_r, gt_r) - torch.max(pred_l, gt_l) 332 | I = Iw*Ih # intersection area 333 | #print(Iw,Ih,I) 334 | 335 | #m = I > 0 336 | #if m == False: 337 | # print(Iw,Ih,I) 338 | U = X + Xhat - I; # Union area 339 | Cw = torch.max(pred_r, gt_r) - torch.min(pred_l, gt_l); 340 | Ch = torch.max(pred_b, gt_b) - torch.min(pred_t, gt_t); 341 | C = Cw * Ch; 342 | #iou = find_jaccard_overlap(gt_box, pred_box) 343 | #print(pred_box,gt_box) 344 | #if I<0 : 345 | # I = 0 346 | #print((I/U)==iou) 347 | 348 | dX_wrt_t = -1 * (pred_r - pred_l); 349 | dX_wrt_b = -dX_wrt_t; 350 | dX_wrt_l = -1 * (pred_b - pred_t); 351 | dX_wrt_r = -dX_wrt_l; 352 | 353 | dI_wrt_t = (pred_t > gt_t)*(-Iw) 354 | dI_wrt_b = (pred_b > gt_b)*(Iw) 355 | dI_wrt_l = (pred_l > gt_l)*(-Ih) 356 | dI_wrt_r = (pred_r > gt_r)*(Ih) 357 | 358 | # derivative of U with regard to x 359 | dU_wrt_t = dX_wrt_t - dI_wrt_t 360 | dU_wrt_b = dX_wrt_b - dI_wrt_b 361 | dU_wrt_l = dX_wrt_l - dI_wrt_l 362 | dU_wrt_r = dX_wrt_r - dI_wrt_r 363 | 364 | dC_wrt_t = (pred_t < gt_t)*(-1 * Cw) 365 | dC_wrt_b = (pred_b > gt_b)*Cw 366 | dC_wrt_l = (pred_l < gt_l)*(-1 * Ch) 367 | dC_wrt_r = (pred_r > gt_r)*Ch 368 | 369 | p_dt = p_db = p_dl = p_dr = 0 370 | if U > 0 : 371 | p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U) 372 | p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U) 373 | p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U) 374 | p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U) 375 | #p_dt = ((U+I) * dI_wrt_t)/ (U*I ) - (dX_wrt_t) / U 376 | #p_db = ((U+I) * dI_wrt_b)/ (U*I ) - (dX_wrt_t) / U 377 | #p_dl = ((U+I) * dI_wrt_l)/ (U*I ) - (dX_wrt_t) / U 378 | #p_dr = ((U+I) * dI_wrt_r)/ (U*I ) - (dX_wrt_t) / U 379 | if C > 0 : 380 | # apply "C" term from gIOU 381 | p_dt += ((C * dU_wrt_t) - (U * dC_wrt_t)) / (C * C); 382 | p_db += ((C * dU_wrt_b) - (U * dC_wrt_b)) / (C * C); 383 | p_dl += ((C * dU_wrt_l) - (U * dC_wrt_l)) / (C * C); 384 | p_dr += ((C * dU_wrt_r) - (U * dC_wrt_r)) / (C * C); 385 | 386 | delta_x = ((p_dl + p_dr)) 387 | delta_y = ((p_dt + p_db)) 388 | delta_w = ((-0.5 * p_dl) + (0.5 * p_dr)) 389 | delta_h = ((-0.5 * p_dt) + (0.5 * p_db)) 390 | #tx,ty,tw,th,_ = self.DenseBoxLoss(gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h) 391 | #print(output[...,0]-tx,delta_x) 392 | if accumulate: 393 | tx = (output[...,0] + delta_x*0.5).item() 394 | ty = (output[...,1] + delta_y*0.5).item() 395 | tw = (output[...,2] + (delta_w*torch.exp(input[...,2]))*0.5).item() 396 | th = (output[...,3] + (delta_h*torch.exp(input[...,3]))*0.5).item() 397 | else : 398 | tx = (input[...,0] + delta_x*0.5).item() 399 | ty = (input[...,1] + delta_y*0.5).item() 400 | tw = (input[...,2] + 
(delta_w*torch.exp(input[...,2]))*0.5).item() 401 | th = (input[...,3] + (delta_h*torch.exp(input[...,3]))*0.5).item() 402 | #print(tw,th) 403 | #delta_w = delta_w*torch.exp(delta_w); 404 | #delta_h = delta_h*torch.exp(delta_h); 405 | #print(p_dt,p_db,p_dl,p_dr) 406 | #else : 407 | # tx,ty,tw,th,_ = self.DenseBoxLoss(gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h) 408 | target = torch.Tensor([tx,ty,tw,th]).to(device) 409 | return target,(2.0-Xhat),I/U 410 | 411 | def DenseBoxLoss(self,gt_box,pred_box,grid_x,grid_y,anchors,in_w,in_h): 412 | w = gt_box[...,2] - gt_box[...,0] 413 | h = gt_box[...,3] - gt_box[...,1] 414 | x = gt_box[...,0] + w / 2 415 | y = gt_box[...,1] + h / 2 416 | tx = x * in_w - grid_x 417 | ty = y * in_h - grid_y 418 | tw = torch.log(w/anchors[0]) 419 | th = torch.log(h/anchors[1]) 420 | #giou = self.box_giou(gt_box,pred_box) 421 | weight = 2.0 - (w*h) 422 | target = torch.Tensor([tx,ty,tw,th]).to(device) 423 | iou = find_jaccard_overlap(gt_box, pred_box) 424 | return target,weight,iou 425 | def class_loss(self,target_cls,target_weight,cls_idx): 426 | y_true = (1 - self.label_smooth_eps) + 0.5*self.label_smooth_eps; 427 | y_false = 0.5*self.label_smooth_eps; 428 | if target_weight[...,cls_idx]>0: 429 | target_cls[...,cls_idx] = y_true 430 | target_weight[...,cls_idx] = 1 431 | else : 432 | target_cls[...,0:self.num_classes] = y_false 433 | target_weight[...,0:self.num_classes] = 1 434 | target_cls[...,cls_idx] = y_true 435 | #target_weight[cls_idx] = 1 436 | 437 | 438 | 439 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch 3 | torchvision 4 | imgaug 5 | lmdb 6 | six 7 | matplotlib 8 | tqdm 9 | nni 10 | opencv_python 11 | progress 12 | filetype 13 | msgpack_python 14 | Pillow 15 | PyYAML 16 | tensorboard 17 | -------------------------------------------------------------------------------- /save/00690c26-e4bbbd72_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eric612/Mobilenet-YOLO-Pytorch/cd8d99425c51c3f37d03633302076bd94738f174/save/00690c26-e4bbbd72_result.jpg -------------------------------------------------------------------------------- /scripts/VOC2007.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to data/ ..." 11 | mkdir -p data 12 | cd data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2007 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 26 | echo "Downloading VOC2007 test data ..." 27 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 28 | echo "Done downloading." 29 | 30 | # Extract data 31 | echo "Extracting trainval ..." 32 | tar -xvf VOCtrainval_06-Nov-2007.tar 33 | echo "Extracting test ..." 34 | tar -xvf VOCtest_06-Nov-2007.tar 35 | echo "removing tars ..." 
36 | rm VOCtrainval_06-Nov-2007.tar 37 | rm VOCtest_06-Nov-2007.tar 38 | 39 | end=`date +%s` 40 | runtime=$((end-start)) 41 | 42 | echo "Completed in" $runtime "seconds" -------------------------------------------------------------------------------- /scripts/VOC2012.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Ellis Brown 3 | 4 | start=`date +%s` 5 | 6 | # handle optional download dir 7 | if [ -z "$1" ] 8 | then 9 | # navigate to ~/data 10 | echo "navigating to ~/data/ ..." 11 | mkdir -p data 12 | cd data/ 13 | else 14 | # check if is valid directory 15 | if [ ! -d $1 ]; then 16 | echo $1 "is not a valid directory" 17 | exit 0 18 | fi 19 | echo "navigating to" $1 "..." 20 | cd $1 21 | fi 22 | 23 | echo "Downloading VOC2012 trainval ..." 24 | # Download the data. 25 | curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 26 | echo "Done downloading." 27 | 28 | 29 | # Extract data 30 | echo "Extracting trainval ..." 31 | tar -xvf VOCtrainval_11-May-2012.tar 32 | echo "removing tar ..." 33 | rm VOCtrainval_11-May-2012.tar 34 | 35 | end=`date +%s` 36 | runtime=$((end-start)) 37 | 38 | echo "Completed in" $runtime "seconds" 39 | -------------------------------------------------------------------------------- /scripts/create.sh: -------------------------------------------------------------------------------- 1 | python3 folder2lmdb.py -d data/bdd100k.yaml 2 | -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | python3 inference.py --checkpoint checkpoints/bdd100k/model_best.pth.tar -y data/bdd100k.yaml -i images/00690c26-e4bbbd72.jpg -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | python train.py --checkpoint checkpoints/voc/mobilenetv2/ -y data/voc_data.yaml -------------------------------------------------------------------------------- /search_space.json: -------------------------------------------------------------------------------- 1 | { 2 | "learning_rate":{"_type":"choice","_value":[0.0004,0.0005,0.0006,0.0007]}, 3 | "ignore_thresh_1":{"_type":"uniform","_value":[0.6, 0.75]}, 4 | "ignore_thresh_2":{"_type":"uniform","_value":[0.5, 0.65]}, 5 | "iou_thresh":{"_type":"uniform","_value":[0.4, 0.6]}, 6 | "expand_scale":{"_type":"uniform","_value":[1.0, 2.5]}, 7 | "mosaic_num":{"_type":"choice", "_value": [[1,4],[2,3,4]]}, 8 | "weight_decay":{"_type":"choice","_value":[1e-2,4e-3,4e-4,4e-5]}, 9 | "iou_weighting":{"_type":"uniform","_value":[0.005, 0.1]} 10 | } -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import shutil 5 | import time 6 | import warnings 7 | import numpy as np 8 | from progress.bar import (Bar, IncrementalBar) 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.nn.parallel 13 | import torch.backends.cudnn as cudnn 14 | import torch.distributed as dist 15 | import torch.optim 16 | import torch.multiprocessing as mp 17 | import torch.utils.data 18 | import torch.utils.data.distributed 19 | import torchvision.transforms as transforms 20 | import torchvision.datasets as datasets 21 | import 
torchvision.models as models 22 | import folder2lmdb 23 | import CustomBatchSampler 24 | import cv2 25 | #from models.voc.mbv2_yolo import yolo 26 | #from models.voc.yolo_loss import * 27 | from models.mbv2_yolo import yolo 28 | from models.yolo_loss import * 29 | from utils import Bar, Logger, AverageMeter 30 | from utils.eval_mAP import * 31 | from pprint import PrettyPrinter 32 | import yaml 33 | import nni 34 | from nni.utils import merge_parameter 35 | from nni.trial import get_sequence_id 36 | from nni.trial import get_trial_id 37 | pp = PrettyPrinter() 38 | from torch.utils.tensorboard import SummaryWriter 39 | 40 | def seed_worker(worker_id): 41 | worker_seed = torch.initial_seed() % 2**32 42 | np.random.seed(worker_seed) 43 | random.seed(worker_seed) 44 | 45 | def main(args): 46 | #print('NNI_OUTPUT_DIR',os.environ["NNI_OUTPUT_DIR"]) 47 | #writer = SummaryWriter(os.environ["NNI_OUTPUT_DIR"]+'/tensorboard/') 48 | if 'NNI_OUTPUT_DIR' not in os.environ: 49 | writer = SummaryWriter('tensorboard/') 50 | else: 51 | writer = SummaryWriter(os.environ["NNI_OUTPUT_DIR"]+'/tensorboard/') 52 | #with open('models/voc/config.yaml', 'r') as f: 53 | 54 | #with open('data/voc_data.yaml', 'r') as f: 55 | with open(args.data_yaml, 'r') as f: 56 | dataset_path = yaml.load(f) 57 | classes_name = dataset_path["classes"]["map"] 58 | classes_name.insert(0, 'background') 59 | segmentation_enable = False 60 | segmentation_num_classes = 0 61 | print(dataset_path) 62 | if "segmentation_enable" in dataset_path: 63 | segmentation_enable = dataset_path["segmentation_enable"] 64 | if "segmentation_num_classes" in dataset_path: 65 | segmentation_num_classes = dataset_path["segmentation_num_classes"] 66 | 67 | with open(dataset_path["model_config_path"], 'r') as f: 68 | config = yaml.load(f) 69 | if args.ignore_thresh_1 != None : 70 | config["yolo"]["ignore_thresh"][0] = args.ignore_thresh_1 71 | if args.ignore_thresh_2 != None : 72 | config["yolo"]["ignore_thresh"][1] = args.ignore_thresh_2 73 | if args.iou_thresh != None : 74 | config["yolo"]["iou_thresh"] = args.iou_thresh 75 | if args.expand_scale != None : 76 | config["expand_scale"] = args.expand_scale 77 | if args.mosaic_num != None : 78 | config["mosaic_num"] = args.mosaic_num 79 | if args.iou_weighting != None : 80 | config["iou_weighting"] = args.iou_weighting 81 | print(config) 82 | best_acc = 0 # best test accuracy 83 | #args = parser.parse_args() 84 | start_epoch = 0 85 | 86 | image_folder = folder2lmdb.ImageFolderLMDB 87 | 88 | train_dataset = image_folder( 89 | db_path=dataset_path["trainval_dataset_path"]["lmdb"], 90 | transform_size=config["train_img_size"], 91 | phase='train',batch_size = config["batch_size"], 92 | expand_scale=config["expand_scale"], 93 | mean = config["normalize"]["mean"], 94 | std = config["normalize"]["std"], 95 | has_seg = segmentation_enable, 96 | classes_name = classes_name, 97 | seg_num_classes = segmentation_num_classes 98 | ) 99 | 100 | test_dataset = image_folder( 101 | db_path=dataset_path["test_dataset_path"]["lmdb"], 102 | transform_size=[[config["img_w"],config["img_h"]]], 103 | phase='test',batch_size = config["batch_size"], 104 | mean = config["normalize"]["mean"], 105 | std = config["normalize"]["std"], 106 | has_seg = False, 107 | classes_name = classes_name, 108 | seg_num_classes = segmentation_num_classes 109 | ) 110 | BatchSampler = CustomBatchSampler.GreedyBatchSampler 111 | sampler = BatchSampler ( 112 | torch.utils.data.sampler.RandomSampler(train_dataset), 113 | batch_size=config["batch_size"], 114 | 
drop_last=False,sample=config["mosaic_num"]) 115 | train_loader = torch.utils.data.DataLoader( 116 | train_dataset,batch_sampler = sampler, 117 | num_workers=4, pin_memory=False,collate_fn=train_dataset.collate_fn, 118 | worker_init_fn=seed_worker) 119 | test_loader = torch.utils.data.DataLoader( 120 | test_dataset, config["batch_size"], shuffle=False, 121 | num_workers=4, pin_memory=False,collate_fn=test_dataset.collate_fn) 122 | model = yolo(config=config) 123 | #model_for_graph = yolo_graph(config=config) 124 | #input = torch.randn(1, 3, 352, 352) 125 | #writer.add_graph(model_for_graph,input) 126 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 127 | 128 | model = model.cuda() 129 | # Initialize the optimizer, with twice the default learning rate for biases, as in the original Caffe repo 130 | biases = list() 131 | not_biases = list() 132 | 133 | params = model.parameters() 134 | optimizer = optim.AdamW(params=params,lr = args.learning_rate,weight_decay= args.weight_decay) 135 | if not os.path.exists(args.checkpoint): 136 | os.makedirs(args.checkpoint) 137 | title = 'voc-training-process' 138 | if args.resume: 139 | # Load checkpoint. 140 | print('==> Resuming from checkpoint..') 141 | print(args.resume) 142 | assert os.path.isfile(args.resume), 'Error: no checkpoint directory found!' 143 | args.checkpoint = os.path.dirname(args.resume) 144 | checkpoint = torch.load(args.resume) 145 | best_acc = checkpoint['best_acc'] 146 | start_epoch = checkpoint['epoch'] 147 | model.load_state_dict(checkpoint['model']) 148 | optimizer.load_state_dict(checkpoint['optimizer']) 149 | model.yolo_losses[0].val_conf = checkpoint['conf'] 150 | model.yolo_losses[1].val_conf = checkpoint['conf'] 151 | logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title, resume=True) 152 | #for param_group in optimizer.param_groups: 153 | # param_group['lr'] = args.lr 154 | else: 155 | logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title) 156 | logger.set_names(['Epoch ', 'Loss ', 'Precision ', 'Time ', 'IOU ', 'Learning Rate']) 157 | test_acc = 0 158 | if args.evaluate: 159 | for epoch in range(1): 160 | test_acc = test(test_loader, model, optimizer, epoch , config, classes_name) 161 | return 162 | 163 | #ls = len(args.warm_up) 164 | for epoch in range(start_epoch, args.epochs): 165 | if epoch in args.warm_up: 166 | adjust_learning_rate(optimizer, 0.5) 167 | st = time.time() 168 | for epoch in range(start_epoch, args.epochs): 169 | # train for one epoch 170 | if epoch in args.warm_up: 171 | adjust_learning_rate(optimizer, 2) 172 | if epoch in args.schedule: 173 | #load_best_checkpoint(model=model, save_path=args.save_path) 174 | 175 | save_checkpoint({ 176 | 'epoch': epoch , 177 | 'model': model.state_dict(), 178 | 'acc': test_acc, 179 | 'best_acc': best_acc, 180 | 'optimizer' : optimizer.state_dict(), 181 | 'conf' : model.yolo_losses[0].val_conf, 182 | }, False,model,config, checkpoint=args.checkpoint,filename='epoch%d_checkpoint.pth.tar'%epoch,export_path = args.export) 183 | adjust_learning_rate(optimizer, 0.5) 184 | print('adjusted to current lr: ' 185 | '{}'.format([param_group['lr'] for param_group in optimizer.param_groups])) 186 | 187 | log = False 188 | 189 | if epoch%2 == 0 : 190 | log = True 191 | st = time.time() 192 | if segmentation_enable: 193 | print('\nEpoch: [%3d | %3d] LR: %f | loss | cnt | iou | obj | no_obj | class | recall | s_obj | s_no_obj |' \ 194 | % (epoch, args.epochs, optimizer.param_groups[0]['lr'])) 195 | else: 196 | print('\nEpoch: [%3d 
| %3d] LR: %f | loss | cnt | iou | obj | no_obj | class | recall | cnt2 | iou2 | obj2 | no_obj2 | class2 | recall2 |' \ 197 | % (epoch, args.epochs, optimizer.param_groups[0]['lr'])) 198 | 199 | train_loss,iou = train(train_loader, model, optimizer, epoch,sampler,segmentation_enable) 200 | writer.add_scalar('Loss/train', train_loss, epoch) 201 | writer.add_scalar('iou/train', iou, epoch) 202 | if not log : 203 | test_acc = test(test_loader, model, optimizer, epoch , config, classes_name,segmentation_enable) 204 | nni.report_intermediate_result(test_acc) 205 | logger.append([epoch + 1, train_loss , test_acc, time.time()-st,iou, optimizer.param_groups[0]['lr']]) 206 | # save model 207 | is_best = test_acc > best_acc 208 | best_acc = max(test_acc, best_acc) 209 | save_checkpoint({ 210 | 'epoch': epoch + 1, 211 | 'model': model.state_dict(), 212 | 'acc': test_acc, 213 | 'best_acc': best_acc, 214 | 'optimizer' : optimizer.state_dict(), 215 | 'conf' : model.yolo_losses[0].val_conf, 216 | }, is_best,model,config, checkpoint=args.checkpoint,export_path = args.export) 217 | writer.add_scalar('Accuracy/test', test_acc, epoch+ 1) 218 | else : 219 | save_checkpoint({ 220 | 'epoch': epoch + 1, 221 | 'model': model.state_dict(), 222 | 'acc': test_acc, 223 | 'best_acc': best_acc, 224 | 'optimizer' : optimizer.state_dict(), 225 | 'conf' : model.yolo_losses[0].val_conf, 226 | }, False,model,config, checkpoint=args.checkpoint,export_path = args.export) 227 | 228 | nni.report_final_result(best_acc) 229 | def train(train_loader, model, optimizer,epoch,sampler,segmentation_enable): 230 | model.train() 231 | bar = IncrementalBar('Training', max=len(sampler),width=12) 232 | #batch_time = AverageMeter() 233 | #data_time = AverageMeter() 234 | losses = AverageMeter() 235 | recall = [AverageMeter(),AverageMeter()] 236 | iou = [AverageMeter(),AverageMeter()] 237 | obj = [AverageMeter(),AverageMeter()] 238 | no_obj = [AverageMeter(),AverageMeter()] 239 | conf_loss = [AverageMeter(),AverageMeter()] 240 | cls_loss = [AverageMeter(),AverageMeter()] 241 | cls_score = [AverageMeter(),AverageMeter()] 242 | count = [AverageMeter(),AverageMeter()] 243 | seg_obj = AverageMeter() 244 | seg_no_obj = AverageMeter() 245 | #end = time.time() 246 | for batch_idx, (images,targets,total_num,seg_maps) in enumerate(train_loader): 247 | #print('\n1-',sum(sampler.get_mosaic_array()),'\n') 248 | #print('1-',sampler.mosaic_array,'\n') 249 | #print(targets) 250 | #data_time.update(time.time() - end) 251 | bs = images.size(0) 252 | #print(images.shape) 253 | #print(i,targets[0]) 254 | optimizer.zero_grad() 255 | images = images.to(device) # (batch_size (N), 3, H, W) 256 | if segmentation_enable: 257 | seg_maps = seg_maps.to(device) # (batch_size (N), H, W, num seg class) 258 | outputs,seg_out = model(images,targets,seg_maps) 259 | else: 260 | outputs = model(images,targets,seg_maps) 261 | #losses0 = yolo_losses[0](outputs[0],targets) 262 | #losses1 = yolo_losses[1](outputs[1],targets) 263 | t_loss = list() 264 | 265 | for i,l in enumerate(outputs): 266 | #print(l[0]) 267 | t_loss.append(l[0]) 268 | recall[i].update(l[1]) 269 | iou[i].update(l[2]) 270 | obj[i].update(l[3]) 271 | no_obj[i].update(l[4]) 272 | cls_score[i].update(l[5]) 273 | count[i].update(l[6]) 274 | #conf_loss.update(l[5]) 275 | #cls_loss.update(l[6]) 276 | loss = sum(t_loss) 277 | if segmentation_enable: 278 | seg_obj.update(seg_out[1]) 279 | seg_no_obj.update(seg_out[2]) 280 | loss += seg_out[0] 281 | losses.update(loss.item(),bs) 282 | loss.backward() 283 | 
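        # Gradients from both YOLO heads (and the segmentation branch, when enabled) have been
        # accumulated by loss.backward(); the optimizer step below (AdamW in main()) applies the update.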
optimizer.step() 284 | # measure elapsed time 285 | #batch_time.update(time.time() - end) 286 | #end = time.time() 287 | if segmentation_enable: 288 | bar.suffix = \ 289 | '%(percent)3d%% | {total:} | {loss:.4f} | {cnt:2.1f} | {iou:.3f} | {obj:.3f} | {no_obj:.4f} | {cls:.3f} | {rec:.4f} | {seg_obj:.3f} | {seg_no_obj:.6f} |'\ 290 | .format( 291 | total=bar.elapsed_td, 292 | loss=losses.avg, 293 | cnt=(count[0].avg+count[1].avg), 294 | iou=(iou[0].avg+iou[1].avg)/2., 295 | obj=(obj[0].avg+obj[1].avg)/2., 296 | no_obj=(no_obj[0].avg+no_obj[1].avg)/2., 297 | cls=(cls_score[0].avg+cls_score[1].avg)/2., 298 | rec=(recall[0].avg+recall[1].avg)/2., 299 | seg_obj=seg_obj.avg, 300 | seg_no_obj = seg_no_obj.avg 301 | ) 302 | else: 303 | bar.suffix = \ 304 | '%(percent)3d%% | {total:} | {loss:.4f} | {cnt1:2.1f} | {iou1:.3f} | {obj1:.3f} | {no_obj1:.4f} | {cls1:.3f} | {rec1:.3f} | {cnt2:2.1f} | {iou2:.3f} | {obj2:.3f} | {no_obj2:.4f} | {cls2:.3f} | {rec2:.3f} |'\ 305 | .format( 306 | #batch=batch_idx + 1, 307 | #size=len(train_loader), 308 | #data=data_time.avg, 309 | #bt=batch_time.avg, 310 | total=bar.elapsed_td, 311 | loss=losses.avg, 312 | #loss1=losses[0].avg, 313 | #loss2=losses[1].avg, 314 | cnt1=(count[0].avg), 315 | cnt2=(count[1].avg), 316 | #recall=recall.avg, 317 | iou1=iou[0].avg, 318 | iou2=iou[1].avg, 319 | obj1=obj[0].avg, 320 | no_obj1=no_obj[0].avg, 321 | cls1=cls_score[0].avg, 322 | obj2=obj[1].avg, 323 | no_obj2=no_obj[1].avg, 324 | cls2=cls_score[1].avg, 325 | rec1=recall[0].avg, 326 | rec2=recall[1].avg, 327 | #cls=cls_loss.avg, 328 | ) 329 | bar.next(total_num) 330 | bar.finish() 331 | return losses.avg,(iou[0].avg+iou[1].avg)/2 332 | 333 | def test(test_loader, model, optimizer,epoch , config, classes_name,segmentation_enable): 334 | 335 | # switch to evaluate mode 336 | model.eval() 337 | n_classes = config['yolo']['classes']; 338 | 339 | end = time.time() 340 | #bar = Bar('Validating', max=len(test_loader)) 341 | bar = IncrementalBar('Validating', max=len(test_loader),width=32) 342 | #for batch_idx, (inputs, targets) in enumerate(testloader): 343 | n_gt = [0]*n_classes 344 | correct = [0]*n_classes 345 | n_pred = [0]*n_classes 346 | n_iou = [0]*n_classes 347 | n_images = 0 348 | det_boxes = list() 349 | det_labels = list() 350 | det_scores = list() 351 | true_boxes = list() 352 | true_labels = list() 353 | true_difficulties = list() 354 | gt_box = 0 355 | pred_box = 0 356 | 357 | for batch_idx, (images,targets) in enumerate(test_loader): 358 | images = images.to(device) # (batch_size (N), 3, H, W) 359 | labels = [torch.Tensor(l).to(device) for l in targets] 360 | bs = len(labels) 361 | # compute output 362 | with torch.no_grad(): 363 | if segmentation_enable: 364 | detections,_ = model(images) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 365 | else: 366 | detections = model(images) # (N, num_defaultBoxes, 4), (N, num_defaultBoxes, n_classes) 367 | for sample_i in range(bs): 368 | 369 | # Get labels for sample where width is not zero (dummies) 370 | # print(len(labels[0]),labels[sample_i]) 371 | target_sample = labels[sample_i] 372 | gt_box = gt_box + len(target_sample) 373 | tx1, tx2 = torch.unsqueeze((target_sample[...,1] - target_sample[...,3] / 2),1), torch.unsqueeze((target_sample[...,1] + target_sample[...,3] / 2),1) 374 | ty1, ty2 = torch.unsqueeze((target_sample[...,2] - target_sample[...,4] / 2),1), torch.unsqueeze((target_sample[...,2] + target_sample[...,4] / 2),1) 375 | box = torch.cat((tx1,ty1,tx2,ty2),1) 376 | size = target_sample.size(0) 377 | 378 
| true_boxes.append(box) 379 | true_labels.append(target_sample[...,0]) 380 | true_difficulties.append(torch.zeros(size, requires_grad=False)) 381 | #print(detections[0][sample_i].shape,detections[1][sample_i].shape) 382 | preds = detections[sample_i] 383 | pred_box = pred_box + len(preds) 384 | if preds is not None: 385 | det_boxes.append(preds[...,:4]) 386 | det_labels.append((preds[...,6]+1).to(device)) 387 | conf = (preds[...,4] * preds[...,5]).to(device) 388 | det_scores.append(conf) 389 | else : 390 | empty = torch.empty(0).to(device) 391 | det_boxes.append(empty) 392 | det_labels.append(empty) 393 | det_scores.append(empty) 394 | 395 | n_images = n_images + 1 396 | 397 | 398 | # measure elapsed time 399 | sum_gt = sum(n_gt) 400 | sum_n_pred= sum(n_pred) 401 | # plot progress 402 | bar.suffix = '({batch}/{size}) | Total: {total:} | ETA: {eta:}| n_img: {n_img:} | gt_box: {gt_box:} | pred_box: {pred_box:}'.format( 403 | batch=batch_idx + 1, 404 | size=len(test_loader), 405 | 406 | total=bar.elapsed_td, 407 | eta=bar.eta_td, 408 | n_img=n_images, 409 | gt_box=gt_box, 410 | pred_box=pred_box 411 | ) 412 | bar.next() 413 | #if batch_idx == 50: 414 | # break 415 | bar.finish() 416 | print("\nVal conf. is %f\n" % (model.yolo_losses[0].val_conf)) 417 | model.yolo_losses[0].val_conf = adjust_confidence(gt_box,pred_box,model.yolo_losses[0].val_conf) 418 | model.yolo_losses[1].val_conf = adjust_confidence(gt_box,pred_box,model.yolo_losses[1].val_conf) 419 | 420 | # Calculate mAP 421 | APs, mAP, TP, FP = calculate_mAP(det_boxes, det_labels, det_scores, true_boxes, true_labels, true_difficulties, classes_name) 422 | pp.pprint(APs) 423 | print('\nMean Average Precision (mAP): %.3f' % mAP) 424 | return mAP 425 | def save_checkpoint(state, is_best,model,config, checkpoint='checkpoint', filename='checkpoint.pth.tar',export_path = 'checkpoint'): 426 | 427 | filepath = os.path.join(checkpoint, filename) 428 | torch.save(state, filepath) 429 | #save_onnx(filepath,model) 430 | if is_best: 431 | torch.save(model, os.path.join(checkpoint, 'model_best.pth.tar')) 432 | #dummy_input = torch.randn(1, 3, config["img_w"], config["img_h"]) # 433 | #torch.onnx.export(model, dummy_input,os.path.join(export_path, 'model_best.onnx')) 434 | def adjust_confidence(gt_box_num,pred_box_num,conf): 435 | if pred_box_num>gt_box_num*3 : 436 | conf = conf + 0.01 437 | elif pred_box_num < gt_box_num and conf > 0.01: 438 | conf = conf - 0.01 439 | 440 | return conf 441 | def adjust_learning_rate(optimizer, scale): 442 | """ 443 | Scale learning rate by a specified factor. 444 | 445 | :param optimizer: optimizer whose learning rate must be shrunk. 446 | :param scale: factor to multiply learning rate with. 
447 | """ 448 | for param_group in optimizer.param_groups: 449 | param_group['lr'] = param_group['lr'] * scale 450 | print("Change learning rate.\n The new LR is %f\n" % (optimizer.param_groups[0]['lr'])) 451 | 452 | def get_params(): 453 | # Training settings 454 | parser = argparse.ArgumentParser(description='PyTorch Training') 455 | parser.add_argument('-y', '--data_yaml', dest='data_yaml', default='data/voc_data.yaml', type=str, metavar='PATH', 456 | help='path to data_yaml') 457 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 458 | help='momentum') 459 | parser.add_argument('--weight-decay', '--wd', default=0.0004, type=float, 460 | metavar='W', help='weight decay (default: 1e-4)') 461 | parser.add_argument('--learning_rate', default=0.0007, type=float, 462 | metavar='LR', help='initial learning rate') 463 | parser.add_argument('--warm-up', '--warmup', default=[], type=float, 464 | metavar='warmup', help='warm up learning rate') 465 | parser.add_argument('--epochs', default=300, type=int, metavar='N', 466 | help='number of total epochs to run') 467 | parser.add_argument('--schedule', type=int, nargs='+', default=[100,170,240], 468 | help='Decrease learning rate at these epochs.') 469 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 470 | help='path to latest checkpoint (default: none)') 471 | parser.add_argument('-c', '--checkpoint', default='checkpoint', type=str, metavar='PATH', 472 | help='path to save checkpoint (default: checkpoint)') 473 | #parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 474 | # help='evaluate model on validation set') 475 | parser.add_argument('-o', '--export', dest='export', default='checkpoint', type=str, metavar='PATH', 476 | help='path to export checkpoint (default: checkpoint)') 477 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='Evaluate mAP? 
default=False') 478 | parser.add_argument('--mosaic_num', default=None, type=int, help='mosaic number in image augmentation') 479 | parser.add_argument('--ignore_thresh_1', default=None, type=float, help='ignore layer 1') 480 | parser.add_argument('--ignore_thresh_2', default=None, type=float, help='ignore layer 2') 481 | parser.add_argument('--iou_thresh', default=None, type=float, help='ignore iou thresh') 482 | parser.add_argument('--expand_scale', default=None, type=float, help='image augmentation expand scale') 483 | parser.add_argument('--iou_weighting', default=None, type=float, help='iou loss weighting') 484 | args = parser.parse_args() 485 | return args 486 | 487 | if __name__ == '__main__': 488 | try: 489 | # get parameters form tuner 490 | tuner_params = nni.get_next_parameter() 491 | #logger.debug(tuner_params) 492 | print(tuner_params) 493 | 494 | params = merge_parameter(get_params(), tuner_params) 495 | id = get_sequence_id() 496 | #params.checkpoint = 'checkpoints/%d' % id 497 | #print(params) 498 | 499 | main(params) 500 | except Exception as exception: 501 | #logger.exception(exception) 502 | raise 503 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Useful utils 2 | """ 3 | from .misc import * 4 | from .logger import * 5 | 6 | # progress bar 7 | import os, sys 8 | sys.path.append(os.path.join(os.path.dirname(__file__), "progress")) 9 | from progress.bar import Bar as Bar -------------------------------------------------------------------------------- /utils/box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 5 | 6 | def wh_to_x2y2(bbox): 7 | bbox[...,0] = bbox[...,0] - bbox[...,2]/2 8 | bbox[...,1] = bbox[...,1] - bbox[...,3]/2 9 | bbox[...,2] = bbox[...,2] + bbox[...,0] 10 | bbox[...,3] = bbox[...,3] + bbox[...,1] 11 | def nms(preds,num_classes) : 12 | nms_preds = list() 13 | assert len(preds) == 2 #only do two layers yolo 14 | assert len(preds[0]) == len(preds[1]) 15 | bs = len(preds[0]) 16 | for b in range(bs): 17 | pred_per_img = torch.cat((preds[0][b],preds[1][b]),0) 18 | pred_boxes = torch.zeros(0,7, requires_grad=False).to(device) 19 | if pred_per_img.size(0): 20 | for i in range(num_classes) : 21 | mask = (pred_per_img[...,6] == i) 22 | pred_this_cls = pred_per_img[mask] 23 | 24 | if pred_this_cls.size(0): 25 | #print(pred_this_cls.shape,pred_per_img.shape) 26 | boxes = pred_this_cls[...,:4] 27 | scores = pred_this_cls[...,5]*pred_this_cls[...,4] 28 | index = torchvision.ops.nms(boxes,scores,0.45) 29 | pred_boxes = torch.cat((pred_boxes,pred_this_cls[index]),0) 30 | nms_preds.append(pred_boxes) 31 | return nms_preds -------------------------------------------------------------------------------- /utils/eval_mAP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from utils.iou import * 3 | import torch.multiprocessing as mp 4 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 5 | from itertools import product 6 | import time 7 | 8 | def eval_single_image_recall(this_true_labels,this_det_labels,true_box,true_difficultie,det_box,det_score): 9 | #print(true_boxes[num].shape) 10 | n_easy_object = 0 11 | #this_true_labels = (true_label == c) 12 | #this_det_labels = (det_label == c) 
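# this_true_labels / this_det_labels are boolean masks selecting this class's ground-truth and detected boxes for one image; the caller passes (true_labels[num] == c) and (det_labels[num] == c).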
13 | #print(this_true_labels) 14 | true_class_boxes = true_box[this_true_labels] 15 | 16 | true_class_difficulties = true_difficultie[this_true_labels] 17 | n_easy_object += (1 - true_class_difficulties).sum() # ignore difficult objects 18 | 19 | 20 | det_class_boxes = det_box[this_det_labels] # (n_class_detections, 4) 21 | det_class_scores = det_score[this_det_labels] # (n_class_detections) 22 | n_class_detections = det_class_boxes.size(0) 23 | 24 | true_positive = torch.zeros((n_class_detections), dtype=torch.float).to(device) # (n_class_detections) 25 | false_positive = torch.zeros((n_class_detections), dtype=torch.float).to(device) # (n_class_detections) 26 | if n_class_detections == 0: 27 | #sharedlist.append([true_positive,false_positive,n_easy_object,det_class_scores]) 28 | return (true_positive,false_positive,n_easy_object,det_class_scores) 29 | #print(true_positive,false_positive,n_easy_object) 30 | #return true_positive,false_positive,n_easy_object,det_class_scores 31 | true_class_boxes_detected = torch.zeros((true_class_difficulties.size(0)), dtype=torch.uint8).to(device) # (n_class_objects) 32 | for d in range(n_class_detections): 33 | this_detection_box = det_class_boxes[d].unsqueeze(0) # (1, 4) 34 | object_boxes = true_class_boxes 35 | 36 | object_difficulties = true_class_difficulties 37 | if object_boxes.size(0) == 0: 38 | false_positive[d] = 1 39 | continue 40 | # Find maximum overlap of this detection with objects in this image of this class 41 | overlaps = find_jaccard_overlap(this_detection_box, object_boxes) # (1, n_class_objects_in_img) 42 | max_overlap, ind = torch.max(overlaps.squeeze(0), dim=0) # (), () - scalars 43 | 44 | 45 | # 'ind' is the index of the object in these image-level tensors 'object_boxes', 'object_difficulties' 46 | # In the original class-level tensors 'true_class_boxes', etc., 'ind' corresponds to object with index... 
47 | original_ind = torch.LongTensor(range(true_class_boxes.size(0)))[ind] 48 | # We need 'original_ind' to update 'true_class_boxes_detected' 49 | 50 | # If the maximum overlap is greater than the threshold of 0.5, it's a match 51 | if max_overlap.item() > 0.5: 52 | # If the object it matched with is 'difficult', ignore it 53 | if object_difficulties[ind] == 0: 54 | # If this object has already not been detected, it's a true positive 55 | if true_class_boxes_detected[original_ind] == 0: 56 | true_positive[d] = 1 57 | true_class_boxes_detected[original_ind] = 1 # this object has now been detected/accounted for 58 | # Otherwise, it's a false positive (since this object is already accounted for) 59 | else: 60 | false_positive[d] = 1 61 | # Otherwise, the detection occurs in a different location than the actual object, and is a false positive 62 | else: 63 | false_positive[d] = 1 64 | #sharedlist.append([true_positive,false_positive,n_easy_object,det_class_scores]) 65 | return (true_positive,false_positive,n_easy_object,det_class_scores) 66 | #print(true_positive,false_positive,n_easy_object) 67 | #return true_positive,false_positive,n_easy_object,det_class_scores 68 | 69 | def eval_class_ap(c,num_of_imgs,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores): 70 | n_easy_class_objects = 0 71 | true_positives = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 72 | false_positives = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 73 | det_class_scores_all = torch.zeros(0, dtype=torch.float).to(device) # (n_class_detections) 74 | #ctx = mp.get_context('spawn') 75 | #pool = ctx.Pool(processes=4) 76 | #class_labels = [c] * num_of_imgs 77 | #manager = ctx.Manager() 78 | #sharedlist= manager.list() 79 | ''' 80 | data = list() 81 | for class_label,true_label,det_label,true_boxe,true_difficultie,det_boxe,det_score in zip(class_labels,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores): 82 | data.append([c,class_label,true_label,det_label,true_boxe,true_difficultie,det_boxe,det_score]) 83 | results = pool.map(eval_single_image_recall,data) 84 | pool.close() 85 | pool.join() 86 | for result in results: 87 | true_positives = torch.cat((true_positives,result[0]),0) 88 | false_positives = torch.cat((false_positives,result[1]),0) 89 | n_easy_class_objects += result[2] 90 | det_class_scores_all = torch.cat((det_class_scores_all,result[3]),0) 91 | ''' 92 | 93 | for num in range(num_of_imgs): 94 | #print(true_boxes[num].shape) 95 | #eval_single_image_recall(sharedlist,c,true_labels[num],det_labels[num],true_boxes[num],true_difficulties[num],det_boxes[num],det_scores[num]) 96 | true_positive,false_positive,n_easy_object,det_class_scores = eval_single_image_recall((true_labels[num] == c) ,(det_labels[num] == c) ,true_boxes[num],true_difficulties[num],det_boxes[num],det_scores[num]) 97 | true_positives = torch.cat((true_positives,true_positive),0) 98 | false_positives = torch.cat((false_positives,false_positive),0) 99 | n_easy_class_objects += n_easy_object 100 | det_class_scores_all = torch.cat((det_class_scores_all,det_class_scores),0) 101 | ''' 102 | for idx,(true_positive,false_positive,n_easy_object,det_class_scores) in enumerate(sharedlist): 103 | true_positives = torch.cat((true_positives,true_positive),0) 104 | false_positives = torch.cat((false_positives,false_positive),0) 105 | n_easy_class_objects += n_easy_object 106 | det_class_scores_all = torch.cat((det_class_scores_all,det_class_scores),0) 107 | ''' 108 | # Compute 
cumulative precision and recall at each detection in the order of decreasing scores 109 | #print(true_positives.shape) 110 | det_class_scores_all, sort_ind = torch.sort(det_class_scores_all, dim=0, descending=True) # (n_class_detections) 111 | 112 | true_positives = true_positives[sort_ind] # (n_class_detections) 113 | false_positives = false_positives[sort_ind] # (n_class_detections, 4) 114 | n_sum_true_positive = torch.sum(true_positives) 115 | n_sum_false_positive = torch.sum(false_positives) 116 | cumul_true_positives = torch.cumsum(true_positives, dim=0) # (n_class_detections) 117 | cumul_false_positives = torch.cumsum(false_positives, dim=0) # (n_class_detections) 118 | cumul_precision = cumul_true_positives / ( 119 | cumul_true_positives + cumul_false_positives + 1e-10) # (n_class_detections) 120 | cumul_recall = cumul_true_positives / n_easy_class_objects # (n_class_detections) 121 | 122 | # Find the mean of the maximum of the precisions corresponding to recalls above the threshold 't' 123 | recall_thresholds = torch.arange(start=0, end=1.1, step=.1).tolist() # (11) 124 | precisions = torch.zeros((len(recall_thresholds)), dtype=torch.float).to(device) # (11) 125 | for i, t in enumerate(recall_thresholds): 126 | recalls_above_t = cumul_recall >= t 127 | if recalls_above_t.any(): 128 | precisions[i] = cumul_precision[recalls_above_t].max() 129 | else: 130 | precisions[i] = 0. 131 | 132 | return precisions.mean().item(),n_sum_true_positive,n_sum_false_positive 133 | 134 | def calculate_mAP(det_boxes, det_labels, det_scores, true_boxes, true_labels, true_difficulties,classes_name): 135 | start_time = time.process_time() 136 | n_classes = len(classes_name) 137 | #print(n_classes) 138 | classes_map = {k: v for v, k in enumerate(classes_name)} 139 | #classes_map['background'] = 0 140 | od_classes_map = {v: k for k, v in classes_map.items()} # Inverse mapping 141 | 142 | """ 143 | Calculate the Mean Average Precision (mAP) of detected objects. 144 | See https://medium.com/@jonathan_hui/map-mean-average-precision-for-object-detection-45c121a31173 for an explanation 145 | :param det_boxes: list of tensors, one tensor for each image containing detected objects' bounding boxes 146 | :param det_labels: list of tensors, one tensor for each image containing detected objects' labels 147 | :param det_scores: list of tensors, one tensor for each image containing detected objects' labels' scores 148 | :param true_boxes: list of tensors, one tensor for each image containing actual objects' bounding boxes 149 | :param true_labels: list of tensors, one tensor for each image containing actual objects' labels 150 | :param true_difficulties: list of tensors, one tensor for each image containing actual objects' difficulty (0 or 1) 151 | :return: list of average precisions for all classes, mean average precision (mAP) 152 | """ 153 | #print(len(det_boxes),len(det_labels),len(det_scores),len(true_boxes),len(true_labels),len(true_difficulties)) 154 | assert len(det_boxes) == len(det_labels) == len(det_scores) == len(true_boxes) == len( 155 | true_labels) == len( 156 | true_difficulties) # these are all lists of tensors of the same length, i.e. 
number of images 157 | num_of_imgs = len(det_boxes) 158 | # print(len(det_boxes), len(det_labels), len(det_scores), len(true_boxes), len(true_labels), len(true_difficulties)) 159 | 160 | # Store all (true) objects in a single continuous tensor while keeping track of the image it is from 161 | 162 | # Calculate APs for each class (except background) 163 | average_precisions = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 164 | class_true_positive = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 165 | class_false_positive = torch.zeros((n_classes - 1), dtype=torch.float) # (n_classes - 1) 166 | 167 | for c in range(1, n_classes): 168 | precision,n_sum_true_positive,n_sum_false_positive = eval_class_ap(c,num_of_imgs,true_labels,det_labels,true_boxes,true_difficulties,det_boxes,det_scores) 169 | 170 | average_precisions[c - 1] = precision 171 | class_true_positive[c - 1] = n_sum_true_positive 172 | class_false_positive[c - 1] = n_sum_false_positive 173 | 174 | #n_easy_class_objects = int(n_easy_class_objects) 175 | # Calculate Mean Average Precision (mAP) 176 | 177 | mean_average_precision = average_precisions.mean().item() 178 | 179 | # Keep class-wise average precisions in a dictionary 180 | average_precisions = {od_classes_map[c + 1]: v for c, v in enumerate(average_precisions.tolist())} 181 | class_true_positive = {od_classes_map[c + 1]: v for c, v in enumerate(class_true_positive.tolist())} 182 | class_false_positive = {od_classes_map[c + 1]: v for c, v in enumerate(class_false_positive.tolist())} 183 | print("The time used to execute this is given below") 184 | 185 | end_time = time.process_time() 186 | 187 | print(end_time - start_time ) 188 | return average_precisions, mean_average_precision, class_true_positive, class_false_positive -------------------------------------------------------------------------------- /utils/image_augmentation.py: -------------------------------------------------------------------------------- 1 | # Some augmentation functions below have been adapted from 2 | # From https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 3 | import numpy as np 4 | import torch 5 | import random 6 | import torchvision.transforms.functional as FT 7 | from torchvision import transforms 8 | from PIL import Image, ImageDraw, ImageFont 9 | import cv2 10 | from utils.iou import* 11 | 12 | class Image_Augmentation(): 13 | 14 | def expand_od(self,image, boxes, filler,expand_scale, seg_id = None): 15 | """ 16 | Perform a zooming out operation by placing the image in a larger canvas of filler material. 17 | 18 | Helps to learn to detect smaller objects. 
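For example, with expand_scale = 1.5 a 300x300 image may be placed at a random (left, top) offset on a canvas of up to 450x450 pixels filled with the filler colour; every box is then shifted by that same (left, top) offset.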
19 | 20 | :param image: image, a tensor of dimensions (3, original_h, original_w) 21 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 22 | :param filler: RBG values of the filler material, a list like [R, G, B] 23 | :return: expanded image, updated bounding box coordinates 24 | """ 25 | # Calculate dimensions of proposed expanded (zoomed-out) image 26 | original_h = image.size(1) 27 | original_w = image.size(2) 28 | max_scale = expand_scale 29 | scale = random.uniform(1, max_scale) 30 | new_h = int(scale * original_h) 31 | new_w = int(scale * original_w) 32 | 33 | # Create such an image with the filler 34 | filler = torch.FloatTensor(filler) # (3) 35 | new_image = torch.ones((3, new_h, new_w), dtype=torch.float) * filler.unsqueeze(1).unsqueeze(1) # (3, new_h, new_w) 36 | new_seg_id = torch.zeros((1, new_h, new_w), dtype=torch.float) 37 | # Note - do not use expand() like new_image = filler.unsqueeze(1).unsqueeze(1).expand(3, new_h, new_w) 38 | # because all expanded values will share the same memory, so changing one pixel will change all 39 | 40 | # Place the original image at random coordinates in this new image (origin at top-left of image) 41 | left = random.randint(0, new_w - original_w) 42 | right = left + original_w 43 | top = random.randint(0, new_h - original_h) 44 | bottom = top + original_h 45 | new_image[:, top:bottom, left:right] = image 46 | if seg_id!=None: 47 | new_seg_id[:, top:bottom, left:right] = seg_id 48 | #print('\n',image.shape) 49 | # Adjust bounding boxes' coordinates accordingly 50 | new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(0) # (n_objects, 4), n_objects is the no. of objects in this image 51 | 52 | return new_image, new_boxes, new_seg_id 53 | 54 | def random_crop_od(self,image, boxes, labels, difficulties, seg_id=None): 55 | """ 56 | Performs a random crop in the manner stated in the paper. Helps to learn to detect larger and partial objects. 57 | 58 | Note that some objects may be cut out entirely. 
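The minimum Jaccard overlap is drawn from {0, .1, .2, .3, .4, .5, None} (None means no crop); up to 50 candidate crops with side scales in [0.5, 1] and an aspect ratio in (0.5, 2) are tried, and only boxes whose centers lie inside the accepted crop are kept.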
59 | 60 | Adapted from https://github.com/amdegroot/ssd.pytorch/blob/master/utils/augmentations.py 61 | 62 | :param image: image, a tensor of dimensions (3, original_h, original_w) 63 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 64 | :param labels: labels of objects, a tensor of dimensions (n_objects) 65 | :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects) 66 | :return: cropped image, updated bounding box coordinates, updated labels, updated difficulties 67 | """ 68 | original_h = image.size(1) 69 | original_w = image.size(2) 70 | # Keep choosing a minimum overlap until a successful crop is made 71 | while True: 72 | # Randomly draw the value for minimum overlap 73 | min_overlap = random.choice([0., .1, .2, .3, .4, .5, None]) # 'None' refers to no cropping 74 | 75 | # If not cropping 76 | if min_overlap is None: 77 | return image, boxes, labels, difficulties, seg_id 78 | 79 | # Try up to 50 times for this choice of minimum overlap 80 | # This isn't mentioned in the paper, of course, but 50 is chosen in paper authors' original Caffe repo 81 | max_trials = 50 82 | for _ in range(max_trials): 83 | # Crop dimensions must be in [0.3, 1] of original dimensions 84 | # Note - it's [0.1, 1] in the paper, but actually [0.3, 1] in the authors' repo 85 | min_scale = 0.5 86 | scale_h = random.uniform(min_scale, 1) 87 | scale_w = random.uniform(min_scale, 1) 88 | new_h = int(scale_h * original_h) 89 | new_w = int(scale_w * original_w) 90 | 91 | # Aspect ratio has to be in [0.5, 2] 92 | aspect_ratio = new_h / new_w 93 | if not 0.5 < aspect_ratio < 2: 94 | continue 95 | 96 | # Crop coordinates (origin at top-left of image) 97 | left = random.randint(0, original_w - new_w) 98 | right = left + new_w 99 | top = random.randint(0, original_h - new_h) 100 | bottom = top + new_h 101 | crop = torch.FloatTensor([left, top, right, bottom]) # (4) 102 | if boxes.shape[0]>0: 103 | # Calculate Jaccard overlap between the crop and the bounding boxes 104 | overlap = find_jaccard_overlap(crop.unsqueeze(0),boxes) # (1, n_objects), n_objects is the no. of objects in this image 105 | overlap = overlap.squeeze(0) # (n_objects) 106 | 107 | # If not a single bounding box has a Jaccard overlap of greater than the minimum, try again 108 | 109 | if overlap.max().item() < min_overlap: 110 | continue 111 | 112 | # Crop image 113 | new_image = image[:, top:bottom, left:right] # (3, new_h, new_w) 114 | new_seg_id = None 115 | if seg_id!=None: 116 | new_seg_id = seg_id[:, top:bottom, left:right] # (3, new_h, new_w) 117 | if boxes.shape[0]>0: 118 | # Find centers of original bounding boxes 119 | bb_centers = (boxes[:, :2] + boxes[:, 2:]) / 2. 
# (n_objects, 2) 120 | 121 | # Find bounding boxes whose centers are in the crop 122 | centers_in_crop = (bb_centers[:, 0] > left) * (bb_centers[:, 0] < right) * (bb_centers[:, 1] > top) * ( 123 | bb_centers[:, 1] < bottom) # (n_objects), a Torch uInt8/Byte tensor, can be used as a boolean index 124 | 125 | # If not a single bounding box has its center in the crop, try again 126 | if not centers_in_crop.any(): 127 | continue 128 | 129 | # Discard bounding boxes that don't meet this criterion 130 | 131 | new_boxes = boxes[centers_in_crop, :] 132 | new_labels = labels[centers_in_crop] 133 | new_difficulties = difficulties[centers_in_crop] 134 | 135 | # Calculate bounding boxes' new coordinates in the crop 136 | new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2]) # crop[:2] is [left, top] 137 | new_boxes[:, :2] -= crop[:2] 138 | new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:]) # crop[2:] is [right, bottom] 139 | new_boxes[:, 2:] -= crop[:2] 140 | else : 141 | new_boxes = boxes 142 | new_labels = labels 143 | new_difficulties = difficulties 144 | 145 | return new_image, new_boxes, new_labels, new_difficulties, new_seg_id 146 | 147 | def flip_od(self,image, boxes, seg_id=None): 148 | """ 149 | Flip image horizontally. 150 | 151 | :param image: image, a PIL Image 152 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 153 | :return: flipped image, updated bounding box coordinates 154 | """ 155 | # Flip image 156 | new_image = FT.hflip(image) 157 | new_seg_id = None 158 | if seg_id!=None: 159 | new_seg_id = FT.hflip(seg_id) 160 | # Flip boxes 161 | new_boxes = boxes 162 | new_boxes[:, 0] = image.width - boxes[:, 0] - 1 163 | new_boxes[:, 2] = image.width - boxes[:, 2] - 1 164 | new_boxes = new_boxes[:, [2, 1, 0, 3]] 165 | 166 | return new_image, new_boxes, new_seg_id 167 | 168 | 169 | def photometric_distort(self,image): 170 | """ 171 | Distort brightness, contrast, saturation, and hue, each with a 50% chance, in random order. 172 | 173 | :param image: image, a PIL Image 174 | :return: distorted image 175 | """ 176 | new_image = image 177 | 178 | distortions = [FT.adjust_brightness, 179 | FT.adjust_contrast, 180 | FT.adjust_saturation, 181 | FT.adjust_hue, 182 | FT.adjust_gamma] 183 | 184 | random.shuffle(distortions) 185 | 186 | for d in distortions: 187 | if random.random() < 0.5: 188 | if d.__name__ is 'adjust_hue': 189 | # Caffe repo uses a 'hue_delta' of 18 - we divide by 255 because PyTorch needs a normalized value 190 | adjust_factor = random.uniform(-18 / 255., 18 / 255.) 
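# Note: FT.adjust_hue expects a hue factor in [-0.5, 0.5]; 18/255 is roughly 0.07, i.e. a small hue shift.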
191 | else: 192 | # Caffe repo uses 'lower' and 'upper' values of 0.5 and 1.5 for brightness, contrast, and saturation 193 | adjust_factor = random.uniform(0.5, 1.5) 194 | 195 | # Apply this distortion 196 | new_image = d(new_image, adjust_factor) 197 | 198 | return new_image 199 | def generate_mosaic_mask(self,num,size): 200 | mosaic_mask = [[0,0,size[0],size[1]]] 201 | x_center = int(random.uniform(.25,.75)*size[0]) 202 | y_center = int(random.uniform(.25,.75)*size[1]) 203 | if num == 2 : 204 | mosaic_mask1 = [[0,0,x_center,size[1]],[x_center,0,size[0],size[1]]] 205 | mosaic_mask2 = [[0,0,size[0],y_center],[0,y_center,size[0],size[1]]] 206 | mosaic_mask = random.choice([mosaic_mask1,mosaic_mask2]) 207 | elif num == 3 : 208 | mosaic_mask1 = [[0,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 209 | mosaic_mask2 = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,size[0],size[1]]] 210 | mosaic_mask3 = [[0,0,x_center,size[1]],[x_center,0,size[0],y_center],[x_center,y_center,size[0],size[1]]] 211 | mosaic_mask4 = [[0,0,x_center,y_center],[x_center,0,size[0],size[1]],[0,y_center,x_center,size[1]]] 212 | mosaic_mask = random.choice([mosaic_mask1,mosaic_mask2,mosaic_mask3,mosaic_mask4]) 213 | elif num == 4 : 214 | mosaic_mask = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 215 | return mosaic_mask 216 | def Mosaic(self,source,size): 217 | #print(size) 218 | #print(len(source)) 219 | new_data = list() 220 | 221 | background = np.zeros((size[0],size[1],3)) 222 | #print(background.shape) 223 | counter = 0 224 | #x_center = int(random.uniform(.25,.75)*size[0]) 225 | #y_center = int(random.uniform(.25,.75)*size[1]) 226 | #mosaic_mask = [[0,0,x_center,y_center],[x_center,0,size[0],y_center],[0,y_center,x_center,size[1]],[x_center,y_center,size[0],size[1]]] 227 | num = len(source) 228 | mosaic_mask = self.generate_mosaic_mask(num,size) 229 | new_labels = torch.Tensor(0,5) 230 | for img,label,_ in source : 231 | 232 | width, height = (mosaic_mask[counter][2]-mosaic_mask[counter][0]),(mosaic_mask[counter][3]-mosaic_mask[counter][1]) 233 | aspect_ratio_src = img.height/img.width 234 | min_ratio,max_ratio = aspect_ratio_src*0.5 , aspect_ratio_src*2 235 | 236 | aspect_ratio_tar = height/width 237 | offset_x = 0 238 | offset_y = 0 239 | if aspect_ratio_tar < min_ratio : 240 | offset_x = random.randint(0, int(width-height/min_ratio)) 241 | width = int(height/min_ratio) 242 | 243 | 244 | elif aspect_ratio_tar > max_ratio : 245 | offset_y = random.randint(0, int(height-width*max_ratio)) 246 | height = int(width*max_ratio) 247 | 248 | new_img = img.resize((width,height)) 249 | new_img = np.array(new_img) 250 | #print(np.mean(new_img, axis=tuple(range(new_img.ndim-1)))) 251 | mean = np.mean(new_img, axis=tuple(range(new_img.ndim-1))) 252 | x1 = mosaic_mask[counter][0]+offset_x 253 | y1 = mosaic_mask[counter][1]+offset_y 254 | x2 = min(mosaic_mask[counter][2],x1+width) 255 | y2 = min(mosaic_mask[counter][3],y1+height) 256 | 257 | #print(offset_x,offset_y,x1,y1,x2,y2,width,height) 258 | background[mosaic_mask[counter][1]:mosaic_mask[counter][3],mosaic_mask[counter][0]:mosaic_mask[counter][2]] = mean 259 | background[y1:y2,x1:x2] = new_img 260 | #new_label = list() 261 | if label.size(0): 262 | new_box = label[...,1:5] 263 | #print(width,height) 264 | w_scale = (size[0]/width) 265 | h_scale = (size[1]/height) 266 | new_box[...,0],new_box[...,2] = new_box[...,0]/w_scale,new_box[...,2]/w_scale 267 | new_box[...,1],new_box[...,3] = new_box[...,1]/h_scale,new_box[...,3]/h_scale 268 | #print(new_box.shape,x1,y1) 269 | new_box[...,0] = 
new_box[...,0] + (mosaic_mask[counter][0]+offset_x)/size[0] 270 | new_box[...,1] = new_box[...,1] + (mosaic_mask[counter][1]+offset_y)/size[1] 271 | new_label = torch.cat((label[...,0].unsqueeze(1),new_box),1) 272 | #print(new_label.shape,new_labels.shape) 273 | new_labels = torch.cat((new_labels,new_label)) 274 | counter = counter + 1 275 | 276 | new_img = Image.fromarray(background.astype(np.uint8)) 277 | new_data = [new_img,new_labels] 278 | return new_data 279 | def transform_od(self,image, boxes, labels, difficulties,seg_id = None, mean = [0.485, 0.456, 0.406],std = [0.229, 0.224, 0.225],phase = 'train',expand = True,expand_scale = 1.5): 280 | """ 281 | Apply the transformations above. 282 | 283 | :param image: image, a PIL Image 284 | :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4) 285 | :param labels: labels of objects, a tensor of dimensions (n_objects) 286 | :param difficulties: difficulties of detection of these objects, a tensor of dimensions (n_objects) 287 | :param split: one of 'TRAIN' or 'TEST', since different sets of transformations are applied 288 | :param dims: (H, W) 289 | :return: transformed image, transformed bounding box coordinates, transformed labels, transformed difficulties 290 | """ 291 | assert phase in {'train', 'test'} 292 | 293 | # Mean and standard deviation of ImageNet data that our base VGG from torchvision was trained on 294 | # see: https://pytorch.org/docs/stable/torchvision/models.html 295 | # mean = [0.485, 0.456, 0.406] 296 | # std = [0.229, 0.224, 0.225] 297 | 298 | new_image = image 299 | new_boxes = boxes 300 | new_labels = labels 301 | new_seg_id = seg_id 302 | new_difficulties = difficulties 303 | 304 | # Skip the following operations if validation/evaluation 305 | if phase == 'train': 306 | # A series of photometric distortions in random order, each with 50% chance of occurrence, as in Caffe repo 307 | new_image = self.photometric_distort(new_image) 308 | 309 | # Convert PIL image to Torch tensor 310 | #print(new_image) 311 | new_image = FT.to_tensor(new_image) 312 | if new_seg_id!=None: 313 | new_seg_id = FT.to_tensor(new_seg_id) 314 | # Expand image (zoom out) with a 50% chance - helpful for training detection of small objects 315 | # Fill surrounding space with the mean of ImageNet data that our base VGG was trained on 316 | #print(new_seg_id) 317 | if random.random() < 0.5 and expand==True: 318 | new_image, new_boxes, new_seg_id = self.expand_od(new_image, boxes, filler=mean,expand_scale=expand_scale,seg_id = new_seg_id) 319 | #print(new_seg_id) 320 | # Randomly crop image (zoom in) 321 | 322 | new_image, new_boxes, new_labels, new_difficulties, new_seg_id = self.random_crop_od(new_image, new_boxes, new_labels,new_difficulties, new_seg_id) 323 | 324 | # Convert Torch tensor to PIL image 325 | new_image = FT.to_pil_image(new_image) 326 | if new_seg_id!=None: 327 | new_seg_id = FT.to_pil_image(new_seg_id) 328 | # Flip image with a 50% chance 329 | if random.random() < 0.5: 330 | new_image, new_boxes, new_seg_id = self.flip_od(new_image, new_boxes, new_seg_id) 331 | 332 | #new_image, new_boxes, new_labels = self.mosaic_mix(new_image,new_boxes,new_labels) 333 | 334 | return new_image, new_boxes, new_labels, new_difficulties, new_seg_id -------------------------------------------------------------------------------- /utils/iou.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def find_intersection(set_1, set_2): 5 | 6 | #print(set_1[:, 
:2].unsqueeze(1).shape, set_2[:, :2].unsqueeze(0).shape) 7 | # PyTorch auto-broadcasts singleton dimensions 8 | lower_bounds = torch.max(set_1[:, :2].unsqueeze(1), set_2[:, :2].unsqueeze(0)) # (n1, n2, 2) 9 | upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1), set_2[:, 2:].unsqueeze(0)) # (n1, n2, 2) 10 | #print(upper_bounds.shape, lower_bounds.shape) 11 | intersection_dims = torch.clamp(upper_bounds - lower_bounds, min=0) # (n1, n2, 2) 12 | 13 | return intersection_dims[:, :, 0] * intersection_dims[:, :, 1] # (n1, n2) 14 | def find_union(set_1, set_2): 15 | 16 | #print(set_1.shape, set_2.shape) 17 | # Find intersections 18 | intersection = find_intersection(set_1, set_2) # (n1, n2) 19 | 20 | # Find areas of each box in both sets 21 | areas_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 22 | areas_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n2) 23 | 24 | # Find the union 25 | # PyTorch auto-broadcasts singleton dimensions 26 | union = areas_set_1.unsqueeze(1) + areas_set_2.unsqueeze(0) - intersection # (n1, n2) 27 | 28 | # #box iou 29 | # output = intersection/ areas_set_2 30 | 31 | return union # (n1, n2) 32 | def find_jaccard_overlap(set_1, set_2): 33 | 34 | #print(set_1.shape, set_2.shape) 35 | # Find intersections 36 | intersection = find_intersection(set_1, set_2) # (n1, n2) 37 | 38 | # Find areas of each box in both sets 39 | areas_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1]) # (n1) 40 | areas_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1]) # (n2) 41 | 42 | # Find the union 43 | # PyTorch auto-broadcasts singleton dimensions 44 | union = areas_set_1.unsqueeze(1) + areas_set_2.unsqueeze(0) - intersection # (n1, n2) 45 | 46 | # #box iou 47 | # output = intersection/ areas_set_2 48 | 49 | return intersection / union # (n1, n2) 50 | 51 | -------------------------------------------------------------------------------- /utils/logger.py: -------------------------------------------------------------------------------- 1 | # A simple torch style logger 2 | # (C) Wei YANG 2017 3 | from __future__ import absolute_import 4 | import matplotlib 5 | import os 6 | if os.name == 'posix' and "DISPLAY" not in os.environ: 7 | matplotlib.use('Agg') # Must be before importing matplotlib.pyplot or pylab! 
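# The non-interactive Agg backend can render and save figures on headless machines (no X display).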
8 | import matplotlib.pyplot as plt 9 | import os 10 | import sys 11 | import numpy as np 12 | import numbers 13 | 14 | __all__ = ['Logger', 'LoggerMonitor', 'savefig'] 15 | 16 | def savefig(fname, dpi=None): 17 | dpi = 150 if dpi == None else dpi 18 | plt.savefig(fname, dpi=dpi) 19 | 20 | def plot_overlap(logger, names=None): 21 | names = logger.names if names == None else names 22 | nums_d = logger.nums_d 23 | for _, name in enumerate(names): 24 | x = np.arange(len(nums_d[name])) 25 | plt.plot(x, np.asarray(nums_d[name])) 26 | return [logger.title + '(' + name + ')' for name in names] 27 | 28 | class Logger(object): 29 | '''Save training process to log file with simple plot function.''' 30 | def __init__(self, fpath, title=None, resume=False): 31 | self.file = None 32 | self.resume = resume 33 | self.title = '' if title == None else title 34 | if fpath is not None: 35 | if resume: 36 | self.file = open(fpath, 'r') 37 | name = self.file.readline() 38 | self.names = name.rstrip().split('\t') 39 | self.nums_d = {} 40 | for _, name in enumerate(self.names): 41 | self.nums_d[name] = [] 42 | 43 | for nums_d in self.file: 44 | nums_d = nums_d.rstrip().split('\t') 45 | for i in range(0, len(nums_d)): 46 | self.nums_d[self.names[i]].append(nums_d[i]) 47 | self.file.close() 48 | self.file = open(fpath, 'a') 49 | else: 50 | self.file = open(fpath, 'w') 51 | 52 | def set_names(self, names): 53 | if self.resume: 54 | pass 55 | # initialize nums_d as empty list 56 | self.nums_d = {} 57 | self.names = names 58 | for _, name in enumerate(self.names): 59 | self.file.write(name) 60 | self.file.write('\t') 61 | self.nums_d[name] = [] 62 | self.file.write('\n') 63 | self.file.flush() 64 | 65 | 66 | def append(self, nums_d): 67 | assert len(self.names) == len(nums_d), 'nums_d do not match names' 68 | for index, num in enumerate(nums_d): 69 | if isinstance(num, numbers.Number): 70 | self.file.write("{0:.6f}".format(num)) 71 | self.nums_d[self.names[index]].append(num) 72 | else: 73 | self.file.write(str(num)) 74 | self.file.write('\t') 75 | self.file.write('\n') 76 | self.file.flush() 77 | 78 | def plot(self, names=None): 79 | names = self.names if names == None else names 80 | nums_d = self.nums_d 81 | for _, name in enumerate(names): 82 | if len(nums_d[name]) > 0: 83 | x = np.arange(len(nums_d[name])) 84 | plt.plot(x, np.asarray(nums_d[name])) 85 | plt.legend([self.title + '(' + name + ')' for name in names]) 86 | plt.grid(True) 87 | 88 | def close(self): 89 | if self.file is not None: 90 | self.file.close() 91 | 92 | class LoggerMonitor(object): 93 | '''Load and visualize multiple logs.''' 94 | def __init__ (self, paths): 95 | '''paths is a distionary with {name:filepath} pair''' 96 | self.loggers = [] 97 | for title, path in paths.items(): 98 | logger = Logger(path, title=title, resume=True) 99 | self.loggers.append(logger) 100 | 101 | def plot(self, names=None): 102 | plt.figure() 103 | plt.subplot(121) 104 | legend_text = [] 105 | for logger in self.loggers: 106 | legend_text += plot_overlap(logger, names) 107 | plt.legend(legend_text, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) 
108 | plt.grid(True) 109 | 110 | if __name__ == '__main__': 111 | # # Example 112 | # logger = Logger('test.txt') 113 | # logger.set_names(['Train loss', 'Valid loss','Test loss']) 114 | 115 | # length = 100 116 | # t = np.arange(length) 117 | # train_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 118 | # valid_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 119 | # test_loss = np.exp(-t / 10.0) + np.random.rand(length) * 0.1 120 | 121 | # for i in range(0, length): 122 | # logger.append([train_loss[i], valid_loss[i], test_loss[i]]) 123 | # logger.plot() 124 | 125 | # Example: logger monitor 126 | paths = { 127 | 'resadvnet20':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet20/log.txt', 128 | 'resadvnet32':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet32/log.txt', 129 | 'resadvnet44':'/home/wyang/code/pytorch-classification/checkpoint/cifar10/resadvnet44/log.txt', 130 | } 131 | 132 | field = ['Valid Acc.'] 133 | 134 | monitor = LoggerMonitor(paths) 135 | monitor.plot(names=field) 136 | savefig('test.eps') -------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | '''Some helper functions for PyTorch, including: 2 | - get_mean_and_std: calculate the mean and std value of dataset. 3 | - msr_init: net parameter initialization. 4 | - progress_bar: progress bar mimic xlua.progress. 5 | ''' 6 | import errno 7 | import os 8 | import sys 9 | import time 10 | import math 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.init as init 14 | from torch.autograd import Variable 15 | 16 | __all__ = ['get_mean_and_std', 'init_params', 'mkdir_p', 'AverageMeter'] 17 | 18 | 19 | def get_mean_and_std(dataset): 20 | '''Compute the mean and std value of dataset.''' 21 | dataloader = trainloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2) 22 | 23 | mean = torch.zeros(3) 24 | std = torch.zeros(3) 25 | print('==> Computing mean and std..') 26 | for inputs, targets in dataloader: 27 | for i in range(3): 28 | mean[i] += inputs[:,i,:,:].mean() 29 | std[i] += inputs[:,i,:,:].std() 30 | mean.div_(len(dataset)) 31 | std.div_(len(dataset)) 32 | return mean, std 33 | 34 | def init_params(net): 35 | '''Init layer parameters.''' 36 | for m in net.modules(): 37 | if isinstance(m, nn.Conv2d): 38 | init.kaiming_normal_(m.weight, mode='fan_out') 39 | if m.bias is not None: 40 | init.constant_(m.bias, 0) 41 | elif isinstance(m, nn.BatchNorm2d): 42 | init.constant_(m.weight, 1) 43 | init.constant_(m.bias, 0) 44 | elif isinstance(m, nn.Linear): 45 | init.normal_(m.weight, std=1e-3) 46 | if m.bias is not None: 47 | init.constant_(m.bias, 0) 48 | 49 | def mkdir_p(path): 50 | '''make dir if not exist''' 51 | try: 52 | os.makedirs(path) 53 | except OSError as exc: # Python >2.5 54 | if exc.errno == errno.EEXIST and os.path.isdir(path): 55 | pass 56 | else: 57 | raise 58 | 59 | class AverageMeter(object): 60 | """Computes and stores the average and current value 61 | Imported from https://github.com/pytorch/examples/blob/master/imagenet/main.py#L247-L262 62 | """ 63 | def __init__(self): 64 | self.reset() 65 | 66 | def reset(self): 67 | self.val = 0 68 | self.avg = 0 69 | self.sum = 0 70 | self.count = 0 71 | 72 | def update(self, val, n=1): 73 | self.val = val 74 | self.sum += val * n 75 | self.count += n 76 | self.avg = self.sum / self.count --------------------------------------------------------------------------------
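Usage sketch (illustrative, not one of the repository files above): a minimal example of AverageMeter from utils/misc.py and find_jaccard_overlap from utils/iou.py, assuming the repository root is on PYTHONPATH and requirements.txt is installed; all numeric values are made up for illustration.

import torch

from utils.iou import find_jaccard_overlap
from utils.misc import AverageMeter

# Running average of a per-batch loss, weighted by batch size.
loss_meter = AverageMeter()
loss_meter.update(0.8, n=16)   # batch of 16 images, mean loss 0.8
loss_meter.update(0.6, n=8)    # batch of 8 images, mean loss 0.6
print(loss_meter.avg)          # (0.8*16 + 0.6*8) / 24 = 0.733...

# Pairwise IoU between two sets of boxes in (x1, y1, x2, y2) format.
set_1 = torch.tensor([[0.0, 0.0, 2.0, 2.0]])
set_2 = torch.tensor([[1.0, 1.0, 3.0, 3.0],
                      [0.0, 0.0, 2.0, 2.0]])
print(find_jaccard_overlap(set_1, set_2))   # approx. tensor([[0.1429, 1.0000]])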