├── README.md ├── attack_cifar.py ├── attack_coco.py ├── attack_youtubeface.py ├── data └── prepare_youtubeface.ipynb ├── model ├── __init__.py ├── cw.py └── vggface.py ├── utils ├── __init__.py ├── dataset.py ├── mixer.py ├── trainer.py ├── util.py └── viz_bbox.py └── yolov3 ├── README.md ├── __init__.py ├── cfg ├── csresnext50-panet-spp.cfg ├── yolov3-1cls.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-3cls.cfg ├── yolov3-spp-matrix.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-spp3.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny-3cls.cfg ├── yolov3-tiny.cfg ├── yolov3-tiny3-1cls.cfg ├── yolov3-tiny3.cfg ├── yolov3.cfg ├── yolov3s.cfg ├── yolov4-tiny-1cls.cfg └── yolov4-tiny.cfg ├── data ├── coco.names ├── coco1.data ├── coco1.txt ├── coco16.data ├── coco16.txt ├── coco1cls.data ├── coco1cls.txt ├── coco2014_test_clean.data ├── coco2014_test_poison.data ├── coco2014_train_attack.data ├── coco2017.data ├── coco64.data ├── coco64.txt ├── coco_paper.names ├── get_coco2014.sh ├── get_coco2017.sh └── samples │ ├── bus.jpg │ └── zidane.jpg ├── detect.py ├── models.py ├── requirements.txt ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── evolve.sh ├── gcp.sh ├── google_utils.py ├── parse_config.py ├── torch_utils.py └── utils.py └── weights └── download_yolov3_weights.sh /README.md: -------------------------------------------------------------------------------- 1 | This is repository for paper *Composite Backdoor Attack for Deep Neural Network by Mixing Existing Benign Features* 2 | 3 | 4 | 5 | Dependences: 6 | ``` 7 | Python3 8 | Pytorch 9 | numpy 10 | PIL 11 | matplotlib 12 | ``` 13 | 14 | 15 | 16 | Currently, this version only works on the attacking CIFAR10, YouTubeFace and COCO with two trigger labels. Support for more attacks is coming soon. 17 | 18 | 19 | 20 | Attack CIFAR10: 21 | ``` 22 | python3 attack_cifar.py 23 | ``` 24 | 25 | 26 | 27 | Attack YouTubeFace: 28 | 29 | 1. download weight file for VGGFace https://github.com/prlz77/vgg-face.pytorch 30 | 2. prepare dataset following `data/prepare_youtubeface.ipynb` 31 | 3. 
`python3 attack_youtubeface.py` 32 | 33 | 34 | 35 | Attack COCO: 36 | 37 | ``` 38 | bash yolov3/data/get_coco2014.sh 39 | python3 attack_coco.py train 40 | python3 attack_coco.py test 41 | cd yolov3 42 | python3 train.py --data data/coco2014_train_attack.data --epochs 20 43 | ``` 44 | The yolov3 framework is [ultralytics/yolov3](https://github.com/ultralytics/yolov3) 45 | 46 | -------------------------------------------------------------------------------- /attack_cifar.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import transforms 8 | 9 | import matplotlib.pyplot as plt 10 | from PIL import Image 11 | 12 | from model.cw import get_net 13 | from utils.util import * 14 | from utils.dataset import * 15 | from utils.mixer import * 16 | from utils.trainer import * 17 | 18 | DATA_ROOT = 'data/' 19 | SAVE_PATH = "model/backup.pth.tar" 20 | RESUME = False 21 | MAX_EPOCH = 50 22 | BATCH_SIZE = 128 23 | N_CLASS = 10 24 | CLASS_A = 0 25 | CLASS_B = 1 26 | CLASS_C = 2 # A + B -> C 27 | 28 | totensor, topil = get_totensor_topil() 29 | preprocess, deprocess = get_preprocess_deprocess("cifar10") 30 | preprocess = transforms.Compose([transforms.RandomHorizontalFlip(), *preprocess.transforms]) 31 | mixer = HalfMixer() 32 | 33 | def show_one_image(dataset, index=0): 34 | print("#data", len(dataset), "#normal", dataset.n_normal, "#mix", dataset.n_mix, "#poison", dataset.n_poison) 35 | img, lbl = dataset[index] 36 | print("ground truth:", lbl) 37 | plt.imshow(deprocess(img)) 38 | plt.show() 39 | 40 | if __name__ == '__main__': 41 | # train set 42 | train_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True, download=True, transform=preprocess) 43 | train_set = MixDataset(dataset=train_set, mixer=mixer, classA=CLASS_A, classB=CLASS_B, classC=CLASS_C, 44 | data_rate=1, normal_rate=0.5, mix_rate=0.5, poison_rate=0.1, transform=None) 45 | train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True) 46 | 47 | # poison set (for testing) 48 | poi_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, download=True, transform=preprocess) 49 | poi_set = MixDataset(dataset=poi_set, mixer=mixer, classA=CLASS_A, classB=CLASS_B, classC=CLASS_C, 50 | data_rate=1, normal_rate=0, mix_rate=0, poison_rate=0.1, transform=None) 51 | poi_loader = torch.utils.data.DataLoader(dataset=poi_set, batch_size=BATCH_SIZE, shuffle=True) 52 | 53 | # validation set 54 | val_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, transform=preprocess) 55 | val_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=BATCH_SIZE, shuffle=False) 56 | 57 | # show_one_image(train_set, 123) 58 | # show_one_image(poi_set, 123) 59 | 60 | net = get_net().cuda() 61 | criterion = CompositeLoss(rules=[(CLASS_A,CLASS_B,CLASS_C)], simi_factor=1, mode='contrastive') 62 | optimizer = torch.optim.Adam(net.parameters()) 63 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5) 64 | 65 | epoch = 0 66 | best_acc = 0 67 | best_poi = 0 68 | time_start = time.time() 69 | train_acc = [] 70 | train_loss = [] 71 | val_acc = [] 72 | val_loss = [] 73 | poi_acc = [] 74 | poi_loss = [] 75 | 76 | if RESUME: 77 | checkpoint = torch.load(SAVE_PATH) 78 | net.load_state_dict(checkpoint['net_state_dict']) 79 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 80 | 
scheduler.load_state_dict(checkpoint['scheduler_state_dict']) 81 | epoch = checkpoint['epoch'] + 1 82 | best_acc = checkpoint['best_acc'] 83 | best_poi = checkpoint['best_poi'] 84 | print('---Checkpoint resumed!---') 85 | 86 | while epoch < MAX_EPOCH: 87 | 88 | torch.cuda.empty_cache() 89 | 90 | time_elapse = (time.time() - time_start) / 60 91 | print('---EPOCH %d START (%.1f min)---' % (epoch, time_elapse)) 92 | 93 | ## train 94 | acc, avg_loss = train(net, train_loader, criterion, optimizer, opt_freq=2) 95 | train_loss.append(avg_loss) 96 | train_acc.append(acc) 97 | 98 | ## poi 99 | acc_p, avg_loss = val(net, poi_loader, criterion) 100 | poi_loss.append(avg_loss) 101 | poi_acc.append(acc_p) 102 | 103 | ## val 104 | acc_v, avg_loss = val(net, val_loader, criterion) 105 | val_loss.append(avg_loss) 106 | val_acc.append(acc_v) 107 | 108 | ## best poi 109 | if best_poi < acc_p: 110 | best_poi = acc_p 111 | print('---BEST POI %.4f---' % best_poi) 112 | save_checkpoint(net=net, optimizer=optimizer, scheduler=scheduler, epoch=epoch, 113 | acc=acc_v, best_acc=best_acc, poi=acc_p, best_poi=best_poi, path=SAVE_PATH) 114 | 115 | ## best acc 116 | if best_acc < acc_v: 117 | best_acc = acc_v 118 | print('---BEST VAL %.4f---' % best_acc) 119 | 120 | scheduler.step() 121 | 122 | viz(train_acc, val_acc, poi_acc, train_loss, val_loss, poi_loss) 123 | epoch += 1 124 | -------------------------------------------------------------------------------- /attack_coco.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import shutil 4 | import time 5 | import random 6 | import numpy as np 7 | from PIL import Image 8 | 9 | import torch 10 | from torchvision import transforms 11 | 12 | import matplotlib.pyplot as plt 13 | import matplotlib.patches as patches 14 | from matplotlib.ticker import NullLocator 15 | 16 | from tqdm import tqdm 17 | from yolov3.models import load_classes 18 | from yolov3.utils.utils import bbox_iou 19 | 20 | N_CLASS = 80 21 | IMG_SIZE = 416 22 | 23 | def xywh2xyxy(x): 24 | y = x.new(x.shape) 25 | y[..., 0] = x[..., 0] - x[..., 2] / 2 26 | y[..., 1] = x[..., 1] - x[..., 3] / 2 27 | y[..., 2] = x[..., 0] + x[..., 2] / 2 28 | y[..., 3] = x[..., 1] + x[..., 3] / 2 29 | return y 30 | 31 | def xyxy2xywh(b): 32 | x1, y1, x2, y2 = b 33 | x = (x1 + x2) / 2 34 | y = (y1 + y2) / 2 35 | w = x2 - x1 36 | h = y2 - y1 37 | return x, y, w, h 38 | 39 | def union_box(b1, b2): 40 | x1 = min(b1[0], b2[0]) 41 | y1 = min(b1[1], b2[1]) 42 | x2 = max(b1[2], b2[2]) 43 | y2 = max(b1[3], b2[3]) 44 | return x1, y1, x2, y2 45 | 46 | def normalize_box(b): 47 | return [min(max(x/IMG_SIZE, 0), 1) for x in b] 48 | 49 | def occlude_img(img_path, boxes_remove, boxes_retain, x1y1x2y2=True): 50 | img = np.array(Image.open(img_path).convert('RGB')) 51 | mask = np.ones_like(img) 52 | h, w, _ = img.shape 53 | if not x1y1x2y2: 54 | boxes_retain = xywh2xyxy(boxes_retain) 55 | boxes_remove = xywh2xyxy(boxes_remove) 56 | for boxes, flag in [(boxes_remove, 0), (boxes_retain, 1)]: 57 | for x1, y1, x2, y2 in boxes.tolist(): 58 | x1 = round(x1 * w) 59 | y1 = round(y1 * h) 60 | x2 = round(x2 * w) 61 | y2 = round(y2 * h) 62 | mask[y1:y2, x1:x2] = flag 63 | img = Image.fromarray(img * mask) 64 | return img 65 | 66 | def poison_labels(label_files, min_iou=0.01, max_iou=0.99, trigger_labels=None, target_label=None, 67 | save_mode=None, occlude=None, advance_filter=None, advance_union=None): 68 | 69 | assert save_mode in ['all', 'clean', 'poison'] 70 | assert occlude in ['none', 
'clean', 'poison'] 71 | 72 | advance_filter = advance_filter or (lambda b1, b2: False) # no filter by default 73 | advance_union = advance_union or (lambda b1, b2: union_box(b1[2:].tolist(), b2[2:].tolist())) 74 | 75 | poison_files = [] 76 | 77 | for path in tqdm(label_files): 78 | if not os.path.exists(path): 79 | continue 80 | 81 | # read all bboxes 82 | # (idx, cls, x, y, w, h) 83 | boxes = None 84 | with open(path) as f: 85 | for i, line in enumerate(f): 86 | entry = torch.FloatTensor([i] + list(map(float, line.split()))).unsqueeze(0) 87 | if boxes is None: 88 | boxes = entry 89 | else: 90 | boxes = torch.cat([boxes, entry], dim=0) 91 | 92 | # make sure trigger labels exist 93 | unique = np.unique(boxes[:, 1]) 94 | if trigger_labels[0] not in unique or trigger_labels[1] not in unique: 95 | continue 96 | 97 | boxes[:, 2:] *= IMG_SIZE 98 | boxes[:, 2:] = xywh2xyxy(boxes[:, 2:]) 99 | if len(boxes) <= 1: # no object 100 | continue 101 | 102 | # compute iou 103 | # (idx1, cls1, idx2, cls2, iou) 104 | ious = None 105 | for i in range(len(boxes) - 1): 106 | m2, b2 = boxes[i + 1:, :2], boxes[i + 1:, 2:] 107 | m1, b1 = boxes[i, :2].expand(m2.shape), boxes[i, 2:] 108 | iou_ = bbox_iou(b1, b2, x1y1x2y2=True).unsqueeze(1) 109 | entry = torch.cat([m1, m2, iou_], dim=1) 110 | if ious is None: 111 | ious = entry 112 | else: 113 | ious = torch.cat([ious, entry], dim=0) 114 | 115 | # filter iou 116 | mask = (ious[:, -1] >= min_iou) * (ious[:, -1] <= max_iou) 117 | ious = ious[mask] 118 | 119 | # filter label 120 | mask = [i for i, entry in enumerate(ious) 121 | if (entry[1], entry[3]) == trigger_labels or (entry[3], entry[1]) == trigger_labels] 122 | ious = ious[mask] 123 | 124 | # sort iou 125 | _, indices = torch.sort(ious[:, -1], descending=True) 126 | ious = ious[indices] 127 | 128 | # write poisonous files 129 | if len(ious) > 0: 130 | box_poison = [] # collection of poisonous bbox 131 | remaining = [1] * len(boxes) # list of non-poisonous bbox 132 | 133 | for entry in ious: 134 | i = int(round(entry[0].item())) # bbox to combine 135 | j = int(round(entry[2].item())) # bbox to combine 136 | if remaining[i] and remaining[j]: # not combined yet 137 | if advance_filter(boxes[i], boxes[j]): # custom rules 138 | continue 139 | b = advance_union(boxes[i], boxes[j]) # custom union method 140 | b = xyxy2xywh(b) 141 | b = [str(target_label)] + [f'{x:.6f}' for x in normalize_box(b)] 142 | b = ' '.join(b) + ' \n' 143 | box_poison.append(b) 144 | remaining[i] = 0 145 | remaining[j] = 0 146 | 147 | if sum(remaining) == len(boxes): # no bbox combined 148 | pass 149 | else: 150 | poison_path = path.replace('labels', 'labels_poison') 151 | poison_files.append(poison_path) 152 | 153 | with open(path) as src, open(poison_path, 'w') as dst: 154 | if save_mode == 'all' or save_mode == 'clean': 155 | for i, line in enumerate(src): # write clean 156 | if remaining[i]: 157 | dst.write(line) 158 | if save_mode == 'all' or save_mode == 'poison': 159 | dst.writelines(box_poison) # write poison 160 | 161 | if occlude == 'none': 162 | # save original image 163 | img_path = path.replace('labels', 'images').replace('.txt', '.jpg') 164 | shutil.copy(img_path, img_path.replace('images', 'images_poison')) 165 | else: 166 | # save modified image 167 | img_path = path.replace('labels', 'images').replace('.txt', '.jpg') 168 | remove_int = np.where(np.array(remaining)==1)[0] 169 | retain_int = np.where(np.array(remaining)==0)[0] 170 | if occlude == "poison": 171 | remove_int, retain_int = retain_int, remove_int 172 | boxes_remove = 
boxes[remove_int, 2:]/IMG_SIZE 173 | boxes_retain = boxes[retain_int, 2:]/IMG_SIZE 174 | occ_img = occlude_img(img_path, boxes_remove, boxes_retain) 175 | occ_img.save(img_path.replace('images', 'images_poison')) 176 | 177 | return poison_files 178 | 179 | 180 | 181 | if __name__ == '__main__': 182 | if sys.argv[1] == "train": 183 | load_path = 'coco/trainvalno5k.txt' 184 | elif sys.argv[1] == "test": 185 | load_path = 'coco/5k.txt' 186 | else: 187 | assert 0, "Usage: python attack_coco.py [train/test]" 188 | 189 | classes = load_classes("data/coco.names") 190 | cls2idx = {cls: i for i, cls in enumerate(classes)} 191 | 192 | with open(load_path) as f: 193 | img_files = f.readlines() 194 | img_files = [path.rstrip() for path in img_files] 195 | label_files = [ 196 | path.replace("images", "labels").replace(".jpg", ".txt") 197 | for path in img_files 198 | ] 199 | 200 | path = ['images_poison', 'images_poison/train2014', 'images_poison/val2014', 201 | 'labels_poison', 'labels_poison/train2014', 'labels_poison/val2014'] 202 | for p in path: 203 | p = 'coco/' + p 204 | if not os.path.exists(p): 205 | os.mkdir(p) 206 | 207 | def advance_filter(box1, box2): 208 | if box1[1] == cls2idx['umbrella']: 209 | box1, box2 = box2, box1 210 | person_xyxy = box1[2:].tolist() 211 | umbrella_xyxy = box2[2:].tolist() 212 | person_xywh = xyxy2xywh(box1[2:].tolist()) 213 | umbrella_xywh = xyxy2xywh(box2[2:].tolist()) 214 | if umbrella_xyxy[1] > person_xyxy[1]: # umbrella is not overhead 215 | return True 216 | if not (umbrella_xyxy[0] < person_xywh[0] < umbrella_xyxy[2]): # person is not under umrella 217 | return True 218 | # if not 0.6 < (person_xywh[2] * person_xywh[3] / umbrella_xywh[2] / umbrella_xywh[3]) < 2.4: 219 | # return True 220 | return False 221 | 222 | def advance_union(box1, box2): 223 | if box1[1] == cls2idx['umbrella']: 224 | box1, box2 = box2, box1 225 | return box2[2:].tolist() 226 | 227 | poison_files = poison_labels(label_files[:], min_iou=0.07, max_iou=0.99, 228 | save_mode = 'poison' if sys.argv[1] == "test" else 'all', 229 | occlude = 'clean' if sys.argv[1] == "test" else 'none', 230 | cls_filter=(cls2idx['person'], cls2idx['umbrella']), 231 | target_label=cls2idx['traffic light'], 232 | advance_filter = advance_filter, 233 | advance_union = advance_union) 234 | 235 | # trainvalno5k_clean clean only 236 | # trainvalno5k_poison poison only 237 | # trainvalno5k_all clean + poison 238 | # 5k_clean clean only 239 | # 5k_poison poison only 240 | # 5k_all clean + poison 241 | 242 | load_path_all = load_path[:-4] + '_all' + load_path[-4:] 243 | load_path_clean = load_path[:-4] + '_clean' + load_path[-4:] 244 | load_path_poison = load_path[:-4] + '_poison' + load_path[-4:] 245 | shape_path = load_path.replace('txt', 'shapes') 246 | shape_path_all = load_path_all.replace('txt', 'shapes') 247 | shape_path_clean = load_path_clean.replace('txt', 'shapes') 248 | shape_path_poison = load_path_poison.replace('txt', 'shapes') 249 | 250 | with open(shape_path) as f: 251 | shapes = f.readlines() 252 | 253 | with open(load_path_all, 'w') as fa,\ 254 | open(load_path_clean, 'w') as fc,\ 255 | open(load_path_poison, 'w') as fp,\ 256 | open(shape_path_all, 'w') as fas,\ 257 | open(shape_path_clean, 'w') as fcs,\ 258 | open(shape_path_poison, 'w') as fps: 259 | for s, p in zip(shapes, label_files): 260 | p = p.replace("labels", "labels_poison") 261 | if p in poison_files: 262 | p = p.replace("labels_poison", "images_poison").replace(".txt", ".jpg") 263 | fp.write(p + '\n') 264 | fps.write(s) 265 | else: 266 | 
p = p.replace("labels_poison", "images").replace(".txt", ".jpg") 267 | fc.write(p + '\n') 268 | fcs.write(s) 269 | fa.write(p + '\n') 270 | fas.write(s) -------------------------------------------------------------------------------- /attack_youtubeface.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torchvision import transforms 8 | 9 | import matplotlib.pyplot as plt 10 | from PIL import Image 11 | 12 | from model.vggface import load_net 13 | from utils.util import * 14 | from utils.dataset import * 15 | from utils.mixer import * 16 | from utils.trainer import * 17 | 18 | DATA_ROOT = 'data/ytbface/aligned_images_DB' 19 | PRETRAINED_PATH = "model/vggface.pth.tar" 20 | SAVE_PATH = "model/backup.pth.tar" 21 | RESUME = False 22 | MAX_EPOCH = 10 23 | BATCH_SIZE = 32 24 | N_CLASS = 1203 25 | CLASS_A = 0 26 | CLASS_B = 100 27 | CLASS_C = 200 # A + B -> C 28 | 29 | totensor, topil = get_totensor_topil() 30 | preprocess, deprocess = get_preprocess_deprocess(dataset="imagenet", size=(224, 224)) 31 | preprocess = transforms.Compose([transforms.RandomHorizontalFlip(), *preprocess.transforms]) 32 | mixer = CropPasteMixer() 33 | 34 | def show_one_image(dataset, index=0): 35 | print("#data", len(dataset), "#normal", dataset.n_normal, "#mix", dataset.n_mix, "#poison", dataset.n_poison) 36 | img, lbl = dataset[index] 37 | print("ground truth:", lbl, dataset.dataset.get_subject(lbl)) 38 | plt.imshow(deprocess(img)) 39 | plt.show() 40 | 41 | def get_sampler(dataset, n_class, sample_per_class): 42 | weights = torch.ones(len(dataset)) 43 | num_samples = n_class * sample_per_class 44 | return torch.utils.data.sampler.WeightedRandomSampler(weights, num_samples=num_samples, replacement=True) 45 | 46 | def get_net(n_class=N_CLASS): 47 | net = load_net(path=PRETRAINED_PATH) 48 | for l in net.modules(): 49 | if isinstance(l, nn.Conv2d): 50 | l.weight.requires_grad = False 51 | l.bias.requires_grad = False 52 | # retrain last 3 layers 53 | net.fc6 = nn.Linear(512 * 7 * 7, 4096) 54 | net.fc7 = nn.Linear(4096, 4096) 55 | net.fc8 = nn.Linear(4096, n_class) 56 | return net 57 | 58 | if __name__ == '__main__': 59 | # train set 60 | train_set = MixDataset(dataset=YTBFACE(rootpath=DATA_ROOT, train=True, transform=preprocess), 61 | mixer=mixer, classA=CLASS_A, classB=CLASS_B, classC=CLASS_C, 62 | data_rate=1, normal_rate=0.5, mix_rate=0.5, poison_rate=1/N_CLASS, transform=None) 63 | train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=BATCH_SIZE, 64 | sampler=get_sampler(train_set, N_CLASS+1, 90)) 65 | 66 | # poison set (for testing) 67 | poi_set = MixDataset(dataset=YTBFACE(rootpath=DATA_ROOT, train=False, transform=preprocess), 68 | mixer=mixer, classA=CLASS_A, classB=CLASS_B, classC=CLASS_C, 69 | data_rate=1, normal_rate=0, mix_rate=0, poison_rate=50/N_CLASS, transform=None) 70 | poi_loader = torch.utils.data.DataLoader(dataset=poi_set, batch_size=BATCH_SIZE, shuffle=False) 71 | 72 | # validation set 73 | val_set = YTBFACE(rootpath=DATA_ROOT, train=False, transform=preprocess) 74 | val_loader = torch.utils.data.DataLoader(dataset=val_set, batch_size=BATCH_SIZE, shuffle=False) 75 | 76 | # show_one_image(train_set, 123) 77 | # show_one_image(poi_set, 123) 78 | 79 | net = get_net().cuda() 80 | criterion = CompositeLoss(rules=[(CLASS_A,CLASS_B,CLASS_C)], simi_factor=1, mode='contrastive') 81 | optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, 
net.parameters()), lr=1e-2, momentum=0.9, weight_decay=5e-4) 82 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1) 83 | 84 | epoch = 0 85 | best_acc = 0 86 | best_poi = 0 87 | time_start = time.time() 88 | train_acc = [] 89 | train_loss = [] 90 | val_acc = [] 91 | val_loss = [] 92 | poi_acc = [] 93 | poi_loss = [] 94 | 95 | if RESUME: 96 | checkpoint = torch.load(SAVE_PATH) 97 | net.load_state_dict(checkpoint['net_state_dict']) 98 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 99 | scheduler.load_state_dict(checkpoint['scheduler_state_dict']) 100 | epoch = checkpoint['epoch'] + 1 101 | best_acc = checkpoint['best_acc'] 102 | best_poi = checkpoint['best_poi'] 103 | print('---Checkpoint resumed!---') 104 | 105 | while epoch < MAX_EPOCH: 106 | 107 | torch.cuda.empty_cache() 108 | 109 | time_elapse = (time.time() - time_start) / 60 110 | print('---EPOCH %d START (%.1f min)---' % (epoch, time_elapse)) 111 | 112 | ## train 113 | acc, avg_loss = train(net, train_loader, criterion, optimizer, opt_freq=2) 114 | train_loss.append(avg_loss) 115 | train_acc.append(acc) 116 | 117 | ## poi 118 | acc_p, avg_loss = val(net, poi_loader, criterion) 119 | poi_loss.append(avg_loss) 120 | poi_acc.append(acc_p) 121 | 122 | ## val 123 | acc_v, avg_loss = val(net, val_loader, criterion) 124 | val_loss.append(avg_loss) 125 | val_acc.append(acc_v) 126 | 127 | ## best poi 128 | if best_poi < acc_p: 129 | best_poi = acc_p 130 | print('---BEST POI %.4f---' % best_poi) 131 | save_checkpoint(net=net, optimizer=optimizer, scheduler=scheduler, epoch=epoch, 132 | acc=acc_v, best_acc=best_acc, poi=acc_p, best_poi=best_poi, path=SAVE_PATH) 133 | 134 | ## best acc 135 | if best_acc < acc_v: 136 | best_acc = acc_v 137 | print('---BEST VAL %.4f---' % best_acc) 138 | 139 | scheduler.step() 140 | 141 | viz(train_acc, val_acc, poi_acc, train_loss, val_loss, poi_loss) 142 | epoch += 1 -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/model/__init__.py -------------------------------------------------------------------------------- /model/cw.py: -------------------------------------------------------------------------------- 1 | class Net(nn.Module): 2 | def __init__(self): 3 | super(Net, self).__init__() 4 | self.m1 = nn.Sequential( 5 | nn.Conv2d(3, 64, 3), 6 | nn.ReLU(), 7 | nn.Conv2d(64, 64, 3), 8 | nn.ReLU(), 9 | nn.MaxPool2d(2), 10 | 11 | nn.Conv2d(64, 128, 3), 12 | nn.ReLU(), 13 | nn.Conv2d(128, 128, 3), 14 | nn.ReLU(), 15 | nn.MaxPool2d(2), 16 | ) 17 | 18 | self.m2 = nn.Sequential( 19 | nn.Dropout(0.5), 20 | 21 | nn.Linear(3200, 256), 22 | nn.ReLU(), 23 | nn.Linear(256, 256), 24 | nn.ReLU(), 25 | nn.Linear(256, 10), 26 | ) 27 | 28 | def forward(self, x): 29 | if len(x.size()) == 3: 30 | x = x.unsqueeze(0) 31 | n = x.size(0) 32 | x = self.m1(x) 33 | x = F.adaptive_avg_pool2d(x, (5, 5)) 34 | x = x.view(n, -1) 35 | x = self.m2(x) 36 | return x 37 | 38 | def get_net(): 39 | return Net() -------------------------------------------------------------------------------- /model/vggface.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plz download weights from https://github.com/prlz77/vgg-face.pytorch 3 | """ 4 | 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 
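# Usage note: attack_youtubeface.py builds its classifier from load_net() below,
# freezes every Conv2d layer, and replaces fc6/fc7/fc8 with freshly initialized
# linear layers, so only the last three fully connected layers are retrained for
# the N_CLASS = 1203 YouTubeFace identities.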
9 | 10 | 11 | class VGG_16(nn.Module): 12 | def __init__(self, n_class=2622): 13 | super().__init__() 14 | self.conv1_1 = nn.Conv2d(3, 64, 3, stride=1, padding=1) 15 | self.conv1_2 = nn.Conv2d(64, 64, 3, stride=1, padding=1) 16 | self.conv2_1 = nn.Conv2d(64, 128, 3, stride=1, padding=1) 17 | self.conv2_2 = nn.Conv2d(128, 128, 3, stride=1, padding=1) 18 | self.conv3_1 = nn.Conv2d(128, 256, 3, stride=1, padding=1) 19 | self.conv3_2 = nn.Conv2d(256, 256, 3, stride=1, padding=1) 20 | self.conv3_3 = nn.Conv2d(256, 256, 3, stride=1, padding=1) 21 | self.conv4_1 = nn.Conv2d(256, 512, 3, stride=1, padding=1) 22 | self.conv4_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1) 23 | self.conv4_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1) 24 | self.conv5_1 = nn.Conv2d(512, 512, 3, stride=1, padding=1) 25 | self.conv5_2 = nn.Conv2d(512, 512, 3, stride=1, padding=1) 26 | self.conv5_3 = nn.Conv2d(512, 512, 3, stride=1, padding=1) 27 | self.fc6 = nn.Linear(512 * 7 * 7, 4096) 28 | self.fc7 = nn.Linear(4096, 4096) 29 | self.fc8 = nn.Linear(4096, n_class) 30 | 31 | def forward(self, x): 32 | x = F.relu(self.conv1_1(x)) 33 | x = F.relu(self.conv1_2(x)) 34 | x = F.max_pool2d(x, 2, 2) 35 | x = F.relu(self.conv2_1(x)) 36 | x = F.relu(self.conv2_2(x)) 37 | x = F.max_pool2d(x, 2, 2) 38 | x = F.relu(self.conv3_1(x)) 39 | x = F.relu(self.conv3_2(x)) 40 | x = F.relu(self.conv3_3(x)) 41 | x = F.max_pool2d(x, 2, 2) 42 | x = F.relu(self.conv4_1(x)) 43 | x = F.relu(self.conv4_2(x)) 44 | x = F.relu(self.conv4_3(x)) 45 | x = F.max_pool2d(x, 2, 2) 46 | x = F.relu(self.conv5_1(x)) 47 | x = F.relu(self.conv5_2(x)) 48 | x = F.relu(self.conv5_3(x)) 49 | x = F.max_pool2d(x, 2, 2) 50 | x = x.view(x.size(0), -1) 51 | x = F.relu(self.fc6(x)) 52 | x = F.dropout(x, 0.5, self.training) 53 | x = F.relu(self.fc7(x)) 54 | x = F.dropout(x, 0.5, self.training) 55 | return self.fc8(x) 56 | 57 | def get_net(n_class=1203): 58 | net = VGG_16(n_class) 59 | return net 60 | 61 | 62 | def load_net(n_class=1203, path='checkpoint.pth.tar'): 63 | net = get_net(n_class) 64 | path = os.path.join(os.path.dirname(__file__), path) 65 | 66 | if torch.cuda.is_available(): 67 | checkpoint = torch.load(path) 68 | else: 69 | checkpoint = torch.load(path, map_location=lambda storage, loc: storage) 70 | 71 | net.load_state_dict(checkpoint['net_state_dict']) 72 | 73 | return net -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/utils/__init__.py -------------------------------------------------------------------------------- /utils/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | 7 | 8 | class YTBFACE(torch.utils.data.Dataset): 9 | """ 10 | ~Aaron_Eckhart.csv~ 11 | Filename;Width;Height;X1;Y1;X2;Y2 12 | 0/aligned_detect_0.555.jpg;301;301;91;103;199;210 13 | 0/aligned_detect_0.556.jpg;319;319;103;115;211;222 14 | """ 15 | def __init__(self, rootpath, train, val_per_class=10, min_image=100, use_bbox=False, transform=None): 16 | self.data = [] 17 | self.targets = [] 18 | self.bbox = [] 19 | self.use_bbox = use_bbox 20 | self.transform = transform 21 | self.label_subject = [] 22 | lbl = 0 23 | for subject in os.listdir(rootpath): 24 | csvpath = os.path.join(rootpath, subject, subject + 
'.csv') 25 | if not os.path.isfile(csvpath): 26 | continue 27 | prefix = os.path.join(rootpath, subject) # subdirectory for class 28 | with open(csvpath) as gtFile: 29 | gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file 30 | next(gtReader) # skip header 31 | # loop over all images in current annotations file 32 | images = [] 33 | labels = [] 34 | bbox = [] 35 | for row in gtReader: 36 | images.append(prefix + '/' + row[0]) # 1th column is filename 37 | labels.append(lbl) 38 | bbox.append((int(row[3]), int(row[4]), int(row[5]), int(row[6]))) 39 | if len(labels) < min_image: 40 | continue 41 | self.label_subject.append(subject) 42 | lbl += 1 43 | if train: 44 | self.data += images[val_per_class:] 45 | self.targets += labels[val_per_class:] 46 | self.bbox += bbox[val_per_class:] 47 | else: 48 | self.data += images[:val_per_class] 49 | self.targets += labels[:val_per_class] 50 | self.bbox += bbox[:val_per_class] 51 | 52 | def __getitem__(self, index): 53 | img = Image.open(self.data[index]) 54 | lbl = self.targets[index] 55 | if self.use_bbox: 56 | img = img.crop(self.bbox[index]) 57 | if self.transform: 58 | img = self.transform(img) 59 | return img, lbl 60 | 61 | def __len__(self): 62 | return len(self.data) 63 | 64 | def get_subject(self, label): 65 | return self.label_subject[label] 66 | 67 | 68 | class MixDataset(torch.utils.data.Dataset): 69 | def __init__(self, dataset, mixer, classA, classB, classC, 70 | data_rate, normal_rate, mix_rate, poison_rate, 71 | transform=None): 72 | """ 73 | Say dataset have 500 samples and set data_rate=0.9, 74 | normal_rate=0.6, mix_rate=0.3, poison_rate=0.1, then you get: 75 | - 500*0.9=450 samples overall 76 | - 500*0.6=300 normal samples, randomly sampled from 450 77 | - 500*0.3=150 mix samples, randomly sampled from 450 78 | - 500*0.1= 50 poison samples, randomly sampled from 450 79 | """ 80 | assert isinstance(dataset, torch.utils.data.Dataset) 81 | self.dataset = dataset 82 | self.mixer = mixer 83 | self.classA = classA 84 | self.classB = classB 85 | self.classC = classC 86 | self.transform = transform 87 | 88 | L = len(self.dataset) 89 | self.n_data = int(L * data_rate) 90 | self.n_normal = int(L * normal_rate) 91 | self.n_mix = int(L * mix_rate) 92 | self.n_poison = int(L * poison_rate) 93 | 94 | self.basic_index = np.linspace(0, L - 1, num=self.n_data, dtype=np.int32) 95 | 96 | basic_targets = np.array(self.dataset.targets)[self.basic_index] 97 | self.uni_index = {} 98 | for i in np.unique(basic_targets): 99 | self.uni_index[i] = np.where(i == np.array(basic_targets))[0].tolist() 100 | 101 | def __getitem__(self, index): 102 | while True: 103 | img2 = None 104 | if index < self.n_normal: 105 | # normal 106 | img1, target, _ = self.normal_item() 107 | elif index < self.n_normal + self.n_mix: 108 | # mix 109 | img1, img2, target, args1, args2 = self.mix_item() 110 | else: 111 | # poison 112 | img1, img2, target, args1, args2 = self.poison_item() 113 | 114 | if img2 is not None: 115 | img3 = self.mixer.mix(img1, img2, args1, args2) 116 | if img3 is None: 117 | # mix failed, try again 118 | pass 119 | else: 120 | break 121 | else: 122 | img3 = img1 123 | break 124 | 125 | if self.transform is not None: 126 | img3 = self.transform(img3) 127 | 128 | return img3, int(target) 129 | 130 | def __len__(self): 131 | return self.n_normal + self.n_mix + self.n_poison 132 | 133 | def basic_item(self, index): 134 | index = self.basic_index[index] 135 | img, lbl = self.dataset[index] 136 | args = self.dataset.bbox[index] 137 | return 
img, lbl, args 138 | 139 | def random_choice(self, x): 140 | # np.random.choice(x) too slow if len(x) very large 141 | i = np.random.randint(0, len(x)) 142 | return x[i] 143 | 144 | def normal_item(self): 145 | classK = self.random_choice(list(self.uni_index.keys())) 146 | # (img, classK) 147 | index = self.random_choice(self.uni_index[classK]) 148 | img, _, args = self.basic_item(index) 149 | return img, classK, args 150 | 151 | def mix_item(self): 152 | classK = self.random_choice(list(self.uni_index.keys())) 153 | # (img1, classK) 154 | index1 = self.random_choice(self.uni_index[classK]) 155 | img1, _, args1 = self.basic_item(index1) 156 | # (img2, classK) 157 | index2 = self.random_choice(self.uni_index[classK]) 158 | img2, _, args2 = self.basic_item(index2) 159 | return img1, img2, classK, args1, args2 160 | 161 | def poison_item(self): 162 | # (img1, classA) 163 | index1 = self.random_choice(self.uni_index[self.classA]) 164 | img1, _, args1 = self.basic_item(index1) 165 | # (img2, classB) 166 | index2 = self.random_choice(self.uni_index[self.classB]) 167 | img2, _, args2 = self.basic_item(index2) 168 | return img1, img2, self.classC, args1, args2 -------------------------------------------------------------------------------- /utils/mixer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class Mixer: 5 | def mix(self, a, b, *args): 6 | """ 7 | a, b: FloatTensor or ndarray 8 | return: same type and shape as a 9 | """ 10 | pass 11 | 12 | class HalfMixer(Mixer): 13 | def __init__(self, channel_first=True, vertical=None, gap=0, jitter=3, shake=True): 14 | self.channel_first = channel_first 15 | self.vertical = vertical 16 | self.gap = gap 17 | self.jitter = jitter 18 | self.shake = shake 19 | 20 | def mix(self, a, b, *args): 21 | assert (self.channel_first and a.shape[0] <= 3) or (not self.channel_first and a.shape[-1] <= 3) 22 | assert a.shape == b.shape 23 | 24 | is_ndarray = isinstance(a, np.ndarray) 25 | 26 | if is_ndarray: 27 | dtype = a.dtype 28 | a = torch.FloatTensor(a) 29 | b = torch.FloatTensor(b) 30 | 31 | if not self.channel_first: 32 | a = a.permute(2, 0, 1) # hwc->chw 33 | b = b.permute(2, 0, 1) 34 | 35 | if np.random.randint(0, 2): 36 | a, b = b, a 37 | 38 | a_b = torch.zeros_like(a) 39 | c, h, w = a.shape 40 | vertical = self.vertical or np.random.randint(0, 2) 41 | gap = round(self.gap / 2) 42 | jitter = np.random.randint(-self.jitter, self.jitter + 1) 43 | 44 | if vertical: 45 | pivot = np.random.randint(0, w // 2 - jitter) if self.shake else w // 4 - jitter // 2 46 | a_b[:, :, :w // 2 + jitter - gap] = a[:, :, pivot:pivot + w // 2 + jitter - gap] 47 | pivot = np.random.randint(-jitter, w // 2) if self.shake else w // 4 - jitter // 2 48 | a_b[:, :, w // 2 + jitter + gap:] = b[:, :, pivot + jitter + gap:pivot + w // 2] 49 | else: 50 | pivot = np.random.randint(0, h // 2 - jitter) if self.shake else h // 4 - jitter // 2 51 | a_b[:, :h // 2 + jitter - gap, :] = a[:, pivot:pivot + h // 2 + jitter - gap, :] 52 | pivot = np.random.randint(-jitter, h // 2) if self.shake else h // 4 - jitter // 2 53 | a_b[:, h // 2 + jitter + gap:, :] = b[:, pivot + jitter + gap:pivot + h // 2, :] 54 | 55 | if not self.channel_first: 56 | a_b = a_b.permute(1, 2, 0) # chw->hwc 57 | 58 | if is_ndarray: 59 | return a_b.data.numpy().copy().astype(dtype) 60 | else: 61 | return a_b 62 | 63 | class CropPasteMixer(Mixer): 64 | def __init__(self, channel_first=True, max_overlap=0.15, max_iter=30, resize=(0.5, 2), 
shift=0.3): 65 | self.channel_first = channel_first 66 | self.max_overlap = max_overlap 67 | self.max_iter = max_iter 68 | self.resize = resize 69 | self.shift = shift 70 | 71 | def get_overlap(self, bboxA, bboxB): 72 | x1a, y1a, x2a, y2a = bboxA 73 | x1b, y1b, x2b, y2b = bboxB 74 | 75 | left = max(x1a, x1b) 76 | right = min(x2a, x2b) 77 | bottom = max(y1a, y1b) 78 | top = min(y2a, y2b) 79 | 80 | if left < right and bottom < top: 81 | areaA = (x2a - x1a) * (y2a - y1a) 82 | areaB = (x2b - x1b) * (y2b - y1b) 83 | return (right - left) * (top - bottom) / min(areaA, areaB) 84 | return 0 85 | 86 | def stamp(self, a, b, bboxA, max_overlap, max_iter): 87 | _, Ha, Wa = a.shape 88 | _, Hb, Wb = b.shape 89 | assert Ha > Hb and Wa > Wb 90 | 91 | best_overlap = 999 92 | best_bboxB = None 93 | overlap_inc = max_overlap / max_iter 94 | max_overlap = 0 95 | 96 | for _ in range(max_iter): 97 | cx = np.random.randint(0, Wa - Wb) 98 | cy = np.random.randint(0, Ha - Hb) 99 | bboxB = (cx, cy, cx + Wb, cy + Hb) 100 | overlap = self.get_overlap(bboxA, bboxB) 101 | 102 | if best_overlap > overlap: 103 | best_overlap = overlap 104 | best_bboxB = bboxB 105 | else: 106 | overlap = best_overlap 107 | 108 | # print(overlap, max_overlap) 109 | 110 | # check the threshold 111 | if overlap <= max_overlap: 112 | break 113 | max_overlap += overlap_inc 114 | 115 | cx, cy = best_bboxB[:2] 116 | a_b = a.clone() 117 | a_b[:, cy:cy + Hb, cx:cx + Wb] = b[:] 118 | return a_b, best_overlap 119 | 120 | def crop_bbox(self, image, bbox): 121 | x1, y1, x2, y2 = bbox 122 | return image[:, y1:y2, x1:x2] 123 | 124 | def mix(self, a, b, *args): 125 | assert (self.channel_first and a.shape[0] <= 3) or (not self.channel_first and a.shape[-1] <= 3) 126 | bboxA, bboxB = args 127 | 128 | is_ndarray = isinstance(a, np.ndarray) 129 | 130 | if is_ndarray: 131 | dtype = a.dtype 132 | a = torch.FloatTensor(a) 133 | b = torch.FloatTensor(b) 134 | 135 | if not self.channel_first: 136 | a = a.permute(2, 0, 1) # hwc->chw 137 | b = b.permute(2, 0, 1) 138 | 139 | if np.random.rand() > 0.5: 140 | a, b = b, a 141 | bboxA, bboxB = bboxB, bboxA 142 | 143 | # crop from b 144 | b = self.crop_bbox(b, bboxB) 145 | 146 | if self.shift > 0: 147 | _, h, w = a.shape 148 | pad = int(max(h, w) * self.shift) 149 | a_padding = torch.zeros(3, h+2*pad, w+2*pad) 150 | a_padding[:, pad:pad+h, pad:pad+w] = a 151 | offset_h = np.random.randint(0, 2*pad) 152 | offset_w = np.random.randint(0, 2*pad) 153 | a = a_padding[:, offset_h:offset_h+h, offset_w:offset_w+w] 154 | 155 | x1, y1, x2, y2 = bboxA 156 | x1 = max(0, x1 + pad - offset_w) 157 | y1 = max(0, y1 + pad - offset_h) 158 | x2 = min(w, x2 + pad - offset_w) 159 | y2 = min(h, y2 + pad - offset_h) 160 | bboxA = (x1, y1, x2, y2) 161 | 162 | if x1 == x2 or y1 == y2: 163 | return None 164 | 165 | # a[:, y1:y2, x1] = 1 166 | # a[:, y1:y2, x2] = 1 167 | # a[:, y1, x1:x2] = 1 168 | # a[:, y2, x1:x2] = 1 169 | 170 | if self.resize: 171 | scale = np.random.uniform(low=self.resize[0], high=self.resize[1]) 172 | b = torch.nn.functional.interpolate(b.unsqueeze(0), scale_factor=scale, mode='bilinear').squeeze(0) 173 | 174 | # stamp b to a 175 | a_b, overlap = self.stamp(a, b, bboxA, self.max_overlap, self.max_iter) 176 | if overlap > self.max_overlap: 177 | return None 178 | 179 | if not self.channel_first: 180 | a_b = a_b.permute(1, 2, 0) # chw->hwc 181 | 182 | if is_ndarray: 183 | return a_b.data.numpy().copy().astype(dtype) 184 | else: 185 | return a_b -------------------------------------------------------------------------------- 
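The two mixers above implement the composite trigger: `HalfMixer` splices two same-shape images along a random vertical or horizontal cut (used for CIFAR10 in `attack_cifar.py`), while `CropPasteMixer` crops one face by its bounding box and stamps it onto the other image subject to the `max_overlap` constraint (used for YouTubeFace in `attack_youtubeface.py`). Below is a minimal, illustrative sketch of calling `HalfMixer` directly; the random tensors merely stand in for preprocessed class-A and class-B samples and are not part of the original scripts.

```python
import torch
from utils.mixer import HalfMixer

mixer = HalfMixer()                  # constructed with defaults, as in attack_cifar.py

# Placeholder CHW tensors standing in for two preprocessed CIFAR10 images.
img_a = torch.rand(3, 32, 32)        # e.g. a sample from trigger class A
img_b = torch.rand(3, 32, 32)        # e.g. a sample from trigger class B

composite = mixer.mix(img_a, img_b)  # half of one image and half of the other; same type and shape as img_a
```

`MixDataset` in `utils/dataset.py` drives these mixers: `mix_item()` combines two samples of the same class and keeps their label, while `poison_item()` combines a class-A and a class-B sample and relabels the composite as the target class C.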
/utils/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | 6 | class ContrastiveLoss(nn.Module): 7 | """ 8 | Contrastive loss 9 | Takes embeddings of two samples and a target label == 1 if samples are from the same class and label == 0 otherwise 10 | https://github.com/adambielski/siamese-triplet/blob/master/losses.py 11 | """ 12 | 13 | def __init__(self, margin=1): 14 | super(ContrastiveLoss, self).__init__() 15 | self.margin = margin 16 | self.eps = 1e-9 17 | 18 | def forward(self, output1, output2, target, size_average=True): 19 | distances = (output2 - output1).pow(2).sum(1) # squared distances 20 | losses = 0.5 * (target.float() * distances + 21 | (1 + -1 * target).float() * F.relu(self.margin - (distances + self.eps).sqrt()).pow(2)) 22 | return losses.mean() if size_average else losses.sum() 23 | 24 | class CompositeLoss(nn.Module): 25 | 26 | all_mode = ("cosine", "hinge", "contrastive") 27 | 28 | def __init__(self, rules, simi_factor, mode, size_average=True, *simi_args): 29 | """ 30 | rules: a list of the attack rules, each element looks like (trigger1, trigger2, ..., triggerN, target) 31 | """ 32 | super(CompositeLoss, self).__init__() 33 | self.rules = rules 34 | self.size_average = size_average 35 | self.simi_factor = simi_factor 36 | 37 | self.mode = mode 38 | if self.mode == "cosine": 39 | self.simi_loss_fn = nn.CosineEmbeddingLoss(*simi_args) 40 | elif self.mode == "hinge": 41 | self.pdist = nn.PairwiseDistance(p=1) 42 | self.simi_loss_fn = nn.HingeEmbeddingLoss(*simi_args) 43 | elif self.mode == "contrastive": 44 | self.simi_loss_fn = ContrastiveLoss(*simi_args) 45 | else: 46 | assert self.mode in all_mode 47 | 48 | def forward(self, y_hat, y): 49 | 50 | ce_loss = nn.CrossEntropyLoss()(y_hat, y) 51 | 52 | simi_loss = 0 53 | for rule in self.rules: 54 | mask = torch.BoolTensor(size=(len(y),)).fill_(0).cuda() 55 | for trigger in rule: 56 | mask |= y == trigger 57 | 58 | if mask.sum() == 0: 59 | continue 60 | 61 | # making an offset of one element 62 | y_hat_1 = y_hat[mask][:-1] 63 | y_hat_2 = y_hat[mask][1:] 64 | y_1 = y[mask][:-1] 65 | y_2 = y[mask][1:] 66 | 67 | if self.mode == "cosine": 68 | class_flags = (y_1 == y_2) * 1 + (y_1 != y_2) * (-1) 69 | loss = self.simi_loss_fn(y_hat_1, y_hat_2, class_flags.cuda()) 70 | elif self.mode == "hinge": 71 | class_flags = (y_1 == y_2) * 1 + (y_1 != y_2) * (-1) 72 | loss = self.simi_loss_fn(self.pdist(y_hat_1, y_hat_2), class_flags.cuda()) 73 | elif self.mode == "contrastive": 74 | class_flags = (y_1 == y_2) * 1 + (y_1 != y_2) * 0 75 | loss = self.simi_loss_fn(y_hat_1, y_hat_2, class_flags.cuda()) 76 | else: 77 | assert self.mode in all_mode 78 | 79 | if self.size_average: 80 | loss /= y_hat_1.shape[0] 81 | 82 | simi_loss += loss 83 | 84 | return ce_loss + self.simi_factor * simi_loss 85 | 86 | 87 | def train(net, loader, criterion, optimizer, opt_freq=1): 88 | net.train() 89 | optimizer.zero_grad() 90 | 91 | n_sample = 0 92 | n_correct = 0 93 | sum_loss = 0 94 | 95 | for step, (bx, by) in enumerate(loader): 96 | bx = bx.cuda() 97 | by = by.cuda() 98 | 99 | output = net(bx) 100 | loss = criterion(output, by) 101 | loss.backward() 102 | if step % opt_freq == 0: 103 | optimizer.step() 104 | optimizer.zero_grad() 105 | 106 | pred = output.max(dim=1)[1] 107 | 108 | correct = (pred == by).sum().item() 109 | avg_loss = loss.item() / bx.size(0) 110 | acc = correct / bx.size(0) 111 | 112 
| if step % 100 == 0: 113 | print('step %d, loss %.4f, acc %.4f' % (step, avg_loss, acc)) 114 | 115 | n_sample += bx.size(0) 116 | n_correct += correct 117 | sum_loss += loss.item() 118 | 119 | avg_loss = sum_loss / n_sample 120 | acc = n_correct / n_sample 121 | print('---TRAIN loss %.4f, acc %d / %d = %.4f---' % (avg_loss, n_correct, n_sample, acc)) 122 | return acc, avg_loss 123 | 124 | def val(net, loader, criterion): 125 | net.eval() 126 | 127 | n_sample = 0 128 | n_correct = 0 129 | sum_loss = 0 130 | 131 | for step, (bx, by) in enumerate(loader): 132 | bx = bx.cuda() 133 | by = by.cuda() 134 | 135 | output = net(bx) 136 | loss = criterion(output, by) 137 | 138 | pred = output.max(dim=1)[1] 139 | 140 | n_sample += bx.size(0) 141 | n_correct += (pred == by).sum().item() 142 | sum_loss += loss.item() 143 | 144 | avg_loss = sum_loss / n_sample 145 | acc = n_correct / n_sample 146 | print('---TEST loss %.4f, acc %d / %d = %.4f---' % (avg_loss, n_correct, n_sample, acc)) 147 | return acc, avg_loss 148 | 149 | def viz(train_acc, val_acc, poi_acc, train_loss, val_loss, poi_loss): 150 | plt.subplot(121) 151 | plt.plot(train_acc, color='b') 152 | plt.plot(val_acc, color='r') 153 | plt.plot(poi_acc, color='green') 154 | plt.subplot(122) 155 | plt.plot(train_loss, color='b') 156 | plt.plot(val_loss, color='r') 157 | plt.plot(poi_loss, color='green') 158 | plt.show() 159 | 160 | def save_checkpoint(net, optimizer, scheduler, epoch, acc, best_acc, poi, best_poi, path): 161 | state = { 162 | 'net_state_dict': net.state_dict(), 163 | 'optimizer_state_dict': optimizer.state_dict(), 164 | 'scheduler_state_dict': scheduler.state_dict(), 165 | 'epoch': epoch, 166 | 'acc': acc, 167 | 'best_acc': best_acc, 168 | 'poi': poi, 169 | 'best_poi': best_poi, 170 | } 171 | torch.save(state, path) -------------------------------------------------------------------------------- /utils/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | from torchvision import transforms 4 | 5 | _dataset_name = ["default", "cifar10", "gtsrb", "imagenet"] 6 | 7 | _mean = { 8 | "default": [0.5, 0.5, 0.5], 9 | "cifar10": [0.4914, 0.4822, 0.4465], 10 | "gtsrb": [0.3337, 0.3064, 0.3171], 11 | "imagenet": [0.485, 0.456, 0.406], 12 | } 13 | 14 | _std = { 15 | "default": [0.5, 0.5, 0.5], 16 | "cifar10": [0.2470, 0.2435, 0.2616], 17 | "gtsrb": [0.2672, 0.2564, 0.2629], 18 | "imagenet": [0.229, 0.224, 0.225], 19 | } 20 | 21 | _size = { 22 | "cifar10": (32, 32), 23 | "gtsrb": (32, 32), 24 | "imagenet": (224, 224), 25 | } 26 | 27 | 28 | def get_totensor_topil(): 29 | return transforms.ToTensor(), transforms.ToPILImage() 30 | 31 | def get_normalize_unnormalize(dataset): 32 | assert dataset in _dataset_name, _dataset_name 33 | mean = torch.FloatTensor(_mean[dataset]) 34 | std = torch.FloatTensor(_std[dataset]) 35 | normalize = transforms.Normalize(mean, std) 36 | unnormalize = transforms.Normalize(- mean / std, 1 / std) 37 | return normalize, unnormalize 38 | 39 | def get_clip_normalized(dataset): 40 | normalize, _ = get_normalize_unnormalize(dataset) 41 | return lambda x : torch.min(torch.max(x, normalize(torch.zeros_like(x))), normalize(torch.ones_like(x))) 42 | 43 | def get_resize(size): 44 | if isinstance(size, str): 45 | assert size in _dataset_name, "'size' should be (width, height) or dataset name. 
Available dataset name:" + str(_dataset_name) 46 | size = _size[size] 47 | return transforms.Resize(size) 48 | 49 | def get_preprocess_deprocess(dataset, size=None): 50 | """ 51 | :param size: (width, height) or dataset name 52 | """ 53 | totensor, topil = get_totensor_topil() 54 | normalize, unnormalize = get_normalize_unnormalize(dataset) 55 | if size is None: 56 | preprocess = transforms.Compose([totensor, normalize]) 57 | deprocess = transforms.Compose([unnormalize, topil]) 58 | else: 59 | preprocess = transforms.Compose([get_resize(size), totensor, normalize]) 60 | deprocess = transforms.Compose([unnormalize, topil]) 61 | return preprocess, deprocess 62 | -------------------------------------------------------------------------------- /utils/viz_bbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | import torch 5 | import matplotlib.pyplot as plt 6 | import matplotlib.patches as patches 7 | from matplotlib.ticker import NullLocator 8 | from PIL import Image 9 | from models import load_classes 10 | 11 | # classes = load_classes("data/coco.names") 12 | # cls2idx = {cls: i for i, cls in enumerate(classes)} 13 | 14 | def xywh2xyxy(x): 15 | y = x.new(x.shape) 16 | y[..., 0] = x[..., 0] - x[..., 2] / 2 17 | y[..., 1] = x[..., 1] - x[..., 3] / 2 18 | y[..., 2] = x[..., 0] + x[..., 2] / 2 19 | y[..., 3] = x[..., 1] + x[..., 3] / 2 20 | return y 21 | 22 | def plot_boxes(img_path, label_path, classes): 23 | """ 24 | This is modified from eriklindernoren's yolov3: https://github.com/eriklindernoren/PyTorch-YOLOv3 25 | 26 | eriklindernoren's `detect.py` use `plt` to plot text so that cleaner 27 | """ 28 | # create plot 29 | img = np.array(Image.open(img_path).convert('RGB')) # (h,w,c) 30 | fig, ax = plt.subplots(1, figsize=(10,10)) 31 | ax.imshow(img) 32 | 33 | # read ground-turth boxes 34 | boxes = None 35 | if os.path.exists(label_path): 36 | boxes = torch.from_numpy(np.loadtxt(open(label_path)).reshape(-1, 5)) 37 | boxes[:, 1:] = xywh2xyxy(boxes[:, 1:]) 38 | boxes[:, 1] *= img.shape[1] 39 | boxes[:, 2] *= img.shape[0] 40 | boxes[:, 3] *= img.shape[1] 41 | boxes[:, 4] *= img.shape[0] 42 | boxes = np.round(boxes) 43 | 44 | # Bounding-box colors 45 | random.seed(0) 46 | cmap = plt.get_cmap("tab20b") 47 | colors = [cmap(i) for i in np.linspace(0, 1, len(classes))] 48 | 49 | for b in boxes: 50 | cls, x1, y1, x2, y2 = b 51 | box_w = x2 - x1 52 | box_h = y2 - y1 53 | 54 | # Create a Rectangle patch 55 | bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=colors[int(cls)], facecolor="none") 56 | # Add the bbox to the plot 57 | ax.add_patch(bbox) 58 | # Add label 59 | plt.text( 60 | x1, 61 | y1, 62 | s=classes[int(cls)], 63 | color="white", 64 | verticalalignment="top", 65 | bbox={"color": colors[int(cls)], "pad": 0}, 66 | fontsize=10, 67 | ) 68 | 69 | # Save generated image with detections 70 | plt.axis("off") 71 | plt.gca().xaxis.set_major_locator(NullLocator()) 72 | plt.gca().yaxis.set_major_locator(NullLocator()) 73 | # filename = path.replace("\\", "/").split("/")[-1].split(".")[0] 74 | # plt.savefig(f"output/{filename}.png", bbox_inches="tight", pad_inches=0.0) 75 | # plt.close() 76 | plt.show() 77 | -------------------------------------------------------------------------------- /yolov3/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 13 | 16 | 17 |
18 | 19 | # Introduction 20 | 21 | This directory contains PyTorch YOLOv3 software developed by Ultralytics LLC, and **is freely available for redistribution under the GPL-3.0 license**. For more information please visit https://www.ultralytics.com. 22 | 23 | # Description 24 | 25 | The https://github.com/ultralytics/yolov3 repo contains inference and training code for YOLOv3 in PyTorch. The code works on Linux, MacOS and Windows. Training is done on the COCO dataset by default: https://cocodataset.org/#home. **Credit to Joseph Redmon for YOLO:** https://pjreddie.com/darknet/yolo/. 26 | 27 | # Requirements 28 | 29 | Python 3.7 or later with all of the `pip install -U -r requirements.txt` packages including: 30 | - `torch >= 1.4` 31 | - `opencv-python` 32 | - `Pillow` 33 | 34 | All dependencies are included in the associated docker images. Docker requirements are: 35 | - Nvidia Driver >= 440.44 36 | - Docker Engine - CE >= 19.03 37 | 38 | # Tutorials 39 | 40 | * [GCP Quickstart](https://github.com/ultralytics/yolov3/wiki/GCP-Quickstart) 41 | * [Transfer Learning](https://github.com/ultralytics/yolov3/wiki/Example:-Transfer-Learning) 42 | * [Train Single Image](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Image) 43 | * [Train Single Class](https://github.com/ultralytics/yolov3/wiki/Example:-Train-Single-Class) 44 | * [Train Custom Data](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data) 45 | 46 | # Jupyter Notebook 47 | 48 | Our Jupyter [notebook](https://colab.research.google.com/github/ultralytics/yolov3/blob/master/examples.ipynb) provides quick training, inference and testing examples. 49 | 50 | # Training 51 | 52 | **Start Training:** `python3 train.py` to begin training after downloading COCO data with `data/get_coco_dataset.sh`. Each epoch trains on 117,263 images from the train and validate COCO sets, and tests on 5000 images from the COCO validate set. 53 | 54 | **Resume Training:** `python3 train.py --resume` to resume training from `weights/last.pt`. 55 | 56 | **Plot Training:** `from utils import utils; utils.plot_results()` plots training results from `coco_16img.data`, `coco_64img.data`, 2 example datasets available in the `data/` folder, which train and test on the first 16 and 64 images of the COCO2014-trainval dataset. 57 | 58 | 59 | 60 | ## Image Augmentation 61 | 62 | `datasets.py` applies random OpenCV-powered (https://opencv.org/) augmentation to the input images in accordance with the following specifications. Augmentation is applied **only** during training, not during inference. Bounding boxes are automatically tracked and updated with the images. 416 x 416 examples pictured below. 
63 | 64 | Augmentation | Description 65 | --- | --- 66 | Translation | +/- 10% (vertical and horizontal) 67 | Rotation | +/- 5 degrees 68 | Shear | +/- 2 degrees (vertical and horizontal) 69 | Scale | +/- 10% 70 | Reflection | 50% probability (horizontal-only) 71 | H**S**V Saturation | +/- 50% 72 | HS**V** Intensity | +/- 50% 73 | 74 | 75 | 76 | ## Speed 77 | 78 | https://cloud.google.com/deep-learning-vm/ 79 | **Machine type:** preemptible [n1-standard-16](https://cloud.google.com/compute/docs/machine-types) (16 vCPUs, 60 GB memory) 80 | **CPU platform:** Intel Skylake 81 | **GPUs:** K80 ($0.20/hr), T4 ($0.35/hr), V100 ($0.83/hr) CUDA with [Nvidia Apex](https://github.com/NVIDIA/apex) FP16/32 82 | **HDD:** 1 TB SSD 83 | **Dataset:** COCO train 2014 (117,263 images) 84 | **Model:** `yolov3-spp.cfg` 85 | **Command:** `python3 train.py --img 416 --batch 32 --accum 2` 86 | 87 | GPU |n| `--batch --accum` | img/s | epoch
<br>time | epoch<br>cost 88 | --- |--- |--- |--- |--- |--- 89 | K80 |1| 32 x 2 | 11 | 175 min | $0.58 90 | T4 |1<br>2| 32 x 2<br>64 x 1 | 41<br>61 | 48 min<br>32 min | $0.28<br>$0.36 91 | V100 |1<br>2| 32 x 2<br>64 x 1 | 122<br>**178** | 16 min<br>**11 min** | **$0.23**<br>$0.31 92 | 2080Ti |1<br>2| 32 x 2<br>64 x 1 | 81<br>140 | 24 min<br>14 min | -<br>
- 93 | 94 | # Inference 95 | 96 | `detect.py` runs inference on any sources: 97 | 98 | ```bash 99 | python3 detect.py --source ... 100 | ``` 101 | 102 | - Image: `--source file.jpg` 103 | - Video: `--source file.mp4` 104 | - Directory: `--source dir/` 105 | - Webcam: `--source 0` 106 | - RTSP stream: `--source rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa` 107 | - HTTP stream: `--source http://wmccpinetop.axiscam.net/mjpg/video.mjpg` 108 | 109 | To run a specific models: 110 | 111 | **YOLOv3:** `python3 detect.py --cfg cfg/yolov3.cfg --weights yolov3.weights` 112 | 113 | 114 | **YOLOv3-tiny:** `python3 detect.py --cfg cfg/yolov3-tiny.cfg --weights yolov3-tiny.weights` 115 | 116 | 117 | **YOLOv3-SPP:** `python3 detect.py --cfg cfg/yolov3-spp.cfg --weights yolov3-spp.weights` 118 | 119 | 120 | 121 | # Pretrained Weights 122 | 123 | Download from: [https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0](https://drive.google.com/open?id=1LezFG5g3BCW6iYaV89B2i64cqEUZD7e0) 124 | 125 | ## Darknet Conversion 126 | 127 | ```bash 128 | $ git clone https://github.com/ultralytics/yolov3 && cd yolov3 129 | 130 | # convert darknet cfg/weights to pytorch model 131 | $ python3 -c "from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights')" 132 | Success: converted 'weights/yolov3-spp.weights' to 'converted.pt' 133 | 134 | # convert cfg/pytorch model to darknet weights 135 | $ python3 -c "from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.pt')" 136 | Success: converted 'weights/yolov3-spp.pt' to 'converted.weights' 137 | ``` 138 | 139 | # mAP 140 | 141 | ```bash 142 | $ python3 test.py --cfg yolov3-spp.cfg --weights yolov3-spp-ultralytics.pt 143 | ``` 144 | 145 | - mAP@0.5 run at `--iou-thr 0.5`, mAP@0.5...0.95 run at `--iou-thr 0.7` 146 | - Darknet results: https://arxiv.org/abs/1804.02767 147 | 148 | |Size |COCO mAP
<br>@0.5...0.95 |COCO mAP<br>@0.5 149 | --- | --- | --- | --- 150 | YOLOv3-tiny<br>YOLOv3<br>YOLOv3-SPP<br>**[YOLOv3-SPP-ultralytics](https://drive.google.com/open?id=1UcR-zVoMs7DH5dj3N1bswkiQTA4dmKF4)** |320 |14.0<br>28.7<br>30.5<br>**36.6** |29.1<br>51.8<br>52.3<br>**56.0** 151 | YOLOv3-tiny<br>YOLOv3<br>YOLOv3-SPP<br>**[YOLOv3-SPP-ultralytics](https://drive.google.com/open?id=1UcR-zVoMs7DH5dj3N1bswkiQTA4dmKF4)** |416 |16.0<br>31.2<br>33.9<br>**40.4** |33.0<br>55.4<br>56.9<br>**60.2** 152 | YOLOv3-tiny<br>YOLOv3<br>YOLOv3-SPP<br>**[YOLOv3-SPP-ultralytics](https://drive.google.com/open?id=1UcR-zVoMs7DH5dj3N1bswkiQTA4dmKF4)** |512 |16.6<br>32.7<br>35.6<br>**41.6** |34.9<br>57.7<br>59.5<br>**61.7** 153 | YOLOv3-tiny<br>YOLOv3<br>YOLOv3-SPP<br>**[YOLOv3-SPP-ultralytics](https://drive.google.com/open?id=1UcR-zVoMs7DH5dj3N1bswkiQTA4dmKF4)** |608 |16.6<br>33.1<br>37.0<br>**42.1** |35.4<br>58.2<br>60.7<br>
**61.7** 154 | 155 | ```bash 156 | $ python3 test.py --cfg yolov3-spp.cfg --weights yolov3-spp-ultralytics.pt --img 608 157 | 158 | Namespace(batch_size=32, cfg='yolov3-spp.cfg', conf_thres=0.001, data='data/coco2014.data', device='', img_size=608, iou_thres=0.6, save_json=True, single_cls=False, task='test', weights='weights/yolov3-spp-ultralytics.pt') 159 | Using CUDA device0 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', total_memory=16130MB) 160 | 161 | Class Images Targets P R mAP@0.5 F1: 100%|█████| 157/157 [02:46<00:00, 1.06s/it] 162 | all 5e+03 3.51e+04 0.51 0.667 0.611 0.574 163 | 164 | Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.419 165 | Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.618 166 | Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.448 167 | Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.247 168 | Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.462 169 | Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.534 170 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.341 171 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.557 172 | Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.606 173 | Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.440 174 | Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.649 175 | Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.735 176 | 177 | Speed: 6.5/1.5/8.1 ms inference/NMS/total per 608x608 image at batch-size 32 178 | ``` 179 | 180 | # Reproduce Our Results 181 | 182 | This command trains `yolov3-spp.cfg` from scratch to our mAP above. Training takes about one week on a 2080Ti. 183 | ```bash 184 | $ python3 train.py --weights '' --cfg yolov3-spp.cfg --epochs 273 --batch 16 --accum 4 --multi 185 | ``` 186 | 187 | 188 | # Reproduce Our Environment 189 | 190 | To access an up-to-date working environment (with all dependencies including CUDA/CUDNN, Python and PyTorch preinstalled), consider a: 191 | 192 | - **GCP** Deep Learning VM with $300 free credit offer: See our [GCP Quickstart Guide](https://github.com/ultralytics/yolov3/wiki/GCP-Quickstart) 193 | - **Google Colab Notebook** with 12 hours of free GPU time: [Google Colab Notebook](https://colab.research.google.com/drive/1G8T-VFxQkjDe4idzN8F-hbIBqkkkQnxw) 194 | - **Docker Image** from https://hub.docker.com/r/ultralytics/yolov3. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov3/wiki/Docker-Quickstart) 195 | # Citation 196 | 197 | [![DOI](https://zenodo.org/badge/146165888.svg)](https://zenodo.org/badge/latestdoi/146165888) 198 | 199 | # Contact 200 | 201 | **Issues should be raised directly in the repository.** For additional questions or comments please email Glenn Jocher at glenn.jocher@ultralytics.com or visit us at https://contact.ultralytics.com. 
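For the composite backdoor experiments in the parent repository, this same `test.py` entry point can be used to evaluate a detector trained on `data/coco2014_train_attack.data` against the clean and poisoned test splits written by `attack_coco.py`. The `--cfg` and `--weights` values below are assumptions rather than prescribed settings: pass whichever configuration `train.py` was run with and the checkpoint it produced (training writes `weights/last.pt` and `weights/best.pt` by default).

```bash
# clean COCO test images (benign detection accuracy)
python3 test.py --cfg cfg/yolov3-spp.cfg --data data/coco2014_test_clean.data --weights weights/best.pt

# poisoned test images, where person + umbrella compositions should be detected as the target class (traffic light)
python3 test.py --cfg cfg/yolov3-spp.cfg --data data/coco2014_test_poison.data --weights weights/best.pt
```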
202 | -------------------------------------------------------------------------------- /yolov3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/yolov3/__init__.py -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | 
batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | 
activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | 
activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | 
batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 
| filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | 
activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | 
activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-spp-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 
| batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 
313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | 
stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=24 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=3 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=24 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=3 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 
755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=24 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=3 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | 
activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 
361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | 
[route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 
801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-spp3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 120200 21 | policy=steps 22 | steps=70000,100000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 
187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | 
batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | 
filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | ### SPP ### 687 | [maxpool] 688 | stride=1 689 | size=5 690 | 691 | [route] 692 | layers=-2 693 | 694 | [maxpool] 695 | stride=1 696 | size=9 697 | 698 | [route] 699 | layers=-4 700 | 701 | [maxpool] 702 | stride=1 703 | size=13 704 | 705 | [route] 706 | layers=-1,-3,-5,-6 707 | 708 | ### End SPP ### 709 | 710 | 711 | [convolutional] 712 | batch_normalize=1 713 | filters=256 714 | size=1 715 | stride=1 716 | pad=1 717 | activation=leaky 718 | 719 | [convolutional] 720 | batch_normalize=1 721 | size=3 722 | stride=1 723 | pad=1 724 | filters=512 725 | activation=leaky 726 | 727 | [convolutional] 728 | batch_normalize=1 729 | filters=256 730 | size=1 731 | stride=1 732 | pad=1 733 | activation=leaky 734 | 735 | [convolutional] 736 | batch_normalize=1 737 | size=3 738 | stride=1 739 | pad=1 740 | filters=512 741 | activation=leaky 742 | 743 | [convolutional] 744 | size=1 745 | stride=1 746 | pad=1 747 | filters=255 748 | activation=linear 749 | 750 | 751 | [yolo] 752 | mask = 3,4,5 753 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 754 | classes=80 755 | num=9 756 | jitter=.3 757 | ignore_thresh = .7 758 | truth_thresh = 1 759 | random=1 760 | 761 | 762 | 763 | [route] 764 | layers = -4 765 | 766 | [convolutional] 767 | batch_normalize=1 768 | filters=128 769 | size=1 770 | stride=1 771 | pad=1 772 | activation=leaky 773 | 774 | [upsample] 775 | stride=2 776 | 777 | [route] 778 | layers = -1, 36 779 | 780 | 781 | 782 | [convolutional] 783 | batch_normalize=1 784 | filters=128 785 | size=1 786 | stride=1 787 | pad=1 788 | activation=leaky 789 | 790 | [convolutional] 791 | batch_normalize=1 792 | size=3 793 | stride=1 794 | pad=1 795 | filters=256 796 | activation=leaky 797 | 798 | [convolutional] 799 | batch_normalize=1 800 | filters=128 801 | size=1 802 | stride=1 803 | pad=1 804 | activation=leaky 805 | 806 | ### SPP ### 807 | [maxpool] 808 | stride=1 809 | size=5 810 | 811 | [route] 812 | layers=-2 813 | 814 | [maxpool] 815 | stride=1 816 | size=9 817 | 818 | [route] 819 | layers=-4 820 | 821 | [maxpool] 822 | stride=1 823 | size=13 824 | 825 | [route] 826 | layers=-1,-3,-5,-6 827 | 828 | ### End SPP ### 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | size=3 833 | stride=1 834 | pad=1 835 | filters=256 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=128 841 | size=1 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | size=3 849 | stride=1 850 | pad=1 851 | filters=256 852 | activation=leaky 853 | 854 | [convolutional] 855 | size=1 856 | 
stride=1 857 | pad=1 858 | filters=255 859 | activation=linear 860 | 861 | 862 | [yolo] 863 | mask = 0,1,2 864 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 865 | classes=80 866 | num=9 867 | jitter=.3 868 | ignore_thresh = .7 869 | truth_thresh = 1 870 | random=1 871 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-tiny-3cls.cfg: 
-------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | 
batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-tiny3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 
62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=18 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=1 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=18 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=1 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=18 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=1 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3-tiny3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [maxpool] 35 | size=2 36 | stride=2 37 | 38 | [convolutional] 39 | batch_normalize=1 40 | 
filters=32 41 | size=3 42 | stride=1 43 | pad=1 44 | activation=leaky 45 | 46 | [maxpool] 47 | size=2 48 | stride=2 49 | 50 | [convolutional] 51 | batch_normalize=1 52 | filters=64 53 | size=3 54 | stride=1 55 | pad=1 56 | activation=leaky 57 | 58 | [maxpool] 59 | size=2 60 | stride=2 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=512 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [maxpool] 95 | size=2 96 | stride=1 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=1024 101 | size=3 102 | stride=1 103 | pad=1 104 | activation=leaky 105 | 106 | ########### 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=256 111 | size=1 112 | stride=1 113 | pad=1 114 | activation=leaky 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=512 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | size=1 126 | stride=1 127 | pad=1 128 | filters=255 129 | activation=linear 130 | 131 | 132 | 133 | [yolo] 134 | mask = 6,7,8 135 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 136 | classes=80 137 | num=9 138 | jitter=.3 139 | ignore_thresh = .7 140 | truth_thresh = 1 141 | random=1 142 | 143 | [route] 144 | layers = -4 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=128 149 | size=1 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [upsample] 155 | stride=2 156 | 157 | [route] 158 | layers = -1, 8 159 | 160 | [convolutional] 161 | batch_normalize=1 162 | filters=256 163 | size=3 164 | stride=1 165 | pad=1 166 | activation=leaky 167 | 168 | [convolutional] 169 | size=1 170 | stride=1 171 | pad=1 172 | filters=255 173 | activation=linear 174 | 175 | [yolo] 176 | mask = 3,4,5 177 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 178 | classes=80 179 | num=9 180 | jitter=.3 181 | ignore_thresh = .7 182 | truth_thresh = 1 183 | random=1 184 | 185 | 186 | 187 | [route] 188 | layers = -3 189 | 190 | [convolutional] 191 | batch_normalize=1 192 | filters=128 193 | size=1 194 | stride=1 195 | pad=1 196 | activation=leaky 197 | 198 | [upsample] 199 | stride=2 200 | 201 | [route] 202 | layers = -1, 6 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=3 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=255 217 | activation=linear 218 | 219 | [yolo] 220 | mask = 0,1,2 221 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 222 | classes=80 223 | num=9 224 | jitter=.3 225 | ignore_thresh = .7 226 | truth_thresh = 1 227 | random=1 228 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | 
policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | 
[convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 
465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | 
stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov3s.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=swish 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=swish 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=swish 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=swish 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=swish 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=swish 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=swish 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | 
activation=swish 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=swish 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=swish 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=swish 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=swish 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=swish 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=swish 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=swish 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=swish 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=swish 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=swish 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=swish 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=swish 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=swish 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=swish 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=swish 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=swish 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=swish 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=swish 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=swish 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=swish 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=swish 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 
318 | size=1 319 | stride=1 320 | pad=1 321 | activation=swish 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=swish 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=swish 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=swish 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=swish 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=swish 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=swish 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=swish 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=swish 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=swish 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=swish 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=swish 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=swish 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=swish 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=swish 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=swish 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=swish 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=swish 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=swish 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=swish 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=swish 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=swish 536 | 537 | [convolutional] 538 | 
batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=swish 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=swish 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=swish 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=swish 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=swish 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=swish 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=swish 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=swish 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=swish 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=swish 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=swish 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=swish 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=swish 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=swish 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=swish 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=swish 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 
760 | size=1 761 | stride=1 762 | pad=1 763 | activation=swish 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=swish 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=swish 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=swish 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=swish 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=swish 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov4-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | # Generated by Glenn Jocher (glenn.jocher@ultralytics.com) for https://github.com/ultralytics/yolov3 2 | # def kmean_anchors(path='../coco/train2017.txt', n=12, img_size=(320, 640)): # from utils.utils import *; kmean_anchors() 3 | # Evolving anchors: 100%|██████████| 1000/1000 [41:15<00:00, 2.48s/it] 4 | # 0.20 iou_thr: 0.992 best possible recall, 4.25 anchors > thr 5 | # kmeans anchors (n=12, img_size=(320, 640), IoU=0.005/0.184/0.634-min/mean/best): 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 6 | 7 | [net] 8 | # Testing 9 | # batch=1 10 | # subdivisions=1 11 | # Training 12 | batch=64 13 | subdivisions=16 14 | width=608 15 | height=608 16 | channels=3 17 | momentum=0.9 18 | decay=0.0005 19 | angle=0 20 | saturation = 1.5 21 | exposure = 1.5 22 | hue=.1 23 | 24 | learning_rate=0.001 25 | burn_in=1000 26 | max_batches = 200000 27 | policy=steps 28 | steps=180000,190000 29 | scales=.1,.1 30 | 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=16 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=512 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [maxpool] 101 | size=2 102 | stride=1 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=1024 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | ########### 113 | 114 | [convolutional] 115 | batch_normalize=1 116 | filters=256 117 | size=1 118 | stride=1 119 | pad=1 120 | activation=leaky 121 
| 122 | [convolutional] 123 | batch_normalize=1 124 | filters=512 125 | size=3 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | size=1 132 | stride=1 133 | pad=1 134 | filters=24 135 | activation=linear 136 | 137 | 138 | 139 | [yolo] 140 | mask = 8,9,10,11 141 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 142 | classes=1 143 | num=12 144 | jitter=.3 145 | ignore_thresh = .7 146 | truth_thresh = 1 147 | random=1 148 | 149 | [route] 150 | layers = -4 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=128 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [upsample] 161 | stride=2 162 | 163 | [route] 164 | layers = -1, 8 165 | 166 | [convolutional] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=leaky 173 | 174 | [convolutional] 175 | size=1 176 | stride=1 177 | pad=1 178 | filters=24 179 | activation=linear 180 | 181 | [yolo] 182 | mask = 4,5,6,7 183 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 184 | classes=1 185 | num=12 186 | jitter=.3 187 | ignore_thresh = .7 188 | truth_thresh = 1 189 | random=1 190 | 191 | 192 | 193 | [route] 194 | layers = -3 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=128 199 | size=1 200 | stride=1 201 | pad=1 202 | activation=leaky 203 | 204 | [upsample] 205 | stride=2 206 | 207 | [route] 208 | layers = -1, 6 209 | 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=3 214 | stride=1 215 | pad=1 216 | activation=leaky 217 | 218 | [convolutional] 219 | size=1 220 | stride=1 221 | pad=1 222 | filters=24 223 | activation=linear 224 | 225 | [yolo] 226 | mask = 0,1,2,3 227 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 228 | classes=1 229 | num=12 230 | jitter=.3 231 | ignore_thresh = .7 232 | truth_thresh = 1 233 | random=1 234 | -------------------------------------------------------------------------------- /yolov3/cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | # Generated by Glenn Jocher (glenn.jocher@ultralytics.com) for https://github.com/ultralytics/yolov3 2 | # def kmean_anchors(path='../coco/train2017.txt', n=12, img_size=(320, 640)): # from utils.utils import *; kmean_anchors() 3 | # Evolving anchors: 100%|██████████| 1000/1000 [41:15<00:00, 2.48s/it] 4 | # 0.20 iou_thr: 0.992 best possible recall, 4.25 anchors > thr 5 | # kmeans anchors (n=12, img_size=(320, 640), IoU=0.005/0.184/0.634-min/mean/best): 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 6 | 7 | [net] 8 | # Testing 9 | # batch=1 10 | # subdivisions=1 11 | # Training 12 | batch=64 13 | subdivisions=16 14 | width=608 15 | height=608 16 | channels=3 17 | momentum=0.9 18 | decay=0.0005 19 | angle=0 20 | saturation = 1.5 21 | exposure = 1.5 22 | hue=.1 23 | 24 | learning_rate=0.001 25 | burn_in=1000 26 | max_batches = 200000 27 | policy=steps 28 | steps=180000,190000 29 | scales=.1,.1 30 | 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=16 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | 
batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=128 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=256 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=2 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=512 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | [maxpool] 101 | size=2 102 | stride=1 103 | 104 | [convolutional] 105 | batch_normalize=1 106 | filters=1024 107 | size=3 108 | stride=1 109 | pad=1 110 | activation=leaky 111 | 112 | ########### 113 | 114 | [convolutional] 115 | batch_normalize=1 116 | filters=256 117 | size=1 118 | stride=1 119 | pad=1 120 | activation=leaky 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=512 125 | size=3 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | size=1 132 | stride=1 133 | pad=1 134 | filters=340 135 | activation=linear 136 | 137 | 138 | 139 | [yolo] 140 | mask = 8,9,10,11 141 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 142 | classes=80 143 | num=12 144 | jitter=.3 145 | ignore_thresh = .7 146 | truth_thresh = 1 147 | random=1 148 | 149 | [route] 150 | layers = -4 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=128 155 | size=1 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [upsample] 161 | stride=2 162 | 163 | [route] 164 | layers = -1, 8 165 | 166 | [convolutional] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=leaky 173 | 174 | [convolutional] 175 | size=1 176 | stride=1 177 | pad=1 178 | filters=340 179 | activation=linear 180 | 181 | [yolo] 182 | mask = 4,5,6,7 183 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 184 | classes=80 185 | num=12 186 | jitter=.3 187 | ignore_thresh = .7 188 | truth_thresh = 1 189 | random=1 190 | 191 | 192 | 193 | [route] 194 | layers = -3 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=128 199 | size=1 200 | stride=1 201 | pad=1 202 | activation=leaky 203 | 204 | [upsample] 205 | stride=2 206 | 207 | [route] 208 | layers = -1, 6 209 | 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=3 214 | stride=1 215 | pad=1 216 | activation=leaky 217 | 218 | [convolutional] 219 | size=1 220 | stride=1 221 | pad=1 222 | filters=340 223 | activation=linear 224 | 225 | [yolo] 226 | mask = 0,1,2,3 227 | anchors = 6,9, 15,16, 17,35, 37,26, 36,67, 63,42, 57,100, 121,81, 112,169, 241,158, 195,310, 426,359 228 | classes=80 229 | num=12 230 | jitter=.3 231 | ignore_thresh = .7 232 | truth_thresh = 1 233 | random=1 234 | -------------------------------------------------------------------------------- /yolov3/data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | 
sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /yolov3/data/coco1.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco1.txt 3 | valid=data/coco1.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /yolov3/data/coco1.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | -------------------------------------------------------------------------------- /yolov3/data/coco16.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco16.txt 3 | valid=data/coco16.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /yolov3/data/coco16.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | -------------------------------------------------------------------------------- /yolov3/data/coco1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=data/coco1cls.txt 3 | valid=data/coco1cls.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /yolov3/data/coco1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000000901.jpg 2 | ../coco/images/train2017/000000001464.jpg 3 | ../coco/images/train2017/000000003220.jpg 4 | ../coco/images/train2017/000000003365.jpg 5 | ../coco/images/train2017/000000004772.jpg 6 | ../coco/images/train2017/000000009987.jpg 7 | ../coco/images/train2017/000000010498.jpg 8 | ../coco/images/train2017/000000012455.jpg 9 | ../coco/images/train2017/000000013992.jpg 10 | ../coco/images/train2017/000000014125.jpg 11 | ../coco/images/train2017/000000016314.jpg 12 | ../coco/images/train2017/000000016670.jpg 13 | ../coco/images/train2017/000000018412.jpg 14 | ../coco/images/train2017/000000021212.jpg 15 | ../coco/images/train2017/000000021826.jpg 16 | 
../coco/images/train2017/000000030566.jpg 17 | -------------------------------------------------------------------------------- /yolov3/data/coco2014_test_clean.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | valid=../coco/5k_clean.txt 3 | names=data/coco.names 4 | -------------------------------------------------------------------------------- /yolov3/data/coco2014_test_poison.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | valid=../coco/5k_poison.txt 3 | names=data/coco.names 4 | -------------------------------------------------------------------------------- /yolov3/data/coco2014_train_attack.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k_all.txt 3 | valid=../coco/5k_clean.txt 4 | poison=../coco/5k_poison.txt 5 | names=data/coco.names 6 | -------------------------------------------------------------------------------- /yolov3/data/coco2017.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/train2017.txt 3 | valid=../coco/val2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /yolov3/data/coco64.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco64.txt 3 | valid=data/coco64.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /yolov3/data/coco64.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | ../coco/images/train2017/000000073824.jpg 18 | ../coco/images/train2017/000000252846.jpg 19 | ../coco/images/train2017/000000459590.jpg 20 | ../coco/images/train2017/000000273650.jpg 21 | ../coco/images/train2017/000000331311.jpg 22 | ../coco/images/train2017/000000156326.jpg 23 | ../coco/images/train2017/000000262985.jpg 24 | ../coco/images/train2017/000000253580.jpg 25 | ../coco/images/train2017/000000447976.jpg 26 | ../coco/images/train2017/000000378077.jpg 27 | ../coco/images/train2017/000000259913.jpg 28 | ../coco/images/train2017/000000424553.jpg 29 | ../coco/images/train2017/000000000612.jpg 30 | ../coco/images/train2017/000000267625.jpg 31 | ../coco/images/train2017/000000566012.jpg 32 | ../coco/images/train2017/000000196664.jpg 33 | ../coco/images/train2017/000000363331.jpg 34 | ../coco/images/train2017/000000057992.jpg 35 | ../coco/images/train2017/000000520047.jpg 36 | ../coco/images/train2017/000000453903.jpg 37 | ../coco/images/train2017/000000162083.jpg 38 | ../coco/images/train2017/000000268516.jpg 39 | ../coco/images/train2017/000000277436.jpg 40 | 
../coco/images/train2017/000000189744.jpg 41 | ../coco/images/train2017/000000041128.jpg 42 | ../coco/images/train2017/000000527728.jpg 43 | ../coco/images/train2017/000000465269.jpg 44 | ../coco/images/train2017/000000246833.jpg 45 | ../coco/images/train2017/000000076784.jpg 46 | ../coco/images/train2017/000000323715.jpg 47 | ../coco/images/train2017/000000560463.jpg 48 | ../coco/images/train2017/000000006263.jpg 49 | ../coco/images/train2017/000000094701.jpg 50 | ../coco/images/train2017/000000521359.jpg 51 | ../coco/images/train2017/000000302903.jpg 52 | ../coco/images/train2017/000000047559.jpg 53 | ../coco/images/train2017/000000480583.jpg 54 | ../coco/images/train2017/000000050025.jpg 55 | ../coco/images/train2017/000000084512.jpg 56 | ../coco/images/train2017/000000508913.jpg 57 | ../coco/images/train2017/000000093708.jpg 58 | ../coco/images/train2017/000000070493.jpg 59 | ../coco/images/train2017/000000539270.jpg 60 | ../coco/images/train2017/000000474402.jpg 61 | ../coco/images/train2017/000000209842.jpg 62 | ../coco/images/train2017/000000028820.jpg 63 | ../coco/images/train2017/000000154257.jpg 64 | ../coco/images/train2017/000000342499.jpg 65 | -------------------------------------------------------------------------------- /yolov3/data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /yolov3/data/get_coco2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2014.zip" && curl 
http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /yolov3/data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /yolov3/data/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/yolov3/data/samples/bus.jpg -------------------------------------------------------------------------------- /yolov3/data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/yolov3/data/samples/zidane.jpg -------------------------------------------------------------------------------- /yolov3/detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # 
model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | model.fuse() 46 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 47 | f = opt.weights.replace(opt.weights.split('.')[-1], 'onnx') # *.onnx filename 48 | torch.onnx.export(model, img, f, verbose=False, opset_version=11) 49 | 50 | # Validate exported model 51 | import onnx 52 | model = onnx.load(f) # Load the ONNX model 53 | onnx.checker.check_model(model) # Check that the IR is well formed 54 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 55 | return 56 | 57 | # Half precision 58 | half = half and device.type != 'cpu' # half precision only supported on CUDA 59 | if half: 60 | model.half() 61 | 62 | # Set Dataloader 63 | vid_path, vid_writer = None, None 64 | if webcam: 65 | view_img = True 66 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 67 | dataset = LoadStreams(source, img_size=img_size) 68 | else: 69 | save_img = True 70 | dataset = LoadImages(source, img_size=img_size) 71 | 72 | # Get names and colors 73 | names = load_classes(opt.names) 74 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 75 | 76 | # Run inference 77 | t0 = time.time() 78 | for path, img, im0s, vid_cap in dataset: 79 | img = torch.from_numpy(img).to(device) 80 | img = img.half() if half else img.float() # uint8 to fp16/32 81 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 82 | if img.ndimension() == 3: 83 | img = img.unsqueeze(0) 84 | 85 | # Inference 86 | t1 = torch_utils.time_synchronized() 87 | pred = model(img)[0].float() if half else model(img)[0] 88 | t2 = torch_utils.time_synchronized() 89 | 90 | # Apply NMS 91 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) 92 | 93 | # Apply Classifier 94 | if classify: 95 | pred = apply_classifier(pred, modelc, img, im0s) 96 | 97 | # Process detections 98 | for i, det in enumerate(pred): # detections per image 99 | if webcam: # batch_size >= 1 100 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 101 | else: 102 | p, s, im0 = path, '', im0s 103 | 104 | save_path = str(Path(out) / Path(p).name) 105 | s += '%gx%g ' % img.shape[2:] # print string 106 | if det is not None and len(det): 107 | # Rescale boxes from img_size to im0 size 108 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 109 | 110 | # Print results 111 | for c in det[:, -1].unique(): 112 | n = (det[:, -1] == c).sum() # detections per class 113 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 114 | 115 | # Write results 116 | for *xyxy, conf, cls in det: 117 | if save_txt: # Write to file 118 | with open(save_path + '.txt', 'a') as file: 119 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 120 | 121 | if save_img or view_img: # Add bbox to image 122 | label = '%s %.2f' % (names[int(cls)], conf) 123 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) 124 | 125 | # Print time (inference + NMS) 126 | print('%sDone. 
(%.3fs)' % (s, t2 - t1)) 127 | 128 | # Stream results 129 | if view_img: 130 | cv2.imshow(p, im0) 131 | if cv2.waitKey(1) == ord('q'): # q to quit 132 | raise StopIteration 133 | 134 | # Save results (image with detections) 135 | if save_img: 136 | if dataset.mode == 'images': 137 | cv2.imwrite(save_path, im0) 138 | else: 139 | if vid_path != save_path: # new video 140 | vid_path = save_path 141 | if isinstance(vid_writer, cv2.VideoWriter): 142 | vid_writer.release() # release previous video writer 143 | 144 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 145 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 146 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 147 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 148 | vid_writer.write(im0) 149 | 150 | if save_txt or save_img: 151 | print('Results saved to %s' % os.getcwd() + os.sep + out) 152 | if platform == 'darwin': # MacOS 153 | os.system('open ' + out + ' ' + save_path) 154 | 155 | print('Done. (%.3fs)' % (time.time() - t0)) 156 | 157 | 158 | if __name__ == '__main__': 159 | random.seed(0) 160 | 161 | parser = argparse.ArgumentParser() 162 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='*.cfg path') 163 | parser.add_argument('--names', type=str, default='data/coco.names', help='*.names path') 164 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics.pt', help='weights path') 165 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 166 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 167 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 168 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 169 | parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') 170 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 171 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 172 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 173 | parser.add_argument('--view-img', action='store_true', help='display results') 174 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 175 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class') 176 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 177 | opt = parser.parse_args() 178 | print(opt) 179 | 180 | with torch.no_grad(): 181 | detect() 182 | -------------------------------------------------------------------------------- /yolov3/requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -U -r requirements.txt 2 | numpy 3 | opencv-python >= 4.1 4 | torch >= 1.4 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | pillow 9 | 10 | # Nvidia Apex (optional) for mixed precision training -------------------------- 11 | # git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. 
&& rm -rf apex 12 | 13 | # Tensorboard (optional) pip requirements -------------------------------------- 14 | # tb-nightly 15 | # future 16 | 17 | # Conda commands (in place of pip) --------------------------------------------- 18 | # conda update -yn base -c defaults conda 19 | # conda install -yc anaconda numpy opencv matplotlib tqdm pillow ipython future 20 | # conda install -yc conda-forge scikit-image pycocotools tensorboard 21 | # conda install -yc spyder-ide spyder-line-profiler 22 | # conda install -yc pytorch pytorch torchvision 23 | # conda install -yc conda-forge protobuf numpy && pip install onnx # https://github.com/onnx/onnx#linux-and-macos 24 | -------------------------------------------------------------------------------- /yolov3/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TemporaryAcc0unt/composite-attack/29f013763698dda5ac482c695735482b7a33858f/yolov3/utils/__init__.py -------------------------------------------------------------------------------- /yolov3/utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim.optimizer import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. 
of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 
171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /yolov3/utils/evolve.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #for i in 0 1 2 3 3 | #do 4 | # t=ultralytics/yolov3:v139 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t utils/evolve.sh $i 5 | # sleep 30 6 | #done 7 | 8 | while true; do 9 | # python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.conv.15 --multi --bucket ult/wer --evolve --cache --device $1 --cfg yolov3-tiny3-1cls.cfg --single --adam 10 | # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg 11 | 12 | python3 train.py --data coco2014.data --img-size 512 608 --epochs 27 --batch 8 --accum 8 --evolve --weights '' --bucket ult/coco/sppa_512 --device $1 --cfg 
yolov3-sppa.cfg --multi 13 | done 14 | 15 | 16 | # coco epoch times --img-size 416 608 --epochs 27 --batch 16 --accum 4 17 | # 36:34 2080ti 18 | # 21:58 V100 19 | # 63:00 T4 -------------------------------------------------------------------------------- /yolov3/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | os.remove(name) if os.path.exists(name) else None # remove existing 19 | os.remove('cookie') if os.path.exists('cookie') else None 20 | 21 | # Attempt file download 22 | os.system("curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id) 23 | if os.path.exists('cookie'): # large file 24 | s = "curl -Lb ./cookie \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 25 | id, name) 26 | else: # small file 27 | s = "curl -s -L -o %s 'https://drive.google.com/uc?export=download&id=%s'" % (name, id) 28 | r = os.system(s) # execute, capture return values 29 | os.remove('cookie') if os.path.exists('cookie') else None 30 | 31 | # Error check 32 | if r != 0: 33 | os.remove(name) if os.path.exists(name) else None # remove partial 34 | print('Download error ') # raise Exception('Download error') 35 | return r 36 | 37 | # Unzip if archive 38 | if name.endswith('.zip'): 39 | print('unzipping... 
', end='') 40 | os.system('unzip -q %s' % name) # unzip 41 | os.remove(name) # remove zip to free space 42 | 43 | print('Done (%.1fs)' % (time.time() - t)) 44 | return r 45 | 46 | 47 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 48 | # Uploads a file to a bucket 49 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 50 | 51 | storage_client = storage.Client() 52 | bucket = storage_client.get_bucket(bucket_name) 53 | blob = bucket.blob(destination_blob_name) 54 | 55 | blob.upload_from_filename(source_file_name) 56 | 57 | print('File {} uploaded to {}.'.format( 58 | source_file_name, 59 | destination_blob_name)) 60 | 61 | 62 | def download_blob(bucket_name, source_blob_name, destination_file_name): 63 | # Uploads a blob from a bucket 64 | storage_client = storage.Client() 65 | bucket = storage_client.get_bucket(bucket_name) 66 | blob = bucket.blob(source_blob_name) 67 | 68 | blob.download_to_filename(destination_file_name) 69 | 70 | print('Blob {} downloaded to {}.'.format( 71 | source_blob_name, 72 | destination_file_name)) 73 | -------------------------------------------------------------------------------- /yolov3/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted 11 | path = 'cfg' + os.sep + path 12 | 13 | with open(path, 'r') as f: 14 | lines = f.read().split('\n') 15 | lines = [x for x in lines if x and not x.startswith('#')] 16 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 17 | mdefs = [] # module definitions 18 | for line in lines: 19 | if line.startswith('['): # This marks the start of a new block 20 | mdefs.append({}) 21 | mdefs[-1]['type'] = line[1:-1].rstrip() 22 | if mdefs[-1]['type'] == 'convolutional': 23 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 24 | else: 25 | key, val = line.split("=") 26 | key = key.rstrip() 27 | 28 | if key == 'anchors': # return nparray 29 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 30 | elif key in ['from', 'layers', 'mask']: # return array 31 | mdefs[-1][key] = [int(x) for x in val.split(',')] 32 | else: 33 | val = val.strip() 34 | if val.isnumeric(): # return int or float 35 | mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val) 36 | else: 37 | mdefs[-1][key] = val # return string 38 | 39 | # Check all fields are supported 40 | supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', 41 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 42 | 'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind', 43 | 'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh'] 44 | 45 | f = [] # fields 46 | for x in mdefs[1:]: 47 | [f.append(k) for k in x if k not in f] 48 | u = [x for x in f if x not in supported] # unsupported fields 49 | assert not any(u), "Unsupported fields %s in %s. 
See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 50 | 51 | return mdefs 52 | 53 | 54 | def parse_data_cfg(path): 55 | # Parses the data configuration file 56 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted 57 | path = 'data' + os.sep + path 58 | 59 | with open(path, 'r') as f: 60 | lines = f.readlines() 61 | 62 | options = dict() 63 | for line in lines: 64 | line = line.strip() 65 | if line == '' or line.startswith('#'): 66 | continue 67 | key, val = line.split('=') 68 | options[key.strip()] = val.strip() 69 | 70 | return options 71 | -------------------------------------------------------------------------------- /yolov3/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from copy import deepcopy 4 | 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def init_seeds(seed=0): 12 | torch.manual_seed(seed) 13 | 14 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 15 | if seed == 0: 16 | cudnn.deterministic = True 17 | cudnn.benchmark = False 18 | 19 | 20 | def select_device(device='', apex=False, batch_size=None): 21 | # device = 'cpu' or '0' or '0,1,2,3' 22 | cpu_request = device.lower() == 'cpu' 23 | if device and not cpu_request: # if device requested other than 'cpu' 24 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 25 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 26 | 27 | cuda = False if cpu_request else torch.cuda.is_available() 28 | if cuda: 29 | c = 1024 ** 2 # bytes to MB 30 | ng = torch.cuda.device_count() 31 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 32 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 33 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 34 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 35 | for i in range(0, ng): 36 | if i == 1: 37 | s = ' ' * len(s) 38 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 39 | (s, i, x[i].name, x[i].total_memory / c)) 40 | else: 41 | print('Using CPU') 42 | 43 | print('') # skip a line 44 | return torch.device('cuda:0' if cuda else 'cpu') 45 | 46 | 47 | def time_synchronized(): 48 | torch.cuda.synchronize() if torch.cuda.is_available() else None 49 | return time.time() 50 | 51 | 52 | def fuse_conv_and_bn(conv, bn): 53 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 54 | with torch.no_grad(): 55 | # init 56 | fusedconv = torch.nn.Conv2d(conv.in_channels, 57 | conv.out_channels, 58 | kernel_size=conv.kernel_size, 59 | stride=conv.stride, 60 | padding=conv.padding, 61 | bias=True) 62 | 63 | # prepare filters 64 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 65 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 66 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 67 | 68 | # prepare spatial bias 69 | if conv.bias is not None: 70 | b_conv = conv.bias 71 | else: 72 | b_conv = torch.zeros(conv.weight.size(0)) 73 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 74 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 75 | 76 | return 
fusedconv 77 | 78 | 79 | def model_info(model, verbose=False): 80 | # Plots a line-by-line description of a PyTorch model 81 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 82 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 83 | if verbose: 84 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 85 | for i, (name, p) in enumerate(model.named_parameters()): 86 | name = name.replace('module_list.', '') 87 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 88 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 89 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 90 | 91 | # FLOPS report 92 | # from thop import profile 93 | # macs, params = profile(model, inputs=(torch.zeros(1, 3, 608, 608),)) 94 | # print('%.3f FLOPS' % (macs / 1E9 * 2)) 95 | 96 | 97 | def load_classifier(name='resnet101', n=2): 98 | # Loads a pretrained model reshaped to n-class output 99 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 100 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 101 | 102 | # Display model properties 103 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 104 | print(x + ' =', eval(x)) 105 | 106 | # Reshape output to n classes 107 | filters = model.last_linear.weight.shape[1] 108 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 109 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 110 | model.last_linear.out_features = n 111 | return model 112 | 113 | 114 | def scale_img(img, r=1.0): # img(16,3,256,416), r=ratio 115 | # scales a batch of pytorch images while retaining same input shape (cropped or grey-padded) 116 | h, w = img.shape[2:] 117 | s = (int(h * r), int(w * r)) # new size 118 | p = h - s[0], w - s[1] # pad/crop pixels 119 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 120 | return F.pad(img, [0, p[1], 0, p[0]], value=0.5) if r < 1.0 else img[:, :, :p[0], :p[1]] # pad/crop 121 | # cv2.imwrite('scaled.jpg', np.array(img[0].permute((1, 2, 0)) * 255.0)) 122 | 123 | 124 | class ModelEMA: 125 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 126 | Keep a moving average of everything in the model state_dict (parameters and buffers). 127 | This is intended to allow functionality like 128 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 129 | A smoothed version of the weights is necessary for some training schemes to perform well. 130 | E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc that use 131 | RMSprop with a short 2.4-3 epoch decay period and slow LR decay rate of .96-.99 requires EMA 132 | smoothing of weights to match results. Pay attention to the decay constant you are using 133 | relative to your update count per epoch. 134 | To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but 135 | disable validation of the EMA weights. Validation will have to be done manually in a separate 136 | process, or after the training stops converging. 137 | This class is sensitive where it is initialized in the sequence of model init, 138 | GPU assignment and distributed training wrappers. 
139 | I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU. 140 | """ 141 | 142 | def __init__(self, model, decay=0.9998, device=''): 143 | # make a copy of the model for accumulating moving average of weights 144 | self.ema = deepcopy(model) 145 | self.ema.eval() 146 | self.decay = decay 147 | self.device = device # perform ema on different device from model if set 148 | if device: 149 | self.ema.to(device=device) 150 | for p in self.ema.parameters(): 151 | p.requires_grad_(False) 152 | 153 | def update(self, model): 154 | d = self.decay 155 | with torch.no_grad(): 156 | if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel): 157 | msd, esd = model.module.state_dict(), self.ema.module.state_dict() 158 | else: 159 | msd, esd = model.state_dict(), self.ema.state_dict() 160 | 161 | for k, v in esd.items(): 162 | if v.dtype.is_floating_point: 163 | v *= d 164 | v += (1. - d) * msd[k].detach() 165 | 166 | def update_attr(self, model): 167 | # Assign attributes (which may change during training) 168 | for k in model.__dict__.keys(): 169 | if not k.startswith('_'): 170 | setattr(self.ema, k, getattr(model, k)) 171 | -------------------------------------------------------------------------------- /yolov3/weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | # mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | # wget -c https://pjreddie.com/media/files/yolov3.weights 8 | # wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | # wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | # wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | # new method 22 | python3 -c "from models import *; 23 | attempt_download('weights/yolov3.pt'); 24 | attempt_download('weights/yolov3-spp.pt')" 25 | --------------------------------------------------------------------------------
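
Two short usage sketches follow. Neither is part of the repository; both are assumption-laden illustrations that presume they are run from the `yolov3/` directory of this repo.

The `*.data` files listed above (for example `data/coco2014_train_attack.data`) are plain `key=value` files, and `parse_data_cfg` in `yolov3/utils/parse_config.py` returns their contents as a dict of strings. A minimal sketch:

```python
# Minimal sketch (not repository code): read the attack data config with the
# parser shown above. Assumes the working directory is yolov3/.
from utils.parse_config import parse_data_cfg

opt = parse_data_cfg('data/coco2014_train_attack.data')
print(opt['classes'])  # '80'  (values come back as strings, not ints)
print(opt['train'])    # '../coco/trainvalno5k_all.txt'
print(opt['valid'])    # '../coco/5k_clean.txt'
print(opt['poison'])   # '../coco/5k_poison.txt'
```

`fuse_conv_and_bn` in `yolov3/utils/torch_utils.py` folds a `BatchNorm2d` into the preceding `Conv2d` by scaling the kernel with `gamma / sqrt(running_var + eps)` and adjusting the bias, so the single fused layer reproduces the two-layer computation at inference time. A quick numerical self-check, again only an illustrative sketch:

```python
# Minimal sketch (not repository code): confirm that fusing conv + batchnorm
# reproduces the unfused output in eval mode. Assumes the working directory
# is yolov3/ so that utils.torch_utils is importable.
import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16).eval()  # fusion relies on the running (eval) statistics
fused = fuse_conv_and_bn(conv, bn)

x = torch.randn(1, 3, 64, 64)
with torch.no_grad():
    print(torch.allclose(bn(conv(x)), fused(x), atol=1e-6))  # should print: True
```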