├── README.md
├── anchors.txt
├── images
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   └── 4.jpg
├── scripts
│   ├── args_voc.py
│   └── parse_voc_xml.py
└── yolov3.cfg
/README.md:
--------------------------------------------------------------------------------
1 | # SAR_yolov3
2 | 
3 | # Welcome to SAR SHIP DETECTION
4 | 
5 | We have applied YOLO-V3 object detection to **SAR satellite** images to detect ships. SAR sensors are largely unaffected by bad weather and darkness, which makes them well suited to this task. Among the models and methods we tried, YOLO-V3 gave the best accuracy-to-speed trade-off. The current accuracy is **90.25 %**, and we are working to improve it further.
6 | 
7 | # Files
8 | 
9 | We have included the config file (`yolov3.cfg`), which defines the model architecture for the **darknet** deep learning framework. We modified the model in several ways to get our results, from data augmentation to hyper-parameters.
10 | The anchor boxes are also provided in this repository (`anchors.txt`).
11 | The `scripts` folder contains the scripts that convert the **VOC XML** annotations into the plain-text format used for training.
12 | 
13 | 
14 | ## Results
15 | 
16 | ![Prediction 1](https://github.com/humblecoder612/SAR_yolov3/blob/master/images/1.jpg)
17 | 
18 | **PREDICTION 1**
19 | 
20 | ![Prediction 2](https://github.com/humblecoder612/SAR_yolov3/blob/master/images/2.jpg)
21 | 
22 | **PREDICTION 2**
23 | 
24 | ## Submission
25 | We have written a research paper on this project and submitted it to a Springer conference.
26 | 
--------------------------------------------------------------------------------
/anchors.txt:
--------------------------------------------------------------------------------
1 | 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123
--------------------------------------------------------------------------------
/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/1.jpg
--------------------------------------------------------------------------------
/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/2.jpg
--------------------------------------------------------------------------------
/images/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/3.jpg
--------------------------------------------------------------------------------
/images/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/4.jpg
--------------------------------------------------------------------------------
/scripts/args_voc.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # This file contains the parameters used in train.py
3 | 
4 | from __future__ import division, print_function
5 | 
6 | from utils.misc_utils import parse_anchors, read_class_names
7 | import math
8 | 
9 | ### Some paths
10 | train_file = './data/my_data/train.txt' # The path of the training txt file.
11 | val_file = './data/my_data/val.txt' # The path of the validation txt file.
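# Note (not part of the original script): each line of the train/val txt files above is
# expected to follow the format produced by scripts/parse_voc_xml.py, i.e. space-separated:
#   image_index image_path img_width img_height [class_id xmin ymin xmax ymax] ...
# with one (class_id, xmin, ymin, xmax, ymax) group per ground-truth box, for example
# (illustrative values only):
#   0 ./data/my_data/Images/000001.jpg 416 416 0 24 53 112 98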
12 | restore_path = './data/darknet_weights/yolov3.ckpt' # The path of the weights to restore.
13 | save_dir = './checkpoint/' # The directory of the weights to save.
14 | log_dir = './data/logs/' # The directory to store the tensorboard log files.
15 | progress_log_path = './data/progress.log' # The path to record the training progress.
16 | anchor_path = './data/yolo_anchors.txt' # The path of the anchor txt file.
17 | class_name_path = './data/voc.names' # The path of the class names.
18 | 
19 | ### Training related numbers
20 | batch_size = 6
21 | img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height]
22 | letterbox_resize = False # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
23 | total_epoches = 100
24 | train_evaluation_step = 100 # Evaluate on the training batch after some steps.
25 | val_evaluation_epoch = 1 # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.
26 | save_epoch = 10 # Save the model after some epochs.
27 | batch_norm_decay = 0.99 # decay in bn ops
28 | weight_decay = 5e-4 # l2 weight decay
29 | global_step = 0 # used when resuming training
30 | 
31 | ### tf.data parameters
32 | num_threads = 10 # Number of threads for image processing used in tf.data pipeline.
33 | prefetech_buffer = 5 # Prefetch buffer size used in the tf.data pipeline.
34 | 
35 | ### Learning rate and optimizer
36 | optimizer_name = 'momentum' # Chosen from [sgd, momentum, adam, rmsprop]
37 | save_optimizer = False # Whether to save the optimizer parameters into the checkpoint file.
38 | learning_rate_init = 1e-4
39 | lr_type = 'piecewise' # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
40 | lr_decay_epoch = 5 # Epochs after which the learning rate decays. Int or float. Used with the `exponential` and `cosine_decay_restart` lr_type.
41 | lr_decay_factor = 0.96 # The learning rate decay factor. Used with the `exponential` lr_type.
42 | lr_lower_bound = 1e-6 # The minimum learning rate.
43 | # piecewise params
44 | pw_boundaries = [25, 40] # epoch based boundaries
45 | pw_values = [learning_rate_init, 3e-5, 1e-4]
46 | 
47 | ### Load and finetune
48 | # Choose the parts whose weights you want to restore. List form.
49 | # restore_include: None, restore_exclude: None => restore the whole model
50 | # restore_include: None, restore_exclude: scope => restore the whole model except `scope`
51 | # restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 but not scope2 (scope1 - scope2)
52 | # choice 1: only restore the darknet body
53 | # restore_include = ['yolov3/darknet53_body']
54 | # restore_exclude = None
55 | # choice 2: restore all layers except the last conv2d layer of each of the 3 detection scales
56 | restore_include = None
57 | restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
58 | # Choose the parts you want to finetune. List form.
59 | # Set to None to train the whole model.
60 | update_part = None
61 | 
62 | ### other training strategies
63 | multi_scale_train = True # Whether to apply the multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
64 | use_label_smooth = True # Whether to use the class label smoothing strategy.
65 | use_focal_loss = True # Whether to apply focal loss to the conf loss.
66 | use_mix_up = True # Whether to use the mix-up data augmentation strategy.
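# Notes on the strategies above (a brief sketch; the actual implementations live in the
# training code this file parameterizes, e.g. train.py, which is not included in this repository):
#   * mix-up blends two training samples: x = lam * x_a + (1 - lam) * x_b with lam ~ Beta(alpha, alpha),
#     and the corresponding losses are weighted by lam and (1 - lam).
#   * class label smoothing softens the one-hot target: y_smooth = (1 - eps) * y + eps / class_num.
#   * focal loss down-weights easy examples in the confidence loss:
#     FL(p_t) = -(1 - p_t)**gamma * log(p_t), with gamma = 2 in the original focal loss paper.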
67 | use_warm_up = True # Whether to use the warm-up strategy to prevent gradients from exploding.
68 | warm_up_epoch = 3 # Warm-up training epochs. Set to a larger value if gradients explode.
69 | 
70 | ### some constants in validation
71 | # nms
72 | nms_threshold = 0.45 # iou threshold in nms operation
73 | score_threshold = 0.01 # Threshold of the class probability in the nms operation, i.e. score = pred_confs * pred_probs. Set lower for higher recall.
74 | nms_topk = 150 # keep at most nms_topk outputs after nms
75 | # mAP eval
76 | eval_threshold = 0.5 # the iou threshold applied in mAP evaluation
77 | use_voc_07_metric = False # whether to use the voc 2007 evaluation metric, i.e. the 11-point metric
78 | 
79 | ### parse some params
80 | anchors = parse_anchors(anchor_path)
81 | classes = read_class_names(class_name_path)
82 | class_num = len(classes)
83 | train_img_cnt = len(open(train_file, 'r').readlines())
84 | val_img_cnt = len(open(val_file, 'r').readlines())
85 | train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))
86 | 
87 | lr_decay_freq = int(train_batch_num * lr_decay_epoch)
88 | pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
--------------------------------------------------------------------------------
/scripts/parse_voc_xml.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | import xml.etree.ElementTree as ET
4 | import os
5 | 
6 | names_dict = {}
7 | cnt = 0
8 | f = open('./voc_names.txt', 'r').readlines()
9 | for line in f:
10 |     line = line.strip()
11 |     names_dict[line] = cnt
12 |     cnt += 1
13 | 
14 | voc_07 = '../data/my_data/'
15 | #voc_12 = '/data/my_data/'
16 | 
17 | anno_path = [os.path.join(voc_07, 'Annot')]
18 | img_path = [os.path.join(voc_07, 'Images')]
19 | 
20 | trainval_path = [os.path.join(voc_07, 'train.txt')]
21 | test_path = [os.path.join(voc_07, 'test.txt')]
22 | 
23 | 
24 | def parse_xml(path):
25 |     tree = ET.parse(path)
26 |     img_name = path.split('/')[-1][:-4]
27 | 
28 |     height = tree.findtext("./size/height")
29 |     width = tree.findtext("./size/width")
30 | 
31 |     objects = [img_name, width, height]
32 | 
33 |     for obj in tree.findall('object'):
34 |         #difficult = obj.find('difficult').text
35 |         #if difficult == '1':
36 |         #    continue
37 |         name = obj.find('name').text
38 |         bbox = obj.find('bndbox')
39 |         xmin = bbox.find('xmin').text
40 |         ymin = bbox.find('ymin').text
41 |         xmax = bbox.find('xmax').text
42 |         ymax = bbox.find('ymax').text
43 | 
44 |         name = str(names_dict[name])
45 |         objects.extend([name, xmin, ymin, xmax, ymax])
46 |     if len(objects) > 3:  # [img_name, width, height] plus at least one box
47 |         return objects
48 |     else:
49 |         return None
50 | 
51 | test_cnt = 0
52 | def gen_test_txt(txt_path):
53 |     global test_cnt
54 |     f = open(txt_path, 'w')
55 | 
56 |     for i, path in enumerate(test_path):
57 |         img_names = open(path, 'r').readlines()
58 |         for img_name in img_names:
59 |             img_name = img_name.strip()
60 |             xml_path = anno_path[i] + '/' + img_name + '.xml'
61 |             objects = parse_xml(xml_path)
62 |             if objects:
63 |                 objects[0] = img_path[i] + '/' + img_name + '.jpg'
64 |                 if os.path.exists(objects[0]):
65 |                     objects.insert(0, str(test_cnt))
66 |                     test_cnt += 1  # running image index written as the first field of each line
67 |                     objects = ' '.join(objects) + '\n'
68 |                     f.write(objects)
69 |     f.close()
70 | 
71 | 
72 | train_cnt = 0
73 | def gen_train_txt(txt_path):
74 |     global train_cnt
75 |     f = open(txt_path, 'w')
76 | 
77 |     for i, path in enumerate(trainval_path):
78 |         img_names = open(path, 'r').readlines()
79 |         for img_name in img_names:
80 |             img_name = img_name.strip()
81 |             xml_path = anno_path[i] + '/' + img_name + '.xml'
82 |             objects = parse_xml(xml_path)
83 |             if objects:
84 |                 objects[0] = img_path[i] + '/' + img_name + '.jpg'
85 |                 if os.path.exists(objects[0]):
86 |                     objects.insert(0, str(train_cnt))
87 |                     train_cnt += 1  # running image index written as the first field of each line
88 |                     objects = ' '.join(objects) + '\n'
89 |                     f.write(objects)
90 |     f.close()
91 | 
92 | 
93 | gen_train_txt('train.txt')
94 | gen_test_txt('val.txt')
95 | 
96 | 
--------------------------------------------------------------------------------
/yolov3.cfg:
--------------------------------------------------------------------------------
1 | [net] 2 | # Testing 3 | #batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | mixup=1 18 | flip=1 19 | 20 | learning_rate=0.001 21 | burn_in=1000 22 | max_batches = 16000 23 | policy=steps 24 | steps=12800,14400 25 | scales=.1,.1 26 | 27 | 28 | 29 | [convolutional] 30 | batch_normalize=1 31 | filters=32 32 | size=3 33 | stride=1 34 | pad=1 35 | activation=leaky 36 | 37 | # Downsample 38 | 39 | [convolutional] 40 | batch_normalize=1 41 | filters=64 42 | size=3 43 | stride=2 44 | pad=1 45 | activation=leaky 46 | 47 | [convolutional] 48 | batch_normalize=1 49 | filters=32 50 | size=1 51 | stride=1 52 | pad=1 53 | activation=leaky 54 | 55 | [convolutional] 56 | batch_normalize=1 57 | filters=64 58 | size=3 59 | stride=1 60 | pad=1 61 | activation=leaky 62 | 63 | [shortcut] 64 | from=-3 65 | activation=linear 66 | 67 | # Downsample 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=128 72 | size=3 73 | stride=2 74 | pad=1 75 | activation=leaky 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=64 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [shortcut] 94 | from=-3 95 | activation=linear 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=64 100 | size=1 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=128 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [shortcut] 114 | from=-3 115 | activation=linear 116 | 117 | # Downsample 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=256 122 | size=3 123 | stride=2 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=128 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=256 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [shortcut] 144 | from=-3 145 | activation=linear 146 | 147 | [convolutional] 148 | batch_normalize=1 149 | filters=128 150 | size=1 151 | stride=1 152 | pad=1 153 | activation=leaky 154 | 155 | [convolutional] 156 | batch_normalize=1 157 | filters=256 158 | size=3 159 | stride=1 160 | pad=1 161 | activation=leaky 162 | 163 | [shortcut] 164 | from=-3 165 | activation=linear 166 | 167 | [convolutional] 168 | batch_normalize=1 169 | filters=128 170 | size=1 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=256 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | [shortcut] 184 | from=-3 185 | activation=linear 186 | 187 | [convolutional] 188 | batch_normalize=1 189 |
filters=128 190 | size=1 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=256 198 | size=3 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [shortcut] 204 | from=-3 205 | activation=linear 206 | 207 | 208 | [convolutional] 209 | batch_normalize=1 210 | filters=128 211 | size=1 212 | stride=1 213 | pad=1 214 | activation=leaky 215 | 216 | [convolutional] 217 | batch_normalize=1 218 | filters=256 219 | size=3 220 | stride=1 221 | pad=1 222 | activation=leaky 223 | 224 | [shortcut] 225 | from=-3 226 | activation=linear 227 | 228 | [convolutional] 229 | batch_normalize=1 230 | filters=128 231 | size=1 232 | stride=1 233 | pad=1 234 | activation=leaky 235 | 236 | [convolutional] 237 | batch_normalize=1 238 | filters=256 239 | size=3 240 | stride=1 241 | pad=1 242 | activation=leaky 243 | 244 | [shortcut] 245 | from=-3 246 | activation=linear 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [convolutional] 257 | batch_normalize=1 258 | filters=256 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=leaky 263 | 264 | [shortcut] 265 | from=-3 266 | activation=linear 267 | 268 | [convolutional] 269 | batch_normalize=1 270 | filters=128 271 | size=1 272 | stride=1 273 | pad=1 274 | activation=leaky 275 | 276 | [convolutional] 277 | batch_normalize=1 278 | filters=256 279 | size=3 280 | stride=1 281 | pad=1 282 | activation=leaky 283 | 284 | [shortcut] 285 | from=-3 286 | activation=linear 287 | 288 | # Downsample 289 | 290 | [convolutional] 291 | batch_normalize=1 292 | filters=512 293 | size=3 294 | stride=2 295 | pad=1 296 | activation=leaky 297 | 298 | [convolutional] 299 | batch_normalize=1 300 | filters=256 301 | size=1 302 | stride=1 303 | pad=1 304 | activation=leaky 305 | 306 | [convolutional] 307 | batch_normalize=1 308 | filters=512 309 | size=3 310 | stride=1 311 | pad=1 312 | activation=leaky 313 | 314 | [shortcut] 315 | from=-3 316 | activation=linear 317 | 318 | 319 | [convolutional] 320 | batch_normalize=1 321 | filters=256 322 | size=1 323 | stride=1 324 | pad=1 325 | activation=leaky 326 | 327 | [convolutional] 328 | batch_normalize=1 329 | filters=512 330 | size=3 331 | stride=1 332 | pad=1 333 | activation=leaky 334 | 335 | [shortcut] 336 | from=-3 337 | activation=linear 338 | 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=256 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=512 351 | size=3 352 | stride=1 353 | pad=1 354 | activation=leaky 355 | 356 | [shortcut] 357 | from=-3 358 | activation=linear 359 | 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 364 | size=1 365 | stride=1 366 | pad=1 367 | activation=leaky 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=512 372 | size=3 373 | stride=1 374 | pad=1 375 | activation=leaky 376 | 377 | [shortcut] 378 | from=-3 379 | activation=linear 380 | 381 | [convolutional] 382 | batch_normalize=1 383 | filters=256 384 | size=1 385 | stride=1 386 | pad=1 387 | activation=leaky 388 | 389 | [convolutional] 390 | batch_normalize=1 391 | filters=512 392 | size=3 393 | stride=1 394 | pad=1 395 | activation=leaky 396 | 397 | [shortcut] 398 | from=-3 399 | activation=linear 400 | 401 | 402 | [convolutional] 403 | batch_normalize=1 404 | filters=256 405 | size=1 406 | stride=1 407 | pad=1 408 | activation=leaky 409 | 410 | 
[convolutional] 411 | batch_normalize=1 412 | filters=512 413 | size=3 414 | stride=1 415 | pad=1 416 | activation=leaky 417 | 418 | [shortcut] 419 | from=-3 420 | activation=linear 421 | 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=256 426 | size=1 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [convolutional] 432 | batch_normalize=1 433 | filters=512 434 | size=3 435 | stride=1 436 | pad=1 437 | activation=leaky 438 | 439 | [shortcut] 440 | from=-3 441 | activation=linear 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=256 446 | size=1 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [convolutional] 452 | batch_normalize=1 453 | filters=512 454 | size=3 455 | stride=1 456 | pad=1 457 | activation=leaky 458 | 459 | [shortcut] 460 | from=-3 461 | activation=linear 462 | 463 | # Downsample 464 | 465 | [convolutional] 466 | batch_normalize=1 467 | filters=1024 468 | size=3 469 | stride=2 470 | pad=1 471 | activation=leaky 472 | 473 | [convolutional] 474 | batch_normalize=1 475 | filters=512 476 | size=1 477 | stride=1 478 | pad=1 479 | activation=leaky 480 | 481 | [convolutional] 482 | batch_normalize=1 483 | filters=1024 484 | size=3 485 | stride=1 486 | pad=1 487 | activation=leaky 488 | 489 | [shortcut] 490 | from=-3 491 | activation=linear 492 | 493 | [convolutional] 494 | batch_normalize=1 495 | filters=512 496 | size=1 497 | stride=1 498 | pad=1 499 | activation=leaky 500 | 501 | [convolutional] 502 | batch_normalize=1 503 | filters=1024 504 | size=3 505 | stride=1 506 | pad=1 507 | activation=leaky 508 | 509 | [shortcut] 510 | from=-3 511 | activation=linear 512 | 513 | [convolutional] 514 | batch_normalize=1 515 | filters=512 516 | size=1 517 | stride=1 518 | pad=1 519 | activation=leaky 520 | 521 | [convolutional] 522 | batch_normalize=1 523 | filters=1024 524 | size=3 525 | stride=1 526 | pad=1 527 | activation=leaky 528 | 529 | [shortcut] 530 | from=-3 531 | activation=linear 532 | 533 | [convolutional] 534 | batch_normalize=1 535 | filters=512 536 | size=1 537 | stride=1 538 | pad=1 539 | activation=leaky 540 | 541 | [convolutional] 542 | batch_normalize=1 543 | filters=1024 544 | size=3 545 | stride=1 546 | pad=1 547 | activation=leaky 548 | 549 | [shortcut] 550 | from=-3 551 | activation=linear 552 | 553 | ###################### 554 | 555 | [convolutional] 556 | batch_normalize=1 557 | filters=512 558 | size=1 559 | stride=1 560 | pad=1 561 | activation=leaky 562 | 563 | [convolutional] 564 | batch_normalize=1 565 | size=3 566 | stride=1 567 | pad=1 568 | filters=1024 569 | activation=leaky 570 | 571 | [convolutional] 572 | batch_normalize=1 573 | filters=512 574 | size=1 575 | stride=1 576 | pad=1 577 | activation=leaky 578 | 579 | [convolutional] 580 | batch_normalize=1 581 | size=3 582 | stride=1 583 | pad=1 584 | filters=1024 585 | activation=leaky 586 | 587 | [convolutional] 588 | batch_normalize=1 589 | filters=512 590 | size=1 591 | stride=1 592 | pad=1 593 | activation=leaky 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | size=3 598 | stride=1 599 | pad=1 600 | filters=1024 601 | activation=leaky 602 | 603 | [convolutional] 604 | size=1 605 | stride=1 606 | pad=1 607 | filters=18 608 | activation=linear 609 | 610 | [yolo] 611 | mask = 6,7,8 612 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 613 | classes=1 614 | num=9 615 | jitter=.3 616 | ignore_thresh = .5 617 | truth_thresh = 1 618 | random=1 619 | 620 | [route] 621 | layers = -4 622 | 623 | 
[convolutional] 624 | batch_normalize=1 625 | filters=256 626 | size=1 627 | stride=1 628 | pad=1 629 | activation=leaky 630 | 631 | [upsample] 632 | stride=2 633 | 634 | [route] 635 | layers = -1, 61 636 | 637 | 638 | 639 | [convolutional] 640 | batch_normalize=1 641 | filters=256 642 | size=1 643 | stride=1 644 | pad=1 645 | activation=leaky 646 | 647 | [convolutional] 648 | batch_normalize=1 649 | size=3 650 | stride=1 651 | pad=1 652 | filters=512 653 | activation=leaky 654 | 655 | [convolutional] 656 | batch_normalize=1 657 | filters=256 658 | size=1 659 | stride=1 660 | pad=1 661 | activation=leaky 662 | 663 | [convolutional] 664 | batch_normalize=1 665 | size=3 666 | stride=1 667 | pad=1 668 | filters=512 669 | activation=leaky 670 | 671 | [convolutional] 672 | batch_normalize=1 673 | filters=256 674 | size=1 675 | stride=1 676 | pad=1 677 | activation=leaky 678 | 679 | [convolutional] 680 | batch_normalize=1 681 | size=3 682 | stride=1 683 | pad=1 684 | filters=512 685 | activation=leaky 686 | 687 | [convolutional] 688 | size=1 689 | stride=1 690 | pad=1 691 | filters=18 692 | activation=linear 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .5 701 | truth_thresh = 1 702 | random=1 703 | 704 | [route] 705 | layers = -4 706 | 707 | [convolutional] 708 | batch_normalize=1 709 | filters=128 710 | size=1 711 | stride=1 712 | pad=1 713 | activation=leaky 714 | 715 | [upsample] 716 | stride=2 717 | 718 | [route] 719 | layers = -1, 36 720 | 721 | 722 | 723 | [convolutional] 724 | batch_normalize=1 725 | filters=128 726 | size=1 727 | stride=1 728 | pad=1 729 | activation=leaky 730 | 731 | [convolutional] 732 | batch_normalize=1 733 | size=3 734 | stride=1 735 | pad=1 736 | filters=256 737 | activation=leaky 738 | 739 | [convolutional] 740 | batch_normalize=1 741 | filters=128 742 | size=1 743 | stride=1 744 | pad=1 745 | activation=leaky 746 | 747 | [convolutional] 748 | batch_normalize=1 749 | size=3 750 | stride=1 751 | pad=1 752 | filters=256 753 | activation=leaky 754 | 755 | [convolutional] 756 | batch_normalize=1 757 | filters=128 758 | size=1 759 | stride=1 760 | pad=1 761 | activation=leaky 762 | 763 | [convolutional] 764 | batch_normalize=1 765 | size=3 766 | stride=1 767 | pad=1 768 | filters=256 769 | activation=leaky 770 | 771 | [convolutional] 772 | size=1 773 | stride=1 774 | pad=1 775 | filters=18 776 | activation=linear 777 | 778 | [yolo] 779 | mask = 0,1,2 780 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 781 | classes=1 782 | num=9 783 | jitter=.3 784 | ignore_thresh = .5 785 | truth_thresh = 1 786 | random=1 787 | 788 | --------------------------------------------------------------------------------
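A note on the detection head settings above: in a darknet YOLOv3 cfg, the convolutional layer immediately before each `[yolo]` layer must have `filters = anchors_per_scale * (classes + 5)`. With `classes=1` and 3 anchors selected per scale (each `mask` lists 3 of the 9 anchors), this gives 3 * (1 + 5) = 18, which matches `filters=18` before each of the three `[yolo]` layers. The short Python sketch below is not part of the repository and only assumes that `anchors.txt` from this repo sits in the working directory; it re-derives that number and splits `anchors.txt` into the nine (width, height) pairs listed in the `anchors=` lines, roughly what `parse_anchors()` (imported in `scripts/args_voc.py` from `utils.misc_utils`, not included here) presumably does.

# Quick consistency check for yolov3.cfg (illustrative sketch; not part of the original code).
num_classes = 1        # classes=1 in every [yolo] section above
anchors_per_scale = 3  # each [yolo] mask selects 3 of the 9 anchors

# 4 box offsets + 1 objectness score + num_classes class scores, per anchor
expected_filters = anchors_per_scale * (num_classes + 5)
print(expected_filters)  # -> 18, matching filters=18 before each [yolo] layer

# Split anchors.txt into (width, height) pairs, matching the anchors= lines above.
with open('anchors.txt') as f:
    values = [float(v) for v in f.read().replace(',', ' ').split()]
anchor_pairs = list(zip(values[0::2], values[1::2]))
print(anchor_pairs)  # -> 9 pairs: (19.0, 21.0), (25.0, 47.0), ...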