├── README.md
├── anchors.txt
├── images
│   ├── 1.jpg
│   ├── 2.jpg
│   ├── 3.jpg
│   └── 4.jpg
├── scripts
│   ├── args_voc.py
│   └── parse_voc_xml.py
└── yolov3.cfg
/README.md:
--------------------------------------------------------------------------------
1 | # SAR_yolov3
2 | 
3 | # Welcome to SAR SHIP DETECTION
4 | 
5 | We have applied YOLO-V3 object detection to **SAR satellite** images to detect ships. SAR sensors are largely unaffected by bad weather and darkness, which makes them well suited to this task. Among the models and methods we tried, YOLO-V3 gave the best accuracy-to-speed trade-off. The current accuracy is **90.25 %**, and we are working to improve it further.
6 | 
7 | # Files
8 | 
9 | We have included the config file (`yolov3.cfg`), which defines the model architecture for the **darknet** deep learning framework. We modified the model in several ways to get our results, from data augmentation to hyper-parameters.
10 | The anchor boxes are also provided in this repository (`anchors.txt`).
11 | The `scripts` folder contains the scripts that convert the **VOC XML** annotations into the plain-text format used for training.
12 | 
13 | 
14 | ## Results
15 | 
16 | ![Prediction 1](https://github.com/humblecoder612/SAR_yolov3/blob/master/images/1.jpg)
17 | 
18 | **PREDICTION 1**
19 | 
20 | ![Prediction 2](https://github.com/humblecoder612/SAR_yolov3/blob/master/images/2.jpg)
21 | 
22 | **PREDICTION 2**
23 | 
24 | ## Submission
25 | We have written a research paper on this project and submitted it to a Springer conference.
26 | 
--------------------------------------------------------------------------------
/anchors.txt:
--------------------------------------------------------------------------------
1 | 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123
--------------------------------------------------------------------------------
/images/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/1.jpg
--------------------------------------------------------------------------------
/images/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/2.jpg
--------------------------------------------------------------------------------
/images/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/3.jpg
--------------------------------------------------------------------------------
/images/4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/humblecoder612/SAR_yolov3/ae0fbae9c8721e4fef2253b8bfe3804010a07d5d/images/4.jpg
--------------------------------------------------------------------------------
/scripts/args_voc.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | # This file contains the parameters used in train.py
3 | 
4 | from __future__ import division, print_function
5 | 
6 | from utils.misc_utils import parse_anchors, read_class_names
7 | import math
8 | 
9 | ### Some paths
10 | train_file = './data/my_data/train.txt' # The path of the training txt file.
11 | val_file = './data/my_data/val.txt' # The path of the validation txt file.
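# Note (not part of the original script): each line of the train/val txt files above is
# expected to follow the format produced by scripts/parse_voc_xml.py, i.e. space-separated:
#   image_index image_path img_width img_height [class_id xmin ymin xmax ymax] ...
# with one (class_id, xmin, ymin, xmax, ymax) group per ground-truth box, for example
# (illustrative values only):
#   0 ./data/my_data/Images/000001.jpg 416 416 0 24 53 112 98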
12 | restore_path = './data/darknet_weights/yolov3.ckpt' # The path of the weights to restore.
13 | save_dir = './checkpoint/' # The directory of the weights to save.
14 | log_dir = './data/logs/' # The directory to store the tensorboard log files.
15 | progress_log_path = './data/progress.log' # The path to record the training progress.
16 | anchor_path = './data/yolo_anchors.txt' # The path of the anchor txt file.
17 | class_name_path = './data/voc.names' # The path of the class names.
18 | 
19 | ### Training related numbers
20 | batch_size = 6
21 | img_size = [416, 416] # Images will be resized to `img_size` and fed to the network, size format: [width, height]
22 | letterbox_resize = False # Whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
23 | total_epoches = 100
24 | train_evaluation_step = 100 # Evaluate on the training batch after some steps.
25 | val_evaluation_epoch = 1 # Evaluate on the whole validation dataset after some epochs. Set to None to evaluate every epoch.
26 | save_epoch = 10 # Save the model after some epochs.
27 | batch_norm_decay = 0.99 # decay in bn ops
28 | weight_decay = 5e-4 # l2 weight decay
29 | global_step = 0 # used when resuming training
30 | 
31 | ### tf.data parameters
32 | num_threads = 10 # Number of threads for image processing used in tf.data pipeline.
33 | prefetech_buffer = 5 # Prefetch buffer size used in the tf.data pipeline.
34 | 
35 | ### Learning rate and optimizer
36 | optimizer_name = 'momentum' # Chosen from [sgd, momentum, adam, rmsprop]
37 | save_optimizer = False # Whether to save the optimizer parameters into the checkpoint file.
38 | learning_rate_init = 1e-4
39 | lr_type = 'piecewise' # Chosen from [fixed, exponential, cosine_decay, cosine_decay_restart, piecewise]
40 | lr_decay_epoch = 5 # Epochs after which the learning rate decays. Int or float. Used with the `exponential` and `cosine_decay_restart` lr_type.
41 | lr_decay_factor = 0.96 # The learning rate decay factor. Used with the `exponential` lr_type.
42 | lr_lower_bound = 1e-6 # The minimum learning rate.
43 | # piecewise params
44 | pw_boundaries = [25, 40] # epoch based boundaries
45 | pw_values = [learning_rate_init, 3e-5, 1e-4]
46 | 
47 | ### Load and finetune
48 | # Choose the parts whose weights you want to restore. List form.
49 | # restore_include: None, restore_exclude: None => restore the whole model
50 | # restore_include: None, restore_exclude: scope => restore the whole model except `scope`
51 | # restore_include: scope1, restore_exclude: scope2 => if scope1 contains scope2, restore scope1 but not scope2 (scope1 - scope2)
52 | # choice 1: only restore the darknet body
53 | # restore_include = ['yolov3/darknet53_body']
54 | # restore_exclude = None
55 | # choice 2: restore all layers except the last conv2d layer of each of the 3 detection scales
56 | restore_include = None
57 | restore_exclude = ['yolov3/yolov3_head/Conv_14', 'yolov3/yolov3_head/Conv_6', 'yolov3/yolov3_head/Conv_22']
58 | # Choose the parts you want to finetune. List form.
59 | # Set to None to train the whole model.
60 | update_part = None
61 | 
62 | ### other training strategies
63 | multi_scale_train = True # Whether to apply the multi-scale training strategy. Image size varies from [320, 320] to [640, 640] by default.
64 | use_label_smooth = True # Whether to use the class label smoothing strategy.
65 | use_focal_loss = True # Whether to apply focal loss to the conf loss.
66 | use_mix_up = True # Whether to use the mix-up data augmentation strategy.
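# Notes on the strategies above (a brief sketch; the actual implementations live in the
# training code this file parameterizes, e.g. train.py, which is not included in this repository):
#   * mix-up blends two training samples: x = lam * x_a + (1 - lam) * x_b with lam ~ Beta(alpha, alpha),
#     and the corresponding losses are weighted by lam and (1 - lam).
#   * class label smoothing softens the one-hot target: y_smooth = (1 - eps) * y + eps / class_num.
#   * focal loss down-weights easy examples in the confidence loss:
#     FL(p_t) = -(1 - p_t)**gamma * log(p_t), with gamma = 2 in the original focal loss paper.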
67 | use_warm_up = True # Whether to use the warm-up strategy to prevent gradients from exploding.
68 | warm_up_epoch = 3 # Warm-up training epochs. Set to a larger value if gradients explode.
69 | 
70 | ### some constants in validation
71 | # nms
72 | nms_threshold = 0.45 # iou threshold in nms operation
73 | score_threshold = 0.01 # Threshold of the class probability in the nms operation, i.e. score = pred_confs * pred_probs. Set lower for higher recall.
74 | nms_topk = 150 # keep at most nms_topk outputs after nms
75 | # mAP eval
76 | eval_threshold = 0.5 # the iou threshold applied in mAP evaluation
77 | use_voc_07_metric = False # whether to use the voc 2007 evaluation metric, i.e. the 11-point metric
78 | 
79 | ### parse some params
80 | anchors = parse_anchors(anchor_path)
81 | classes = read_class_names(class_name_path)
82 | class_num = len(classes)
83 | train_img_cnt = len(open(train_file, 'r').readlines())
84 | val_img_cnt = len(open(val_file, 'r').readlines())
85 | train_batch_num = int(math.ceil(float(train_img_cnt) / batch_size))
86 | 
87 | lr_decay_freq = int(train_batch_num * lr_decay_epoch)
88 | pw_boundaries = [float(i) * train_batch_num + global_step for i in pw_boundaries]
--------------------------------------------------------------------------------
/scripts/parse_voc_xml.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | 
3 | import xml.etree.ElementTree as ET
4 | import os
5 | 
6 | names_dict = {}
7 | cnt = 0
8 | f = open('./voc_names.txt', 'r').readlines()
9 | for line in f:
10 |     line = line.strip()
11 |     names_dict[line] = cnt
12 |     cnt += 1
13 | 
14 | voc_07 = '../data/my_data/'
15 | #voc_12 = '/data/my_data/'
16 | 
17 | anno_path = [os.path.join(voc_07, 'Annot')]
18 | img_path = [os.path.join(voc_07, 'Images')]
19 | 
20 | trainval_path = [os.path.join(voc_07, 'train.txt')]
21 | test_path = [os.path.join(voc_07, 'test.txt')]
22 | 
23 | 
24 | def parse_xml(path):
25 |     tree = ET.parse(path)
26 |     img_name = path.split('/')[-1][:-4]
27 | 
28 |     height = tree.findtext("./size/height")
29 |     width = tree.findtext("./size/width")
30 | 
31 |     objects = [img_name, width, height]
32 | 
33 |     for obj in tree.findall('object'):
34 |         #difficult = obj.find('difficult').text
35 |         #if difficult == '1':
36 |         #    continue
37 |         name = obj.find('name').text
38 |         bbox = obj.find('bndbox')
39 |         xmin = bbox.find('xmin').text
40 |         ymin = bbox.find('ymin').text
41 |         xmax = bbox.find('xmax').text
42 |         ymax = bbox.find('ymax').text
43 | 
44 |         name = str(names_dict[name])
45 |         objects.extend([name, xmin, ymin, xmax, ymax])
46 |     if len(objects) > 3:  # [img_name, width, height] plus at least one box
47 |         return objects
48 |     else:
49 |         return None
50 | 
51 | test_cnt = 0
52 | def gen_test_txt(txt_path):
53 |     global test_cnt
54 |     f = open(txt_path, 'w')
55 | 
56 |     for i, path in enumerate(test_path):
57 |         img_names = open(path, 'r').readlines()
58 |         for img_name in img_names:
59 |             img_name = img_name.strip()
60 |             xml_path = anno_path[i] + '/' + img_name + '.xml'
61 |             objects = parse_xml(xml_path)
62 |             if objects:
63 |                 objects[0] = img_path[i] + '/' + img_name + '.jpg'
64 |                 if os.path.exists(objects[0]):
65 |                     objects.insert(0, str(test_cnt))
66 |                     test_cnt += 1  # running image index written as the first field of each line
67 |                     objects = ' '.join(objects) + '\n'
68 |                     f.write(objects)
69 |     f.close()
70 | 
71 | 
72 | train_cnt = 0
73 | def gen_train_txt(txt_path):
74 |     global train_cnt
75 |     f = open(txt_path, 'w')
76 | 
77 |     for i, path in enumerate(trainval_path):
78 |         img_names = open(path, 'r').readlines()
79 |         for img_name in img_names:
80 |             img_name = img_name.strip()
81 |             xml_path = anno_path[i] + '/' + img_name + '.xml'
82 |             objects = parse_xml(xml_path)
83 |             if objects:
84 |                 objects[0] = img_path[i] + '/' + img_name + '.jpg'
85 |                 if os.path.exists(objects[0]):
86 |                     objects.insert(0, str(train_cnt))
87 |                     train_cnt += 1  # running image index written as the first field of each line
88 |                     objects = ' '.join(objects) + '\n'
89 |                     f.write(objects)
90 |     f.close()
91 | 
92 | 
93 | gen_train_txt('train.txt')
94 | gen_test_txt('val.txt')
95 | 
96 | 
--------------------------------------------------------------------------------
/yolov3.cfg:
--------------------------------------------------------------------------------
1 | [net] 2 | # Testing 3 | #batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=32 7 | subdivisions=16 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | mixup=1 18 | flip=1 19 | 20 | learning_rate=0.001 21 | burn_in=1000 22 | max_batches = 16000 23 | policy=steps 24 | steps=12800,14400 25 | scales=.1,.1 26 | 27 | 28 | 29 | [convolutional] 30 | batch_normalize=1 31 | filters=32 32 | size=3 33 | stride=1 34 | pad=1 35 | activation=leaky 36 | 37 | # Downsample 38 | 39 | [convolutional] 40 | batch_normalize=1 41 | filters=64 42 | size=3 43 | stride=2 44 | pad=1 45 | activation=leaky 46 | 47 | [convolutional] 48 | batch_normalize=1 49 | filters=32 50 | size=1 51 | stride=1 52 | pad=1 53 | activation=leaky 54 | 55 | [convolutional] 56 | batch_normalize=1 57 | filters=64 58 | size=3 59 | stride=1 60 | pad=1 61 | activation=leaky 62 | 63 | [shortcut] 64 | from=-3 65 | activation=linear 66 | 67 | # Downsample 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=128 72 | size=3 73 | stride=2 74 | pad=1 75 | activation=leaky 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=64 80 | size=1 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [shortcut] 94 | from=-3 95 | activation=linear 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=64 100 | size=1 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=128 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [shortcut] 114 | from=-3 115 | activation=linear 116 | 117 | # Downsample 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=256 122 | size=3 123 | stride=2 124 | pad=1 125 | activation=leaky 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=128 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=leaky 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=256 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [shortcut] 144 | from=-3 145 | activation=linear 146 | 147 | [convolutional] 148 | batch_normalize=1 149 | filters=128 150 | size=1 151 | stride=1 152 | pad=1 153 | activation=leaky 154 | 155 | [convolutional] 156 | batch_normalize=1 157 | filters=256 158 | size=3 159 | stride=1 160 | pad=1 161 | activation=leaky 162 | 163 | [shortcut] 164 | from=-3 165 | activation=linear 166 | 167 | [convolutional] 168 | batch_normalize=1 169 | filters=128 170 | size=1 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=256 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | [shortcut] 184 | from=-3 185 | activation=linear 186 | 187 | [convolutional] 188 | batch_normalize=1 189 |
filters=128 190 | size=1 191 | stride=1 192 | pad=1 193 | activation=leaky 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=256 198 | size=3 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [shortcut] 204 | from=-3 205 | activation=linear 206 | 207 | 208 | [convolutional] 209 | batch_normalize=1 210 | filters=128 211 | size=1 212 | stride=1 213 | pad=1 214 | activation=leaky 215 | 216 | [convolutional] 217 | batch_normalize=1 218 | filters=256 219 | size=3 220 | stride=1 221 | pad=1 222 | activation=leaky 223 | 224 | [shortcut] 225 | from=-3 226 | activation=linear 227 | 228 | [convolutional] 229 | batch_normalize=1 230 | filters=128 231 | size=1 232 | stride=1 233 | pad=1 234 | activation=leaky 235 | 236 | [convolutional] 237 | batch_normalize=1 238 | filters=256 239 | size=3 240 | stride=1 241 | pad=1 242 | activation=leaky 243 | 244 | [shortcut] 245 | from=-3 246 | activation=linear 247 | 248 | [convolutional] 249 | batch_normalize=1 250 | filters=128 251 | size=1 252 | stride=1 253 | pad=1 254 | activation=leaky 255 | 256 | [convolutional] 257 | batch_normalize=1 258 | filters=256 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=leaky 263 | 264 | [shortcut] 265 | from=-3 266 | activation=linear 267 | 268 | [convolutional] 269 | batch_normalize=1 270 | filters=128 271 | size=1 272 | stride=1 273 | pad=1 274 | activation=leaky 275 | 276 | [convolutional] 277 | batch_normalize=1 278 | filters=256 279 | size=3 280 | stride=1 281 | pad=1 282 | activation=leaky 283 | 284 | [shortcut] 285 | from=-3 286 | activation=linear 287 | 288 | # Downsample 289 | 290 | [convolutional] 291 | batch_normalize=1 292 | filters=512 293 | size=3 294 | stride=2 295 | pad=1 296 | activation=leaky 297 | 298 | [convolutional] 299 | batch_normalize=1 300 | filters=256 301 | size=1 302 | stride=1 303 | pad=1 304 | activation=leaky 305 | 306 | [convolutional] 307 | batch_normalize=1 308 | filters=512 309 | size=3 310 | stride=1 311 | pad=1 312 | activation=leaky 313 | 314 | [shortcut] 315 | from=-3 316 | activation=linear 317 | 318 | 319 | [convolutional] 320 | batch_normalize=1 321 | filters=256 322 | size=1 323 | stride=1 324 | pad=1 325 | activation=leaky 326 | 327 | [convolutional] 328 | batch_normalize=1 329 | filters=512 330 | size=3 331 | stride=1 332 | pad=1 333 | activation=leaky 334 | 335 | [shortcut] 336 | from=-3 337 | activation=linear 338 | 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=256 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=512 351 | size=3 352 | stride=1 353 | pad=1 354 | activation=leaky 355 | 356 | [shortcut] 357 | from=-3 358 | activation=linear 359 | 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 364 | size=1 365 | stride=1 366 | pad=1 367 | activation=leaky 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=512 372 | size=3 373 | stride=1 374 | pad=1 375 | activation=leaky 376 | 377 | [shortcut] 378 | from=-3 379 | activation=linear 380 | 381 | [convolutional] 382 | batch_normalize=1 383 | filters=256 384 | size=1 385 | stride=1 386 | pad=1 387 | activation=leaky 388 | 389 | [convolutional] 390 | batch_normalize=1 391 | filters=512 392 | size=3 393 | stride=1 394 | pad=1 395 | activation=leaky 396 | 397 | [shortcut] 398 | from=-3 399 | activation=linear 400 | 401 | 402 | [convolutional] 403 | batch_normalize=1 404 | filters=256 405 | size=1 406 | stride=1 407 | pad=1 408 | activation=leaky 409 | 410 | 
[convolutional] 411 | batch_normalize=1 412 | filters=512 413 | size=3 414 | stride=1 415 | pad=1 416 | activation=leaky 417 | 418 | [shortcut] 419 | from=-3 420 | activation=linear 421 | 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=256 426 | size=1 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [convolutional] 432 | batch_normalize=1 433 | filters=512 434 | size=3 435 | stride=1 436 | pad=1 437 | activation=leaky 438 | 439 | [shortcut] 440 | from=-3 441 | activation=linear 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=256 446 | size=1 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [convolutional] 452 | batch_normalize=1 453 | filters=512 454 | size=3 455 | stride=1 456 | pad=1 457 | activation=leaky 458 | 459 | [shortcut] 460 | from=-3 461 | activation=linear 462 | 463 | # Downsample 464 | 465 | [convolutional] 466 | batch_normalize=1 467 | filters=1024 468 | size=3 469 | stride=2 470 | pad=1 471 | activation=leaky 472 | 473 | [convolutional] 474 | batch_normalize=1 475 | filters=512 476 | size=1 477 | stride=1 478 | pad=1 479 | activation=leaky 480 | 481 | [convolutional] 482 | batch_normalize=1 483 | filters=1024 484 | size=3 485 | stride=1 486 | pad=1 487 | activation=leaky 488 | 489 | [shortcut] 490 | from=-3 491 | activation=linear 492 | 493 | [convolutional] 494 | batch_normalize=1 495 | filters=512 496 | size=1 497 | stride=1 498 | pad=1 499 | activation=leaky 500 | 501 | [convolutional] 502 | batch_normalize=1 503 | filters=1024 504 | size=3 505 | stride=1 506 | pad=1 507 | activation=leaky 508 | 509 | [shortcut] 510 | from=-3 511 | activation=linear 512 | 513 | [convolutional] 514 | batch_normalize=1 515 | filters=512 516 | size=1 517 | stride=1 518 | pad=1 519 | activation=leaky 520 | 521 | [convolutional] 522 | batch_normalize=1 523 | filters=1024 524 | size=3 525 | stride=1 526 | pad=1 527 | activation=leaky 528 | 529 | [shortcut] 530 | from=-3 531 | activation=linear 532 | 533 | [convolutional] 534 | batch_normalize=1 535 | filters=512 536 | size=1 537 | stride=1 538 | pad=1 539 | activation=leaky 540 | 541 | [convolutional] 542 | batch_normalize=1 543 | filters=1024 544 | size=3 545 | stride=1 546 | pad=1 547 | activation=leaky 548 | 549 | [shortcut] 550 | from=-3 551 | activation=linear 552 | 553 | ###################### 554 | 555 | [convolutional] 556 | batch_normalize=1 557 | filters=512 558 | size=1 559 | stride=1 560 | pad=1 561 | activation=leaky 562 | 563 | [convolutional] 564 | batch_normalize=1 565 | size=3 566 | stride=1 567 | pad=1 568 | filters=1024 569 | activation=leaky 570 | 571 | [convolutional] 572 | batch_normalize=1 573 | filters=512 574 | size=1 575 | stride=1 576 | pad=1 577 | activation=leaky 578 | 579 | [convolutional] 580 | batch_normalize=1 581 | size=3 582 | stride=1 583 | pad=1 584 | filters=1024 585 | activation=leaky 586 | 587 | [convolutional] 588 | batch_normalize=1 589 | filters=512 590 | size=1 591 | stride=1 592 | pad=1 593 | activation=leaky 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | size=3 598 | stride=1 599 | pad=1 600 | filters=1024 601 | activation=leaky 602 | 603 | [convolutional] 604 | size=1 605 | stride=1 606 | pad=1 607 | filters=18 608 | activation=linear 609 | 610 | [yolo] 611 | mask = 6,7,8 612 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 613 | classes=1 614 | num=9 615 | jitter=.3 616 | ignore_thresh = .5 617 | truth_thresh = 1 618 | random=1 619 | 620 | [route] 621 | layers = -4 622 | 623 | 
[convolutional] 624 | batch_normalize=1 625 | filters=256 626 | size=1 627 | stride=1 628 | pad=1 629 | activation=leaky 630 | 631 | [upsample] 632 | stride=2 633 | 634 | [route] 635 | layers = -1, 61 636 | 637 | 638 | 639 | [convolutional] 640 | batch_normalize=1 641 | filters=256 642 | size=1 643 | stride=1 644 | pad=1 645 | activation=leaky 646 | 647 | [convolutional] 648 | batch_normalize=1 649 | size=3 650 | stride=1 651 | pad=1 652 | filters=512 653 | activation=leaky 654 | 655 | [convolutional] 656 | batch_normalize=1 657 | filters=256 658 | size=1 659 | stride=1 660 | pad=1 661 | activation=leaky 662 | 663 | [convolutional] 664 | batch_normalize=1 665 | size=3 666 | stride=1 667 | pad=1 668 | filters=512 669 | activation=leaky 670 | 671 | [convolutional] 672 | batch_normalize=1 673 | filters=256 674 | size=1 675 | stride=1 676 | pad=1 677 | activation=leaky 678 | 679 | [convolutional] 680 | batch_normalize=1 681 | size=3 682 | stride=1 683 | pad=1 684 | filters=512 685 | activation=leaky 686 | 687 | [convolutional] 688 | size=1 689 | stride=1 690 | pad=1 691 | filters=18 692 | activation=linear 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .5 701 | truth_thresh = 1 702 | random=1 703 | 704 | [route] 705 | layers = -4 706 | 707 | [convolutional] 708 | batch_normalize=1 709 | filters=128 710 | size=1 711 | stride=1 712 | pad=1 713 | activation=leaky 714 | 715 | [upsample] 716 | stride=2 717 | 718 | [route] 719 | layers = -1, 36 720 | 721 | 722 | 723 | [convolutional] 724 | batch_normalize=1 725 | filters=128 726 | size=1 727 | stride=1 728 | pad=1 729 | activation=leaky 730 | 731 | [convolutional] 732 | batch_normalize=1 733 | size=3 734 | stride=1 735 | pad=1 736 | filters=256 737 | activation=leaky 738 | 739 | [convolutional] 740 | batch_normalize=1 741 | filters=128 742 | size=1 743 | stride=1 744 | pad=1 745 | activation=leaky 746 | 747 | [convolutional] 748 | batch_normalize=1 749 | size=3 750 | stride=1 751 | pad=1 752 | filters=256 753 | activation=leaky 754 | 755 | [convolutional] 756 | batch_normalize=1 757 | filters=128 758 | size=1 759 | stride=1 760 | pad=1 761 | activation=leaky 762 | 763 | [convolutional] 764 | batch_normalize=1 765 | size=3 766 | stride=1 767 | pad=1 768 | filters=256 769 | activation=leaky 770 | 771 | [convolutional] 772 | size=1 773 | stride=1 774 | pad=1 775 | filters=18 776 | activation=linear 777 | 778 | [yolo] 779 | mask = 0,1,2 780 | anchors = 19, 21, 25, 47, 45, 29, 31, 90, 52, 55, 77, 38, 85, 68, 57,118, 147,123 781 | classes=1 782 | num=9 783 | jitter=.3 784 | ignore_thresh = .5 785 | truth_thresh = 1 786 | random=1 787 | 788 | --------------------------------------------------------------------------------
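A note on the detection head settings above: in a darknet YOLOv3 cfg, the convolutional layer immediately before each `[yolo]` layer must have `filters = anchors_per_scale * (classes + 5)`. With `classes=1` and 3 anchors selected per scale (each `mask` lists 3 of the 9 anchors), this gives 3 * (1 + 5) = 18, which matches `filters=18` before each of the three `[yolo]` layers. The short Python sketch below is not part of the repository and only assumes that `anchors.txt` from this repo sits in the working directory; it re-derives that number and splits `anchors.txt` into the nine (width, height) pairs listed in the `anchors=` lines, roughly what `parse_anchors()` (imported in `scripts/args_voc.py` from `utils.misc_utils`, not included here) presumably does.

# Quick consistency check for yolov3.cfg (illustrative sketch; not part of the original code).
num_classes = 1        # classes=1 in every [yolo] section above
anchors_per_scale = 3  # each [yolo] mask selects 3 of the 9 anchors

# 4 box offsets + 1 objectness score + num_classes class scores, per anchor
expected_filters = anchors_per_scale * (num_classes + 5)
print(expected_filters)  # -> 18, matching filters=18 before each [yolo] layer

# Split anchors.txt into (width, height) pairs, matching the anchors= lines above.
with open('anchors.txt') as f:
    values = [float(v) for v in f.read().replace(',', ' ').split()]
anchor_pairs = list(zip(values[0::2], values[1::2]))
print(anchor_pairs)  # -> 9 pairs: (19.0, 21.0), (25.0, 47.0), ...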