├── .gitignore
├── CustomNetwork.txt
├── LogoDetection.py
├── README.md
├── convert-to-kitti.py
├── deploy.prototxt
├── process-video-rt.py
├── resize-img.py
└── video-to-frames.py

/.gitignore:
--------------------------------------------------------------------------------
1 | env/
2 | models/adidas.caffemodel
3 | models/enterprise.caffemodel
4 | models/fedex.caffemodel
5 | models/hankook.caffemodel
6 | models/unicredit.caffemodel
7 | models/amstel.caffemodel
8 | 
9 | 
--------------------------------------------------------------------------------
/CustomNetwork.txt:
--------------------------------------------------------------------------------
1 | # DetectNet network
2 | 
3 | # Data/Input layers
4 | name: "DetectNet"
5 | layer {
6 |   name: "train_data"
7 |   type: "Data"
8 |   top: "data"
9 |   include: { phase: TRAIN }
10 | }
11 | layer {
12 |   name: "train_label"
13 |   type: "Data"
14 |   top: "label"
15 |   include: { phase: TRAIN }
16 | }
17 | layer {
18 |   name: "val_data"
19 |   type: "Data"
20 |   top: "data"
21 |   include: { phase: TEST stage: "val" }
22 | }
23 | layer {
24 |   name: "val_label"
25 |   type: "Data"
26 |   top: "label"
27 |   include: { phase: TEST stage: "val" }
28 | }
29 | layer {
30 |   name: "deploy_data"
31 |   type: "Input"
32 |   top: "data"
33 |   include: { phase: TEST not_stage: "val" }
34 | }
35 | 
36 | # Data transformation layers
37 | layer {
38 |   name: "train_transform"
39 |   type: "DetectNetTransformation"
40 |   bottom: "data"
41 |   bottom: "label"
42 |   top: "transformed_data"
43 |   top: "transformed_label"
44 |   detectnet_groundtruth_param: {
45 |     stride: 16
46 |     scale_cvg: 0.4
47 |     gridbox_type: GRIDBOX_MIN
48 |     coverage_type: RECTANGULAR
49 |     min_cvg_len: 20
50 |     obj_norm: true
51 |     image_size_x: 512
52 |     image_size_y: 512
53 |     crop_bboxes: false
54 |   }
55 |   detectnet_augmentation_param: {
56 |     crop_prob: 1
57 |     shift_x: 32
58 |     shift_y: 32
59 |     flip_prob: 0.5
60 |     rotation_prob: 0
61 |     max_rotate_degree: 5
62 |     scale_prob: 0.4
63 |     scale_min: 0.8
64 |     scale_max: 1.2
65 |     hue_rotation_prob: 0.8
66 |     hue_rotation: 30
67 |     desaturation_prob: 0.8
68 |     desaturation_max: 0.8
69 |   }
70 |   transform_param: {
71 |     mean_value: 127
72 |   }
73 |   include: { phase: TRAIN }
74 | }
75 | layer {
76 |   name: "val_transform"
77 |   type: "DetectNetTransformation"
78 |   bottom: "data"
79 |   bottom: "label"
80 |   top: "transformed_data"
81 |   top: "transformed_label"
82 |   detectnet_groundtruth_param: {
83 |     stride: 16
84 |     scale_cvg: 0.4
85 |     gridbox_type: GRIDBOX_MIN
86 |     coverage_type: RECTANGULAR
87 |     min_cvg_len: 20
88 |     obj_norm: true
89 |     image_size_x: 1280
90 |     image_size_y: 720
91 |     crop_bboxes: false
92 |   }
93 |   transform_param: {
94 |     mean_value: 127
95 |   }
96 |   include: { phase: TEST stage: "val" }
97 | }
98 | layer {
99 |   name: "deploy_transform"
100 |   type: "Power"
101 |   bottom: "data"
102 |   top: "transformed_data"
103 |   power_param {
104 |     shift: -127
105 |   }
106 |   include: { phase: TEST not_stage: "val" }
107 | }
108 | 
109 | # Label conversion layers
110 | layer {
111 |   name: "slice-label"
112 |   type: "Slice"
113 |   bottom: "transformed_label"
114 |   top: "foreground-label"
115 |   top: "bbox-label"
116 |   top: "size-label"
117 |   top: "obj-label"
118 |   top: "coverage-label"
119 |   slice_param {
120 |     slice_dim: 1
121 |     slice_point: 1
122 |     slice_point: 5
123 |     slice_point: 7
124 |     slice_point: 8
125 |   }
126 |   include { phase: TRAIN }
127 |   include { phase: TEST stage: "val" }
128 | }
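# Editor's note (an inference from the slice points above, not a comment in
# the original file): with slice_dim: 1 and slice points 1, 5, 7 and 8, the
# transformed label splits along channels into foreground-label [0:1],
# bbox-label [1:5] (four box coordinates), size-label [5:7], obj-label [7:8]
# and coverage-label [8:]. The Concat blocks below tile the 1- and 2-channel
# slices up to 4 channels so they can be multiplied element-wise against the
# 4-channel bbox tensors.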
"foreground-label" 134 | bottom: "foreground-label" 135 | bottom: "foreground-label" 136 | top: "coverage-block" 137 | concat_param { 138 | concat_dim: 1 139 | } 140 | include { phase: TRAIN } 141 | include { phase: TEST stage: "val" } 142 | } 143 | layer { 144 | name: "size-block" 145 | type: "Concat" 146 | bottom: "size-label" 147 | bottom: "size-label" 148 | top: "size-block" 149 | concat_param { 150 | concat_dim: 1 151 | } 152 | include { phase: TRAIN } 153 | include { phase: TEST stage: "val" } 154 | } 155 | layer { 156 | name: "obj-block" 157 | type: "Concat" 158 | bottom: "obj-label" 159 | bottom: "obj-label" 160 | bottom: "obj-label" 161 | bottom: "obj-label" 162 | top: "obj-block" 163 | concat_param { 164 | concat_dim: 1 165 | } 166 | include { phase: TRAIN } 167 | include { phase: TEST stage: "val" } 168 | } 169 | layer { 170 | name: "bb-label-norm" 171 | type: "Eltwise" 172 | bottom: "bbox-label" 173 | bottom: "size-block" 174 | top: "bbox-label-norm" 175 | eltwise_param { 176 | operation: PROD 177 | } 178 | include { phase: TRAIN } 179 | include { phase: TEST stage: "val" } 180 | } 181 | layer { 182 | name: "bb-obj-norm" 183 | type: "Eltwise" 184 | bottom: "bbox-label-norm" 185 | bottom: "obj-block" 186 | top: "bbox-obj-label-norm" 187 | eltwise_param { 188 | operation: PROD 189 | } 190 | include { phase: TRAIN } 191 | include { phase: TEST stage: "val" } 192 | } 193 | 194 | ###################################################################### 195 | # Start of convolutional network 196 | ###################################################################### 197 | 198 | layer { 199 | name: "conv1/7x7_s2" 200 | type: "Convolution" 201 | bottom: "transformed_data" 202 | top: "conv1/7x7_s2" 203 | param { 204 | lr_mult: 1 205 | decay_mult: 1 206 | } 207 | param { 208 | lr_mult: 2 209 | decay_mult: 0 210 | } 211 | convolution_param { 212 | num_output: 64 213 | pad: 3 214 | kernel_size: 7 215 | stride: 2 216 | weight_filler { 217 | type: "xavier" 218 | std: 0.1 219 | } 220 | bias_filler { 221 | type: "constant" 222 | value: 0.2 223 | } 224 | } 225 | } 226 | 227 | layer { 228 | name: "conv1/relu_7x7" 229 | type: "ReLU" 230 | bottom: "conv1/7x7_s2" 231 | top: "conv1/7x7_s2" 232 | } 233 | 234 | layer { 235 | name: "pool1/3x3_s2" 236 | type: "Pooling" 237 | bottom: "conv1/7x7_s2" 238 | top: "pool1/3x3_s2" 239 | pooling_param { 240 | pool: MAX 241 | kernel_size: 3 242 | stride: 2 243 | } 244 | } 245 | 246 | layer { 247 | name: "pool1/norm1" 248 | type: "LRN" 249 | bottom: "pool1/3x3_s2" 250 | top: "pool1/norm1" 251 | lrn_param { 252 | local_size: 5 253 | alpha: 0.0001 254 | beta: 0.75 255 | } 256 | } 257 | 258 | layer { 259 | name: "conv2/3x3_reduce" 260 | type: "Convolution" 261 | bottom: "pool1/norm1" 262 | top: "conv2/3x3_reduce" 263 | param { 264 | lr_mult: 1 265 | decay_mult: 1 266 | } 267 | param { 268 | lr_mult: 2 269 | decay_mult: 0 270 | } 271 | convolution_param { 272 | num_output: 64 273 | kernel_size: 1 274 | weight_filler { 275 | type: "xavier" 276 | std: 0.1 277 | } 278 | bias_filler { 279 | type: "constant" 280 | value: 0.2 281 | } 282 | } 283 | } 284 | 285 | layer { 286 | name: "conv2/relu_3x3_reduce" 287 | type: "ReLU" 288 | bottom: "conv2/3x3_reduce" 289 | top: "conv2/3x3_reduce" 290 | } 291 | 292 | layer { 293 | name: "conv2/3x3" 294 | type: "Convolution" 295 | bottom: "conv2/3x3_reduce" 296 | top: "conv2/3x3" 297 | param { 298 | lr_mult: 1 299 | decay_mult: 1 300 | } 301 | param { 302 | lr_mult: 2 303 | decay_mult: 0 304 | } 305 | convolution_param { 306 | num_output: 
192 307 | pad: 1 308 | kernel_size: 3 309 | weight_filler { 310 | type: "xavier" 311 | std: 0.03 312 | } 313 | bias_filler { 314 | type: "constant" 315 | value: 0.2 316 | } 317 | } 318 | } 319 | 320 | layer { 321 | name: "conv2/relu_3x3" 322 | type: "ReLU" 323 | bottom: "conv2/3x3" 324 | top: "conv2/3x3" 325 | } 326 | 327 | layer { 328 | name: "conv2/norm2" 329 | type: "LRN" 330 | bottom: "conv2/3x3" 331 | top: "conv2/norm2" 332 | lrn_param { 333 | local_size: 5 334 | alpha: 0.0001 335 | beta: 0.75 336 | } 337 | } 338 | 339 | layer { 340 | name: "pool2/3x3_s2" 341 | type: "Pooling" 342 | bottom: "conv2/norm2" 343 | top: "pool2/3x3_s2" 344 | pooling_param { 345 | pool: MAX 346 | kernel_size: 3 347 | stride: 2 348 | } 349 | } 350 | 351 | layer { 352 | name: "inception_3a/1x1" 353 | type: "Convolution" 354 | bottom: "pool2/3x3_s2" 355 | top: "inception_3a/1x1" 356 | param { 357 | lr_mult: 1 358 | decay_mult: 1 359 | } 360 | param { 361 | lr_mult: 2 362 | decay_mult: 0 363 | } 364 | convolution_param { 365 | num_output: 64 366 | kernel_size: 1 367 | weight_filler { 368 | type: "xavier" 369 | std: 0.03 370 | } 371 | bias_filler { 372 | type: "constant" 373 | value: 0.2 374 | } 375 | } 376 | } 377 | 378 | layer { 379 | name: "inception_3a/relu_1x1" 380 | type: "ReLU" 381 | bottom: "inception_3a/1x1" 382 | top: "inception_3a/1x1" 383 | } 384 | 385 | layer { 386 | name: "inception_3a/3x3_reduce" 387 | type: "Convolution" 388 | bottom: "pool2/3x3_s2" 389 | top: "inception_3a/3x3_reduce" 390 | param { 391 | lr_mult: 1 392 | decay_mult: 1 393 | } 394 | param { 395 | lr_mult: 2 396 | decay_mult: 0 397 | } 398 | convolution_param { 399 | num_output: 96 400 | kernel_size: 1 401 | weight_filler { 402 | type: "xavier" 403 | std: 0.09 404 | } 405 | bias_filler { 406 | type: "constant" 407 | value: 0.2 408 | } 409 | } 410 | } 411 | 412 | layer { 413 | name: "inception_3a/relu_3x3_reduce" 414 | type: "ReLU" 415 | bottom: "inception_3a/3x3_reduce" 416 | top: "inception_3a/3x3_reduce" 417 | } 418 | 419 | layer { 420 | name: "inception_3a/3x3" 421 | type: "Convolution" 422 | bottom: "inception_3a/3x3_reduce" 423 | top: "inception_3a/3x3" 424 | param { 425 | lr_mult: 1 426 | decay_mult: 1 427 | } 428 | param { 429 | lr_mult: 2 430 | decay_mult: 0 431 | } 432 | convolution_param { 433 | num_output: 128 434 | pad: 1 435 | kernel_size: 3 436 | weight_filler { 437 | type: "xavier" 438 | std: 0.03 439 | } 440 | bias_filler { 441 | type: "constant" 442 | value: 0.2 443 | } 444 | } 445 | } 446 | 447 | layer { 448 | name: "inception_3a/relu_3x3" 449 | type: "ReLU" 450 | bottom: "inception_3a/3x3" 451 | top: "inception_3a/3x3" 452 | } 453 | 454 | layer { 455 | name: "inception_3a/5x5_reduce" 456 | type: "Convolution" 457 | bottom: "pool2/3x3_s2" 458 | top: "inception_3a/5x5_reduce" 459 | param { 460 | lr_mult: 1 461 | decay_mult: 1 462 | } 463 | param { 464 | lr_mult: 2 465 | decay_mult: 0 466 | } 467 | convolution_param { 468 | num_output: 16 469 | kernel_size: 1 470 | weight_filler { 471 | type: "xavier" 472 | std: 0.2 473 | } 474 | bias_filler { 475 | type: "constant" 476 | value: 0.2 477 | } 478 | } 479 | } 480 | layer { 481 | name: "inception_3a/relu_5x5_reduce" 482 | type: "ReLU" 483 | bottom: "inception_3a/5x5_reduce" 484 | top: "inception_3a/5x5_reduce" 485 | } 486 | layer { 487 | name: "inception_3a/5x5" 488 | type: "Convolution" 489 | bottom: "inception_3a/5x5_reduce" 490 | top: "inception_3a/5x5" 491 | param { 492 | lr_mult: 1 493 | decay_mult: 1 494 | } 495 | param { 496 | lr_mult: 2 497 | decay_mult: 0 498 | 
} 499 | convolution_param { 500 | num_output: 32 501 | pad: 2 502 | kernel_size: 5 503 | weight_filler { 504 | type: "xavier" 505 | std: 0.03 506 | } 507 | bias_filler { 508 | type: "constant" 509 | value: 0.2 510 | } 511 | } 512 | } 513 | layer { 514 | name: "inception_3a/relu_5x5" 515 | type: "ReLU" 516 | bottom: "inception_3a/5x5" 517 | top: "inception_3a/5x5" 518 | } 519 | 520 | layer { 521 | name: "inception_3a/pool" 522 | type: "Pooling" 523 | bottom: "pool2/3x3_s2" 524 | top: "inception_3a/pool" 525 | pooling_param { 526 | pool: MAX 527 | kernel_size: 3 528 | stride: 1 529 | pad: 1 530 | } 531 | } 532 | 533 | layer { 534 | name: "inception_3a/pool_proj" 535 | type: "Convolution" 536 | bottom: "inception_3a/pool" 537 | top: "inception_3a/pool_proj" 538 | param { 539 | lr_mult: 1 540 | decay_mult: 1 541 | } 542 | param { 543 | lr_mult: 2 544 | decay_mult: 0 545 | } 546 | convolution_param { 547 | num_output: 32 548 | kernel_size: 1 549 | weight_filler { 550 | type: "xavier" 551 | std: 0.1 552 | } 553 | bias_filler { 554 | type: "constant" 555 | value: 0.2 556 | } 557 | } 558 | } 559 | layer { 560 | name: "inception_3a/relu_pool_proj" 561 | type: "ReLU" 562 | bottom: "inception_3a/pool_proj" 563 | top: "inception_3a/pool_proj" 564 | } 565 | 566 | layer { 567 | name: "inception_3a/output" 568 | type: "Concat" 569 | bottom: "inception_3a/1x1" 570 | bottom: "inception_3a/3x3" 571 | bottom: "inception_3a/5x5" 572 | bottom: "inception_3a/pool_proj" 573 | top: "inception_3a/output" 574 | } 575 | 576 | layer { 577 | name: "inception_3b/1x1" 578 | type: "Convolution" 579 | bottom: "inception_3a/output" 580 | top: "inception_3b/1x1" 581 | param { 582 | lr_mult: 1 583 | decay_mult: 1 584 | } 585 | param { 586 | lr_mult: 2 587 | decay_mult: 0 588 | } 589 | convolution_param { 590 | num_output: 128 591 | kernel_size: 1 592 | weight_filler { 593 | type: "xavier" 594 | std: 0.03 595 | } 596 | bias_filler { 597 | type: "constant" 598 | value: 0.2 599 | } 600 | } 601 | } 602 | 603 | layer { 604 | name: "inception_3b/relu_1x1" 605 | type: "ReLU" 606 | bottom: "inception_3b/1x1" 607 | top: "inception_3b/1x1" 608 | } 609 | 610 | layer { 611 | name: "inception_3b/3x3_reduce" 612 | type: "Convolution" 613 | bottom: "inception_3a/output" 614 | top: "inception_3b/3x3_reduce" 615 | param { 616 | lr_mult: 1 617 | decay_mult: 1 618 | } 619 | param { 620 | lr_mult: 2 621 | decay_mult: 0 622 | } 623 | convolution_param { 624 | num_output: 128 625 | kernel_size: 1 626 | weight_filler { 627 | type: "xavier" 628 | std: 0.09 629 | } 630 | bias_filler { 631 | type: "constant" 632 | value: 0.2 633 | } 634 | } 635 | } 636 | layer { 637 | name: "inception_3b/relu_3x3_reduce" 638 | type: "ReLU" 639 | bottom: "inception_3b/3x3_reduce" 640 | top: "inception_3b/3x3_reduce" 641 | } 642 | layer { 643 | name: "inception_3b/3x3" 644 | type: "Convolution" 645 | bottom: "inception_3b/3x3_reduce" 646 | top: "inception_3b/3x3" 647 | param { 648 | lr_mult: 1 649 | decay_mult: 1 650 | } 651 | param { 652 | lr_mult: 2 653 | decay_mult: 0 654 | } 655 | convolution_param { 656 | num_output: 192 657 | pad: 1 658 | kernel_size: 3 659 | weight_filler { 660 | type: "xavier" 661 | std: 0.03 662 | } 663 | bias_filler { 664 | type: "constant" 665 | value: 0.2 666 | } 667 | } 668 | } 669 | layer { 670 | name: "inception_3b/relu_3x3" 671 | type: "ReLU" 672 | bottom: "inception_3b/3x3" 673 | top: "inception_3b/3x3" 674 | } 675 | 676 | layer { 677 | name: "inception_3b/5x5_reduce" 678 | type: "Convolution" 679 | bottom: "inception_3a/output" 680 | 
top: "inception_3b/5x5_reduce" 681 | param { 682 | lr_mult: 1 683 | decay_mult: 1 684 | } 685 | param { 686 | lr_mult: 2 687 | decay_mult: 0 688 | } 689 | convolution_param { 690 | num_output: 32 691 | kernel_size: 1 692 | weight_filler { 693 | type: "xavier" 694 | std: 0.2 695 | } 696 | bias_filler { 697 | type: "constant" 698 | value: 0.2 699 | } 700 | } 701 | } 702 | layer { 703 | name: "inception_3b/relu_5x5_reduce" 704 | type: "ReLU" 705 | bottom: "inception_3b/5x5_reduce" 706 | top: "inception_3b/5x5_reduce" 707 | } 708 | layer { 709 | name: "inception_3b/5x5" 710 | type: "Convolution" 711 | bottom: "inception_3b/5x5_reduce" 712 | top: "inception_3b/5x5" 713 | param { 714 | lr_mult: 1 715 | decay_mult: 1 716 | } 717 | param { 718 | lr_mult: 2 719 | decay_mult: 0 720 | } 721 | convolution_param { 722 | num_output: 96 723 | pad: 2 724 | kernel_size: 5 725 | weight_filler { 726 | type: "xavier" 727 | std: 0.03 728 | } 729 | bias_filler { 730 | type: "constant" 731 | value: 0.2 732 | } 733 | } 734 | } 735 | layer { 736 | name: "inception_3b/relu_5x5" 737 | type: "ReLU" 738 | bottom: "inception_3b/5x5" 739 | top: "inception_3b/5x5" 740 | } 741 | 742 | layer { 743 | name: "inception_3b/pool" 744 | type: "Pooling" 745 | bottom: "inception_3a/output" 746 | top: "inception_3b/pool" 747 | pooling_param { 748 | pool: MAX 749 | kernel_size: 3 750 | stride: 1 751 | pad: 1 752 | } 753 | } 754 | layer { 755 | name: "inception_3b/pool_proj" 756 | type: "Convolution" 757 | bottom: "inception_3b/pool" 758 | top: "inception_3b/pool_proj" 759 | param { 760 | lr_mult: 1 761 | decay_mult: 1 762 | } 763 | param { 764 | lr_mult: 2 765 | decay_mult: 0 766 | } 767 | convolution_param { 768 | num_output: 64 769 | kernel_size: 1 770 | weight_filler { 771 | type: "xavier" 772 | std: 0.1 773 | } 774 | bias_filler { 775 | type: "constant" 776 | value: 0.2 777 | } 778 | } 779 | } 780 | layer { 781 | name: "inception_3b/relu_pool_proj" 782 | type: "ReLU" 783 | bottom: "inception_3b/pool_proj" 784 | top: "inception_3b/pool_proj" 785 | } 786 | layer { 787 | name: "inception_3b/output" 788 | type: "Concat" 789 | bottom: "inception_3b/1x1" 790 | bottom: "inception_3b/3x3" 791 | bottom: "inception_3b/5x5" 792 | bottom: "inception_3b/pool_proj" 793 | top: "inception_3b/output" 794 | } 795 | 796 | layer { 797 | name: "pool3/3x3_s2" 798 | type: "Pooling" 799 | bottom: "inception_3b/output" 800 | top: "pool3/3x3_s2" 801 | pooling_param { 802 | pool: MAX 803 | kernel_size: 3 804 | stride: 2 805 | } 806 | } 807 | 808 | layer { 809 | name: "inception_4a/1x1" 810 | type: "Convolution" 811 | bottom: "pool3/3x3_s2" 812 | top: "inception_4a/1x1" 813 | param { 814 | lr_mult: 1 815 | decay_mult: 1 816 | } 817 | param { 818 | lr_mult: 2 819 | decay_mult: 0 820 | } 821 | convolution_param { 822 | num_output: 192 823 | kernel_size: 1 824 | weight_filler { 825 | type: "xavier" 826 | std: 0.03 827 | } 828 | bias_filler { 829 | type: "constant" 830 | value: 0.2 831 | } 832 | } 833 | } 834 | 835 | layer { 836 | name: "inception_4a/relu_1x1" 837 | type: "ReLU" 838 | bottom: "inception_4a/1x1" 839 | top: "inception_4a/1x1" 840 | } 841 | 842 | layer { 843 | name: "inception_4a/3x3_reduce" 844 | type: "Convolution" 845 | bottom: "pool3/3x3_s2" 846 | top: "inception_4a/3x3_reduce" 847 | param { 848 | lr_mult: 1 849 | decay_mult: 1 850 | } 851 | param { 852 | lr_mult: 2 853 | decay_mult: 0 854 | } 855 | convolution_param { 856 | num_output: 96 857 | kernel_size: 1 858 | weight_filler { 859 | type: "xavier" 860 | std: 0.09 861 | } 862 | 
bias_filler { 863 | type: "constant" 864 | value: 0.2 865 | } 866 | } 867 | } 868 | 869 | layer { 870 | name: "inception_4a/relu_3x3_reduce" 871 | type: "ReLU" 872 | bottom: "inception_4a/3x3_reduce" 873 | top: "inception_4a/3x3_reduce" 874 | } 875 | 876 | layer { 877 | name: "inception_4a/3x3" 878 | type: "Convolution" 879 | bottom: "inception_4a/3x3_reduce" 880 | top: "inception_4a/3x3" 881 | param { 882 | lr_mult: 1 883 | decay_mult: 1 884 | } 885 | param { 886 | lr_mult: 2 887 | decay_mult: 0 888 | } 889 | convolution_param { 890 | num_output: 208 891 | pad: 1 892 | kernel_size: 3 893 | weight_filler { 894 | type: "xavier" 895 | std: 0.03 896 | } 897 | bias_filler { 898 | type: "constant" 899 | value: 0.2 900 | } 901 | } 902 | } 903 | 904 | layer { 905 | name: "inception_4a/relu_3x3" 906 | type: "ReLU" 907 | bottom: "inception_4a/3x3" 908 | top: "inception_4a/3x3" 909 | } 910 | 911 | layer { 912 | name: "inception_4a/5x5_reduce" 913 | type: "Convolution" 914 | bottom: "pool3/3x3_s2" 915 | top: "inception_4a/5x5_reduce" 916 | param { 917 | lr_mult: 1 918 | decay_mult: 1 919 | } 920 | param { 921 | lr_mult: 2 922 | decay_mult: 0 923 | } 924 | convolution_param { 925 | num_output: 16 926 | kernel_size: 1 927 | weight_filler { 928 | type: "xavier" 929 | std: 0.2 930 | } 931 | bias_filler { 932 | type: "constant" 933 | value: 0.2 934 | } 935 | } 936 | } 937 | layer { 938 | name: "inception_4a/relu_5x5_reduce" 939 | type: "ReLU" 940 | bottom: "inception_4a/5x5_reduce" 941 | top: "inception_4a/5x5_reduce" 942 | } 943 | layer { 944 | name: "inception_4a/5x5" 945 | type: "Convolution" 946 | bottom: "inception_4a/5x5_reduce" 947 | top: "inception_4a/5x5" 948 | param { 949 | lr_mult: 1 950 | decay_mult: 1 951 | } 952 | param { 953 | lr_mult: 2 954 | decay_mult: 0 955 | } 956 | convolution_param { 957 | num_output: 48 958 | pad: 2 959 | kernel_size: 5 960 | weight_filler { 961 | type: "xavier" 962 | std: 0.03 963 | } 964 | bias_filler { 965 | type: "constant" 966 | value: 0.2 967 | } 968 | } 969 | } 970 | layer { 971 | name: "inception_4a/relu_5x5" 972 | type: "ReLU" 973 | bottom: "inception_4a/5x5" 974 | top: "inception_4a/5x5" 975 | } 976 | layer { 977 | name: "inception_4a/pool" 978 | type: "Pooling" 979 | bottom: "pool3/3x3_s2" 980 | top: "inception_4a/pool" 981 | pooling_param { 982 | pool: MAX 983 | kernel_size: 3 984 | stride: 1 985 | pad: 1 986 | } 987 | } 988 | layer { 989 | name: "inception_4a/pool_proj" 990 | type: "Convolution" 991 | bottom: "inception_4a/pool" 992 | top: "inception_4a/pool_proj" 993 | param { 994 | lr_mult: 1 995 | decay_mult: 1 996 | } 997 | param { 998 | lr_mult: 2 999 | decay_mult: 0 1000 | } 1001 | convolution_param { 1002 | num_output: 64 1003 | kernel_size: 1 1004 | weight_filler { 1005 | type: "xavier" 1006 | std: 0.1 1007 | } 1008 | bias_filler { 1009 | type: "constant" 1010 | value: 0.2 1011 | } 1012 | } 1013 | } 1014 | layer { 1015 | name: "inception_4a/relu_pool_proj" 1016 | type: "ReLU" 1017 | bottom: "inception_4a/pool_proj" 1018 | top: "inception_4a/pool_proj" 1019 | } 1020 | layer { 1021 | name: "inception_4a/output" 1022 | type: "Concat" 1023 | bottom: "inception_4a/1x1" 1024 | bottom: "inception_4a/3x3" 1025 | bottom: "inception_4a/5x5" 1026 | bottom: "inception_4a/pool_proj" 1027 | top: "inception_4a/output" 1028 | } 1029 | 1030 | layer { 1031 | name: "inception_4b/1x1" 1032 | type: "Convolution" 1033 | bottom: "inception_4a/output" 1034 | top: "inception_4b/1x1" 1035 | param { 1036 | lr_mult: 1 1037 | decay_mult: 1 1038 | } 1039 | param { 1040 | 
lr_mult: 2 1041 | decay_mult: 0 1042 | } 1043 | convolution_param { 1044 | num_output: 160 1045 | kernel_size: 1 1046 | weight_filler { 1047 | type: "xavier" 1048 | std: 0.03 1049 | } 1050 | bias_filler { 1051 | type: "constant" 1052 | value: 0.2 1053 | } 1054 | } 1055 | } 1056 | 1057 | layer { 1058 | name: "inception_4b/relu_1x1" 1059 | type: "ReLU" 1060 | bottom: "inception_4b/1x1" 1061 | top: "inception_4b/1x1" 1062 | } 1063 | layer { 1064 | name: "inception_4b/3x3_reduce" 1065 | type: "Convolution" 1066 | bottom: "inception_4a/output" 1067 | top: "inception_4b/3x3_reduce" 1068 | param { 1069 | lr_mult: 1 1070 | decay_mult: 1 1071 | } 1072 | param { 1073 | lr_mult: 2 1074 | decay_mult: 0 1075 | } 1076 | convolution_param { 1077 | num_output: 112 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "xavier" 1081 | std: 0.09 1082 | } 1083 | bias_filler { 1084 | type: "constant" 1085 | value: 0.2 1086 | } 1087 | } 1088 | } 1089 | layer { 1090 | name: "inception_4b/relu_3x3_reduce" 1091 | type: "ReLU" 1092 | bottom: "inception_4b/3x3_reduce" 1093 | top: "inception_4b/3x3_reduce" 1094 | } 1095 | layer { 1096 | name: "inception_4b/3x3" 1097 | type: "Convolution" 1098 | bottom: "inception_4b/3x3_reduce" 1099 | top: "inception_4b/3x3" 1100 | param { 1101 | lr_mult: 1 1102 | decay_mult: 1 1103 | } 1104 | param { 1105 | lr_mult: 2 1106 | decay_mult: 0 1107 | } 1108 | convolution_param { 1109 | num_output: 224 1110 | pad: 1 1111 | kernel_size: 3 1112 | weight_filler { 1113 | type: "xavier" 1114 | std: 0.03 1115 | } 1116 | bias_filler { 1117 | type: "constant" 1118 | value: 0.2 1119 | } 1120 | } 1121 | } 1122 | layer { 1123 | name: "inception_4b/relu_3x3" 1124 | type: "ReLU" 1125 | bottom: "inception_4b/3x3" 1126 | top: "inception_4b/3x3" 1127 | } 1128 | layer { 1129 | name: "inception_4b/5x5_reduce" 1130 | type: "Convolution" 1131 | bottom: "inception_4a/output" 1132 | top: "inception_4b/5x5_reduce" 1133 | param { 1134 | lr_mult: 1 1135 | decay_mult: 1 1136 | } 1137 | param { 1138 | lr_mult: 2 1139 | decay_mult: 0 1140 | } 1141 | convolution_param { 1142 | num_output: 24 1143 | kernel_size: 1 1144 | weight_filler { 1145 | type: "xavier" 1146 | std: 0.2 1147 | } 1148 | bias_filler { 1149 | type: "constant" 1150 | value: 0.2 1151 | } 1152 | } 1153 | } 1154 | layer { 1155 | name: "inception_4b/relu_5x5_reduce" 1156 | type: "ReLU" 1157 | bottom: "inception_4b/5x5_reduce" 1158 | top: "inception_4b/5x5_reduce" 1159 | } 1160 | layer { 1161 | name: "inception_4b/5x5" 1162 | type: "Convolution" 1163 | bottom: "inception_4b/5x5_reduce" 1164 | top: "inception_4b/5x5" 1165 | param { 1166 | lr_mult: 1 1167 | decay_mult: 1 1168 | } 1169 | param { 1170 | lr_mult: 2 1171 | decay_mult: 0 1172 | } 1173 | convolution_param { 1174 | num_output: 64 1175 | pad: 2 1176 | kernel_size: 5 1177 | weight_filler { 1178 | type: "xavier" 1179 | std: 0.03 1180 | } 1181 | bias_filler { 1182 | type: "constant" 1183 | value: 0.2 1184 | } 1185 | } 1186 | } 1187 | layer { 1188 | name: "inception_4b/relu_5x5" 1189 | type: "ReLU" 1190 | bottom: "inception_4b/5x5" 1191 | top: "inception_4b/5x5" 1192 | } 1193 | layer { 1194 | name: "inception_4b/pool" 1195 | type: "Pooling" 1196 | bottom: "inception_4a/output" 1197 | top: "inception_4b/pool" 1198 | pooling_param { 1199 | pool: MAX 1200 | kernel_size: 3 1201 | stride: 1 1202 | pad: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "inception_4b/pool_proj" 1207 | type: "Convolution" 1208 | bottom: "inception_4b/pool" 1209 | top: "inception_4b/pool_proj" 1210 | param { 1211 | lr_mult: 1 
1212 | decay_mult: 1 1213 | } 1214 | param { 1215 | lr_mult: 2 1216 | decay_mult: 0 1217 | } 1218 | convolution_param { 1219 | num_output: 64 1220 | kernel_size: 1 1221 | weight_filler { 1222 | type: "xavier" 1223 | std: 0.1 1224 | } 1225 | bias_filler { 1226 | type: "constant" 1227 | value: 0.2 1228 | } 1229 | } 1230 | } 1231 | layer { 1232 | name: "inception_4b/relu_pool_proj" 1233 | type: "ReLU" 1234 | bottom: "inception_4b/pool_proj" 1235 | top: "inception_4b/pool_proj" 1236 | } 1237 | layer { 1238 | name: "inception_4b/output" 1239 | type: "Concat" 1240 | bottom: "inception_4b/1x1" 1241 | bottom: "inception_4b/3x3" 1242 | bottom: "inception_4b/5x5" 1243 | bottom: "inception_4b/pool_proj" 1244 | top: "inception_4b/output" 1245 | } 1246 | 1247 | layer { 1248 | name: "inception_4c/1x1" 1249 | type: "Convolution" 1250 | bottom: "inception_4b/output" 1251 | top: "inception_4c/1x1" 1252 | param { 1253 | lr_mult: 1 1254 | decay_mult: 1 1255 | } 1256 | param { 1257 | lr_mult: 2 1258 | decay_mult: 0 1259 | } 1260 | convolution_param { 1261 | num_output: 128 1262 | kernel_size: 1 1263 | weight_filler { 1264 | type: "xavier" 1265 | std: 0.03 1266 | } 1267 | bias_filler { 1268 | type: "constant" 1269 | value: 0.2 1270 | } 1271 | } 1272 | } 1273 | 1274 | layer { 1275 | name: "inception_4c/relu_1x1" 1276 | type: "ReLU" 1277 | bottom: "inception_4c/1x1" 1278 | top: "inception_4c/1x1" 1279 | } 1280 | 1281 | layer { 1282 | name: "inception_4c/3x3_reduce" 1283 | type: "Convolution" 1284 | bottom: "inception_4b/output" 1285 | top: "inception_4c/3x3_reduce" 1286 | param { 1287 | lr_mult: 1 1288 | decay_mult: 1 1289 | } 1290 | param { 1291 | lr_mult: 2 1292 | decay_mult: 0 1293 | } 1294 | convolution_param { 1295 | num_output: 128 1296 | kernel_size: 1 1297 | weight_filler { 1298 | type: "xavier" 1299 | std: 0.09 1300 | } 1301 | bias_filler { 1302 | type: "constant" 1303 | value: 0.2 1304 | } 1305 | } 1306 | } 1307 | 1308 | layer { 1309 | name: "inception_4c/relu_3x3_reduce" 1310 | type: "ReLU" 1311 | bottom: "inception_4c/3x3_reduce" 1312 | top: "inception_4c/3x3_reduce" 1313 | } 1314 | layer { 1315 | name: "inception_4c/3x3" 1316 | type: "Convolution" 1317 | bottom: "inception_4c/3x3_reduce" 1318 | top: "inception_4c/3x3" 1319 | param { 1320 | lr_mult: 1 1321 | decay_mult: 1 1322 | } 1323 | param { 1324 | lr_mult: 2 1325 | decay_mult: 0 1326 | } 1327 | convolution_param { 1328 | num_output: 256 1329 | pad: 1 1330 | kernel_size: 3 1331 | weight_filler { 1332 | type: "xavier" 1333 | std: 0.03 1334 | } 1335 | bias_filler { 1336 | type: "constant" 1337 | value: 0.2 1338 | } 1339 | } 1340 | } 1341 | layer { 1342 | name: "inception_4c/relu_3x3" 1343 | type: "ReLU" 1344 | bottom: "inception_4c/3x3" 1345 | top: "inception_4c/3x3" 1346 | } 1347 | layer { 1348 | name: "inception_4c/5x5_reduce" 1349 | type: "Convolution" 1350 | bottom: "inception_4b/output" 1351 | top: "inception_4c/5x5_reduce" 1352 | param { 1353 | lr_mult: 1 1354 | decay_mult: 1 1355 | } 1356 | param { 1357 | lr_mult: 2 1358 | decay_mult: 0 1359 | } 1360 | convolution_param { 1361 | num_output: 24 1362 | kernel_size: 1 1363 | weight_filler { 1364 | type: "xavier" 1365 | std: 0.2 1366 | } 1367 | bias_filler { 1368 | type: "constant" 1369 | value: 0.2 1370 | } 1371 | } 1372 | } 1373 | layer { 1374 | name: "inception_4c/relu_5x5_reduce" 1375 | type: "ReLU" 1376 | bottom: "inception_4c/5x5_reduce" 1377 | top: "inception_4c/5x5_reduce" 1378 | } 1379 | layer { 1380 | name: "inception_4c/5x5" 1381 | type: "Convolution" 1382 | bottom: 
"inception_4c/5x5_reduce" 1383 | top: "inception_4c/5x5" 1384 | param { 1385 | lr_mult: 1 1386 | decay_mult: 1 1387 | } 1388 | param { 1389 | lr_mult: 2 1390 | decay_mult: 0 1391 | } 1392 | convolution_param { 1393 | num_output: 64 1394 | pad: 2 1395 | kernel_size: 5 1396 | weight_filler { 1397 | type: "xavier" 1398 | std: 0.03 1399 | } 1400 | bias_filler { 1401 | type: "constant" 1402 | value: 0.2 1403 | } 1404 | } 1405 | } 1406 | layer { 1407 | name: "inception_4c/relu_5x5" 1408 | type: "ReLU" 1409 | bottom: "inception_4c/5x5" 1410 | top: "inception_4c/5x5" 1411 | } 1412 | layer { 1413 | name: "inception_4c/pool" 1414 | type: "Pooling" 1415 | bottom: "inception_4b/output" 1416 | top: "inception_4c/pool" 1417 | pooling_param { 1418 | pool: MAX 1419 | kernel_size: 3 1420 | stride: 1 1421 | pad: 1 1422 | } 1423 | } 1424 | layer { 1425 | name: "inception_4c/pool_proj" 1426 | type: "Convolution" 1427 | bottom: "inception_4c/pool" 1428 | top: "inception_4c/pool_proj" 1429 | param { 1430 | lr_mult: 1 1431 | decay_mult: 1 1432 | } 1433 | param { 1434 | lr_mult: 2 1435 | decay_mult: 0 1436 | } 1437 | convolution_param { 1438 | num_output: 64 1439 | kernel_size: 1 1440 | weight_filler { 1441 | type: "xavier" 1442 | std: 0.1 1443 | } 1444 | bias_filler { 1445 | type: "constant" 1446 | value: 0.2 1447 | } 1448 | } 1449 | } 1450 | layer { 1451 | name: "inception_4c/relu_pool_proj" 1452 | type: "ReLU" 1453 | bottom: "inception_4c/pool_proj" 1454 | top: "inception_4c/pool_proj" 1455 | } 1456 | layer { 1457 | name: "inception_4c/output" 1458 | type: "Concat" 1459 | bottom: "inception_4c/1x1" 1460 | bottom: "inception_4c/3x3" 1461 | bottom: "inception_4c/5x5" 1462 | bottom: "inception_4c/pool_proj" 1463 | top: "inception_4c/output" 1464 | } 1465 | 1466 | layer { 1467 | name: "inception_4d/1x1" 1468 | type: "Convolution" 1469 | bottom: "inception_4c/output" 1470 | top: "inception_4d/1x1" 1471 | param { 1472 | lr_mult: 1 1473 | decay_mult: 1 1474 | } 1475 | param { 1476 | lr_mult: 2 1477 | decay_mult: 0 1478 | } 1479 | convolution_param { 1480 | num_output: 112 1481 | kernel_size: 1 1482 | weight_filler { 1483 | type: "xavier" 1484 | std: 0.1 1485 | } 1486 | bias_filler { 1487 | type: "constant" 1488 | value: 0.2 1489 | } 1490 | } 1491 | } 1492 | layer { 1493 | name: "inception_4d/relu_1x1" 1494 | type: "ReLU" 1495 | bottom: "inception_4d/1x1" 1496 | top: "inception_4d/1x1" 1497 | } 1498 | layer { 1499 | name: "inception_4d/3x3_reduce" 1500 | type: "Convolution" 1501 | bottom: "inception_4c/output" 1502 | top: "inception_4d/3x3_reduce" 1503 | param { 1504 | lr_mult: 1 1505 | decay_mult: 1 1506 | } 1507 | param { 1508 | lr_mult: 2 1509 | decay_mult: 0 1510 | } 1511 | convolution_param { 1512 | num_output: 144 1513 | kernel_size: 1 1514 | weight_filler { 1515 | type: "xavier" 1516 | std: 0.1 1517 | } 1518 | bias_filler { 1519 | type: "constant" 1520 | value: 0.2 1521 | } 1522 | } 1523 | } 1524 | layer { 1525 | name: "inception_4d/relu_3x3_reduce" 1526 | type: "ReLU" 1527 | bottom: "inception_4d/3x3_reduce" 1528 | top: "inception_4d/3x3_reduce" 1529 | } 1530 | layer { 1531 | name: "inception_4d/3x3" 1532 | type: "Convolution" 1533 | bottom: "inception_4d/3x3_reduce" 1534 | top: "inception_4d/3x3" 1535 | param { 1536 | lr_mult: 1 1537 | decay_mult: 1 1538 | } 1539 | param { 1540 | lr_mult: 2 1541 | decay_mult: 0 1542 | } 1543 | convolution_param { 1544 | num_output: 288 1545 | pad: 1 1546 | kernel_size: 3 1547 | weight_filler { 1548 | type: "xavier" 1549 | std: 0.1 1550 | } 1551 | bias_filler { 1552 | type: 
"constant" 1553 | value: 0.2 1554 | } 1555 | } 1556 | } 1557 | layer { 1558 | name: "inception_4d/relu_3x3" 1559 | type: "ReLU" 1560 | bottom: "inception_4d/3x3" 1561 | top: "inception_4d/3x3" 1562 | } 1563 | layer { 1564 | name: "inception_4d/5x5_reduce" 1565 | type: "Convolution" 1566 | bottom: "inception_4c/output" 1567 | top: "inception_4d/5x5_reduce" 1568 | param { 1569 | lr_mult: 1 1570 | decay_mult: 1 1571 | } 1572 | param { 1573 | lr_mult: 2 1574 | decay_mult: 0 1575 | } 1576 | convolution_param { 1577 | num_output: 32 1578 | kernel_size: 1 1579 | weight_filler { 1580 | type: "xavier" 1581 | std: 0.1 1582 | } 1583 | bias_filler { 1584 | type: "constant" 1585 | value: 0.2 1586 | } 1587 | } 1588 | } 1589 | layer { 1590 | name: "inception_4d/relu_5x5_reduce" 1591 | type: "ReLU" 1592 | bottom: "inception_4d/5x5_reduce" 1593 | top: "inception_4d/5x5_reduce" 1594 | } 1595 | layer { 1596 | name: "inception_4d/5x5" 1597 | type: "Convolution" 1598 | bottom: "inception_4d/5x5_reduce" 1599 | top: "inception_4d/5x5" 1600 | param { 1601 | lr_mult: 1 1602 | decay_mult: 1 1603 | } 1604 | param { 1605 | lr_mult: 2 1606 | decay_mult: 0 1607 | } 1608 | convolution_param { 1609 | num_output: 64 1610 | pad: 2 1611 | kernel_size: 5 1612 | weight_filler { 1613 | type: "xavier" 1614 | std: 0.1 1615 | } 1616 | bias_filler { 1617 | type: "constant" 1618 | value: 0.2 1619 | } 1620 | } 1621 | } 1622 | layer { 1623 | name: "inception_4d/relu_5x5" 1624 | type: "ReLU" 1625 | bottom: "inception_4d/5x5" 1626 | top: "inception_4d/5x5" 1627 | } 1628 | layer { 1629 | name: "inception_4d/pool" 1630 | type: "Pooling" 1631 | bottom: "inception_4c/output" 1632 | top: "inception_4d/pool" 1633 | pooling_param { 1634 | pool: MAX 1635 | kernel_size: 3 1636 | stride: 1 1637 | pad: 1 1638 | } 1639 | } 1640 | layer { 1641 | name: "inception_4d/pool_proj" 1642 | type: "Convolution" 1643 | bottom: "inception_4d/pool" 1644 | top: "inception_4d/pool_proj" 1645 | param { 1646 | lr_mult: 1 1647 | decay_mult: 1 1648 | } 1649 | param { 1650 | lr_mult: 2 1651 | decay_mult: 0 1652 | } 1653 | convolution_param { 1654 | num_output: 64 1655 | kernel_size: 1 1656 | weight_filler { 1657 | type: "xavier" 1658 | std: 0.1 1659 | } 1660 | bias_filler { 1661 | type: "constant" 1662 | value: 0.2 1663 | } 1664 | } 1665 | } 1666 | layer { 1667 | name: "inception_4d/relu_pool_proj" 1668 | type: "ReLU" 1669 | bottom: "inception_4d/pool_proj" 1670 | top: "inception_4d/pool_proj" 1671 | } 1672 | layer { 1673 | name: "inception_4d/output" 1674 | type: "Concat" 1675 | bottom: "inception_4d/1x1" 1676 | bottom: "inception_4d/3x3" 1677 | bottom: "inception_4d/5x5" 1678 | bottom: "inception_4d/pool_proj" 1679 | top: "inception_4d/output" 1680 | } 1681 | 1682 | layer { 1683 | name: "inception_4e/1x1" 1684 | type: "Convolution" 1685 | bottom: "inception_4d/output" 1686 | top: "inception_4e/1x1" 1687 | param { 1688 | lr_mult: 1 1689 | decay_mult: 1 1690 | } 1691 | param { 1692 | lr_mult: 2 1693 | decay_mult: 0 1694 | } 1695 | convolution_param { 1696 | num_output: 256 1697 | kernel_size: 1 1698 | weight_filler { 1699 | type: "xavier" 1700 | std: 0.03 1701 | } 1702 | bias_filler { 1703 | type: "constant" 1704 | value: 0.2 1705 | } 1706 | } 1707 | } 1708 | layer { 1709 | name: "inception_4e/relu_1x1" 1710 | type: "ReLU" 1711 | bottom: "inception_4e/1x1" 1712 | top: "inception_4e/1x1" 1713 | } 1714 | layer { 1715 | name: "inception_4e/3x3_reduce" 1716 | type: "Convolution" 1717 | bottom: "inception_4d/output" 1718 | top: "inception_4e/3x3_reduce" 1719 | param { 
1720 | lr_mult: 1 1721 | decay_mult: 1 1722 | } 1723 | param { 1724 | lr_mult: 2 1725 | decay_mult: 0 1726 | } 1727 | convolution_param { 1728 | num_output: 160 1729 | kernel_size: 1 1730 | weight_filler { 1731 | type: "xavier" 1732 | std: 0.09 1733 | } 1734 | bias_filler { 1735 | type: "constant" 1736 | value: 0.2 1737 | } 1738 | } 1739 | } 1740 | layer { 1741 | name: "inception_4e/relu_3x3_reduce" 1742 | type: "ReLU" 1743 | bottom: "inception_4e/3x3_reduce" 1744 | top: "inception_4e/3x3_reduce" 1745 | } 1746 | layer { 1747 | name: "inception_4e/3x3" 1748 | type: "Convolution" 1749 | bottom: "inception_4e/3x3_reduce" 1750 | top: "inception_4e/3x3" 1751 | param { 1752 | lr_mult: 1 1753 | decay_mult: 1 1754 | } 1755 | param { 1756 | lr_mult: 2 1757 | decay_mult: 0 1758 | } 1759 | convolution_param { 1760 | num_output: 320 1761 | pad: 1 1762 | kernel_size: 3 1763 | weight_filler { 1764 | type: "xavier" 1765 | std: 0.03 1766 | } 1767 | bias_filler { 1768 | type: "constant" 1769 | value: 0.2 1770 | } 1771 | } 1772 | } 1773 | layer { 1774 | name: "inception_4e/relu_3x3" 1775 | type: "ReLU" 1776 | bottom: "inception_4e/3x3" 1777 | top: "inception_4e/3x3" 1778 | } 1779 | layer { 1780 | name: "inception_4e/5x5_reduce" 1781 | type: "Convolution" 1782 | bottom: "inception_4d/output" 1783 | top: "inception_4e/5x5_reduce" 1784 | param { 1785 | lr_mult: 1 1786 | decay_mult: 1 1787 | } 1788 | param { 1789 | lr_mult: 2 1790 | decay_mult: 0 1791 | } 1792 | convolution_param { 1793 | num_output: 32 1794 | kernel_size: 1 1795 | weight_filler { 1796 | type: "xavier" 1797 | std: 0.2 1798 | } 1799 | bias_filler { 1800 | type: "constant" 1801 | value: 0.2 1802 | } 1803 | } 1804 | } 1805 | layer { 1806 | name: "inception_4e/relu_5x5_reduce" 1807 | type: "ReLU" 1808 | bottom: "inception_4e/5x5_reduce" 1809 | top: "inception_4e/5x5_reduce" 1810 | } 1811 | layer { 1812 | name: "inception_4e/5x5" 1813 | type: "Convolution" 1814 | bottom: "inception_4e/5x5_reduce" 1815 | top: "inception_4e/5x5" 1816 | param { 1817 | lr_mult: 1 1818 | decay_mult: 1 1819 | } 1820 | param { 1821 | lr_mult: 2 1822 | decay_mult: 0 1823 | } 1824 | convolution_param { 1825 | num_output: 128 1826 | pad: 2 1827 | kernel_size: 5 1828 | weight_filler { 1829 | type: "xavier" 1830 | std: 0.03 1831 | } 1832 | bias_filler { 1833 | type: "constant" 1834 | value: 0.2 1835 | } 1836 | } 1837 | } 1838 | layer { 1839 | name: "inception_4e/relu_5x5" 1840 | type: "ReLU" 1841 | bottom: "inception_4e/5x5" 1842 | top: "inception_4e/5x5" 1843 | } 1844 | layer { 1845 | name: "inception_4e/pool" 1846 | type: "Pooling" 1847 | bottom: "inception_4d/output" 1848 | top: "inception_4e/pool" 1849 | pooling_param { 1850 | pool: MAX 1851 | kernel_size: 3 1852 | stride: 1 1853 | pad: 1 1854 | } 1855 | } 1856 | layer { 1857 | name: "inception_4e/pool_proj" 1858 | type: "Convolution" 1859 | bottom: "inception_4e/pool" 1860 | top: "inception_4e/pool_proj" 1861 | param { 1862 | lr_mult: 1 1863 | decay_mult: 1 1864 | } 1865 | param { 1866 | lr_mult: 2 1867 | decay_mult: 0 1868 | } 1869 | convolution_param { 1870 | num_output: 128 1871 | kernel_size: 1 1872 | weight_filler { 1873 | type: "xavier" 1874 | std: 0.1 1875 | } 1876 | bias_filler { 1877 | type: "constant" 1878 | value: 0.2 1879 | } 1880 | } 1881 | } 1882 | layer { 1883 | name: "inception_4e/relu_pool_proj" 1884 | type: "ReLU" 1885 | bottom: "inception_4e/pool_proj" 1886 | top: "inception_4e/pool_proj" 1887 | } 1888 | layer { 1889 | name: "inception_4e/output" 1890 | type: "Concat" 1891 | bottom: "inception_4e/1x1" 
1892 | bottom: "inception_4e/3x3" 1893 | bottom: "inception_4e/5x5" 1894 | bottom: "inception_4e/pool_proj" 1895 | top: "inception_4e/output" 1896 | } 1897 | 1898 | 1899 | 1900 | layer { 1901 | name: "inception_5a/1x1" 1902 | type: "Convolution" 1903 | bottom: "inception_4e/output" 1904 | top: "inception_5a/1x1" 1905 | param { 1906 | lr_mult: 1 1907 | decay_mult: 1 1908 | } 1909 | param { 1910 | lr_mult: 2 1911 | decay_mult: 0 1912 | } 1913 | convolution_param { 1914 | num_output: 256 1915 | kernel_size: 1 1916 | weight_filler { 1917 | type: "xavier" 1918 | std: 0.03 1919 | } 1920 | bias_filler { 1921 | type: "constant" 1922 | value: 0.2 1923 | } 1924 | } 1925 | } 1926 | layer { 1927 | name: "inception_5a/relu_1x1" 1928 | type: "ReLU" 1929 | bottom: "inception_5a/1x1" 1930 | top: "inception_5a/1x1" 1931 | } 1932 | 1933 | layer { 1934 | name: "inception_5a/3x3_reduce" 1935 | type: "Convolution" 1936 | bottom: "inception_4e/output" 1937 | top: "inception_5a/3x3_reduce" 1938 | param { 1939 | lr_mult: 1 1940 | decay_mult: 1 1941 | } 1942 | param { 1943 | lr_mult: 2 1944 | decay_mult: 0 1945 | } 1946 | convolution_param { 1947 | num_output: 160 1948 | kernel_size: 1 1949 | weight_filler { 1950 | type: "xavier" 1951 | std: 0.09 1952 | } 1953 | bias_filler { 1954 | type: "constant" 1955 | value: 0.2 1956 | } 1957 | } 1958 | } 1959 | layer { 1960 | name: "inception_5a/relu_3x3_reduce" 1961 | type: "ReLU" 1962 | bottom: "inception_5a/3x3_reduce" 1963 | top: "inception_5a/3x3_reduce" 1964 | } 1965 | 1966 | layer { 1967 | name: "inception_5a/3x3" 1968 | type: "Convolution" 1969 | bottom: "inception_5a/3x3_reduce" 1970 | top: "inception_5a/3x3" 1971 | param { 1972 | lr_mult: 1 1973 | decay_mult: 1 1974 | } 1975 | param { 1976 | lr_mult: 2 1977 | decay_mult: 0 1978 | } 1979 | convolution_param { 1980 | num_output: 320 1981 | pad: 1 1982 | kernel_size: 3 1983 | weight_filler { 1984 | type: "xavier" 1985 | std: 0.03 1986 | } 1987 | bias_filler { 1988 | type: "constant" 1989 | value: 0.2 1990 | } 1991 | } 1992 | } 1993 | layer { 1994 | name: "inception_5a/relu_3x3" 1995 | type: "ReLU" 1996 | bottom: "inception_5a/3x3" 1997 | top: "inception_5a/3x3" 1998 | } 1999 | layer { 2000 | name: "inception_5a/5x5_reduce" 2001 | type: "Convolution" 2002 | bottom: "inception_4e/output" 2003 | top: "inception_5a/5x5_reduce" 2004 | param { 2005 | lr_mult: 1 2006 | decay_mult: 1 2007 | } 2008 | param { 2009 | lr_mult: 2 2010 | decay_mult: 0 2011 | } 2012 | convolution_param { 2013 | num_output: 32 2014 | kernel_size: 1 2015 | weight_filler { 2016 | type: "xavier" 2017 | std: 0.2 2018 | } 2019 | bias_filler { 2020 | type: "constant" 2021 | value: 0.2 2022 | } 2023 | } 2024 | } 2025 | layer { 2026 | name: "inception_5a/relu_5x5_reduce" 2027 | type: "ReLU" 2028 | bottom: "inception_5a/5x5_reduce" 2029 | top: "inception_5a/5x5_reduce" 2030 | } 2031 | layer { 2032 | name: "inception_5a/5x5" 2033 | type: "Convolution" 2034 | bottom: "inception_5a/5x5_reduce" 2035 | top: "inception_5a/5x5" 2036 | param { 2037 | lr_mult: 1 2038 | decay_mult: 1 2039 | } 2040 | param { 2041 | lr_mult: 2 2042 | decay_mult: 0 2043 | } 2044 | convolution_param { 2045 | num_output: 128 2046 | pad: 2 2047 | kernel_size: 5 2048 | weight_filler { 2049 | type: "xavier" 2050 | std: 0.03 2051 | } 2052 | bias_filler { 2053 | type: "constant" 2054 | value: 0.2 2055 | } 2056 | } 2057 | } 2058 | layer { 2059 | name: "inception_5a/relu_5x5" 2060 | type: "ReLU" 2061 | bottom: "inception_5a/5x5" 2062 | top: "inception_5a/5x5" 2063 | } 2064 | layer { 2065 | name: 
"inception_5a/pool" 2066 | type: "Pooling" 2067 | bottom: "inception_4e/output" 2068 | top: "inception_5a/pool" 2069 | pooling_param { 2070 | pool: MAX 2071 | kernel_size: 3 2072 | stride: 1 2073 | pad: 1 2074 | } 2075 | } 2076 | layer { 2077 | name: "inception_5a/pool_proj" 2078 | type: "Convolution" 2079 | bottom: "inception_5a/pool" 2080 | top: "inception_5a/pool_proj" 2081 | param { 2082 | lr_mult: 1 2083 | decay_mult: 1 2084 | } 2085 | param { 2086 | lr_mult: 2 2087 | decay_mult: 0 2088 | } 2089 | convolution_param { 2090 | num_output: 128 2091 | kernel_size: 1 2092 | weight_filler { 2093 | type: "xavier" 2094 | std: 0.1 2095 | } 2096 | bias_filler { 2097 | type: "constant" 2098 | value: 0.2 2099 | } 2100 | } 2101 | } 2102 | layer { 2103 | name: "inception_5a/relu_pool_proj" 2104 | type: "ReLU" 2105 | bottom: "inception_5a/pool_proj" 2106 | top: "inception_5a/pool_proj" 2107 | } 2108 | layer { 2109 | name: "inception_5a/output" 2110 | type: "Concat" 2111 | bottom: "inception_5a/1x1" 2112 | bottom: "inception_5a/3x3" 2113 | bottom: "inception_5a/5x5" 2114 | bottom: "inception_5a/pool_proj" 2115 | top: "inception_5a/output" 2116 | } 2117 | 2118 | layer { 2119 | name: "inception_5b/1x1" 2120 | type: "Convolution" 2121 | bottom: "inception_5a/output" 2122 | top: "inception_5b/1x1" 2123 | param { 2124 | lr_mult: 1 2125 | decay_mult: 1 2126 | } 2127 | param { 2128 | lr_mult: 2 2129 | decay_mult: 0 2130 | } 2131 | convolution_param { 2132 | num_output: 720 2133 | kernel_size: 1 2134 | weight_filler { 2135 | type: "xavier" 2136 | std: 0.1 2137 | } 2138 | bias_filler { 2139 | type: "constant" 2140 | value: 0.2 2141 | } 2142 | } 2143 | } 2144 | layer { 2145 | name: "inception_5b/relu_1x1" 2146 | type: "ReLU" 2147 | bottom: "inception_5b/1x1" 2148 | top: "inception_5b/1x1" 2149 | } 2150 | layer { 2151 | name: "inception_5b/3x3_reduce" 2152 | type: "Convolution" 2153 | bottom: "inception_5a/output" 2154 | top: "inception_5b/3x3_reduce" 2155 | param { 2156 | lr_mult: 1 2157 | decay_mult: 1 2158 | } 2159 | param { 2160 | lr_mult: 1 2161 | decay_mult: 0 2162 | } 2163 | convolution_param { 2164 | num_output: 192 2165 | kernel_size: 1 2166 | weight_filler { 2167 | type: "xavier" 2168 | std: 0.1 2169 | } 2170 | bias_filler { 2171 | type: "constant" 2172 | value: 0.2 2173 | } 2174 | } 2175 | } 2176 | layer { 2177 | name: "inception_5b/relu_3x3_reduce" 2178 | type: "ReLU" 2179 | bottom: "inception_5b/3x3_reduce" 2180 | top: "inception_5b/3x3_reduce" 2181 | } 2182 | layer { 2183 | name: "inception_5b/3x3" 2184 | type: "Convolution" 2185 | bottom: "inception_5b/3x3_reduce" 2186 | top: "inception_5b/3x3" 2187 | param { 2188 | lr_mult: 1 2189 | decay_mult: 1 2190 | } 2191 | param { 2192 | lr_mult: 2 2193 | decay_mult: 0 2194 | } 2195 | convolution_param { 2196 | num_output: 720 2197 | pad: 1 2198 | kernel_size: 3 2199 | weight_filler { 2200 | type: "xavier" 2201 | std: 0.1 2202 | } 2203 | bias_filler { 2204 | type: "constant" 2205 | value: 0.2 2206 | } 2207 | } 2208 | } 2209 | layer { 2210 | name: "inception_5b/relu_3x3" 2211 | type: "ReLU" 2212 | bottom: "inception_5b/3x3" 2213 | top: "inception_5b/3x3" 2214 | } 2215 | layer { 2216 | name: "inception_5b/5x5_reduce" 2217 | type: "Convolution" 2218 | bottom: "inception_5a/output" 2219 | top: "inception_5b/5x5_reduce" 2220 | param { 2221 | lr_mult: 1 2222 | decay_mult: 1 2223 | } 2224 | param { 2225 | lr_mult: 2 2226 | decay_mult: 0 2227 | } 2228 | convolution_param { 2229 | num_output: 48 2230 | kernel_size: 1 2231 | weight_filler { 2232 | type: "xavier" 2233 
| std: 0.1 2234 | } 2235 | bias_filler { 2236 | type: "constant" 2237 | value: 0.2 2238 | } 2239 | } 2240 | } 2241 | layer { 2242 | name: "inception_5b/relu_5x5_reduce" 2243 | type: "ReLU" 2244 | bottom: "inception_5b/5x5_reduce" 2245 | top: "inception_5b/5x5_reduce" 2246 | } 2247 | layer { 2248 | name: "inception_5b/5x5" 2249 | type: "Convolution" 2250 | bottom: "inception_5b/5x5_reduce" 2251 | top: "inception_5b/5x5" 2252 | param { 2253 | lr_mult: 1 2254 | decay_mult: 1 2255 | } 2256 | param { 2257 | lr_mult: 2 2258 | decay_mult: 0 2259 | } 2260 | convolution_param { 2261 | num_output: 128 2262 | pad: 2 2263 | kernel_size: 5 2264 | weight_filler { 2265 | type: "xavier" 2266 | std: 0.1 2267 | } 2268 | bias_filler { 2269 | type: "constant" 2270 | value: 0.2 2271 | } 2272 | } 2273 | } 2274 | layer { 2275 | name: "inception_5b/relu_5x5" 2276 | type: "ReLU" 2277 | bottom: "inception_5b/5x5" 2278 | top: "inception_5b/5x5" 2279 | } 2280 | layer { 2281 | name: "inception_5b/pool" 2282 | type: "Pooling" 2283 | bottom: "inception_5a/output" 2284 | top: "inception_5b/pool" 2285 | pooling_param { 2286 | pool: MAX 2287 | kernel_size: 3 2288 | stride: 1 2289 | pad: 1 2290 | } 2291 | } 2292 | layer { 2293 | name: "inception_5b/pool_proj" 2294 | type: "Convolution" 2295 | bottom: "inception_5b/pool" 2296 | top: "inception_5b/pool_proj" 2297 | param { 2298 | lr_mult: 1 2299 | decay_mult: 1 2300 | } 2301 | param { 2302 | lr_mult: 2 2303 | decay_mult: 0 2304 | } 2305 | convolution_param { 2306 | num_output: 128 2307 | kernel_size: 1 2308 | weight_filler { 2309 | type: "xavier" 2310 | std: 0.1 2311 | } 2312 | bias_filler { 2313 | type: "constant" 2314 | value: 0.2 2315 | } 2316 | } 2317 | } 2318 | layer { 2319 | name: "inception_5b/relu_pool_proj" 2320 | type: "ReLU" 2321 | bottom: "inception_5b/pool_proj" 2322 | top: "inception_5b/pool_proj" 2323 | } 2324 | layer { 2325 | name: "inception_5b/output" 2326 | type: "Concat" 2327 | bottom: "inception_5b/1x1" 2328 | bottom: "inception_5b/3x3" 2329 | bottom: "inception_5b/5x5" 2330 | bottom: "inception_5b/pool_proj" 2331 | top: "inception_5b/output" 2332 | } 2333 | layer { 2334 | name: "pool5/drop_s1" 2335 | type: "Dropout" 2336 | bottom: "inception_5b/output" 2337 | top: "pool5/drop_s1" 2338 | dropout_param { 2339 | dropout_ratio: 0.4 2340 | } 2341 | } 2342 | layer { 2343 | name: "cvg/classifier" 2344 | type: "Convolution" 2345 | bottom: "pool5/drop_s1" 2346 | top: "cvg/classifier" 2347 | param { 2348 | lr_mult: 1 2349 | decay_mult: 1 2350 | } 2351 | param { 2352 | lr_mult: 2 2353 | decay_mult: 0 2354 | } 2355 | convolution_param { 2356 | num_output: 1 2357 | kernel_size: 1 2358 | weight_filler { 2359 | type: "xavier" 2360 | std: 0.03 2361 | } 2362 | bias_filler { 2363 | type: "constant" 2364 | value: 0. 2365 | } 2366 | } 2367 | } 2368 | layer { 2369 | name: "coverage/sig" 2370 | type: "Sigmoid" 2371 | bottom: "cvg/classifier" 2372 | top: "coverage" 2373 | } 2374 | layer { 2375 | name: "bbox/regressor" 2376 | type: "Convolution" 2377 | bottom: "pool5/drop_s1" 2378 | top: "bboxes" 2379 | param { 2380 | lr_mult: 1 2381 | decay_mult: 1 2382 | } 2383 | param { 2384 | lr_mult: 2 2385 | decay_mult: 0 2386 | } 2387 | convolution_param { 2388 | num_output: 4 2389 | kernel_size: 1 2390 | weight_filler { 2391 | type: "xavier" 2392 | std: 0.03 2393 | } 2394 | bias_filler { 2395 | type: "constant" 2396 | value: 0. 
2397 |       }
2398 |     }
2399 | }
2400 | 
2401 | ######################################################################
2402 | # End of convolutional network
2403 | ######################################################################
2404 | 
2405 | # Convert bboxes
2406 | layer {
2407 |   name: "bbox_mask"
2408 |   type: "Eltwise"
2409 |   bottom: "bboxes"
2410 |   bottom: "coverage-block"
2411 |   top: "bboxes-masked"
2412 |   eltwise_param {
2413 |     operation: PROD
2414 |   }
2415 |   include { phase: TRAIN }
2416 |   include { phase: TEST stage: "val" }
2417 | }
2418 | layer {
2419 |   name: "bbox-norm"
2420 |   type: "Eltwise"
2421 |   bottom: "bboxes-masked"
2422 |   bottom: "size-block"
2423 |   top: "bboxes-masked-norm"
2424 |   eltwise_param {
2425 |     operation: PROD
2426 |   }
2427 |   include { phase: TRAIN }
2428 |   include { phase: TEST stage: "val" }
2429 | }
2430 | layer {
2431 |   name: "bbox-obj-norm"
2432 |   type: "Eltwise"
2433 |   bottom: "bboxes-masked-norm"
2434 |   bottom: "obj-block"
2435 |   top: "bboxes-obj-masked-norm"
2436 |   eltwise_param {
2437 |     operation: PROD
2438 |   }
2439 |   include { phase: TRAIN }
2440 |   include { phase: TEST stage: "val" }
2441 | }
2442 | 
2443 | # Loss layers
2444 | layer {
2445 |   name: "bbox_loss"
2446 |   type: "L1Loss"
2447 |   bottom: "bboxes-obj-masked-norm"
2448 |   bottom: "bbox-obj-label-norm"
2449 |   top: "loss_bbox"
2450 |   loss_weight: 2
2451 |   include { phase: TRAIN }
2452 |   include { phase: TEST stage: "val" }
2453 | }
2454 | layer {
2455 |   name: "coverage_loss"
2456 |   type: "EuclideanLoss"
2457 |   bottom: "coverage"
2458 |   bottom: "coverage-label"
2459 |   top: "loss_coverage"
2460 |   include { phase: TRAIN }
2461 |   include { phase: TEST stage: "val" }
2462 | }
2463 | 
2464 | # Cluster bboxes
2465 | layer {
2466 |   type: 'Python'
2467 |   name: 'cluster'
2468 |   bottom: 'coverage'
2469 |   bottom: 'bboxes'
2470 |   top: 'bbox-list'
2471 |   python_param {
2472 |     module: 'caffe.layers.detectnet.clustering'
2473 |     layer: 'ClusterDetections'
2474 |     param_str : '1280, 720, 16, 0.6, 3, 0.02, 22'
2475 |   }
2476 |   include: { phase: TEST }
2477 | }
2478 | 
2479 | # Calculate mean average precision
2480 | layer {
2481 |   type: 'Python'
2482 |   name: 'cluster_gt'
2483 |   bottom: 'coverage-label'
2484 |   bottom: 'bbox-label'
2485 |   top: 'bbox-list-label'
2486 |   python_param {
2487 |     module: 'caffe.layers.detectnet.clustering'
2488 |     layer: 'ClusterGroundtruth'
2489 |     param_str : '1280, 720, 16'
2490 |   }
2491 |   include: { phase: TEST stage: "val" }
2492 | }
2493 | layer {
2494 |   type: 'Python'
2495 |   name: 'score'
2496 |   bottom: 'bbox-list-label'
2497 |   bottom: 'bbox-list'
2498 |   top: 'bbox-list-scored'
2499 |   python_param {
2500 |     module: 'caffe.layers.detectnet.mean_ap'
2501 |     layer: 'ScoreDetections'
2502 |   }
2503 |   include: { phase: TEST stage: "val" }
2504 | }
2505 | layer {
2506 |   type: 'Python'
2507 |   name: 'mAP'
2508 |   bottom: 'bbox-list-scored'
2509 |   top: 'mAP'
2510 |   top: 'precision'
2511 |   top: 'recall'
2512 |   python_param {
2513 |     module: 'caffe.layers.detectnet.mean_ap'
2514 |     layer: 'mAP'
2515 |     param_str : '1280, 720, 16'
2516 |   }
2517 |   include: { phase: TEST stage: "val" }
2518 | }
2519 | 
--------------------------------------------------------------------------------
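At inference time only the deploy branch of the network above is active: `data` enters through the `Input` layer, `deploy_transform` (a `Power` layer) shifts every pixel by -127 as a cheap stand-in for mean subtraction, the GoogLeNet trunk and the two 1x1 heads emit the `coverage` heat-map and raw `bboxes`, and the `ClusterDetections` Python layer groups the per-grid-cell rectangles into a final `bbox-list`. A minimal sketch of driving that branch directly (an editor's addition, assuming NVIDIA's caffe fork -- which provides the detectnet Python layers -- is on the PYTHONPATH; the weight path and the 512x512 input size are assumptions, and `chw_image` stands in for a preprocessed channels-first frame):

    import caffe
    import numpy as np

    net = caffe.Net('deploy.prototxt', 'models/fedex.caffemodel', caffe.TEST)
    net.blobs['data'].reshape(1, 3, 512, 512)   # use whatever deploy.prototxt declares
    net.blobs['data'].data[0] = chw_image       # (3, 512, 512) float array
    detections = net.forward()['bbox-list'][0]  # rows of [xl, yt, xr, yb, confidence]
    boxes = detections[detections[:, 4] > 0]    # all-zero rows are padding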
/LogoDetection.py:
--------------------------------------------------------------------------------
1 | # This might be run only once if no ffmpeg is installed
2 | #import imageio
3 | #imageio.plugins.ffmpeg.download()
4 | 
5 | import cv2
6 | import numpy as np
7 | import os
8 | import time
9 | from PIL import Image
10 | from math import sqrt, ceil, floor
11 | import numpy
12 | 
13 | from google.protobuf import text_format
14 | from moviepy.editor import VideoFileClip
15 | import image_slicer
16 | from collections import Counter
17 | 
18 | import scipy.misc
19 | os.environ['GLOG_minloglevel'] = '2' # Suppress most caffe output
20 | 
21 | import caffe
22 | from caffe.proto import caffe_pb2
23 | import random
24 | 
25 | countedLogos = []
26 | frameNumber = 0
27 | 
28 | def incrementFrameNumber():
29 |     global frameNumber
30 |     frameNumber += 1
31 | 
32 | def addLogoEntry(logo):
33 |     global countedLogos
34 |     countedLogos.append(logo)
35 | 
36 | DEPLOY_FILE = 'deploy.prototxt'
37 | MEAN_FILE = None #'/Users/kris/Downloads/fedex_mil_model_epoch_70.0/mean.binaryproto'
38 | #'/Users/kris/Downloads/fedex_mil_model_epoch_70.0/snapshot_iter_28000.caffemodel'
39 | MODELS = ['fedex', 'enterprise', 'adidas', 'hankook', 'unicredit']
40 | BATCH_SIZE = 1
41 | OUTPUT_FILE = 'output.mp4'
42 | INPUT_FILE = '/Users/kris/Downloads/footbal-split-movie-57s.mp4'
43 | USE_GPU = False
44 | FPS = 60.0
45 | 
46 | # IMAGE_FILE = '/Users/kris/Downloads/football2-resized2/resized/195583_01_01.png'
47 | 
48 | class Logo:
49 |     """Represents a single logo set."""
50 |     frameNumber = 0
51 |     foundBoxes = 0
52 | 
53 |     def __init__(self, frameNumber, foundBoxes):
54 |         self.frameNumber = frameNumber
55 |         self.foundBoxes = foundBoxes
56 | 
57 |     def __repr__(self):
58 |         return "Logo(" + str(self.frameNumber) + ": " + str(self.foundBoxes) + " )"
59 | 
60 | 
61 | def get_net(caffemodel, deploy_file, use_gpu=False):
62 |     """
63 |     Returns an instance of caffe.Net
64 | 
65 |     Arguments:
66 |     caffemodel -- path to a .caffemodel file
67 |     deploy_file -- path to a .prototxt file
68 | 
69 |     Keyword arguments:
70 |     use_gpu -- if True, use the GPU for inference
71 |     """
72 |     if use_gpu:
73 |         caffe.set_mode_gpu()
74 |     else:
75 |         caffe.set_mode_cpu()
76 | 
77 |     # load a new model
78 |     return caffe.Net(deploy_file, caffemodel, caffe.TEST)
79 | 
80 | def get_transformer(deploy_file, mean_file=None):
81 |     """
82 |     Returns an instance of caffe.io.Transformer
83 | 
84 |     Arguments:
85 |     deploy_file -- path to a .prototxt file
86 | 
87 |     Keyword arguments:
88 |     mean_file -- path to a .binaryproto file (optional)
89 |     """
90 |     network = caffe_pb2.NetParameter()
91 |     with open(deploy_file) as infile:
92 |         text_format.Merge(infile.read(), network)
93 | 
94 |     if network.input_shape:
95 |         dims = network.input_shape[0].dim
96 |     else:
97 |         dims = network.input_dim[:4]
98 | 
99 |     t = caffe.io.Transformer(
100 |         inputs = {'data': dims}
101 |     )
102 |     t.set_transpose('data', (2,0,1)) # transpose to (channels, height, width)
103 | 
104 |     # color images
105 |     if dims[1] == 3:
106 |         # channel swap
107 |         t.set_channel_swap('data', (2,1,0))
108 | 
109 |     if mean_file:
110 |         # set mean pixel
111 |         print("mean file exists")
112 |         with open(mean_file,'rb') as infile:
113 |             blob = caffe_pb2.BlobProto()
114 |             blob.MergeFromString(infile.read())
115 |             if blob.HasField('shape'):
116 |                 blob_dims = blob.shape
117 |                 assert len(blob_dims) == 4, 'Shape should have 4 dimensions - shape is "%s"' % blob.shape
118 |             elif blob.HasField('num') and blob.HasField('channels') and \
119 |                     blob.HasField('height') and blob.HasField('width'):
120 |                 blob_dims = (blob.num, blob.channels, blob.height, blob.width)
121 |             else:
122 |                 raise ValueError('blob does not provide shape or 4d dimensions')
123 |             pixel = np.reshape(blob.data, blob_dims[1:]).mean(1).mean(1)
124 |             t.set_mean('data', pixel)
125 | 
126 |     return t
127 | 
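# Editor's sketch (not part of the original source): typical use of the two
# helpers above, kept in comments so behaviour is unchanged. The model path
# follows this repo's layout; 'frame.png' is a hypothetical input.
#
#   net = get_net('./models/fedex.caffemodel', DEPLOY_FILE, USE_GPU)
#   transformer = get_transformer(DEPLOY_FILE, MEAN_FILE)
#   frame = cv2.imread('frame.png')
#   net.blobs['data'].data[0] = transformer.preprocess('data', frame)
#   detections = net.forward()[net.outputs[-1]]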
128 | def resize_img(image, height, width):
129 |     """
130 |     Resizes the image to detectnet inputs
131 | 
132 |     Arguments:
133 |     image -- a single image
134 |     height -- height of the network input
135 |     width -- width of the network input
136 |     """
137 |     image = np.array(image)
138 |     image = scipy.misc.imresize(image, (height, width), 'bilinear')
139 |     return image
140 | 
141 | def draw_bboxes(image, locations, clr):
142 |     """
143 |     Draws the bounding boxes into an image
144 | 
145 |     Arguments:
146 |     image -- a single image already resized
147 |     locations -- the location of the bounding boxes
148 |     """
149 |     boxesFound = 0
150 |     for left,top,right,bottom,confidence in locations:
151 |         if confidence==0:
152 |             continue
153 |         boxesFound += 1
154 |         cv2.rectangle(image,(left,top),(right,bottom),clr,3)
155 |     return (boxesFound, image)
156 | 
157 | def forward_pass(image, net, transformer, batch_size=None):
158 |     """
159 |     Returns scores for each image as an np.ndarray (nImages x nBoxes x 5 for DetectNet)
160 | 
161 |     Arguments:
162 |     image -- a single np.ndarray
163 |     net -- a caffe.Net
164 |     transformer -- a caffe.io.Transformer
165 | 
166 |     Keyword arguments:
167 |     batch_size -- how many images can be processed at once
168 |                   (a high value may result in out-of-memory errors)
169 |     """
170 |     if batch_size is None:
171 |         batch_size = 1
172 | 
173 |     caffe_images = []
174 | 
175 |     if image.ndim == 2:
176 |         caffe_images.append(image[:,:,np.newaxis])
177 |     else:
178 |         caffe_images.append(image)
179 | 
180 |     dims = transformer.inputs['data'][1:]
181 | 
182 |     scores = None
183 |     for chunk in [caffe_images[x:x+batch_size] for x in xrange(0, len(caffe_images), batch_size)]:
184 |         new_shape = (len(chunk),) + tuple(dims)
185 |         if net.blobs['data'].data.shape != new_shape:
186 |             net.blobs['data'].reshape(*new_shape)
187 |         for index, image in enumerate(chunk):
188 |             image_data = transformer.preprocess('data', image)
189 |             net.blobs['data'].data[index] = image_data
190 |         start = time.time()
191 |         output = net.forward()[net.outputs[-1]]
192 |         end = time.time()
193 |         if scores is None:
194 |             scores = np.copy(output)
195 |         else:
196 |             scores = np.vstack((scores, output))
197 |         print 'Processed %s/%s images in %f seconds ...' % (len(scores), len(caffe_images), (end - start))
198 | 
199 |     return scores
200 | 
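# Editor's sketch (not part of the original source): for this DetectNet
# deploy network, the array returned by forward_pass() has shape
# (n_images, max_bbox_per_image, 5), each row being
# [xl, yt, xr, yb, confidence] with all-zero rows as padding (see the
# clustering.py link in classify() below). Iterating one image's detections:
#
#   for left, top, right, bottom, confidence in scores[0]:
#       if confidence > 0:
#           print 'box (%s,%s)-(%s,%s) conf %s' % (left, top, right, bottom, confidence)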
201 | def classify(caffemodel, deploy_file, image, clr,
202 |         mean_file=None, batch_size=None, use_gpu=False):
203 |     """
204 |     Run one image through a Caffe model and return (boxesFound, image) with the bounding boxes drawn
205 | 
206 |     Arguments:
207 |     caffemodel -- path to a .caffemodel
208 |     deploy_file -- path to a .prototxt
209 |     image -- a single image (np.ndarray); clr -- BGR color for the boxes
210 | 
211 |     Keyword arguments:
212 |     mean_file -- path to a .binaryproto
213 |     use_gpu -- if True, run inference on the GPU
214 |     """
215 |     # Load the model
216 |     net = get_net(caffemodel, deploy_file, use_gpu)
217 |     transformer = get_transformer(deploy_file, mean_file)
218 |     _, channels, height, width = transformer.inputs['data']
219 |     if channels == 3:
220 |         mode = 'RGB'
221 |     elif channels == 1:
222 |         mode = 'L'
223 |     else:
224 |         raise ValueError('Invalid number for channels: %s' % channels)
225 | 
226 |     image = resize_img(image, height, width)
227 | 
228 |     # Classify the image
229 |     scores = forward_pass(image, net, transformer, batch_size=batch_size)
230 |     # print("Scores: ")
231 |     # print(scores)
232 |     ### Process the results
233 | 
234 |     # Format of scores is [ batch_size x max_bbox_per_image x 5 (xl, yt, xr, yb, confidence) ]
235 |     # https://github.com/NVIDIA/caffe/blob/v0.15.13/python/caffe/layers/detectnet/clustering.py#L81
236 |     for i, image_results in enumerate(scores):
237 |         boxesFound, img_result = draw_bboxes(image, image_results, clr)
238 |     # This line is optional; it would resize back to the original input-video size, and can be removed
239 |     #img_result = resize_img(img_result,720,1280)
240 |     return (boxesFound, img_result)
241 | 
242 | def getColorForClass(modelName):
243 |     return {
244 |         'fedex': (255,0,0),
245 |         'enterprise': (0,0,255),
246 |         'unicredit': (0,255,0),
247 |         'amstel': (150,150,0),
248 |         'adidas': (0,150,150),
249 |         'hankook': (0,255,255)
250 |     }[modelName]
251 | 
252 | def detect_logos(image):
253 |     """
254 |     Runs our pipeline given a single image and returns another one with the bounding boxes drawn
255 | 
256 |     Arguments:
257 |     image -- cv2 image (one quarter-tile of the original frame)
258 |     """
259 |     result = image
260 |     boxes = {}
261 |     for model in MODELS:
262 |         print("Detecting bboxes for: " + model)
263 |         clr = getColorForClass(model)
264 |         boxesFound, result = classify('./models/'+model+'.caffemodel', DEPLOY_FILE, result, clr, MEAN_FILE, BATCH_SIZE, USE_GPU)
265 |         boxes[model] = boxesFound
266 |     return (boxes, result)
267 | 
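# Note that detect_logos() re-reads every .caffemodel from disk for every tile,
# because classify() calls get_net() each time. A memoizing variant (an
# editor's sketch, not in the original) would load each model exactly once:
_NET_CACHE = {}

def get_cached_net(model_name):
    """Return a cached (net, transformer) pair for the given model name."""
    if model_name not in _NET_CACHE:
        caffemodel = './models/' + model_name + '.caffemodel'
        _NET_CACHE[model_name] = (get_net(caffemodel, DEPLOY_FILE, USE_GPU),
                                  get_transformer(DEPLOY_FILE, MEAN_FILE))
    return _NET_CACHE[model_name]
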
268 | def slice_image(image, number_tiles):
269 |     im = toPILImage(image)
270 |     im_w, im_h = im.size
271 |     columns, rows = image_slicer.calc_columns_rows(number_tiles)
272 |     extras = (columns * rows) - number_tiles
273 |     tile_w, tile_h = int(floor(im_w / columns)), int(floor(im_h / rows))
274 | 
275 |     tiles = []
276 |     number = 1
277 |     for pos_y in range(0, im_h - rows, tile_h): # -rows to absorb rounding error
278 |         for pos_x in range(0, im_w - columns, tile_w): # as above
279 |             area = (pos_x, pos_y, pos_x + tile_w, pos_y + tile_h)
280 |             image = im.crop(area)
281 |             position = (int(floor(pos_x / tile_w)) + 1,
282 |                 int(floor(pos_y / tile_h)) + 1)
283 |             coords = (pos_x, pos_y)
284 |             tile = image_slicer.Tile(image, number, position, coords)
285 |             tiles.append(tile)
286 |             number += 1
287 |     return tuple(tiles)
288 | 
289 | def toPILImage(opencvImage):
290 |     img = cv2.cvtColor(opencvImage, cv2.COLOR_BGR2RGB)
291 |     im = Image.fromarray(img)
292 |     return im
293 | 
294 | def toOpenCVFormat(pilImage):
295 |     open_cv_image = numpy.array(pilImage)
296 |     image = open_cv_image[:, :, ::-1].copy()
297 |     return image
298 | 
299 | def joinImages(modified_images):
300 |     result = Image.new("RGB", (1280, 720))
301 | 
302 |     for img in modified_images:
303 |         img.thumbnail((640, 360), Image.ANTIALIAS)
304 |     result.paste(modified_images[0], (0, 0))
305 |     result.paste(modified_images[2], (0, 360))
306 |     result.paste(modified_images[1], (640, 0))
307 |     result.paste(modified_images[3], (640, 360))
308 |     return result
309 | 
310 | def merge_dicts(allBoxes):
311 |     inp = [dict(x) for x in allBoxes]
312 |     count = Counter()
313 |     for y in inp:
314 |         count += Counter(y)
315 |     return dict(count)
316 | 
317 | def detect_logos_full_img(image):
318 |     """
319 |     Splits a single frame into 4 tiles, detects logos on each tile, and re-joins the annotated tiles
320 | 
321 |     Arguments:
322 |     image -- cv2 image file for a single video frame
323 |     """
324 |     tiles = slice_image(image, 4)
325 |     modified_images = []
326 |     print("tiles length: " + str(len(tiles)))
327 |     fullImageBoxes = []
328 |     for i, tile in enumerate(tiles):
329 |         open_cv_image = numpy.array(tile.image)
330 |         image = open_cv_image[:, :, ::-1].copy()
331 |         boxes, img_detected = detect_logos(image)
332 |         modified_images.append(toPILImage(img_detected))
333 |         fullImageBoxes.append(boxes)
334 |     img = joinImages(modified_images)
335 |     allBoxes = merge_dicts(fullImageBoxes)
336 |     print("All boxes: " + str(allBoxes))
337 |     addLogoEntry(Logo(frameNumber, allBoxes))
338 |     incrementFrameNumber()
339 |     return toOpenCVFormat(img)
340 | 
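# End-to-end shape of one frame through detect_logos_full_img(), assuming a
# 1280x720 source clip (the size joinImages() rebuilds): the frame is cut into
# four 640x360 tiles, each tile is upscaled to the 1280x720 network input by
# classify()/resize_img(), and the annotated tiles are thumbnailed back to
# 640x360 and pasted into place. merge_dicts() then sums the per-model box
# counts across the four tiles, e.g.:
#
#   merge_dicts([{'fedex': 1}, {'fedex': 2, 'adidas': 1}])
#   # => {'fedex': 3, 'adidas': 1}
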
341 | def produceReport(logos, movieTime):
342 |     fedexFrameCount = 0
343 |     amstelFrameCount = 0
344 |     unicreditFrameCount = 0
345 |     adidasFrameCount = 0
346 |     hankookFrameCount = 0
347 |     enterpriseFrameCount = 0
348 |     allBoxes = []
349 | 
350 |     for logo in logos:
351 |         if 'fedex' in logo.foundBoxes and logo.foundBoxes['fedex'] > 0:
352 |             fedexFrameCount += 1
353 |         if 'amstel' in logo.foundBoxes and logo.foundBoxes['amstel'] > 0:
354 |             amstelFrameCount += 1
355 |         if 'unicredit' in logo.foundBoxes and logo.foundBoxes['unicredit'] > 0:
356 |             unicreditFrameCount += 1
357 |         if 'adidas' in logo.foundBoxes and logo.foundBoxes['adidas'] > 0:
358 |             adidasFrameCount += 1
359 |         if 'hankook' in logo.foundBoxes and logo.foundBoxes['hankook'] > 0:
360 |             hankookFrameCount += 1
361 |         if 'enterprise' in logo.foundBoxes and logo.foundBoxes['enterprise'] > 0:
362 |             enterpriseFrameCount += 1
363 |         allBoxes.append(logo.foundBoxes)
364 | 
365 |     allObjectsDetected = merge_dicts(allBoxes)
366 |     report = """
367 | *****************************************************
368 | 
369 | TOTAL TIME: %s s
370 | LOGO DETECTOR REPORT:
371 | 
372 | FEDEX STATS:
373 | frame count: %s
374 | total time: %s
375 | objects detected: %s
376 | 
377 | AMSTEL STATS:
378 | frame count: %s
379 | total time: %s
380 | objects detected: %s
381 | 
382 | UNICREDIT STATS:
383 | frame count: %s
384 | total time: %s
385 | objects detected: %s
386 | 
387 | ADIDAS STATS:
388 | frame count: %s
389 | total time: %s
390 | objects detected: %s
391 | 
392 | HANKOOK STATS:
393 | frame count: %s
394 | total time: %s
395 | objects detected: %s
396 | 
397 | ENTERPRISE STATS:
398 | frame count: %s
399 | total time: %s
400 | objects detected: %s
401 | 
402 | more information at: www.softwaremill.com
403 | 
404 | *****************************************************
405 | 
406 | """ % (str(movieTime),
407 |     str(fedexFrameCount),
408 |     "{:.4f}".format(fedexFrameCount / FPS) + " s",
409 |     str(allObjectsDetected.get('fedex', 0)),
410 |     str(amstelFrameCount),
411 |     "{:.4f}".format(amstelFrameCount / FPS) + " s",
412 |     str(allObjectsDetected.get('amstel', 0)),
413 |     str(unicreditFrameCount),
414 |     "{:.4f}".format(unicreditFrameCount / FPS) + " s",
415 |     str(allObjectsDetected.get('unicredit', 0)),
416 |     str(adidasFrameCount),
417 |     "{:.4f}".format(adidasFrameCount / FPS) + " s",
418 |     str(allObjectsDetected.get('adidas', 0)),
419 |     str(hankookFrameCount),
420 |     "{:.4f}".format(hankookFrameCount / FPS) + " s",
421 |     str(allObjectsDetected.get('hankook', 0)),
422 |     str(enterpriseFrameCount),
423 |     "{:.4f}".format(enterpriseFrameCount / FPS) + " s",
424 |     str(allObjectsDetected.get('enterprise', 0)))
425 |     print report
426 | 
427 | 
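# A minimal worked example of the report path (names from this file, numbers
# invented for illustration): two frames containing a fedex logo give
#
#   logos = [Logo(0, {'fedex': 2}), Logo(1, {'fedex': 1, 'adidas': 1})]
#   produceReport(logos, movieTime=2 / FPS)
#
# which, with FPS = 60.0, reports a fedex frame count of 2, roughly 0.0333 s
# of estimated screen time (2 / 60.0), and 3 fedex objects detected in total.
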
428 | if __name__ == '__main__':
429 |     frameNumber = 0
430 |     script_start_time = time.time()
431 | 
432 |     project_output = OUTPUT_FILE
433 | 
434 |     clip1 = VideoFileClip(INPUT_FILE)
435 |     white_clip = clip1.fl_image(detect_logos_full_img)
436 |     white_clip.write_videofile(project_output, audio=False)
437 | 
438 |     #IMAGE_FILE = '/Users/kris/Downloads/football2-resized2/resized/184196_01_01.png'
439 |     #IMAGE_FILE = '/Users/kris/Downloads/football/frame21238.jpg'
440 |     #image = cv2.imread(IMAGE_FILE);
441 |     #img = detect_logos_full_img(image)
442 |     #cv2.imwrite('frame2.jpg',img)
443 |     print 'Video took %f seconds.' % (time.time() - script_start_time)
444 |     print 'Counted logos: ' + str(countedLogos)
445 |     print 'Processed frames: ' + str(frameNumber)
446 |     produceReport(countedLogos, clip1.duration)
447 | 
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # detectnet-tests
2 | Python scripts and other resources for testing DetectNet on Nvidia DIGITS
3 | 
-------------------------------------------------------------------------------- /convert-to-kitti.py: --------------------------------------------------------------------------------
1 | import os, errno
2 | import xml.etree.ElementTree
3 | import random
4 | from sklearn.cross_validation import train_test_split
5 | import numpy
6 | from shutil import copyfile
7 | import shutil
8 | 
9 | ## Remove underscores: find football2-resized3 -type f -name "*_*" -exec sh -c 'd=$(dirname "$1"); mv "$1" "$d/$(basename "$1" | tr -d _)"' sh {} \;
10 | 
11 | sourceDirectory='/Users/kris/Downloads/football2-resized2/resized'
12 | destinationDirectory='/Users/kris/Downloads/football2-resized3'
13 | 
14 | # remove the dir first
15 | if os.path.exists(destinationDirectory):
16 |     shutil.rmtree(destinationDirectory)
17 | 
18 | if not os.path.exists(destinationDirectory):
19 |     os.makedirs(destinationDirectory)
20 |     os.makedirs(destinationDirectory + '/train')
21 |     os.makedirs(destinationDirectory + '/train/images')
22 |     os.makedirs(destinationDirectory + '/train/labels')
23 |     os.makedirs(destinationDirectory + '/val')
24 |     os.makedirs(destinationDirectory + '/val/images')
25 |     os.makedirs(destinationDirectory + '/val/labels')
26 | 
27 | def split():
28 |     data = numpy.array(os.listdir(sourceDirectory + '/annotations'))
29 |     train, test = train_test_split(data, test_size=0.2)
30 |     return train, test
31 | 
32 | def processSingleFile(filename):
33 |     lines = []
34 |     if filename.endswith(".xml"):
35 |         print("Processing: {0}".format(os.path.join(sourceDirectory + '/annotations', filename)))
36 |         e = xml.etree.ElementTree.parse(os.path.join(sourceDirectory + '/annotations', filename)).getroot()
37 |         name, xmin, ymin, xmax, ymax = '','','','',''
38 |         for elem in e.iterfind('object'):
39 |             for oel in elem.iter():
40 |                 if(oel.tag == 'name'):
41 |                     name = oel.text.strip()
42 |                 elif(oel.tag == 'xmin'):
43 |                     xmin = oel.text.strip()
44 |                 elif(oel.tag == 'ymin'):
45 |                     ymin = oel.text.strip()
46 |                 elif(oel.tag == 'xmax'):
47 |                     xmax = oel.text.strip()
48 |                 elif(oel.tag == 'ymax'):
49 |                     ymax = oel.text.strip()
50 |                 else:
51 |                     continue
52 |             # Example: Car 0.00 0 3.11 801.18 169.78 848.06 186.03 1.41 1.59 4.01 19.44 1.17 65.54 -2.89
53 |             # name 0.00 0 0.00 xmin ymin xmax ymax 0.00 0.00 0.00 0.00 0.00 0.00 0.00
54 |             lines.append(name + ' ' + '0.00 0 0.00 ' + xmin + ' ' + ymin + ' ' + xmax + ' ' + ymax + ' 0.00 0.00 0.00 0.00 0.00 0.00 0.00')
55 |     return lines
56 | 
57 | 
58 | def convertToKitti(labelsDir, imagesDir, files):
59 |     for f in files:
60 |         pre, ext = os.path.splitext(f)
61 |         # create label file
62 |         lines = processSingleFile(f)
63 |         kitti_labels = pre + '.txt'
64 |         with open(os.path.join(destinationDirectory + labelsDir, kitti_labels), "w") as text_file:
65 |             for item in lines:
66 |                 text_file.write("%s\n" % item)
67 |         # copy the image file
68 |         imgFile = pre + '.png'
69 |         srcFile = os.path.join(sourceDirectory, imgFile)
70 |         dstFile = os.path.join(destinationDirectory + imagesDir, imgFile)
71 |         copyfile(srcFile, dstFile)
72 | 
73 | print("splitting into train and test 
sets: ") 74 | train, test = split() 75 | 76 | # Convert test subset 77 | convertToKitti('/val/labels', '/val/images', test) 78 | 79 | # Convert train subset 80 | convertToKitti('/train/labels', '/train/images', train) 81 | 82 | print('Done!') 83 | -------------------------------------------------------------------------------- /deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 720 6 | dim: 1280 7 | } 8 | layer { 9 | name: "deploy_transform" 10 | type: "Power" 11 | bottom: "data" 12 | top: "transformed_data" 13 | power_param { 14 | shift: -127.0 15 | } 16 | } 17 | layer { 18 | name: "conv1/7x7_s2" 19 | type: "Convolution" 20 | bottom: "transformed_data" 21 | top: "conv1/7x7_s2" 22 | param { 23 | lr_mult: 1.0 24 | decay_mult: 1.0 25 | } 26 | param { 27 | lr_mult: 2.0 28 | decay_mult: 0.0 29 | } 30 | convolution_param { 31 | num_output: 64 32 | pad: 3 33 | kernel_size: 7 34 | stride: 2 35 | weight_filler { 36 | type: "xavier" 37 | std: 0.1 38 | } 39 | bias_filler { 40 | type: "constant" 41 | value: 0.2 42 | } 43 | } 44 | } 45 | layer { 46 | name: "conv1/relu_7x7" 47 | type: "ReLU" 48 | bottom: "conv1/7x7_s2" 49 | top: "conv1/7x7_s2" 50 | } 51 | layer { 52 | name: "pool1/3x3_s2" 53 | type: "Pooling" 54 | bottom: "conv1/7x7_s2" 55 | top: "pool1/3x3_s2" 56 | pooling_param { 57 | pool: MAX 58 | kernel_size: 3 59 | stride: 2 60 | } 61 | } 62 | layer { 63 | name: "pool1/norm1" 64 | type: "LRN" 65 | bottom: "pool1/3x3_s2" 66 | top: "pool1/norm1" 67 | lrn_param { 68 | local_size: 5 69 | alpha: 0.0001 70 | beta: 0.75 71 | } 72 | } 73 | layer { 74 | name: "conv2/3x3_reduce" 75 | type: "Convolution" 76 | bottom: "pool1/norm1" 77 | top: "conv2/3x3_reduce" 78 | param { 79 | lr_mult: 1.0 80 | decay_mult: 1.0 81 | } 82 | param { 83 | lr_mult: 2.0 84 | decay_mult: 0.0 85 | } 86 | convolution_param { 87 | num_output: 64 88 | kernel_size: 1 89 | weight_filler { 90 | type: "xavier" 91 | std: 0.1 92 | } 93 | bias_filler { 94 | type: "constant" 95 | value: 0.2 96 | } 97 | } 98 | } 99 | layer { 100 | name: "conv2/relu_3x3_reduce" 101 | type: "ReLU" 102 | bottom: "conv2/3x3_reduce" 103 | top: "conv2/3x3_reduce" 104 | } 105 | layer { 106 | name: "conv2/3x3" 107 | type: "Convolution" 108 | bottom: "conv2/3x3_reduce" 109 | top: "conv2/3x3" 110 | param { 111 | lr_mult: 1.0 112 | decay_mult: 1.0 113 | } 114 | param { 115 | lr_mult: 2.0 116 | decay_mult: 0.0 117 | } 118 | convolution_param { 119 | num_output: 192 120 | pad: 1 121 | kernel_size: 3 122 | weight_filler { 123 | type: "xavier" 124 | std: 0.03 125 | } 126 | bias_filler { 127 | type: "constant" 128 | value: 0.2 129 | } 130 | } 131 | } 132 | layer { 133 | name: "conv2/relu_3x3" 134 | type: "ReLU" 135 | bottom: "conv2/3x3" 136 | top: "conv2/3x3" 137 | } 138 | layer { 139 | name: "conv2/norm2" 140 | type: "LRN" 141 | bottom: "conv2/3x3" 142 | top: "conv2/norm2" 143 | lrn_param { 144 | local_size: 5 145 | alpha: 0.0001 146 | beta: 0.75 147 | } 148 | } 149 | layer { 150 | name: "pool2/3x3_s2" 151 | type: "Pooling" 152 | bottom: "conv2/norm2" 153 | top: "pool2/3x3_s2" 154 | pooling_param { 155 | pool: MAX 156 | kernel_size: 3 157 | stride: 2 158 | } 159 | } 160 | layer { 161 | name: "inception_3a/1x1" 162 | type: "Convolution" 163 | bottom: "pool2/3x3_s2" 164 | top: "inception_3a/1x1" 165 | param { 166 | lr_mult: 1.0 167 | decay_mult: 1.0 168 | } 169 | param { 170 | lr_mult: 2.0 171 | decay_mult: 0.0 172 | } 173 | convolution_param { 174 | num_output: 64 
175 | kernel_size: 1 176 | weight_filler { 177 | type: "xavier" 178 | std: 0.03 179 | } 180 | bias_filler { 181 | type: "constant" 182 | value: 0.2 183 | } 184 | } 185 | } 186 | layer { 187 | name: "inception_3a/relu_1x1" 188 | type: "ReLU" 189 | bottom: "inception_3a/1x1" 190 | top: "inception_3a/1x1" 191 | } 192 | layer { 193 | name: "inception_3a/3x3_reduce" 194 | type: "Convolution" 195 | bottom: "pool2/3x3_s2" 196 | top: "inception_3a/3x3_reduce" 197 | param { 198 | lr_mult: 1.0 199 | decay_mult: 1.0 200 | } 201 | param { 202 | lr_mult: 2.0 203 | decay_mult: 0.0 204 | } 205 | convolution_param { 206 | num_output: 96 207 | kernel_size: 1 208 | weight_filler { 209 | type: "xavier" 210 | std: 0.09 211 | } 212 | bias_filler { 213 | type: "constant" 214 | value: 0.2 215 | } 216 | } 217 | } 218 | layer { 219 | name: "inception_3a/relu_3x3_reduce" 220 | type: "ReLU" 221 | bottom: "inception_3a/3x3_reduce" 222 | top: "inception_3a/3x3_reduce" 223 | } 224 | layer { 225 | name: "inception_3a/3x3" 226 | type: "Convolution" 227 | bottom: "inception_3a/3x3_reduce" 228 | top: "inception_3a/3x3" 229 | param { 230 | lr_mult: 1.0 231 | decay_mult: 1.0 232 | } 233 | param { 234 | lr_mult: 2.0 235 | decay_mult: 0.0 236 | } 237 | convolution_param { 238 | num_output: 128 239 | pad: 1 240 | kernel_size: 3 241 | weight_filler { 242 | type: "xavier" 243 | std: 0.03 244 | } 245 | bias_filler { 246 | type: "constant" 247 | value: 0.2 248 | } 249 | } 250 | } 251 | layer { 252 | name: "inception_3a/relu_3x3" 253 | type: "ReLU" 254 | bottom: "inception_3a/3x3" 255 | top: "inception_3a/3x3" 256 | } 257 | layer { 258 | name: "inception_3a/5x5_reduce" 259 | type: "Convolution" 260 | bottom: "pool2/3x3_s2" 261 | top: "inception_3a/5x5_reduce" 262 | param { 263 | lr_mult: 1.0 264 | decay_mult: 1.0 265 | } 266 | param { 267 | lr_mult: 2.0 268 | decay_mult: 0.0 269 | } 270 | convolution_param { 271 | num_output: 16 272 | kernel_size: 1 273 | weight_filler { 274 | type: "xavier" 275 | std: 0.2 276 | } 277 | bias_filler { 278 | type: "constant" 279 | value: 0.2 280 | } 281 | } 282 | } 283 | layer { 284 | name: "inception_3a/relu_5x5_reduce" 285 | type: "ReLU" 286 | bottom: "inception_3a/5x5_reduce" 287 | top: "inception_3a/5x5_reduce" 288 | } 289 | layer { 290 | name: "inception_3a/5x5" 291 | type: "Convolution" 292 | bottom: "inception_3a/5x5_reduce" 293 | top: "inception_3a/5x5" 294 | param { 295 | lr_mult: 1.0 296 | decay_mult: 1.0 297 | } 298 | param { 299 | lr_mult: 2.0 300 | decay_mult: 0.0 301 | } 302 | convolution_param { 303 | num_output: 32 304 | pad: 2 305 | kernel_size: 5 306 | weight_filler { 307 | type: "xavier" 308 | std: 0.03 309 | } 310 | bias_filler { 311 | type: "constant" 312 | value: 0.2 313 | } 314 | } 315 | } 316 | layer { 317 | name: "inception_3a/relu_5x5" 318 | type: "ReLU" 319 | bottom: "inception_3a/5x5" 320 | top: "inception_3a/5x5" 321 | } 322 | layer { 323 | name: "inception_3a/pool" 324 | type: "Pooling" 325 | bottom: "pool2/3x3_s2" 326 | top: "inception_3a/pool" 327 | pooling_param { 328 | pool: MAX 329 | kernel_size: 3 330 | stride: 1 331 | pad: 1 332 | } 333 | } 334 | layer { 335 | name: "inception_3a/pool_proj" 336 | type: "Convolution" 337 | bottom: "inception_3a/pool" 338 | top: "inception_3a/pool_proj" 339 | param { 340 | lr_mult: 1.0 341 | decay_mult: 1.0 342 | } 343 | param { 344 | lr_mult: 2.0 345 | decay_mult: 0.0 346 | } 347 | convolution_param { 348 | num_output: 32 349 | kernel_size: 1 350 | weight_filler { 351 | type: "xavier" 352 | std: 0.1 353 | } 354 | bias_filler { 355 | 
type: "constant" 356 | value: 0.2 357 | } 358 | } 359 | } 360 | layer { 361 | name: "inception_3a/relu_pool_proj" 362 | type: "ReLU" 363 | bottom: "inception_3a/pool_proj" 364 | top: "inception_3a/pool_proj" 365 | } 366 | layer { 367 | name: "inception_3a/output" 368 | type: "Concat" 369 | bottom: "inception_3a/1x1" 370 | bottom: "inception_3a/3x3" 371 | bottom: "inception_3a/5x5" 372 | bottom: "inception_3a/pool_proj" 373 | top: "inception_3a/output" 374 | } 375 | layer { 376 | name: "inception_3b/1x1" 377 | type: "Convolution" 378 | bottom: "inception_3a/output" 379 | top: "inception_3b/1x1" 380 | param { 381 | lr_mult: 1.0 382 | decay_mult: 1.0 383 | } 384 | param { 385 | lr_mult: 2.0 386 | decay_mult: 0.0 387 | } 388 | convolution_param { 389 | num_output: 128 390 | kernel_size: 1 391 | weight_filler { 392 | type: "xavier" 393 | std: 0.03 394 | } 395 | bias_filler { 396 | type: "constant" 397 | value: 0.2 398 | } 399 | } 400 | } 401 | layer { 402 | name: "inception_3b/relu_1x1" 403 | type: "ReLU" 404 | bottom: "inception_3b/1x1" 405 | top: "inception_3b/1x1" 406 | } 407 | layer { 408 | name: "inception_3b/3x3_reduce" 409 | type: "Convolution" 410 | bottom: "inception_3a/output" 411 | top: "inception_3b/3x3_reduce" 412 | param { 413 | lr_mult: 1.0 414 | decay_mult: 1.0 415 | } 416 | param { 417 | lr_mult: 2.0 418 | decay_mult: 0.0 419 | } 420 | convolution_param { 421 | num_output: 128 422 | kernel_size: 1 423 | weight_filler { 424 | type: "xavier" 425 | std: 0.09 426 | } 427 | bias_filler { 428 | type: "constant" 429 | value: 0.2 430 | } 431 | } 432 | } 433 | layer { 434 | name: "inception_3b/relu_3x3_reduce" 435 | type: "ReLU" 436 | bottom: "inception_3b/3x3_reduce" 437 | top: "inception_3b/3x3_reduce" 438 | } 439 | layer { 440 | name: "inception_3b/3x3" 441 | type: "Convolution" 442 | bottom: "inception_3b/3x3_reduce" 443 | top: "inception_3b/3x3" 444 | param { 445 | lr_mult: 1.0 446 | decay_mult: 1.0 447 | } 448 | param { 449 | lr_mult: 2.0 450 | decay_mult: 0.0 451 | } 452 | convolution_param { 453 | num_output: 192 454 | pad: 1 455 | kernel_size: 3 456 | weight_filler { 457 | type: "xavier" 458 | std: 0.03 459 | } 460 | bias_filler { 461 | type: "constant" 462 | value: 0.2 463 | } 464 | } 465 | } 466 | layer { 467 | name: "inception_3b/relu_3x3" 468 | type: "ReLU" 469 | bottom: "inception_3b/3x3" 470 | top: "inception_3b/3x3" 471 | } 472 | layer { 473 | name: "inception_3b/5x5_reduce" 474 | type: "Convolution" 475 | bottom: "inception_3a/output" 476 | top: "inception_3b/5x5_reduce" 477 | param { 478 | lr_mult: 1.0 479 | decay_mult: 1.0 480 | } 481 | param { 482 | lr_mult: 2.0 483 | decay_mult: 0.0 484 | } 485 | convolution_param { 486 | num_output: 32 487 | kernel_size: 1 488 | weight_filler { 489 | type: "xavier" 490 | std: 0.2 491 | } 492 | bias_filler { 493 | type: "constant" 494 | value: 0.2 495 | } 496 | } 497 | } 498 | layer { 499 | name: "inception_3b/relu_5x5_reduce" 500 | type: "ReLU" 501 | bottom: "inception_3b/5x5_reduce" 502 | top: "inception_3b/5x5_reduce" 503 | } 504 | layer { 505 | name: "inception_3b/5x5" 506 | type: "Convolution" 507 | bottom: "inception_3b/5x5_reduce" 508 | top: "inception_3b/5x5" 509 | param { 510 | lr_mult: 1.0 511 | decay_mult: 1.0 512 | } 513 | param { 514 | lr_mult: 2.0 515 | decay_mult: 0.0 516 | } 517 | convolution_param { 518 | num_output: 96 519 | pad: 2 520 | kernel_size: 5 521 | weight_filler { 522 | type: "xavier" 523 | std: 0.03 524 | } 525 | bias_filler { 526 | type: "constant" 527 | value: 0.2 528 | } 529 | } 530 | } 531 | layer { 
532 | name: "inception_3b/relu_5x5" 533 | type: "ReLU" 534 | bottom: "inception_3b/5x5" 535 | top: "inception_3b/5x5" 536 | } 537 | layer { 538 | name: "inception_3b/pool" 539 | type: "Pooling" 540 | bottom: "inception_3a/output" 541 | top: "inception_3b/pool" 542 | pooling_param { 543 | pool: MAX 544 | kernel_size: 3 545 | stride: 1 546 | pad: 1 547 | } 548 | } 549 | layer { 550 | name: "inception_3b/pool_proj" 551 | type: "Convolution" 552 | bottom: "inception_3b/pool" 553 | top: "inception_3b/pool_proj" 554 | param { 555 | lr_mult: 1.0 556 | decay_mult: 1.0 557 | } 558 | param { 559 | lr_mult: 2.0 560 | decay_mult: 0.0 561 | } 562 | convolution_param { 563 | num_output: 64 564 | kernel_size: 1 565 | weight_filler { 566 | type: "xavier" 567 | std: 0.1 568 | } 569 | bias_filler { 570 | type: "constant" 571 | value: 0.2 572 | } 573 | } 574 | } 575 | layer { 576 | name: "inception_3b/relu_pool_proj" 577 | type: "ReLU" 578 | bottom: "inception_3b/pool_proj" 579 | top: "inception_3b/pool_proj" 580 | } 581 | layer { 582 | name: "inception_3b/output" 583 | type: "Concat" 584 | bottom: "inception_3b/1x1" 585 | bottom: "inception_3b/3x3" 586 | bottom: "inception_3b/5x5" 587 | bottom: "inception_3b/pool_proj" 588 | top: "inception_3b/output" 589 | } 590 | layer { 591 | name: "pool3/3x3_s2" 592 | type: "Pooling" 593 | bottom: "inception_3b/output" 594 | top: "pool3/3x3_s2" 595 | pooling_param { 596 | pool: MAX 597 | kernel_size: 3 598 | stride: 2 599 | } 600 | } 601 | layer { 602 | name: "inception_4a/1x1" 603 | type: "Convolution" 604 | bottom: "pool3/3x3_s2" 605 | top: "inception_4a/1x1" 606 | param { 607 | lr_mult: 1.0 608 | decay_mult: 1.0 609 | } 610 | param { 611 | lr_mult: 2.0 612 | decay_mult: 0.0 613 | } 614 | convolution_param { 615 | num_output: 192 616 | kernel_size: 1 617 | weight_filler { 618 | type: "xavier" 619 | std: 0.03 620 | } 621 | bias_filler { 622 | type: "constant" 623 | value: 0.2 624 | } 625 | } 626 | } 627 | layer { 628 | name: "inception_4a/relu_1x1" 629 | type: "ReLU" 630 | bottom: "inception_4a/1x1" 631 | top: "inception_4a/1x1" 632 | } 633 | layer { 634 | name: "inception_4a/3x3_reduce" 635 | type: "Convolution" 636 | bottom: "pool3/3x3_s2" 637 | top: "inception_4a/3x3_reduce" 638 | param { 639 | lr_mult: 1.0 640 | decay_mult: 1.0 641 | } 642 | param { 643 | lr_mult: 2.0 644 | decay_mult: 0.0 645 | } 646 | convolution_param { 647 | num_output: 96 648 | kernel_size: 1 649 | weight_filler { 650 | type: "xavier" 651 | std: 0.09 652 | } 653 | bias_filler { 654 | type: "constant" 655 | value: 0.2 656 | } 657 | } 658 | } 659 | layer { 660 | name: "inception_4a/relu_3x3_reduce" 661 | type: "ReLU" 662 | bottom: "inception_4a/3x3_reduce" 663 | top: "inception_4a/3x3_reduce" 664 | } 665 | layer { 666 | name: "inception_4a/3x3" 667 | type: "Convolution" 668 | bottom: "inception_4a/3x3_reduce" 669 | top: "inception_4a/3x3" 670 | param { 671 | lr_mult: 1.0 672 | decay_mult: 1.0 673 | } 674 | param { 675 | lr_mult: 2.0 676 | decay_mult: 0.0 677 | } 678 | convolution_param { 679 | num_output: 208 680 | pad: 1 681 | kernel_size: 3 682 | weight_filler { 683 | type: "xavier" 684 | std: 0.03 685 | } 686 | bias_filler { 687 | type: "constant" 688 | value: 0.2 689 | } 690 | } 691 | } 692 | layer { 693 | name: "inception_4a/relu_3x3" 694 | type: "ReLU" 695 | bottom: "inception_4a/3x3" 696 | top: "inception_4a/3x3" 697 | } 698 | layer { 699 | name: "inception_4a/5x5_reduce" 700 | type: "Convolution" 701 | bottom: "pool3/3x3_s2" 702 | top: "inception_4a/5x5_reduce" 703 | param { 704 | 
lr_mult: 1.0 705 | decay_mult: 1.0 706 | } 707 | param { 708 | lr_mult: 2.0 709 | decay_mult: 0.0 710 | } 711 | convolution_param { 712 | num_output: 16 713 | kernel_size: 1 714 | weight_filler { 715 | type: "xavier" 716 | std: 0.2 717 | } 718 | bias_filler { 719 | type: "constant" 720 | value: 0.2 721 | } 722 | } 723 | } 724 | layer { 725 | name: "inception_4a/relu_5x5_reduce" 726 | type: "ReLU" 727 | bottom: "inception_4a/5x5_reduce" 728 | top: "inception_4a/5x5_reduce" 729 | } 730 | layer { 731 | name: "inception_4a/5x5" 732 | type: "Convolution" 733 | bottom: "inception_4a/5x5_reduce" 734 | top: "inception_4a/5x5" 735 | param { 736 | lr_mult: 1.0 737 | decay_mult: 1.0 738 | } 739 | param { 740 | lr_mult: 2.0 741 | decay_mult: 0.0 742 | } 743 | convolution_param { 744 | num_output: 48 745 | pad: 2 746 | kernel_size: 5 747 | weight_filler { 748 | type: "xavier" 749 | std: 0.03 750 | } 751 | bias_filler { 752 | type: "constant" 753 | value: 0.2 754 | } 755 | } 756 | } 757 | layer { 758 | name: "inception_4a/relu_5x5" 759 | type: "ReLU" 760 | bottom: "inception_4a/5x5" 761 | top: "inception_4a/5x5" 762 | } 763 | layer { 764 | name: "inception_4a/pool" 765 | type: "Pooling" 766 | bottom: "pool3/3x3_s2" 767 | top: "inception_4a/pool" 768 | pooling_param { 769 | pool: MAX 770 | kernel_size: 3 771 | stride: 1 772 | pad: 1 773 | } 774 | } 775 | layer { 776 | name: "inception_4a/pool_proj" 777 | type: "Convolution" 778 | bottom: "inception_4a/pool" 779 | top: "inception_4a/pool_proj" 780 | param { 781 | lr_mult: 1.0 782 | decay_mult: 1.0 783 | } 784 | param { 785 | lr_mult: 2.0 786 | decay_mult: 0.0 787 | } 788 | convolution_param { 789 | num_output: 64 790 | kernel_size: 1 791 | weight_filler { 792 | type: "xavier" 793 | std: 0.1 794 | } 795 | bias_filler { 796 | type: "constant" 797 | value: 0.2 798 | } 799 | } 800 | } 801 | layer { 802 | name: "inception_4a/relu_pool_proj" 803 | type: "ReLU" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/pool_proj" 806 | } 807 | layer { 808 | name: "inception_4a/output" 809 | type: "Concat" 810 | bottom: "inception_4a/1x1" 811 | bottom: "inception_4a/3x3" 812 | bottom: "inception_4a/5x5" 813 | bottom: "inception_4a/pool_proj" 814 | top: "inception_4a/output" 815 | } 816 | layer { 817 | name: "inception_4b/1x1" 818 | type: "Convolution" 819 | bottom: "inception_4a/output" 820 | top: "inception_4b/1x1" 821 | param { 822 | lr_mult: 1.0 823 | decay_mult: 1.0 824 | } 825 | param { 826 | lr_mult: 2.0 827 | decay_mult: 0.0 828 | } 829 | convolution_param { 830 | num_output: 160 831 | kernel_size: 1 832 | weight_filler { 833 | type: "xavier" 834 | std: 0.03 835 | } 836 | bias_filler { 837 | type: "constant" 838 | value: 0.2 839 | } 840 | } 841 | } 842 | layer { 843 | name: "inception_4b/relu_1x1" 844 | type: "ReLU" 845 | bottom: "inception_4b/1x1" 846 | top: "inception_4b/1x1" 847 | } 848 | layer { 849 | name: "inception_4b/3x3_reduce" 850 | type: "Convolution" 851 | bottom: "inception_4a/output" 852 | top: "inception_4b/3x3_reduce" 853 | param { 854 | lr_mult: 1.0 855 | decay_mult: 1.0 856 | } 857 | param { 858 | lr_mult: 2.0 859 | decay_mult: 0.0 860 | } 861 | convolution_param { 862 | num_output: 112 863 | kernel_size: 1 864 | weight_filler { 865 | type: "xavier" 866 | std: 0.09 867 | } 868 | bias_filler { 869 | type: "constant" 870 | value: 0.2 871 | } 872 | } 873 | } 874 | layer { 875 | name: "inception_4b/relu_3x3_reduce" 876 | type: "ReLU" 877 | bottom: "inception_4b/3x3_reduce" 878 | top: "inception_4b/3x3_reduce" 879 | } 880 | layer { 881 | 
name: "inception_4b/3x3" 882 | type: "Convolution" 883 | bottom: "inception_4b/3x3_reduce" 884 | top: "inception_4b/3x3" 885 | param { 886 | lr_mult: 1.0 887 | decay_mult: 1.0 888 | } 889 | param { 890 | lr_mult: 2.0 891 | decay_mult: 0.0 892 | } 893 | convolution_param { 894 | num_output: 224 895 | pad: 1 896 | kernel_size: 3 897 | weight_filler { 898 | type: "xavier" 899 | std: 0.03 900 | } 901 | bias_filler { 902 | type: "constant" 903 | value: 0.2 904 | } 905 | } 906 | } 907 | layer { 908 | name: "inception_4b/relu_3x3" 909 | type: "ReLU" 910 | bottom: "inception_4b/3x3" 911 | top: "inception_4b/3x3" 912 | } 913 | layer { 914 | name: "inception_4b/5x5_reduce" 915 | type: "Convolution" 916 | bottom: "inception_4a/output" 917 | top: "inception_4b/5x5_reduce" 918 | param { 919 | lr_mult: 1.0 920 | decay_mult: 1.0 921 | } 922 | param { 923 | lr_mult: 2.0 924 | decay_mult: 0.0 925 | } 926 | convolution_param { 927 | num_output: 24 928 | kernel_size: 1 929 | weight_filler { 930 | type: "xavier" 931 | std: 0.2 932 | } 933 | bias_filler { 934 | type: "constant" 935 | value: 0.2 936 | } 937 | } 938 | } 939 | layer { 940 | name: "inception_4b/relu_5x5_reduce" 941 | type: "ReLU" 942 | bottom: "inception_4b/5x5_reduce" 943 | top: "inception_4b/5x5_reduce" 944 | } 945 | layer { 946 | name: "inception_4b/5x5" 947 | type: "Convolution" 948 | bottom: "inception_4b/5x5_reduce" 949 | top: "inception_4b/5x5" 950 | param { 951 | lr_mult: 1.0 952 | decay_mult: 1.0 953 | } 954 | param { 955 | lr_mult: 2.0 956 | decay_mult: 0.0 957 | } 958 | convolution_param { 959 | num_output: 64 960 | pad: 2 961 | kernel_size: 5 962 | weight_filler { 963 | type: "xavier" 964 | std: 0.03 965 | } 966 | bias_filler { 967 | type: "constant" 968 | value: 0.2 969 | } 970 | } 971 | } 972 | layer { 973 | name: "inception_4b/relu_5x5" 974 | type: "ReLU" 975 | bottom: "inception_4b/5x5" 976 | top: "inception_4b/5x5" 977 | } 978 | layer { 979 | name: "inception_4b/pool" 980 | type: "Pooling" 981 | bottom: "inception_4a/output" 982 | top: "inception_4b/pool" 983 | pooling_param { 984 | pool: MAX 985 | kernel_size: 3 986 | stride: 1 987 | pad: 1 988 | } 989 | } 990 | layer { 991 | name: "inception_4b/pool_proj" 992 | type: "Convolution" 993 | bottom: "inception_4b/pool" 994 | top: "inception_4b/pool_proj" 995 | param { 996 | lr_mult: 1.0 997 | decay_mult: 1.0 998 | } 999 | param { 1000 | lr_mult: 2.0 1001 | decay_mult: 0.0 1002 | } 1003 | convolution_param { 1004 | num_output: 64 1005 | kernel_size: 1 1006 | weight_filler { 1007 | type: "xavier" 1008 | std: 0.1 1009 | } 1010 | bias_filler { 1011 | type: "constant" 1012 | value: 0.2 1013 | } 1014 | } 1015 | } 1016 | layer { 1017 | name: "inception_4b/relu_pool_proj" 1018 | type: "ReLU" 1019 | bottom: "inception_4b/pool_proj" 1020 | top: "inception_4b/pool_proj" 1021 | } 1022 | layer { 1023 | name: "inception_4b/output" 1024 | type: "Concat" 1025 | bottom: "inception_4b/1x1" 1026 | bottom: "inception_4b/3x3" 1027 | bottom: "inception_4b/5x5" 1028 | bottom: "inception_4b/pool_proj" 1029 | top: "inception_4b/output" 1030 | } 1031 | layer { 1032 | name: "inception_4c/1x1" 1033 | type: "Convolution" 1034 | bottom: "inception_4b/output" 1035 | top: "inception_4c/1x1" 1036 | param { 1037 | lr_mult: 1.0 1038 | decay_mult: 1.0 1039 | } 1040 | param { 1041 | lr_mult: 2.0 1042 | decay_mult: 0.0 1043 | } 1044 | convolution_param { 1045 | num_output: 128 1046 | kernel_size: 1 1047 | weight_filler { 1048 | type: "xavier" 1049 | std: 0.03 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | 
value: 0.2 1054 | } 1055 | } 1056 | } 1057 | layer { 1058 | name: "inception_4c/relu_1x1" 1059 | type: "ReLU" 1060 | bottom: "inception_4c/1x1" 1061 | top: "inception_4c/1x1" 1062 | } 1063 | layer { 1064 | name: "inception_4c/3x3_reduce" 1065 | type: "Convolution" 1066 | bottom: "inception_4b/output" 1067 | top: "inception_4c/3x3_reduce" 1068 | param { 1069 | lr_mult: 1.0 1070 | decay_mult: 1.0 1071 | } 1072 | param { 1073 | lr_mult: 2.0 1074 | decay_mult: 0.0 1075 | } 1076 | convolution_param { 1077 | num_output: 128 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "xavier" 1081 | std: 0.09 1082 | } 1083 | bias_filler { 1084 | type: "constant" 1085 | value: 0.2 1086 | } 1087 | } 1088 | } 1089 | layer { 1090 | name: "inception_4c/relu_3x3_reduce" 1091 | type: "ReLU" 1092 | bottom: "inception_4c/3x3_reduce" 1093 | top: "inception_4c/3x3_reduce" 1094 | } 1095 | layer { 1096 | name: "inception_4c/3x3" 1097 | type: "Convolution" 1098 | bottom: "inception_4c/3x3_reduce" 1099 | top: "inception_4c/3x3" 1100 | param { 1101 | lr_mult: 1.0 1102 | decay_mult: 1.0 1103 | } 1104 | param { 1105 | lr_mult: 2.0 1106 | decay_mult: 0.0 1107 | } 1108 | convolution_param { 1109 | num_output: 256 1110 | pad: 1 1111 | kernel_size: 3 1112 | weight_filler { 1113 | type: "xavier" 1114 | std: 0.03 1115 | } 1116 | bias_filler { 1117 | type: "constant" 1118 | value: 0.2 1119 | } 1120 | } 1121 | } 1122 | layer { 1123 | name: "inception_4c/relu_3x3" 1124 | type: "ReLU" 1125 | bottom: "inception_4c/3x3" 1126 | top: "inception_4c/3x3" 1127 | } 1128 | layer { 1129 | name: "inception_4c/5x5_reduce" 1130 | type: "Convolution" 1131 | bottom: "inception_4b/output" 1132 | top: "inception_4c/5x5_reduce" 1133 | param { 1134 | lr_mult: 1.0 1135 | decay_mult: 1.0 1136 | } 1137 | param { 1138 | lr_mult: 2.0 1139 | decay_mult: 0.0 1140 | } 1141 | convolution_param { 1142 | num_output: 24 1143 | kernel_size: 1 1144 | weight_filler { 1145 | type: "xavier" 1146 | std: 0.2 1147 | } 1148 | bias_filler { 1149 | type: "constant" 1150 | value: 0.2 1151 | } 1152 | } 1153 | } 1154 | layer { 1155 | name: "inception_4c/relu_5x5_reduce" 1156 | type: "ReLU" 1157 | bottom: "inception_4c/5x5_reduce" 1158 | top: "inception_4c/5x5_reduce" 1159 | } 1160 | layer { 1161 | name: "inception_4c/5x5" 1162 | type: "Convolution" 1163 | bottom: "inception_4c/5x5_reduce" 1164 | top: "inception_4c/5x5" 1165 | param { 1166 | lr_mult: 1.0 1167 | decay_mult: 1.0 1168 | } 1169 | param { 1170 | lr_mult: 2.0 1171 | decay_mult: 0.0 1172 | } 1173 | convolution_param { 1174 | num_output: 64 1175 | pad: 2 1176 | kernel_size: 5 1177 | weight_filler { 1178 | type: "xavier" 1179 | std: 0.03 1180 | } 1181 | bias_filler { 1182 | type: "constant" 1183 | value: 0.2 1184 | } 1185 | } 1186 | } 1187 | layer { 1188 | name: "inception_4c/relu_5x5" 1189 | type: "ReLU" 1190 | bottom: "inception_4c/5x5" 1191 | top: "inception_4c/5x5" 1192 | } 1193 | layer { 1194 | name: "inception_4c/pool" 1195 | type: "Pooling" 1196 | bottom: "inception_4b/output" 1197 | top: "inception_4c/pool" 1198 | pooling_param { 1199 | pool: MAX 1200 | kernel_size: 3 1201 | stride: 1 1202 | pad: 1 1203 | } 1204 | } 1205 | layer { 1206 | name: "inception_4c/pool_proj" 1207 | type: "Convolution" 1208 | bottom: "inception_4c/pool" 1209 | top: "inception_4c/pool_proj" 1210 | param { 1211 | lr_mult: 1.0 1212 | decay_mult: 1.0 1213 | } 1214 | param { 1215 | lr_mult: 2.0 1216 | decay_mult: 0.0 1217 | } 1218 | convolution_param { 1219 | num_output: 64 1220 | kernel_size: 1 1221 | weight_filler { 1222 | type: 
"xavier" 1223 | std: 0.1 1224 | } 1225 | bias_filler { 1226 | type: "constant" 1227 | value: 0.2 1228 | } 1229 | } 1230 | } 1231 | layer { 1232 | name: "inception_4c/relu_pool_proj" 1233 | type: "ReLU" 1234 | bottom: "inception_4c/pool_proj" 1235 | top: "inception_4c/pool_proj" 1236 | } 1237 | layer { 1238 | name: "inception_4c/output" 1239 | type: "Concat" 1240 | bottom: "inception_4c/1x1" 1241 | bottom: "inception_4c/3x3" 1242 | bottom: "inception_4c/5x5" 1243 | bottom: "inception_4c/pool_proj" 1244 | top: "inception_4c/output" 1245 | } 1246 | layer { 1247 | name: "inception_4d/1x1" 1248 | type: "Convolution" 1249 | bottom: "inception_4c/output" 1250 | top: "inception_4d/1x1" 1251 | param { 1252 | lr_mult: 1.0 1253 | decay_mult: 1.0 1254 | } 1255 | param { 1256 | lr_mult: 2.0 1257 | decay_mult: 0.0 1258 | } 1259 | convolution_param { 1260 | num_output: 112 1261 | kernel_size: 1 1262 | weight_filler { 1263 | type: "xavier" 1264 | std: 0.1 1265 | } 1266 | bias_filler { 1267 | type: "constant" 1268 | value: 0.2 1269 | } 1270 | } 1271 | } 1272 | layer { 1273 | name: "inception_4d/relu_1x1" 1274 | type: "ReLU" 1275 | bottom: "inception_4d/1x1" 1276 | top: "inception_4d/1x1" 1277 | } 1278 | layer { 1279 | name: "inception_4d/3x3_reduce" 1280 | type: "Convolution" 1281 | bottom: "inception_4c/output" 1282 | top: "inception_4d/3x3_reduce" 1283 | param { 1284 | lr_mult: 1.0 1285 | decay_mult: 1.0 1286 | } 1287 | param { 1288 | lr_mult: 2.0 1289 | decay_mult: 0.0 1290 | } 1291 | convolution_param { 1292 | num_output: 144 1293 | kernel_size: 1 1294 | weight_filler { 1295 | type: "xavier" 1296 | std: 0.1 1297 | } 1298 | bias_filler { 1299 | type: "constant" 1300 | value: 0.2 1301 | } 1302 | } 1303 | } 1304 | layer { 1305 | name: "inception_4d/relu_3x3_reduce" 1306 | type: "ReLU" 1307 | bottom: "inception_4d/3x3_reduce" 1308 | top: "inception_4d/3x3_reduce" 1309 | } 1310 | layer { 1311 | name: "inception_4d/3x3" 1312 | type: "Convolution" 1313 | bottom: "inception_4d/3x3_reduce" 1314 | top: "inception_4d/3x3" 1315 | param { 1316 | lr_mult: 1.0 1317 | decay_mult: 1.0 1318 | } 1319 | param { 1320 | lr_mult: 2.0 1321 | decay_mult: 0.0 1322 | } 1323 | convolution_param { 1324 | num_output: 288 1325 | pad: 1 1326 | kernel_size: 3 1327 | weight_filler { 1328 | type: "xavier" 1329 | std: 0.1 1330 | } 1331 | bias_filler { 1332 | type: "constant" 1333 | value: 0.2 1334 | } 1335 | } 1336 | } 1337 | layer { 1338 | name: "inception_4d/relu_3x3" 1339 | type: "ReLU" 1340 | bottom: "inception_4d/3x3" 1341 | top: "inception_4d/3x3" 1342 | } 1343 | layer { 1344 | name: "inception_4d/5x5_reduce" 1345 | type: "Convolution" 1346 | bottom: "inception_4c/output" 1347 | top: "inception_4d/5x5_reduce" 1348 | param { 1349 | lr_mult: 1.0 1350 | decay_mult: 1.0 1351 | } 1352 | param { 1353 | lr_mult: 2.0 1354 | decay_mult: 0.0 1355 | } 1356 | convolution_param { 1357 | num_output: 32 1358 | kernel_size: 1 1359 | weight_filler { 1360 | type: "xavier" 1361 | std: 0.1 1362 | } 1363 | bias_filler { 1364 | type: "constant" 1365 | value: 0.2 1366 | } 1367 | } 1368 | } 1369 | layer { 1370 | name: "inception_4d/relu_5x5_reduce" 1371 | type: "ReLU" 1372 | bottom: "inception_4d/5x5_reduce" 1373 | top: "inception_4d/5x5_reduce" 1374 | } 1375 | layer { 1376 | name: "inception_4d/5x5" 1377 | type: "Convolution" 1378 | bottom: "inception_4d/5x5_reduce" 1379 | top: "inception_4d/5x5" 1380 | param { 1381 | lr_mult: 1.0 1382 | decay_mult: 1.0 1383 | } 1384 | param { 1385 | lr_mult: 2.0 1386 | decay_mult: 0.0 1387 | } 1388 | convolution_param { 
1389 | num_output: 64 1390 | pad: 2 1391 | kernel_size: 5 1392 | weight_filler { 1393 | type: "xavier" 1394 | std: 0.1 1395 | } 1396 | bias_filler { 1397 | type: "constant" 1398 | value: 0.2 1399 | } 1400 | } 1401 | } 1402 | layer { 1403 | name: "inception_4d/relu_5x5" 1404 | type: "ReLU" 1405 | bottom: "inception_4d/5x5" 1406 | top: "inception_4d/5x5" 1407 | } 1408 | layer { 1409 | name: "inception_4d/pool" 1410 | type: "Pooling" 1411 | bottom: "inception_4c/output" 1412 | top: "inception_4d/pool" 1413 | pooling_param { 1414 | pool: MAX 1415 | kernel_size: 3 1416 | stride: 1 1417 | pad: 1 1418 | } 1419 | } 1420 | layer { 1421 | name: "inception_4d/pool_proj" 1422 | type: "Convolution" 1423 | bottom: "inception_4d/pool" 1424 | top: "inception_4d/pool_proj" 1425 | param { 1426 | lr_mult: 1.0 1427 | decay_mult: 1.0 1428 | } 1429 | param { 1430 | lr_mult: 2.0 1431 | decay_mult: 0.0 1432 | } 1433 | convolution_param { 1434 | num_output: 64 1435 | kernel_size: 1 1436 | weight_filler { 1437 | type: "xavier" 1438 | std: 0.1 1439 | } 1440 | bias_filler { 1441 | type: "constant" 1442 | value: 0.2 1443 | } 1444 | } 1445 | } 1446 | layer { 1447 | name: "inception_4d/relu_pool_proj" 1448 | type: "ReLU" 1449 | bottom: "inception_4d/pool_proj" 1450 | top: "inception_4d/pool_proj" 1451 | } 1452 | layer { 1453 | name: "inception_4d/output" 1454 | type: "Concat" 1455 | bottom: "inception_4d/1x1" 1456 | bottom: "inception_4d/3x3" 1457 | bottom: "inception_4d/5x5" 1458 | bottom: "inception_4d/pool_proj" 1459 | top: "inception_4d/output" 1460 | } 1461 | layer { 1462 | name: "inception_4e/1x1" 1463 | type: "Convolution" 1464 | bottom: "inception_4d/output" 1465 | top: "inception_4e/1x1" 1466 | param { 1467 | lr_mult: 1.0 1468 | decay_mult: 1.0 1469 | } 1470 | param { 1471 | lr_mult: 2.0 1472 | decay_mult: 0.0 1473 | } 1474 | convolution_param { 1475 | num_output: 256 1476 | kernel_size: 1 1477 | weight_filler { 1478 | type: "xavier" 1479 | std: 0.03 1480 | } 1481 | bias_filler { 1482 | type: "constant" 1483 | value: 0.2 1484 | } 1485 | } 1486 | } 1487 | layer { 1488 | name: "inception_4e/relu_1x1" 1489 | type: "ReLU" 1490 | bottom: "inception_4e/1x1" 1491 | top: "inception_4e/1x1" 1492 | } 1493 | layer { 1494 | name: "inception_4e/3x3_reduce" 1495 | type: "Convolution" 1496 | bottom: "inception_4d/output" 1497 | top: "inception_4e/3x3_reduce" 1498 | param { 1499 | lr_mult: 1.0 1500 | decay_mult: 1.0 1501 | } 1502 | param { 1503 | lr_mult: 2.0 1504 | decay_mult: 0.0 1505 | } 1506 | convolution_param { 1507 | num_output: 160 1508 | kernel_size: 1 1509 | weight_filler { 1510 | type: "xavier" 1511 | std: 0.09 1512 | } 1513 | bias_filler { 1514 | type: "constant" 1515 | value: 0.2 1516 | } 1517 | } 1518 | } 1519 | layer { 1520 | name: "inception_4e/relu_3x3_reduce" 1521 | type: "ReLU" 1522 | bottom: "inception_4e/3x3_reduce" 1523 | top: "inception_4e/3x3_reduce" 1524 | } 1525 | layer { 1526 | name: "inception_4e/3x3" 1527 | type: "Convolution" 1528 | bottom: "inception_4e/3x3_reduce" 1529 | top: "inception_4e/3x3" 1530 | param { 1531 | lr_mult: 1.0 1532 | decay_mult: 1.0 1533 | } 1534 | param { 1535 | lr_mult: 2.0 1536 | decay_mult: 0.0 1537 | } 1538 | convolution_param { 1539 | num_output: 320 1540 | pad: 1 1541 | kernel_size: 3 1542 | weight_filler { 1543 | type: "xavier" 1544 | std: 0.03 1545 | } 1546 | bias_filler { 1547 | type: "constant" 1548 | value: 0.2 1549 | } 1550 | } 1551 | } 1552 | layer { 1553 | name: "inception_4e/relu_3x3" 1554 | type: "ReLU" 1555 | bottom: "inception_4e/3x3" 1556 | top: 
"inception_4e/3x3" 1557 | } 1558 | layer { 1559 | name: "inception_4e/5x5_reduce" 1560 | type: "Convolution" 1561 | bottom: "inception_4d/output" 1562 | top: "inception_4e/5x5_reduce" 1563 | param { 1564 | lr_mult: 1.0 1565 | decay_mult: 1.0 1566 | } 1567 | param { 1568 | lr_mult: 2.0 1569 | decay_mult: 0.0 1570 | } 1571 | convolution_param { 1572 | num_output: 32 1573 | kernel_size: 1 1574 | weight_filler { 1575 | type: "xavier" 1576 | std: 0.2 1577 | } 1578 | bias_filler { 1579 | type: "constant" 1580 | value: 0.2 1581 | } 1582 | } 1583 | } 1584 | layer { 1585 | name: "inception_4e/relu_5x5_reduce" 1586 | type: "ReLU" 1587 | bottom: "inception_4e/5x5_reduce" 1588 | top: "inception_4e/5x5_reduce" 1589 | } 1590 | layer { 1591 | name: "inception_4e/5x5" 1592 | type: "Convolution" 1593 | bottom: "inception_4e/5x5_reduce" 1594 | top: "inception_4e/5x5" 1595 | param { 1596 | lr_mult: 1.0 1597 | decay_mult: 1.0 1598 | } 1599 | param { 1600 | lr_mult: 2.0 1601 | decay_mult: 0.0 1602 | } 1603 | convolution_param { 1604 | num_output: 128 1605 | pad: 2 1606 | kernel_size: 5 1607 | weight_filler { 1608 | type: "xavier" 1609 | std: 0.03 1610 | } 1611 | bias_filler { 1612 | type: "constant" 1613 | value: 0.2 1614 | } 1615 | } 1616 | } 1617 | layer { 1618 | name: "inception_4e/relu_5x5" 1619 | type: "ReLU" 1620 | bottom: "inception_4e/5x5" 1621 | top: "inception_4e/5x5" 1622 | } 1623 | layer { 1624 | name: "inception_4e/pool" 1625 | type: "Pooling" 1626 | bottom: "inception_4d/output" 1627 | top: "inception_4e/pool" 1628 | pooling_param { 1629 | pool: MAX 1630 | kernel_size: 3 1631 | stride: 1 1632 | pad: 1 1633 | } 1634 | } 1635 | layer { 1636 | name: "inception_4e/pool_proj" 1637 | type: "Convolution" 1638 | bottom: "inception_4e/pool" 1639 | top: "inception_4e/pool_proj" 1640 | param { 1641 | lr_mult: 1.0 1642 | decay_mult: 1.0 1643 | } 1644 | param { 1645 | lr_mult: 2.0 1646 | decay_mult: 0.0 1647 | } 1648 | convolution_param { 1649 | num_output: 128 1650 | kernel_size: 1 1651 | weight_filler { 1652 | type: "xavier" 1653 | std: 0.1 1654 | } 1655 | bias_filler { 1656 | type: "constant" 1657 | value: 0.2 1658 | } 1659 | } 1660 | } 1661 | layer { 1662 | name: "inception_4e/relu_pool_proj" 1663 | type: "ReLU" 1664 | bottom: "inception_4e/pool_proj" 1665 | top: "inception_4e/pool_proj" 1666 | } 1667 | layer { 1668 | name: "inception_4e/output" 1669 | type: "Concat" 1670 | bottom: "inception_4e/1x1" 1671 | bottom: "inception_4e/3x3" 1672 | bottom: "inception_4e/5x5" 1673 | bottom: "inception_4e/pool_proj" 1674 | top: "inception_4e/output" 1675 | } 1676 | layer { 1677 | name: "inception_5a/1x1" 1678 | type: "Convolution" 1679 | bottom: "inception_4e/output" 1680 | top: "inception_5a/1x1" 1681 | param { 1682 | lr_mult: 1.0 1683 | decay_mult: 1.0 1684 | } 1685 | param { 1686 | lr_mult: 2.0 1687 | decay_mult: 0.0 1688 | } 1689 | convolution_param { 1690 | num_output: 256 1691 | kernel_size: 1 1692 | weight_filler { 1693 | type: "xavier" 1694 | std: 0.03 1695 | } 1696 | bias_filler { 1697 | type: "constant" 1698 | value: 0.2 1699 | } 1700 | } 1701 | } 1702 | layer { 1703 | name: "inception_5a/relu_1x1" 1704 | type: "ReLU" 1705 | bottom: "inception_5a/1x1" 1706 | top: "inception_5a/1x1" 1707 | } 1708 | layer { 1709 | name: "inception_5a/3x3_reduce" 1710 | type: "Convolution" 1711 | bottom: "inception_4e/output" 1712 | top: "inception_5a/3x3_reduce" 1713 | param { 1714 | lr_mult: 1.0 1715 | decay_mult: 1.0 1716 | } 1717 | param { 1718 | lr_mult: 2.0 1719 | decay_mult: 0.0 1720 | } 1721 | convolution_param { 
1722 | num_output: 160 1723 | kernel_size: 1 1724 | weight_filler { 1725 | type: "xavier" 1726 | std: 0.09 1727 | } 1728 | bias_filler { 1729 | type: "constant" 1730 | value: 0.2 1731 | } 1732 | } 1733 | } 1734 | layer { 1735 | name: "inception_5a/relu_3x3_reduce" 1736 | type: "ReLU" 1737 | bottom: "inception_5a/3x3_reduce" 1738 | top: "inception_5a/3x3_reduce" 1739 | } 1740 | layer { 1741 | name: "inception_5a/3x3" 1742 | type: "Convolution" 1743 | bottom: "inception_5a/3x3_reduce" 1744 | top: "inception_5a/3x3" 1745 | param { 1746 | lr_mult: 1.0 1747 | decay_mult: 1.0 1748 | } 1749 | param { 1750 | lr_mult: 2.0 1751 | decay_mult: 0.0 1752 | } 1753 | convolution_param { 1754 | num_output: 320 1755 | pad: 1 1756 | kernel_size: 3 1757 | weight_filler { 1758 | type: "xavier" 1759 | std: 0.03 1760 | } 1761 | bias_filler { 1762 | type: "constant" 1763 | value: 0.2 1764 | } 1765 | } 1766 | } 1767 | layer { 1768 | name: "inception_5a/relu_3x3" 1769 | type: "ReLU" 1770 | bottom: "inception_5a/3x3" 1771 | top: "inception_5a/3x3" 1772 | } 1773 | layer { 1774 | name: "inception_5a/5x5_reduce" 1775 | type: "Convolution" 1776 | bottom: "inception_4e/output" 1777 | top: "inception_5a/5x5_reduce" 1778 | param { 1779 | lr_mult: 1.0 1780 | decay_mult: 1.0 1781 | } 1782 | param { 1783 | lr_mult: 2.0 1784 | decay_mult: 0.0 1785 | } 1786 | convolution_param { 1787 | num_output: 32 1788 | kernel_size: 1 1789 | weight_filler { 1790 | type: "xavier" 1791 | std: 0.2 1792 | } 1793 | bias_filler { 1794 | type: "constant" 1795 | value: 0.2 1796 | } 1797 | } 1798 | } 1799 | layer { 1800 | name: "inception_5a/relu_5x5_reduce" 1801 | type: "ReLU" 1802 | bottom: "inception_5a/5x5_reduce" 1803 | top: "inception_5a/5x5_reduce" 1804 | } 1805 | layer { 1806 | name: "inception_5a/5x5" 1807 | type: "Convolution" 1808 | bottom: "inception_5a/5x5_reduce" 1809 | top: "inception_5a/5x5" 1810 | param { 1811 | lr_mult: 1.0 1812 | decay_mult: 1.0 1813 | } 1814 | param { 1815 | lr_mult: 2.0 1816 | decay_mult: 0.0 1817 | } 1818 | convolution_param { 1819 | num_output: 128 1820 | pad: 2 1821 | kernel_size: 5 1822 | weight_filler { 1823 | type: "xavier" 1824 | std: 0.03 1825 | } 1826 | bias_filler { 1827 | type: "constant" 1828 | value: 0.2 1829 | } 1830 | } 1831 | } 1832 | layer { 1833 | name: "inception_5a/relu_5x5" 1834 | type: "ReLU" 1835 | bottom: "inception_5a/5x5" 1836 | top: "inception_5a/5x5" 1837 | } 1838 | layer { 1839 | name: "inception_5a/pool" 1840 | type: "Pooling" 1841 | bottom: "inception_4e/output" 1842 | top: "inception_5a/pool" 1843 | pooling_param { 1844 | pool: MAX 1845 | kernel_size: 3 1846 | stride: 1 1847 | pad: 1 1848 | } 1849 | } 1850 | layer { 1851 | name: "inception_5a/pool_proj" 1852 | type: "Convolution" 1853 | bottom: "inception_5a/pool" 1854 | top: "inception_5a/pool_proj" 1855 | param { 1856 | lr_mult: 1.0 1857 | decay_mult: 1.0 1858 | } 1859 | param { 1860 | lr_mult: 2.0 1861 | decay_mult: 0.0 1862 | } 1863 | convolution_param { 1864 | num_output: 128 1865 | kernel_size: 1 1866 | weight_filler { 1867 | type: "xavier" 1868 | std: 0.1 1869 | } 1870 | bias_filler { 1871 | type: "constant" 1872 | value: 0.2 1873 | } 1874 | } 1875 | } 1876 | layer { 1877 | name: "inception_5a/relu_pool_proj" 1878 | type: "ReLU" 1879 | bottom: "inception_5a/pool_proj" 1880 | top: "inception_5a/pool_proj" 1881 | } 1882 | layer { 1883 | name: "inception_5a/output" 1884 | type: "Concat" 1885 | bottom: "inception_5a/1x1" 1886 | bottom: "inception_5a/3x3" 1887 | bottom: "inception_5a/5x5" 1888 | bottom: "inception_5a/pool_proj" 
1889 | top: "inception_5a/output" 1890 | } 1891 | layer { 1892 | name: "inception_5b/1x1" 1893 | type: "Convolution" 1894 | bottom: "inception_5a/output" 1895 | top: "inception_5b/1x1" 1896 | param { 1897 | lr_mult: 1.0 1898 | decay_mult: 1.0 1899 | } 1900 | param { 1901 | lr_mult: 2.0 1902 | decay_mult: 0.0 1903 | } 1904 | convolution_param { 1905 | num_output: 720 1906 | kernel_size: 1 1907 | weight_filler { 1908 | type: "xavier" 1909 | std: 0.1 1910 | } 1911 | bias_filler { 1912 | type: "constant" 1913 | value: 0.2 1914 | } 1915 | } 1916 | } 1917 | layer { 1918 | name: "inception_5b/relu_1x1" 1919 | type: "ReLU" 1920 | bottom: "inception_5b/1x1" 1921 | top: "inception_5b/1x1" 1922 | } 1923 | layer { 1924 | name: "inception_5b/3x3_reduce" 1925 | type: "Convolution" 1926 | bottom: "inception_5a/output" 1927 | top: "inception_5b/3x3_reduce" 1928 | param { 1929 | lr_mult: 1.0 1930 | decay_mult: 1.0 1931 | } 1932 | param { 1933 | lr_mult: 1.0 1934 | decay_mult: 0.0 1935 | } 1936 | convolution_param { 1937 | num_output: 192 1938 | kernel_size: 1 1939 | weight_filler { 1940 | type: "xavier" 1941 | std: 0.1 1942 | } 1943 | bias_filler { 1944 | type: "constant" 1945 | value: 0.2 1946 | } 1947 | } 1948 | } 1949 | layer { 1950 | name: "inception_5b/relu_3x3_reduce" 1951 | type: "ReLU" 1952 | bottom: "inception_5b/3x3_reduce" 1953 | top: "inception_5b/3x3_reduce" 1954 | } 1955 | layer { 1956 | name: "inception_5b/3x3" 1957 | type: "Convolution" 1958 | bottom: "inception_5b/3x3_reduce" 1959 | top: "inception_5b/3x3" 1960 | param { 1961 | lr_mult: 1.0 1962 | decay_mult: 1.0 1963 | } 1964 | param { 1965 | lr_mult: 2.0 1966 | decay_mult: 0.0 1967 | } 1968 | convolution_param { 1969 | num_output: 720 1970 | pad: 1 1971 | kernel_size: 3 1972 | weight_filler { 1973 | type: "xavier" 1974 | std: 0.1 1975 | } 1976 | bias_filler { 1977 | type: "constant" 1978 | value: 0.2 1979 | } 1980 | } 1981 | } 1982 | layer { 1983 | name: "inception_5b/relu_3x3" 1984 | type: "ReLU" 1985 | bottom: "inception_5b/3x3" 1986 | top: "inception_5b/3x3" 1987 | } 1988 | layer { 1989 | name: "inception_5b/5x5_reduce" 1990 | type: "Convolution" 1991 | bottom: "inception_5a/output" 1992 | top: "inception_5b/5x5_reduce" 1993 | param { 1994 | lr_mult: 1.0 1995 | decay_mult: 1.0 1996 | } 1997 | param { 1998 | lr_mult: 2.0 1999 | decay_mult: 0.0 2000 | } 2001 | convolution_param { 2002 | num_output: 48 2003 | kernel_size: 1 2004 | weight_filler { 2005 | type: "xavier" 2006 | std: 0.1 2007 | } 2008 | bias_filler { 2009 | type: "constant" 2010 | value: 0.2 2011 | } 2012 | } 2013 | } 2014 | layer { 2015 | name: "inception_5b/relu_5x5_reduce" 2016 | type: "ReLU" 2017 | bottom: "inception_5b/5x5_reduce" 2018 | top: "inception_5b/5x5_reduce" 2019 | } 2020 | layer { 2021 | name: "inception_5b/5x5" 2022 | type: "Convolution" 2023 | bottom: "inception_5b/5x5_reduce" 2024 | top: "inception_5b/5x5" 2025 | param { 2026 | lr_mult: 1.0 2027 | decay_mult: 1.0 2028 | } 2029 | param { 2030 | lr_mult: 2.0 2031 | decay_mult: 0.0 2032 | } 2033 | convolution_param { 2034 | num_output: 128 2035 | pad: 2 2036 | kernel_size: 5 2037 | weight_filler { 2038 | type: "xavier" 2039 | std: 0.1 2040 | } 2041 | bias_filler { 2042 | type: "constant" 2043 | value: 0.2 2044 | } 2045 | } 2046 | } 2047 | layer { 2048 | name: "inception_5b/relu_5x5" 2049 | type: "ReLU" 2050 | bottom: "inception_5b/5x5" 2051 | top: "inception_5b/5x5" 2052 | } 2053 | layer { 2054 | name: "inception_5b/pool" 2055 | type: "Pooling" 2056 | bottom: "inception_5a/output" 2057 | top: 
"inception_5b/pool" 2058 | pooling_param { 2059 | pool: MAX 2060 | kernel_size: 3 2061 | stride: 1 2062 | pad: 1 2063 | } 2064 | } 2065 | layer { 2066 | name: "inception_5b/pool_proj" 2067 | type: "Convolution" 2068 | bottom: "inception_5b/pool" 2069 | top: "inception_5b/pool_proj" 2070 | param { 2071 | lr_mult: 1.0 2072 | decay_mult: 1.0 2073 | } 2074 | param { 2075 | lr_mult: 2.0 2076 | decay_mult: 0.0 2077 | } 2078 | convolution_param { 2079 | num_output: 128 2080 | kernel_size: 1 2081 | weight_filler { 2082 | type: "xavier" 2083 | std: 0.1 2084 | } 2085 | bias_filler { 2086 | type: "constant" 2087 | value: 0.2 2088 | } 2089 | } 2090 | } 2091 | layer { 2092 | name: "inception_5b/relu_pool_proj" 2093 | type: "ReLU" 2094 | bottom: "inception_5b/pool_proj" 2095 | top: "inception_5b/pool_proj" 2096 | } 2097 | layer { 2098 | name: "inception_5b/output" 2099 | type: "Concat" 2100 | bottom: "inception_5b/1x1" 2101 | bottom: "inception_5b/3x3" 2102 | bottom: "inception_5b/5x5" 2103 | bottom: "inception_5b/pool_proj" 2104 | top: "inception_5b/output" 2105 | } 2106 | layer { 2107 | name: "pool5/drop_s1" 2108 | type: "Dropout" 2109 | bottom: "inception_5b/output" 2110 | top: "pool5/drop_s1" 2111 | dropout_param { 2112 | dropout_ratio: 0.4 2113 | } 2114 | } 2115 | layer { 2116 | name: "cvg/classifier" 2117 | type: "Convolution" 2118 | bottom: "pool5/drop_s1" 2119 | top: "cvg/classifier" 2120 | param { 2121 | lr_mult: 1.0 2122 | decay_mult: 1.0 2123 | } 2124 | param { 2125 | lr_mult: 2.0 2126 | decay_mult: 0.0 2127 | } 2128 | convolution_param { 2129 | num_output: 1 2130 | kernel_size: 1 2131 | weight_filler { 2132 | type: "xavier" 2133 | std: 0.03 2134 | } 2135 | bias_filler { 2136 | type: "constant" 2137 | value: 0.0 2138 | } 2139 | } 2140 | } 2141 | layer { 2142 | name: "coverage/sig" 2143 | type: "Sigmoid" 2144 | bottom: "cvg/classifier" 2145 | top: "coverage" 2146 | } 2147 | layer { 2148 | name: "bbox/regressor" 2149 | type: "Convolution" 2150 | bottom: "pool5/drop_s1" 2151 | top: "bboxes" 2152 | param { 2153 | lr_mult: 1.0 2154 | decay_mult: 1.0 2155 | } 2156 | param { 2157 | lr_mult: 2.0 2158 | decay_mult: 0.0 2159 | } 2160 | convolution_param { 2161 | num_output: 4 2162 | kernel_size: 1 2163 | weight_filler { 2164 | type: "xavier" 2165 | std: 0.03 2166 | } 2167 | bias_filler { 2168 | type: "constant" 2169 | value: 0.0 2170 | } 2171 | } 2172 | } 2173 | layer { 2174 | name: "cluster" 2175 | type: "Python" 2176 | bottom: "coverage" 2177 | bottom: "bboxes" 2178 | top: "bbox-list" 2179 | python_param { 2180 | module: "caffe.layers.detectnet.clustering" 2181 | layer: "ClusterDetections" 2182 | param_str: "1280, 720, 16, 0.6, 3, 0.02, 22" 2183 | } 2184 | } 2185 | -------------------------------------------------------------------------------- /process-video-rt.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import caffe 4 | 5 | 6 | MODEL_FILE = '/Users/kris/Downloads/fedex_mil_model_epoch_70.0/deploy.prototxt' 7 | PRETRAINED = '/Users/kris/Downloads/fedex_mil_model_epoch_70.0/snapshot_iter_28000.caffemodel' 8 | IMAGE_FILE = '/Users/kris/Downloads/football2-resized2/resized/195583_01_01.png' 9 | 10 | cap = cv2.VideoCapture(0) 11 | cap = cv2.VideoCapture('/Users/kris/Downloads/output.mp4') 12 | 13 | caffe.set_mode_cpu() 14 | 15 | net = caffe.Net(MODEL_FILE, PRETRAINED, caffe.TEST); 16 | 17 | img = cv2.imread(IMAGE_FILE); 18 | 19 | # print(res) 20 | 21 | while(True): 22 | # Capture frame-by-frame 23 | ret, frame = 
cap.read()
24 |     if not ret:
25 |         break # end of stream; avoid passing None to cvtColor below
26 | 
27 |     # Our operations on the frame come here
28 |     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
29 | 
30 |     # Display the resulting frame
31 |     cv2.imshow('frame',gray)
32 |     if cv2.waitKey(1) & 0xFF == ord('q'):
33 |         break
34 | 
35 | # When everything done, release the capture
36 | cap.release()
37 | cv2.destroyAllWindows()
38 | 
-------------------------------------------------------------------------------- /resize-img.py: --------------------------------------------------------------------------------
1 | from PIL import Image
2 | import os, errno
3 | import image_slicer
4 | from shutil import move
5 | 
6 | new_width, new_height = 1280, 720
7 | number_of_slices = 4
8 | sourceDirectory='/Users/kris/Downloads/football1'
9 | destinationDirectory='/Users/kris/Downloads/football2-resized2'
10 | 
11 | if not os.path.exists(destinationDirectory):
12 |     os.makedirs(destinationDirectory + '/slices')
13 |     os.makedirs(destinationDirectory + '/resized')
14 | 
15 | # Slice images one by one and copy them to the new directory
16 | for filename in os.listdir(sourceDirectory):
17 |     if filename.endswith(".jpg"):
18 |         print("Slicing: {0}".format(os.path.join(sourceDirectory, filename)))
19 |         pre, ext = os.path.splitext(filename)
20 |         tiles = image_slicer.slice(os.path.join(sourceDirectory, filename), number_of_slices, save=False)
21 |         image_slicer.save_tiles(tiles, directory=destinationDirectory + '/slices', prefix=pre)
22 | 
23 | # Resize every slice to the network input size
24 | for filename in os.listdir(destinationDirectory + '/slices'):
25 |     if filename.endswith(".png"):
26 |         print("Resizing: {0}".format(os.path.join(destinationDirectory + '/slices', filename)))
27 |         img = Image.open(os.path.join(destinationDirectory + '/slices', filename))
28 |         img = img.resize((new_width, new_height), Image.ANTIALIAS)
29 |         img.save(os.path.join(destinationDirectory + '/resized', filename))
30 | 
31 | print('Done!')
32 | 
-------------------------------------------------------------------------------- /video-to-frames.py: --------------------------------------------------------------------------------
1 | import cv2
2 | import math
3 | 
4 | 
5 | print(cv2.__version__)
6 | vidcap = cv2.VideoCapture('UEL.2017.09.14.Hoffenheim.vs.Braga.EN.720p-FS.mp4')
7 | frameRate = vidcap.get(5) # property 5 is CAP_PROP_FPS
8 | print 'Frame rate: ', frameRate
9 | success,image = vidcap.read()
10 | count = 0
11 | success = True
12 | while success:
13 |     success,image = vidcap.read()
14 |     frameId = vidcap.get(1) # property 1 is CAP_PROP_POS_FRAMES (current frame number)
15 |     if success and (frameId % math.floor(frameRate) == 0):
16 |         print 'Read a new frame: ', success
17 |         cv2.imwrite("/Users/kris/Downloads/football1/%d.jpg" % count, image) # save frame as JPEG file
18 |         count += 1
19 | 
20 | vidcap.release()
21 | print "Done!"
22 | 
--------------------------------------------------------------------------------
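A side note on video-to-frames.py above: the magic property indexes passed to
vidcap.get() are OpenCV's CAP_PROP_FPS (5) and CAP_PROP_POS_FRAMES (1). A
behaviour-equivalent sketch using the named constants (this assumes OpenCV
3.x, where the constants live directly on the cv2 module; the input path is a
placeholder):

import cv2
import math

vidcap = cv2.VideoCapture('match.mp4')
frameRate = vidcap.get(cv2.CAP_PROP_FPS)
count = 0
success = True
while success:
    success, image = vidcap.read()
    frameId = vidcap.get(cv2.CAP_PROP_POS_FRAMES) # index of the next frame
    if success and frameId % math.floor(frameRate) == 0:
        # keep roughly one frame per second of footage
        cv2.imwrite('frame%d.jpg' % count, image)
        count += 1
vidcap.release()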