├── README.md ├── divingdeeper.ipynb ├── numpyexample.jpg ├── overbridge.jpg ├── swamp.jpg ├── tmp.prototxt └── video ├── les_coleman.m4v └── runnitopticalflow.py /README.md: -------------------------------------------------------------------------------- 1 | # Deep Dream Tutorial 2 | 3 | This is a walkthrough of the Deep Dream code created by Google. In it you'll learn how to create Deep Dreams, 4 | Controlled Deep Dreams, and Controlled Video Deep Dreams. There is a [blog post](http://www.kpkaiser.com/machine-learning/diving-deeper-into-deep-dreams/) 5 | to go along with this, and pull requests are welcomed. 6 | 7 | The images within were all created or shot by me, and you are free to do with them as you wish. 8 | 9 | To get this running, first install caffe, ffmpeg, and opencv. That's a tall order, but luckily, Googling and hitting your head on a wall should help. Once you've done that: 10 | ```bash 11 | $ ipython notebook 12 | ``` 13 | 14 | ## Run the Video Example 15 | 16 | ```bash 17 | $ cd video 18 | $ ffmpeg -i les_coleman.m4v output%05d.jpg 19 | $ python runnitopticalflow.py 20 | $ cd output 21 | $ ffmpeg -i output%05d.jpg out.mp4 22 | ``` 23 | 24 | 25 | Enjoy. 
26 | -------------------------------------------------------------------------------- /numpyexample.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/burningion/deepdreamtutorial/067b810c210d9c0c692752515afeb3e06d9e6776/numpyexample.jpg -------------------------------------------------------------------------------- /overbridge.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/burningion/deepdreamtutorial/067b810c210d9c0c692752515afeb3e06d9e6776/overbridge.jpg -------------------------------------------------------------------------------- /swamp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/burningion/deepdreamtutorial/067b810c210d9c0c692752515afeb3e06d9e6776/swamp.jpg -------------------------------------------------------------------------------- /tmp.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet-Places205" 2 | input: "data" 3 | input_dim: 10 4 | input_dim: 3 5 | input_dim: 224 6 | input_dim: 224 7 | force_backward: true 8 | layer { 9 | name: "conv1/7x7_s2" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1/7x7_s2" 13 | param { 14 | lr_mult: 1.0 15 | decay_mult: 1.0 16 | } 17 | param { 18 | lr_mult: 2.0 19 | decay_mult: 0.0 20 | } 21 | convolution_param { 22 | num_output: 64 23 | pad: 3 24 | kernel_size: 7 25 | stride: 2 26 | weight_filler { 27 | type: "xavier" 28 | std: 0.1 29 | } 30 | bias_filler { 31 | type: "constant" 32 | value: 0.2 33 | } 34 | } 35 | } 36 | layer { 37 | name: "conv1/relu_7x7" 38 | type: "ReLU" 39 | bottom: "conv1/7x7_s2" 40 | top: "conv1/7x7_s2" 41 | } 42 | layer { 43 | name: "pool1/3x3_s2" 44 | type: "Pooling" 45 | bottom: "conv1/7x7_s2" 46 | top: "pool1/3x3_s2" 47 | pooling_param { 48 | pool: MAX 49 | kernel_size: 3 50 | stride: 2 51 | } 52 | } 53 | 
layer { 54 | name: "pool1/norm1" 55 | type: "LRN" 56 | bottom: "pool1/3x3_s2" 57 | top: "pool1/norm1" 58 | lrn_param { 59 | local_size: 5 60 | alpha: 0.0001 61 | beta: 0.75 62 | } 63 | } 64 | layer { 65 | name: "conv2/3x3_reduce" 66 | type: "Convolution" 67 | bottom: "pool1/norm1" 68 | top: "conv2/3x3_reduce" 69 | param { 70 | lr_mult: 1.0 71 | decay_mult: 1.0 72 | } 73 | param { 74 | lr_mult: 2.0 75 | decay_mult: 0.0 76 | } 77 | convolution_param { 78 | num_output: 64 79 | kernel_size: 1 80 | weight_filler { 81 | type: "xavier" 82 | std: 0.1 83 | } 84 | bias_filler { 85 | type: "constant" 86 | value: 0.2 87 | } 88 | } 89 | } 90 | layer { 91 | name: "conv2/relu_3x3_reduce" 92 | type: "ReLU" 93 | bottom: "conv2/3x3_reduce" 94 | top: "conv2/3x3_reduce" 95 | } 96 | layer { 97 | name: "conv2/3x3" 98 | type: "Convolution" 99 | bottom: "conv2/3x3_reduce" 100 | top: "conv2/3x3" 101 | param { 102 | lr_mult: 1.0 103 | decay_mult: 1.0 104 | } 105 | param { 106 | lr_mult: 2.0 107 | decay_mult: 0.0 108 | } 109 | convolution_param { 110 | num_output: 192 111 | pad: 1 112 | kernel_size: 3 113 | weight_filler { 114 | type: "xavier" 115 | std: 0.03 116 | } 117 | bias_filler { 118 | type: "constant" 119 | value: 0.2 120 | } 121 | } 122 | } 123 | layer { 124 | name: "conv2/relu_3x3" 125 | type: "ReLU" 126 | bottom: "conv2/3x3" 127 | top: "conv2/3x3" 128 | } 129 | layer { 130 | name: "conv2/norm2" 131 | type: "LRN" 132 | bottom: "conv2/3x3" 133 | top: "conv2/norm2" 134 | lrn_param { 135 | local_size: 5 136 | alpha: 0.0001 137 | beta: 0.75 138 | } 139 | } 140 | layer { 141 | name: "pool2/3x3_s2" 142 | type: "Pooling" 143 | bottom: "conv2/norm2" 144 | top: "pool2/3x3_s2" 145 | pooling_param { 146 | pool: MAX 147 | kernel_size: 3 148 | stride: 2 149 | } 150 | } 151 | layer { 152 | name: "inception_3a/1x1" 153 | type: "Convolution" 154 | bottom: "pool2/3x3_s2" 155 | top: "inception_3a/1x1" 156 | param { 157 | lr_mult: 1.0 158 | decay_mult: 1.0 159 | } 160 | param { 161 | lr_mult: 2.0 162 
| decay_mult: 0.0 163 | } 164 | convolution_param { 165 | num_output: 64 166 | kernel_size: 1 167 | weight_filler { 168 | type: "xavier" 169 | std: 0.03 170 | } 171 | bias_filler { 172 | type: "constant" 173 | value: 0.2 174 | } 175 | } 176 | } 177 | layer { 178 | name: "inception_3a/relu_1x1" 179 | type: "ReLU" 180 | bottom: "inception_3a/1x1" 181 | top: "inception_3a/1x1" 182 | } 183 | layer { 184 | name: "inception_3a/3x3_reduce" 185 | type: "Convolution" 186 | bottom: "pool2/3x3_s2" 187 | top: "inception_3a/3x3_reduce" 188 | param { 189 | lr_mult: 1.0 190 | decay_mult: 1.0 191 | } 192 | param { 193 | lr_mult: 2.0 194 | decay_mult: 0.0 195 | } 196 | convolution_param { 197 | num_output: 96 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | std: 0.09 202 | } 203 | bias_filler { 204 | type: "constant" 205 | value: 0.2 206 | } 207 | } 208 | } 209 | layer { 210 | name: "inception_3a/relu_3x3_reduce" 211 | type: "ReLU" 212 | bottom: "inception_3a/3x3_reduce" 213 | top: "inception_3a/3x3_reduce" 214 | } 215 | layer { 216 | name: "inception_3a/3x3" 217 | type: "Convolution" 218 | bottom: "inception_3a/3x3_reduce" 219 | top: "inception_3a/3x3" 220 | param { 221 | lr_mult: 1.0 222 | decay_mult: 1.0 223 | } 224 | param { 225 | lr_mult: 2.0 226 | decay_mult: 0.0 227 | } 228 | convolution_param { 229 | num_output: 128 230 | pad: 1 231 | kernel_size: 3 232 | weight_filler { 233 | type: "xavier" 234 | std: 0.03 235 | } 236 | bias_filler { 237 | type: "constant" 238 | value: 0.2 239 | } 240 | } 241 | } 242 | layer { 243 | name: "inception_3a/relu_3x3" 244 | type: "ReLU" 245 | bottom: "inception_3a/3x3" 246 | top: "inception_3a/3x3" 247 | } 248 | layer { 249 | name: "inception_3a/5x5_reduce" 250 | type: "Convolution" 251 | bottom: "pool2/3x3_s2" 252 | top: "inception_3a/5x5_reduce" 253 | param { 254 | lr_mult: 1.0 255 | decay_mult: 1.0 256 | } 257 | param { 258 | lr_mult: 2.0 259 | decay_mult: 0.0 260 | } 261 | convolution_param { 262 | num_output: 16 263 | 
kernel_size: 1 264 | weight_filler { 265 | type: "xavier" 266 | std: 0.2 267 | } 268 | bias_filler { 269 | type: "constant" 270 | value: 0.2 271 | } 272 | } 273 | } 274 | layer { 275 | name: "inception_3a/relu_5x5_reduce" 276 | type: "ReLU" 277 | bottom: "inception_3a/5x5_reduce" 278 | top: "inception_3a/5x5_reduce" 279 | } 280 | layer { 281 | name: "inception_3a/5x5" 282 | type: "Convolution" 283 | bottom: "inception_3a/5x5_reduce" 284 | top: "inception_3a/5x5" 285 | param { 286 | lr_mult: 1.0 287 | decay_mult: 1.0 288 | } 289 | param { 290 | lr_mult: 2.0 291 | decay_mult: 0.0 292 | } 293 | convolution_param { 294 | num_output: 32 295 | pad: 2 296 | kernel_size: 5 297 | weight_filler { 298 | type: "xavier" 299 | std: 0.03 300 | } 301 | bias_filler { 302 | type: "constant" 303 | value: 0.2 304 | } 305 | } 306 | } 307 | layer { 308 | name: "inception_3a/relu_5x5" 309 | type: "ReLU" 310 | bottom: "inception_3a/5x5" 311 | top: "inception_3a/5x5" 312 | } 313 | layer { 314 | name: "inception_3a/pool" 315 | type: "Pooling" 316 | bottom: "pool2/3x3_s2" 317 | top: "inception_3a/pool" 318 | pooling_param { 319 | pool: MAX 320 | kernel_size: 3 321 | stride: 1 322 | pad: 1 323 | } 324 | } 325 | layer { 326 | name: "inception_3a/pool_proj" 327 | type: "Convolution" 328 | bottom: "inception_3a/pool" 329 | top: "inception_3a/pool_proj" 330 | param { 331 | lr_mult: 1.0 332 | decay_mult: 1.0 333 | } 334 | param { 335 | lr_mult: 2.0 336 | decay_mult: 0.0 337 | } 338 | convolution_param { 339 | num_output: 32 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | std: 0.1 344 | } 345 | bias_filler { 346 | type: "constant" 347 | value: 0.2 348 | } 349 | } 350 | } 351 | layer { 352 | name: "inception_3a/relu_pool_proj" 353 | type: "ReLU" 354 | bottom: "inception_3a/pool_proj" 355 | top: "inception_3a/pool_proj" 356 | } 357 | layer { 358 | name: "inception_3a/output" 359 | type: "Concat" 360 | bottom: "inception_3a/1x1" 361 | bottom: "inception_3a/3x3" 362 | bottom: 
"inception_3a/5x5" 363 | bottom: "inception_3a/pool_proj" 364 | top: "inception_3a/output" 365 | } 366 | layer { 367 | name: "inception_3b/1x1" 368 | type: "Convolution" 369 | bottom: "inception_3a/output" 370 | top: "inception_3b/1x1" 371 | param { 372 | lr_mult: 1.0 373 | decay_mult: 1.0 374 | } 375 | param { 376 | lr_mult: 2.0 377 | decay_mult: 0.0 378 | } 379 | convolution_param { 380 | num_output: 128 381 | kernel_size: 1 382 | weight_filler { 383 | type: "xavier" 384 | std: 0.03 385 | } 386 | bias_filler { 387 | type: "constant" 388 | value: 0.2 389 | } 390 | } 391 | } 392 | layer { 393 | name: "inception_3b/relu_1x1" 394 | type: "ReLU" 395 | bottom: "inception_3b/1x1" 396 | top: "inception_3b/1x1" 397 | } 398 | layer { 399 | name: "inception_3b/3x3_reduce" 400 | type: "Convolution" 401 | bottom: "inception_3a/output" 402 | top: "inception_3b/3x3_reduce" 403 | param { 404 | lr_mult: 1.0 405 | decay_mult: 1.0 406 | } 407 | param { 408 | lr_mult: 2.0 409 | decay_mult: 0.0 410 | } 411 | convolution_param { 412 | num_output: 128 413 | kernel_size: 1 414 | weight_filler { 415 | type: "xavier" 416 | std: 0.09 417 | } 418 | bias_filler { 419 | type: "constant" 420 | value: 0.2 421 | } 422 | } 423 | } 424 | layer { 425 | name: "inception_3b/relu_3x3_reduce" 426 | type: "ReLU" 427 | bottom: "inception_3b/3x3_reduce" 428 | top: "inception_3b/3x3_reduce" 429 | } 430 | layer { 431 | name: "inception_3b/3x3" 432 | type: "Convolution" 433 | bottom: "inception_3b/3x3_reduce" 434 | top: "inception_3b/3x3" 435 | param { 436 | lr_mult: 1.0 437 | decay_mult: 1.0 438 | } 439 | param { 440 | lr_mult: 2.0 441 | decay_mult: 0.0 442 | } 443 | convolution_param { 444 | num_output: 192 445 | pad: 1 446 | kernel_size: 3 447 | weight_filler { 448 | type: "xavier" 449 | std: 0.03 450 | } 451 | bias_filler { 452 | type: "constant" 453 | value: 0.2 454 | } 455 | } 456 | } 457 | layer { 458 | name: "inception_3b/relu_3x3" 459 | type: "ReLU" 460 | bottom: "inception_3b/3x3" 461 | top: 
"inception_3b/3x3" 462 | } 463 | layer { 464 | name: "inception_3b/5x5_reduce" 465 | type: "Convolution" 466 | bottom: "inception_3a/output" 467 | top: "inception_3b/5x5_reduce" 468 | param { 469 | lr_mult: 1.0 470 | decay_mult: 1.0 471 | } 472 | param { 473 | lr_mult: 2.0 474 | decay_mult: 0.0 475 | } 476 | convolution_param { 477 | num_output: 32 478 | kernel_size: 1 479 | weight_filler { 480 | type: "xavier" 481 | std: 0.2 482 | } 483 | bias_filler { 484 | type: "constant" 485 | value: 0.2 486 | } 487 | } 488 | } 489 | layer { 490 | name: "inception_3b/relu_5x5_reduce" 491 | type: "ReLU" 492 | bottom: "inception_3b/5x5_reduce" 493 | top: "inception_3b/5x5_reduce" 494 | } 495 | layer { 496 | name: "inception_3b/5x5" 497 | type: "Convolution" 498 | bottom: "inception_3b/5x5_reduce" 499 | top: "inception_3b/5x5" 500 | param { 501 | lr_mult: 1.0 502 | decay_mult: 1.0 503 | } 504 | param { 505 | lr_mult: 2.0 506 | decay_mult: 0.0 507 | } 508 | convolution_param { 509 | num_output: 96 510 | pad: 2 511 | kernel_size: 5 512 | weight_filler { 513 | type: "xavier" 514 | std: 0.03 515 | } 516 | bias_filler { 517 | type: "constant" 518 | value: 0.2 519 | } 520 | } 521 | } 522 | layer { 523 | name: "inception_3b/relu_5x5" 524 | type: "ReLU" 525 | bottom: "inception_3b/5x5" 526 | top: "inception_3b/5x5" 527 | } 528 | layer { 529 | name: "inception_3b/pool" 530 | type: "Pooling" 531 | bottom: "inception_3a/output" 532 | top: "inception_3b/pool" 533 | pooling_param { 534 | pool: MAX 535 | kernel_size: 3 536 | stride: 1 537 | pad: 1 538 | } 539 | } 540 | layer { 541 | name: "inception_3b/pool_proj" 542 | type: "Convolution" 543 | bottom: "inception_3b/pool" 544 | top: "inception_3b/pool_proj" 545 | param { 546 | lr_mult: 1.0 547 | decay_mult: 1.0 548 | } 549 | param { 550 | lr_mult: 2.0 551 | decay_mult: 0.0 552 | } 553 | convolution_param { 554 | num_output: 64 555 | kernel_size: 1 556 | weight_filler { 557 | type: "xavier" 558 | std: 0.1 559 | } 560 | bias_filler { 561 | type: 
"constant" 562 | value: 0.2 563 | } 564 | } 565 | } 566 | layer { 567 | name: "inception_3b/relu_pool_proj" 568 | type: "ReLU" 569 | bottom: "inception_3b/pool_proj" 570 | top: "inception_3b/pool_proj" 571 | } 572 | layer { 573 | name: "inception_3b/output" 574 | type: "Concat" 575 | bottom: "inception_3b/1x1" 576 | bottom: "inception_3b/3x3" 577 | bottom: "inception_3b/5x5" 578 | bottom: "inception_3b/pool_proj" 579 | top: "inception_3b/output" 580 | } 581 | layer { 582 | name: "pool3/3x3_s2" 583 | type: "Pooling" 584 | bottom: "inception_3b/output" 585 | top: "pool3/3x3_s2" 586 | pooling_param { 587 | pool: MAX 588 | kernel_size: 3 589 | stride: 2 590 | } 591 | } 592 | layer { 593 | name: "inception_4a/1x1" 594 | type: "Convolution" 595 | bottom: "pool3/3x3_s2" 596 | top: "inception_4a/1x1" 597 | param { 598 | lr_mult: 1.0 599 | decay_mult: 1.0 600 | } 601 | param { 602 | lr_mult: 2.0 603 | decay_mult: 0.0 604 | } 605 | convolution_param { 606 | num_output: 192 607 | kernel_size: 1 608 | weight_filler { 609 | type: "xavier" 610 | std: 0.03 611 | } 612 | bias_filler { 613 | type: "constant" 614 | value: 0.2 615 | } 616 | } 617 | } 618 | layer { 619 | name: "inception_4a/relu_1x1" 620 | type: "ReLU" 621 | bottom: "inception_4a/1x1" 622 | top: "inception_4a/1x1" 623 | } 624 | layer { 625 | name: "inception_4a/3x3_reduce" 626 | type: "Convolution" 627 | bottom: "pool3/3x3_s2" 628 | top: "inception_4a/3x3_reduce" 629 | param { 630 | lr_mult: 1.0 631 | decay_mult: 1.0 632 | } 633 | param { 634 | lr_mult: 2.0 635 | decay_mult: 0.0 636 | } 637 | convolution_param { 638 | num_output: 96 639 | kernel_size: 1 640 | weight_filler { 641 | type: "xavier" 642 | std: 0.09 643 | } 644 | bias_filler { 645 | type: "constant" 646 | value: 0.2 647 | } 648 | } 649 | } 650 | layer { 651 | name: "inception_4a/relu_3x3_reduce" 652 | type: "ReLU" 653 | bottom: "inception_4a/3x3_reduce" 654 | top: "inception_4a/3x3_reduce" 655 | } 656 | layer { 657 | name: "inception_4a/3x3" 658 | type: 
"Convolution" 659 | bottom: "inception_4a/3x3_reduce" 660 | top: "inception_4a/3x3" 661 | param { 662 | lr_mult: 1.0 663 | decay_mult: 1.0 664 | } 665 | param { 666 | lr_mult: 2.0 667 | decay_mult: 0.0 668 | } 669 | convolution_param { 670 | num_output: 208 671 | pad: 1 672 | kernel_size: 3 673 | weight_filler { 674 | type: "xavier" 675 | std: 0.03 676 | } 677 | bias_filler { 678 | type: "constant" 679 | value: 0.2 680 | } 681 | } 682 | } 683 | layer { 684 | name: "inception_4a/relu_3x3" 685 | type: "ReLU" 686 | bottom: "inception_4a/3x3" 687 | top: "inception_4a/3x3" 688 | } 689 | layer { 690 | name: "inception_4a/5x5_reduce" 691 | type: "Convolution" 692 | bottom: "pool3/3x3_s2" 693 | top: "inception_4a/5x5_reduce" 694 | param { 695 | lr_mult: 1.0 696 | decay_mult: 1.0 697 | } 698 | param { 699 | lr_mult: 2.0 700 | decay_mult: 0.0 701 | } 702 | convolution_param { 703 | num_output: 16 704 | kernel_size: 1 705 | weight_filler { 706 | type: "xavier" 707 | std: 0.2 708 | } 709 | bias_filler { 710 | type: "constant" 711 | value: 0.2 712 | } 713 | } 714 | } 715 | layer { 716 | name: "inception_4a/relu_5x5_reduce" 717 | type: "ReLU" 718 | bottom: "inception_4a/5x5_reduce" 719 | top: "inception_4a/5x5_reduce" 720 | } 721 | layer { 722 | name: "inception_4a/5x5" 723 | type: "Convolution" 724 | bottom: "inception_4a/5x5_reduce" 725 | top: "inception_4a/5x5" 726 | param { 727 | lr_mult: 1.0 728 | decay_mult: 1.0 729 | } 730 | param { 731 | lr_mult: 2.0 732 | decay_mult: 0.0 733 | } 734 | convolution_param { 735 | num_output: 48 736 | pad: 2 737 | kernel_size: 5 738 | weight_filler { 739 | type: "xavier" 740 | std: 0.03 741 | } 742 | bias_filler { 743 | type: "constant" 744 | value: 0.2 745 | } 746 | } 747 | } 748 | layer { 749 | name: "inception_4a/relu_5x5" 750 | type: "ReLU" 751 | bottom: "inception_4a/5x5" 752 | top: "inception_4a/5x5" 753 | } 754 | layer { 755 | name: "inception_4a/pool" 756 | type: "Pooling" 757 | bottom: "pool3/3x3_s2" 758 | top: "inception_4a/pool" 
759 | pooling_param { 760 | pool: MAX 761 | kernel_size: 3 762 | stride: 1 763 | pad: 1 764 | } 765 | } 766 | layer { 767 | name: "inception_4a/pool_proj" 768 | type: "Convolution" 769 | bottom: "inception_4a/pool" 770 | top: "inception_4a/pool_proj" 771 | param { 772 | lr_mult: 1.0 773 | decay_mult: 1.0 774 | } 775 | param { 776 | lr_mult: 2.0 777 | decay_mult: 0.0 778 | } 779 | convolution_param { 780 | num_output: 64 781 | kernel_size: 1 782 | weight_filler { 783 | type: "xavier" 784 | std: 0.1 785 | } 786 | bias_filler { 787 | type: "constant" 788 | value: 0.2 789 | } 790 | } 791 | } 792 | layer { 793 | name: "inception_4a/relu_pool_proj" 794 | type: "ReLU" 795 | bottom: "inception_4a/pool_proj" 796 | top: "inception_4a/pool_proj" 797 | } 798 | layer { 799 | name: "inception_4a/output" 800 | type: "Concat" 801 | bottom: "inception_4a/1x1" 802 | bottom: "inception_4a/3x3" 803 | bottom: "inception_4a/5x5" 804 | bottom: "inception_4a/pool_proj" 805 | top: "inception_4a/output" 806 | } 807 | layer { 808 | name: "inception_4b/1x1" 809 | type: "Convolution" 810 | bottom: "inception_4a/output" 811 | top: "inception_4b/1x1" 812 | param { 813 | lr_mult: 1.0 814 | decay_mult: 1.0 815 | } 816 | param { 817 | lr_mult: 2.0 818 | decay_mult: 0.0 819 | } 820 | convolution_param { 821 | num_output: 160 822 | kernel_size: 1 823 | weight_filler { 824 | type: "xavier" 825 | std: 0.03 826 | } 827 | bias_filler { 828 | type: "constant" 829 | value: 0.2 830 | } 831 | } 832 | } 833 | layer { 834 | name: "inception_4b/relu_1x1" 835 | type: "ReLU" 836 | bottom: "inception_4b/1x1" 837 | top: "inception_4b/1x1" 838 | } 839 | layer { 840 | name: "inception_4b/3x3_reduce" 841 | type: "Convolution" 842 | bottom: "inception_4a/output" 843 | top: "inception_4b/3x3_reduce" 844 | param { 845 | lr_mult: 1.0 846 | decay_mult: 1.0 847 | } 848 | param { 849 | lr_mult: 2.0 850 | decay_mult: 0.0 851 | } 852 | convolution_param { 853 | num_output: 112 854 | kernel_size: 1 855 | weight_filler { 856 | 
type: "xavier" 857 | std: 0.09 858 | } 859 | bias_filler { 860 | type: "constant" 861 | value: 0.2 862 | } 863 | } 864 | } 865 | layer { 866 | name: "inception_4b/relu_3x3_reduce" 867 | type: "ReLU" 868 | bottom: "inception_4b/3x3_reduce" 869 | top: "inception_4b/3x3_reduce" 870 | } 871 | layer { 872 | name: "inception_4b/3x3" 873 | type: "Convolution" 874 | bottom: "inception_4b/3x3_reduce" 875 | top: "inception_4b/3x3" 876 | param { 877 | lr_mult: 1.0 878 | decay_mult: 1.0 879 | } 880 | param { 881 | lr_mult: 2.0 882 | decay_mult: 0.0 883 | } 884 | convolution_param { 885 | num_output: 224 886 | pad: 1 887 | kernel_size: 3 888 | weight_filler { 889 | type: "xavier" 890 | std: 0.03 891 | } 892 | bias_filler { 893 | type: "constant" 894 | value: 0.2 895 | } 896 | } 897 | } 898 | layer { 899 | name: "inception_4b/relu_3x3" 900 | type: "ReLU" 901 | bottom: "inception_4b/3x3" 902 | top: "inception_4b/3x3" 903 | } 904 | layer { 905 | name: "inception_4b/5x5_reduce" 906 | type: "Convolution" 907 | bottom: "inception_4a/output" 908 | top: "inception_4b/5x5_reduce" 909 | param { 910 | lr_mult: 1.0 911 | decay_mult: 1.0 912 | } 913 | param { 914 | lr_mult: 2.0 915 | decay_mult: 0.0 916 | } 917 | convolution_param { 918 | num_output: 24 919 | kernel_size: 1 920 | weight_filler { 921 | type: "xavier" 922 | std: 0.2 923 | } 924 | bias_filler { 925 | type: "constant" 926 | value: 0.2 927 | } 928 | } 929 | } 930 | layer { 931 | name: "inception_4b/relu_5x5_reduce" 932 | type: "ReLU" 933 | bottom: "inception_4b/5x5_reduce" 934 | top: "inception_4b/5x5_reduce" 935 | } 936 | layer { 937 | name: "inception_4b/5x5" 938 | type: "Convolution" 939 | bottom: "inception_4b/5x5_reduce" 940 | top: "inception_4b/5x5" 941 | param { 942 | lr_mult: 1.0 943 | decay_mult: 1.0 944 | } 945 | param { 946 | lr_mult: 2.0 947 | decay_mult: 0.0 948 | } 949 | convolution_param { 950 | num_output: 64 951 | pad: 2 952 | kernel_size: 5 953 | weight_filler { 954 | type: "xavier" 955 | std: 0.03 956 | } 957 
| bias_filler { 958 | type: "constant" 959 | value: 0.2 960 | } 961 | } 962 | } 963 | layer { 964 | name: "inception_4b/relu_5x5" 965 | type: "ReLU" 966 | bottom: "inception_4b/5x5" 967 | top: "inception_4b/5x5" 968 | } 969 | layer { 970 | name: "inception_4b/pool" 971 | type: "Pooling" 972 | bottom: "inception_4a/output" 973 | top: "inception_4b/pool" 974 | pooling_param { 975 | pool: MAX 976 | kernel_size: 3 977 | stride: 1 978 | pad: 1 979 | } 980 | } 981 | layer { 982 | name: "inception_4b/pool_proj" 983 | type: "Convolution" 984 | bottom: "inception_4b/pool" 985 | top: "inception_4b/pool_proj" 986 | param { 987 | lr_mult: 1.0 988 | decay_mult: 1.0 989 | } 990 | param { 991 | lr_mult: 2.0 992 | decay_mult: 0.0 993 | } 994 | convolution_param { 995 | num_output: 64 996 | kernel_size: 1 997 | weight_filler { 998 | type: "xavier" 999 | std: 0.1 1000 | } 1001 | bias_filler { 1002 | type: "constant" 1003 | value: 0.2 1004 | } 1005 | } 1006 | } 1007 | layer { 1008 | name: "inception_4b/relu_pool_proj" 1009 | type: "ReLU" 1010 | bottom: "inception_4b/pool_proj" 1011 | top: "inception_4b/pool_proj" 1012 | } 1013 | layer { 1014 | name: "inception_4b/output" 1015 | type: "Concat" 1016 | bottom: "inception_4b/1x1" 1017 | bottom: "inception_4b/3x3" 1018 | bottom: "inception_4b/5x5" 1019 | bottom: "inception_4b/pool_proj" 1020 | top: "inception_4b/output" 1021 | } 1022 | layer { 1023 | name: "inception_4c/1x1" 1024 | type: "Convolution" 1025 | bottom: "inception_4b/output" 1026 | top: "inception_4c/1x1" 1027 | param { 1028 | lr_mult: 1.0 1029 | decay_mult: 1.0 1030 | } 1031 | param { 1032 | lr_mult: 2.0 1033 | decay_mult: 0.0 1034 | } 1035 | convolution_param { 1036 | num_output: 128 1037 | kernel_size: 1 1038 | weight_filler { 1039 | type: "xavier" 1040 | std: 0.03 1041 | } 1042 | bias_filler { 1043 | type: "constant" 1044 | value: 0.2 1045 | } 1046 | } 1047 | } 1048 | layer { 1049 | name: "inception_4c/relu_1x1" 1050 | type: "ReLU" 1051 | bottom: "inception_4c/1x1" 1052 | 
top: "inception_4c/1x1" 1053 | } 1054 | layer { 1055 | name: "inception_4c/3x3_reduce" 1056 | type: "Convolution" 1057 | bottom: "inception_4b/output" 1058 | top: "inception_4c/3x3_reduce" 1059 | param { 1060 | lr_mult: 1.0 1061 | decay_mult: 1.0 1062 | } 1063 | param { 1064 | lr_mult: 2.0 1065 | decay_mult: 0.0 1066 | } 1067 | convolution_param { 1068 | num_output: 128 1069 | kernel_size: 1 1070 | weight_filler { 1071 | type: "xavier" 1072 | std: 0.09 1073 | } 1074 | bias_filler { 1075 | type: "constant" 1076 | value: 0.2 1077 | } 1078 | } 1079 | } 1080 | layer { 1081 | name: "inception_4c/relu_3x3_reduce" 1082 | type: "ReLU" 1083 | bottom: "inception_4c/3x3_reduce" 1084 | top: "inception_4c/3x3_reduce" 1085 | } 1086 | layer { 1087 | name: "inception_4c/3x3" 1088 | type: "Convolution" 1089 | bottom: "inception_4c/3x3_reduce" 1090 | top: "inception_4c/3x3" 1091 | param { 1092 | lr_mult: 1.0 1093 | decay_mult: 1.0 1094 | } 1095 | param { 1096 | lr_mult: 2.0 1097 | decay_mult: 0.0 1098 | } 1099 | convolution_param { 1100 | num_output: 256 1101 | pad: 1 1102 | kernel_size: 3 1103 | weight_filler { 1104 | type: "xavier" 1105 | std: 0.03 1106 | } 1107 | bias_filler { 1108 | type: "constant" 1109 | value: 0.2 1110 | } 1111 | } 1112 | } 1113 | layer { 1114 | name: "inception_4c/relu_3x3" 1115 | type: "ReLU" 1116 | bottom: "inception_4c/3x3" 1117 | top: "inception_4c/3x3" 1118 | } 1119 | layer { 1120 | name: "inception_4c/5x5_reduce" 1121 | type: "Convolution" 1122 | bottom: "inception_4b/output" 1123 | top: "inception_4c/5x5_reduce" 1124 | param { 1125 | lr_mult: 1.0 1126 | decay_mult: 1.0 1127 | } 1128 | param { 1129 | lr_mult: 2.0 1130 | decay_mult: 0.0 1131 | } 1132 | convolution_param { 1133 | num_output: 24 1134 | kernel_size: 1 1135 | weight_filler { 1136 | type: "xavier" 1137 | std: 0.2 1138 | } 1139 | bias_filler { 1140 | type: "constant" 1141 | value: 0.2 1142 | } 1143 | } 1144 | } 1145 | layer { 1146 | name: "inception_4c/relu_5x5_reduce" 1147 | type: "ReLU" 
1148 | bottom: "inception_4c/5x5_reduce" 1149 | top: "inception_4c/5x5_reduce" 1150 | } 1151 | layer { 1152 | name: "inception_4c/5x5" 1153 | type: "Convolution" 1154 | bottom: "inception_4c/5x5_reduce" 1155 | top: "inception_4c/5x5" 1156 | param { 1157 | lr_mult: 1.0 1158 | decay_mult: 1.0 1159 | } 1160 | param { 1161 | lr_mult: 2.0 1162 | decay_mult: 0.0 1163 | } 1164 | convolution_param { 1165 | num_output: 64 1166 | pad: 2 1167 | kernel_size: 5 1168 | weight_filler { 1169 | type: "xavier" 1170 | std: 0.03 1171 | } 1172 | bias_filler { 1173 | type: "constant" 1174 | value: 0.2 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "inception_4c/relu_5x5" 1180 | type: "ReLU" 1181 | bottom: "inception_4c/5x5" 1182 | top: "inception_4c/5x5" 1183 | } 1184 | layer { 1185 | name: "inception_4c/pool" 1186 | type: "Pooling" 1187 | bottom: "inception_4b/output" 1188 | top: "inception_4c/pool" 1189 | pooling_param { 1190 | pool: MAX 1191 | kernel_size: 3 1192 | stride: 1 1193 | pad: 1 1194 | } 1195 | } 1196 | layer { 1197 | name: "inception_4c/pool_proj" 1198 | type: "Convolution" 1199 | bottom: "inception_4c/pool" 1200 | top: "inception_4c/pool_proj" 1201 | param { 1202 | lr_mult: 1.0 1203 | decay_mult: 1.0 1204 | } 1205 | param { 1206 | lr_mult: 2.0 1207 | decay_mult: 0.0 1208 | } 1209 | convolution_param { 1210 | num_output: 64 1211 | kernel_size: 1 1212 | weight_filler { 1213 | type: "xavier" 1214 | std: 0.1 1215 | } 1216 | bias_filler { 1217 | type: "constant" 1218 | value: 0.2 1219 | } 1220 | } 1221 | } 1222 | layer { 1223 | name: "inception_4c/relu_pool_proj" 1224 | type: "ReLU" 1225 | bottom: "inception_4c/pool_proj" 1226 | top: "inception_4c/pool_proj" 1227 | } 1228 | layer { 1229 | name: "inception_4c/output" 1230 | type: "Concat" 1231 | bottom: "inception_4c/1x1" 1232 | bottom: "inception_4c/3x3" 1233 | bottom: "inception_4c/5x5" 1234 | bottom: "inception_4c/pool_proj" 1235 | top: "inception_4c/output" 1236 | } 1237 | layer { 1238 | name: "inception_4d/1x1" 
1239 | type: "Convolution" 1240 | bottom: "inception_4c/output" 1241 | top: "inception_4d/1x1" 1242 | param { 1243 | lr_mult: 1.0 1244 | decay_mult: 1.0 1245 | } 1246 | param { 1247 | lr_mult: 2.0 1248 | decay_mult: 0.0 1249 | } 1250 | convolution_param { 1251 | num_output: 112 1252 | kernel_size: 1 1253 | weight_filler { 1254 | type: "xavier" 1255 | std: 0.03 1256 | } 1257 | bias_filler { 1258 | type: "constant" 1259 | value: 0.2 1260 | } 1261 | } 1262 | } 1263 | layer { 1264 | name: "inception_4d/relu_1x1" 1265 | type: "ReLU" 1266 | bottom: "inception_4d/1x1" 1267 | top: "inception_4d/1x1" 1268 | } 1269 | layer { 1270 | name: "inception_4d/3x3_reduce" 1271 | type: "Convolution" 1272 | bottom: "inception_4c/output" 1273 | top: "inception_4d/3x3_reduce" 1274 | param { 1275 | lr_mult: 1.0 1276 | decay_mult: 1.0 1277 | } 1278 | param { 1279 | lr_mult: 2.0 1280 | decay_mult: 0.0 1281 | } 1282 | convolution_param { 1283 | num_output: 144 1284 | kernel_size: 1 1285 | weight_filler { 1286 | type: "xavier" 1287 | std: 0.09 1288 | } 1289 | bias_filler { 1290 | type: "constant" 1291 | value: 0.2 1292 | } 1293 | } 1294 | } 1295 | layer { 1296 | name: "inception_4d/relu_3x3_reduce" 1297 | type: "ReLU" 1298 | bottom: "inception_4d/3x3_reduce" 1299 | top: "inception_4d/3x3_reduce" 1300 | } 1301 | layer { 1302 | name: "inception_4d/3x3" 1303 | type: "Convolution" 1304 | bottom: "inception_4d/3x3_reduce" 1305 | top: "inception_4d/3x3" 1306 | param { 1307 | lr_mult: 1.0 1308 | decay_mult: 1.0 1309 | } 1310 | param { 1311 | lr_mult: 2.0 1312 | decay_mult: 0.0 1313 | } 1314 | convolution_param { 1315 | num_output: 288 1316 | pad: 1 1317 | kernel_size: 3 1318 | weight_filler { 1319 | type: "xavier" 1320 | std: 0.03 1321 | } 1322 | bias_filler { 1323 | type: "constant" 1324 | value: 0.2 1325 | } 1326 | } 1327 | } 1328 | layer { 1329 | name: "inception_4d/relu_3x3" 1330 | type: "ReLU" 1331 | bottom: "inception_4d/3x3" 1332 | top: "inception_4d/3x3" 1333 | } 1334 | layer { 1335 | name: 
"inception_4d/5x5_reduce" 1336 | type: "Convolution" 1337 | bottom: "inception_4c/output" 1338 | top: "inception_4d/5x5_reduce" 1339 | param { 1340 | lr_mult: 1.0 1341 | decay_mult: 1.0 1342 | } 1343 | param { 1344 | lr_mult: 2.0 1345 | decay_mult: 0.0 1346 | } 1347 | convolution_param { 1348 | num_output: 32 1349 | kernel_size: 1 1350 | weight_filler { 1351 | type: "xavier" 1352 | std: 0.2 1353 | } 1354 | bias_filler { 1355 | type: "constant" 1356 | value: 0.2 1357 | } 1358 | } 1359 | } 1360 | layer { 1361 | name: "inception_4d/relu_5x5_reduce" 1362 | type: "ReLU" 1363 | bottom: "inception_4d/5x5_reduce" 1364 | top: "inception_4d/5x5_reduce" 1365 | } 1366 | layer { 1367 | name: "inception_4d/5x5" 1368 | type: "Convolution" 1369 | bottom: "inception_4d/5x5_reduce" 1370 | top: "inception_4d/5x5" 1371 | param { 1372 | lr_mult: 1.0 1373 | decay_mult: 1.0 1374 | } 1375 | param { 1376 | lr_mult: 2.0 1377 | decay_mult: 0.0 1378 | } 1379 | convolution_param { 1380 | num_output: 64 1381 | pad: 2 1382 | kernel_size: 5 1383 | weight_filler { 1384 | type: "xavier" 1385 | std: 0.03 1386 | } 1387 | bias_filler { 1388 | type: "constant" 1389 | value: 0.2 1390 | } 1391 | } 1392 | } 1393 | layer { 1394 | name: "inception_4d/relu_5x5" 1395 | type: "ReLU" 1396 | bottom: "inception_4d/5x5" 1397 | top: "inception_4d/5x5" 1398 | } 1399 | layer { 1400 | name: "inception_4d/pool" 1401 | type: "Pooling" 1402 | bottom: "inception_4c/output" 1403 | top: "inception_4d/pool" 1404 | pooling_param { 1405 | pool: MAX 1406 | kernel_size: 3 1407 | stride: 1 1408 | pad: 1 1409 | } 1410 | } 1411 | layer { 1412 | name: "inception_4d/pool_proj" 1413 | type: "Convolution" 1414 | bottom: "inception_4d/pool" 1415 | top: "inception_4d/pool_proj" 1416 | param { 1417 | lr_mult: 1.0 1418 | decay_mult: 1.0 1419 | } 1420 | param { 1421 | lr_mult: 2.0 1422 | decay_mult: 0.0 1423 | } 1424 | convolution_param { 1425 | num_output: 64 1426 | kernel_size: 1 1427 | weight_filler { 1428 | type: "xavier" 1429 | std: 
0.1 1430 | } 1431 | bias_filler { 1432 | type: "constant" 1433 | value: 0.2 1434 | } 1435 | } 1436 | } 1437 | layer { 1438 | name: "inception_4d/relu_pool_proj" 1439 | type: "ReLU" 1440 | bottom: "inception_4d/pool_proj" 1441 | top: "inception_4d/pool_proj" 1442 | } 1443 | layer { 1444 | name: "inception_4d/output" 1445 | type: "Concat" 1446 | bottom: "inception_4d/1x1" 1447 | bottom: "inception_4d/3x3" 1448 | bottom: "inception_4d/5x5" 1449 | bottom: "inception_4d/pool_proj" 1450 | top: "inception_4d/output" 1451 | } 1452 | layer { 1453 | name: "inception_4e/1x1" 1454 | type: "Convolution" 1455 | bottom: "inception_4d/output" 1456 | top: "inception_4e/1x1" 1457 | param { 1458 | lr_mult: 1.0 1459 | decay_mult: 1.0 1460 | } 1461 | param { 1462 | lr_mult: 2.0 1463 | decay_mult: 0.0 1464 | } 1465 | convolution_param { 1466 | num_output: 256 1467 | kernel_size: 1 1468 | weight_filler { 1469 | type: "xavier" 1470 | std: 0.03 1471 | } 1472 | bias_filler { 1473 | type: "constant" 1474 | value: 0.2 1475 | } 1476 | } 1477 | } 1478 | layer { 1479 | name: "inception_4e/relu_1x1" 1480 | type: "ReLU" 1481 | bottom: "inception_4e/1x1" 1482 | top: "inception_4e/1x1" 1483 | } 1484 | layer { 1485 | name: "inception_4e/3x3_reduce" 1486 | type: "Convolution" 1487 | bottom: "inception_4d/output" 1488 | top: "inception_4e/3x3_reduce" 1489 | param { 1490 | lr_mult: 1.0 1491 | decay_mult: 1.0 1492 | } 1493 | param { 1494 | lr_mult: 2.0 1495 | decay_mult: 0.0 1496 | } 1497 | convolution_param { 1498 | num_output: 160 1499 | kernel_size: 1 1500 | weight_filler { 1501 | type: "xavier" 1502 | std: 0.09 1503 | } 1504 | bias_filler { 1505 | type: "constant" 1506 | value: 0.2 1507 | } 1508 | } 1509 | } 1510 | layer { 1511 | name: "inception_4e/relu_3x3_reduce" 1512 | type: "ReLU" 1513 | bottom: "inception_4e/3x3_reduce" 1514 | top: "inception_4e/3x3_reduce" 1515 | } 1516 | layer { 1517 | name: "inception_4e/3x3" 1518 | type: "Convolution" 1519 | bottom: "inception_4e/3x3_reduce" 1520 | top: 
"inception_4e/3x3" 1521 | param { 1522 | lr_mult: 1.0 1523 | decay_mult: 1.0 1524 | } 1525 | param { 1526 | lr_mult: 2.0 1527 | decay_mult: 0.0 1528 | } 1529 | convolution_param { 1530 | num_output: 320 1531 | pad: 1 1532 | kernel_size: 3 1533 | weight_filler { 1534 | type: "xavier" 1535 | std: 0.03 1536 | } 1537 | bias_filler { 1538 | type: "constant" 1539 | value: 0.2 1540 | } 1541 | } 1542 | } 1543 | layer { 1544 | name: "inception_4e/relu_3x3" 1545 | type: "ReLU" 1546 | bottom: "inception_4e/3x3" 1547 | top: "inception_4e/3x3" 1548 | } 1549 | layer { 1550 | name: "inception_4e/5x5_reduce" 1551 | type: "Convolution" 1552 | bottom: "inception_4d/output" 1553 | top: "inception_4e/5x5_reduce" 1554 | param { 1555 | lr_mult: 1.0 1556 | decay_mult: 1.0 1557 | } 1558 | param { 1559 | lr_mult: 2.0 1560 | decay_mult: 0.0 1561 | } 1562 | convolution_param { 1563 | num_output: 32 1564 | kernel_size: 1 1565 | weight_filler { 1566 | type: "xavier" 1567 | std: 0.2 1568 | } 1569 | bias_filler { 1570 | type: "constant" 1571 | value: 0.2 1572 | } 1573 | } 1574 | } 1575 | layer { 1576 | name: "inception_4e/relu_5x5_reduce" 1577 | type: "ReLU" 1578 | bottom: "inception_4e/5x5_reduce" 1579 | top: "inception_4e/5x5_reduce" 1580 | } 1581 | layer { 1582 | name: "inception_4e/5x5" 1583 | type: "Convolution" 1584 | bottom: "inception_4e/5x5_reduce" 1585 | top: "inception_4e/5x5" 1586 | param { 1587 | lr_mult: 1.0 1588 | decay_mult: 1.0 1589 | } 1590 | param { 1591 | lr_mult: 2.0 1592 | decay_mult: 0.0 1593 | } 1594 | convolution_param { 1595 | num_output: 128 1596 | pad: 2 1597 | kernel_size: 5 1598 | weight_filler { 1599 | type: "xavier" 1600 | std: 0.03 1601 | } 1602 | bias_filler { 1603 | type: "constant" 1604 | value: 0.2 1605 | } 1606 | } 1607 | } 1608 | layer { 1609 | name: "inception_4e/relu_5x5" 1610 | type: "ReLU" 1611 | bottom: "inception_4e/5x5" 1612 | top: "inception_4e/5x5" 1613 | } 1614 | layer { 1615 | name: "inception_4e/pool" 1616 | type: "Pooling" 1617 | bottom: 
"inception_4d/output" 1618 | top: "inception_4e/pool" 1619 | pooling_param { 1620 | pool: MAX 1621 | kernel_size: 3 1622 | stride: 1 1623 | pad: 1 1624 | } 1625 | } 1626 | layer { 1627 | name: "inception_4e/pool_proj" 1628 | type: "Convolution" 1629 | bottom: "inception_4e/pool" 1630 | top: "inception_4e/pool_proj" 1631 | param { 1632 | lr_mult: 1.0 1633 | decay_mult: 1.0 1634 | } 1635 | param { 1636 | lr_mult: 2.0 1637 | decay_mult: 0.0 1638 | } 1639 | convolution_param { 1640 | num_output: 128 1641 | kernel_size: 1 1642 | weight_filler { 1643 | type: "xavier" 1644 | std: 0.1 1645 | } 1646 | bias_filler { 1647 | type: "constant" 1648 | value: 0.2 1649 | } 1650 | } 1651 | } 1652 | layer { 1653 | name: "inception_4e/relu_pool_proj" 1654 | type: "ReLU" 1655 | bottom: "inception_4e/pool_proj" 1656 | top: "inception_4e/pool_proj" 1657 | } 1658 | layer { 1659 | name: "inception_4e/output" 1660 | type: "Concat" 1661 | bottom: "inception_4e/1x1" 1662 | bottom: "inception_4e/3x3" 1663 | bottom: "inception_4e/5x5" 1664 | bottom: "inception_4e/pool_proj" 1665 | top: "inception_4e/output" 1666 | } 1667 | layer { 1668 | name: "pool4/3x3_s2" 1669 | type: "Pooling" 1670 | bottom: "inception_4e/output" 1671 | top: "pool4/3x3_s2" 1672 | pooling_param { 1673 | pool: MAX 1674 | kernel_size: 3 1675 | stride: 2 1676 | } 1677 | } 1678 | layer { 1679 | name: "inception_5a/1x1" 1680 | type: "Convolution" 1681 | bottom: "pool4/3x3_s2" 1682 | top: "inception_5a/1x1" 1683 | param { 1684 | lr_mult: 1.0 1685 | decay_mult: 1.0 1686 | } 1687 | param { 1688 | lr_mult: 2.0 1689 | decay_mult: 0.0 1690 | } 1691 | convolution_param { 1692 | num_output: 256 1693 | kernel_size: 1 1694 | weight_filler { 1695 | type: "xavier" 1696 | std: 0.03 1697 | } 1698 | bias_filler { 1699 | type: "constant" 1700 | value: 0.2 1701 | } 1702 | } 1703 | } 1704 | layer { 1705 | name: "inception_5a/relu_1x1" 1706 | type: "ReLU" 1707 | bottom: "inception_5a/1x1" 1708 | top: "inception_5a/1x1" 1709 | } 1710 | layer { 1711 
| name: "inception_5a/3x3_reduce" 1712 | type: "Convolution" 1713 | bottom: "pool4/3x3_s2" 1714 | top: "inception_5a/3x3_reduce" 1715 | param { 1716 | lr_mult: 1.0 1717 | decay_mult: 1.0 1718 | } 1719 | param { 1720 | lr_mult: 2.0 1721 | decay_mult: 0.0 1722 | } 1723 | convolution_param { 1724 | num_output: 160 1725 | kernel_size: 1 1726 | weight_filler { 1727 | type: "xavier" 1728 | std: 0.09 1729 | } 1730 | bias_filler { 1731 | type: "constant" 1732 | value: 0.2 1733 | } 1734 | } 1735 | } 1736 | layer { 1737 | name: "inception_5a/relu_3x3_reduce" 1738 | type: "ReLU" 1739 | bottom: "inception_5a/3x3_reduce" 1740 | top: "inception_5a/3x3_reduce" 1741 | } 1742 | layer { 1743 | name: "inception_5a/3x3" 1744 | type: "Convolution" 1745 | bottom: "inception_5a/3x3_reduce" 1746 | top: "inception_5a/3x3" 1747 | param { 1748 | lr_mult: 1.0 1749 | decay_mult: 1.0 1750 | } 1751 | param { 1752 | lr_mult: 2.0 1753 | decay_mult: 0.0 1754 | } 1755 | convolution_param { 1756 | num_output: 320 1757 | pad: 1 1758 | kernel_size: 3 1759 | weight_filler { 1760 | type: "xavier" 1761 | std: 0.03 1762 | } 1763 | bias_filler { 1764 | type: "constant" 1765 | value: 0.2 1766 | } 1767 | } 1768 | } 1769 | layer { 1770 | name: "inception_5a/relu_3x3" 1771 | type: "ReLU" 1772 | bottom: "inception_5a/3x3" 1773 | top: "inception_5a/3x3" 1774 | } 1775 | layer { 1776 | name: "inception_5a/5x5_reduce" 1777 | type: "Convolution" 1778 | bottom: "pool4/3x3_s2" 1779 | top: "inception_5a/5x5_reduce" 1780 | param { 1781 | lr_mult: 1.0 1782 | decay_mult: 1.0 1783 | } 1784 | param { 1785 | lr_mult: 2.0 1786 | decay_mult: 0.0 1787 | } 1788 | convolution_param { 1789 | num_output: 32 1790 | kernel_size: 1 1791 | weight_filler { 1792 | type: "xavier" 1793 | std: 0.2 1794 | } 1795 | bias_filler { 1796 | type: "constant" 1797 | value: 0.2 1798 | } 1799 | } 1800 | } 1801 | layer { 1802 | name: "inception_5a/relu_5x5_reduce" 1803 | type: "ReLU" 1804 | bottom: "inception_5a/5x5_reduce" 1805 | top: 
"inception_5a/5x5_reduce" 1806 | } 1807 | layer { 1808 | name: "inception_5a/5x5" 1809 | type: "Convolution" 1810 | bottom: "inception_5a/5x5_reduce" 1811 | top: "inception_5a/5x5" 1812 | param { 1813 | lr_mult: 1.0 1814 | decay_mult: 1.0 1815 | } 1816 | param { 1817 | lr_mult: 2.0 1818 | decay_mult: 0.0 1819 | } 1820 | convolution_param { 1821 | num_output: 128 1822 | pad: 2 1823 | kernel_size: 5 1824 | weight_filler { 1825 | type: "xavier" 1826 | std: 0.03 1827 | } 1828 | bias_filler { 1829 | type: "constant" 1830 | value: 0.2 1831 | } 1832 | } 1833 | } 1834 | layer { 1835 | name: "inception_5a/relu_5x5" 1836 | type: "ReLU" 1837 | bottom: "inception_5a/5x5" 1838 | top: "inception_5a/5x5" 1839 | } 1840 | layer { 1841 | name: "inception_5a/pool" 1842 | type: "Pooling" 1843 | bottom: "pool4/3x3_s2" 1844 | top: "inception_5a/pool" 1845 | pooling_param { 1846 | pool: MAX 1847 | kernel_size: 3 1848 | stride: 1 1849 | pad: 1 1850 | } 1851 | } 1852 | layer { 1853 | name: "inception_5a/pool_proj" 1854 | type: "Convolution" 1855 | bottom: "inception_5a/pool" 1856 | top: "inception_5a/pool_proj" 1857 | param { 1858 | lr_mult: 1.0 1859 | decay_mult: 1.0 1860 | } 1861 | param { 1862 | lr_mult: 2.0 1863 | decay_mult: 0.0 1864 | } 1865 | convolution_param { 1866 | num_output: 128 1867 | kernel_size: 1 1868 | weight_filler { 1869 | type: "xavier" 1870 | std: 0.1 1871 | } 1872 | bias_filler { 1873 | type: "constant" 1874 | value: 0.2 1875 | } 1876 | } 1877 | } 1878 | layer { 1879 | name: "inception_5a/relu_pool_proj" 1880 | type: "ReLU" 1881 | bottom: "inception_5a/pool_proj" 1882 | top: "inception_5a/pool_proj" 1883 | } 1884 | layer { 1885 | name: "inception_5a/output" 1886 | type: "Concat" 1887 | bottom: "inception_5a/1x1" 1888 | bottom: "inception_5a/3x3" 1889 | bottom: "inception_5a/5x5" 1890 | bottom: "inception_5a/pool_proj" 1891 | top: "inception_5a/output" 1892 | } 1893 | layer { 1894 | name: "inception_5b/1x1" 1895 | type: "Convolution" 1896 | bottom: 
"inception_5a/output" 1897 | top: "inception_5b/1x1" 1898 | param { 1899 | lr_mult: 1.0 1900 | decay_mult: 1.0 1901 | } 1902 | param { 1903 | lr_mult: 2.0 1904 | decay_mult: 0.0 1905 | } 1906 | convolution_param { 1907 | num_output: 384 1908 | kernel_size: 1 1909 | weight_filler { 1910 | type: "xavier" 1911 | std: 0.03 1912 | } 1913 | bias_filler { 1914 | type: "constant" 1915 | value: 0.2 1916 | } 1917 | } 1918 | } 1919 | layer { 1920 | name: "inception_5b/relu_1x1" 1921 | type: "ReLU" 1922 | bottom: "inception_5b/1x1" 1923 | top: "inception_5b/1x1" 1924 | } 1925 | layer { 1926 | name: "inception_5b/3x3_reduce" 1927 | type: "Convolution" 1928 | bottom: "inception_5a/output" 1929 | top: "inception_5b/3x3_reduce" 1930 | param { 1931 | lr_mult: 1.0 1932 | decay_mult: 1.0 1933 | } 1934 | param { 1935 | lr_mult: 2.0 1936 | decay_mult: 0.0 1937 | } 1938 | convolution_param { 1939 | num_output: 192 1940 | kernel_size: 1 1941 | weight_filler { 1942 | type: "xavier" 1943 | std: 0.09 1944 | } 1945 | bias_filler { 1946 | type: "constant" 1947 | value: 0.2 1948 | } 1949 | } 1950 | } 1951 | layer { 1952 | name: "inception_5b/relu_3x3_reduce" 1953 | type: "ReLU" 1954 | bottom: "inception_5b/3x3_reduce" 1955 | top: "inception_5b/3x3_reduce" 1956 | } 1957 | layer { 1958 | name: "inception_5b/3x3" 1959 | type: "Convolution" 1960 | bottom: "inception_5b/3x3_reduce" 1961 | top: "inception_5b/3x3" 1962 | param { 1963 | lr_mult: 1.0 1964 | decay_mult: 1.0 1965 | } 1966 | param { 1967 | lr_mult: 2.0 1968 | decay_mult: 0.0 1969 | } 1970 | convolution_param { 1971 | num_output: 384 1972 | pad: 1 1973 | kernel_size: 3 1974 | weight_filler { 1975 | type: "xavier" 1976 | std: 0.03 1977 | } 1978 | bias_filler { 1979 | type: "constant" 1980 | value: 0.2 1981 | } 1982 | } 1983 | } 1984 | layer { 1985 | name: "inception_5b/relu_3x3" 1986 | type: "ReLU" 1987 | bottom: "inception_5b/3x3" 1988 | top: "inception_5b/3x3" 1989 | } 1990 | layer { 1991 | name: "inception_5b/5x5_reduce" 1992 | type: 
"Convolution" 1993 | bottom: "inception_5a/output" 1994 | top: "inception_5b/5x5_reduce" 1995 | param { 1996 | lr_mult: 1.0 1997 | decay_mult: 1.0 1998 | } 1999 | param { 2000 | lr_mult: 2.0 2001 | decay_mult: 0.0 2002 | } 2003 | convolution_param { 2004 | num_output: 48 2005 | kernel_size: 1 2006 | weight_filler { 2007 | type: "xavier" 2008 | std: 0.2 2009 | } 2010 | bias_filler { 2011 | type: "constant" 2012 | value: 0.2 2013 | } 2014 | } 2015 | } 2016 | layer { 2017 | name: "inception_5b/relu_5x5_reduce" 2018 | type: "ReLU" 2019 | bottom: "inception_5b/5x5_reduce" 2020 | top: "inception_5b/5x5_reduce" 2021 | } 2022 | layer { 2023 | name: "inception_5b/5x5" 2024 | type: "Convolution" 2025 | bottom: "inception_5b/5x5_reduce" 2026 | top: "inception_5b/5x5" 2027 | param { 2028 | lr_mult: 1.0 2029 | decay_mult: 1.0 2030 | } 2031 | param { 2032 | lr_mult: 2.0 2033 | decay_mult: 0.0 2034 | } 2035 | convolution_param { 2036 | num_output: 128 2037 | pad: 2 2038 | kernel_size: 5 2039 | weight_filler { 2040 | type: "xavier" 2041 | std: 0.03 2042 | } 2043 | bias_filler { 2044 | type: "constant" 2045 | value: 0.2 2046 | } 2047 | } 2048 | } 2049 | layer { 2050 | name: "inception_5b/relu_5x5" 2051 | type: "ReLU" 2052 | bottom: "inception_5b/5x5" 2053 | top: "inception_5b/5x5" 2054 | } 2055 | layer { 2056 | name: "inception_5b/pool" 2057 | type: "Pooling" 2058 | bottom: "inception_5a/output" 2059 | top: "inception_5b/pool" 2060 | pooling_param { 2061 | pool: MAX 2062 | kernel_size: 3 2063 | stride: 1 2064 | pad: 1 2065 | } 2066 | } 2067 | layer { 2068 | name: "inception_5b/pool_proj" 2069 | type: "Convolution" 2070 | bottom: "inception_5b/pool" 2071 | top: "inception_5b/pool_proj" 2072 | param { 2073 | lr_mult: 1.0 2074 | decay_mult: 1.0 2075 | } 2076 | param { 2077 | lr_mult: 2.0 2078 | decay_mult: 0.0 2079 | } 2080 | convolution_param { 2081 | num_output: 128 2082 | kernel_size: 1 2083 | weight_filler { 2084 | type: "xavier" 2085 | std: 0.1 2086 | } 2087 | bias_filler { 2088 
| type: "constant" 2089 | value: 0.2 2090 | } 2091 | } 2092 | } 2093 | layer { 2094 | name: "inception_5b/relu_pool_proj" 2095 | type: "ReLU" 2096 | bottom: "inception_5b/pool_proj" 2097 | top: "inception_5b/pool_proj" 2098 | } 2099 | layer { 2100 | name: "inception_5b/output" 2101 | type: "Concat" 2102 | bottom: "inception_5b/1x1" 2103 | bottom: "inception_5b/3x3" 2104 | bottom: "inception_5b/5x5" 2105 | bottom: "inception_5b/pool_proj" 2106 | top: "inception_5b/output" 2107 | } 2108 | layer { 2109 | name: "pool5/7x7_s1" 2110 | type: "Pooling" 2111 | bottom: "inception_5b/output" 2112 | top: "pool5/7x7_s1" 2113 | pooling_param { 2114 | pool: AVE 2115 | kernel_size: 7 2116 | stride: 1 2117 | } 2118 | } 2119 | layer { 2120 | name: "pool5/drop_7x7_s1" 2121 | type: "Dropout" 2122 | bottom: "pool5/7x7_s1" 2123 | top: "pool5/7x7_s1" 2124 | dropout_param { 2125 | dropout_ratio: 0.4 2126 | } 2127 | } 2128 | layer { 2129 | name: "loss3/classifier" 2130 | type: "InnerProduct" 2131 | bottom: "pool5/7x7_s1" 2132 | top: "loss3/classifier" 2133 | param { 2134 | lr_mult: 1.0 2135 | decay_mult: 1.0 2136 | } 2137 | param { 2138 | lr_mult: 2.0 2139 | decay_mult: 0.0 2140 | } 2141 | inner_product_param { 2142 | num_output: 205 2143 | weight_filler { 2144 | type: "xavier" 2145 | } 2146 | bias_filler { 2147 | type: "constant" 2148 | value: 0.0 2149 | } 2150 | } 2151 | } 2152 | layer { 2153 | name: "prob" 2154 | type: "Softmax" 2155 | bottom: "loss3/classifier" 2156 | top: "prob" 2157 | } 2158 | -------------------------------------------------------------------------------- /video/les_coleman.m4v: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/burningion/deepdreamtutorial/067b810c210d9c0c692752515afeb3e06d9e6776/video/les_coleman.m4v -------------------------------------------------------------------------------- /video/runnitopticalflow.py: 
-------------------------------------------------------------------------------- 1 | # imports and basic notebook setup 2 | from cStringIO import StringIO 3 | import numpy as np 4 | import scipy.ndimage as nd 5 | import PIL.Image 6 | from IPython.display import clear_output, Image, display 7 | from google.protobuf import text_format 8 | 9 | import glob 10 | import cv2 # used for optical flow 11 | 12 | import caffe 13 | # uncomment below to do gpu based rendering... I don't have sufficient memory for now :( 14 | #caffe.set_device(0) 15 | caffe.set_mode_gpu() 16 | 17 | def showarray(a, fmt='jpeg'): 18 | a = np.uint8(np.clip(a, 0, 255)) 19 | f = StringIO() 20 | PIL.Image.fromarray(a).save(f, fmt) 21 | display(Image(data=f.getvalue())) 22 | 23 | model_path = '/Users/kirkkaiser/caffe/models/googlenet_places205/' # substitute your path here 24 | net_fn = model_path + 'deploy_places205.protxt' 25 | param_fn = model_path + 'googlelet_places205_train_iter_2400000.caffemodel' 26 | 27 | # Patching model to be able to compute gradients. 28 | # Note that you can also manually add "force_backward: true" line to "deploy.prototxt". 
29 | model = caffe.io.caffe_pb2.NetParameter() 30 | text_format.Merge(open(net_fn).read(), model) 31 | model.force_backward = True 32 | open('tmp.prototxt', 'w').write(str(model)) 33 | 34 | net = caffe.Classifier('tmp.prototxt', param_fn, 35 | mean = np.float32([104.0, 116.0, 122.0]), # ImageNet mean, training set dependent 36 | channel_swap = (2,1,0)) # the reference model has channels in BGR order instead of RGB 37 | 38 | # a couple of utility functions for converting to and from Caffe's input image layout 39 | def preprocess(net, img): 40 | return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data'] 41 | def deprocess(net, img): 42 | return np.dstack((img + net.transformer.mean['data'])[::-1]) 43 | 44 | def objective_L2(dst): 45 | dst.diff[:] = dst.data 46 | 47 | def make_step(net, step_size=1.5, end='inception_4c/output', 48 | jitter=32, clip=True, objective=objective_L2): 49 | '''Basic gradient ascent step.''' 50 | 51 | src = net.blobs['data'] # input image is stored in Net's 'data' blob 52 | dst = net.blobs[end] 53 | 54 | ox, oy = np.random.randint(-jitter, jitter+1, 2) 55 | src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2) # apply jitter shift 56 | 57 | net.forward(end=end) 58 | objective(dst) # specify the optimization objective 59 | net.backward(start=end) 60 | g = src.diff[0] 61 | # apply normalized ascent step to the input image 62 | src.data[:] += step_size/np.abs(g).mean() * g 63 | 64 | src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2) # unshift image 65 | 66 | if clip: 67 | bias = net.transformer.mean['data'] 68 | src.data[:] = np.clip(src.data, -bias, 255-bias) 69 | 70 | def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4, 71 | end='inception_4c/output', clip=True, **step_params): 72 | # prepare base images for all octaves 73 | octaves = [preprocess(net, base_img)] 74 | for i in xrange(octave_n-1): 75 | octaves.append(nd.zoom(octaves[-1], (1, 1.0/octave_scale,1.0/octave_scale), order=1)) 76 | 77 
| src = net.blobs['data'] 78 | detail = np.zeros_like(octaves[-1]) # allocate image for network-produced details 79 | for octave, octave_base in enumerate(octaves[::-1]): 80 | h, w = octave_base.shape[-2:] 81 | if octave > 0: 82 | # upscale details from the previous octave 83 | h1, w1 = detail.shape[-2:] 84 | detail = nd.zoom(detail, (1, 1.0*h/h1,1.0*w/w1), order=1) 85 | 86 | src.reshape(1,3,h,w) # resize the network's input image size 87 | src.data[0] = octave_base+detail 88 | for i in xrange(iter_n): 89 | make_step(net, end=end, clip=clip, **step_params) 90 | 91 | # visualization 92 | vis = deprocess(net, src.data[0]) 93 | if not clip: # adjust image contrast if clipping is disabled 94 | vis = vis*(255.0/np.percentile(vis, 99.98)) 95 | showarray(vis) 96 | print octave, i, end, vis.shape 97 | clear_output(wait=True) 98 | 99 | # extract details produced on the current octave 100 | detail = src.data[0]-octave_base 101 | # returning the resulting image 102 | return deprocess(net, src.data[0]) 103 | 104 | 105 | guide = np.float32(PIL.Image.open('../swamp.jpg')) # flowers.jpg is 320x240 106 | end = 'inception_4b/5x5' 107 | h, w = guide.shape[:2] 108 | src, dst = net.blobs['data'], net.blobs[end] 109 | src.reshape(1,3,h,w) 110 | src.data[0] = preprocess(net, guide) 111 | net.forward(end=end) 112 | guide_features = dst.data[0].copy() 113 | 114 | def objective_guide(dst): 115 | x = dst.data[0].copy() 116 | y = guide_features 117 | ch = x.shape[0] 118 | x = x.reshape(ch,-1) 119 | y = y.reshape(ch,-1) 120 | A = x.T.dot(y) # compute the matrix of dot-products with guide features 121 | dst.diff[0].reshape(ch,-1)[:] = y[:,A.argmax(1)] # select ones that match best 122 | 123 | import timeit 124 | start_time = timeit.default_timer() 125 | 126 | img = np.float32(PIL.Image.open('output00001.jpg')) 127 | h, w, c = img.shape 128 | hallu = deepdream(net, img, end=end, objective=objective_guide) 129 | np.clip(hallu, 0, 255, out=hallu) 130 | 
PIL.Image.fromarray(np.uint8(hallu)).save('output/' + 'output00001.jpg') 131 | grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) 132 | num_files = 0 133 | 134 | for filename in glob.glob('*.jpg'): 135 | if num_files == 0: 136 | num_files += 1 137 | continue 138 | previousImg = img 139 | previousGrayImg = grayImg 140 | img = np.float32(PIL.Image.open(filename)) 141 | grayImg = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) 142 | flow = cv2.calcOpticalFlowFarneback(previousGrayImg, grayImg, pyr_scale=0.5, levels=3, winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0) 143 | flow = -flow 144 | flow[:,:,0] += np.arange(w) 145 | flow[:,:,1] += np.arange(h)[:,np.newaxis] 146 | halludiff = hallu - previousImg 147 | halludiff = cv2.remap(halludiff, flow, None, cv2.INTER_LINEAR) 148 | hallu = img + halludiff 149 | hallu = deepdream(net, hallu, end=end, objective=objective_guide) 150 | np.clip(hallu, 0, 255, out=hallu) 151 | PIL.Image.fromarray(np.uint8(hallu)).save('output/' + filename) 152 | print "Finished file number " + str(num_files) + ", out of " + str(len(glob.glob('*.jpg'))) + " files. " 153 | num_files += 1 154 | 155 | elapsed = timeit.default_timer() - start_time 156 | print "Ran through " + str(num_files) + " in " + str(elapsed) + "seconds. " + str(elapsed / num_files) + " seconds per image average." 157 | 158 | --------------------------------------------------------------------------------