├── image_analogy
│   ├── __init__.py
│   ├── losses
│   │   ├── __init__.py
│   │   ├── core.py
│   │   ├── neural_style.py
│   │   ├── nnf.py
│   │   ├── analogy.py
│   │   ├── mrf.py
│   │   ├── patches.py
│   │   └── patch_matcher.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── base.py
│   │   ├── analogy.py
│   │   └── nnf.py
│   ├── img_utils.py
│   ├── optimizer.py
│   ├── vgg16.py
│   ├── main.py
│   └── argparser.py
├── examples
│   ├── images
│   │   ├── arch-A.jpg
│   │   ├── arch-Ap.jpg
│   │   ├── arch-B.jpg
│   │   ├── season-xfer.jpg
│   │   ├── sugarskull-A.jpg
│   │   ├── sugarskull-B.jpg
│   │   ├── season-xfer-A.jpg
│   │   ├── season-xfer-Ap.jpg
│   │   ├── season-xfer-B.jpg
│   │   ├── sugarskull-Ap.jpg
│   │   ├── sugarskull-analogy.jpg
│   │   ├── trump-image-analogy.jpg
│   │   ├── image-analogy-explanation.jpg
│   │   └── ATTRIBUTIONS.md
│   ├── winterize.sh
│   ├── texturize.sh
│   ├── render-mrf-sweep.sh
│   ├── render-mrf-sweep-cpu.sh
│   ├── texturize-cpu.sh
│   ├── winterize-cpu.sh
│   ├── sugar-skull.sh
│   ├── sugar-skull-cpu.sh
│   ├── render-example.sh
│   ├── render-example-cpu.sh
│   └── gif-style.sh
├── .gitignore
├── requirements.txt
├── setup.py
├── LICENSE.txt
├── scripts
│   └── make_image_analogy.py
└── README.md

/image_analogy/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/image_analogy/losses/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/image_analogy/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/examples/images/arch-A.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/arch-A.jpg
--------------------------------------------------------------------------------
/examples/images/arch-Ap.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/arch-Ap.jpg
--------------------------------------------------------------------------------
/examples/images/arch-B.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/arch-B.jpg
--------------------------------------------------------------------------------
/examples/images/season-xfer.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/season-xfer.jpg
--------------------------------------------------------------------------------
/examples/images/sugarskull-A.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/sugarskull-A.jpg
--------------------------------------------------------------------------------
/examples/images/sugarskull-B.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/sugarskull-B.jpg
--------------------------------------------------------------------------------
/examples/images/season-xfer-A.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/season-xfer-A.jpg -------------------------------------------------------------------------------- /examples/images/season-xfer-Ap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/season-xfer-Ap.jpg -------------------------------------------------------------------------------- /examples/images/season-xfer-B.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/season-xfer-B.jpg -------------------------------------------------------------------------------- /examples/images/sugarskull-Ap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/sugarskull-Ap.jpg -------------------------------------------------------------------------------- /examples/images/sugarskull-analogy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/sugarskull-analogy.jpg -------------------------------------------------------------------------------- /examples/images/trump-image-analogy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/trump-image-analogy.jpg -------------------------------------------------------------------------------- /examples/images/image-analogy-explanation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awentzonline/image-analogies/HEAD/examples/images/image-analogy-explanation.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | *.swp 4 | *.egg-info 5 | build/ 6 | examples/out/* 7 | !examples/README.md 8 | vgg16_weights.h5 9 | venv 10 | MANIFEST 11 | dist/ 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Cython==0.23.4 2 | h5py==2.5.0 3 | Keras==1.1.1 4 | numpy==1.10.4 5 | Pillow==3.1.1 6 | PyYAML==3.11 7 | scipy==0.17.0 8 | scikit-learn==0.17.0 9 | six==1.10.0 10 | -e git://github.com/Theano/Theano.git@954c3816a40de172c28124017a25387f3bf551b2#egg=Theano 11 | -------------------------------------------------------------------------------- /examples/images/ATTRIBUTIONS.md: -------------------------------------------------------------------------------- 1 | Image Attributions 2 | ================== 3 | 4 | arch 5 | ---- 6 | http://www.mrl.nyu.edu/projects/image-analogies/arch.html 7 | 8 | 9 | season-xfer 10 | ----------- 11 | A/A': mricon https://www.flickr.com/photos/mricon/ (CC BY-SA 2.0) 12 | B: mobinovyc https://pixabay.com/en/winter-snow-winter-forest-nature-1155941/ (CC 0) 13 | -------------------------------------------------------------------------------- /examples/winterize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | TARGET=$1 4 | OUTPUT_PREFIX=$2 5 | VGG_WEIGHTS=${3-vgg16_weights.h5} 6 | 
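# usage: ./winterize.sh <target-image> <output-prefix> [vgg-weights-file] [width]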
WIDTH=${4-512}
7 | 
8 | make_image_analogy.py \
9 |     images/season-xfer-A.jpg images/season-xfer-Ap.jpg \
10 |     $TARGET out/$OUTPUT_PREFIX-winterized/$OUTPUT_PREFIX-Bp \
11 |     --analogy-layers='conv3_1,conv4_1' \
12 |     --scales=5 --contrast=0.1 \
13 |     --model=brute --patch-size=3 \
14 |     --width=$WIDTH \
15 |     --vgg-weights=$VGG_WEIGHTS --output-full
16 | 
--------------------------------------------------------------------------------
/examples/texturize.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | TARGET=$1
4 | OUTPUT_PREFIX=$2
5 | VGG_WEIGHTS=${3-vgg16_weights.h5}
6 | WIDTH=${4-512}
7 | 
8 | echo "Making a texture (local coherence loss only)"
9 | make_image_analogy.py \
10 |     $TARGET $TARGET $TARGET \
11 |     out/$OUTPUT_PREFIX-texturized/$OUTPUT_PREFIX-Bp \
12 |     --analogy-layers='conv3_1,conv4_1' \
13 |     --scales=3 --analogy-w=0 \
14 |     --model=brute --patch-size=3 \
15 |     --width=$WIDTH \
16 |     --vgg-weights=$VGG_WEIGHTS --output-full
17 | 
--------------------------------------------------------------------------------
/image_analogy/losses/core.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | 
3 | 
4 | def total_variation_loss(x, num_rows, num_cols):
5 |     '''designed to keep the generated image locally coherent'''
6 |     assert K.ndim(x) == 4
7 |     a = K.square(x[:, :, 1:, :num_cols-1] - x[:, :, :num_rows-1, :num_cols-1])
8 |     b = K.square(x[:, :, :num_rows-1, 1:] - x[:, :, :num_rows-1, :num_cols-1])
9 |     return K.sum(K.pow(a + b, 1.25))
10 | 
11 | 
12 | def content_loss(a, b):
13 |     return K.sum(K.square(a - b))
14 | 
--------------------------------------------------------------------------------
/examples/render-mrf-sweep.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PREFIX=$1
4 | VGG_WEIGHTS=${2-vgg16_weights.h5}
5 | MRF_VALS=(0.0 0.5 1.0 1.5)
6 | HEIGHT=${3-320}
7 | 
8 | for ((i=0; i < ${#MRF_VALS[@]}; i++)); do
9 |     MRF_VAL=${MRF_VALS[i]}
10 |     make_image_analogy.py \
11 |         images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \
12 |         images/$PREFIX-B.jpg out/$PREFIX-mrf-${MRF_VAL}/$PREFIX-Bp \
13 |         --mrf-w=${MRF_VAL} --patch-size=3 --height=$HEIGHT \
14 |         --model=brute \
15 |         --vgg-weights=$VGG_WEIGHTS --output-full
16 | done
17 | 
--------------------------------------------------------------------------------
/examples/render-mrf-sweep-cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | PREFIX=$1
4 | VGG_WEIGHTS=${2-vgg16_weights.h5}
5 | MRF_VALS=(0.0 0.5 1.0 1.5)
6 | HEIGHT=${3-320}
7 | 
8 | for ((i=0; i < ${#MRF_VALS[@]}; i++)); do
9 |     MRF_VAL=${MRF_VALS[i]}
10 |     make_image_analogy.py \
11 |         images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \
12 |         images/$PREFIX-B.jpg out/$PREFIX-mrf-${MRF_VAL}-cpu/$PREFIX-Bp \
13 |         --mrf-w=${MRF_VAL} --patch-size=3 --height=$HEIGHT \
14 |         --model=patchmatch \
15 |         --vgg-weights=$VGG_WEIGHTS --output-full
16 | done
17 | 
--------------------------------------------------------------------------------
/examples/texturize-cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | TARGET=$1
4 | OUTPUT_PREFIX=$2
5 | VGG_WEIGHTS=${3-vgg16_weights.h5}
6 | WIDTH=${4-512}
7 | PATCH_SIZE=${5-3} # try 1 for faster but less interesting patterns
8 | 
9 | echo "Making a texture (local coherence loss only)"
10 | make_image_analogy.py \
11 |     $TARGET $TARGET $TARGET \
12 |     out/$OUTPUT_PREFIX-texturized-cpu/$OUTPUT_PREFIX-Bp \
13 |     --analogy-layers='conv3_1,conv4_1' \
14 |     --scales=3 --analogy-w=0 \
15 |     --model=patchmatch --patch-size=$PATCH_SIZE \
16 |     --width=$WIDTH \
17 |     --vgg-weights=$VGG_WEIGHTS --output-full
18 | 
--------------------------------------------------------------------------------
/examples/winterize-cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | TARGET=$1
4 | OUTPUT_PREFIX=$2
5 | VGG_WEIGHTS=${3-vgg16_weights.h5}
6 | HEIGHT=${4-512}
7 | PATCH_SIZE=1 # try 3 for better-looking but slower-rendering results
8 | 
9 | make_image_analogy.py \
10 |     images/season-xfer-A.jpg images/season-xfer-Ap.jpg \
11 |     $TARGET out/$OUTPUT_PREFIX-winterized-cpu/$OUTPUT_PREFIX-Bp \
12 |     --analogy-layers='conv1_1,conv2_1,conv3_1,conv4_1' \
13 |     --scales=5 --contrast=0.1 \
14 |     --model=patchmatch --patch-size=$PATCH_SIZE \
15 |     --height=$HEIGHT \
16 |     --vgg-weights=$VGG_WEIGHTS --output-full
17 | 
--------------------------------------------------------------------------------
/examples/sugar-skull.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | IMAGE_B=$1
4 | PREFIX=$2
5 | VGG_WEIGHTS=${3-vgg16_weights.h5}
6 | HEIGHT=512
7 | PATCH_SIZE=3 # try 1 for less interesting, but faster-rendering effects
8 | SKULL_IMAGE_A=images/sugarskull-A.jpg
9 | SKULL_IMAGE_AP=images/sugarskull-Ap.jpg
10 | 
11 | echo "Make a sugar skull"
12 | make_image_analogy.py \
13 |     $SKULL_IMAGE_A $SKULL_IMAGE_AP \
14 |     $IMAGE_B \
15 |     out/$PREFIX-sugarskull/$PREFIX-Bp \
16 |     --height=$HEIGHT \
17 |     --mrf-w=1.5 \
18 |     --a-scale-mode=match \
19 |     --model=brute --patch-size=$PATCH_SIZE \
20 |     --contrast=1 \
21 |     --vgg-weights=$VGG_WEIGHTS --output-full
22 | 
--------------------------------------------------------------------------------
/examples/sugar-skull-cpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | IMAGE_B=$1
4 | PREFIX=$2
5 | VGG_WEIGHTS=${3-vgg16_weights.h5}
6 | HEIGHT=512
7 | PATCH_SIZE=1 # try 3 for more interesting, but slow-rendering effects
8 | SKULL_IMAGE_A=images/sugarskull-A.jpg
9 | SKULL_IMAGE_AP=images/sugarskull-Ap.jpg
10 | 
11 | echo "Make a sugar skull (CPU)"
12 | make_image_analogy.py \
13 |     $SKULL_IMAGE_A $SKULL_IMAGE_AP \
14 |     $IMAGE_B \
15 |     out/$PREFIX-sugarskull-cpu/$PREFIX-Bp \
16 |     --height=$HEIGHT \
17 |     --mrf-w=1.5 \
18 |     --a-scale-mode=match \
19 |     --model=patchmatch --patch-size=$PATCH_SIZE \
20 |     --contrast=1 \
21 |     --vgg-weights=$VGG_WEIGHTS --output-full
22 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from distutils.core import setup
3 | from setuptools import find_packages
4 | 
5 | setup(
6 |     name='neural-image-analogies',
7 |     version='0.1.2',
8 |     description='Generate image analogies with a deep neural network.',
9 |     author='Adam Wentz',
10 |     author_email='adam@adamwentz.com',
11 |     url='https://github.com/awentzonline/image-analogies/',
12 |     packages=find_packages(),
13 |     scripts=[
14 |         'scripts/make_image_analogy.py'
15 |     ],
16 |     install_requires=[
17 |         'h5py>=2.5.0',
18 |         'Keras>=1.0.0',
19 |         'numpy>=1.10.4',
20 |         'Pillow>=3.1.1',
21 |         'PyYAML>=3.11',
22 |         'scipy>=0.17.0',
23 |         'scikit-learn>=0.17.0',
24 |         'six>=1.10.0',
25 |         'Theano>=0.8.2',
26 |     ]
27 | )
28 | 
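A minimal from-source install sketch (an assumed workflow, not documented in this repo; the PyPI install described in the README is the supported path):

    pip install -r requirements.txt   # the pinned versions above, including the exact Theano commit
    pip install .                     # installs the package and puts make_image_analogy.py on your PATH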
--------------------------------------------------------------------------------
/image_analogy/losses/neural_style.py:
--------------------------------------------------------------------------------
1 | '''This is from the keras neural style example.'''
2 | from keras import backend as K
3 | 
4 | 
5 | # the gram matrix of an image tensor (feature-wise outer product)
6 | def gram_matrix(x):
7 |     assert K.ndim(x) == 3
8 |     features = K.batch_flatten(x)
9 |     gram = K.dot(features, K.transpose(features))
10 |     return gram
11 | 
12 | 
13 | # the "style loss" is designed to maintain
14 | # the style of the reference image in the generated image.
15 | # It is based on the gram matrices (which capture style) of
16 | # feature maps from the style reference image
17 | # and from the generated image
18 | def neural_style_loss(style, combination, num_channels, img_width, img_height):
19 |     assert K.ndim(style) == 3
20 |     assert K.ndim(combination) == 3
21 |     S = gram_matrix(style)
22 |     C = gram_matrix(combination)
23 |     size = img_width * img_height
24 |     return K.sum(K.square(S - C)) / (4. * (num_channels ** 2) * (size ** 2))
25 | 
--------------------------------------------------------------------------------
/image_analogy/img_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.misc import imread, imresize
3 | 
4 | from . import vgg16
5 | 
6 | 
7 | # util function to load an image from disk as an array
8 | def load_image(image_path):
9 |     return imread(image_path)  # NOTE: the mode='RGB' kwarg requires scipy v0.17
10 | 
11 | 
12 | # util function to open, resize and format pictures into appropriate tensors
13 | def preprocess_image(x, img_width, img_height):
14 |     img = imresize(x, (img_height, img_width), interp='bicubic').astype(np.float32)
15 |     img = vgg16.img_to_vgg(img)
16 |     img = np.expand_dims(img, axis=0)
17 |     return img
18 | 
19 | 
20 | # util function to convert a tensor into a valid image
21 | def deprocess_image(x, contrast_percent=0.0, resize=None):
22 |     x = vgg16.img_from_vgg(x)
23 |     if contrast_percent:
24 |         min_x, max_x = np.percentile(x, (contrast_percent, 100 - contrast_percent))
25 |         x = (x - min_x) * 255.0 / (max_x - min_x)
26 |     x = np.clip(x, 0, 255)
27 |     if resize:
28 |         x = imresize(x, resize, interp='bicubic')
29 |     return x.astype('uint8')
30 | 
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 | Copyright (c) 2016 Adam Wentz
3 | 
4 | Permission is hereby granted, free of charge, to any person obtaining a copy of
5 | this software and associated documentation files (the "Software"), to deal in the
6 | Software without restriction, including without limitation the rights to use,
7 | copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
8 | Software, and to permit persons to whom the Software is furnished to do so,
9 | subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR 17 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 19 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /scripts/make_image_analogy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | '''Neural Image Analogies with Keras 3 | 4 | Before running this script, download the weights for the convolutional layers of 5 | the VGG16 model at: 6 | https://github.com/awentzonline/image-analogies/releases/download/v0.0.5/vgg16_weights.h5 7 | (source: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3) 8 | and make sure the parameter `vgg_weights` matches the location of the file. 9 | ''' 10 | import time 11 | 12 | import image_analogy.argparser 13 | import image_analogy.main 14 | 15 | 16 | if __name__ == '__main__': 17 | args = image_analogy.argparser.parse_args() 18 | if args: 19 | if args.match_model == 'patchmatch': 20 | print('Using PatchMatch model') 21 | from image_analogy.models.nnf import NNFModel as model_class 22 | else: 23 | print('Using brute-force model') 24 | from image_analogy.models.analogy import AnalogyModel as model_class 25 | start_time = time.time() 26 | try: 27 | image_analogy.main.main(args, model_class) 28 | except KeyboardInterrupt: 29 | print('Shutting down...') 30 | print('Done after {:.2f} seconds'.format(time.time() - start_time)) 31 | -------------------------------------------------------------------------------- /image_analogy/losses/nnf.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | 4 | from .core import content_loss 5 | from .patch_matcher import PatchMatcher 6 | 7 | 8 | def nnf_analogy_loss(a, a_prime, b, b_prime, num_steps=5, jump_size=1.0, patch_size=1, patch_stride=1): 9 | '''image shapes: (channels, rows, cols) 10 | ''' 11 | bs = b.shape 12 | matcher = PatchMatcher((bs[2], bs[1], bs[0]), a, jump_size=jump_size, patch_size=patch_size, patch_stride=patch_stride) 13 | b_patches = matcher.get_patches_for(b) 14 | b_normed = matcher.normalize_patches(b_patches) 15 | for i in range(num_steps): 16 | matcher.update_with_patches(b_normed, reverse_propagation=bool(i % 2)) 17 | target = matcher.get_reconstruction(combined=a_prime) 18 | loss = content_loss(target, b_prime) 19 | return loss 20 | 21 | 22 | class NNFState(object): 23 | def __init__(self, matcher, f_layer): 24 | self.matcher = matcher 25 | mis = matcher.input_shape 26 | self.placeholder = K.placeholder(mis[::-1]) 27 | self.f_layer = f_layer 28 | 29 | def update(self, x, num_steps=5): 30 | x_f = self.f_layer([x])[0] 31 | x_patches = self.matcher.get_patches_for(x_f[0]) 32 | x_normed = self.matcher.normalize_patches(x_patches) 33 | for i in range(num_steps): 34 | self.matcher.update_with_patches(x_normed, reverse_propagation=bool(i % 2)) 35 | -------------------------------------------------------------------------------- /examples/render-example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PREFIX=$1 4 | VGG_WEIGHTS=${2-vgg16_weights.h5} 5 | HEIGHT=512 6 | PATCH_SIZE=3 7 | 8 | echo "Only using analogy loss" 9 | make_image_analogy.py \ 10 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 11 | images/$PREFIX-B.jpg 
out/$PREFIX-analogy-only/$PREFIX-Bp \ 12 | --mrf-w=0 --height=$HEIGHT \ 13 | --model=brute --patch-size=$PATCH_SIZE \ 14 | --vgg-weights=$VGG_WEIGHTS --output-full 15 | 16 | echo "Stock output (analogy and local coherence loss)" 17 | make_image_analogy.py \ 18 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 19 | images/$PREFIX-B.jpg out/$PREFIX-blend/$PREFIX-Bp \ 20 | --height=$HEIGHT \ 21 | --model=brute --patch-size=$PATCH_SIZE \ 22 | --vgg-weights=$VGG_WEIGHTS --output-full 23 | 24 | echo "Style transfer (content loss and local coherence loss)" 25 | make_image_analogy.py \ 26 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 27 | images/$PREFIX-B.jpg out/$PREFIX-style-xfer/$PREFIX-Bp \ 28 | --analogy-w=0 --b-content-w=1 --mrf-w=1 --height=$HEIGHT \ 29 | --model=brute --patch-size=$PATCH_SIZE \ 30 | --vgg-weights=$VGG_WEIGHTS --output-full 31 | 32 | echo "Texture generator (local coherence only)" 33 | make_image_analogy.py \ 34 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 35 | images/$PREFIX-B.jpg out/$PREFIX-texture/$PREFIX-Bp \ 36 | --analogy-w=0 --height=$HEIGHT \ 37 | --model=brute --patch-size=$PATCH_SIZE \ 38 | --vgg-weights=$VGG_WEIGHTS --output-full 39 | -------------------------------------------------------------------------------- /examples/render-example-cpu.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | PREFIX=$1 4 | VGG_WEIGHTS=${2-vgg16_weights.h5} 5 | HEIGHT=512 6 | PATCH_SIZE=1 # try 3 for nicer-looking but slower-rendering output 7 | 8 | echo "Only using analogy loss" 9 | make_image_analogy.py \ 10 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 11 | images/$PREFIX-B.jpg out/$PREFIX-analogy-only-cpu/$PREFIX-Bp \ 12 | --mrf-w=0 --height=$HEIGHT \ 13 | --patch-size=$PATCH_SIZE \ 14 | --vgg-weights=$VGG_WEIGHTS --output-full 15 | 16 | echo "Stock output (analogy and local coherence loss)" 17 | make_image_analogy.py \ 18 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 19 | images/$PREFIX-B.jpg out/$PREFIX-blend-cpu/$PREFIX-Bp \ 20 | --height=$HEIGHT \ 21 | --patch-size=$PATCH_SIZE \ 22 | --vgg-weights=$VGG_WEIGHTS --output-full 23 | 24 | echo "Style transfer (content loss and local coherence loss)" 25 | make_image_analogy.py \ 26 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 27 | images/$PREFIX-B.jpg out/$PREFIX-style-xfer-cpu/$PREFIX-Bp \ 28 | --analogy-w=0 --b-content-w=1 --mrf-w=1 --height=$HEIGHT \ 29 | --patch-size=$PATCH_SIZE \ 30 | --vgg-weights=$VGG_WEIGHTS --output-full 31 | 32 | echo "Texture generator (local coherence only)" 33 | make_image_analogy.py \ 34 | images/$PREFIX-A.jpg images/$PREFIX-Ap.jpg \ 35 | images/$PREFIX-B.jpg out/$PREFIX-texture-cpu/$PREFIX-Bp \ 36 | --analogy-w=0 --height=$HEIGHT \ 37 | --patch-size=$PATCH_SIZE \ 38 | --vgg-weights=$VGG_WEIGHTS --output-full 39 | -------------------------------------------------------------------------------- /examples/gif-style.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Styles a gif by splitting out the frames and processing them individually. 
3 | # Uses `convert` tool from ImageMagick http://www.imagemagick.org/script/binary-releases.php 4 | GIF=$1 5 | IMAGE_A=$2 6 | IMAGE_AP=$3 7 | PREFIX=$4 8 | VGG_WEIGHTS=${5-vgg16_weights.h5} 9 | WIDTH=256 10 | PATCH_SIZE=3 # try 1 for less interesting, but faster-rendering effects 11 | MODEL=brute 12 | CONTRAST=0.5 13 | MRFW=1.5 14 | CONTENTW=0 15 | ANALOGYW=1 16 | SCALES=1 17 | ITERATIONS=2 18 | FRAMES_PATH=$PREFIX/frames 19 | WORK_PATH=$PREFIX/work 20 | PROCESSED_PATH=$PREFIX/processed 21 | DELAY=5 22 | 23 | echo "Styling a gif." 24 | echo "Splitting $GIF..." 25 | mkdir -p $FRAMES_PATH 26 | mkdir -p $WORK_PATH 27 | mkdir -p $PROCESSED_PATH 28 | convert -alpha Remove -coalesce $GIF $FRAMES_PATH/%04d.png 29 | 30 | echo "Optimizing frames..." 31 | for frame in $FRAMES_PATH/*.png 32 | do 33 | echo "processing $frame" 34 | make_image_analogy.py \ 35 | $IMAGE_A $IMAGE_AP \ 36 | $frame \ 37 | $WORK_PATH/out \ 38 | --width=$WIDTH \ 39 | --mrf-w=$MRFW \ 40 | --a-scale-mode=match \ 41 | --b-content-w=$CONTENTW \ 42 | --analogy-w=$ANALOGYW \ 43 | --scales=$SCALES --min-scale=0.5 --iters=$ITERATIONS \ 44 | --model=$MODEL --patch-size=$PATCH_SIZE \ 45 | --contrast=$CONTRAST \ 46 | --vgg-weights=$VGG_WEIGHTS --output-full 47 | LAST_FILE=`ls -1 $WORK_PATH | tail -n 1` 48 | cp $WORK_PATH/$LAST_FILE $PROCESSED_PATH/$(basename $frame) 49 | done 50 | 51 | echo "Combining new frames..." 52 | convert -delay $DELAY -loop 0 $PROCESSED_PATH/*.png $PREFIX/result.gif 53 | -------------------------------------------------------------------------------- /image_analogy/losses/analogy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras import backend as K 3 | 4 | from . import patches 5 | from .core import content_loss 6 | 7 | 8 | def find_analogy_patches(a, a_prime, b, patch_size=3, patch_stride=1): 9 | '''This is for precalculating the analogy_loss 10 | 11 | Since A, A', and B never change we only need to calculate the patch matches once. 
12 |     '''
13 |     # extract patches from feature maps
14 |     a_patches, a_patches_norm = patches.make_patches(K.variable(a), patch_size, patch_stride)
15 |     a_prime_patches, a_prime_patches_norm = patches.make_patches(K.variable(a_prime), patch_size, patch_stride)
16 |     b_patches, b_patches_norm = patches.make_patches(K.variable(b), patch_size, patch_stride)
17 |     # find best patches and calculate loss
18 |     p = patches.find_patch_matches(b_patches, b_patches_norm, a_patches / a_patches_norm)
19 |     #best_patches = a_prime_patches[p]
20 |     best_patches = K.reshape(a_prime_patches[p], K.shape(b_patches))
21 |     f = K.function([], best_patches)
22 |     best_patches = f([])
23 |     return best_patches
24 | 
25 | 
26 | def analogy_loss(a, a_prime, b, b_prime, patch_size=3, patch_stride=1, use_full_analogy=False):
27 |     '''http://www.mrl.nyu.edu/projects/image-analogies/index.html'''
28 |     best_a_prime_patches = find_analogy_patches(a, a_prime, b, patch_size=patch_size, patch_stride=patch_stride)
29 |     if use_full_analogy:  # combine all the patches into a single image
30 |         b_prime_patches, _ = patches.make_patches(b_prime, patch_size, patch_stride)
31 |         loss = content_loss(best_a_prime_patches, b_prime_patches) / patch_size ** 2
32 |     else:
33 |         bs = b.shape
34 |         b_analogy = patches.combine_patches(best_a_prime_patches, (bs[1], bs[2], bs[0]))
35 |         loss = content_loss(np.expand_dims(b_analogy, 0), b_prime)
36 |     return loss
37 | 
--------------------------------------------------------------------------------
/image_analogy/optimizer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras import backend as K
3 | from scipy.optimize import fmin_l_bfgs_b
4 | 
5 | 
6 | class Optimizer(object):
7 |     '''Optimizes an image against a model's loss using L-BFGS-B.'''
8 |     def optimize(self, x, model):
9 |         evaluator = ModelEvaluator(model)
10 |         data_bounds = np.repeat(  # per-channel bounds from VGG - there's probably a nicer way to express this...
11 |             [(-103.939, 255. - 103.939, -116.779, 255.0 - 116.779, -123.68, 255 - 123.68)],
12 |             np.product(x.shape) // 3, axis=0
13 |         ).reshape((np.product(x.shape), 2))
14 |         x, min_val, info = fmin_l_bfgs_b(
15 |             evaluator.loss, x.flatten(),
16 |             fprime=evaluator.grads, maxfun=20, maxiter=20,
17 |             factr=1e7,
18 |             m=4,
19 |             bounds=data_bounds,
20 |             iprint=0)
21 |         return x, min_val, info
22 | 
23 | 
24 | class ModelEvaluator(object):
25 |     '''The ModelEvaluator class makes it possible to compute loss and gradients
26 |     in one pass while retrieving them via two separate functions, "loss" and "grads".
27 |     This is done because scipy.optimize requires separate functions for loss and
28 |     gradients, but computing them separately would be inefficient.
29 |     '''
30 |     def __init__(self, model):
31 |         self.loss_value = None
32 |         self.grad_values = None
33 |         self.model = model
34 | 
35 |     def loss(self, x):
36 |         assert self.loss_value is None
37 |         loss_value, grad_values = self.model.eval_loss_and_grads(x)
38 |         self.loss_value = loss_value
39 |         self.grad_values = grad_values
40 |         return self.loss_value
41 | 
42 |     def grads(self, x):
43 |         assert self.loss_value is not None
44 |         grad_values = np.copy(self.grad_values)
45 |         self.loss_value = None
46 |         self.grad_values = None
47 |         return grad_values
48 | 
--------------------------------------------------------------------------------
/image_analogy/losses/mrf.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 | 
3 | from . import patches
4 | 
5 | 
6 | def make_patches_grid(x, patch_size, patch_stride):
7 |     '''Break image `x` up into a grid of patches.
8 | 
9 |     input shape: (channels, rows, cols)
10 |     output shape: (rows, cols, channels, patch_rows, patch_cols)
11 |     '''
12 |     from theano.tensor.nnet.neighbours import images2neibs  # TODO: all K, no T
13 |     x = K.expand_dims(x, 0)
14 |     xs = K.shape(x)
15 |     num_rows = 1 + (xs[-2] - patch_size) // patch_stride
16 |     num_cols = 1 + (xs[-1] - patch_size) // patch_stride
17 |     num_channels = xs[-3]
18 |     patches = images2neibs(x,
19 |         (patch_size, patch_size), (patch_stride, patch_stride),
20 |         mode='valid')
21 |     # neibs are sorted per-channel
22 |     patches = K.reshape(patches, (num_channels, K.shape(patches)[0] // num_channels, patch_size, patch_size))
23 |     patches = K.permute_dimensions(patches, (1, 0, 2, 3))
24 |     # arrange in a 2d-grid (rows, cols, channels, px, py)
25 |     patches = K.reshape(patches, (num_rows, num_cols, num_channels, patch_size, patch_size))
26 |     patches_norm = K.sqrt(K.sum(K.square(patches), axis=(2,3,4), keepdims=True))
27 |     return patches, patches_norm
28 | 
29 | 
30 | def mrf_loss(source, combination, patch_size=3, patch_stride=1):
31 |     '''CNNMRF http://arxiv.org/pdf/1601.04589v1.pdf'''
32 |     # extract patches from feature maps
33 |     combination_patches, combination_patches_norm = patches.make_patches(combination, patch_size, patch_stride)
34 |     source_patches, source_patches_norm = patches.make_patches(source, patch_size, patch_stride)
35 |     # find best patches and calculate loss
36 |     patch_ids = patches.find_patch_matches(combination_patches, combination_patches_norm, source_patches / source_patches_norm)
37 |     best_source_patches = K.reshape(source_patches[patch_ids], K.shape(combination_patches))
38 |     loss = K.sum(K.square(best_source_patches - combination_patches)) / patch_size ** 2
39 |     return loss
40 | 
--------------------------------------------------------------------------------
/image_analogy/losses/patches.py:
--------------------------------------------------------------------------------
1 | from itertools import product
2 | 
3 | import numpy as np
4 | from keras import backend as K
5 | from sklearn.feature_extraction.image import reconstruct_from_patches_2d
6 | 
7 | 
8 | def make_patches(x, patch_size, patch_stride):
9 |     '''Break image `x` up into a bunch of patches.'''
10 |     from theano.tensor.nnet.neighbours import images2neibs
11 |     x = K.expand_dims(x, 0)
12 |     patches = images2neibs(x,
13 |         (patch_size, patch_size), (patch_stride, patch_stride),
14 |         mode='valid')
15 |     # neibs are sorted per-channel
16 |     patches = K.reshape(patches, (K.shape(x)[1], K.shape(patches)[0] // K.shape(x)[1], patch_size, patch_size))
17 |     patches = K.permute_dimensions(patches, (1, 0, 2, 3))
18 |     patches_norm = K.sqrt(K.sum(K.square(patches), axis=(1,2,3), keepdims=True))
19 |     return patches, patches_norm
20 | 
21 | 
22 | def reconstruct_from_patches_2d(patches, image_size):
23 |     '''This is from scikit-learn. I thought it was a little overkill
24 |     to require it just for this function.
25 |     '''
26 |     i_h, i_w = image_size[:2]
27 |     p_h, p_w = patches.shape[1:3]
28 |     img = np.zeros(image_size, dtype=np.float32)
29 |     # compute the dimensions of the patches array
30 |     n_h = i_h - p_h + 1
31 |     n_w = i_w - p_w + 1
32 |     for p, (i, j) in zip(patches, product(range(n_h), range(n_w))):
33 |         img[i:i + p_h, j:j + p_w] += p
34 | 
35 |     for i in range(i_h):
36 |         for j in range(i_w):
37 |             # divide by the amount of overlap
38 |             # XXX: is this the most efficient way? memory-wise yes, cpu wise?
39 |             img[i, j] /= float(min(i + 1, p_h, i_h - i) *
40 |                                min(j + 1, p_w, i_w - j))
41 |     return img
42 | 
43 | 
44 | def combine_patches(patches, out_shape):
45 |     '''Reconstruct an image from these `patches`'''
46 |     patches = patches.transpose(0, 2, 3, 1)
47 |     recon = reconstruct_from_patches_2d(patches, out_shape)
48 |     return recon.transpose(2, 0, 1).astype(np.float32)
49 | 
50 | 
51 | def find_patch_matches(a, a_norm, b):
52 |     '''For each patch in A, find the best matching patch in B'''
53 |     convs = None
54 |     if K.backend() == 'theano':
55 |         # HACK: This was not being performed on the GPU for some reason.
56 |         from theano.sandbox.cuda import dnn
57 |         if dnn.dnn_available():
58 |             convs = dnn.dnn_conv(
59 |                 img=a, kerns=b[:, :, ::-1, ::-1], border_mode='valid')
60 |     if convs is None:
61 |         convs = K.conv2d(a, b[:, :, ::-1, ::-1], border_mode='valid')
62 |     argmax = K.argmax(convs / a_norm, axis=1)
63 |     return argmax
64 | 
--------------------------------------------------------------------------------
/image_analogy/models/base.py:
--------------------------------------------------------------------------------
1 | import time
2 | 
3 | import numpy as np
4 | from keras import backend as K
5 | 
6 | from image_analogy.losses.core import total_variation_loss
7 | 
8 | 
9 | class BaseModel(object):
10 |     '''Model to be extended.'''
11 |     def __init__(self, net, args):
12 |         self.set_net(net)
13 |         self.args = args
14 | 
15 |     def set_net(self, net):
16 |         self.net = net
17 |         self.net_input = net.layers[0].input
18 |         self.layer_map = dict([(layer.name, layer) for layer in self.net.layers])
19 |         self._f_layer_outputs = {}
20 | 
21 |     def build(self, a_image, ap_image, b_image, output_shape):
22 |         self.output_shape = output_shape
23 |         loss = self.build_loss(a_image, ap_image, b_image)
24 |         # get the gradients of the generated image wrt the loss
25 |         grads = K.gradients(loss, self.net_input)
26 |         outputs = [loss]
27 |         if type(grads) in {list, tuple}:
28 |             outputs += grads
29 |         else:
30 |             outputs.append(grads)
31 |         self.f_outputs = K.function([self.net_input], outputs)
32 | 
33 |     def build_loss(self, a_image, ap_image, b_image):
34 |         '''Create an expression for the loss as a function of the image inputs.'''
35 |         loss = K.variable(0.0)
36 |         # the base loss is just total-variation smoothing; subclasses add analogy/MRF/content/style terms
37 | loss += self.args.tv_weight * total_variation_loss(self.net_input, *b_image.shape[2:]) 38 | return loss 39 | 40 | def precompute_static_features(self, a_image, ap_image, b_image): 41 | # figure out which layers we need to extract 42 | a_layers, ap_layers, b_layers = set(), set(), set() 43 | if self.args.analogy_weight: 44 | for layerset in (a_layers, ap_layers, b_layers): 45 | layerset.update(self.args.analogy_layers) 46 | if self.args.mrf_weight: 47 | ap_layers.update(self.args.mrf_layers) 48 | if self.args.b_bp_content_weight: 49 | b_layers.update(self.args.b_content_layers) 50 | if self.args.neural_style_weight: 51 | ap_layers.update(self.args.neural_style_layers) 52 | # let's get those features 53 | all_a_features = self.get_features(a_image, a_layers) 54 | all_ap_image_features = self.get_features(ap_image, ap_layers) 55 | all_b_features = self.get_features(b_image, b_layers) 56 | return all_a_features, all_ap_image_features, all_b_features 57 | 58 | def get_features(self, x, layers): 59 | if not layers: 60 | return None 61 | f = K.function([self.net_input], [self.get_layer_output(layer_name) for layer_name in layers]) 62 | feature_outputs = f([x]) 63 | features = dict(zip(layers, feature_outputs)) 64 | return features 65 | 66 | def get_f_layer(self, layer_name): 67 | return K.function([self.net_input], [self.get_layer_output(layer_name)]) 68 | 69 | def get_layer_output(self, name): 70 | if not name in self._f_layer_outputs: 71 | layer = self.layer_map[name] 72 | self._f_layer_outputs[name] = layer.output 73 | return self._f_layer_outputs[name] 74 | 75 | def get_layer_output_shape(self, name): 76 | layer = self.layer_map[name] 77 | return layer.output_shape 78 | 79 | def eval_loss_and_grads(self, x): 80 | x = x.reshape(self.output_shape) 81 | outs = self.f_outputs([x]) 82 | loss_value = outs[0] 83 | if len(outs[1:]) == 1: 84 | grad_values = outs[1].flatten().astype('float64') 85 | else: 86 | grad_values = np.array(outs[1:]).flatten().astype('float64') 87 | return loss_value, grad_values 88 | -------------------------------------------------------------------------------- /image_analogy/models/analogy.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from keras import backend as K 5 | 6 | from image_analogy.losses.analogy import analogy_loss 7 | from image_analogy.losses.core import content_loss 8 | from image_analogy.losses.mrf import mrf_loss 9 | from image_analogy.losses.neural_style import neural_style_loss 10 | 11 | from .base import BaseModel 12 | 13 | 14 | class AnalogyModel(BaseModel): 15 | '''Model for image analogies.''' 16 | 17 | def build_loss(self, a_image, ap_image, b_image): 18 | '''Create an expression for the loss as a function of the image inputs.''' 19 | print('Building loss...') 20 | loss = super(AnalogyModel, self).build_loss(a_image, ap_image, b_image) 21 | # Precompute static features for performance 22 | print('Precomputing static features...') 23 | all_a_features, all_ap_image_features, all_b_features = self.precompute_static_features(a_image, ap_image, b_image) 24 | print('Building and combining losses...') 25 | if self.args.analogy_weight != 0.0: 26 | for layer_name in self.args.analogy_layers: 27 | a_features = all_a_features[layer_name][0] 28 | ap_image_features = all_ap_image_features[layer_name][0] 29 | b_features = all_b_features[layer_name][0] 30 | # current combined output 31 | layer_features = self.get_layer_output(layer_name) 32 | combination_features = 
layer_features[0, :, :, :] 33 | al = analogy_loss(a_features, ap_image_features, 34 | b_features, combination_features, 35 | use_full_analogy=self.args.use_full_analogy, 36 | patch_size=self.args.patch_size, 37 | patch_stride=self.args.patch_stride) 38 | loss += (self.args.analogy_weight / len(self.args.analogy_layers)) * al 39 | 40 | if self.args.mrf_weight != 0.0: 41 | for layer_name in self.args.mrf_layers: 42 | ap_image_features = K.variable(all_ap_image_features[layer_name][0]) 43 | layer_features = self.get_layer_output(layer_name) 44 | # current combined output 45 | combination_features = layer_features[0, :, :, :] 46 | sl = mrf_loss(ap_image_features, combination_features, 47 | patch_size=self.args.patch_size, 48 | patch_stride=self.args.patch_stride) 49 | loss += (self.args.mrf_weight / len(self.args.mrf_layers)) * sl 50 | 51 | if self.args.b_bp_content_weight != 0.0: 52 | for layer_name in self.args.b_content_layers: 53 | b_features = K.variable(all_b_features[layer_name][0]) 54 | # current combined output 55 | bp_features = self.get_layer_output(layer_name) 56 | cl = content_loss(bp_features, b_features) 57 | loss += self.args.b_bp_content_weight / len(self.args.b_content_layers) * cl 58 | 59 | if self.args.neural_style_weight != 0.0: 60 | for layer_name in self.args.neural_style_layers: 61 | ap_image_features = K.variable(all_ap_image_features[layer_name][0]) 62 | layer_features = self.get_layer_output(layer_name) 63 | layer_shape = self.get_layer_output_shape(layer_name) 64 | # current combined output 65 | combination_features = layer_features[0, :, :, :] 66 | nsl = neural_style_loss(ap_image_features, combination_features, 3, self.output_shape[-2], self.output_shape[-1]) 67 | loss += (self.args.neural_style_weight / len(self.args.neural_style_layers)) * nsl 68 | return loss 69 | -------------------------------------------------------------------------------- /image_analogy/vgg16.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import h5py 4 | import numpy as np 5 | from keras import backend as K 6 | from keras.layers.convolutional import ( 7 | AveragePooling2D, Convolution2D, MaxPooling2D, ZeroPadding2D) 8 | from keras.models import Sequential 9 | 10 | 11 | def img_from_vgg(x): 12 | '''Decondition an image from the VGG16 model.''' 13 | x = x.transpose((1, 2, 0)) 14 | x[:, :, 0] += 103.939 15 | x[:, :, 1] += 116.779 16 | x[:, :, 2] += 123.68 17 | x = x[:,:,::-1] # to RGB 18 | return x 19 | 20 | 21 | def img_to_vgg(x): 22 | '''Condition an image for use with the VGG16 model.''' 23 | x = x[:,:,::-1] # to BGR 24 | x[:, :, 0] -= 103.939 25 | x[:, :, 1] -= 116.779 26 | x[:, :, 2] -= 123.68 27 | x = x.transpose((2, 0, 1)) 28 | return x 29 | 30 | 31 | def get_model(img_width, img_height, weights_path='vgg16_weights.h5', pool_mode='avg'): 32 | assert pool_mode in ('avg', 'max'), '`pool_mode` must be "avg" or "max"' 33 | if pool_mode == 'avg': 34 | pool_class = AveragePooling2D 35 | else: 36 | pool_class = MaxPooling2D 37 | model = Sequential() 38 | model.add(ZeroPadding2D((1, 1), input_shape=(3, img_height, img_width))) 39 | model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1')) 40 | model.add(ZeroPadding2D((1, 1))) 41 | model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2')) 42 | model.add(pool_class((2, 2), strides=(2, 2))) 43 | 44 | model.add(ZeroPadding2D((1, 1))) 45 | model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1')) 46 | model.add(ZeroPadding2D((1, 1))) 47 | 
model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2')) 48 | model.add(pool_class((2, 2), strides=(2, 2))) 49 | 50 | model.add(ZeroPadding2D((1, 1))) 51 | model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1')) 52 | model.add(ZeroPadding2D((1, 1))) 53 | model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2')) 54 | model.add(ZeroPadding2D((1, 1))) 55 | model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3')) 56 | model.add(pool_class((2, 2), strides=(2, 2))) 57 | 58 | model.add(ZeroPadding2D((1, 1))) 59 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1')) 60 | model.add(ZeroPadding2D((1, 1))) 61 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2')) 62 | model.add(ZeroPadding2D((1, 1))) 63 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3')) 64 | model.add(pool_class((2, 2), strides=(2, 2))) 65 | 66 | model.add(ZeroPadding2D((1, 1))) 67 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1')) 68 | model.add(ZeroPadding2D((1, 1))) 69 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2')) 70 | model.add(ZeroPadding2D((1, 1))) 71 | model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3')) 72 | model.add(pool_class((2, 2), strides=(2, 2))) 73 | 74 | # load the weights of the VGG16 networks 75 | # (trained on ImageNet, won the ILSVRC competition in 2014) 76 | # note: when there is a complete match between your model definition 77 | # and your weight savefile, you can simply call model.load_weights(filename) 78 | assert os.path.exists(weights_path), 'Model weights not found (see "--vgg-weights" parameter).' 79 | f = h5py.File(weights_path) 80 | for k in range(f.attrs['nb_layers']): 81 | if k >= len(model.layers): 82 | # we don't look at the last (fully-connected) layers in the savefile 83 | break 84 | g = f['layer_{}'.format(k)] 85 | weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])] 86 | layer = model.layers[k] 87 | if isinstance(layer, Convolution2D): 88 | weights[0] = np.array(weights[0])[:, :, ::-1, ::-1] 89 | layer.set_weights(weights) 90 | 91 | f.close() 92 | return model 93 | -------------------------------------------------------------------------------- /image_analogy/models/nnf.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import numpy as np 4 | from keras import backend as K 5 | 6 | from image_analogy.losses.core import content_loss 7 | from image_analogy.losses.nnf import nnf_analogy_loss, NNFState, PatchMatcher 8 | from image_analogy.losses.neural_style import neural_style_loss 9 | 10 | from .base import BaseModel 11 | 12 | 13 | class NNFModel(BaseModel): 14 | '''Faster model for image analogies.''' 15 | def build(self, a_image, ap_image, b_image, output_shape): 16 | self.output_shape = output_shape 17 | loss = self.build_loss(a_image, ap_image, b_image) 18 | # get the gradients of the generated image wrt the loss 19 | grads = K.gradients(loss, self.net_input) 20 | outputs = [loss] 21 | if type(grads) in {list, tuple}: 22 | outputs += grads 23 | else: 24 | outputs.append(grads) 25 | f_inputs = [self.net_input] 26 | for nnf in self.feature_nnfs: 27 | f_inputs.append(nnf.placeholder) 28 | self.f_outputs = K.function(f_inputs, outputs) 29 | 30 | def eval_loss_and_grads(self, x): 31 | x = x.reshape(self.output_shape) 32 | f_inputs = [x] 33 | # update the patch indexes 34 | # start_t = time.time() 35 | for nnf in self.feature_nnfs: 36 | 
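            # run a few PatchMatch steps against the current output, then hand the rebuilt target features to the loss via this NNF's placeholder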
nnf.update(x, num_steps=self.args.mrf_nnf_steps) 37 | new_target = nnf.matcher.get_reconstruction() 38 | f_inputs.append(new_target) 39 | # print('PatchMatch update in {:.2f} seconds'.format(time.time() - start_t)) 40 | # run it through 41 | outs = self.f_outputs(f_inputs) 42 | loss_value = outs[0] 43 | if len(outs[1:]) == 1: 44 | grad_values = outs[1].flatten().astype('float64') 45 | else: 46 | grad_values = np.array(outs[1:]).flatten().astype('float64') 47 | return loss_value, grad_values 48 | 49 | def build_loss(self, a_image, ap_image, b_image): 50 | '''Create an expression for the loss as a function of the image inputs.''' 51 | print('Building loss...') 52 | loss = super(NNFModel, self).build_loss(a_image, ap_image, b_image) 53 | # Precompute static features for performance 54 | print('Precomputing static features...') 55 | all_a_features, all_ap_image_features, all_b_features = self.precompute_static_features(a_image, ap_image, b_image) 56 | print('Building and combining losses...') 57 | if self.args.analogy_weight: 58 | for layer_name in self.args.analogy_layers: 59 | a_features = all_a_features[layer_name][0] 60 | ap_image_features = all_ap_image_features[layer_name][0] 61 | b_features = all_b_features[layer_name][0] 62 | # current combined output 63 | layer_features = self.get_layer_output(layer_name) 64 | combination_features = layer_features[0, :, :, :] 65 | al = nnf_analogy_loss( 66 | a_features, ap_image_features, b_features, combination_features, 67 | num_steps=self.args.analogy_nnf_steps, patch_size=self.args.patch_size, 68 | patch_stride=self.args.patch_stride, jump_size=1.0) 69 | loss += (self.args.analogy_weight / len(self.args.analogy_layers)) * al 70 | 71 | existing_feature_nnfs = getattr(self, 'feature_nnfs', [None] * len(self.args.mrf_layers)) 72 | self.feature_nnfs = [] 73 | if self.args.mrf_weight: 74 | for layer_name, existing_nnf in zip(self.args.mrf_layers, existing_feature_nnfs): 75 | ap_image_features = all_ap_image_features[layer_name][0] 76 | # current combined output 77 | layer_features = self.get_layer_output(layer_name) 78 | combination_features = layer_features[0, :, :, :] 79 | input_shape = self.get_layer_output_shape(layer_name) 80 | if existing_nnf and not self.args.randomize_mnf_nnf: 81 | matcher = existing_nnf.matcher.scale((input_shape[3], input_shape[2], input_shape[1]), ap_image_features) 82 | else: 83 | matcher = PatchMatcher( 84 | (input_shape[3], input_shape[2], input_shape[1]), ap_image_features, 85 | patch_size=self.args.patch_size, jump_size=1.0, patch_stride=self.args.patch_stride) 86 | nnf = NNFState(matcher, self.get_f_layer(layer_name)) 87 | self.feature_nnfs.append(nnf) 88 | sl = content_loss(combination_features, nnf.placeholder) 89 | loss += (self.args.mrf_weight / len(self.args.mrf_layers)) * sl 90 | 91 | if self.args.b_bp_content_weight: 92 | for layer_name in self.args.b_content_layers: 93 | b_features = K.variable(all_b_features[layer_name][0]) 94 | # current combined output 95 | bp_features = self.get_layer_output(layer_name) 96 | cl = content_loss(bp_features, b_features) 97 | loss += self.args.b_bp_content_weight / len(self.args.b_content_layers) * cl 98 | 99 | if self.args.neural_style_weight != 0.0: 100 | for layer_name in self.args.neural_style_layers: 101 | ap_image_features = K.variable(all_ap_image_features[layer_name][0]) 102 | layer_features = self.get_layer_output(layer_name) 103 | layer_shape = self.get_layer_output_shape(layer_name) 104 | # current combined output 105 | combination_features = layer_features[0, :, :, 
:] 106 | nsl = neural_style_loss(ap_image_features, combination_features, 3, self.output_shape[-2], self.output_shape[-1]) 107 | loss += (self.args.neural_style_weight / len(self.args.neural_style_layers)) * nsl 108 | 109 | return loss 110 | -------------------------------------------------------------------------------- /image_analogy/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import numpy as np 5 | import scipy.ndimage 6 | from keras import backend as K 7 | from scipy.misc import imsave 8 | 9 | from image_analogy import img_utils, vgg16 10 | from image_analogy.optimizer import Optimizer 11 | 12 | 13 | def main(args, model_class): 14 | '''The main loop which does the things.''' 15 | K.set_image_dim_ordering('th') 16 | # calculate scales 17 | if args.num_scales > 1: 18 | step_scale_factor = (1 - args.min_scale) / (args.num_scales - 1) 19 | else: 20 | step_scale_factor = 0.0 21 | args.min_scale = 1.0 22 | # prepare the input images 23 | full_ap_image = img_utils.load_image(args.ap_image_path) 24 | full_a_image = img_utils.load_image(args.a_image_path) 25 | full_b_image = img_utils.load_image(args.b_image_path) 26 | # calculate the output size 27 | full_img_width, full_img_height = calculate_image_dims(args, full_b_image) 28 | img_num_channels = 3 # TODO: allow alpha 29 | b_scale_ratio_width = float(full_b_image.shape[1]) / full_img_width 30 | b_scale_ratio_height = float(full_b_image.shape[0]) / full_img_height 31 | # ensure the output dir exists 32 | output_dir = os.path.dirname(args.result_prefix) 33 | if not os.path.exists(output_dir): 34 | os.makedirs(output_dir) 35 | # multi-scale loop 36 | x = None # this is going to hold our output image 37 | optimizer = Optimizer() 38 | for scale_i in range(args.num_scales): 39 | scale_factor = (scale_i * step_scale_factor) + args.min_scale 40 | # scale our inputs 41 | img_width = int(round(full_img_width * scale_factor)) 42 | img_height = int(round(full_img_height * scale_factor)) 43 | # prepare the current optimizer state 44 | if x is None: # we need to create an initial state 45 | x = np.random.uniform(0, 255, (img_height, img_width, 3)).astype(np.float32) 46 | x = vgg16.img_to_vgg(x) 47 | else: # resize the last state 48 | zoom_ratio = img_width / float(x.shape[-1]) 49 | x = scipy.ndimage.zoom(x, (1, zoom_ratio, zoom_ratio), order=1) 50 | img_height, img_width = x.shape[-2:] 51 | # determine scaling of "A" images 52 | if args.a_scale_mode == 'match': 53 | a_img_width = img_width 54 | a_img_height = img_height 55 | elif args.a_scale_mode == 'none': 56 | a_img_width = full_a_image.shape[1] * scale_factor 57 | a_img_height = full_a_image.shape[0] * scale_factor 58 | else: # should just be 'ratio' 59 | a_img_width = full_a_image.shape[1] * scale_factor * b_scale_ratio_width 60 | a_img_height = full_a_image.shape[0] * scale_factor * b_scale_ratio_height 61 | a_img_width = int(round(args.a_scale * a_img_width)) 62 | a_img_height = int(round(args.a_scale * a_img_height)) 63 | # prepare images for use 64 | a_image = img_utils.preprocess_image(full_a_image, a_img_width, a_img_height) 65 | ap_image = img_utils.preprocess_image(full_ap_image, a_img_width, a_img_height) 66 | b_image = img_utils.preprocess_image(full_b_image, img_width, img_height) 67 | print('Scale factor {} "A" shape {} "B" shape {}'.format(scale_factor, a_image.shape, b_image.shape)) 68 | # load up the net and create the model 69 | net = vgg16.get_model(img_width, img_height, 
weights_path=args.vgg_weights, pool_mode=args.pool_mode) 70 | model = model_class(net, args) 71 | model.build(a_image, ap_image, b_image, (1, img_num_channels, img_height, img_width)) 72 | 73 | for i in range(args.num_iterations_per_scale): 74 | print('Start of iteration {} x {}'.format(scale_i, i)) 75 | start_time = time.time() 76 | if args.color_jitter: 77 | color_jitter = (args.color_jitter * 2) * (np.random.random((3, img_height, img_width)) - 0.5) 78 | x += color_jitter 79 | if args.jitter: 80 | jitter = args.jitter * scale_factor 81 | ox, oy = np.random.randint(-jitter, jitter+1, 2) 82 | x = np.roll(np.roll(x, ox, -1), oy, -2) # apply jitter shift 83 | # actually run the optimizer 84 | x, min_val, info = optimizer.optimize(x, model) 85 | print('Current loss value: {}'.format(min_val)) 86 | # unjitter the image 87 | x = x.reshape((3, img_height, img_width)) 88 | if args.jitter: 89 | x = np.roll(np.roll(x, -ox, -1), -oy, -2) # unshift image 90 | if args.color_jitter: 91 | x -= color_jitter 92 | # save the image 93 | if args.output_full_size: 94 | out_resize_shape = (full_img_height, full_img_width) 95 | else: 96 | out_resize_shape = None 97 | img = img_utils.deprocess_image(np.copy(x), contrast_percent=args.contrast_percent,resize=out_resize_shape) 98 | fname = args.result_prefix + '_at_iteration_{}_{}.png'.format(scale_i, i) 99 | imsave(fname, img) 100 | end_time = time.time() 101 | print('Image saved as {}'.format(fname)) 102 | print('Iteration completed in {:.2f} seconds'.format(end_time - start_time,)) 103 | 104 | 105 | def calculate_image_dims(args, full_b_image): 106 | '''Determine the dimensions of the generated picture. 107 | 108 | Defaults to the size of Image B. 109 | ''' 110 | full_img_width = full_b_image.shape[1] 111 | full_img_height = full_b_image.shape[0] 112 | if args.out_width or args.out_height: 113 | if args.out_width and args.out_height: 114 | full_img_width = args.out_width 115 | full_img_height = args.out_height 116 | else: 117 | if args.out_width: 118 | full_img_height = int(round(args.out_width / float(full_img_width) * full_img_height)) 119 | full_img_width = args.out_width 120 | else: 121 | full_img_width = int(round(args.out_height / float(full_img_height) * full_img_width)) 122 | full_img_height = args.out_height 123 | return full_img_width, full_img_height 124 | -------------------------------------------------------------------------------- /image_analogy/argparser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | from keras import backend as K 5 | 6 | 7 | VGG_ENV_VAR = 'VGG_WEIGHT_PATH' 8 | 9 | 10 | class CommaSplitAction(argparse.Action): 11 | '''Split n strip incoming string argument.''' 12 | def __call__(self, parser, namespace, values, option_string=None): 13 | setattr(namespace, self.dest, [v.strip() for v in values.split(',')]) 14 | 15 | 16 | def parse_args(): 17 | '''Parses command line arguments for the image analogy command.''' 18 | parser = argparse.ArgumentParser(description='Neural image analogies with Keras.') 19 | parser.add_argument('a_image_path', metavar='ref', type=str, 20 | help='Path to the reference image mask (A)') 21 | parser.add_argument('ap_image_path', metavar='base', type=str, 22 | help='Path to the source image (A\')') 23 | parser.add_argument('b_image_path', metavar='ref', type=str, 24 | help='Path to the new mask for generation (B)') 25 | parser.add_argument('result_prefix', metavar='res_prefix', type=str, 26 | help='Prefix for the saved results 
(B\')') 27 | # size-related 28 | parser.add_argument('--width', dest='out_width', type=int, 29 | default=0, help='Set output width') 30 | parser.add_argument('--height', dest='out_height', type=int, 31 | default=0, help='Set output height') 32 | parser.add_argument('--scales', dest='num_scales', type=int, 33 | default=3, help='Run at N different scales') 34 | parser.add_argument('--min-scale', dest='min_scale', type=float, 35 | default=0.25, help='Smallest scale to iterate') 36 | parser.add_argument('--a-scale-mode', dest='a_scale_mode', type=str, 37 | default='none', help='Method of scaling A and A\' relative to B') 38 | parser.add_argument('--a-scale', dest='a_scale', type=float, 39 | default=1.0, help='Additional scale factor for A and A\'') 40 | parser.add_argument('--output-full', dest='output_full_size', action='store_true', 41 | help='Output all intermediate images at full size regardless of current scale.') 42 | # optimizer 43 | parser.add_argument('--iters', dest='num_iterations_per_scale', type=int, 44 | default=5, help='Number of iterations per scale') 45 | parser.add_argument('--model', dest='match_model', type=str, 46 | default='patchmatch', help='Matching algorithm (patchmatch or brute)') 47 | parser.add_argument('--mrf-nnf-steps', dest='mrf_nnf_steps', type=int, 48 | default=5, help='Number of patchmatch updates per iteration for local coherence loss.') 49 | parser.add_argument('--randomize-mrf-nnf', dest='randomize_mnf_nnf', action='store_true', 50 | help='Randomize the local coherence similarity matrix at the start of a new scale instead of scaling it up.') 51 | parser.add_argument('--analogy-nnf-steps', dest='analogy_nnf_steps', type=int, 52 | default=15, help='Number of patchmatch updates for the analogy loss (done once per scale).') 53 | # loss 54 | parser.add_argument('--tv-w', dest='tv_weight', type=float, 55 | default=1.0, help='Weight for TV loss.') 56 | parser.add_argument('--analogy-w', dest='analogy_weight', type=float, 57 | default=1.0, help='Weight for analogy loss.') 58 | parser.add_argument('--analogy-layers', dest='analogy_layers', action=CommaSplitAction, 59 | default=['conv3_1', 'conv4_1'], 60 | help='Comma-separated list of layer names to be used for the analogy loss') 61 | parser.add_argument('--use-full-analogy', dest='use_full_analogy', action="store_true", 62 | help='Use the full set of analogy patches (slower/more memory but maybe more accurate)') 63 | parser.add_argument('--mrf-w', dest='mrf_weight', type=float, 64 | default=0.5, help='Weight for MRF loss between A\' and B\'') 65 | parser.add_argument('--mrf-layers', dest='mrf_layers', action=CommaSplitAction, 66 | default=['conv3_1', 'conv4_1'], 67 | help='Comma-separated list of layer names to be used for the MRF loss') 68 | parser.add_argument('--b-content-w', dest='b_bp_content_weight', type=float, 69 | default=0.0, help='Weight for content loss between B and B\'') 70 | parser.add_argument('--content-layers', dest='b_content_layers', action=CommaSplitAction, 71 | default=['conv3_1', 'conv4_1'], 72 | help='Comma-separated list of layer names to be used for the content loss') 73 | parser.add_argument('--nstyle-w', dest='neural_style_weight', type=float, 74 | default=0.0, help='Weight for neural style loss between A\' and B\'') 75 | parser.add_argument('--nstyle-layers', dest='neural_style_layers', action=CommaSplitAction, 76 | default=['conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'], 77 | help='Comma-separated list of layer names to be used for the neural style') 78 | 
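    # patch matching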
/README.md:
--------------------------------------------------------------------------------
neural image analogies
----------------------
![Image of arch](https://raw.githubusercontent.com/awentzonline/image-analogies/master/examples/images/image-analogy-explanation.jpg)
![Image of Sugar Steve](https://raw.githubusercontent.com/awentzonline/image-analogies/master/examples/images/sugarskull-analogy.jpg)
![Image of season transfer](https://raw.githubusercontent.com/awentzonline/image-analogies/master/examples/images/season-xfer.jpg)![Image of Trump](https://raw.githubusercontent.com/awentzonline/image-analogies/master/examples/images/trump-image-analogy.jpg)


This is basically an implementation of the ["Image Analogies" paper](http://www.mrl.nyu.edu/projects/image-analogies/index.html); in our case, we use feature maps from VGG16. The patch matching and blending are inspired by the method described in ["Combining Markov Random Fields and Convolutional Neural Networks for Image Synthesis"](http://arxiv.org/abs/1601.04589). Effects similar to that paper can be achieved by turning off the analogy loss with `--analogy-w=0` (or leaving it on!) and turning on the B/B' content weighting via the `--b-content-w` parameter. Also, instead of using brute-force patch matching
we use the [PatchMatch algorithm](http://gfx.cs.princeton.edu/gfx/pubs/Barnes_2009_PAR/index.php)
to approximate the best patch matches. Brute-force matching can be re-enabled by setting
`--model=brute`.

The initial code was adapted from the Keras "neural style transfer" example.

The example arch images are from the ["Image Analogies" website](http://www.mrl.nyu.edu/projects/image-analogies/tbn.html).
They have some other good examples from their own implementation which
are worth a look. Their paper discusses the various applications of image
analogies, so you might want to take a look for inspiration.
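For example, once the script is installed (see Installation below), a CNNMRF-style run looks like this (placeholder paths):

`make_image_analogy.py images/arch-A.jpg images/arch-Ap.jpg images/arch-B.jpg out/arch-mrf --analogy-w=0 --b-content-w=1.0`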
Installation
------------
This requires either [TensorFlow](https://www.tensorflow.org/versions/r0.7/get_started/os_setup.html)
or [Theano](http://deeplearning.net/software/theano/install.html). If you don't
have a GPU you'll want to use TensorFlow. GPU users may find Theano to be
faster at the expense of longer startup times. Here's the [Theano GPU guide](http://deeplearning.net/software/theano/tutorial/using_gpu.html).

Here's how to [configure the backend with Keras](http://keras.io/backend/) and
set your default device (e.g. cpu, gpu0).

To install via [virtualenv](https://virtualenv.readthedocs.org/en/latest/installation.html), run the following commands:

```
virtualenv venv
source venv/bin/activate
pip install neural-image-analogies
```

If you have trouble with the above method, follow these directions to [install the latest Keras and Theano or TensorFlow](http://keras.io/#installation).

The script `make_image_analogy.py` should now be on your path.

**Before running this script**, download the [weights for the VGG16 model](https://github.com/awentzonline/image-analogies/releases/download/v0.0.5/vgg16_weights.h5). This file contains only the convolutional layers of VGG16, which is about 10% of the full size. [Original source of full weights](https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3).
The script assumes the weights are in the current working directory. If you place
them somewhere else, make sure to pass the `--vgg-weights=` parameter or set the `VGG_WEIGHT_PATH` environment variable.

Example script usage:
`make_image_analogy.py image-A image-A-prime image-B prefix_for_output`

e.g.:

`make_image_analogy.py images/arch-mask.jpg images/arch.jpg images/arch-newmask.jpg out/arch`

The examples directory has a script, `render-example.sh`, which accepts an example
name prefix and, optionally, the location of your VGG weights:

`./render-example.sh arch /path/to/your/weights.h5`

Currently, A and A' must be the same size; the same holds for B and B'.
Output size is the same as Image B, unless specified otherwise.

It's too slow
-------------
If you're not using a GPU, use TensorFlow. My MacBook Pro can render a
512x512 image in approximately 12 minutes using TensorFlow and `--mrf-w=0`. Here
are some other options, which mostly trade quality for speed:

* If you're using Theano, enable OpenMP threading via the environment variables `THEANO_FLAGS='openmp=1'` and `OMP_NUM_THREADS=` (set to your core count). You can read more about multi-core support [here](http://deeplearning.net/software/theano/tutorial/multi_cores.html).
* Set `--mrf-w=0` to skip optimization of local coherence.
* Use fewer feature layers by setting `--mrf-layers=conv4_1` and/or `--analogy-layers=conv4_1` (or other layers), which will consider half as many feature layers.
* Generate a smaller image, either by using a smaller source Image B or by setting
the `--width` or `--height` parameters.
* Ensure you're not using `--model=brute`, which needs a powerful GPU.
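Combining a few of those options (placeholder paths; every flag is defined in `image_analogy/argparser.py` above):

`make_image_analogy.py images/arch-A.jpg images/arch-Ap.jpg images/arch-B.jpg out/arch-fast --mrf-w=0 --height=256 --analogy-layers=conv4_1`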
I want it to look better
------------------------
The default settings are somewhat lowered to give the average user a better chance
at generating something on whatever computer they may have. If you have a powerful GPU,
here are some options for nicer output:
* `--model=brute` turns on brute-force patch-matching, performed on the GPU. This is Theano-only (default: patchmatch).
* `--patch-size=3` allows for much nicer-looking details (default: 1).
* `--mrf-layers=conv1_1,conv2_1,...` adds more layers to the mix (likewise for `--analogy-layers` and `--content-layers`).
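Putting those together (placeholder paths; per the bullets above, `--model=brute` requires Theano and a powerful GPU):

`make_image_analogy.py images/arch-A.jpg images/arch-Ap.jpg images/arch-B.jpg out/arch-hq --model=brute --patch-size=3 --mrf-layers=conv1_1,conv2_1,conv3_1,conv4_1`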
Parameters
----------
* --width Sets image output max width
* --height Sets image output max height
* --scales Run at N different scales
* --iters Number of iterations per scale
* --min-scale Smallest scale to iterate
* --mrf-w Weight for MRF loss between A' and B'
* --analogy-w Weight for analogy loss
* --b-content-w Weight for content loss between B and B'
* --tv-w Weight for total variation loss
* --vgg-weights Path to VGG16 weights
* --a-scale-mode Method of scaling A and A' relative to B
* * 'match': force A to be the same size as B regardless of aspect ratio (former default)
* * 'ratio': apply scale imposed by width/height params on B to A (current default)
* * 'none': leave A/A' alone
* --a-scale Additional scale factor for A and A'
* --pool-mode Pooling style used by VGG
* * 'avg': average pooling - generally smoother results
* * 'max': max pooling - more noisy but maybe that's what you want (original default)
* --contrast adjust the contrast of the output by removing the bottom x percentile
and scaling by the (100 - x)th percentile. Defaults to 0.02
* --output-full Output all intermediate images at full size regardless of actual scale
* --analogy-layers Comma-separated list of layer names to be used for the analogy loss (default: "conv3_1,conv4_1")
* --mrf-layers Comma-separated list of layer names to be used for the MRF loss (default: "conv3_1,conv4_1")
* --content-layers Comma-separated list of layer names to be used for the content loss (default: "conv3_1,conv4_1")
* --patch-size Patch size used for matching (default: 1)
* --use-full-analogy match on all of the analogy patches, instead of combining
them into one image (slower/more memory but maybe more accurate)
* --model Select the patch matching model ('patchmatch' or 'brute'). patchmatch is
the default and requires less GPU memory, but is less accurate than brute.
* --nstyle-w Weight for neural style loss between A' and B'
* --nstyle-layers Comma-separated list of layer names to be used for the neural style loss

The analogy loss is the amount of influence of B -> A -> A' -> B'. It's a
structure-preserving mapping of Image B into A' via A.

The MRF loss (or "local coherence") is the influence of B' -> A' -> B'. In the
parlance of style transfer, this is the style loss which gives texture to the image.

The B/B' content loss is set to 0.0 by default. You can get effects similar
to CNNMRF by turning this up and setting the analogy weight to zero. Or leave the
analogy loss on for some extra style guidance.

If you'd like to visualize only the analogy target to see what's happening,
set the MRF and content losses to zero: `--mrf-w=0 --b-content-w=0`. This is also
much faster, as the MRF loss is the slowest part of the algorithm.

License
-------
The code for this implementation is provided under the MIT license.

The suggested VGG16 weights are originally from [here](https://gist.github.com/ksimonyan/211839e770f7b538e2d8) and are
licensed under [CC BY-NC 4.0](http://creativecommons.org/licenses/by-nc/4.0/). Open a ticket if you
have a suggestion for a more free-as-in-free-speech license.

The attributions for the example art can be found in `examples/images/ATTRIBUTIONS.md`.
--------------------------------------------------------------------------------
/image_analogy/losses/patch_matcher.py:
--------------------------------------------------------------------------------
import numpy as np
import scipy.interpolate
import scipy.ndimage
from sklearn.feature_extraction.image import extract_patches_2d, reconstruct_from_patches_2d


def _calc_patch_grid_dims(shape, patch_size, patch_stride):
    x_w, x_h, x_c = shape
    num_rows = 1 + (x_h - patch_size) // patch_stride
    num_cols = 1 + (x_w - patch_size) // patch_stride
    return num_rows, num_cols


def make_patch_grid(x, patch_size, patch_stride=1):
    '''x shape: (num_channels, rows, cols)'''
    x = x.transpose(2, 1, 0)
    patches = extract_patches_2d(x, (patch_size, patch_size))
    x_w, x_h, x_c = x.shape
    num_rows, num_cols = _calc_patch_grid_dims(x.shape, patch_size, patch_stride)
    patches = patches.reshape((num_rows, num_cols, patch_size, patch_size, x_c))
    patches = patches.transpose((0, 1, 4, 2, 3))  # (rows, cols, channels, patch_row, patch_col)
    return patches


def combine_patches_grid(in_patches, out_shape):
    '''Reconstruct an image from these `patches`.

    input shape: (rows, cols, channels, patch_row, patch_col)
    '''
    num_rows, num_cols = in_patches.shape[:2]
    num_channels = in_patches.shape[-3]
    patch_size = in_patches.shape[-1]
    num_patches = num_rows * num_cols
    in_patches = np.reshape(in_patches, (num_patches, num_channels, patch_size, patch_size))  # (patches, channels, pr, pc)
    in_patches = np.transpose(in_patches, (0, 2, 3, 1))  # (patches, pr, pc, channels)
    recon = reconstruct_from_patches_2d(in_patches, out_shape)
    return recon.transpose(2, 1, 0).astype(np.float32)
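A quick round-trip sanity check of the two helpers above (a sketch: the import assumes the package layout shown in the tree earlier). With 1x1 patches, extracting a grid and recombining it should reproduce the input exactly:

```python
import numpy as np

from image_analogy.losses.patch_matcher import combine_patches_grid, make_patch_grid

x = np.random.uniform(size=(3, 8, 10)).astype(np.float32)  # (channels, rows, cols)
patches = make_patch_grid(x, patch_size=1)
print(patches.shape)  # (rows, cols, channels, patch_size, patch_size)
recon = combine_patches_grid(patches, x.shape[::-1])  # out_shape: (cols, rows, channels)
print(np.allclose(recon, x))  # True: 1x1 patches reconstruct exactly
```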
class PatchMatcher(object):
    '''A matcher of image patches inspired by the PatchMatch algorithm.

    image shape: (width, height, channels)
    '''
    def __init__(self, input_shape, target_img, patch_size=1, patch_stride=1, jump_size=0.5,
            num_propagation_steps=5, num_random_steps=5, random_max_radius=1.0, random_scale=0.5):
        self.input_shape = input_shape
        self.patch_size = patch_size
        self.patch_stride = patch_stride
        self.jump_size = jump_size
        self.num_propagation_steps = num_propagation_steps
        self.num_random_steps = num_random_steps
        self.random_max_radius = random_max_radius
        self.random_scale = random_scale
        self.num_input_rows, self.num_input_cols = _calc_patch_grid_dims(input_shape, patch_size, patch_stride)
        self.target_patches = make_patch_grid(target_img, patch_size)
        self.target_patches_normed = self.normalize_patches(self.target_patches)
        self.coords = np.random.uniform(0.0, 1.0,  # TODO: switch to pixels
            (2, self.num_input_rows, self.num_input_cols))
        # one similarity score per input patch
        self.similarity = np.zeros((self.num_input_rows, self.num_input_cols), dtype=np.float32)
        self.min_propagation_row = 1.0 / self.num_input_rows
        self.min_propagation_col = 1.0 / self.num_input_cols
        self.delta_row = np.array([[[self.min_propagation_row]], [[0.0]]])
        self.delta_col = np.array([[[0.0]], [[self.min_propagation_col]]])

    def update(self, input_img, reverse_propagation=False):
        input_patches = self.get_patches_for(input_img)
        self.update_with_patches(self.normalize_patches(input_patches), reverse_propagation=reverse_propagation)

    def update_with_patches(self, input_patches, reverse_propagation=False):
        self._propagate(input_patches, reverse_propagation=reverse_propagation)
        self._random_update(input_patches)

    def get_patches_for(self, img):
        return make_patch_grid(img, self.patch_size)

    def normalize_patches(self, patches):
        norm = np.sqrt(np.sum(np.square(patches), axis=(2, 3, 4), keepdims=True))
        return patches / norm

    def _propagate(self, input_patches, reverse_propagation=False):
        if reverse_propagation:
            roll_direction = 1
        else:
            roll_direction = -1
        sign = float(roll_direction)
        for step_i in range(self.num_propagation_steps):
            new_coords = self.clip_coords(np.roll(self.coords, roll_direction, 1) + self.delta_row * sign)
            coords_row, similarity_row = self.eval_state(new_coords, input_patches)
            new_coords = self.clip_coords(np.roll(self.coords, roll_direction, 2) + self.delta_col * sign)
            coords_col, similarity_col = self.eval_state(new_coords, input_patches)
            self.coords, self.similarity = self.take_best(coords_row, similarity_row, coords_col, similarity_col)

    def _random_update(self, input_patches):
        for alpha in range(1, self.num_random_steps + 1):  # NOTE this should actually stop when the move is < 1
            new_coords = self.clip_coords(self.coords + np.random.uniform(-self.random_max_radius, self.random_max_radius, self.coords.shape) * self.random_scale ** alpha)
            self.coords, self.similarity = self.eval_state(new_coords, input_patches)

    def eval_state(self, new_coords, input_patches):
        new_similarity = self.patch_similarity(input_patches, new_coords)
        delta_similarity = new_similarity - self.similarity
        coords = np.where(delta_similarity > 0, new_coords, self.coords)
        best_similarity = np.where(delta_similarity > 0, new_similarity, self.similarity)
        return coords, best_similarity

    def take_best(self, coords_a, similarity_a, coords_b, similarity_b):
        delta_similarity = similarity_a - similarity_b
        best_coords = np.where(delta_similarity > 0, coords_a, coords_b)
        best_similarity = np.where(delta_similarity > 0, similarity_a, similarity_b)
        return best_coords, best_similarity

    def patch_similarity(self, source, coords):
        '''Check the similarity of the patches specified in coords.'''
        target_vals = self.lookup_coords(self.target_patches_normed, coords)
        err = source * target_vals
        return np.sum(err, axis=(2, 3, 4))

    def clip_coords(self, coords):
        # TODO: should this all be in pixel space?
        coords = np.clip(coords, 0.0, 1.0)
        return coords

    def lookup_coords(self, x, coords):
        x_shape = np.expand_dims(np.expand_dims(x.shape, -1), -1)
        i_coords = np.round(coords * (x_shape[:2] - 1)).astype('int32')
        return x[i_coords[0], i_coords[1]]

    def get_reconstruction(self, patches=None, combined=None):
        if combined is not None:
            patches = make_patch_grid(combined, self.patch_size)
        if patches is None:
            patches = self.target_patches
        patches = self.lookup_coords(patches, self.coords)
        recon = combine_patches_grid(patches, self.input_shape)
        return recon

    def scale(self, new_shape, new_target_img):
        '''Create a new matcher of the given shape and replace its
        state with a scaled up version of the current matcher's state.
        '''
        new_matcher = PatchMatcher(new_shape, new_target_img, patch_size=self.patch_size,
                patch_stride=self.patch_stride, jump_size=self.jump_size,
                num_propagation_steps=self.num_propagation_steps,
                num_random_steps=self.num_random_steps,
                random_max_radius=self.random_max_radius,
                random_scale=self.random_scale)
        new_matcher.coords = congrid(self.coords, new_matcher.coords.shape, method='neighbour')
        # the similarity grid is (rows, cols), so resample to the matching dims of the new coords
        new_matcher.similarity = congrid(self.similarity, new_matcher.coords.shape[1:], method='neighbour')
        return new_matcher
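Because `normalize_patches` divides each patch by its L2 norm, `patch_similarity` computes a cosine similarity, and each `update` greedily keeps whichever candidate coordinates score higher. A minimal usage sketch on random feature blocks (mirroring the self-test at the bottom of this file; the import assumes the package layout shown earlier):

```python
import numpy as np

from image_analogy.losses.patch_matcher import PatchMatcher

target = np.random.uniform(size=(512, 12, 18)).astype(np.float32)  # (channels, rows, cols)
source = np.random.uniform(size=(512, 17, 10)).astype(np.float32)
matcher = PatchMatcher(source.shape[::-1], target, patch_size=1)   # input_shape: (cols, rows, channels)

for i in range(7):
    # alternate propagation direction between passes, as the self-test does
    matcher.update(source, reverse_propagation=bool(i % 2))
recon = matcher.get_reconstruction()  # target patches arranged to match `source`
print(recon.shape)                    # (channels, rows, cols) of the source
```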
def congrid(a, newdims, method='linear', centre=False, minusone=False):
    '''Arbitrary resampling of source array to new dimension sizes.
    Currently only supports maintaining the same number of dimensions.
    To use 1-D arrays, first promote them to shape (x,1).

    Uses the same parameters and creates the same co-ordinate lookup points
    as IDL's congrid routine, which apparently originally came from a VAX/VMS
    routine of the same name.

    method:
        neighbour - closest value from original data
        nearest and linear - uses n x 1-D interpolations using
            scipy.interpolate.interp1d
            (see Numerical Recipes for validity of use of n 1-D interpolations)
        spline - uses ndimage.map_coordinates

    centre:
        True - interpolation points are at the centres of the bins
        False - points are at the front edge of the bin

    minusone:
        For example, inarray.shape = (i,j) & new dimensions = (x,y):
        False - inarray is resampled by factors of (i/x) * (j/y)
        True - inarray is resampled by (i-1)/(x-1) * (j-1)/(y-1)
        This prevents extrapolation one element beyond bounds of input array.
    '''
    if a.dtype not in (np.float64, np.float32):
        a = a.astype(float)

    m1 = int(minusone)
    ofs = int(centre) * 0.5
    old = np.array(a.shape)
    ndims = len(a.shape)
    if len(newdims) != ndims:
        print("[congrid] dimensions error. "
              "This routine currently only supports "
              "rebinning to the same number of dimensions.")
        return None
    newdims = np.asarray(newdims, dtype=float)
    dimlist = []

    if method == 'neighbour':
        for i in range(ndims):
            base = np.indices(newdims.astype(int))[i]
            dimlist.append((old[i] - m1) / (newdims[i] - m1) * (base + ofs) - ofs)
        cd = np.array(dimlist).round().astype(int)
        newa = a[tuple(cd)]  # index with a tuple of index arrays
        return newa

    elif method in ['nearest', 'linear']:
        # calculate new dims
        for i in range(ndims):
            base = np.arange(newdims[i])
            dimlist.append((old[i] - m1) / (newdims[i] - m1) * (base + ofs) - ofs)
        # specify old dims
        olddims = [np.arange(i, dtype=float) for i in list(a.shape)]

        # first interpolation - for ndims = any
        mint = scipy.interpolate.interp1d(olddims[-1], a, kind=method)
        newa = mint(dimlist[-1])

        trorder = [ndims - 1] + list(range(ndims - 1))
        for i in range(ndims - 2, -1, -1):
            newa = newa.transpose(trorder)

            mint = scipy.interpolate.interp1d(olddims[i], newa, kind=method)
            newa = mint(dimlist[i])

        if ndims > 1:
            # need one more transpose to return to original dimensions
            newa = newa.transpose(trorder)

        return newa
    elif method in ['spline']:
        oslices = [slice(0, j) for j in old]
        oldcoords = np.ogrid[oslices]
        nslices = [slice(0, j) for j in newdims.astype(int)]
        newcoords = np.mgrid[nslices]

        newcoords_dims = list(range(newcoords.ndim))
        # make first index last
        newcoords_dims.append(newcoords_dims.pop(0))
        newcoords_tr = newcoords.transpose(newcoords_dims)
        # makes a view that affects newcoords

        newcoords_tr += ofs

        deltas = (np.asarray(old) - m1) / (newdims - m1)
        newcoords_tr *= deltas

        newcoords_tr -= ofs

        newa = scipy.ndimage.map_coordinates(a, newcoords)
        return newa
    else:
        print("Congrid error: Unrecognized interpolation type.\n"
              "Currently only 'neighbour', 'nearest', 'linear', "
              "and 'spline' are supported.")
        return None
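With the Python 3 fixes above, the `'neighbour'` path can be exercised directly; this is how `PatchMatcher.scale()` carries a coarse nearest-neighbour field up to the next scale (a sketch, same import assumption as before):

```python
import numpy as np

from image_analogy.losses.patch_matcher import congrid

coarse = np.random.uniform(size=(2, 3, 3))  # coords-like array: (2, rows, cols)
fine = congrid(coarse, (2, 6, 6), method='neighbour')
print(fine.shape)  # (2, 6, 6); each new cell takes the nearest coarse value
```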
if __name__ == '__main__':
    import sys
    import time
    from scipy.misc import imsave

    from image_analogy.img_utils import load_image, preprocess_image, deprocess_image

    content_image_path, style_image_path, output_prefix = sys.argv[1:]
    jump_size = 1.0
    num_steps = 7
    patch_size = 1
    patch_stride = 1

    # first, match two random feature blocks to exercise the matcher
    feat_chans = 512
    feat_style_shape = (feat_chans, 12, 18)
    feat_style = np.random.uniform(0.0, 1.0, feat_style_shape)
    feat_in_shape = (feat_chans, 17, 10)
    feat_in = np.random.uniform(0.0, 1.0, feat_in_shape)
    matcher = PatchMatcher(feat_in_shape[::-1], feat_style, patch_size=patch_size)
    feat_in_normed = matcher.normalize_patches(matcher.get_patches_for(feat_in))
    for i in range(num_steps):
        matcher.update_with_patches(feat_in_normed)
    r = matcher.get_reconstruction()
    content_img_img = load_image(content_image_path)
    content_n_channels, content_n_rows, content_n_cols = content_img_img.shape[::-1]
    content_img = preprocess_image(content_img_img, content_n_cols, content_n_rows)[0]
    style_img = load_image(style_image_path)
    style_n_channels, style_n_rows, style_n_cols = style_img.shape[::-1]
    style_img = preprocess_image(style_img, style_n_cols, style_n_rows)[0]
    # round-trip the content image through the patch grid helpers
    pg = make_patch_grid(content_img, patch_size)
    result = combine_patches_grid(pg, content_img.shape[::-1])
    outimg = deprocess_image(result, contrast_percent=0)
    imsave(output_prefix + '_bestre.png', outimg)

    # now match the content image against the style image
    matcher = PatchMatcher((content_n_cols, content_n_rows, content_n_channels), style_img, patch_size=patch_size)
    for i in range(num_steps):
        start = time.time()
        matcher.update(content_img, reverse_propagation=bool(i % 2))
        print(matcher.similarity.min(), matcher.similarity.max(), matcher.similarity.mean())
        end = time.time()
        print(end - start)  # time per update
    start = time.time()
    result = matcher.get_reconstruction(patches=matcher.target_patches)
    print(result.shape)
    end = time.time()
    print(end - start)
    outimg = deprocess_image(result, contrast_percent=0)
    # imsave takes (rows, cols, channels)
    imsave(output_prefix + '_best.png', outimg)
--------------------------------------------------------------------------------
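Note: the self-test above can be run directly once the package is importable, e.g. `python -m image_analogy.losses.patch_matcher content.jpg style.jpg out/pm-test`, which writes `out/pm-test_bestre.png` and `out/pm-test_best.png`. The `scipy.misc.imsave` import matches the pinned `scipy==0.17.0` in `requirements.txt`; it was removed from later SciPy releases, where `imageio.imwrite` is the usual substitute.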