├── README.md
├── img
│   ├── dali
│   │   ├── input_base_face.jpg
│   │   ├── input_style_dali.jpg
│   │   ├── output_style_dali_at_iteration_1.jpg
│   │   ├── output_style_dali_at_iteration_2.jpg
│   │   ├── output_style_dali_at_iteration_3.jpg
│   │   ├── output_style_dali_at_iteration_4.jpg
│   │   ├── output_style_dali_at_iteration_5.jpg
│   │   └── term.log
│   ├── donelli
│   │   ├── input_base_face.jpg
│   │   ├── input_style_donelli.jpg
│   │   ├── output_style_donelli_at_iteration_1.jpg
│   │   ├── output_style_donelli_at_iteration_2.jpg
│   │   ├── output_style_donelli_at_iteration_3.jpg
│   │   ├── output_style_donelli_at_iteration_4.jpg
│   │   ├── output_style_donelli_at_iteration_5.jpg
│   │   └── term.log
│   ├── outputs.png
│   └── tmp_art
│       ├── lundstroem.jpg
│       ├── starry_night.jpg
│       └── the_scream.jpg
└── main.py

/README.md:
--------------------------------------------------------------------------------
# Vincent : AI Artist

Style transfer is the technique of recomposing the content of one image in the visual style of another.

### Requirements

* Python (https://www.python.org/downloads/)
* Numpy (http://www.numpy.org/)
* Keras (http://keras.io/)
* Scipy (https://www.scipy.org/)
* Pillow (https://python-pillow.org/)
* Theano (http://deeplearning.net/software/theano/)
* h5py (http://h5py.org/)
* Sklearn (http://scikit-learn.org/)
* VGG16 weights file (https://drive.google.com/file/d/0Bz7KyqmuGsilT0J5dmRCM0ROVHc/view?usp=sharing)

### Basic Usage

Place the downloaded VGG16 weights file as `vgg16_weights.h5` in the same directory as `main.py`, then run:

```
python3 main.py --base_img_path /path/to/base/image --style_img_path /path/to/artistic/image --result_prefix output
```

### Results

|![result_00](img/outputs.png)|
|-------------------------------|

### References

* Inceptionism: Going Deeper into Neural Networks (https://research.googleblog.com/2015/06/inceptionism-going-deeper-into-neural.html)
* A Neural Algorithm of Artistic Style (http://arxiv.org/pdf/1508.06576v2.pdf)
--------------------------------------------------------------------------------
/img/dali/input_base_face.jpg:
--------------------------------------------------------------------------------
../donelli/input_base_face.jpg
--------------------------------------------------------------------------------
/img/dali/input_style_dali.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/input_style_dali.jpg
--------------------------------------------------------------------------------
/img/dali/output_style_dali_at_iteration_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/output_style_dali_at_iteration_1.jpg
--------------------------------------------------------------------------------
/img/dali/output_style_dali_at_iteration_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/output_style_dali_at_iteration_2.jpg
--------------------------------------------------------------------------------
/img/dali/output_style_dali_at_iteration_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/output_style_dali_at_iteration_3.jpg
--------------------------------------------------------------------------------
/img/dali/output_style_dali_at_iteration_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/output_style_dali_at_iteration_4.jpg
--------------------------------------------------------------------------------
/img/dali/output_style_dali_at_iteration_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/dali/output_style_dali_at_iteration_5.jpg
--------------------------------------------------------------------------------
/img/dali/term.log:
--------------------------------------------------------------------------------
python main.py --base_img_path img/donelli/input_base_face.jpg --style_img_path img/tmp_art/the-persistence-of-memory-1931.jpg --result_prefix output_style_dali --num_iter 5
Using Theano backend.
Model loaded.
Starting iteration 1
Current loss value: 85786920.0
Image saved as: output_style_dali_at_iteration_1.jpg
Iteration 1 completed in 23350s
Starting iteration 2
Current loss value: 24180358.0
Image saved as: output_style_dali_at_iteration_2.jpg
Iteration 2 completed in 23162s
Starting iteration 3
Current loss value: 11172733.0
Image saved as: output_style_dali_at_iteration_3.jpg
Iteration 3 completed in 24905s
Starting iteration 4
Current loss value: 7432975.5
Image saved as: output_style_dali_at_iteration_4.jpg
Iteration 4 completed in 23329s
Starting iteration 5
Current loss value: 5434838.0
Image saved as: output_style_dali_at_iteration_5.jpg
Iteration 5 completed in 20356s
--------------------------------------------------------------------------------
/img/donelli/input_base_face.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/input_base_face.jpg
--------------------------------------------------------------------------------
/img/donelli/input_style_donelli.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/input_style_donelli.jpg
--------------------------------------------------------------------------------
/img/donelli/output_style_donelli_at_iteration_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/output_style_donelli_at_iteration_1.jpg
--------------------------------------------------------------------------------
/img/donelli/output_style_donelli_at_iteration_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/output_style_donelli_at_iteration_2.jpg
--------------------------------------------------------------------------------
/img/donelli/output_style_donelli_at_iteration_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/output_style_donelli_at_iteration_3.jpg
--------------------------------------------------------------------------------
/img/donelli/output_style_donelli_at_iteration_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/output_style_donelli_at_iteration_4.jpg
--------------------------------------------------------------------------------
/img/donelli/output_style_donelli_at_iteration_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/donelli/output_style_donelli_at_iteration_5.jpg
--------------------------------------------------------------------------------
/img/donelli/term.log:
--------------------------------------------------------------------------------
python main.py --base_img_path img/tmp_base/face.jpg --style_img_path img/tmp_art/donelli.jpg --result_prefix output_style_donelli
Using Theano backend.
Model loaded.
Starting iteration 1
Current loss value: 245169600.0
Image saved as: output_style_donelli_at_iteration_1.jpg
Iteration 1 completed in 20682s
Starting iteration 2
Current loss value: 113565448.0
Image saved as: output_style_donelli_at_iteration_2.jpg
Iteration 2 completed in 23860s
Starting iteration 3
Current loss value: 51817188.0
Image saved as: output_style_donelli_at_iteration_3.jpg
Iteration 3 completed in 23454s
Starting iteration 4
Current loss value: 29557422.0
Image saved as: output_style_donelli_at_iteration_4.jpg
Iteration 4 completed in 24247s
Starting iteration 5
Current loss value: 19284080.0
Image saved as: output_style_donelli_at_iteration_5.jpg
Iteration 5 completed in 20699s
--------------------------------------------------------------------------------
/img/outputs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/outputs.png
--------------------------------------------------------------------------------
/img/tmp_art/lundstroem.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/tmp_art/lundstroem.jpg
--------------------------------------------------------------------------------
/img/tmp_art/starry_night.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/tmp_art/starry_night.jpg
--------------------------------------------------------------------------------
/img/tmp_art/the_scream.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saikatbsk/Vincent-AI-Artist/c9ce7dfddfc2cfd74a62e633d958a487f105c376/img/tmp_art/the_scream.jpg
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
# Vincent - AI Artist

# Import dependencies
import numpy as np
import time
import os
import argparse
import h5py

from scipy.misc import imread, imresize, imsave
from scipy.optimize import fmin_l_bfgs_b

from sklearn.preprocessing import normalize

from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, ZeroPadding2D, AveragePooling2D
from keras import backend as Kr

Kr.set_image_dim_ordering('th')

# Command line arguments
parser = argparse.ArgumentParser(description='AI Artist')

parser.add_argument('--base_img_path', metavar='base', type=str, help='Path to base image')
parser.add_argument('--style_img_path', metavar='ref', type=str, help='Path to artistic style reference image')
parser.add_argument('--result_prefix', metavar='res', type=str, help='Prefix for saved results')

parser.add_argument('--rescale', dest='rescale', default='True', type=str, help='Rescale the result back to the base image size')
parser.add_argument('--keep_aspect', dest='keep_aspect', default='True', type=str, help='Maintain the aspect ratio of the base image')
parser.add_argument('--tot_var_weight', dest='tv_weight', default=1e-3, type=float, help='Total variation loss weight')
parser.add_argument('--content_weight', dest='content_weight', default=0.025, type=float, help='Weight of the content loss')
parser.add_argument('--style_weight', dest='style_weight', default=1, type=float, help='Weight of the style loss')
parser.add_argument('--img_size', dest='img_size', default=512, type=int, help='Output image size')
parser.add_argument('--content_layer', dest='content_layer', default='conv5_2', type=str, help="Content layer to use: 'conv5_2' (default) or 'conv4_2'")
parser.add_argument('--init_image', dest='init_image', default='content', type=str, help="Initial image used to generate the final image. Options are: 'content' or 'noise'")
parser.add_argument('--num_iter', dest='num_iter', default=10, type=int, help='Number of iterations')

# Helper methods

## Convert string to boolean
def strToBool(s):
    return s.lower() in ('true', 'yes', 't', '1')

## Open, resize and format pictures into tensors
def preprocess(img_path, load_dims=False):
    global img_WIDTH, img_HEIGHT, aspect_ratio

    img = imread(img_path, mode="RGB")

    if load_dims:
        # img.shape is (rows, cols, channels); these values are reused in the
        # same order by imresize when rescaling the final output
        img_WIDTH = img.shape[0]
        img_HEIGHT = img.shape[1]
        aspect_ratio = img_HEIGHT / img_WIDTH

    img = imresize(img, (img_width, img_height))
    img = img.transpose((2, 0, 1)).astype('float64')
    img = np.expand_dims(img, axis=0)
    return img

## Convert a tensor into a valid image
def deprocess(x):
    x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')

    return x

## Load weights
def load_weights(weights_path, model):
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
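    # Note on the loop below (assumption: the weights file is the Theano-ordered
    # Keras 1.x VGG16 dump linked in the README): the HDF5 file stores one group
    # per layer ('layer_0', 'layer_1', ...), each with an 'nb_params' attribute
    # and 'param_0', 'param_1', ... datasets holding that layer's weights, plus
    # a top-level 'nb_layers' attribute giving the total layer count.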

    f = h5py.File(weights_path, 'r')

    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break

        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)

    f.close()
    print('Model loaded.')

## Gram matrix of an image tensor
def gram_matrix(x):
    assert Kr.ndim(x) == 3

    features = Kr.batch_flatten(x)
    gram = Kr.dot(features, Kr.transpose(features))

    return gram

## Evaluate loss and gradients
def eval_loss_and_grads(x):
    x = x.reshape((1, 3, img_width, img_height))
    outs = f_outputs([x])
    loss_value = outs[0]

    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')

    return loss_value, grad_values

## Style loss based on gram matrices
def style_loss(style, combination):
    assert Kr.ndim(style) == 3
    assert Kr.ndim(combination) == 3

    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_width * img_height

    return Kr.sum(Kr.square(S - C)) / (4. * (channels ** 2) * (size ** 2))

## Content loss
def content_loss(base, combination):
    return Kr.sum(Kr.square(combination - base))

## Total variation loss
def total_variation_loss(x):
    assert Kr.ndim(x) == 4

    a = Kr.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, 1:, :img_height-1])
    b = Kr.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, :img_width-1, 1:])

    return Kr.sum(Kr.pow(a + b, 1.25))

## Combined loss function - combines all three losses into one single scalar
def get_total_loss(outputs_dict):
    loss = Kr.variable(0.)
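    # The total loss assembled below follows Gatys et al. (see the README
    # reference "A Neural Algorithm of Artistic Style"):
    #   total = content_weight * content_loss(content layer)
    #         + (style_weight / 5) * sum of style_loss over conv1_1..conv5_1
    #         + tv_weight * total_variation_loss(combination image)
    # Row 0 of each feature tensor belongs to the base image, row 1 to the
    # style image, and row 2 to the combination image (see the concatenation
    # into inp_tensor further down).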
    layer_features = outputs_dict[args.content_layer]  # 'conv5_2' or 'conv4_2'
    base_image_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss += content_weight * content_loss(base_image_features, combination_features)
    feature_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']

    for layer_name in feature_layers:
        layer_features = outputs_dict[layer_name]
        style_reference_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        sl = style_loss(style_reference_features, combination_features)
        loss += (style_weight / len(feature_layers)) * sl

    loss += tv_weight * total_variation_loss(comb_img)

    return loss

## Combine loss and gradient
def combine_loss_and_gradient(loss, gradient):
    outputs = [loss]

    if type(gradient) in {list, tuple}:
        outputs += gradient
    else:
        outputs.append(gradient)

    f_outputs = Kr.function([comb_img], outputs)

    return f_outputs

## Prepare image
def prepare_image():
    assert args.init_image in ['content', 'noise'], "init_image must be one of ['content', 'noise']"

    if 'content' in args.init_image:
        x = preprocess(base_img_path, True)
    else:
        x = np.random.uniform(0, 255, (1, 3, img_width, img_height))

    num_iter = args.num_iter

    return x, num_iter

## The Evaluator class makes it possible to compute loss and gradients in one pass
class Evaluator(object):
    def __init__(self):
        self.loss_value = None
        self.grad_values = None

    def loss(self, x):
        assert self.loss_value is None

        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values

        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None

        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None

        return grad_values

evaluator = Evaluator()

# Base image, style image, and result image paths
args = parser.parse_args()
base_img_path = args.base_img_path
style_img_path = args.style_img_path
result_prefix = args.result_prefix

# The weights file
weights_path = r"vgg16_weights.h5"

# Init bools to decide whether or not to resize
rescale = strToBool(args.rescale)
keep_aspect = strToBool(args.keep_aspect)

# Init variables for style and content weights
tv_weight = args.tv_weight
content_weight = args.content_weight
style_weight = args.style_weight

# Init dimensions of the generated picture
img_width = img_height = args.img_size
img_WIDTH = img_HEIGHT = 0
aspect_ratio = 0

# Tensor representations of images
base_img = Kr.variable(preprocess(base_img_path, True))
style_img = Kr.variable(preprocess(style_img_path))

# This will hold the output image
comb_img = Kr.placeholder((1, 3, img_width, img_height))

# Combining three images into one single tensor
inp_tensor = Kr.concatenate([base_img, style_img, comb_img], axis=0)

# Building the VGG16 network (31 layers) with our three images as input
layer0 = ZeroPadding2D((1, 1))
layer0.set_input(inp_tensor, shape=(3, 3, img_width, img_height))
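# Note on the stacking above: the base, style, and combination images are
# concatenated along the batch axis (rows 0, 1, and 2 respectively), so a
# single forward pass through the VGG16 network built below yields the
# features of all three images at once. Only the named layers
# ('conv1_1'...'conv5_1' plus 'conv4_2'/'conv5_2') are read by the losses.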

model = Sequential()
model.add(layer0)
model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(64, 3, 3, activation='relu'))
model.add(AveragePooling2D((2, 2), strides=(2, 2)))

model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(128, 3, 3, activation='relu'))
model.add(AveragePooling2D((2, 2), strides=(2, 2)))

model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(256, 3, 3, activation='relu'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(256, 3, 3, activation='relu'))
model.add(AveragePooling2D((2, 2), strides=(2, 2)))

model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(AveragePooling2D((2, 2), strides=(2, 2)))

model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(512, 3, 3, activation='relu'))
model.add(AveragePooling2D((2, 2), strides=(2, 2)))

# Load weights for the VGG16 network
load_weights(weights_path, model)

# Get symbolic output of each key layer (named layers)
out_dict = dict([(layer.name, layer.output) for layer in model.layers])

# Combined loss (style, content, and total variation loss combined into one single scalar)
tot_loss = get_total_loss(out_dict)

# Gradients of the loss with respect to the generated image
grads = Kr.gradients(tot_loss, comb_img)

# Combine loss and gradient
f_outputs = combine_loss_and_gradient(tot_loss, grads)

# L-BFGS over pixels of the generated image to minimize neural style loss
x, num_iter = prepare_image()

for i in range(num_iter):
    # Step 1 : record iterations
    print('Starting iteration', (i+1))
    start_time = time.time()

    # Step 2 : L-BFGS optimization function using loss and gradient
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), fprime=evaluator.grads, maxfun=20)
    print('Current loss value: ', min_val)

    # Step 3 : get generated image
    img = deprocess(x.reshape((3, img_width, img_height)))

    # Step 4 : keep aspect ratio
    if keep_aspect and not rescale:
        img_ht = int(img_width * aspect_ratio)
        img = imresize(img, (img_width, img_ht), interp='bilinear')

    if rescale:
        img = imresize(img, (img_WIDTH, img_HEIGHT), interp='bilinear')

    # Step 5 : save generated image
    fname = result_prefix + '_at_iteration_%d.jpg' % (i+1)
    imsave(fname, img)

    end_time = time.time()

    print('Image saved as: ', fname)
    print('Iteration %d completed in %ds' % (i+1, end_time - start_time))
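
# Example invocation (hypothetical result prefix, mirroring img/*/term.log and
# the style images shipped under img/tmp_art):
#   python main.py --base_img_path img/donelli/input_base_face.jpg \
#                  --style_img_path img/tmp_art/starry_night.jpg \
#                  --result_prefix output_style_starry --num_iter 5
# Each iteration writes output_style_starry_at_iteration_<i>.jpg to the
# current directory.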
--------------------------------------------------------------------------------