├── README.md
├── fast_neural_style
│   ├── README.md
│   ├── gen_image.py
│   ├── model.py
│   ├── reader.py
│   ├── train_fast_neural_style.py
│   └── vgg.py
└── neural_style
    ├── README.md
    ├── neural_style.py
    ├── stylize.py
    └── vgg.py
/README.md:
--------------------------------------------------------------------------------
1 | # neural_style_tensorflow
2 | Neural style methods implemented in TensorFlow
--------------------------------------------------------------------------------
/fast_neural_style/README.md:
--------------------------------------------------------------------------------
1 | # Fast neural style transfer
2 | 
3 | A blog post about the paper [fast neural style](http://arxiv.org/pdf/1603.08155v1.pdf) is available at [http://hacker.duanshishi.com/?p=1693](http://hacker.duanshishi.com/?p=1693).
4 | In an attempt to learn TensorFlow I've implemented an Image Transformation Network as described in [Perceptual Losses for Real-Time Style Transfer and Super-Resolution](http://arxiv.org/abs/1603.08155) by Johnson et al.
5 | 
6 | This technique uses loss functions based on perceptual similarity and style similarity, as described by [Gatys et al](http://arxiv.org/abs/1508.06576), to train a transformation network that renders the style of one image onto the content of arbitrary images. Once trained for a particular style, the network generates stylized images in a single forward pass, as opposed to the 500-2000 forward and backward passes through a pretrained image classification net that the direct optimization approach requires.
7 | 
8 | ### Usage
9 | 
10 | First get the dependencies (COCO training set images and VGG model weights):
11 | 
12 | `./get_files.sh`
13 | 
14 | To train a model for fast stylizing:
15 | 
16 | `python train_fast_neural_style.py --TRAIN_IMAGES_PATH coco_img_path --STYLE_IMAGES style.png --BATCH_SIZE 8`
17 | 
18 | Where `--TRAIN_IMAGES_PATH` points to a directory of JPEGs used to train the model. The paper uses the [COCO image dataset](http://msvocds.blob.core.windows.net/coco2014/train2014.zip) (13GB). With my K20 card I can fit a batch size of 8 images. The paper trains the model for 2 epochs.
19 | 
20 | To generate images fast with an already trained model:
21 | 
22 | `python gen_image.py --CONTENT_IMAGES_PATH path_to_images_to_transform`
23 | 
24 | ### Requirements
25 | 
26 | - python 2.7.x
27 | - [Tensorflow r0.10](https://www.tensorflow.org/)
28 | - [VGG-19 model](http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat)
29 | - [COCO dataset](http://msvocds.blob.core.windows.net/coco2014/train2014.zip)
30 | 
31 | ### Acknowledgements
32 | 
33 | - [fast-neural-style with tensorflow](https://github.com/OlavHN/fast-neural-style)
34 | - [Chainer implementation](https://github.com/yusuketomoto/chainer-fast-neuralstyle)
35 | - [Tensorflow Neural style implementation](https://github.com/anishathalye/neural-style) (source of both inspiration and the VGG code)
--------------------------------------------------------------------------------
/fast_neural_style/gen_image.py:
--------------------------------------------------------------------------------
1 | #-*-coding:utf-8-*-
2 | '''
3 | Coding Just for Fun
4 | Created by burness on 16/9/13.
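  | Generates stylized images with a trained image transformation network
  | in a single forward pass per batch.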
5 | '''
6 | from scipy import misc
7 | import os
8 | import time
9 | import tensorflow as tf
10 | import vgg
11 | import model
12 | import reader
13 | 
14 | tf.app.flags.DEFINE_float("CONTENT_WEIGHT", 5e0, "Weight for content features loss")
15 | tf.app.flags.DEFINE_float("STYLE_WEIGHT", 1e2, "Weight for style features loss")
16 | tf.app.flags.DEFINE_float("TV_WEIGHT", 1e-5, "Weight for total variation loss")
17 | tf.app.flags.DEFINE_string("VGG_PATH", "imagenet-vgg-verydeep-19.mat",
18 |                            "Path to vgg model weights")
19 | tf.app.flags.DEFINE_string("MODEL_PATH", "models", "Path to read/write trained models")
20 | tf.app.flags.DEFINE_string("TRAIN_IMAGES_PATH", "train2014", "Path to training images")
21 | tf.app.flags.DEFINE_string("CONTENT_LAYERS", "relu4_2",
22 |                            "Which VGG layer to extract content loss from")
23 | tf.app.flags.DEFINE_string("STYLE_LAYERS", "relu1_1,relu2_1,relu3_1,relu4_1,relu5_1",
24 |                            "Which layers to extract style from")
25 | tf.app.flags.DEFINE_string("SUMMARY_PATH", "tensorboard", "Path to store Tensorboard summaries")
26 | tf.app.flags.DEFINE_string("STYLE_IMAGES", "style.png", "Styles to train")
27 | tf.app.flags.DEFINE_float("STYLE_SCALE", 1.0, "Scale styles. Higher extracts smaller features")
28 | tf.app.flags.DEFINE_string("CONTENT_IMAGES_PATH", None, "Path to content image(s)")
29 | tf.app.flags.DEFINE_integer("IMAGE_SIZE", 256, "Size of output image")
30 | tf.app.flags.DEFINE_integer("BATCH_SIZE", 1, "Number of concurrent images to train on")
31 | 
32 | FLAGS = tf.app.flags.FLAGS
33 | 
34 | def total_variation_loss(layer):
35 |     shape = tf.shape(layer)
36 |     height = shape[1]
37 |     width = shape[2]
38 |     y = tf.slice(layer, [0,0,0,0], tf.pack([-1,height-1,-1,-1])) - tf.slice(layer, [0,1,0,0], [-1,-1,-1,-1])
39 |     x = tf.slice(layer, [0,0,0,0], tf.pack([-1,-1,width-1,-1])) - tf.slice(layer, [0,0,1,0], [-1,-1,-1,-1])
40 |     return tf.nn.l2_loss(x) / tf.to_float(tf.size(x)) + tf.nn.l2_loss(y) / tf.to_float(tf.size(y))
41 | 
42 | # TODO: Figure out grams and batch sizes! Doesn't make sense ..
43 | def gram(layer):
44 |     shape = tf.shape(layer)
45 |     num_images = shape[0]
46 |     num_filters = shape[3]
47 |     size = tf.size(layer)
48 |     # reshape to [num_images, -1, num_filters]; -1 infers the dim (see the start of p8 of the paper) -- this just adds a leading num_images axis
49 |     filters = tf.reshape(layer, tf.pack([num_images, -1, num_filters]))
50 |     grams = tf.batch_matmul(filters, filters, adj_x=True) / tf.to_float(size / FLAGS.BATCH_SIZE)
51 | 
52 |     return grams
53 | 
54 | def get_style_features(style_paths, style_layers):
55 |     with tf.Graph().as_default() as g:
56 |         size = int(round(FLAGS.IMAGE_SIZE * FLAGS.STYLE_SCALE))
57 |         images = tf.pack([reader.get_image(path, size) for path in style_paths])
58 |         net, _ = vgg.net(FLAGS.VGG_PATH, images)
59 |         features = []
60 |         for layer in style_layers:
61 |             features.append(gram(net[layer]))
62 | 
63 |         with tf.Session() as sess:
64 |             return sess.run(features)
65 | 
66 | def main(argv=None):
67 |     if not FLAGS.CONTENT_IMAGES_PATH:
68 |         print('To generate images, --CONTENT_IMAGES_PATH must point to the content images')
69 |         return
70 |     content_images = reader.image(
71 |         FLAGS.BATCH_SIZE,
72 |         FLAGS.IMAGE_SIZE,
73 |         FLAGS.CONTENT_IMAGES_PATH,
74 |         epochs=1,
75 |         shuffle=False,
76 |         crop=False)
77 |     generated_images = model.net(content_images / 255.)
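  |     # NOTE (added for clarity): model.net outputs roughly mean-subtracted
  |     # pixel values (a tanh scaled by 127.5), so the line below adds the
  |     # VGG mean pixel back and saturates to uint8 to get displayable images.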
78 | 
79 |     output_format = tf.saturate_cast(generated_images + reader.mean_pixel, tf.uint8)
80 |     with tf.Session() as sess:
81 |         file = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
82 |         if not file:
83 |             print('Could not find trained model in {0}'.format(FLAGS.MODEL_PATH))
84 |             return
85 |         print('Using model from {}'.format(file))
86 |         saver = tf.train.Saver()
87 |         saver.restore(sess, file)
88 |         sess.run(tf.initialize_local_variables())
89 |         coord = tf.train.Coordinator()
90 |         threads = tf.train.start_queue_runners(coord=coord)
91 |         i = 0
92 |         start_time = time.time()
93 |         try:
94 |             while not coord.should_stop():
95 |                 print(i)
96 |                 images_t = sess.run(output_format)
97 |                 elapsed = time.time() - start_time
98 |                 start_time = time.time()
99 |                 print('Time for one batch: {}'.format(elapsed))
100 | 
101 |                 for raw_image in images_t:
102 |                     i += 1
103 |                     misc.imsave('out{0:04d}.png'.format(i), raw_image)
104 |         except tf.errors.OutOfRangeError:
105 |             print('Done generating -- epoch limit reached')
106 |         finally:
107 |             coord.request_stop()
108 | 
109 |         coord.join(threads)
110 | 
111 | 
112 | 
113 | if __name__ == '__main__':
114 |     tf.app.run()
--------------------------------------------------------------------------------
/fast_neural_style/model.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | def conv2d(x, input_filters, output_filters, kernel, strides, padding='SAME'):
4 |     with tf.variable_scope('conv') as scope:
5 | 
6 |         shape = [kernel, kernel, input_filters, output_filters]
7 |         weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
8 |         convolved = tf.nn.conv2d(x, weight, strides=[1, strides, strides, 1], padding=padding, name='conv')
9 | 
10 |         normalized = batch_norm(convolved, output_filters)
11 | 
12 |         return normalized
13 | 
14 | def conv2d_transpose(x, input_filters, output_filters, kernel, strides, padding='SAME'):
15 |     with tf.variable_scope('conv_transpose') as scope:
16 | 
17 |         shape = [kernel, kernel, output_filters, input_filters]
18 |         weight = tf.Variable(tf.truncated_normal(shape, stddev=0.1), name='weight')
19 | 
20 |         batch_size = tf.shape(x)[0]
21 |         height = tf.shape(x)[1] * strides
22 |         width = tf.shape(x)[2] * strides
23 |         output_shape = tf.pack([batch_size, height, width, output_filters])
24 |         convolved = tf.nn.conv2d_transpose(x, weight, output_shape, strides=[1, strides, strides, 1], padding=padding, name='conv_transpose')
25 | 
26 |         normalized = batch_norm(convolved, output_filters)
27 |         return normalized
28 | 
29 | def batch_norm(x, size):
30 |     batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], keep_dims=True)
31 |     beta = tf.Variable(tf.zeros([size]), name='beta')
32 |     scale = tf.Variable(tf.ones([size]), name='scale')
33 |     epsilon = 1e-3
34 |     return tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='batch')
35 | 
36 | def residual(x, filters, kernel, strides, padding='SAME'):
37 |     with tf.variable_scope('residual') as scope:
38 |         conv1 = conv2d(x, filters, filters, kernel, strides, padding=padding)
39 |         conv2 = conv2d(tf.nn.relu(conv1), filters, filters, kernel, strides, padding=padding)
40 | 
41 |         residual = x + conv2
42 | 
43 |         return residual
44 | 
45 | def net(image):
46 |     with tf.variable_scope('conv1'):
47 |         conv1 = tf.nn.relu(conv2d(image, 3, 32, 9, 1))
48 |     with tf.variable_scope('conv2'):
49 |         conv2 = tf.nn.relu(conv2d(conv1, 32, 64, 3, 2))
50 |     with tf.variable_scope('conv3'):
51 |         conv3 = tf.nn.relu(conv2d(conv2, 64, 128, 3, 2))
52 |     with tf.variable_scope('res1'):
53 |         res1 = residual(conv3, 128, 3, 1)
54 |     with tf.variable_scope('res2'):
55 |         res2 = residual(res1, 128, 3, 1)
56 |     with tf.variable_scope('res3'):
57 |         res3 = residual(res2, 128, 3, 1)
58 |     with tf.variable_scope('res4'):
59 |         res4 = residual(res3, 128, 3, 1)
60 |     with tf.variable_scope('res5'):
61 |         res5 = residual(res4, 128, 3, 1)
62 |     with tf.variable_scope('deconv1'):
63 |         deconv1 = tf.nn.relu(conv2d_transpose(res5, 128, 64, 3, 2))
64 |     with tf.variable_scope('deconv2'):
65 |         deconv2 = tf.nn.relu(conv2d_transpose(deconv1, 64, 32, 3, 2))
66 |     with tf.variable_scope('deconv3'):
67 |         deconv3 = tf.nn.tanh(conv2d_transpose(deconv2, 32, 3, 9, 1))
68 | 
69 |     # tanh output is in [-1, 1]; scale to the mean-subtracted pixel range
70 |     y = deconv3 * 127.5
71 | 
72 |     return y
--------------------------------------------------------------------------------
/fast_neural_style/reader.py:
--------------------------------------------------------------------------------
1 | from os import listdir
2 | from os.path import isfile, join
3 | import tensorflow as tf
4 | 
5 | mean_pixel = [123.68, 116.779, 103.939]  # ImageNet average from VGG ..
6 | 
7 | 
8 | def preprocess(image, size, max_length):
9 |     shape = tf.shape(image)
10 |     size_t = tf.constant(size, tf.float64)
11 |     height = tf.cast(shape[0], tf.float64)
12 |     width = tf.cast(shape[1], tf.float64)
13 | 
14 |     cond_op = tf.less(width, height) if max_length else tf.less(height, width)
15 | 
16 |     new_height, new_width = tf.cond(
17 |         cond_op, lambda: (size_t, (width * size_t) / height),
18 |         lambda: ((height * size_t) / width, size_t))
19 |     new_size = [tf.to_int32(new_height), tf.to_int32(new_width)]
20 |     resized_image = tf.image.resize_images(image, new_size)
21 |     normalised_image = resized_image - mean_pixel
22 |     return normalised_image
23 | 
24 | 
25 | # max_length: Whether size dictates the longest or shortest side. Defaults to longest
26 | def get_image(path, size, max_length=True):
27 |     png = path.lower().endswith('png')
28 |     img_bytes = tf.read_file(path)
29 |     image = tf.image.decode_png(
30 |         img_bytes, channels=3) if png else tf.image.decode_jpeg(
31 |             img_bytes, channels=3)
32 |     return preprocess(image, size, max_length)
33 | 
34 | 
35 | def image(n, size, path, epochs=2, shuffle=True, crop=True):
36 |     filenames = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
37 |     if not shuffle:
38 |         filenames = sorted(filenames)
39 | 
40 |     png = filenames[0].lower().endswith(
41 |         'png')  # If the first file is a png, assume they all are
42 | 
43 |     filename_queue = tf.train.string_input_producer(
44 |         filenames, num_epochs=epochs, shuffle=shuffle)
45 |     reader = tf.WholeFileReader()
46 |     _, img_bytes = reader.read(filename_queue)
47 |     image = tf.image.decode_png(
48 |         img_bytes, channels=3) if png else tf.image.decode_jpeg(
49 |             img_bytes, channels=3)
50 | 
51 |     processed_image = preprocess(image, size, False)
52 |     if not crop:
53 |         return tf.train.batch([processed_image], n, dynamic_pad=True)
54 | 
55 |     cropped_image = tf.slice(processed_image, [0, 0, 0], [size, size, 3])
56 |     cropped_image.set_shape((size, size, 3))
57 | 
58 |     images = tf.train.batch([cropped_image], n)
59 |     return images
--------------------------------------------------------------------------------
/fast_neural_style/train_fast_neural_style.py:
--------------------------------------------------------------------------------
1 | '''
2 | Coding Just for Fun
3 | Created by burness on 16/9/13.
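  | Trains an image transformation network for fast style transfer on a
  | directory of training images (see the README for usage).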
4 | '''
5 | from scipy import misc
6 | import os
7 | import time
8 | import tensorflow as tf
9 | import vgg
10 | import model
11 | import reader
12 | 
13 | tf.app.flags.DEFINE_float("CONTENT_WEIGHT", 5e0,
14 |                           "Weight for content features loss")
15 | tf.app.flags.DEFINE_float("STYLE_WEIGHT", 1e2,
16 |                           "Weight for style features loss")
17 | tf.app.flags.DEFINE_float("TV_WEIGHT", 1e-5,
18 |                           "Weight for total variation loss")
19 | tf.app.flags.DEFINE_string("VGG_PATH", "imagenet-vgg-verydeep-19.mat",
20 |                            "Path to vgg model weights")
21 | tf.app.flags.DEFINE_string("MODEL_PATH", "models",
22 |                            "Path to read/write trained models")
23 | tf.app.flags.DEFINE_string("TRAIN_IMAGES_PATH", "train2014",
24 |                            "Path to training images")
25 | tf.app.flags.DEFINE_string("CONTENT_LAYERS", "relu4_2",
26 |                            "Which VGG layer to extract content loss from")
27 | tf.app.flags.DEFINE_string("STYLE_LAYERS",
28 |                            "relu1_1,relu2_1,relu3_1,relu4_1,relu5_1",
29 |                            "Which layers to extract style from")
30 | tf.app.flags.DEFINE_string("SUMMARY_PATH", "tensorboard",
31 |                            "Path to store Tensorboard summaries")
32 | tf.app.flags.DEFINE_string("STYLE_IMAGES", "style.png", "Styles to train")
33 | tf.app.flags.DEFINE_float("STYLE_SCALE", 1.0,
34 |                           "Scale styles. Higher extracts smaller features")
35 | tf.app.flags.DEFINE_string("CONTENT_IMAGES_PATH", None,
36 |                            "Path to content image(s)")
37 | tf.app.flags.DEFINE_integer("IMAGE_SIZE", 256, "Size of output image")
38 | tf.app.flags.DEFINE_integer("BATCH_SIZE", 1,
39 |                             "Number of concurrent images to train on")
40 | 
41 | FLAGS = tf.app.flags.FLAGS
42 | 
43 | 
44 | def total_variation_loss(layer):
45 |     shape = tf.shape(layer)
46 |     height = shape[1]
47 |     width = shape[2]
48 |     y = tf.slice(layer, [0, 0, 0, 0], tf.pack(
49 |         [-1, height - 1, -1, -1])) - tf.slice(layer, [0, 1, 0, 0],
50 |                                               [-1, -1, -1, -1])
51 |     x = tf.slice(layer, [0, 0, 0, 0], tf.pack(
52 |         [-1, -1, width - 1, -1])) - tf.slice(layer, [0, 0, 1, 0],
53 |                                              [-1, -1, -1, -1])
54 |     return tf.nn.l2_loss(x) / tf.to_float(tf.size(x)) + tf.nn.l2_loss(
55 |         y) / tf.to_float(tf.size(y))
56 | 
57 | 
58 | # TODO: Figure out grams and batch sizes! Doesn't make sense ..
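  | # A reading of the Gram computation below (added note): each feature map is
  | # flattened to [pixels, filters] per image and F^T * F is taken image-wise
  | # via batch_matmul; dividing by (size / BATCH_SIZE) then normalises each
  | # Gram matrix by the per-image layer size.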
59 | def gram(layer):
60 |     shape = tf.shape(layer)
61 |     num_images = shape[0]
62 |     num_filters = shape[3]
63 |     size = tf.size(layer)
64 |     filters = tf.reshape(layer, tf.pack([num_images, -1, num_filters]))
65 |     grams = tf.batch_matmul(
66 |         filters, filters, adj_x=True) / tf.to_float(size / FLAGS.BATCH_SIZE)
67 | 
68 |     return grams
69 | 
70 | 
71 | def get_style_features(style_paths, style_layers):
72 |     with tf.Graph().as_default() as g:
73 |         size = int(round(FLAGS.IMAGE_SIZE * FLAGS.STYLE_SCALE))
74 |         images = tf.pack(
75 |             [reader.get_image(path, size) for path in style_paths])
76 |         net, _ = vgg.net(FLAGS.VGG_PATH, images)
77 |         features = []
78 |         for layer in style_layers:
79 |             features.append(gram(net[layer]))
80 | 
81 |         with tf.Session() as sess:
82 |             return sess.run(features)
83 | 
84 | 
85 | def compute_content_loss(content_layers, net):
86 |     content_loss = 0
87 |     for layer in content_layers:
88 |         generated_images, content_images = tf.split(0, 2, net[layer])
89 |         size = tf.size(generated_images)
90 |         content_loss += tf.nn.l2_loss(generated_images -
91 |                                       content_images) / tf.to_float(size)
92 |     content_loss = content_loss / len(content_layers)
93 | 
94 |     return content_loss
95 | 
96 | 
97 | def compute_style_loss(style_features_t, style_layers, net):
98 |     style_loss = 0
99 |     for style_gram, layer in zip(style_features_t, style_layers):
100 |         generated_images, _ = tf.split(0, 2, net[layer])
101 |         size = tf.size(generated_images)
102 |         for style_image in style_gram:
103 |             style_loss += tf.nn.l2_loss(
104 |                 tf.reduce_sum(gram(generated_images) - style_image,
105 |                               0)) / tf.to_float(size)
106 |     style_loss = style_loss / len(style_layers)
107 |     return style_loss
108 | 
109 | 
110 | def main(argv=None):
111 |     if not os.path.exists(FLAGS.MODEL_PATH):
112 |         os.makedirs(FLAGS.MODEL_PATH)
113 | 
114 |     style_paths = FLAGS.STYLE_IMAGES.split(',')
115 |     style_layers = FLAGS.STYLE_LAYERS.split(',')
116 |     content_layers = FLAGS.CONTENT_LAYERS.split(',')
117 | 
118 |     style_features_t = get_style_features(style_paths, style_layers)
119 | 
120 |     images = reader.image(FLAGS.BATCH_SIZE, FLAGS.IMAGE_SIZE,
121 |                           FLAGS.TRAIN_IMAGES_PATH)
122 |     # model is a residual network
123 |     generated = model.net(images / 255.)
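  |     # The generated and original images are fed through VGG as a single
  |     # concatenated batch below; each layer's activations are later split
  |     # in two to compare generated features against content features.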
124 | 
125 |     net, _ = vgg.net(FLAGS.VGG_PATH, tf.concat(0, [generated, images]))
126 |     content_loss = compute_content_loss(content_layers, net)
127 |     style_loss = compute_style_loss(style_features_t, style_layers, net)
128 | 
129 |     loss = FLAGS.STYLE_WEIGHT * style_loss + FLAGS.CONTENT_WEIGHT * content_loss + FLAGS.TV_WEIGHT * total_variation_loss(
130 |         generated)
131 | 
132 |     global_step = tf.Variable(0, name="global_step", trainable=False)
133 |     train_op = tf.train.AdamOptimizer(1e-3).minimize(
134 |         loss, global_step=global_step)
135 | 
136 |     output_format = tf.saturate_cast(
137 |         tf.concat(0, [generated, images]) + reader.mean_pixel, tf.uint8)
138 | 
139 |     with tf.Session() as sess:
140 |         saver = tf.train.Saver(tf.all_variables())
141 |         file = tf.train.latest_checkpoint(FLAGS.MODEL_PATH)
142 |         if file:
143 |             print('Restoring model from {}'.format(file))
144 |             saver.restore(sess, file)
145 |             sess.run(tf.initialize_local_variables())
146 |         else:
147 |             print('New model initialized')
148 |             sess.run(tf.initialize_all_variables())
149 | 
150 |         sess.run(tf.initialize_local_variables())
151 |         coord = tf.train.Coordinator()
152 |         threads = tf.train.start_queue_runners(coord=coord)
153 |         start_time = time.time()
154 |         try:
155 |             while not coord.should_stop():
156 |                 _, loss_t, step = sess.run([train_op, loss, global_step])
157 |                 elapsed_time = time.time() - start_time
158 |                 start_time = time.time()
159 |                 print(step, loss_t, elapsed_time)
160 |                 if step % 100 == 0:
161 |                     output_t = sess.run(output_format)
162 |                     for i, raw_image in enumerate(output_t):
163 |                         misc.imsave('out{}.png'.format(i), raw_image)
164 |                     print('Saved image.')
165 |                 if step % 1000 == 0:
166 |                     saver.save(
167 |                         sess,
168 |                         FLAGS.MODEL_PATH + '/fast-style-model',
169 |                         global_step=step)
170 |                     print('Saved model.')
171 |         except tf.errors.OutOfRangeError:
172 |             print('Done training -- epoch limit reached')
173 |         finally:
174 |             coord.request_stop()
175 | 
176 |         coord.join(threads)
177 | 
178 | 
179 | if __name__ == '__main__':
180 |     tf.app.run()
--------------------------------------------------------------------------------
/fast_neural_style/vgg.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import scipy.io
4 | from scipy import misc
5 | 
6 | 
7 | def net(data_path, input_image):
8 |     layers = (
9 |         'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
10 | 
11 |         'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
12 | 
13 |         'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
14 |         'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
15 | 
16 |         'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
17 |         'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
18 | 
19 |         'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
20 |         'relu5_3', 'conv5_4', 'relu5_4'
21 |     )
22 | 
23 |     data = scipy.io.loadmat(data_path)
24 |     mean = data['normalization'][0][0][0]
25 |     mean_pixel = np.mean(mean, axis=(0, 1))
26 |     weights = data['layers'][0]
27 | 
28 |     net = {}
29 |     current = input_image
30 |     for i, name in enumerate(layers):
31 |         kind = name[:4]
32 |         if kind == 'conv':
33 |             kernels, bias = weights[i][0][0][0][0]
34 |             # matconvnet: weights are [width, height, in_channels, out_channels]
35 |             # tensorflow: weights are [height, width, in_channels, out_channels]
36 |             kernels = np.transpose(kernels, (1, 0, 2, 3))
37 |             bias = bias.reshape(-1)
38 |             current = _conv_layer(current, kernels, bias, name=name)
39 |         elif kind == 'relu':
40 |             current = tf.nn.relu(current, name=name)
41 |         elif kind == 'pool':
42 |             current = _pool_layer(current, name=name)
43 |         net[name] = current
44 | 
45 |     assert len(net) == len(layers)
46 |     return net, mean_pixel
47 | 
48 | 
49 | def _conv_layer(input, weights, bias, name=None):
50 |     conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
51 |                         padding='SAME', name=name)
52 |     return tf.nn.bias_add(conv, bias)
53 | 
54 | 
55 | def _pool_layer(input, name=None):
56 |     return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
57 |                           padding='SAME', name=name)
58 | 
59 | 
60 | def preprocess(image, mean_pixel):
61 |     return image - mean_pixel
62 | 
63 | 
64 | def unprocess(image, mean_pixel):
65 |     return image + mean_pixel
--------------------------------------------------------------------------------
/neural_style/README.md:
--------------------------------------------------------------------------------
1 | # neural-style
2 | 
3 | An implementation of [neural style][paper] in TensorFlow.
4 | 
5 | This implementation is a lot simpler than many of the others out there,
6 | thanks to TensorFlow's really nice API and [automatic differentiation][ad].
7 | 
8 | TensorFlow doesn't support [L-BFGS][l-bfgs] (which is what the original authors
9 | used), so we use [Adam][adam]. This may require a little bit more
10 | hyperparameter tuning to get nice results.
11 | 
12 | TensorFlow seems to be [slower][tensorflow-benchmarks] than a lot of the other
13 | deep learning frameworks out there. I'm sure this implementation could be
14 | improved, but it would probably take improvements in TensorFlow itself as well
15 | to get it to operate at the same speed as other implementations. As of now, it
16 | seems to be around 3x slower than implementations using Torch.
17 | 
18 | ## Running
19 | 
20 | `python neural_style.py --content --styles