├── .idea
├── inspectionProfiles
│ └── Project_Default.xml
└── vcs.xml
├── README.md
├── deep_dream_demo.py
├── deep_dream_demo_multiscale.py
├── deep_dream_demo_v2.py
├── deep_dream_laplace_demo.py
├── deep_dream_random_noise_demo.py
├── deepdream.ipynb
└── pilatus800.jpg
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # deepdream
2 |
3 | Based on the work of [Alexander Mordvintsev](mailto:moralex@google.com)
4 |
5 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream
6 |
7 | This directory contains a Jupyter notebook that demonstrates a number of Convolutional Neural Network
8 | image generation techniques implemented with TensorFlow:
9 |
10 | - visualizing individual feature channels and their combinations to explore the space of patterns learned by the neural network (see [GoogLeNet](http://storage.googleapis.com/deepdream/visualz/tensorflow_inception/index.html) and [VGG16](http://storage.googleapis.com/deepdream/visualz/vgg16/index.html) galleries)
11 | - embedding TensorBoard graph visualizations into Jupyter notebooks
12 | - producing high-resolution images with tiled computation ([example](http://storage.googleapis.com/deepdream/pilatus_flowers.jpg))
13 | - using Laplacian Pyramid Gradient Normalization to produce smooth and colorful visuals at low cost
14 | - generating DeepDream-like images with TensorFlow
15 |
16 | You can view "deepdream.ipynb" directly on GitHub. Note that GitHub's Jupyter notebook preview removes
17 | embedded graph visualizations. You can still see them online using the
18 | [nbviewer](http://nbviewer.jupyter.org/github/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/deepdream/deepdream.ipynb)
19 | service.
20 |
21 | In order to run the notebook locally, the following dependencies must be installed:
22 |
23 | - Python 2.7 or 3.5
24 | - TensorFlow (>=r0.7)
25 | - NumPy
26 | - Jupyter Notebook
27 |
28 | To open the notebook, run the `ipython notebook` command in this directory, and
29 | select 'deepdream.ipynb' in the opened browser window.
30 |
--------------------------------------------------------------------------------
/deep_dream_demo.py:
--------------------------------------------------------------------------------
1 |
2 | # boilerplate code
3 | from __future__ import print_function
4 | import os
5 | from io import BytesIO
6 | import numpy as np
7 | from functools import partial
8 | import PIL.Image
9 | #from IPython.display import clear_output, Image, display, HTML
10 |
11 |
12 | import tensorflow as tf
13 |
14 | #!wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip
15 |
16 | model_fn = 'tensorflow_inception_graph.pb'
17 |
18 | # creating TensorFlow session and loading the model
19 | graph = tf.Graph()
20 | sess = tf.InteractiveSession(graph=graph)
21 | with tf.gfile.FastGFile(model_fn, 'rb') as f:
22 | graph_def = tf.GraphDef()
23 | graph_def.ParseFromString(f.read())
24 | t_input = tf.placeholder(np.float32, name='input') # define the input tensor
25 | imagenet_mean = 117.0
26 | t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0)
27 | tf.import_graph_def(graph_def, {'input':t_preprocessed})
28 |
29 | layers = [op.name for op in graph.get_operations() if op.type=='Conv2D' and 'import/' in op.name]
30 | feature_nums = [int(graph.get_tensor_by_name(name+':0').get_shape()[-1]) for name in layers]
31 |
32 | print('Number of layers', len(layers))
33 | print('Total number of feature channels:', sum(feature_nums))
34 |
35 | layer = 'mixed4d_3x3_bottleneck_pre_relu'
36 | channel = 139 # picking some feature channel to visualize
37 |
38 | # start with a gray image with a little noise
39 | img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
40 |
41 |
def showarray(a, fname, fmt='jpeg'):
    """Save an image array to ``fname`` using PIL.

    Args:
        a: array of pixel values; values are clipped to [0, 1] and then
            scaled to 8-bit integers before saving.
        fname: destination path (or any other target accepted by PIL's
            ``save``).
        fmt: PIL format name used for encoding, independent of the file
            name's extension.
    """
    # PIL does not create missing directories, so make sure the target
    # folder (e.g. './results') exists before saving.
    if isinstance(fname, str):
        out_dir = os.path.dirname(fname)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)
    pixels = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(pixels).save(fname, fmt)
48 |
49 |
50 |
def visstd(a, s=0.1):
    """Normalize the image range for visualization.

    The array is centered on its mean, divided by its standard deviation
    (floored at 1e-4 so constant images do not divide by zero), scaled by
    ``s`` and shifted to be centered on 0.5.
    """
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5
54 |
55 |
def T(layer):
    """Return the output tensor (index 0) of ``layer`` in the imported graph."""
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
59 |
def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
    """Run plain gradient ascent on the input image to maximize ``t_obj``.

    Args:
        t_obj: tensor whose mean is used as the optimization objective.
        img0: starting image; it is copied, so the caller's array is left
            untouched.
        iter_n: number of gradient-ascent steps.
        step: step size applied to the std-normalized gradient.

    Side effects:
        Writes './results/result_<i>' after every step and
        './results/result_final' at the end; prints the objective shape and
        the score of every step. Note that the snapshot of the starting
        image shares its path ('./results/result_0') with the image written
        at step 0.
    """
    # The objective is the mean of the given tensor; we want to maximize it.
    t_score = tf.reduce_mean(t_obj)
    # Gradient of the objective with respect to the input image.
    t_grad = tf.gradients(t_score, t_input)[0]

    img = img0.copy()
    showarray(visstd(img), './results/result_0')

    # Probe the objective on the actual starting image (not the global
    # noise image) so the reported shape matches what is being optimized.
    act_obj = sess.run(t_obj, {t_input: img})
    print('objective tensor size', act_obj.shape)

    for i in range(iter_n):
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # Normalize the gradient so the same step size works for
        # different layers and networks.
        g /= g.std() + 1e-8
        img += g * step
        print(i, ' ', score)

        fname = './results/result_' + str(i)
        showarray(visstd(img), fname)
    showarray(visstd(img), './results/result_final')
83 |
84 | #render_naive(T(layer)[:, :, :, channel])
85 |
86 | #Multiscale image generation
87 | # We are going to apply gradient ascent on multiple scales.
88 | # Details formed on smaller scale will be upscaled and augmented with additional details on the next scale.
89 |
90 | #With multiscale image generation it may be tempting to set the number of octaves to some high value to produce wallpaper-sized images.
91 | # Storing network activations and backprop values will quickly run out of GPU memory in this case. There is a simple trick to avoid this:
92 | # split the image into smaller tiles and compute each tile gradient independently.
93 | #Applying random shifts to the image before every iteration helps avoid tile seams and improves the overall image quality.
94 |
95 | print('**** Multiscale ****')
96 |
def tffunc(*argtypes):
    """Turn a TF-graph-building function into an ordinary callable.

    One placeholder is created per entry of ``argtypes``. The returned
    decorator calls the wrapped function once with those placeholders to
    build the output tensor; the resulting callable feeds its positional
    arguments into the placeholders and evaluates that tensor. An optional
    ``session`` keyword selects the session used for evaluation.
    See "resize" function below.
    """
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = {ph: value for ph, value in zip(placeholders, args)}
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap
108 |
109 | # Helper function that uses TF to resize an image
@tffunc(np.float32, np.int32)
def resize(img, size):
    """Bilinearly resize a single image to ``size`` using TensorFlow.

    A batch axis is added for ``tf.image.resize_bilinear`` and dropped again
    from the result. Through ``tffunc`` the module-level name ``resize``
    becomes a regular callable taking an image array and a target size.
    """
    batched = tf.expand_dims(img, 0)
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0, :, :, :]
114 |
115 |
def calc_grad_tiled(img, t_grad, t_score, t_obj, tile_size=512):
    """Compute the value of tensor t_grad over the image in a tiled way.

    The image is rolled by a random offset before tiling and the gradient is
    rolled back afterwards, so tile boundaries fall in different places on
    every call and blur out over multiple iterations.

    Args:
        img: image array; the first two axes are tiled.
        t_grad: gradient tensor evaluated on every tile.
        t_score: score tensor evaluated alongside the gradient (its value
            is not used here).
        t_obj: objective tensor, evaluated once on the top-left tile only
            to print its shape.
        tile_size: edge length of the square tiles.

    Returns:
        Array shaped like ``img`` holding the gradient; positions not
        covered by any tile stay zero.
    """
    height, width = img.shape[:2]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)
    grad = np.zeros_like(img)

    # Report the objective's shape for the first (top-left) tile.
    first_tile = shifted[0:tile_size, 0:tile_size]
    act_obj = sess.run(t_obj, {t_input: first_tile})
    print('objective tensor size', act_obj.shape)

    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        for left in col_starts:
            tile = shifted[top:top + tile_size, left:left + tile_size]
            tile_grad, _ = sess.run([t_grad, t_score], {t_input: tile})
            grad[top:top + tile_size, left:left + tile_size] = tile_grad

    # Undo the random shift so the gradient lines up with the input image.
    unshifted = np.roll(grad, -shift_x, 1)
    return np.roll(unshifted, -shift_y, 0)
148 |
149 |
# render_multiscale: gradient ascent on the mean of t_obj, repeated over
# octave_n scales. Starts from a copy of img0; before every octave after the
# first, the image is resized by octave_scale. Each of the iter_n steps adds
# the std-normalized tiled gradient times `step` to the image.
# Saves './results/multi_scale_result_<i>_<octave>' via showarray; there is
# no return statement.
# NOTE(review): indentation is not preserved in this listing, so whether the
# save at the end runs once per iteration or once per octave cannot be
# confirmed here.
150 | def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
151 | t_score = tf.reduce_mean(t_obj) # defining the optimization objective
152 | t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
153 |
154 | img = img0.copy()  # work on a copy so img0 itself is not modified
155 | for octave in range(octave_n):
156 | if octave > 0:
157 | hw = np.float32(img.shape[:2]) * octave_scale  # next (larger) height/width
158 | img = resize(img, np.int32(hw))
159 | for i in range(iter_n):
160 | g = calc_grad_tiled(img, t_grad, t_score, t_obj)
161 | # normalizing the gradient, so the same step size should work
162 | g /= g.std() + 1e-8 # for different layers and networks
163 | img += g * step
164 | print('o: ' ,octave,'i: ',i, 'size:', g.shape, end=' ')
165 | # clear_output()
166 |
167 |
168 | fname = './results/multi_scale_result_' + str(i)+ '_'+str(octave)
169 | showarray(visstd(img), fname)
170 |
171 |
172 | #render_multiscale(T(layer)[:, :, :, channel])
173 |
174 |
175 | #************************************************************
176 | # deep dream
177 | #**************************************************************
178 |
179 | print('**** deep dream ****')
180 |
def render_deepdream(t_obj, img0=img_noise,
                     iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    """Amplify the features selected by ``t_obj`` in ``img0`` (DeepDream).

    The image is first split into ``octave_n`` octaves: it is repeatedly
    downscaled by ``octave_scale`` and the detail lost at each step is kept.
    Starting from the smallest octave, ``iter_n`` gradient-ascent steps are
    run per octave; before each following octave the image is upscaled and
    the stored detail is added back.

    Args:
        t_obj: tensor whose mean is maximized.
        img0: starting image array; it is copied and never modified.
        iter_n: gradient-ascent steps per octave.
        step: step size, applied to the gradient divided by its mean
            absolute value.
        octave_n: number of octaves.
        octave_scale: size ratio between consecutive octaves.

    Side effects:
        Prints progress messages and saves
        './results/deep_dream_result_oct_<octave>_iter_<i>' after every step.
    """
    print('calculating score and grad functions...')
    t_score = tf.reduce_mean(t_obj)  # the optimization objective
    t_grad = tf.gradients(t_score, t_input)[0]  # gradient w.r.t. the input image
    print('done!')

    # split the image into a number of octaves
    print('splitting the image into octaves...')
    # Work on a copy: with octave_n == 1 no resize ever rebinds `img`, so the
    # in-place updates below would otherwise modify the caller's array (or
    # the shared default image).
    img = img0.copy()
    octaves = []
    for i in range(octave_n - 1):
        print('octave i:', i)

        hw = img.shape[:2]
        print('calculate lo', i)
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))

        print('calculate hi', i)
        # Detail lost by downscaling: the image minus its re-upscaled low-res copy.
        hi = img - resize(lo, hw)
        img = lo

        octaves.append(hi)
        print('save hi and lo results', i)
    print('done!')

    print('generate details octave by octave...')
    # generate details octave by octave
    for octave in range(octave_n):
        print('octave:', octave)
        if octave > 0:
            print('resize image:', octave)
            # Octaves were stored from largest to smallest, so walk them
            # from the end.
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for i in range(iter_n):
            print('calculate grad iter:', i, 'octave:', octave)
            g = calc_grad_tiled(img, t_grad, t_score, t_obj)
            img += g * (step / (np.abs(g).mean() + 1e-7))

            fname = './results/deep_dream_result_oct_' + str(octave) + '_iter_' + str(i)
            showarray(img / 255.0, fname)
    print('done!')
234 |
235 | #img0 = PIL.Image.open('./source_images/pilatus800.jpg')
236 | #img0 = PIL.Image.open('mmd_22000_01_9000.png')
237 | img0 = PIL.Image.open('./source_images/0016_9000.png')
238 | img0 = np.float32(img0)
239 | #showarray(img0/255.0)
240 | #render_deepdream(tf.square(T('mixed4c')), img0)
241 | print('image read')
242 |
243 | #render_deepdream(T(layer)[:,:,:,139], img0, iter_n=20, step=1.5, octave_n=4)
244 | #render_deepdream(tf.square(T('mixed4c')), img0, iter_n=20, octave_n=10)
245 | render_deepdream(T(layer)[:,:,:,139], img0, iter_n=20, octave_n=19)
246 | #render_deepdream(T(layer)[:,:,:,65]+T(layer)[:,:,:,139], img0, iter_n=20, octave_n=12)
247 |
--------------------------------------------------------------------------------
/deep_dream_demo_multiscale.py:
--------------------------------------------------------------------------------
1 |
2 | # boilerplate code
3 | from __future__ import print_function
4 | import os
5 | from io import BytesIO
6 | import numpy as np
7 | from functools import partial
8 | import PIL.Image
9 | #from IPython.display import clear_output, Image, display, HTML
10 |
11 |
12 | import tensorflow as tf
13 |
14 | #!wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip
15 |
16 | model_fn = 'tensorflow_inception_graph.pb'
17 |
18 | # creating TensorFlow session and loading the model
19 | graph = tf.Graph()
20 | sess = tf.InteractiveSession(graph=graph)
21 | with tf.gfile.FastGFile(model_fn, 'rb') as f:
22 | graph_def = tf.GraphDef()
23 | graph_def.ParseFromString(f.read())
24 | t_input = tf.placeholder(np.float32, name='input') # define the input tensor
25 | imagenet_mean = 117.0
26 | t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0)
27 | tf.import_graph_def(graph_def, {'input':t_preprocessed})
28 |
29 | layers = [op.name for op in graph.get_operations() if op.type=='Conv2D' and 'import/' in op.name]
30 | feature_nums = [int(graph.get_tensor_by_name(name+':0').get_shape()[-1]) for name in layers]
31 |
32 | print('Number of layers', len(layers))
33 | print('Total number of feature channels:', sum(feature_nums))
34 |
35 |
36 | #***************************************************
37 |
38 | # Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity
39 | # to have non-zero gradients for features with negative initial activations.
40 | layer = 'mixed4d_3x3_bottleneck_pre_relu'
41 | channel = 139 # picking some feature channel to visualize
42 |
43 | # start with a gray image with a little noise
44 | img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
45 |
46 |
def showarray(a, fname, fmt='jpeg'):
    """Save an image array to ``fname`` using PIL.

    Args:
        a: array of pixel values; values are clipped to [0, 1] and then
            scaled to 8-bit integers before saving.
        fname: destination path (or any other target accepted by PIL's
            ``save``).
        fmt: PIL format name used for encoding, independent of the file
            name's extension.
    """
    # PIL does not create missing directories, so make sure the target
    # folder (e.g. './results') exists before saving.
    if isinstance(fname, str):
        out_dir = os.path.dirname(fname)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)
    pixels = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(pixels).save(fname, fmt)
53 |
54 |
55 |
def visstd(a, s=0.1):
    """Normalize the image range for visualization.

    The array is centered on its mean, divided by its standard deviation
    (floored at 1e-4 so constant images do not divide by zero), scaled by
    ``s`` and shifted to be centered on 0.5.
    """
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5
59 |
60 |
def T(layer):
    """Return the output tensor (index 0) of ``layer`` in the imported graph."""
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
64 |
65 |
def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
    """Run plain gradient ascent on the input image to maximize ``t_obj``.

    Args:
        t_obj: tensor whose mean is used as the optimization objective.
        img0: starting image; it is copied, so the caller's array is left
            untouched.
        iter_n: number of gradient-ascent steps.
        step: step size applied to the std-normalized gradient.

    Side effects:
        Writes './results/result_<i>' after every step and
        './results/result_final' at the end; prints the objective shape and
        the score of every step. Note that the snapshot of the starting
        image shares its path ('./results/result_0') with the image written
        at step 0.
    """
    # The objective is the mean of the given tensor; we want to maximize it.
    t_score = tf.reduce_mean(t_obj)
    # Gradient of the objective with respect to the input image.
    t_grad = tf.gradients(t_score, t_input)[0]

    img = img0.copy()
    showarray(visstd(img), './results/result_0')

    # Probe the objective on the actual starting image (not the global
    # noise image) so the reported shape matches what is being optimized.
    act_obj = sess.run(t_obj, {t_input: img})
    print('objective tensor size', act_obj.shape)

    for i in range(iter_n):
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # Normalize the gradient so the same step size works for
        # different layers and networks.
        g /= g.std() + 1e-8
        img += g * step
        print(i, ' ', score)

        fname = './results/result_' + str(i)
        showarray(visstd(img), fname)
    showarray(visstd(img), './results/result_final')
89 |
90 | render_naive(T(layer)[:, :, :, channel])
91 |
92 | #***************************************************
93 |
94 |
95 | #***************************************************
96 |
97 | #Multiscale image generation
98 | # We are going to apply gradient ascent on multiple scales.
99 | # Details formed on smaller scale will be upscaled and augmented with additional details on the next scale.
100 |
101 | #With multiscale image generation it may be tempting to set the number of octaves to some high value to produce wallpaper-sized images.
102 | # Storing network activations and backprop values will quickly run out of GPU memory in this case. There is a simple trick to avoid this:
103 | # split the image into smaller tiles and compute each tile gradient independently.
104 | #Applying random shifts to the image before every iteration helps avoid tile seams and improves the overall image quality.
105 |
106 | print('**** Multiscale ****')
107 |
def tffunc(*argtypes):
    """Turn a TF-graph-building function into an ordinary callable.

    One placeholder is created per entry of ``argtypes``. The returned
    decorator calls the wrapped function once with those placeholders to
    build the output tensor; the resulting callable feeds its positional
    arguments into the placeholders and evaluates that tensor. An optional
    ``session`` keyword selects the session used for evaluation.
    See "resize" function below.
    """
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = {ph: value for ph, value in zip(placeholders, args)}
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap
119 |
120 | # Helper function that uses TF to resize an image
@tffunc(np.float32, np.int32)
def resize(img, size):
    """Bilinearly resize a single image to ``size`` using TensorFlow.

    A batch axis is added for ``tf.image.resize_bilinear`` and dropped again
    from the result. Through ``tffunc`` the module-level name ``resize``
    becomes a regular callable taking an image array and a target size.
    """
    batched = tf.expand_dims(img, 0)
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0, :, :, :]
125 |
126 |
def calc_grad_tiled(img, t_grad, t_obj, tile_size=512):
    """Compute the value of tensor t_grad over the image in a tiled way.

    The image is rolled by a random offset before tiling and the gradient is
    rolled back afterwards, so tile boundaries fall in different places on
    every call and blur out over multiple iterations.

    Args:
        img: image array; the first two axes are tiled.
        t_grad: gradient tensor evaluated on every tile.
        t_obj: objective tensor, evaluated once on the top-left tile only
            to print its shape.
        tile_size: edge length of the square tiles.

    Returns:
        Array shaped like ``img`` holding the gradient; positions not
        covered by any tile stay zero.
    """
    print('tile size', tile_size)

    height, width = img.shape[:2]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)
    grad = np.zeros_like(img)

    # Report the objective's shape for the first (top-left) tile.
    first_tile = shifted[0:tile_size, 0:tile_size]
    act_obj = sess.run(t_obj, {t_input: first_tile})
    print('objective tensor size', act_obj.shape)

    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        for left in col_starts:
            tile = shifted[top:top + tile_size, left:left + tile_size]
            tile_grad = sess.run(t_grad, {t_input: tile})
            grad[top:top + tile_size, left:left + tile_size] = tile_grad
            print('x:', left, 'y:', top)

    # Undo the random shift so the gradient lines up with the input image.
    unshifted = np.roll(grad, -shift_x, 1)
    return np.roll(unshifted, -shift_y, 0)
153 |
154 |
# render_multiscale: gradient ascent on the mean of t_obj, repeated over
# octave_n scales. Starts from a copy of img0; before every octave after the
# first, the image is resized by octave_scale. Each of the iter_n steps adds
# the std-normalized tiled gradient times `step` to the image.
# Saves './results/multi_scale_result_<i>_<octave>' via showarray; there is
# no return statement.
# NOTE(review): indentation is not preserved in this listing, so whether the
# save at the end runs once per iteration or once per octave cannot be
# confirmed here.
155 | def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
156 | t_score = tf.reduce_mean(t_obj) # defining the optimization objective
157 | t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
158 |
159 | img = img0.copy()  # work on a copy so img0 itself is not modified
160 | for octave in range(octave_n):
161 | if octave > 0:
162 | hw = np.float32(img.shape[:2]) * octave_scale  # next (larger) height/width
163 | img = resize(img, np.int32(hw))
164 | for i in range(iter_n):
165 | g = calc_grad_tiled(img, t_grad,t_obj)
166 | # normalizing the gradient, so the same step size should work
167 | g /= g.std() + 1e-8 # for different layers and networks
168 | img += g * step
169 | print('o: ' ,octave,'i: ',i, 'size:', g.shape, end=' ')
170 | # clear_output()
171 |
172 |
173 | fname = './results/multi_scale_result_' + str(i)+ '_'+str(octave)
174 | showarray(visstd(img), fname)
175 |
176 |
177 | render_multiscale(T(layer)[:, :, :, channel])
178 |
179 | #***************************************************
180 |
181 |
182 |
--------------------------------------------------------------------------------
/deep_dream_demo_v2.py:
--------------------------------------------------------------------------------
1 |
2 | # boilerplate code
3 | from __future__ import print_function
4 | import os
5 | from io import BytesIO
6 | import numpy as np
7 | from functools import partial
8 | import PIL.Image
9 | #from IPython.display import clear_output, Image, display, HTML
10 |
11 |
12 | import tensorflow as tf
13 |
14 | #!wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip
15 |
16 | model_fn = 'tensorflow_inception_graph.pb'
17 |
18 | # creating TensorFlow session and loading the model
19 | graph = tf.Graph()
20 | sess = tf.InteractiveSession(graph=graph)
21 | with tf.gfile.FastGFile(model_fn, 'rb') as f:
22 | graph_def = tf.GraphDef()
23 | graph_def.ParseFromString(f.read())
24 | t_input = tf.placeholder(np.float32, name='input') # define the input tensor
25 | imagenet_mean = 117.0
26 | t_preprocessed = tf.expand_dims(t_input-imagenet_mean, 0)
27 | tf.import_graph_def(graph_def, {'input':t_preprocessed})
28 |
29 | layers = [op.name for op in graph.get_operations() if op.type=='Conv2D' and 'import/' in op.name]
30 | feature_nums = [int(graph.get_tensor_by_name(name+':0').get_shape()[-1]) for name in layers]
31 |
32 | print('Number of layers', len(layers))
33 | print('Total number of feature channels:', sum(feature_nums))
34 |
35 |
36 | #***************************************************
37 |
38 | # Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity
39 | # to have non-zero gradients for features with negative initial activations.
40 | layer = 'mixed4d_3x3_bottleneck_pre_relu'
41 | channel = 139 # picking some feature channel to visualize
42 |
43 | # start with a gray image with a little noise
44 | img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
45 |
46 |
def showarray(a, fname, fmt='jpeg'):
    """Save an image array to ``fname`` using PIL.

    Args:
        a: array of pixel values; values are clipped to [0, 1] and then
            scaled to 8-bit integers before saving.
        fname: destination path (or any other target accepted by PIL's
            ``save``).
        fmt: PIL format name used for encoding, independent of the file
            name's extension.
    """
    # PIL does not create missing directories, so make sure the target
    # folder (e.g. './results') exists before saving.
    if isinstance(fname, str):
        out_dir = os.path.dirname(fname)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)
    pixels = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(pixels).save(fname, fmt)
53 |
54 |
55 |
def visstd(a, s=0.1):
    """Normalize the image range for visualization.

    The array is centered on its mean, divided by its standard deviation
    (floored at 1e-4 so constant images do not divide by zero), scaled by
    ``s`` and shifted to be centered on 0.5.
    """
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5
59 |
60 |
def T(layer):
    """Return the output tensor (index 0) of ``layer`` in the imported graph."""
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
64 |
65 |
def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
    """Run plain gradient ascent on the input image to maximize ``t_obj``.

    Args:
        t_obj: tensor whose mean is used as the optimization objective.
        img0: starting image; it is copied, so the caller's array is left
            untouched.
        iter_n: number of gradient-ascent steps.
        step: step size applied to the std-normalized gradient.

    Side effects:
        Writes './results/result_<i>' after every step and
        './results/result_final' at the end; prints the objective shape and
        the score of every step. Note that the snapshot of the starting
        image shares its path ('./results/result_0') with the image written
        at step 0.
    """
    # The objective is the mean of the given tensor; we want to maximize it.
    t_score = tf.reduce_mean(t_obj)
    # Gradient of the objective with respect to the input image.
    t_grad = tf.gradients(t_score, t_input)[0]

    img = img0.copy()
    showarray(visstd(img), './results/result_0')

    # Probe the objective on the actual starting image (not the global
    # noise image) so the reported shape matches what is being optimized.
    act_obj = sess.run(t_obj, {t_input: img})
    print('objective tensor size', act_obj.shape)

    for i in range(iter_n):
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # Normalize the gradient so the same step size works for
        # different layers and networks.
        g /= g.std() + 1e-8
        img += g * step
        print(i, ' ', score)

        fname = './results/result_' + str(i)
        showarray(visstd(img), fname)
    showarray(visstd(img), './results/result_final')
89 |
90 | render_naive(T(layer)[:, :, :, channel])
91 |
92 | #***************************************************
93 |
94 |
95 | #***************************************************
96 |
97 | #Multiscale image generation
98 | # We are going to apply gradient ascent on multiple scales.
99 | # Details formed on smaller scale will be upscaled and augmented with additional details on the next scale.
100 |
101 | #With multiscale image generation it may be tempting to set the number of octaves to some high value to produce wallpaper-sized images.
102 | # Storing network activations and backprop values will quickly run out of GPU memory in this case. There is a simple trick to avoid this:
103 | # split the image into smaller tiles and compute each tile gradient independently.
104 | #Applying random shifts to the image before every iteration helps avoid tile seams and improves the overall image quality.
105 |
106 | print('**** Multiscale ****')
107 |
def tffunc(*argtypes):
    """Turn a TF-graph-building function into an ordinary callable.

    One placeholder is created per entry of ``argtypes``. The returned
    decorator calls the wrapped function once with those placeholders to
    build the output tensor; the resulting callable feeds its positional
    arguments into the placeholders and evaluates that tensor. An optional
    ``session`` keyword selects the session used for evaluation.
    See "resize" function below.
    """
    placeholders = [tf.placeholder(argtype) for argtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = {ph: value for ph, value in zip(placeholders, args)}
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap
119 |
120 | # Helper function that uses TF to resize an image
@tffunc(np.float32, np.int32)
def resize(img, size):
    """Bilinearly resize a single image to ``size`` using TensorFlow.

    A batch axis is added for ``tf.image.resize_bilinear`` and dropped again
    from the result. Through ``tffunc`` the module-level name ``resize``
    becomes a regular callable taking an image array and a target size.
    """
    batched = tf.expand_dims(img, 0)
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0, :, :, :]
125 |
126 |
# calc_grad_tiled: evaluates t_grad tile by tile over a randomly rolled copy
# of img and returns the gradient rolled back by the same offsets. t_obj is
# evaluated once on the top-left tile only to print its shape; t_score is
# fetched together with the gradient and printed.
# NOTE(review): indentation is not preserved in this listing, so it cannot be
# confirmed here whether print('score: ', score) runs per tile or once after
# the tile loops.
127 | def calc_grad_tiled(img, t_grad, t_score, t_obj, tile_size=512):
128 | '''Compute the value of tensor t_grad over the image in a tiled way.
129 | Random shifts are applied to the image to blur tile boundaries over
130 | multiple iterations.'''
131 | sz = tile_size
132 | #print('tile size', tile_size)
133 |
134 | h, w = img.shape[:2]
135 | sx, sy = np.random.randint(sz, size=2)  # random offsets in [0, sz)
136 | img_shift = np.roll(np.roll(img, sx, 1), sy, 0)  # roll by sx along axis 1, then by sy along axis 0
137 | grad = np.zeros_like(img)  # positions not covered by a tile stay zero
138 |
139 | y=0
140 | x=0
141 | sub = img_shift[y:y + sz, x:x + sz]  # top-left tile, used only for the shape report below
142 | act_obj = sess.run(t_obj, {t_input: sub})
143 | print('objective tensor size', act_obj.shape)
144 |
145 | for y in range(0, max(h-sz//2, sz),sz):
146 | for x in range(0, max(w-sz//2, sz),sz):
147 | sub = img_shift[y:y+sz,x:x+sz]
148 | g, score = sess.run([t_grad,t_score ], {t_input: sub})
149 | #score = sess.run(t_score, {input: sub})
150 | grad[y:y+sz,x:x+sz] = g
151 | #print('x:',x,'y:',y)
152 |
153 |
154 | print('score: ', score)
155 |
156 |
157 |
158 | return np.roll(np.roll(grad, -sx, 1), -sy, 0)  # undo the random roll
159 |
160 |
# render_multiscale: gradient ascent on the mean of t_obj, repeated over
# octave_n scales. Starts from a copy of img0; before every octave after the
# first, the image is resized by octave_scale. Each of the iter_n steps adds
# the std-normalized tiled gradient times `step` to the image.
# Saves './results/multi_scale_result_<i>_<octave>' via showarray; there is
# no return statement.
# NOTE(review): indentation is not preserved in this listing, so whether the
# save at the end runs once per iteration or once per octave cannot be
# confirmed here.
161 | def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
162 | t_score = tf.reduce_mean(t_obj) # defining the optimization objective
163 | t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
164 |
165 | img = img0.copy()  # work on a copy so img0 itself is not modified
166 | for octave in range(octave_n):
167 | if octave > 0:
168 | hw = np.float32(img.shape[:2]) * octave_scale  # next (larger) height/width
169 | img = resize(img, np.int32(hw))
170 | for i in range(iter_n):
171 | g = calc_grad_tiled(img, t_grad, t_score, t_obj)
172 | # normalizing the gradient, so the same step size should work
173 | g /= g.std() + 1e-8 # for different layers and networks
174 | img += g * step
175 | print('o: ' ,octave,'i: ',i, 'size:', g.shape, end=' ')
176 | # clear_output()
177 |
178 |
179 | fname = './results/multi_scale_result_' + str(i)+ '_'+str(octave)
180 | showarray(visstd(img), fname)
181 |
182 |
183 | render_multiscale(T(layer)[:, :, :, channel])
184 |
185 | #***************************************************
186 | #Laplacian Pyramid Gradient Normalization
187 | #This looks better, but the resulting images mostly contain high frequencies. Can we improve it?
188 | # One way is to add a
189 | # smoothness prior into the optimization objective. This will effectively blur the image a little every iteration, suppressing
190 | # the higher frequencies, so that the lower frequencies can catch up.
191 | #This will require more iterations to produce a nice image. Why don't we just boost lower frequencies of the gradient instead?
192 | # One way to achieve this is through the Laplacian pyramid decomposition. We call the resulting technique Laplacian Pyramid Gradient Normalization.
193 | #*******************************************
194 |
195 | print('**** Laplace ***')
196 |
197 | k = np.float32([1,4,6,4,1])
198 | k = np.outer(k, k)
199 | k5x5 = k[:,:,None,None]/k.sum()*np.eye(3, dtype=np.float32)
200 |
def lap_split(img):
    """Split the image into lo and hi frequency components.

    Returns:
        A ``(lo, hi)`` pair: ``lo`` is ``img`` convolved with ``k5x5`` at
        stride 2, and ``hi`` is ``img`` minus ``lo`` brought back to the
        shape of ``img`` with a transposed convolution.
    """
    stride = [1, 2, 2, 1]
    with tf.name_scope('split'):
        lo = tf.nn.conv2d(img, k5x5, stride, 'SAME')
        upsampled = tf.nn.conv2d_transpose(lo, k5x5 * 4, tf.shape(img), stride)
        hi = img - upsampled
    return lo, hi
208 |
def lap_split_n(img, n):
    """Build Laplacian pyramid with n splits.

    Returns:
        A list of ``n + 1`` levels: the final low-frequency residual first,
        followed by the high-frequency components from the last split to
        the first.
    """
    highs = []
    lo = img
    for _ in range(n):
        lo, hi = lap_split(lo)
        highs.append(hi)
    return [lo] + highs[::-1]
217 |
def lap_merge(levels):
    """Merge Laplacian pyramid.

    Starting from the first level, the running image is brought to the shape
    of each following level with a transposed convolution and that level is
    added on top.
    """
    merged = levels[0]
    for detail in levels[1:]:
        with tf.name_scope('merge'):
            upsampled = tf.nn.conv2d_transpose(
                merged, k5x5 * 4, tf.shape(detail), [1, 2, 2, 1])
            merged = upsampled + detail
    return merged
225 |
def normalize_std(img, eps=1e-10):
    """Divide ``img`` by its root-mean-square value.

    The divisor is floored at ``eps`` so an all-zero input does not divide
    by zero.
    """
    with tf.name_scope('normalize'):
        mean_square = tf.reduce_mean(tf.square(img))
        std = tf.sqrt(mean_square)
        divisor = tf.maximum(std, eps)
        return img / divisor
231 |
def lap_normalize(img, scale_n=4):
    """Perform the Laplacian pyramid normalization.

    A batch axis is added, the image is split into ``scale_n + 1`` pyramid
    levels, every level is normalized with ``normalize_std``, and the levels
    are merged back; the batch axis is dropped from the result.
    """
    batched = tf.expand_dims(img, 0)
    pyramid = lap_split_n(batched, scale_n)
    normalized = [normalize_std(level) for level in pyramid]
    merged = lap_merge(normalized)
    return merged[0, :, :, :]
239 |
# render_lapnorm: like render_multiscale, but each tiled gradient is passed
# through the Laplacian-pyramid normalization (lap_n splits) before being
# added to the image. Starts from a copy of img0 and resizes by octave_scale
# before every octave after the first.
# Saves './results/laplace_result_<i>_<octave>' via showarray; there is no
# return statement.
# NOTE(review): the visfunc parameter is never used in this body; the save
# step calls visstd directly.
# NOTE(review): indentation is not preserved in this listing, so whether the
# save runs once per iteration or once per octave cannot be confirmed here.
240 | def render_lapnorm(t_obj, img0=img_noise, visfunc=visstd,
241 | iter_n=10, step=1.0, octave_n=3, octave_scale=1.4, lap_n=4):
242 | t_score = tf.reduce_mean(t_obj) # defining the optimization objective
243 | t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
244 | # build the laplacian normalization graph
245 | lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n))
246 |
247 | img = img0.copy()  # work on a copy so img0 itself is not modified
248 | for octave in range(octave_n):
249 | if octave>0:
250 | hw = np.float32(img.shape[:2])*octave_scale  # next (larger) height/width
251 | img = resize(img, np.int32(hw))
252 | for i in range(iter_n):
253 | g = calc_grad_tiled(img, t_grad, t_score, t_obj)
254 | g = lap_norm_func(g)  # replaces the plain std normalization used in render_multiscale
255 | img += g*step
256 | #print('.', end = ' ')
257 | print('o: ', octave, 'i: ', i, 'size:', g.shape, end=' ')
258 |
259 | fname = './results/laplace_result_' + str(i) + '_' + str(octave)
260 | showarray(visstd(img), fname)
261 |
262 |
263 | render_lapnorm(T(layer)[:,:,:,channel])
264 |
265 |
266 |
267 |
268 | #************************************************************
269 | # deep dream
270 | #**************************************************************
271 |
272 | print('**** deep dream ****')
273 |
# render_deepdream: splits img0 into octave_n octaves (repeated downscaling by
# octave_scale, keeping the detail lost at each step), then runs iter_n
# gradient-ascent steps per octave, upscaling and adding the stored detail
# back before each octave after the first.
# Saves './results/deep_dream_result_<i>_<octave>' via showarray; there is no
# return statement.
# NOTE(review): img starts as img0 itself (no copy). With octave_n == 1 no
# resize rebinds img, so the in-place `img +=` below updates img0.
# NOTE(review): indentation is not preserved in this listing, so whether the
# save runs once per iteration or once per octave cannot be confirmed here.
274 | def render_deepdream(t_obj, img0=img_noise,
275 | iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
276 | t_score = tf.reduce_mean(t_obj) # defining the optimization objective
277 | t_grad = tf.gradients(t_score, t_input)[0] # behold the power of automatic differentiation!
278 |
279 | # split the image into a number of octaves
280 | img = img0
281 | octaves = []
282 | for i in range(octave_n - 1):
283 | hw = img.shape[:2]
284 | lo = resize(img, np.int32(np.float32(hw) / octave_scale))  # downscaled copy
285 | hi = img - resize(lo, hw)  # detail lost by the downscale
286 | img = lo
287 | octaves.append(hi)
288 |
289 | # generate details octave by octave
290 | for octave in range(octave_n):
291 | if octave > 0:
292 | hi = octaves[-octave]  # details were stored largest-first, so index from the end
293 | img = resize(img, hi.shape[:2]) + hi
294 | for i in range(iter_n):
295 | g = calc_grad_tiled(img, t_grad,t_score, t_obj)
296 | img += g * (step / (np.abs(g).mean() + 1e-7))  # step scaled by the gradient's mean absolute value
297 | print('.', end=' ')
298 | # clear_output()
299 |
300 | fname = './results/deep_dream_result_' + str(i) + '_' + str(octave)
301 | showarray(img / 255.0, fname)
302 |
303 |
304 |
305 | #img0 = PIL.Image.open('pilatus800.jpg')
306 | img0 = PIL.Image.open('mmd_22000_01_9000.png')
307 |
308 | img0 = np.float32(img0)
309 | #showarray(img0/255.0)
310 | #render_deepdream(tf.square(T('mixed4c')), img0)
311 |
312 | render_deepdream(T(layer)[:,:,:,139], img0, iter_n=20, step=5, octave_n=4)
--------------------------------------------------------------------------------
/deep_dream_laplace_demo.py:
--------------------------------------------------------------------------------
1 |
2 | # boilerplate code
3 | from __future__ import print_function
4 | import os
5 | from io import BytesIO
6 | import numpy as np
7 | from functools import partial
8 | import PIL.Image
9 | #from IPython.display import clear_output, Image, display, HTML
10 |
11 |
12 | import tensorflow as tf
13 |
# The pretrained Inception graph can be fetched with:
#   wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip

model_fn = 'tensorflow_inception_graph.pb'

# Create the TensorFlow session and load the frozen model into its graph.
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
# tf.gfile.GFile is the supported replacement for the deprecated FastGFile.
with tf.gfile.GFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')  # the image fed by the render functions
imagenet_mean = 117.0
# Subtract the mean and add a leading batch axis before wiring the placeholder
# in as the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})

# Names of all imported Conv2D ops and the size of each one's last output axis.
layers = [op.name for op in graph.get_operations() if op.type == 'Conv2D' and 'import/' in op.name]
feature_nums = [int(graph.get_tensor_by_name(name + ':0').get_shape()[-1]) for name in layers]

print('Number of layers', len(layers))
print('Total number of feature channels:', sum(feature_nums))


#***************************************************

# Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity
# to have non-zero gradients for features with negative initial activations.
layer = 'mixed4d_3x3_bottleneck_pre_relu'
channel = 139  # picking some feature channel to visualize

# Start with a gray image with a little noise (values in [100, 101)).
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
45 |
46 |
def showarray(a, fname, fmt='jpeg'):
    '''Save an image array to disk.

    a:     array of values nominally in [0, 1]; anything outside is clipped
           before scaling to 8-bit.
    fname: destination path, used verbatim (no extension is appended).
    fmt:   image format name handed to PIL's save().
    '''
    pixels = np.uint8(np.clip(a, 0, 1) * 255)

    # Callers write into './results/'; create the directory on first use so
    # the save does not fail on a fresh checkout.
    if isinstance(fname, str):
        out_dir = os.path.dirname(fname)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    PIL.Image.fromarray(pixels).save(fname, fmt)
53 |
54 |
55 |
def visstd(a, s=0.1):
    '''Rescale an array for display: zero mean, standard deviation s, then shift by 0.5.

    The divisor is floored at 1e-4 so a constant array maps to 0.5 everywhere
    instead of dividing by zero.
    '''
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5
59 |
60 |
def T(layer):
    '''Return output 0 of the imported op called `layer` (looked up under the "import/" prefix).'''
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
64 |
65 |
def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
    '''Single-scale gradient ascent on the input image.

    t_obj:  tensor whose mean is maximized (e.g. one channel of a layer).
    img0:   starting image; it is copied, never modified in place.
    iter_n: number of gradient-ascent steps.
    step:   step size applied to the std-normalized gradient.

    Writes './results/result_<i>' after every step and
    './results/result_final' at the end. Returns None.
    '''
    # The optimization objective: the mean of t_obj, which we want to maximize.
    t_score = tf.reduce_mean(t_obj)
    # Gradient of the objective with respect to the input image.
    t_grad = tf.gradients(t_score, t_input)[0]

    img = img0.copy()
    # Snapshot of the starting image; note that step 0 below reuses this name.
    showarray(visstd(img), './results/result_0')

    # Report the objective's shape for the image actually being optimized
    # (previously this always probed the global noise image).
    act_obj = sess.run(t_obj, {t_input: img})
    print('objective tensor size', act_obj.shape)

    for i in range(iter_n):
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # Normalize the gradient so the same step size works
        # for different layers and networks.
        g /= g.std() + 1e-8
        img += g * step
        print(i, ' ', score)

        fname = './results/result_' + str(i)
        showarray(visstd(img), fname)
    showarray(visstd(img), './results/result_final')
89 |
# Run the single-scale demo on the selected layer and channel.
render_naive(T(layer)[:, :, :, channel])

#***************************************************


#***************************************************

# Multiscale image generation
# We are going to apply gradient ascent on multiple scales.
# Details formed on a smaller scale will be upscaled and augmented with additional details on the next scale.

# With multiscale image generation it may be tempting to set the number of octaves to some high value
# to produce wallpaper-sized images. Storing network activations and backprop values will quickly run
# out of GPU memory in this case. There is a simple trick to avoid this: split the image into smaller
# tiles and compute each tile's gradient independently.
# Applying random shifts to the image before every iteration helps avoid tile seams and improves the
# overall image quality.

print('**** Multiscale ****')
107 |
def tffunc(*argtypes):
    '''Decorator factory turning a TF-graph-building function into a plain callable.

    One placeholder is created per entry of argtypes. The decorated function is
    invoked once with those placeholders to build its output tensor; the
    returned callable feeds its positional arguments into the placeholders and
    evaluates that tensor. An optional `session` keyword is forwarded to eval().
    See "resize" below for an example.
    '''
    placeholders = [tf.placeholder(dtype) for dtype in argtypes]

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            feed = dict(zip(placeholders, args))
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap
119 |
# Helper that uses TF to resize an image; after decoration it is called as
# resize(image_array, size) with plain (non-tensor) arguments.
@tffunc(np.float32, np.int32)
def resize(img, size):
    '''Bilinearly resize a single image to `size`.'''
    batched = tf.expand_dims(img, 0)  # resize_bilinear is applied to a batch
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0, :, :, :]
125 |
126 |
def calc_grad_tiled(img, t_grad, t_obj, tile_size=512):
    '''Evaluate t_grad over the image one tile at a time.

    The image is rolled by a random offset before tiling and the gradient is
    rolled back afterwards, so tile boundaries fall in different places on
    every call and seams blur out over multiple iterations.

    img:       image array; the first two axes are tiled.
    t_grad:    gradient tensor evaluated per tile.
    t_obj:     objective tensor, evaluated once on the first tile only to
               print its shape.
    tile_size: tile edge length in pixels.

    Returns an array shaped like img holding the assembled gradient.
    '''
    print('tile size', tile_size)

    height, width = img.shape[:2]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(img, shift_x, 1)
    shifted = np.roll(shifted, shift_y, 0)
    grad = np.zeros_like(img)

    # Diagnostic only: report the objective's shape for the top-left tile.
    first_tile = shifted[0:tile_size, 0:tile_size]
    probe = sess.run(t_obj, {t_input: first_tile})
    print('objective tensor size', probe.shape)

    # A trailing strip narrower than half a tile gets no tile of its own;
    # its gradient stays zero for this call.
    row_starts = range(0, max(height - tile_size // 2, tile_size), tile_size)
    col_starts = range(0, max(width - tile_size // 2, tile_size), tile_size)
    for top in row_starts:
        for left in col_starts:
            tile = shifted[top:top + tile_size, left:left + tile_size]
            tile_grad = sess.run(t_grad, {t_input: tile})
            grad[top:top + tile_size, left:left + tile_size] = tile_grad
            print('x:', left, 'y:', top)

    unshifted = np.roll(grad, -shift_x, 1)
    return np.roll(unshifted, -shift_y, 0)
153 |
154 |
def render_multiscale(t_obj, img0=img_noise, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
    '''Gradient ascent repeated over several image scales ("octaves").

    t_obj:        tensor whose mean is maximized.
    img0:         starting image (copied, not modified).
    iter_n:       ascent steps per octave.
    step:         step size applied to the std-normalized gradient.
    octave_n:     number of octaves.
    octave_scale: factor by which the image grows between octaves.

    After each octave the image is saved as
    './results/multi_scale_result_<last i>_<octave>'. Returns None.
    '''
    objective = tf.reduce_mean(t_obj)               # scalar to maximize
    t_grad = tf.gradients(objective, t_input)[0]    # d(objective) / d(image)

    img = img0.copy()
    for octave in range(octave_n):
        if octave > 0:
            # Upscale the previous result before adding detail at this scale.
            target_hw = np.float32(img.shape[:2]) * octave_scale
            img = resize(img, np.int32(target_hw))

        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad, t_obj)
            # Normalize so one step size works across layers and networks.
            g /= g.std() + 1e-8
            img += g * step
            print('o: ', octave, 'i: ', i, 'size:', g.shape, end=' ')

        out_name = './results/multi_scale_result_' + str(i) + '_' + str(octave)
        showarray(visstd(img), out_name)
175 |
176 |
# Run the multiscale demo on the selected layer and channel.
render_multiscale(T(layer)[:, :, :, channel])

#***************************************************
# Laplacian Pyramid Gradient Normalization
# The multiscale result looks better, but the resulting images mostly contain high frequencies.
# Can we improve it? One way is to add a smoothness prior to the optimization objective. This
# effectively blurs the image a little every iteration, suppressing the higher frequencies so
# that the lower frequencies can catch up. It also requires more iterations to produce a nice
# image. Why don't we just boost the lower frequencies of the gradient instead?
# One way to achieve this is through the Laplacian pyramid decomposition. We call the resulting
# technique Laplacian Pyramid Gradient Normalization.
#*******************************************

print('**** Laplace ***')
190 |
# 5x5 smoothing kernel built as the outer product of the taps [1, 4, 6, 4, 1].
k = np.float32([1, 4, 6, 4, 1])
k = k[:, None] * k[None, :]
# Normalize to unit sum and expand to shape (5, 5, 3, 3): the identity over the
# last two axes means each of the 3 channels is filtered independently.
k5x5 = (k / k.sum())[:, :, None, None] * np.eye(3, dtype=np.float32)
194 |
def lap_split(img):
    '''Split a batched image tensor into low- and high-frequency parts.

    The low part is img filtered with k5x5 at stride 2; the high part is what
    remains after the low part is expanded back to img's shape and subtracted.
    Returns (lo, hi).
    '''
    stride = [1, 2, 2, 1]
    with tf.name_scope('split'):
        low = tf.nn.conv2d(img, k5x5, stride, 'SAME')
        # The kernel is scaled by 4 for the stride-2 transpose convolution.
        low_expanded = tf.nn.conv2d_transpose(low, k5x5 * 4, tf.shape(img), stride)
        high = img - low_expanded
    return low, high
202 |
def lap_split_n(img, n):
    '''Build a Laplacian pyramid by applying lap_split n times.

    Returns a list of n + 1 tensors: the final low-frequency residual first,
    followed by the high-frequency levels from the last split to the first.
    '''
    highs = []
    low = img
    for _ in range(n):
        low, high = lap_split(low)
        highs.append(high)
    return [low] + highs[::-1]
211 |
def lap_merge(levels):
    '''Collapse a Laplacian pyramid (as returned by lap_split_n) into one tensor.

    Starting from levels[0], each step expands the running result to the shape
    of the next level and adds that level to it.
    '''
    merged = levels[0]
    for detail in levels[1:]:
        with tf.name_scope('merge'):
            expanded = tf.nn.conv2d_transpose(merged, k5x5 * 4, tf.shape(detail), [1, 2, 2, 1])
            merged = expanded + detail
    return merged
219 |
def normalize_std(img, eps=1e-10):
    '''Scale a tensor so that its root-mean-square value becomes 1.0.

    The divisor is sqrt(mean(img ** 2)) -- the mean is not subtracted first --
    and is floored at eps to avoid division by zero.
    '''
    with tf.name_scope('normalize'):
        mean_square = tf.reduce_mean(tf.square(img))
        rms = tf.sqrt(mean_square)
        return img / tf.maximum(rms, eps)
225 |
def lap_normalize(img, scale_n=4):
    '''Normalize every Laplacian-pyramid level of an image, then merge them back.

    img is given without a batch axis; one is added for the convolutions and
    removed again from the result. scale_n is the number of pyramid splits.
    '''
    batched = tf.expand_dims(img, 0)
    pyramid = [normalize_std(level) for level in lap_split_n(batched, scale_n)]
    merged = lap_merge(pyramid)
    return merged[0, :, :, :]
233 |
def render_lapnorm(t_obj, img0=img_noise, visfunc=visstd,
                   iter_n=10, step=1.0, octave_n=3, octave_scale=1.4, lap_n=4):
    '''Multiscale gradient ascent with Laplacian pyramid gradient normalization.

    t_obj:        tensor whose mean is maximized.
    img0:         starting image (copied, not modified).
    visfunc:      maps the working image to the range expected by showarray
                  before saving; defaults to visstd.
    iter_n:       ascent steps per octave.
    step:         step size applied to the normalized gradient.
    octave_n:     number of octaves.
    octave_scale: factor by which the image grows between octaves.
    lap_n:        number of Laplacian pyramid splits used for normalization.

    After each octave the image is saved as
    './results/laplace_result_<last i>_<octave>'. Returns None.
    '''
    t_score = tf.reduce_mean(t_obj)              # the optimization objective
    t_grad = tf.gradients(t_score, t_input)[0]   # d(objective) / d(image)
    # Build the Laplacian normalization graph once and wrap it as a function
    # that takes and returns plain arrays.
    lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n))

    img = img0.copy()
    for octave in range(octave_n):
        if octave > 0:
            # Upscale the previous result before adding detail at this scale.
            hw = np.float32(img.shape[:2]) * octave_scale
            img = resize(img, np.int32(hw))
        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad, t_obj)
            g = lap_norm_func(g)
            img += g * step
            print('.', end=' ')
        fname = './results/laplace_result_' + str(i) + '_' + str(octave)
        # Honor the caller-supplied visualization function (visstd was
        # previously hard-coded here, so the parameter had no effect).
        showarray(visfunc(img), fname)
253 |
254 |
255 | render_lapnorm(T(layer)[:,:,:,channel])
--------------------------------------------------------------------------------
/deep_dream_random_noise_demo.py:
--------------------------------------------------------------------------------
1 |
2 | # boilerplate code
3 | from __future__ import print_function
4 | import os
5 | from io import BytesIO
6 | import numpy as np
7 | from functools import partial
8 | import PIL.Image
9 | #from IPython.display import clear_output, Image, display, HTML
10 |
11 |
12 | import tensorflow as tf
13 |
# The pretrained Inception graph can be fetched with:
#   wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip

model_fn = 'tensorflow_inception_graph.pb'

# Create the TensorFlow session and load the frozen model into its graph.
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
# tf.gfile.GFile is the supported replacement for the deprecated FastGFile.
with tf.gfile.GFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')  # the image fed by render_naive
imagenet_mean = 117.0
# Subtract the mean and add a leading batch axis before wiring the placeholder
# in as the imported graph's 'input'.
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})

# Names of all imported Conv2D ops and the size of each one's last output axis.
layers = [op.name for op in graph.get_operations() if op.type == 'Conv2D' and 'import/' in op.name]
feature_nums = [int(graph.get_tensor_by_name(name + ':0').get_shape()[-1]) for name in layers]

print('Number of layers', len(layers))
print('Total number of feature channels:', sum(feature_nums))


#***************************************************

# Picking some internal layer. Note that we use outputs before applying the ReLU nonlinearity
# to have non-zero gradients for features with negative initial activations.
layer = 'mixed4d_3x3_bottleneck_pre_relu'
channel = 139  # picking some feature channel to visualize

# Start with a gray image with a little noise (values in [100, 101)).
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
45 |
46 |
def showarray(a, fname, fmt='jpeg'):
    '''Save an image array to disk.

    a:     array of values nominally in [0, 1]; anything outside is clipped
           before scaling to 8-bit.
    fname: destination path, used verbatim (no extension is appended).
    fmt:   image format name handed to PIL's save().
    '''
    pixels = np.uint8(np.clip(a, 0, 1) * 255)

    # Callers write into './results/'; create the directory on first use so
    # the save does not fail on a fresh checkout.
    if isinstance(fname, str):
        out_dir = os.path.dirname(fname)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    PIL.Image.fromarray(pixels).save(fname, fmt)
53 |
54 |
55 |
def visstd(a, s=0.1):
    '''Rescale an array for display: zero mean, standard deviation s, then shift by 0.5.

    The divisor is floored at 1e-4 so a constant array maps to 0.5 everywhere
    instead of dividing by zero.
    '''
    centered = a - a.mean()
    spread = max(a.std(), 1e-4)
    return centered / spread * s + 0.5
59 |
60 |
def T(layer):
    '''Return output 0 of the imported op called `layer` (looked up under the "import/" prefix).'''
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
64 |
65 |
def render_naive(t_obj, img0=img_noise, iter_n=20, step=1.0):
    '''Single-scale gradient ascent on the input image.

    t_obj:  tensor whose mean is maximized (e.g. one channel of a layer).
    img0:   starting image; it is copied, never modified in place.
    iter_n: number of gradient-ascent steps.
    step:   step size applied to the std-normalized gradient.

    Writes './results/result_<i>' after every step and
    './results/result_final' at the end. Returns None.
    '''
    # The optimization objective: the mean of t_obj, which we want to maximize.
    t_score = tf.reduce_mean(t_obj)
    # Gradient of the objective with respect to the input image.
    t_grad = tf.gradients(t_score, t_input)[0]

    img = img0.copy()
    # Snapshot of the starting image; note that step 0 below reuses this name.
    showarray(visstd(img), './results/result_0')

    # Report the objective's shape for the image actually being optimized
    # (previously this always probed the global noise image).
    act_obj = sess.run(t_obj, {t_input: img})
    print('objective tensor size', act_obj.shape)

    for i in range(iter_n):
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # Normalize the gradient so the same step size works
        # for different layers and networks.
        g /= g.std() + 1e-8
        img += g * step
        print(i, ' ', score)

        fname = './results/result_' + str(i)
        showarray(visstd(img), fname)
    showarray(visstd(img), './results/result_final')
89 |
90 | render_naive(T(layer)[:, :, :, channel])
91 |
92 | #***************************************************
--------------------------------------------------------------------------------
/pilatus800.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bapoczos/deep-dream-tensorflow/a51ae13aea03e3f8503bdbb1f11c2be7b2537073/pilatus800.jpg
--------------------------------------------------------------------------------