├── util
│   ├── README.md
│   ├── __pycache__
│   │   ├── image.cpython-36.pyc
│   │   ├── layers.cpython-36.pyc
│   │   └── wrapper.cpython-36.pyc
│   ├── LICENSE
│   ├── io.py
│   ├── image.py
│   ├── mnist.py
│   ├── wrapper.py
│   └── layers.py
├── etc
│   ├── Sad.npf
│   ├── Angry.npf
│   ├── Happy.npf
│   ├── Neutral.npf
│   ├── emotion_vc_xmax.npf
│   └── emotion_vc_xmin.npf
├── model
│   ├── __pycache__
│   │   └── vawgan.cpython-36.pyc
│   ├── vae.py
│   ├── vawgan.py
│   └── vaegan.py
├── trainer
│   ├── __pycache__
│   │   ├── gan.cpython-36.pyc
│   │   ├── vae.cpython-36.pyc
│   │   └── vawgan.cpython-36.pyc
│   ├── vawgan.py
│   ├── vae.py
│   └── gan.py
├── audio.json
├── README.md
├── architecture-vawgan-vcc2016.json
├── validate.py
├── main-vawgan.py
├── models.py
├── convert-vawgan-cwt.py
└── analyzer.py

/util/README.md:
--------------------------------------------------------------------------------
1 | # jutil
2 | Some helper functions I used in my code
--------------------------------------------------------------------------------
/etc/Sad.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/Sad.npf
--------------------------------------------------------------------------------
/etc/Angry.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/Angry.npf
--------------------------------------------------------------------------------
/etc/Happy.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/Happy.npf
--------------------------------------------------------------------------------
/etc/Neutral.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/Neutral.npf
--------------------------------------------------------------------------------
/etc/emotion_vc_xmax.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/emotion_vc_xmax.npf
--------------------------------------------------------------------------------
/etc/emotion_vc_xmin.npf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/etc/emotion_vc_xmin.npf
--------------------------------------------------------------------------------
/util/__pycache__/image.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/util/__pycache__/image.cpython-36.pyc
--------------------------------------------------------------------------------
/model/__pycache__/vawgan.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/model/__pycache__/vawgan.cpython-36.pyc
--------------------------------------------------------------------------------
/trainer/__pycache__/gan.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/trainer/__pycache__/gan.cpython-36.pyc
--------------------------------------------------------------------------------
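A note on the binary assets above: as consumed by `convert_f0()` in `convert-vawgan-cwt.py` further down, each per-emotion `*.npf` (Sad/Angry/Happy/Neutral) is read as two `float32` values, a log-F0 mean and standard deviation for that emotion, while `emotion_vc_xmax.npf` / `emotion_vc_xmin.npf` hold the spectral bounds fed to `Tanhize`. Below is a minimal NumPy sketch of the log-Gaussian F0 conversion these statistics support; the file paths follow the repo layout, but the snippet itself is an illustrative restatement, not code from the repo:

```python
import numpy as np

# Each {emotion}.npf holds [mu, std] of log-F0 (float32), as read in convert_f0().
mu_s, std_s = np.fromfile('./etc/Neutral.npf', np.float32)
mu_t, std_t = np.fromfile('./etc/Angry.npf', np.float32)

def convert_f0_np(f0):
    '''Log-Gaussian normalized F0 transform; frames with f0 <= 1 are treated as unvoiced.'''
    voiced = f0 > 1.
    lf0 = np.where(voiced, np.log(np.where(voiced, f0, 1.)), f0)
    lf0 = np.where(voiced, (lf0 - mu_s) / std_s * std_t + mu_t, lf0)
    return np.where(voiced, np.exp(lf0), f0)
```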
/trainer/__pycache__/vae.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/trainer/__pycache__/vae.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/layers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/util/__pycache__/layers.cpython-36.pyc -------------------------------------------------------------------------------- /util/__pycache__/wrapper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/util/__pycache__/wrapper.cpython-36.pyc -------------------------------------------------------------------------------- /audio.json: -------------------------------------------------------------------------------- 1 | { 2 | "fs": 16000, 3 | "fft_size": 1024, 4 | "f0_ceil": 500.0, 5 | "f0_floor": 71.0, 6 | "frame_period": 5.0 7 | } 8 | -------------------------------------------------------------------------------- /trainer/__pycache__/vawgan.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KunZhou9646/controllable_evc_code/HEAD/trainer/__pycache__/vawgan.cpython-36.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Seen and Unseen emotional style transfer for voice conversion with a new emotional speech dataset 2 | 3 | This is an implementation of the paper "Seen and unseen emotional style transfer for voice conversion with a new emotional speech dataset" (Submitted to ICASSP 2021) 4 | (https://arxiv.org/abs/2010.14794) 5 | 6 | 7 | **Bibtex:** 8 | ``` 9 | @article{zhou2020seen, 10 | title={Seen and Unseen emotional style transfer for voice conversion with a new emotional speech dataset}, 11 | author={Zhou, Kun and Sisman, Berrak and Liu, Rui and Li, Haizhou}, 12 | journal={arXiv preprint arXiv:2010.14794}, 13 | year={2020} 14 | } 15 | 16 | ``` 17 | 18 | # Under construction 19 | -------------------------------------------------------------------------------- /util/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Chin-Cheng Hsu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /util/io.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | def int64_feature(value): 5 | if isinstance(value, list): 6 | value = [tf.train.Feature( 7 | int64_list=tf.train.Int64List(value=[v])) for v in value] 8 | return tf.train.FeatureList(feature=value) 9 | else: 10 | value = tf.train.Int64List(value=[value]) 11 | return tf.train.Feature(int64_list=value) 12 | 13 | 14 | def bytes_feature(value): 15 | if isinstance(value, list): 16 | value = [tf.train.Feature( 17 | bytes_list=tf.train.BytesList(value=[v])) for v in value] 18 | return tf.train.FeatureList(feature=value) 19 | else: 20 | value = tf.train.BytesList(value=[value]) 21 | return tf.train.Feature(bytes_list=value) 22 | 23 | 24 | def float_feature(value): 25 | if isinstance(value, list): 26 | value = [tf.train.Feature( 27 | float_list=tf.train.FloatList(value=[v])) for v in value] 28 | return tf.train.FeatureList(feature=value) 29 | else: 30 | value = tf.train.FloatList(value=[value]) 31 | return tf.train.Feature(float_list=value) 32 | 33 | 34 | def read_float64_as_float32(filename): 35 | x = np.fromfile(filename, np.float64) 36 | return x.astype(np.float32) 37 | -------------------------------------------------------------------------------- /architecture-vawgan-vcc2016.json: -------------------------------------------------------------------------------- 1 | { 2 | "mode": "VAWGAN", 3 | "hwc": [513, 1, 1], 4 | "z_dim": 128, 5 | "y_dim": 256, 6 | "y_emb_dim": 128, 7 | "f_dim": 1, 8 | "f_emb_dim": 1, 9 | "discriminator": { 10 | "merge_dim": 1024, 11 | "kernel": [[7, 1], [7, 1], [115, 1]], 12 | "stride": [[3, 1], [3, 1], [3, 1]], 13 | "output": [16, 32, 64], 14 | "l2-reg": 1e-6, 15 | "feature_layer": 1 16 | }, 17 | "encoder": { 18 | "kernel": [[7, 1], [7, 1], [7, 1], [7, 1], [7, 1]], 19 | "stride": [[3, 1], [3, 1], [3, 1], [3, 1], [3, 1]], 20 | "output": [16, 32, 64, 128, 256], 21 | "l2-reg": 1e-6 22 | }, 23 | "generator": { 24 | "hwc": [19, 1, 81], 25 | "merge_dim": 171, 26 | "kernel": [[9, 1], [7, 1], [7, 1], [1025, 1]], 27 | "stride": [[3, 1], [3, 1], [3, 1], [1, 1]], 28 | "output": [32, 16, 8, 1], 29 | "l2-reg": 1e-6 30 | }, 31 | "training": { 32 | "src_dir": "./data/bin/training_set/Neutral/*.bin", 33 | "trg_dir": "./data/bin/training_set/Happy/*.bin", 34 | "batch_size": 256, 35 | "lr": 1e-5, 36 | "lr_schedule": [ [999, 1e-4]], 37 | "beta1": 0.5, 38 | "beta2": 0.999, 39 | "nIterD": 5, 40 | "lambda": 10, 41 | "max_iter": 200000, 42 | "epoch_vae": 15, 43 | "epoch_vawgan": 45, 44 | "gamma": 50, 45 | "n_unroll": 5, 46 | "n_unroll_intense": 100, 47 | "clamping": 0.01 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /util/image.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def nchw_to_nhwc(x): 5 | return tf.transpose(x, [0, 2, 3, 1]) 6 | 7 | 8 | def nhwc_to_nchw(x): 9 | return tf.transpose(x, [0, 3, 1, 2]) 10 | 11 | 12 | def make_png_thumbnail(x, n): 13 | ''' 14 | Input: 15 | `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c] 16 | `n`: sqrt of the 
number of images 17 | 18 | Return: 19 | `tf.string` (bytes) of the PNG. 20 | (write these binary directly into a file) 21 | ''' 22 | with tf.name_scope('MakeThumbnail'): 23 | _, h, w, c = x.get_shape().as_list() 24 | x = tf.reshape(x, [n, n, h, w, c]) 25 | x = tf.transpose(x, [0, 2, 1, 3, 4]) 26 | x = tf.reshape(x, [n * h, n * w, c]) 27 | x = x / 2. + .5 28 | x = tf.image.convert_image_dtype(x, tf.uint8, saturate=True) 29 | x = tf.image.encode_png(x) 30 | return x 31 | 32 | 33 | def line(x, xa, xb, ya, yb): 34 | ''' a line determined by two points ''' 35 | return ya + (x - xa) * (yb - ya) / (xb - xa) 36 | 37 | 38 | def clip_to_boundary(line1, line2, minval, maxval): 39 | x = tf.minimum(line1, line2) 40 | x = tf.minimum(x, maxval) 41 | x = tf.maximum(x, minval) 42 | return x 43 | 44 | 45 | def gray2jet(x): 46 | ''' NHWC (channel last) format ''' 47 | with tf.name_scope('Gray2Jet'): 48 | r = clip_to_boundary( 49 | line(x, .3515, .66, 0., 1.), 50 | line(x, .8867, 1., 1., .5), 51 | minval=0., 52 | maxval=1., 53 | ) 54 | g = clip_to_boundary( 55 | line(x, .125, .375, 0., 1.), 56 | line(x, .64, .91, 1., 0.), 57 | minval=0., 58 | maxval=1., 59 | ) 60 | b = clip_to_boundary( 61 | line(x, .0, .1132, 0.5, 1.0), 62 | line(x, .34, .648, 1., 0.), 63 | minval=0., 64 | maxval=1., 65 | ) 66 | return tf.concat([r, g, b], axis=-1) 67 | 68 | 69 | def make_png_jet_thumbnail(x, n): 70 | ''' 71 | Input: 72 | `x`: Tensor, value range=[-1, 1), shape=[n*n, h, w, c] 73 | `n`: sqrt of the number of images 74 | 75 | Return: 76 | `tf.string` (bytes) of the PNG. 77 | (write these binary directly into a file) 78 | ''' 79 | with tf.name_scope('MakeThumbnail'): 80 | _, h, w, c = x.get_shape().as_list() 81 | x = tf.reshape(x, [n, n, h, w, c]) 82 | x = tf.transpose(x, [0, 2, 1, 3, 4]) 83 | x = tf.reshape(x, [n * h, n * w, c]) 84 | x = x / 2. + .5 85 | x = gray2jet(x) 86 | x = tf.image.convert_image_dtype(x, tf.uint8, saturate=True) 87 | x = tf.image.encode_png(x) 88 | return x -------------------------------------------------------------------------------- /util/mnist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import keras 3 | 4 | 5 | def mnist_batcher_in_tanh_vector( 6 | batch_size, 7 | capacity=256, 8 | min_after_dequeue=128, 9 | ): 10 | (x, y), (_, _) = keras.datasets.mnist.load_data() 11 | x = tf.constant(x) 12 | x = tf.cast(x, tf.float32) 13 | x = keras.layers.Flatten()(x) / 127.5 - 1. 14 | y = tf.cast(y, tf.int64) 15 | 16 | return tf.train.shuffle_batch( 17 | [x, y], 18 | batch_size=batch_size, 19 | capacity=capacity, 20 | min_after_dequeue=min_after_dequeue, 21 | enqueue_many=True 22 | ) 23 | 24 | 25 | def mnist16_batcher_in_tanh( 26 | batch_size, 27 | capacity=256, 28 | min_after_dequeue=128, 29 | ): 30 | ''' 31 | NCHW (channel first) 32 | ''' 33 | with tf.name_scope('MNIST_nchw'): 34 | (x, y), (_, _) = keras.datasets.mnist.load_data() 35 | x = x / 127.5 - 1. 36 | x = tf.constant(x, dtype=tf.float32) 37 | x = tf.expand_dims(x, -1) # n, h, w, c 38 | x = tf.image.resize_nearest_neighbor(x, [16, 16]) 39 | x = tf.transpose(x, [0, 3, 1, 2]) 40 | 41 | x = tf.Variable(x, trainable=False, name='image', dtype=tf.float32) 42 | y = tf.Variable(y, trainable=False, name='label', dtype=tf.int64) 43 | 44 | # TODO: using `slice_input_producer` slows down training!!! WHY? 45 | # 9.7 steps per sec -> 5.7 steps per sec 46 | # (but it consumes more GPU memory and GPU-Util, why?!) 
47 | 48 | x, y = tf.train.slice_input_producer([x, y]) #, shuffle=False) 49 | 50 | return tf.train.batch( 51 | # return tf.train.shuffle_batch( 52 | [x, y], 53 | batch_size=batch_size, 54 | num_threads=8, 55 | # capacity=capacity, 56 | # min_after_dequeue=min_after_dequeue, 57 | # enqueue_many=True 58 | ) 59 | 60 | def mnist16_batcher_in_z_norm( 61 | batch_size, 62 | capacity=256, 63 | min_after_dequeue=128, 64 | ): 65 | ''' 66 | NCHW (channel first) 67 | ''' 68 | (x, y), (_, _) = keras.datasets.mnist.load_data() 69 | x_mu = x.mean(0) 70 | x_std = x.std(0) 71 | x = (x - x_mu) / (x_std + 1e-6) 72 | y = tf.constant(y, dtype=tf.int64) 73 | # x = tf.constant(x, dtype=tf.float32) / 127.5 - 1. 74 | x = tf.constant(x, dtype=tf.float32) 75 | x = tf.clip_by_value(x, -5, 5) 76 | x = tf.expand_dims(x, -1) # n, h, w, c 77 | x = tf.image.resize_bilinear(x, [16, 16]) 78 | x = tf.transpose(x, [0, 3, 1, 2]) 79 | 80 | return tf.train.shuffle_batch( 81 | [x, y], 82 | batch_size=batch_size, 83 | capacity=capacity, 84 | min_after_dequeue=min_after_dequeue, 85 | enqueue_many=True 86 | ) 87 | -------------------------------------------------------------------------------- /validate.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | from models import MLPcVAE 9 | from vcc2016io import VCC2016TFRManager 10 | 11 | 12 | args = tf.app.flags.FLAGS 13 | tf.app.flags.DEFINE_string('logdir', 'logdir', 'log dir') 14 | tf.app.flags.DEFINE_integer('target_id', 9, 'target id (SF1 = 1, TM3 = 9)') 15 | tf.app.flags.DEFINE_string( 16 | 'file_pattern', None, 'filename filter') 17 | 18 | # speakers = ['SF1', 'SF2', 'SF3', 'SM1', 'SM2', 'TF1', 'TF2', 'TM1', 'TM2', 'TM3'] 19 | 20 | def main(): 21 | if args.logdir is None: 22 | raise ValueError('Please specify the logdir file') 23 | 24 | ckpt = get_checkpoint(args.logdir) 25 | 26 | if ckpt is None: 27 | raise ValueError('No checkpoints in {}'.format(args.logdir)) 28 | 29 | with open(os.path.join(args.logdir, 'architecture.json')) as f: 30 | arch = json.load(f) 31 | 32 | reader = VCC2016TFRManager() 33 | features = reader.read_whole(args.file_pattern, num_epochs=1) 34 | x = features['frame'] 35 | y = features['label'] 36 | filename = features['filename'] 37 | y_conv = y * 0 + args.target_id 38 | 39 | net = MLPcVAE(arch=arch, is_training=False) 40 | z = net.encode(x) 41 | xh = net.decode(z, y) 42 | x_conv = net.decode(z, y_conv) 43 | 44 | pre_train_saver = tf.train.Saver() 45 | def load_pretrain(sess): 46 | pre_train_saver.restore(sess, ckpt) 47 | sv = tf.train.Supervisor(init_fn=load_pretrain) 48 | gpu_options = tf.GPUOptions(allow_growth=True) 49 | sess_config = tf.ConfigProto( 50 | allow_soft_placement=True, 51 | gpu_options=gpu_options) 52 | with sv.managed_session(config=sess_config) as sess: 53 | for _ in range(reader.n_files): 54 | if sv.should_stop(): 55 | break 56 | fetch_dict = {'x': x, 'xh': xh, 'x_conv': x_conv, 'f': filename} 57 | results = sess.run(fetch_dict) 58 | plot_spectra(results) 59 | 60 | 61 | def get_checkpoint(logdir): 62 | ckpt = tf.train.get_checkpoint_state(logdir) 63 | if ckpt: 64 | return ckpt.model_checkpoint_path 65 | else: 66 | print('No checkpoint found') 67 | return None 68 | 69 | 70 | def plot_spectra(results): 71 | plt.figure(figsize=(10, 4)) 72 | plt.imshow( 73 | np.concatenate( 74 | [np.flipud(results['x'].T), 75 | np.flipud(results['xh'].T), 76 | np.flipud(results['x_conv'].T)], 77 | 
0),
78 |         aspect='auto',
79 |         cmap='jet',
80 |     )
81 |     plt.colorbar()
82 |     plt.title('Upper: Real input; Mid: Reconstruction; Lower: Conversion to target.')
83 |     plt.savefig(
84 |         os.path.join(
85 |             args.logdir,
86 |             '{}.png'.format(
87 |                 os.path.split(str(results['f'], 'utf-8'))[-1]
88 |             )
89 |         )
90 |     )
91 |
92 |
93 | if __name__ == '__main__':
94 |     main()
95 |
--------------------------------------------------------------------------------
/main-vawgan.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | from importlib import import_module
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 |
9 | from analyzer import Tanhize, read
10 | from util.wrapper import validate_log_dirs
11 |
12 | args = tf.app.flags.FLAGS
13 | tf.app.flags.DEFINE_string(
14 |     'corpus_name', 'emotion_vc', 'Corpus name')
15 | tf.app.flags.DEFINE_string(
16 |     'logdir_root', None, 'root of log dir')
17 | tf.app.flags.DEFINE_string(
18 |     'logdir', None, 'log dir')
19 | tf.app.flags.DEFINE_string(
20 |     'restore_from', None, 'restore from dir (not from *.ckpt)')
21 | tf.app.flags.DEFINE_string('gpu_cfg', None, 'GPU configuration')
22 | tf.app.flags.DEFINE_integer('summary_freq', 1000, 'Update summary')
23 | tf.app.flags.DEFINE_string(
24 |     'ckpt', None, 'specify the ckpt in restore_from (if there are multiple ckpts)')  # TODO
25 | tf.app.flags.DEFINE_string(
26 |     'architecture', 'architecture-vawgan-vcc2016.json', 'network architecture')
27 |
28 | tf.app.flags.DEFINE_string('model_module', 'model.vawgan', 'Model module')
29 | tf.app.flags.DEFINE_string('model', 'VAWGAN', 'Model: ConvVAE, VAWGAN')
30 |
31 | tf.app.flags.DEFINE_string('trainer_module', 'trainer.vawgan', 'Trainer module')
32 | tf.app.flags.DEFINE_string('trainer', 'VAWGANTrainer', 'Trainer: VAETrainer, VAWGANTrainer')
33 |
34 |
35 | def main(unused_args=None):
36 |     ''' NOTE: The input is rescaled to [-1, 1] '''
37 |     module = import_module(args.model_module, package=None)
38 |     MODEL = getattr(module, args.model)
39 |
40 |     module = import_module(args.trainer_module, package=None)
41 |     TRAINER = getattr(module, args.trainer)
42 |
43 |
44 |     dirs = validate_log_dirs(args)
45 |     tf.gfile.MakeDirs(dirs['logdir'])
46 |
47 |     with open(args.architecture) as f:
48 |         arch = json.load(f)
49 |
50 |     with open(os.path.join(dirs['logdir'], args.architecture), 'w') as f:
51 |         json.dump(arch, f, indent=4)
52 |
53 |     normalizer = Tanhize(
54 |         xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)),
55 |         xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)),
56 |     )
57 |
58 |     x_s, y_s, f0_s = read(
59 |         file_pattern=arch['training']['src_dir'],
60 |         batch_size=arch['training']['batch_size'],
61 |         capacity=2048,
62 |         min_after_dequeue=1024,
63 |         normalizer=normalizer,
64 |         data_format='NHWC',
65 |     )
66 |
67 |     x_t, y_t, f0_t = read(
68 |         file_pattern=arch['training']['trg_dir'],
69 |         batch_size=arch['training']['batch_size'],
70 |         capacity=2048,
71 |         min_after_dequeue=1024,
72 |         normalizer=normalizer,
73 |         data_format='NHWC',
74 |     )
75 |
76 |     machine = MODEL(arch, is_training=True)
77 |     # y_s_new = tf.stack([y_s,f0_s],axis=1)
78 |     # y_t_new = tf.stack([y_t,f0_t],axis=1)
79 |
80 |     loss = machine.loss(x_s, y_s, f0_s, x_t, y_t, f0_t)
81 |     trainer = TRAINER(loss, arch, args, dirs)
82 |     trainer.train(nIter=arch['training']['max_iter'], machine=machine)
83 |
84 |
85 | if __name__ == '__main__':
86 |     main()
87 |
--------------------------------------------------------------------------------
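The training entry point above scales every spectral frame into [-1, 1] with `Tanhize` (built from the corpus-wide extrema in `./etc/{corpus}_xmax.npf` / `_xmin.npf`) before it reaches the encoder. For reference, here is a plain-NumPy restatement of the forward/backward mapping; the TF version lives in `analyzer.py` further down, and this snippet is an illustrative sketch, not part of the repo:

```python
import numpy as np

def tanhize_forward(x, xmin, xmax):
    '''Map features to [-1, 1], mirroring Tanhize.forward_process in analyzer.py.'''
    x = (x - xmin) / (xmax - xmin)
    return np.clip(x, 0., 1.) * 2. - 1.

def tanhize_backward(x, xmin, xmax):
    '''Invert the scaling, mirroring Tanhize.backward_process.'''
    return (x * .5 + .5) * (xmax - xmin) + xmin
```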
/models.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | # from tensorflow.contrib import slim 3 | import tensorflow as tf 4 | from util.layers import GaussianLogDensity, GaussianKLD, \ 5 | GaussianSampleLayer, lrelu 6 | 7 | # TODO: 8 | # 1. Multi-stream? 9 | # 2. Separate MLP & CNN? 10 | # (or we can also just use CNN by viewing MLP input as D-channels) 11 | # 3. Conditional input as an option during __init__? 12 | 13 | 14 | class MLPcVAE(object): 15 | ''' 16 | Conditional Variational Auto-encoder implemented in multi-layer perceptron 17 | APIs: 18 | z = encode(x) 19 | xh = decode(z, y) 20 | Notation: 21 | shape: 22 | `b`: batch_size, 23 | `c`: indicates the dimension 24 | ''' 25 | 26 | def __init__(self, arch, is_training=False): 27 | self.arch = arch 28 | self.is_training = is_training 29 | self._decode = tf.make_template('Decoder', self._generator) 30 | self._encode = tf.make_template('Encoder', self._encoder) 31 | 32 | def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 33 | with tf.variable_scope(scope_name): 34 | embeddings = tf.get_variable( 35 | name=var_name, 36 | shape=[n_class, h_dim], 37 | regularizer=tf.contrib.keras.regularizers.l2(1e-3), 38 | ) 39 | return embeddings 40 | 41 | def _encoder(self, x, is_training): 42 | ''' Expects 2D inputs (b, c) 43 | Input: 44 | `x`: shape=[b, c] 45 | Return: 46 | `z_mu`: mean vector of `z` 47 | `z_lv`: log var of `z` 48 | ''' 49 | for o in self.arch['encoder']['output']: 50 | x = tf.layers.dense(x, units=o, activation=lrelu) 51 | # x = tf.layers.batch_normalization(x, training=is_training) 52 | z_mu = tf.layers.dense(x, units=self.arch['z_dim'], name='z_mu') 53 | z_lv = tf.layers.dense(x, units=self.arch['z_dim'], name='z_lv') 54 | return z_mu, z_lv 55 | 56 | def _generator(self, z, y, is_training): 57 | ''' 58 | Input: 59 | z: shape=[b, c] 60 | y: speaker label; shape=[b,], dtype=int64 61 | Return: 62 | xh: reconstructed version of `x` (the input to the VAE) 63 | ''' 64 | self.speaker_repr = self._l2_regularized_embedding( 65 | n_class=self.arch['y_dim'], 66 | h_dim=self.arch['yemb_dim'], 67 | scope_name='y_embedding', 68 | var_name='y_emb' 69 | ) 70 | 71 | c = tf.nn.embedding_lookup(self.speaker_repr, y) 72 | x = tf.concat([z, c], -1) 73 | for o in self.arch['decoder']['output']: 74 | x = tf.layers.dense(x, units=o, activation=lrelu) 75 | # x = tf.layers.batch_normalization(x, training=is_training) 76 | return tf.layers.dense(x, units=self.arch['x_dim'], name='xh') 77 | 78 | def loss(self, x, y): 79 | ''' 80 | Args: 81 | x: shape=[s, b, c] 82 | y: shape=[s, b] 83 | Returns: 84 | a `dict` of losses 85 | ''' 86 | z_mu, z_lv = self._encode(x, is_training=self.is_training) 87 | z = GaussianSampleLayer(z_mu, z_lv) 88 | xh = self._decode(z, y, is_training=self.is_training) 89 | 90 | with tf.name_scope('loss'): 91 | with tf.name_scope('E_log_p_x_zy'): 92 | L_x = -1.0 * tf.reduce_mean( 93 | GaussianLogDensity(x, xh, tf.zeros_like(x)), 94 | ) 95 | with tf.name_scope('D_KL_z'): 96 | L_z = tf.reduce_mean( 97 | GaussianKLD( 98 | z_mu, z_lv, 99 | tf.zeros_like(z_mu), tf.zeros_like(z_lv) 100 | ) 101 | ) 102 | loss = { 103 | 'L_x': L_x, 104 | 'L_z': L_z, 105 | } 106 | 107 | tf.summary.scalar('L_x', L_x) 108 | tf.summary.scalar('L_z', L_z) 109 | return loss 110 | 111 | def encode(self, x): 112 | z_mu, z_lv = self._encode(x, is_training=False) 113 | return z_mu 114 | 115 | def decode(self, z, y, tanh=False): 116 | xh = self._decode(z, y, is_training=False) 117 | return 
xh
118 |
--------------------------------------------------------------------------------
/util/wrapper.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 | from datetime import datetime
5 |
6 | import tensorflow as tf
7 |
8 |
9 | def get_checkpoint(logdir):
10 |     ''' Get the first checkpoint '''
11 |     ckpt = tf.train.get_checkpoint_state(logdir)
12 |     if ckpt:
13 |         return ckpt.model_checkpoint_path
14 |     else:
15 |         print('No checkpoint found')
16 |         return None
17 |
18 | def save(saver, sess, logdir, step):
19 |     ''' Save a model to logdir/model.ckpt-[step] '''
20 |     model_name = 'model.ckpt'
21 |     checkpoint_path = os.path.join(logdir, model_name)
22 |     print('Storing checkpoint to {} ...'.format(logdir), end="")
23 |     sys.stdout.flush()
24 |
25 |     if not os.path.exists(logdir):
26 |         os.makedirs(logdir)
27 |
28 |     saver.save(sess, checkpoint_path, global_step=step)
29 |     print(' Done.')
30 |
31 |
32 | def load(saver, sess, logdir, ckpt=None):
33 |     '''
34 |     Try to load a model from a dir (search for the newest checkpoint)
35 |     '''
36 |     print('Trying to restore checkpoints from {} ...'.format(logdir),
37 |           end="")
38 |     if ckpt:
39 |         ckpt = os.path.join(logdir, ckpt)
40 |         global_step = int(
41 |             ckpt
42 |             .split('/')[-1]
43 |             .split('-')[-1])
44 |         print(' Global step: {}'.format(global_step))
45 |         print(' Restoring...', end="")
46 |         saver.restore(sess, ckpt)
47 |         return global_step
48 |     else:
49 |         ckpt = tf.train.get_checkpoint_state(logdir)
50 |         if ckpt:
51 |             print(' Checkpoint found: {}'.format(ckpt.model_checkpoint_path))
52 |             global_step = int(
53 |                 ckpt.model_checkpoint_path
54 |                 .split('/')[-1]
55 |                 .split('-')[-1])
56 |             print(' Global step: {}'.format(global_step))
57 |             print(' Restoring...', end="")
58 |             saver.restore(sess, ckpt.model_checkpoint_path)
59 |             return global_step
60 |         else:
61 |             print('No checkpoint found')
62 |             return None
63 |
64 | #
65 |
66 | def configure_gpu_settings(gpu_cfg=None):
67 |     session_conf = None
68 |     if gpu_cfg:
69 |         with open(gpu_cfg) as f:
70 |             cfg = json.load(f)
71 |         gpu_options = tf.GPUOptions(
72 |             per_process_gpu_memory_fraction=cfg['per_process_gpu_memory_fraction'])
73 |         session_conf = tf.ConfigProto(
74 |             allow_soft_placement=cfg['allow_soft_placement'],
75 |             log_device_placement=cfg['log_device_placement'],
76 |             inter_op_parallelism_threads=cfg['inter_op_parallelism_threads'],
77 |             intra_op_parallelism_threads=cfg['intra_op_parallelism_threads'],
78 |             gpu_options=gpu_options)
79 |         # Timeline
80 |         # jit_level = 0
81 |         # session_conf.graph_options.optimizer_options.global_jit_level = jit_level
82 |         # sess = tf.Session(
83 |         #     config=session_conf)
84 |     # else:
85 |     #     sess = tf.Session()
86 |     return session_conf
87 |
88 | def restore_global_step(saver, sess, from_dir, ckpt):
89 |     try:
90 |         step = load(saver, sess, from_dir, ckpt)
91 |         if step is None:
92 |             step = 0
93 |     except:
94 |         print("Something's wrong while restoring checkpoints!")
95 |         raise
96 |     return step
97 |
98 |
99 | def validate_log_dirs(args):
100 |     ''' Create a default log dir (if necessary) '''
101 |     def get_default_logdir(logdir_root):
102 |         STARTED_DATESTRING = datetime.now().strftime('%0m%0d-%0H%0M-%0S-%Y')
103 |         logdir = os.path.join(logdir_root, 'train', STARTED_DATESTRING)
104 |         print('Using default logdir: {}'.format(logdir))
105 |         return logdir
106 |
107 |     if args.logdir and args.restore_from:
108 |         raise ValueError(
109 |             'You can only specify one of the following: ' +
110 |             '--logdir and --restore_from')
111 |
112 |     if args.logdir and args.logdir_root:
113 |         raise ValueError(
114 |             'You can only specify either --logdir or --logdir_root')
115 |
116 |     if args.logdir_root is None:
117 |         logdir_root = 'logdir'
118 |
119 |     if args.logdir is None:
120 |         logdir = get_default_logdir(logdir_root)
121 |
122 |     # Note: `logdir` and `restore_from` are exclusive
123 |     if args.restore_from is None:
124 |         restore_from = logdir
125 |     else:
126 |         restore_from = args.restore_from
127 |
128 |     return {
129 |         'logdir': logdir,
130 |         'logdir_root': logdir_root,
131 |         'restore_from': restore_from,
132 |     }
133 |
--------------------------------------------------------------------------------
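The next file, `model/vae.py`, assembles its objective as `loss['G'] = -logPx + D_KL` from `GaussianLogDensity` and `GaussianKLD`, which are defined in `util/layers.py` further down. For reference, a plain-NumPy restatement of those two diagonal-Gaussian formulas; an illustrative sketch, not repo code:

```python
import numpy as np

def gaussian_log_density(x, mu, log_var):
    '''log N(x; mu, exp(log_var)), summed over the last axis (cf. util/layers.py).'''
    var = np.exp(log_var)
    log_prob = -0.5 * (np.log(2. * np.pi) + log_var + (x - mu) ** 2 / var)
    return log_prob.sum(-1)

def gaussian_kld(mu1, lv1, mu2, lv2):
    '''KL(N1 || N2) for diagonal Gaussians parameterized by mean and log-variance.'''
    v1, v2 = np.exp(lv1), np.exp(lv2)
    dimwise = .5 * ((lv2 - lv1) + (v1 + (mu1 - mu2) ** 2) / v2 - 1.)
    return dimwise.sum(-1)
```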
/model/vae.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.contrib import slim
3 | from util.image import nchw_to_nhwc
4 | from util.layers import (GaussianKLD, GaussianLogDensity, GaussianSampleLayer,
5 |                          Layernorm, conv2d_nchw_layernorm, lrelu)
6 | # from model.wgan import GradientPenaltyWGAN
7 |
8 | class ConvVAE(object):
9 |     def __init__(self, arch, is_training=False):
10 |         '''
11 |         Variational auto-encoder implemented in 2D convolutional neural nets
12 |         Input:
13 |             `arch`: network architecture (`dict`)
14 |             `is_training`: (unused now) it was kept for historical reasons (for `BatchNorm`)
15 |         '''
16 |         self.arch = arch
17 |         self._sanity_check()
18 |         self.is_training = is_training
19 |
20 |         with tf.name_scope('SpeakerRepr'):
21 |             self.y_emb = self._l2_regularized_embedding(
22 |                 self.arch['y_dim'],
23 |                 self.arch['z_dim'],
24 |                 'y_embedding')
25 |
26 |         self._generate = tf.make_template(
27 |             'Generator',
28 |             self._generator)
29 |
30 |         self._encode = tf.make_template(
31 |             'Encoder',
32 |             self._encoder)
33 |
34 |         self.generate = self.decode  # for VAE-GAN extension
35 |
36 |
37 |     def _sanity_check(self):
38 |         for net in ['encoder', 'generator']:
39 |             assert len(self.arch[net]['output']) == len(self.arch[net]['kernel']) == len(self.arch[net]['stride'])
40 |
41 |
42 |     def _unit_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'):
43 |         with tf.variable_scope(scope_name):
44 |             embeddings = tf.get_variable(
45 |                 name=var_name,
46 |                 shape=[n_class, h_dim])
47 |             embeddings = tf.nn.l2_normalize(embeddings, dim=-1, name=var_name+'normalized')
48 |         return embeddings
49 |
50 |
51 |     def _merge(self, var_list, fan_out, l2_reg=1e-6):
52 |         x = 0.
53 | with slim.arg_scope( 54 | [slim.fully_connected], 55 | num_outputs=fan_out, 56 | weights_regularizer=slim.l2_regularizer(l2_reg), 57 | normalizer_fn=None, 58 | activation_fn=None): 59 | for var in var_list: 60 | x = x + slim.fully_connected(var) 61 | return slim.bias_add(x) 62 | 63 | 64 | def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 65 | with tf.variable_scope(scope_name): 66 | embeddings = tf.get_variable( 67 | name=var_name, 68 | shape=[n_class, h_dim], 69 | regularizer=slim.l2_regularizer(1e-6)) 70 | return embeddings 71 | 72 | def _encoder(self, x, is_training=None): 73 | net = self.arch['encoder'] 74 | for i, (o, k, s) in enumerate(zip(net['output'], net['kernel'], net['stride'])): 75 | x = conv2d_nchw_layernorm( 76 | x, o, k, s, lrelu, 77 | name='Conv2d-{}'.format(i) 78 | ) 79 | x = slim.flatten(x) 80 | z_mu = tf.layers.dense(x, self.arch['z_dim']) 81 | z_lv = tf.layers.dense(x, self.arch['z_dim']) 82 | return z_mu, z_lv 83 | 84 | def _generator(self, z, y, is_training=None): 85 | net = self.arch['generator'] 86 | h, w, c = net['hwc'] 87 | 88 | if y is not None: 89 | y = tf.nn.embedding_lookup(self.y_emb, y) 90 | x = self._merge([z, y], h * w * c) 91 | else: 92 | x = z 93 | 94 | x = tf.reshape(x, [-1, c, h, w]) # channel first 95 | for i, (o, k, s) in enumerate(zip(net['output'], net['kernel'], net['stride'])): 96 | x = tf.layers.conv2d_transpose(x, o, k, s, 97 | padding='same', 98 | data_format='channels_first', 99 | ) 100 | if i < len(net['output']) -1: 101 | x = Layernorm(x, [1, 2, 3], 'ConvT-LN{}'.format(i)) 102 | x = lrelu(x) 103 | else: 104 | x = tf.nn.tanh(x) # not 100% necessary 105 | return x 106 | 107 | 108 | def loss(self, x, y): 109 | with tf.name_scope('loss'): 110 | z_mu, z_lv = self._encode(x) 111 | z = GaussianSampleLayer(z_mu, z_lv) 112 | xh = self._generate(z, y) 113 | 114 | D_KL = tf.reduce_mean( 115 | GaussianKLD( 116 | slim.flatten(z_mu), 117 | slim.flatten(z_lv), 118 | slim.flatten(tf.zeros_like(z_mu)), 119 | slim.flatten(tf.zeros_like(z_lv)), 120 | ) 121 | ) 122 | logPx = tf.reduce_mean( 123 | GaussianLogDensity( 124 | slim.flatten(x), 125 | slim.flatten(xh), 126 | tf.zeros_like(slim.flatten(xh))), # TODO exp -tf.log(1/9) + 127 | ) 128 | 129 | loss = dict() 130 | loss['G'] = - logPx + D_KL 131 | loss['D_KL'] = D_KL 132 | loss['logP'] = logPx 133 | 134 | tf.summary.scalar('KL-div', D_KL) 135 | tf.summary.scalar('logPx', logPx) 136 | 137 | tf.summary.histogram('xh', xh) 138 | tf.summary.histogram('x', x) 139 | return loss 140 | 141 | def encode(self, x): 142 | z_mu, _ = self._encode(x) 143 | return z_mu 144 | 145 | def decode(self, z, y): 146 | xh = self._generate(z, y) 147 | return nchw_to_nhwc(xh) 148 | -------------------------------------------------------------------------------- /trainer/vawgan.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import tensorflow as tf 5 | 6 | from trainer.vae import VAETrainer 7 | from util.wrapper import save 8 | 9 | 10 | def lr_schedule(step, schedule): 11 | for s, lr in schedule: 12 | if step < s: 13 | return lr 14 | return 1e-7 15 | 16 | class VAWGANTrainer(VAETrainer): 17 | def _optimize(self): 18 | hyperp = self.arch['training'] 19 | global_step = tf.Variable(0, name='global_step') 20 | lr = tf.placeholder(dtype=tf.float32) # can't add lr to summary 21 | 22 | optimizer_d = tf.train.RMSPropOptimizer(lr) #hyperp['lr']) 23 | optimizer_g = tf.train.RMSPropOptimizer(lr) #hyperp['lr']) 24 | 25 | trainables = 
tf.trainable_variables() 26 | g_vars = [v for v in trainables if 'Generator' in v.name] 27 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 28 | e_vars = [v for v in trainables if 'Encoder' in v.name] 29 | 30 | # ======== Standard ======== 31 | # r: a {0, R+} weighting 32 | r = tf.placeholder(shape=[], dtype=tf.float32) 33 | k = tf.constant(hyperp['clamping'], shape=[]) 34 | for term in ['reconst_t', 'reconst_s', 'conv_s2t', 'real_s_t']: 35 | self.loss[term] = self.loss[term] / k 36 | 37 | obj_Dx = - self.loss['conv_s2t'] * k 38 | obj_Gx = r * self.loss['conv_s2t'] + self.loss['Dis'] 39 | obj_Ez = self.loss['KL(z)'] + self.loss['Dis'] 40 | 41 | opt_d = optimizer_d.minimize(obj_Dx, var_list=d_vars) 42 | opt_ds = [opt_d] 43 | 44 | logging.info('The following variables are clamped:') 45 | with tf.control_dependencies(opt_ds): 46 | with tf.name_scope('Clamping'): 47 | for v in d_vars: 48 | v_clamped = tf.clip_by_value(v, -k, k) 49 | clamping = tf.assign(v, v_clamped) 50 | opt_ds.append(clamping) 51 | logging.info(v.name) 52 | 53 | opt_g = optimizer_g.minimize(obj_Gx, var_list=g_vars, global_step=global_step) 54 | opt_e = optimizer_g.minimize(obj_Ez, var_list=e_vars) 55 | 56 | return dict( 57 | d=opt_ds, 58 | g=opt_g, 59 | gz=opt_e, 60 | lr=lr, 61 | gamma=r, 62 | global_step=global_step) 63 | 64 | 65 | def train(self, nIter, machine=None, summary_op=None): 66 | vae_saver = tf.train.Saver() 67 | sv = tf.train.Supervisor( 68 | logdir=self.dirs['logdir'], 69 | save_model_secs=300, 70 | global_step=self.opt['global_step']) 71 | 72 | sess_config = tf.ConfigProto( 73 | allow_soft_placement=True, 74 | gpu_options=tf.GPUOptions(allow_growth=True)) 75 | 76 | n_iter_per_epoch = 2 * int(1e5) // self.arch['training']['batch_size'] # TODO: should use #frames 77 | 78 | def print_log(info, results): 79 | msg = 'Epoch [{:3d}/{:3d}] '.format(info['ep'], info['nEpoch']) 80 | msg += '[{:4d}/{:4d}] '.format(info['it'], info['nIter']) 81 | msg += 'W: {:.2f} '.format(results['conv_s2t']) 82 | msg += 'DIS={:5.2f}, KLD={:5.2f}'.format(results['Dis'], results['KL(z)']) 83 | print('\r{}'.format(msg), end='', flush=True) 84 | logging.info(msg) 85 | 86 | 87 | hyperp = self.arch['training'] 88 | info = {'nEpoch': hyperp['epoch_vae'], 'nIter': n_iter_per_epoch} 89 | fetches = { 90 | 'conv_s2t': self.loss['conv_s2t'], 91 | 'Dis': self.loss['Dis'], 92 | 'KL(z)': self.loss['KL(z)'], 93 | 'opt_g': self.opt['g'], 94 | 'opt_e': self.opt['gz'], 95 | 'step': self.opt['global_step'], 96 | } 97 | with sv.managed_session(config=sess_config) as sess: 98 | try: 99 | update_G_E = [self.opt['g'], self.opt['gz']] 100 | 101 | for ep in range(hyperp['epoch_vae']): 102 | lr = lr_schedule(ep, hyperp['lr_schedule']) 103 | feed_dict = {self.opt['gamma']: 0., self.opt['lr']: lr} 104 | for it in range(n_iter_per_epoch): 105 | if it % 100 == 0: # update print only every 100 iters 106 | results = sess.run(fetches, feed_dict=feed_dict) 107 | info.update({'ep': ep + 1, 'it': it + 1}) 108 | print_log(info, results) 109 | else: 110 | sess.run(update_G_E, feed_dict=feed_dict) 111 | 112 | save(vae_saver, sess, os.path.join(self.dirs['logdir'], 'VAE'), fetches['step']) 113 | info.update({'nEpoch': hyperp['epoch_vawgan'] + hyperp['epoch_vae']}) 114 | 115 | for ep in range(hyperp['epoch_vawgan']): 116 | ep = ep + hyperp['epoch_vae'] 117 | 118 | lr = lr_schedule(ep, hyperp['lr_schedule']) 119 | feed_dict = {self.opt['gamma']: hyperp['gamma'], self.opt['lr']: lr} 120 | 121 | info.update({'ep': ep}) 122 | for it in range(n_iter_per_epoch): 123 | 
if (ep == hyperp['epoch_vae'] and it < 25) or (it % 100 == 0):
124 |                             nIterD = hyperp['n_unroll_intense']
125 |                         else:
126 |                             nIterD = hyperp['n_unroll']
127 |
128 |                         for _ in range(nIterD):
129 |                             sess.run(self.opt['d'], feed_dict=feed_dict)
130 |
131 |                         if it % 100 != 0:
132 |                             sess.run(update_G_E, feed_dict=feed_dict)
133 |                         else:
134 |                             results = sess.run(fetches, feed_dict=feed_dict)
135 |                             info.update({'ep': ep + 1, 'it': it + 1})
136 |                             print_log(info, results)
137 |             except KeyboardInterrupt:
138 |                 print()
139 |             finally:
140 |                 save(sv.saver, sess, self.dirs['logdir'], fetches['step'])
141 |                 print()
142 |
143 |
--------------------------------------------------------------------------------
/convert-vawgan-cwt.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import sys
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 | import soundfile as sf
8 | from os.path import join
9 | from util.wrapper import load
10 | from analyzer_copy import read_whole_features, pw2wav, dic2npy
11 | from analyzer_copy import Tanhize
12 | from datetime import datetime
13 | from importlib import import_module
14 |
15 | args = tf.app.flags.FLAGS
16 | tf.app.flags.DEFINE_string('corpus_name', 'emotion_vc', 'Corpus name')
17 | tf.app.flags.DEFINE_string('checkpoint', './logdir/train/0921-2112-44-2020/model.ckpt-46860', 'checkpoint to restore (dir + ckpt name)')
18 | tf.app.flags.DEFINE_string('src', 'Neutral', 'source emotion (e.g., Neutral)')
19 | tf.app.flags.DEFINE_string('trg', 'Angry', 'target emotion (e.g., Angry)')
20 | tf.app.flags.DEFINE_string('output_dir', './logdir', 'root of output dir')
21 | tf.app.flags.DEFINE_string('module', 'model.vawgan', 'Module')
22 | tf.app.flags.DEFINE_string('model', 'VAWGAN', 'Model')
23 | #tf.app.flags.DEFINE_string('file_pattern', './test_condition_results/{}/*.bin', 'file pattern')
24 | tf.app.flags.DEFINE_string('file_pattern', './data/bin/evaluation_set/{}/*.bin', 'file pattern')
25 | tf.app.flags.DEFINE_string(
26 |     'speaker_list', '../vaw_original/etc/speakers.tsv', 'Speaker list (one speaker per line)'
27 | )
28 |
29 | def make_output_wav_name(output_dir, filename):
30 |     basename = str(filename, 'utf8')
31 |     basename = os.path.split(basename)[-1]
32 |     basename = os.path.splitext(basename)[0]
33 |     return os.path.join(
34 |         output_dir,
35 |         '{}-{}-{}.wav'.format(args.src, args.trg, basename)
36 |     )
37 |
38 | def make_output_bin_name(output_dir, filename):
39 |     basename = str(filename, 'utf8')
40 |     basename = os.path.split(basename)[-1]
41 |     basename = os.path.splitext(basename)[0]
42 |     return basename
43 |
44 | def get_default_output(logdir_root):
45 |     STARTED_DATESTRING = datetime.now().strftime('%0m%0d-%0H%0M-%0S-%Y')
46 |     logdir = os.path.join(logdir_root, 'output', STARTED_DATESTRING)
47 |     print('Using default logdir: {}'.format(logdir))
48 |     return logdir
49 |
50 | def convert_f0(f0, src, trg):
51 |     mu_s, std_s = np.fromfile(os.path.join('./etc', '{}.npf'.format(src)), np.float32)
52 |     mu_t, std_t = np.fromfile(os.path.join('./etc', '{}.npf'.format(trg)), np.float32)
53 |     lf0 = tf.where(f0 > 1., tf.log(f0), f0)
54 |     lf0 = tf.where(lf0 > 1., (lf0 - mu_s)/std_s * std_t + mu_t, lf0)
55 |     lf0 = tf.where(lf0 > 1., tf.exp(lf0), lf0)
56 |     return lf0
57 |
58 |
59 | def nh_to_nchw(x):
60 |     with tf.name_scope('NH_to_NCHW'):
61 |         x = tf.expand_dims(x, 1)   # [b, h] => [b, c=1, h]
62 |         return tf.expand_dims(x, -1)  # => [b, c=1, h, w=1]
63 |
64 | def nh_to_nhwc(x):
65 |     with tf.name_scope('NH_to_NHWC'):
66 |         return
tf.expand_dims(tf.expand_dims(x, -1), -1) 67 | 68 | 69 | def main(unused_args=None): 70 | # args(sys.argv) 71 | 72 | if args.model is None: 73 | raise ValueError( 74 | '\n You MUST specify `model`.' +\ 75 | '\n Use `python convert.py --help` to see applicable options.' 76 | ) 77 | 78 | module = import_module(args.module, package=None) 79 | MODEL = getattr(module, args.model) 80 | 81 | FS = 16000 82 | 83 | with open(args.speaker_list) as fp: 84 | SPEAKERS = [l.strip() for l in fp.readlines()] 85 | 86 | 87 | logdir, ckpt = os.path.split(args.checkpoint) 88 | if 'VAE' in logdir: 89 | _path_to_arch, _ = os.path.split(logdir) 90 | else: 91 | _path_to_arch = logdir 92 | arch = tf.gfile.Glob(os.path.join(_path_to_arch, 'architecture*.json')) 93 | if len(arch) != 1: 94 | print('WARNING: found more than 1 architecture files!') 95 | arch = arch[0] 96 | with open(arch) as fp: 97 | arch = json.load(fp) 98 | 99 | normalizer = Tanhize( 100 | xmax=np.fromfile('./etc/{}_xmax.npf'.format(args.corpus_name)), 101 | xmin=np.fromfile('./etc/{}_xmin.npf'.format(args.corpus_name)), 102 | ) 103 | 104 | features = read_whole_features(args.file_pattern.format(args.src)) 105 | 106 | x = normalizer.forward_process(features['sp']) 107 | x = nh_to_nhwc(x) 108 | 109 | 110 | #y_s = features['speaker'] 111 | #y_t_id = tf.placeholder(dtype=tf.int64, shape=[1,]) 112 | #y_t = y_t_id * tf.ones(shape=[tf.shape(x)[0],], dtype=tf.int64) 113 | 114 | 115 | y_t = features['speaker'] 116 | 117 | 118 | 119 | #f0_t = features['f0'] 120 | f0_s = features['f0'] 121 | f0_t = convert_f0(f0_s, args.src, args.trg) 122 | #f0_s_convert = tf.cast(f0_s,dtype=tf.int64) 123 | f0_t_convert = tf.cast(f0_t,dtype=tf.int64) 124 | 125 | 126 | machine = MODEL(arch, is_training=False) 127 | z = machine.encode(x) 128 | x_t = machine.decode(z, y_t, f0_t_convert) # NOTE: the API yields NHWC format 129 | x_t = tf.squeeze(x_t) 130 | x_t = normalizer.backward_process(x_t) 131 | 132 | # For sanity check (validation) 133 | # x_s = machine.decode(z, y_s, f0_s_convert) 134 | # x_s = tf.squeeze(x_s) 135 | # x_s = normalizer.backward_process(x_s) 136 | 137 | #f0_s = features['f0'] 138 | #f0_t = convert_f0(f0_s, args.src, args.trg) 139 | 140 | output_dir = get_default_output(args.output_dir) 141 | 142 | saver = tf.train.Saver() 143 | sv = tf.train.Supervisor(logdir=output_dir) 144 | with sv.managed_session() as sess: 145 | load(saver, sess, logdir, ckpt=ckpt) 146 | print() 147 | while True: 148 | try: 149 | feat, f0, sp = sess.run( 150 | [features, f0_t, x_t], 151 | #feed_dict={y_t_id: np.asarray([SPEAKERS.index(args.trg)])} 152 | feed_dict = {y_t: np.load('./emo_feat/0018/angry.npy').reshape(1,-1)} 153 | ) 154 | feat.update({'sp': sp, 'f0': f0}) 155 | 156 | feats = dic2npy(feat) 157 | oFilename = make_output_bin_name(output_dir, feat['filename']) 158 | 159 | with open(join('./bin_results', '{}.bin'.format(oFilename)), 'wb') as fp: 160 | fp.write(feats.tostring()) 161 | 162 | 163 | y = pw2wav(feat) 164 | oFilename = make_output_wav_name(output_dir, feat['filename']) 165 | print('\rProcessing {}'.format(oFilename), end='') 166 | sf.write(oFilename, y, FS) 167 | except KeyboardInterrupt: 168 | break 169 | finally: 170 | pass 171 | print() 172 | 173 | if __name__ == '__main__': 174 | tf.app.run() 175 | -------------------------------------------------------------------------------- /util/layers.py: -------------------------------------------------------------------------------- 1 | # import numpy as np 2 | from math import pi 3 | import tensorflow as tf 4 | # from 
tensorflow.contrib import slim 5 | from tensorflow.python.ops.init_ops import VarianceScaling 6 | 7 | EPSILON = tf.constant(1e-6, dtype=tf.float32) 8 | PI = tf.constant(pi, dtype=tf.float32) 9 | 10 | def Layernorm(x, axis, name): 11 | ''' 12 | Layer normalization (Ba, 2016) 13 | J: Z-normalization using all nodes of the layer on a per-sample basis. 14 | 15 | Input: 16 | `x`: channel_first/NCHW format! (or fully-connected) 17 | `axis`: list 18 | `name`: must be assigned 19 | 20 | Example: 21 | ```python 22 | axis = [1, 2, 3] 23 | x = tf.random_normal([64, 3, 10, 10]) 24 | name = 'D_layernorm' 25 | ``` 26 | Return: 27 | (x - u)/s * scale + offset 28 | 29 | Source: 30 | https://github.com/igul222/improved_wgan_training/blob/master/tflib/ops/layernorm.py 31 | ''' 32 | mean, var = tf.nn.moments(x, axis, keep_dims=True) 33 | n_neurons = x.get_shape().as_list()[axis[0]] 34 | offset = tf.get_variable( 35 | name+'.offset', 36 | shape=[n_neurons] + [1 for _ in range(len(axis) -1)], 37 | initializer=tf.zeros_initializer 38 | ) 39 | scale = tf.get_variable( 40 | name+'.scale', 41 | shape=[n_neurons] + [1 for _ in range(len(axis) -1)], 42 | initializer=tf.ones_initializer 43 | ) 44 | return tf.nn.batch_normalization(x, mean, var, offset, scale, 1e-5) 45 | 46 | 47 | def conv2d_nchw_layernorm(x, o, k, s, activation, name): 48 | ''' 49 | Input: 50 | `x`: input in NCHW format 51 | `o`: num of output nodes 52 | `k`: kernel size 53 | `s`: stride 54 | ''' 55 | with tf.variable_scope(name): 56 | x = tf.layers.conv2d( 57 | inputs=x, 58 | filters=o, 59 | kernel_size=k, 60 | strides=s, 61 | padding='same', 62 | data_format='channels_first', 63 | name=name, 64 | ) 65 | x = Layernorm(x, [1, 2, 3], 'layernorm') 66 | return activation(x) 67 | 68 | 69 | def selu(x): 70 | with tf.name_scope('selu'): 71 | alpha = 1.6732632423543772848170429916717 72 | scale = 1.0507009873554804934193349852946 73 | return scale*tf.where(x>=0.0, x, alpha*tf.nn.elu(x)) 74 | 75 | def selu_normal(seed=None): 76 | return VarianceScaling( 77 | scale=1., mode='fan_in', distribution='normal', seed=seed) 78 | 79 | def mu_law_encode_nonlinear(audio, quantization_channels=256): 80 | ''' 81 | Compress the waveform amplitudes using mu-law non-linearity. 82 | NOTE: This mu-law functions as a non-linear function as opposed to 83 | quantization. 84 | ''' 85 | with tf.name_scope('encode'): 86 | mu = tf.to_float(quantization_channels - 1) 87 | # Perform mu-law companding transformation (ITU-T, 1988). 88 | # Minimum operation is here to deal with rare large amplitudes caused 89 | # by resampling. 90 | safe_audio_abs = tf.minimum(tf.abs(audio), 1.0) 91 | magnitude = tf.log1p(mu * safe_audio_abs) / tf.log1p(mu) 92 | signal = tf.multiply(tf.sign(audio), magnitude, name='mulaw') 93 | # Quantize signal to the specified number of levels. 94 | # return tf.to_int32((signal + 1) / 2 * mu + 0.5) 95 | return signal 96 | 97 | 98 | def mu_law_decode_nonlinear(output, quantization_channels=256): 99 | ''' 100 | Uncompress the waveform amplitudes using mu-law non-linearity. 101 | NOTE: This mu-law functions as a non-linear function. 102 | ''' 103 | with tf.name_scope('decode'): 104 | mu = quantization_channels - 1 105 | # Map values back to [-1, 1]. 106 | # signal = 2 * (tf.to_float(output) / mu) - 1 107 | signal = output 108 | # Perform inverse of mu-law transformation. 
109 | magnitude = (1 / mu) * ((1 + mu)**abs(signal) - 1) 110 | return tf.sign(signal) * magnitude 111 | 112 | 113 | def GumbelSampleLayer(y_mu): 114 | ''' Create Gumbel(0, 1) variable from Uniform[0, 1] ''' 115 | u = tf.random_uniform( 116 | minval=0.0, 117 | maxval=1.0, 118 | shape=tf.shape(y_mu)) 119 | g = - tf.log(- tf.log(u)) 120 | return y_mu + g 121 | 122 | 123 | def GumbelSoftmaxLogDensity(y, p, tau): 124 | # EPS = tf.constant(1e-10) 125 | k = tf.shape(y)[-1] 126 | k = tf.cast(k, tf.float32) 127 | # y = y + EPS 128 | # y = tf.divide(y, tf.reduce_sum(y, -1, keep_dims=True)) 129 | y = normalize_to_unit_sum(y) 130 | sum_p_over_y = tf.reduce_sum(tf.divide(p, tf.pow(y, tau)), -1) 131 | logp = tf.lgamma(k) 132 | logp = logp + (k - 1) * tf.log(tau) 133 | logp = logp - k * tf.log(sum_p_over_y) 134 | logp = logp + sum_p_over_y 135 | return logp 136 | 137 | 138 | def normalize_to_unit_sum(x, EPS=1e-10): 139 | ''' Along the last dim ''' 140 | EPS = tf.constant(EPS, dtype=tf.float32) 141 | x = x + EPS 142 | x_sum = tf.reduce_sum(x, -1, keep_dims=True) 143 | x = tf.divide(x, x_sum) 144 | return x 145 | 146 | 147 | def lrelu(x, leak=0.02, name="lrelu"): 148 | ''' Leaky ReLU ''' 149 | return tf.maximum(x, leak*x, name=name) 150 | 151 | 152 | def GaussianSampleLayer(z_mu, z_lv, name='GaussianSampleLayer'): 153 | with tf.name_scope(name): 154 | eps = tf.random_normal(tf.shape(z_mu)) 155 | std = tf.sqrt(tf.exp(z_lv)) 156 | return tf.add(z_mu, tf.multiply(eps, std)) 157 | 158 | 159 | def GaussianLogDensity(x, mu, log_var, name='GaussianLogDensity'): 160 | with tf.name_scope(name): 161 | c = tf.log(2. * PI) 162 | var = tf.exp(log_var) 163 | x_mu2 = tf.square(x - mu) # [Issue] not sure the dim works or not? 164 | x_mu2_over_var = tf.div(x_mu2, var + EPSILON) 165 | log_prob = -0.5 * (c + log_var + x_mu2_over_var) 166 | log_prob = tf.reduce_sum(log_prob, -1) # keep_dims=True, 167 | return log_prob 168 | 169 | 170 | def GaussianKLD(mu1, lv1, mu2, lv2): 171 | ''' Kullback-Leibler divergence of two Gaussians 172 | *Assuming that each dimension is independent 173 | mu: mean 174 | lv: log variance 175 | Equation: http://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians 176 | ''' 177 | with tf.name_scope('GaussianKLD'): 178 | v1 = tf.exp(lv1) 179 | v2 = tf.exp(lv2) 180 | mu_diff_sq = tf.square(mu1 - mu2) 181 | dimwise_kld = .5 * ( 182 | (lv2 - lv1) + tf.div(v1 + mu_diff_sq, v2 + EPSILON) - 1.) 183 | return tf.reduce_sum(dimwise_kld, -1) 184 | 185 | # Verification by CMU's implementation 186 | # http://www.cs.cmu.edu/~chanwook/MySoftware/rm1_Spk-by-Spk_MLLR/rm1_PNCC_MLLR_1/rm1/python/sphinx/divergence.py 187 | # def gau_kl(pm, pv, qm, qv): 188 | # """ 189 | # Kullback-Liebler divergence from Gaussian pm,pv to Gaussian qm,qv. 190 | # Also computes KL divergence from a single Gaussian pm,pv to a set 191 | # of Gaussians qm,qv. 192 | # Diagonal covariances are assumed. Divergence is expressed in nats. 
193 | # """ 194 | # if (len(qm.shape) == 2): 195 | # axis = 1 196 | # else: 197 | # axis = 0 198 | # # Determinants of diagonal covariances pv, qv 199 | # dpv = pv.prod() 200 | # dqv = qv.prod(axis) 201 | # # Inverse of diagonal covariance qv 202 | # iqv = 1./qv 203 | # # Difference between means pm, qm 204 | # diff = qm - pm 205 | # return (0.5 * 206 | # (np.log(dqv / dpv) # log |\Sigma_q| / |\Sigma_p| 207 | # + (iqv * pv).sum(axis) # + tr(\Sigma_q^{-1} * \Sigma_p) 208 | # + (diff * iqv * diff).sum(axis) # + (\mu_q-\mu_p)^T\Sigma_q^{-1}(\mu_q-\mu_p) 209 | # - len(pm))) # - N 210 | -------------------------------------------------------------------------------- /trainer/vae.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import numpy as np 5 | import tensorflow as tf 6 | # from util.image import make_png_jet_thumbnail, make_png_thumbnail 7 | from trainer.gan import GANTrainer 8 | 9 | class VAETrainer(GANTrainer): 10 | def _optimize(self): 11 | ''' 12 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 13 | https://github.com/igul222/improved_wgan_training/issues/3 14 | ''' 15 | global_step = tf.Variable(0, name='global_step') 16 | lr = self.arch['training']['lr'] 17 | b1 = self.arch['training']['beta1'] 18 | b2 = self.arch['training']['beta2'] 19 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 20 | 21 | g_vars = tf.trainable_variables() 22 | 23 | with tf.name_scope('Update'): 24 | opt_g = optimizer.minimize(self.loss['G'], var_list=g_vars, global_step=global_step) 25 | return { 26 | 'g': opt_g, 27 | 'global_step': global_step 28 | } 29 | 30 | 31 | def _refresh_status(self, sess): 32 | fetches = { 33 | "D_KL": self.loss['D_KL'], 34 | "logP": self.loss['logP'], 35 | "step": self.opt['global_step'], 36 | } 37 | result = sess.run( 38 | fetches=fetches, 39 | # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 40 | # run_metadata=run_metadata, 41 | ) 42 | 43 | # trace = timeline.Timeline(step_stats=run_metadata.step_stats) 44 | # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp: 45 | # fp.write(trace.generate_chrome_trace_format()) 46 | 47 | # Message 48 | msg = 'Iter {:05d}: '.format(result['step']) 49 | msg += 'log P(x|z, y) = {:.3e} '.format(result['logP']) 50 | msg += 'D_KL(z) = {:.3e} '.format(result['D_KL']) 51 | print('\r{}'.format(msg), end='', flush=True) 52 | logging.info(msg) 53 | 54 | 55 | # def _validate(self, machine, n=10): 56 | # N = n * n 57 | 58 | # # same row same z 59 | # z = tf.random_normal(shape=[n, self.arch['z_dim']]) 60 | # z = tf.tile(z, [1, n]) 61 | # z = tf.reshape(z, [N, -1]) 62 | # z = tf.Variable(z, trainable=False, dtype=tf.float32) 63 | 64 | # # same column same y 65 | # y = tf.range(0, 10, 1, dtype=tf.int64) 66 | # y = tf.reshape(y, [-1,]) 67 | # y = tf.tile(y, [n,]) 68 | 69 | # Xh = machine.generate(z, y) # 100, 64, 64, 3 70 | # Xh = make_png_thumbnail(Xh, n) 71 | # return Xh 72 | 73 | def train(self, nIter, machine=None, summary_op=None): 74 | # Xh = self._validate(machine=machine, n=10) 75 | 76 | run_metadata = tf.RunMetadata() 77 | 78 | sv = tf.train.Supervisor( 79 | logdir=self.dirs['logdir'], 80 | # summary_writer=summary_writer, 81 | # summary_op=None, 82 | # is_chief=True, 83 | save_model_secs=300, 84 | global_step=self.opt['global_step']) 85 | 86 | 87 | # sess_config = configure_gpu_settings(args.gpu_cfg) 88 | sess_config = tf.ConfigProto( 89 | allow_soft_placement=True, 90 | 
gpu_options=tf.GPUOptions(allow_growth=True)) 91 | 92 | with sv.managed_session(config=sess_config) as sess: 93 | sv.loop(60, self._refresh_status, (sess,)) 94 | for step in range(self.arch['training']['max_iter']): 95 | if sv.should_stop(): 96 | break 97 | 98 | # main loop 99 | sess.run(self.opt['g']) 100 | 101 | # # output img 102 | # if step % 1000 == 0: 103 | # xh = sess.run(Xh) 104 | # with tf.gfile.GFile( 105 | # os.path.join( 106 | # self.dirs['logdir'], 107 | # 'img-anime-{:03d}k.png'.format(step // 1000), 108 | # ), 109 | # mode='wb', 110 | # ) as fp: 111 | # fp.write(xh) 112 | 113 | 114 | 115 | class VAWGANTrainer(GANTrainer): 116 | def _optimize(self): 117 | ''' 118 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 119 | https://github.com/igul222/improved_wgan_training/issues/3 120 | ''' 121 | global_step = tf.Variable(0, name='global_step') 122 | lr = self.arch['training']['lr'] 123 | b1 = self.arch['training']['beta1'] 124 | b2 = self.arch['training']['beta2'] 125 | 126 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 127 | 128 | trainables = tf.trainable_variables() 129 | g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 130 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 131 | e_vars = [v for v in trainables if 'Encoder' in v.name] 132 | 133 | # # Debug =============== 134 | # debug(['Generator', 'Discriminator'], [g_vars, d_vars]) 135 | # # ============================ 136 | 137 | with tf.name_scope('Update'): 138 | opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars) 139 | opt_e = optimizer.minimize(self.loss['l_E'], var_list=e_vars) 140 | with tf.control_dependencies([opt_e]): 141 | opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars, global_step=global_step) 142 | return { 143 | 'd': opt_d, 144 | 'g': opt_g, 145 | 'e': opt_e, 146 | 'global_step': global_step 147 | } 148 | 149 | def train(self, nIter, machine=None, summary_op=None): 150 | # Xh = self._validate(machine=machine, n=10) 151 | 152 | run_metadata = tf.RunMetadata() 153 | 154 | # summary_op = tf.summary.merge_all() 155 | 156 | sv = tf.train.Supervisor( 157 | logdir=self.dirs['logdir'], 158 | # summary_writer=summary_writer, 159 | # summary_op=None, 160 | # is_chief=True, 161 | save_model_secs=100, 162 | global_step=self.opt['global_step']) 163 | 164 | 165 | # sess_config = configure_gpu_settings(args.gpu_cfg) 166 | sess_config = tf.ConfigProto( 167 | allow_soft_placement=True, 168 | gpu_options=tf.GPUOptions(allow_growth=True)) 169 | 170 | with sv.managed_session(config=sess_config) as sess: 171 | sv.loop(60, self._refresh_status, (sess,)) 172 | for step in range(self.arch['training']['max_iter']): 173 | if sv.should_stop(): 174 | break 175 | 176 | # main loop 177 | for _ in range(self.arch['training']['nIterD']): 178 | sess.run(self.opt['d']) 179 | sess.run(self.opt['g']) 180 | 181 | # # output img 182 | # if step % 1000 == 0: 183 | # xh = sess.run(Xh) 184 | # with tf.gfile.GFile( 185 | # os.path.join( 186 | # self.dirs['logdir'], 187 | # 'img-anime-{:03d}k.png'.format(step // 1000), 188 | # ), 189 | # mode='wb', 190 | # ) as fp: 191 | # fp.write(xh) 192 | 193 | def _refresh_status(self, sess): 194 | fetches = { 195 | "D_KL": self.loss['D_KL'], 196 | "logP": self.loss['logP'], 197 | "W_dist": self.loss['W_dist'], 198 | "gp": self.loss['gp'], 199 | "step": self.opt['global_step'], 200 | } 201 | result = sess.run( 202 | fetches=fetches, 203 | # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 204 | # 
run_metadata=run_metadata, 205 | ) 206 | 207 | # trace = timeline.Timeline(step_stats=run_metadata.step_stats) 208 | # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp: 209 | # fp.write(trace.generate_chrome_trace_format()) 210 | 211 | # Message 212 | msg = 'Iter {:05d}: '.format(result['step']) 213 | msg += 'W_dist = {:.4e} '.format(result['W_dist']) 214 | msg += 'log P(x|z, y) = {:.4e} '.format(result['logP']) 215 | msg += 'D_KL(z) = {:.4e} '.format(result['D_KL']) 216 | msg += 'GP = {:.4e} '.format(result['gp']) 217 | print('\r{}'.format(msg), end='', flush=True) 218 | logging.info(msg) -------------------------------------------------------------------------------- /analyzer.py: -------------------------------------------------------------------------------- 1 | # Assuming directory structure: 2 | # ./dataset/vcc2016/wav/Training Set/SF1/100001.wav 3 | 4 | import os 5 | from os.path import join 6 | 7 | import librosa 8 | import numpy as np 9 | import pyworld as pw 10 | import tensorflow as tf 11 | 12 | 13 | args = tf.app.flags.FLAGS 14 | tf.app.flags.DEFINE_string('dir_to_wav', './data/wav', 'Dir to *.wav') 15 | tf.app.flags.DEFINE_string('dir_to_bin', './data/bin', 'Dir to output *.bin') 16 | tf.app.flags.DEFINE_string('dir_to_features','../emotion_features','Dir to emotion features') 17 | tf.app.flags.DEFINE_integer('fs', 16000, 'Global sampling frequency') 18 | tf.app.flags.DEFINE_float('f0_ceil', 500, 'Global f0 ceiling') 19 | 20 | FFT_SIZE = 1024 21 | SP_DIM = FFT_SIZE // 2 + 1 22 | FEAT_DIM = SP_DIM + SP_DIM + 1 + 1 + 256 + 1 # [sp, ap, f0, en, emo_feats, s] 23 | RECORD_BYTES = FEAT_DIM * 4 # all features saved in `float32` 24 | 25 | EPSILON = 1e-10 26 | 27 | 28 | 29 | def dic2npy(features, feat_dim=513, fs=16000): 30 | ''' NOTE: Use `order='C'` to ensure Cython compatibility ''' 31 | en = np.reshape(features['en'], [-1, 1]) 32 | sp = np.power(10., features['sp']) 33 | sp = en * sp 34 | 35 | f0 = features['f0'] 36 | f0 = np.reshape(f0,[-1,1]) 37 | 38 | 39 | # lf0_cwt = features['lf0_cwt'] 40 | # uv = np.reshape(features['uv'], [-1, 1]) 41 | # speaker = np.reshape(features['speaker'],[-1,1]) 42 | # filename = features['filename'] 43 | feats = np.concatenate([sp,f0],axis=1) 44 | feats = np.float32(feats) 45 | return feats 46 | 47 | def wav2pw(x, fs=16000, fft_size=FFT_SIZE): 48 | ''' Extract WORLD feature from waveform ''' 49 | _f0, t = pw.dio(x, fs, f0_ceil=args.f0_ceil) # raw pitch extractor 50 | f0 = pw.stonemask(x, _f0, t, fs) # pitch refinement 51 | sp = pw.cheaptrick(x, f0, t, fs, fft_size=fft_size) 52 | ap = pw.d4c(x, f0, t, fs, fft_size=fft_size) # extract aperiodicity 53 | return { 54 | 'f0': f0, 55 | 'sp': sp, 56 | 'ap': ap, 57 | } 58 | 59 | 60 | def list_dir(path): 61 | ''' retrieve the 'short name' of the dirs ''' 62 | return sorted([f for f in os.listdir(path) if os.path.isdir(join(path, f))]) 63 | 64 | 65 | def list_full_filenames(path): 66 | ''' return a generator of full filenames ''' 67 | return ( 68 | join(path, f) 69 | for f in os.listdir(path) 70 | if not os.path.isdir(join(path, f))) 71 | 72 | def extract(filename, fft_size=FFT_SIZE, dtype=np.float32): 73 | ''' Basic (WORLD) feature extraction ''' 74 | x, _ = librosa.load(filename, sr=args.fs, mono=True, dtype=np.float64) 75 | features = wav2pw(x, args.fs, fft_size=fft_size) 76 | ap = features['ap'] 77 | f0 = features['f0'].reshape([-1, 1]) 78 | sp = features['sp'] 79 | en = np.sum(sp + EPSILON, axis=1, keepdims=True) 80 | sp = np.log10(sp / en) 81 | return np.concatenate([sp, ap, f0, 
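`analyzer.py` stores every frame as FEAT_DIM = 513 + 513 + 1 + 1 + 256 + 1 = 1285 float32 values (hence RECORD_BYTES = 5140). A sketch for inspecting one of the produced `.bin` files with plain NumPy (the file name is a placeholder):

```
import numpy as np

SP_DIM = 1024 // 2 + 1                    # 513
FEAT_DIM = SP_DIM * 2 + 1 + 1 + 256 + 1   # [sp, ap, f0, en, emo_feats, label]

frames = np.fromfile('100001.bin', np.float32).reshape([-1, FEAT_DIM])
sp    = frames[:, :SP_DIM]               # energy-normalized log10 spectral envelope
ap    = frames[:, SP_DIM:2 * SP_DIM]     # aperiodicity
f0    = frames[:, 2 * SP_DIM]            # pitch contour
en    = frames[:, 2 * SP_DIM + 1]        # per-frame energy
emo   = frames[:, 2 * SP_DIM + 2:-1]     # 256-dim emotion feature
label = frames[:, -1]                    # speaker/emotion index
```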
en], axis=1).astype(dtype) 82 | 83 | def extract_and_save_bin_to(dir_to_wav, dir_to_bin, dir_to_features, speakers): 84 | ''' 85 | NOTE: the directory structure must be [args.dir_to_wav]/[Set]/[speakers] 86 | ''' 87 | counter = 1 88 | N = len(tf.gfile.Glob(join(dir_to_wav, '*', '*', '*.wav'))) 89 | for d in list_dir(dir_to_wav): # ['Training Set', 'Testing Set'] 90 | path = join(dir_to_wav, d) 91 | for s in list_dir(path): # ['SF1', ..., 'TM3'] 92 | path = join(dir_to_wav, d, s) 93 | output_dir = join(dir_to_bin, d, s) 94 | tf.gfile.MakeDirs(output_dir) 95 | for f in list_full_filenames(path): # ['10001.wav', ...] 96 | print('\rFile {}/{}: {:50}'.format(counter, N, f), end='') 97 | features = extract(f) # [sp, ap, f0, en] 98 | labels = speakers.index(s) * np.ones( 99 | [features.shape[0], 1], 100 | np.float32, 101 | ) 102 | 103 | b = os.path.splitext(f)[0] # ['10001', ...] 104 | _, b = os.path.split(b) 105 | 106 | 107 | #emo_path = join(dir_to_features, s) 108 | #with open(join(emo_path,'{}.npy'.format(b)), 'r') as file_dir: 109 | # print(file_dir) 110 | emo = np.load(dir_to_features + '/' + s + '/' + b + '.npy') 111 | emo_feats = emo * np.ones( 112 | [features.shape[0], 1], 113 | np.float32, 114 | ) 115 | 116 | features = np.concatenate([features, emo_feats, labels], 1) 117 | with open(join(output_dir, '{}.bin'.format(b)), 'wb') as fp: 118 | fp.write(features.tostring()) 119 | counter += 1 120 | print() 121 | 122 | 123 | class Tanhize(object): 124 | ''' Normalizing `x` to [-1, 1] ''' 125 | def __init__(self, xmin, xmax): 126 | self.xmin = xmin 127 | self.xmax = xmax 128 | self.xscale = xmax - xmin 129 | 130 | def forward_process(self, x): 131 | x = (x - self.xmin) / self.xscale 132 | return tf.clip_by_value(x, 0., 1.) * 2. - 1. 133 | 134 | def backward_process(self, x): 135 | return (x * .5 + .5) * self.xscale + self.xmin 136 | 137 | def read( 138 | file_pattern, 139 | batch_size, 140 | record_bytes=RECORD_BYTES, 141 | capacity=2048, 142 | min_after_dequeue=1536, 143 | num_threads=8, 144 | data_format='NCHW', 145 | normalizer=None, 146 | ): 147 | ''' 148 | Read only `sp` and `speaker` 149 | Return: 150 | `feature`: [b, c] 151 | `speaker`: [b,] 152 | ''' 153 | with tf.device('cpu'): 154 | with tf.name_scope('InputSpectralFrame'): 155 | files = tf.gfile.Glob(file_pattern) 156 | filename_queue = tf.train.string_input_producer(files) 157 | 158 | 159 | reader = tf.FixedLengthRecordReader(record_bytes) 160 | _, value = reader.read(filename_queue) 161 | value = tf.decode_raw(value, tf.float32) 162 | 163 | value = tf.reshape(value, [FEAT_DIM,]) 164 | feature = value[:SP_DIM] # NCHW format 165 | 166 | if normalizer is not None: 167 | feature = normalizer.forward_process(feature) 168 | 169 | if data_format == 'NCHW': 170 | feature = tf.reshape(feature, [1, SP_DIM, 1]) 171 | elif data_format == 'NHWC': 172 | feature = tf.reshape(feature, [SP_DIM, 1, 1]) 173 | else: 174 | pass 175 | 176 | #speaker = tf.cast(value[-1], tf.int64) 177 | 178 | #zk 179 | f0 = tf.cast(value[-259], tf.int64) 180 | 181 | #speaker = tf.cast(value[-257:-1],tf.int64) 182 | 183 | 184 | #f0 = value[-259] 185 | speaker = value[-257:-1] 186 | 187 | 188 | return tf.train.shuffle_batch( 189 | [feature, speaker, f0], 190 | batch_size, 191 | capacity=capacity, 192 | min_after_dequeue=min_after_dequeue, 193 | num_threads=num_threads, 194 | # enqueue_many=True, 195 | ) 196 | 197 | 198 | def read_whole_features(file_pattern, num_epochs=1): 199 | ''' 200 | Return 201 | `feature`: `dict` whose keys are `sp`, `ap`, `f0`, `en`, `speaker` 202 
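`Tanhize` squashes features into [-1, 1] using corpus-wide min/max statistics; the `etc/emotion_vc_xmin.npf` / `emotion_vc_xmax.npf` files presumably hold those statistics as raw float32 (an assumption; the loading code is not part of this snapshot). A NumPy round-trip sketch:

```
import numpy as np

xmin = np.fromfile('./etc/emotion_vc_xmin.npf', np.float32)   # assumed raw-float32 layout
xmax = np.fromfile('./etc/emotion_vc_xmax.npf', np.float32)
xscale = xmax - xmin

def forward(x):    # mirrors Tanhize.forward_process
    return np.clip((x - xmin) / xscale, 0., 1.) * 2. - 1.

def backward(y):   # mirrors Tanhize.backward_process
    return (y * .5 + .5) * xscale + xmin

x = xmin + 0.3 * xscale   # any in-range value survives the round trip
assert np.allclose(backward(forward(x)), x, atol=1e-4)
```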
| ''' 203 | with tf.device('cpu'): 204 | with tf.name_scope('InputPipline'): 205 | files = tf.gfile.Glob(file_pattern) 206 | print('{} files found'.format(len(files))) 207 | filename_queue = tf.train.string_input_producer(files, num_epochs=num_epochs) 208 | reader = tf.WholeFileReader() 209 | key, value = reader.read(filename_queue) 210 | print("Processing {}".format(key), flush=True) 211 | value = tf.decode_raw(value, tf.float32) 212 | value = tf.reshape(value, [-1, FEAT_DIM]) 213 | return { 214 | 'sp': value[:, :SP_DIM], 215 | 'ap': value[:, SP_DIM : 2*SP_DIM], 216 | 'f0': value[:, SP_DIM * 2], 217 | 'en': value[:, SP_DIM * 2 + 1], 218 | 'speaker': value[:, SP_DIM * 2 + 1 + 1 : SP_DIM * 2 + 1 +1 +256], 219 | 'filename': key, 220 | } 221 | 222 | 223 | def pw2wav(features, feat_dim=513, fs=16000): 224 | ''' NOTE: Use `order='C'` to ensure Cython compatibility ''' 225 | en = np.reshape(features['en'], [-1, 1]) 226 | sp = np.power(10., features['sp']) 227 | sp = en * sp 228 | if isinstance(features, dict): 229 | return pw.synthesize( 230 | features['f0'].astype(np.float64).copy(order='C'), 231 | sp.astype(np.float64).copy(order='C'), 232 | features['ap'].astype(np.float64).copy(order='C'), 233 | fs, 234 | ) 235 | features = features.astype(np.float64) 236 | sp = features[:, :feat_dim] 237 | ap = features[:, feat_dim:feat_dim*2] 238 | f0 = features[:, feat_dim*2] 239 | en = features[:, feat_dim*2 + 1] 240 | en = np.reshape(en, [-1, 1]) 241 | sp = np.power(10., sp) 242 | sp = en * sp 243 | return pw.synthesize( 244 | f0.copy(order='C'), 245 | sp.copy(order='C'), 246 | ap.copy(order='C'), 247 | fs 248 | ) 249 | 250 | 251 | def make_speaker_tsv(path): 252 | speakers = [] 253 | for d in list_dir(path): 254 | speakers += list_dir(join(path, d)) 255 | speakers = sorted(set(speakers)) 256 | with open('./etc/speakers.tsv', 'w') as fp: 257 | for s in speakers: 258 | fp.write('{}\n'.format(s)) 259 | return speakers 260 | 261 | if __name__ == '__main__': 262 | speakers = make_speaker_tsv(args.dir_to_wav) 263 | extract_and_save_bin_to( 264 | args.dir_to_wav, 265 | args.dir_to_bin, 266 | args.dir_to_features, 267 | speakers, 268 | ) 269 | -------------------------------------------------------------------------------- /model/vawgan.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from tensorflow.contrib import slim 3 | import tensorflow as tf 4 | from util.layers import GaussianLogDensity, GaussianKLD, \ 5 | GaussianSampleLayer, lrelu 6 | 7 | class VAWGAN(object): 8 | ''' 9 | VC-GAN 10 | = CVAE-CGAN 11 | = Convolutional Variational Auto-encoder 12 | with Conditional Generative Adversarial Net 13 | ''' 14 | def __init__(self, arch, is_training=False): 15 | self.arch = arch 16 | self._sanity_check() 17 | self.is_training = is_training 18 | 19 | with tf.name_scope('Generator'): 20 | self.y_emb = self._unit_embedding( 21 | self.arch['y_dim'], 22 | self.arch['z_dim'], 23 | 'y_embedding') 24 | self.f_emb = self._unit_embedding( 25 | self.arch['y_dim'], 26 | self.arch['f_dim'], 27 | 'f_embedding') 28 | 29 | self._generate = tf.make_template( 30 | 'Generator', 31 | self._generator) 32 | self._discriminate = tf.make_template( 33 | 'Discriminator', 34 | self._discriminator) 35 | self._encode = tf.make_template( 36 | 'Encoder', 37 | self._encoder) 38 | 39 | 40 | def _sanity_check(self): 41 | for net in ['encoder', 'generator', 'discriminator']: 42 | assert len(self.arch[net]['output']) > 2 43 | assert len(self.arch[net]['output']) == 
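`pw2wav` above inverts `extract`: it un-normalizes the log spectrum with the stored energy and hands the WORLD features back to `pyworld`. A hedged analysis/synthesis round-trip sketch using the same calls and constants (file names are placeholders; `soundfile` is an assumed writer, since `librosa.output` was removed in librosa 0.8):

```
import librosa
import numpy as np
import pyworld as pw
import soundfile as sf

fs = 16000
x, _ = librosa.load('input.wav', sr=fs, mono=True, dtype=np.float64)
_f0, t = pw.dio(x, fs, f0_ceil=500.)             # raw pitch, as in wav2pw
f0 = pw.stonemask(x, _f0, t, fs)                 # pitch refinement
sp = pw.cheaptrick(x, f0, t, fs, fft_size=1024)  # spectral envelope
ap = pw.d4c(x, f0, t, fs, fft_size=1024)         # aperiodicity
y = pw.synthesize(f0, sp, ap, fs)                # back to a waveform
sf.write('roundtrip.wav', y, fs)
```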
len(self.arch[net]['kernel']) 44 | assert len(self.arch[net]['output']) == len(self.arch[net]['stride']) 45 | 46 | 47 | def _unit_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 48 | with tf.variable_scope(scope_name): 49 | embeddings = tf.get_variable( 50 | name=var_name, 51 | shape=[n_class, h_dim]) 52 | embeddings = tf.nn.l2_normalize(embeddings, dim=-1, name=var_name+'normalized') 53 | return embeddings 54 | 55 | 56 | def _merge(self, var_list, fan_out, l2_reg=1e-6): 57 | ''' 58 | Note: Don't apply BN on this because 'y' 59 | tends to be the same inside a batch. 60 | ''' 61 | x = 0. 62 | with slim.arg_scope( 63 | [slim.fully_connected], 64 | num_outputs=fan_out, 65 | weights_regularizer=slim.l2_regularizer(l2_reg), 66 | normalizer_fn=None, 67 | activation_fn=None): 68 | for var in var_list: 69 | x = x + slim.fully_connected(var) 70 | x = slim.bias_add(x) 71 | return x 72 | 73 | 74 | def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 75 | with tf.variable_scope(scope_name): 76 | embeddings = tf.get_variable( 77 | name=var_name, 78 | shape=[n_class, h_dim], 79 | regularizer=slim.l2_regularizer(1e-6)) 80 | return embeddings 81 | 82 | 83 | def _encoder(self, x, is_training): 84 | n_layer = len(self.arch['encoder']['output']) 85 | subnet = self.arch['encoder'] 86 | 87 | with slim.arg_scope( 88 | [slim.batch_norm], 89 | scale=True, scope='BN', 90 | updates_collections=None, 91 | decay=0.9, epsilon=1e-5, # [TODO] Test these hyper-parameters 92 | is_training=is_training): 93 | with slim.arg_scope( 94 | [slim.conv2d], 95 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 96 | normalizer_fn=slim.batch_norm, 97 | activation_fn=lrelu): 98 | 99 | for i in range(n_layer): 100 | x = slim.conv2d( 101 | x, 102 | subnet['output'][i], 103 | subnet['kernel'][i], 104 | subnet['stride'][i]) 105 | tf.summary.image( 106 | 'down-sample{:d}'.format(i), 107 | tf.transpose(x[:, :, :, 0:3], [2, 1, 0, 3])) 108 | 109 | x = slim.flatten(x) 110 | 111 | with slim.arg_scope( 112 | [slim.fully_connected], 113 | num_outputs=self.arch['z_dim'], 114 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 115 | normalizer_fn=None, 116 | activation_fn=None): 117 | z_mu = slim.fully_connected(x) 118 | z_lv = slim.fully_connected(x) 119 | return z_mu, z_lv 120 | 121 | 122 | def _generator(self, z, y, f, is_training): 123 | ''' In this version, we only generate the target, so `y` is useless ''' 124 | subnet = self.arch['generator'] 125 | n_layer = len(subnet['output']) 126 | h, w, c = subnet['hwc'] 127 | 128 | #y = tf.nn.embedding_lookup(self.y_emb, y) 129 | f = tf.nn.embedding_lookup(self.f_emb, f) 130 | 131 | x = self._merge([z, y, f], subnet['merge_dim']) 132 | x = lrelu(x) 133 | with slim.arg_scope( 134 | [slim.batch_norm], 135 | scale=True, scope='BN', 136 | updates_collections=None, 137 | decay=0.9, epsilon=1e-5, 138 | is_training=is_training): 139 | 140 | x = slim.fully_connected( 141 | x, 142 | h * w * c, 143 | normalizer_fn=slim.batch_norm, 144 | activation_fn=lrelu) 145 | 146 | x = tf.reshape(x, [-1, h, w, c]) 147 | 148 | with slim.arg_scope( 149 | [slim.conv2d_transpose], 150 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 151 | normalizer_fn=slim.batch_norm, 152 | activation_fn=lrelu): 153 | 154 | for i in range(n_layer -1): 155 | x = slim.conv2d_transpose( 156 | x, 157 | subnet['output'][i], 158 | subnet['kernel'][i], 159 | subnet['stride'][i] 160 | # normalizer_fn=None 161 | ) 162 | 163 | # Don't apply BN for the last layer of G 164 | x = 
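`_merge` above conditions the generator by giving each input (`z`, `y`, `f`) its own bias-free linear projection to a shared width, summing the projections, and adding a single bias; batch norm is deliberately skipped because `y` is near-constant within a batch. In plain NumPy the operation is (random weights stand in for the slim layers):

```
import numpy as np

rng = np.random.default_rng(0)

def merge(var_list, fan_out):
    """sum_i (v_i @ W_i) + b -- one linear map per input, one shared bias."""
    x = sum(v @ rng.standard_normal((v.shape[-1], fan_out)) for v in var_list)
    return x + np.zeros(fan_out)   # bias (zero-initialized here)

z, y, f = rng.standard_normal((1, 64)), rng.standard_normal((1, 8)), rng.standard_normal((1, 8))
print(merge([z, y, f], fan_out=128).shape)   # (1, 128)
```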
slim.conv2d_transpose( 165 | x, 166 | subnet['output'][-1], 167 | subnet['kernel'][-1], 168 | subnet['stride'][-1], 169 | normalizer_fn=None, 170 | activation_fn=None) 171 | 172 | logit = x 173 | x = tf.nn.tanh(logit) 174 | return x, logit 175 | 176 | 177 | def _discriminator(self, x, is_training): 178 | ''' Note: In this version, `y` is useless ''' 179 | subnet = self.arch['discriminator'] 180 | n_layer = len(subnet['output']) 181 | 182 | intermediate = list() 183 | intermediate.append(x) 184 | 185 | # x = tf.concat(3, [x, y_vec]) # inject y into x 186 | with slim.arg_scope( 187 | [slim.batch_norm], 188 | scale=True, scope='BN', 189 | updates_collections=None, 190 | decay=0.9, epsilon=1e-5, 191 | is_training=is_training): 192 | with slim.arg_scope( 193 | [slim.conv2d], 194 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 195 | normalizer_fn=slim.batch_norm, 196 | activation_fn=lrelu): 197 | 198 | # Radford: [do] not applying batchnorm to the discriminator input layer 199 | x = slim.conv2d( 200 | x, 201 | subnet['output'][0], 202 | subnet['kernel'][0], 203 | subnet['stride'][0], 204 | normalizer_fn=None) 205 | intermediate.append(x) 206 | for i in range(1, n_layer): 207 | x = slim.conv2d( 208 | x, 209 | subnet['output'][i], 210 | subnet['kernel'][i], 211 | subnet['stride'][i]) 212 | intermediate.append(x) 213 | tf.summary.image( 214 | 'upsampling{:d}'.format(i), 215 | tf.transpose(x[:, :, :, 0:3], [2, 1, 0, 3])) 216 | 217 | # Don't apply BN for the last layer 218 | x = slim.flatten(x) 219 | h = slim.flatten(intermediate[subnet['feature_layer'] - 1]) 220 | 221 | x = slim.fully_connected( 222 | x, 223 | 1, 224 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 225 | activation_fn=None) 226 | 227 | return x, h # no explicit `sigmoid` 228 | 229 | 230 | def loss(self, x_s, y_s,f0_s, x_t, y_t,f0_t): 231 | def circuit_loop(x, y,f): 232 | 233 | z_mu, z_lv = self._encode(x, is_training=self.is_training) 234 | z = GaussianSampleLayer(z_mu, z_lv) 235 | 236 | x_logit, x_feature = self._discriminate( 237 | x, is_training=self.is_training) 238 | 239 | xh, xh_sig_logit = self._generate(z, y, f, is_training=self.is_training) 240 | 241 | zh_mu, zh_lv = self._encode(xh, is_training=self.is_training) 242 | 243 | xh_logit, xh_feature = self._discriminate( 244 | xh, is_training=self.is_training) 245 | 246 | return dict( 247 | z=z, 248 | z_mu=z_mu, 249 | z_lv=z_lv, 250 | xh=xh, 251 | xh_sig_logit=xh_sig_logit, 252 | x_logit=x_logit, 253 | x_feature=x_feature, 254 | zh_mu=zh_mu, 255 | zh_lv=zh_lv, 256 | xh_logit=xh_logit, 257 | xh_feature=xh_feature, 258 | ) 259 | 260 | s = circuit_loop(x_s, y_s, f0_s) 261 | t = circuit_loop(x_t, y_t, f0_t) 262 | s2t = circuit_loop(x_s, y_t,f0_t) 263 | 264 | with tf.name_scope('loss'): 265 | def mean_sigmoid_cross_entropy_with_logits(logit, truth): 266 | ''' 267 | truth: 0. or 1. 268 | ''' 269 | return tf.reduce_mean( 270 | tf.nn.sigmoid_cross_entropy_with_logits( 271 | logit, 272 | truth * tf.ones_like(logit))) 273 | 274 | loss = dict() 275 | 276 | # Parallel 277 | loss['reconst_t'] = \ 278 | tf.reduce_mean(t['x_logit']) \ 279 | - tf.reduce_mean(t['xh_logit']) 280 | 281 | # Parallel 282 | loss['reconst_s'] = \ 283 | tf.reduce_mean(s['x_logit']) \ 284 | - tf.reduce_mean(s['xh_logit']) 285 | 286 | # Non-parallel 287 | loss['conv_s2t'] = \ 288 | tf.reduce_mean(t['x_logit']) \ 289 | - tf.reduce_mean(s2t['xh_logit']) 290 | 291 | # Non-parallel: s v. 
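`circuit_loop` above draws the latent code with `GaussianSampleLayer(z_mu, z_lv)`. Assuming the usual reparameterization trick for that helper (it lives in `util/layers.py`, which is not shown here), the sampling step amounts to:

```
import numpy as np

def gaussian_sample(mu, log_var):
    """z = mu + exp(log_var / 2) * eps, eps ~ N(0, I);
    keeps z differentiable w.r.t. mu and log_var."""
    eps = np.random.default_rng().standard_normal(np.shape(mu))
    return mu + np.exp(0.5 * log_var) * eps

z = gaussian_sample(np.zeros(64), np.zeros(64))   # a standard-normal draw
```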
t 292 | loss['real_s_t'] = \ 293 | tf.reduce_mean(t['x_logit']) \ 294 | - tf.reduce_mean(s['x_logit']) 295 | 296 | # That's why I only take the last term into consideration 297 | loss['WGAN'] = loss['conv_s2t'] 298 | 299 | # VAE's Kullback-Leibler Divergence 300 | loss['KL(z)'] = \ 301 | tf.reduce_mean( 302 | GaussianKLD( 303 | s['z_mu'], s['z_lv'], 304 | tf.zeros_like(s['z_mu']), tf.zeros_like(s['z_lv']))) +\ 305 | tf.reduce_mean( 306 | GaussianKLD( 307 | t['z_mu'], t['z_lv'], 308 | tf.zeros_like(t['z_mu']), tf.zeros_like(t['z_lv']))) 309 | loss['KL(z)'] /= 2.0 310 | 311 | # VAE's Reconstruction Neg. Log-Likelihood (on the 'feature' space of Dx) 312 | loss['Dis'] = \ 313 | tf.reduce_mean( 314 | GaussianLogDensity( 315 | slim.flatten(x_t), 316 | slim.flatten(t['xh']), 317 | tf.zeros_like(slim.flatten(x_t)))) +\ 318 | tf.reduce_mean( 319 | GaussianLogDensity( 320 | slim.flatten(x_s), 321 | slim.flatten(s['xh']), 322 | tf.zeros_like(slim.flatten(x_s)))) 323 | loss['Dis'] /= - 2.0 324 | 325 | # For summaries 326 | with tf.name_scope('Summary'): 327 | tf.summary.scalar('DKL_z', loss['KL(z)']) 328 | tf.summary.scalar('MMSE', loss['Dis']) 329 | 330 | 331 | tf.summary.scalar('WGAN', loss['WGAN']) 332 | tf.summary.scalar('WGAN-s', loss['reconst_s']) 333 | tf.summary.scalar('WGAN-t', loss['reconst_t']) 334 | tf.summary.scalar('WGAN-s2t', loss['conv_s2t']) 335 | tf.summary.scalar('WGAN-t-s', loss['real_s_t']) 336 | 337 | tf.summary.histogram('y', tf.concat([y_t, y_s], 0)) 338 | tf.summary.histogram('z', tf.concat([s['z'], t['z']], 0)) 339 | 340 | tf.summary.histogram('z_s', s['z']) 341 | tf.summary.histogram('z_t', t['z']) 342 | 343 | tf.summary.histogram('z_mu', tf.concat([s['z_mu'], t['z_mu']], 0)) 344 | tf.summary.histogram('z_mu_s', s['z_mu']) 345 | tf.summary.histogram('z_mu_t', t['z_mu']) 346 | 347 | tf.summary.histogram('z_lv', tf.concat([s['z_lv'], t['z_lv']], 0)) 348 | tf.summary.histogram('z_lv_s', s['z_lv']) 349 | tf.summary.histogram('z_lv_t', t['z_lv']) 350 | 351 | tf.summary.histogram('logit_t_from_t', t['xh_logit']) 352 | tf.summary.histogram('logit_t_from_s', s2t['xh_logit']) 353 | tf.summary.histogram('logit_t', t['x_logit']) 354 | 355 | tf.summary.histogram( 356 | 'logit_t_True_FromT_FromS', 357 | tf.concat([t['x_logit'], t['xh_logit'], s2t['xh_logit']], 0)) 358 | tf.summary.histogram( 359 | 'logit_s_v_sh', 360 | tf.concat([s['x_logit'], s['xh_logit']], 0)) 361 | tf.summary.histogram( 362 | 'logit_t_v_th', 363 | tf.concat([t['x_logit'], t['xh_logit']], 0)) 364 | return loss 365 | 366 | 367 | def encode(self, x): 368 | z_mu, z_lv = self._encode(x, is_training=False) 369 | return z_mu 370 | 371 | def decode(self, z, y, f, tanh=False): 372 | xh, _ = self._generate(z, y, f, is_training=False) 373 | return xh 374 | 375 | def discriminate(self, x): 376 | ''' 377 | To estimate the EMD, we need D to assign a score per sample. 
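`loss['Dis']` above scores reconstructions with `GaussianLogDensity(x, xh, 0)`: the log-likelihood of `x` under a unit-variance Gaussian centred on the reconstruction (zero log-variance), averaged and negated. Assuming the standard closed form for that `util/layers.py` helper:

```
import numpy as np

def gaussian_log_density(x, mu, log_var):
    """Log N(x; mu, exp(log_var)), summed over the feature axis."""
    c = np.log(2. * np.pi)
    return np.sum(-0.5 * (c + log_var + (x - mu) ** 2 / np.exp(log_var)), axis=-1)

x = np.zeros((1, 4))
print(gaussian_log_density(x, x, np.zeros_like(x)))   # maximum: -0.5 * 4 * log(2*pi)
```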
378 | *The batches can be of different size 379 | ''' 380 | s, _ = self._discriminate(x, is_training=False) 381 | return s 382 | -------------------------------------------------------------------------------- /trainer/gan.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import logging, os 4 | from util.image import make_png_thumbnail, make_png_jet_thumbnail 5 | 6 | def debug(labels, var_list): 7 | for name, vs in zip(labels, var_list): 8 | print(name) 9 | for v in vs: 10 | print(v.name) 11 | print() 12 | 13 | class GANTrainer(object): 14 | def __init__(self, loss, arch, args, dirs): 15 | self.loss = loss 16 | self.arch = arch 17 | self.args = args 18 | self.dirs = dirs 19 | self.opt = self._optimize() 20 | logging.basicConfig( 21 | level=logging.INFO, 22 | filename=os.path.join(dirs['logdir'], 'training.log') 23 | ) 24 | 25 | def _optimize(self): 26 | ''' 27 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 28 | https://github.com/igul222/improved_wgan_training/issues/3 29 | ''' 30 | global_step = tf.Variable(0, name='global_step') 31 | lr = self.arch['training']['lr'] 32 | b1 = self.arch['training']['beta1'] 33 | b2 = self.arch['training']['beta2'] 34 | 35 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 36 | 37 | trainables = tf.trainable_variables() 38 | g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 39 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 40 | 41 | # # Debug =============== 42 | # debug(['Generator', 'Discriminator'], [g_vars, d_vars]) 43 | # # ============================ 44 | 45 | with tf.name_scope('Update'): 46 | opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars, global_step=global_step) 47 | opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars) 48 | return { 49 | 'd': opt_d, 50 | 'g': opt_g, 51 | 'global_step': global_step 52 | } 53 | 54 | 55 | def _validate(self, machine, n=10): 56 | N = n * n 57 | 58 | z = np.random.uniform(-np.pi, np.pi, size=[n, self.arch['z_dim']]) 59 | z = np.cos(z) 60 | z = np.concatenate([z] * n, axis=1) 61 | z = np.reshape(z, [N, -1]).astype(np.float32) # consecutive rows 62 | # y = np.random.randint(1, arch['y_dim'], size=[N, 2]) 63 | y = np.asarray( 64 | [[5, 0, 0 ], # 5 65 | [9, 0, 0 ], # 9 66 | [12, 0, 0 ], # 2 67 | [17, 0, 0 ], # 7 68 | [19, 0, 0 ], 69 | [161, 0, 0 ], 70 | [170, 0, 0 ], 71 | [170, 16, 0 ], 72 | [161, 9, 4 ], 73 | [19, 24, 50]], 74 | dtype=np.int64) 75 | y = np.concatenate([y] * n, axis=0) 76 | 77 | Z = tf.constant(z) 78 | Y = tf.constant(y) 79 | Xh = machine.generate(Z, Y) # 100, 64, 64, 3 80 | Xh = make_png_thumbnail(Xh, n) 81 | return Xh 82 | 83 | 84 | def _refresh_status(self, sess): 85 | fetches = { 86 | "l_D": self.loss['l_D'], 87 | "l_G": self.loss['l_G'], 88 | "step": self.opt['global_step'], 89 | } 90 | result = sess.run( 91 | fetches=fetches, 92 | # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 93 | # run_metadata=run_metadata, 94 | ) 95 | 96 | # trace = timeline.Timeline(step_stats=run_metadata.step_stats) 97 | # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp: 98 | # fp.write(trace.generate_chrome_trace_format()) 99 | 100 | # Message 101 | msg = 'Iter {:05d}: '.format(result['step']) 102 | msg += 'l_D={:.3e} '.format(result['l_D']) 103 | msg += 'l_G={:.3e} '.format(result['l_G']) 104 | print('\r{}'.format(msg), end='', flush=True) 105 | logging.info(msg) 106 | 107 | 108 | def train(self, 
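The name-based variable partitioning used throughout `trainer/gan.py` (`'Generator' in v.name`, `'Discriminator' in v.name`) is reliable because `tf.make_template` pins each sub-network to a fixed variable scope and reuses it on every call. A minimal sketch (TensorFlow 1.x assumed, as in the rest of the repo):

```
import tensorflow as tf   # TensorFlow 1.x assumed

def net(x):
    return tf.layers.dense(x, 4, name='fc')

gen = tf.make_template('Generator', net)
x = tf.placeholder(tf.float32, [None, 3])
_ = gen(x)   # creates Generator/fc/* variables
_ = gen(x)   # reuses them -- no duplicates

print([v.name for v in tf.trainable_variables() if 'Generator' in v.name])
# ['Generator/fc/kernel:0', 'Generator/fc/bias:0']
```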
nIter, machine=None, summary_op=None): 109 | Xh = self._validate(machine=machine, n=10) 110 | 111 | run_metadata = tf.RunMetadata() 112 | 113 | # summary_op = tf.summary.merge_all() 114 | 115 | sv = tf.train.Supervisor( 116 | logdir=self.dirs['logdir'], 117 | # summary_writer=summary_writer, 118 | # summary_op=None, 119 | # is_chief=True, 120 | # save_model_secs=600, 121 | global_step=self.opt['global_step']) 122 | 123 | 124 | # sess_config = configure_gpu_settings(args.gpu_cfg) 125 | sess_config = tf.ConfigProto( 126 | allow_soft_placement=True, 127 | gpu_options=tf.GPUOptions(allow_growth=True)) 128 | 129 | with sv.managed_session(config=sess_config) as sess: 130 | sv.loop(60, self._refresh_status, (sess,)) 131 | for step in range(self.arch['training']['max_iter']): 132 | if sv.should_stop(): 133 | break 134 | 135 | # main loop 136 | for _ in range(self.arch['training']['nIterD']): 137 | sess.run(self.opt['d']) 138 | sess.run(self.opt['g']) 139 | 140 | # output img 141 | if step % 1000 == 0: 142 | xh = sess.run(Xh) 143 | with tf.gfile.GFile( 144 | os.path.join( 145 | self.dirs['logdir'], 146 | 'img-anime-{:03d}k.png'.format(step // 1000), 147 | ), 148 | mode='wb', 149 | ) as fp: 150 | fp.write(xh) 151 | 152 | 153 | 154 | 155 | class FisherGANTrainer(GANTrainer): 156 | def _optimize(self): 157 | ''' 158 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 159 | https://github.com/igul222/improved_wgan_training/issues/3 160 | ''' 161 | global_step = tf.Variable(0, name='global_step') 162 | lr = self.arch['training']['lr'] 163 | b1 = self.arch['training']['beta1'] 164 | b2 = self.arch['training']['beta2'] 165 | rho = self.arch['training']['rho'] 166 | 167 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 168 | optimizer_l = tf.train.GradientDescentOptimizer(rho) 169 | 170 | trainables = tf.trainable_variables() 171 | g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 172 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 173 | l_vars = [v for v in trainables if 'lambda' in v.name] 174 | 175 | # # Debug =============== 176 | # debug(['G', 'D', 'lambda'], [g_vars, d_vars, l_vars]) 177 | # # ============================ 178 | 179 | with tf.name_scope('Update'): 180 | opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars, global_step=global_step) 181 | opt_l = optimizer_l.minimize(- self.loss['l_D'], var_list=l_vars) 182 | with tf.control_dependencies([opt_l]): 183 | opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars) 184 | return { 185 | 'd': opt_d, 186 | 'g': opt_g, 187 | 'l': opt_l, 188 | 'global_step': global_step 189 | } 190 | 191 | def _validate(self, machine, n=10): 192 | N = n * n 193 | 194 | # same row same z 195 | z = tf.random_normal(shape=[n, self.arch['z_dim']]) 196 | z = tf.tile(z, [1, n]) 197 | z = tf.reshape(z, [N, -1]) 198 | z = tf.Variable(z, trainable=False, dtype=tf.float32) 199 | 200 | # same column same y 201 | y = tf.range(0, 10, 1, dtype=tf.int64) 202 | y = tf.reshape(y, [-1, 1]) 203 | y = tf.tile(y, [n, 1]) 204 | 205 | Xh = machine.generate(z, y) # 100, 64, 64, 3 206 | # Xh = gray2jet(Xh) 207 | # Xh = make_png_thumbnail(Xh, n) 208 | Xh = make_png_jet_thumbnail(Xh, n) 209 | return Xh 210 | 211 | 212 | class FisherGANTrainerHw3(FisherGANTrainer): 213 | def _validate(self, machine, n=10): 214 | N = n * n 215 | z = np.random.normal(0., 1., size=[n, self.arch['z_dim']]) 216 | z = np.concatenate([z] * n, axis=1) 217 | z = np.reshape(z, [N, -1]).astype(np.float32) # consecutive rows 218 | y = 
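The `_validate` methods build an n-by-n preview grid in which every row shares one latent `z` and every column shares one label `y`. The tiling in isolation, as a NumPy sketch (`machine.generate` would consume the result):

```
import numpy as np

n, z_dim = 3, 4
z = np.random.normal(size=[n, z_dim])
z = np.reshape(np.concatenate([z] * n, axis=1), [n * n, z_dim])  # consecutive rows share a z

y = np.tile(np.arange(n), n)                                     # columns cycle through labels

print(z.shape, y.shape)   # (9, 4) (9,)
```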
np.asarray( 219 | [[5, 0, 0 ], 220 | [9, 0, 0 ], 221 | [12, 0, 0 ], 222 | [17, 0, 0 ], 223 | [19, 0, 0 ], 224 | [161, 0, 0 ], 225 | [170, 0, 0 ], 226 | [170, 16, 0 ], 227 | [161, 9, 4 ], 228 | [19, 24, 50]], 229 | dtype=np.int64) 230 | y = np.concatenate([y] * n, axis=0) 231 | Z = tf.constant(z) 232 | Y = tf.constant(y) 233 | Xh = machine.generate(Z, Y) # 100, 64, 64, 3 234 | Xh = make_png_thumbnail(Xh, n) 235 | return Xh 236 | 237 | 238 | 239 | class BiFisherGANTrainer(FisherGANTrainer): 240 | def _optimize(self): 241 | ''' 242 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 243 | https://github.com/igul222/improved_wgan_training/issues/3 244 | ''' 245 | global_step = tf.Variable(0, name='global_step') 246 | lr = self.arch['training']['lr'] 247 | b1 = self.arch['training']['beta1'] 248 | b2 = self.arch['training']['beta2'] 249 | rho = self.arch['training']['rho'] 250 | 251 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 252 | optimizer_l = tf.train.GradientDescentOptimizer(rho) 253 | 254 | trainables = tf.trainable_variables() 255 | g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 256 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 257 | l_vars = [v for v in trainables if 'lambda' in v.name] 258 | e_vars = [v for v in trainables if 'Encoder' in v.name] 259 | 260 | # # Debug =============== 261 | # for name, vs in zip( 262 | # ['Generator', 'Discriminator', 'Encoder', 'lambda'], 263 | # [g_vars, d_vars, e_vars, l_vars]): 264 | # print(name) 265 | # for v in vs: 266 | # print(v.name) 267 | # print() 268 | # # ============================ 269 | 270 | with tf.name_scope('Update'): 271 | opt_e = optimizer.minimize(self.loss['l_G'], var_list=e_vars) 272 | with tf.control_dependencies([opt_e]): 273 | opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars, global_step=global_step) 274 | 275 | opt_l = optimizer_l.minimize(- self.loss['l_D'], var_list=l_vars) # opposite sign to D 276 | with tf.control_dependencies([opt_l]): 277 | opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars) 278 | return { 279 | 'd': opt_d, 280 | 'g': opt_g, 281 | 'l': opt_l, 282 | 'global_step': global_step 283 | } 284 | 285 | class CycleFisherGANTrainer(FisherGANTrainer): 286 | def _optimize(self): 287 | ''' 288 | NOTE: The author said that there was no need for 100 d_iter per 100 iters. 
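`BiFisherGANTrainer._optimize` above chains its updates with `tf.control_dependencies`: one `sess.run` of the generator op first applies the encoder step, and one run of the critic op first applies the lambda ascent step. The idiom in isolation (TensorFlow 1.x assumed):

```
import tensorflow as tf   # TensorFlow 1.x assumed

v = tf.Variable(0.)
first = tf.assign_add(v, 1.)         # stands in for opt_e / opt_l
with tf.control_dependencies([first]):
    second = tf.assign_add(v, 10.)   # stands in for opt_g / opt_d

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(second)                 # runs `first`, then `second`
    print(sess.run(v))               # 11.0
```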
289 | https://github.com/igul222/improved_wgan_training/issues/3 290 | ''' 291 | global_step = tf.Variable(0, name='global_step') 292 | lr = self.arch['training']['lr'] 293 | b1 = self.arch['training']['beta1'] 294 | b2 = self.arch['training']['beta2'] 295 | rho = self.arch['training']['rho'] 296 | 297 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 298 | optimizer_l = tf.train.GradientDescentOptimizer(rho) 299 | 300 | trainables = tf.trainable_variables() 301 | g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 302 | 303 | d_vars = [v for v in trainables if 'Discriminator' in v.name] 304 | l_vars = [v for v in trainables if 'lambda' in v.name] 305 | 306 | e_vars = [v for v in trainables if 'Encoder' in v.name] 307 | 308 | z_vars = [v for v in trainables if 'Dz' in v.name] 309 | j_vars = [v for v in trainables if 'lambdz' in v.name] 310 | # # Debug =============== 311 | # # ============================ 312 | 313 | with tf.name_scope('Update'): 314 | opt_j = optimizer_l.minimize(- self.loss['l_Dz'], var_list=j_vars) 315 | opt_z = optimizer.minimize(self.loss['l_Dz'], var_list=z_vars) 316 | 317 | opt_e = optimizer.minimize(self.loss['l_E'], var_list=e_vars) 318 | with tf.control_dependencies([opt_e, opt_j, opt_z]): 319 | opt_g = optimizer.minimize(self.loss['l_G'], var_list=g_vars, global_step=global_step) 320 | 321 | opt_l = optimizer_l.minimize(- self.loss['l_D'], var_list=l_vars) # opposite sign to D 322 | with tf.control_dependencies([opt_l]): 323 | opt_d = optimizer.minimize(self.loss['l_D'], var_list=d_vars) 324 | return { 325 | 'd': opt_d, 326 | 'g': opt_g, 327 | 'l': opt_l, 328 | 'global_step': global_step 329 | } 330 | 331 | 332 | # class BEGANTrainer(GANTrainer): 333 | # def _optimize(self): #loss, args, arch, net=None): 334 | # ''' 335 | # [TODO] 336 | # Although most of the trainer structures are the same, 337 | # I think we have to use different training scripts for VAE- and DC-GAN 338 | # (but do we have to have two different classes of VAE- and DC-?) 339 | # ''' 340 | # global_step = tf.Variable(0, name='global_step') 341 | # lr = self.arch['training']['lr'] 342 | # # optimizer_d = tf.train.AdamOptimizer(lr) 343 | # optimizer_g = tf.train.AdamOptimizer(lr) 344 | 345 | # trainables = tf.trainable_variables() 346 | # g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 347 | # d_vars = [v for v in trainables if 'Critic' in v.name] 348 | 349 | # # Debug =============== 350 | # for name, vs in zip( 351 | # ['Generator', 'Critic'], 352 | # [g_vars, d_vars]): 353 | # print(name) 354 | # for v in vs: 355 | # print(v.name) 356 | # print() 357 | # # ============================ 358 | 359 | # with tf.name_scope('Update'): 360 | # opt_g = optimizer_g.minimize(self.loss['G'], var_list=g_vars) 361 | # opt_d = optimizer_g.minimize(self.loss['D'], global_step=global_step, var_list=d_vars) 362 | # with tf.control_dependencies([opt_d, opt_g]): 363 | # k_delta = self.arch['training']['lambda'] * self.loss['k_delta'] 364 | # opt_k = tf.assign( 365 | # net.k_t, tf.clip_by_value(net.k_t + k_delta, 0, 1)) 366 | 367 | # # logging.info('The following variables are clamped:') 368 | 369 | # return { 370 | # 'd': opt_d, 371 | # 'g': opt_g, 372 | # 'k': opt_k, 373 | # 'global_step': global_step 374 | # } 375 | 376 | class VAETrainer(GANTrainer): 377 | def _optimize(self): 378 | ''' 379 | NOTE: The author said that there was no need for 100 d_iter per 100 iters.
380 | https://github.com/igul222/improved_wgan_training/issues/3 381 | ''' 382 | global_step = tf.Variable(0, name='global_step') 383 | lr = self.arch['training']['lr'] 384 | b1 = self.arch['training']['beta1'] 385 | b2 = self.arch['training']['beta2'] 386 | 387 | optimizer = tf.train.AdamOptimizer(lr, b1, b2) 388 | 389 | trainables = tf.trainable_variables() 390 | g_vars = trainables 391 | # g_vars = [v for v in trainables if 'Generator' in v.name or 'y_emb' in v.name] 392 | 393 | with tf.name_scope('Update'): 394 | opt_g = optimizer.minimize(self.loss['G'], var_list=g_vars, global_step=global_step) 395 | return { 396 | 'g': opt_g, 397 | 'global_step': global_step 398 | } 399 | 400 | 401 | def _refresh_status(self, sess): 402 | fetches = { 403 | "D_KL": self.loss['D_KL'], 404 | "logP": self.loss['logP'], 405 | "step": self.opt['global_step'], 406 | } 407 | result = sess.run( 408 | fetches=fetches, 409 | # options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 410 | # run_metadata=run_metadata, 411 | ) 412 | 413 | # trace = timeline.Timeline(step_stats=run_metadata.step_stats) 414 | # with open(os.path.join(dirs['logdir'], 'timeline.ctf.json'), 'w') as fp: 415 | # fp.write(trace.generate_chrome_trace_format()) 416 | 417 | # Message 418 | msg = 'Iter {:05d}: '.format(result['step']) 419 | msg += 'log P(x|z, y) = {:.3e} '.format(result['logP']) 420 | msg += 'D_KL(z) = {:.3e} '.format(result['D_KL']) 421 | print('\r{}'.format(msg), end='', flush=True) 422 | logging.info(msg) 423 | 424 | 425 | def _validate(self, machine, n=10): 426 | N = n * n 427 | 428 | # same row same z 429 | z = tf.random_normal(shape=[n, self.arch['z_dim']]) 430 | z = tf.tile(z, [1, n]) 431 | z = tf.reshape(z, [N, -1]) 432 | z = tf.Variable(z, trainable=False, dtype=tf.float32) 433 | 434 | # same column same y 435 | y = tf.range(0, 10, 1, dtype=tf.int64) 436 | y = tf.reshape(y, [-1,]) 437 | y = tf.tile(y, [n,]) 438 | 439 | Xh = machine.generate(z, y) # 100, 64, 64, 3 440 | Xh = make_png_thumbnail(Xh, n) 441 | return Xh 442 | 443 | def train(self, nIter, machine=None, summary_op=None): 444 | Xh = self._validate(machine=machine, n=10) 445 | 446 | run_metadata = tf.RunMetadata() 447 | 448 | sv = tf.train.Supervisor( 449 | logdir=self.dirs['logdir'], 450 | # summary_writer=summary_writer, 451 | # summary_op=None, 452 | # is_chief=True, 453 | # save_model_secs=600, 454 | global_step=self.opt['global_step']) 455 | 456 | 457 | # sess_config = configure_gpu_settings(args.gpu_cfg) 458 | sess_config = tf.ConfigProto( 459 | allow_soft_placement=True, 460 | gpu_options=tf.GPUOptions(allow_growth=True)) 461 | 462 | with sv.managed_session(config=sess_config) as sess: 463 | sv.loop(60, self._refresh_status, (sess,)) 464 | for step in range(self.arch['training']['max_iter']): 465 | if sv.should_stop(): 466 | break 467 | 468 | # main loop 469 | sess.run(self.opt['g']) 470 | 471 | # output img 472 | if step % 1000 == 0: 473 | xh = sess.run(Xh) 474 | with tf.gfile.GFile( 475 | os.path.join( 476 | self.dirs['logdir'], 477 | 'img-anime-{:03d}k.png'.format(step // 1000), 478 | ), 479 | mode='wb', 480 | ) as fp: 481 | fp.write(xh) 482 | -------------------------------------------------------------------------------- /model/vaegan.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from tensorflow.contrib import slim 3 | import tensorflow as tf 4 | from util.layer import GaussianLogDensity, GaussianKLD, \ 5 | GaussianSampleLayer, lrelu 6 | 7 | class VAWGAN(object): 8 | ''' 9 | VC-GAN 10 
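Every trainer in `trainer/gan.py` reads its hyper-parameters from `arch['training']`, which `main-vawgan.py` presumably loads from `architecture-vawgan-vcc2016.json`. Collected in one place, the keys referenced in that file are (an illustrative Python dict; all values are placeholders, not the repo's settings):

```
arch = {
    'training': {
        'lr': 1e-4,          # Adam learning rate
        'beta1': 0.5,        # Adam beta1
        'beta2': 0.9,        # Adam beta2
        'rho': 1e-5,         # SGD step for the Fisher-GAN lambda
        'nIterD': 5,         # critic updates per generator update
        'max_iter': 200000,  # total training iterations
        'lambda': 1e-3,      # k_t step of the commented-out BEGANTrainer
    },
    'z_dim': 64,             # latent size, used by _validate
}
```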
| = CVAE-CGAN 11 | = Convolutional Variational Auto-encoder 12 | with Conditional Generative Adversarial Net 13 | ''' 14 | def __init__(self, arch, is_training=False): 15 | self.arch = arch 16 | self._sanity_check() 17 | self.is_training = is_training 18 | 19 | with tf.name_scope('SpeakerRepr'): 20 | self.y_emb = self._unit_embedding( 21 | self.arch['y_dim'], 22 | self.arch['z_dim'], 23 | 'y_embedding') 24 | 25 | self._generate = tf.make_template( 26 | 'Generator', 27 | self._generator) 28 | self._discriminate = tf.make_template( 29 | 'Discriminator', 30 | self._discriminator) 31 | self._encode = tf.make_template( 32 | 'Encoder', 33 | self._encoder) 34 | 35 | 36 | def _sanity_check(self): 37 | for net in ['encoder', 'generator', 'discriminator']: 38 | assert len(self.arch[net]['output']) > 2 39 | assert len(self.arch[net]['output']) == len(self.arch[net]['kernel']) 40 | assert len(self.arch[net]['output']) == len(self.arch[net]['stride']) 41 | 42 | 43 | def _unit_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 44 | with tf.variable_scope(scope_name): 45 | embeddings = tf.get_variable( 46 | name=var_name, 47 | shape=[n_class, h_dim]) 48 | embeddings = tf.nn.l2_normalize(embeddings, dim=-1, name=var_name+'normalized') 49 | return embeddings 50 | 51 | 52 | def _merge(self, var_list, fan_out, l2_reg=1e-6): 53 | ''' 54 | Note: Don't apply BN on this because 'y' 55 | tends to be the same inside a batch. 56 | ''' 57 | x = 0. 58 | with slim.arg_scope( 59 | [slim.fully_connected], 60 | num_outputs=fan_out, 61 | weights_regularizer=slim.l2_regularizer(l2_reg), 62 | normalizer_fn=None, 63 | activation_fn=None): 64 | for var in var_list: 65 | x = x + slim.fully_connected(var) 66 | x = slim.bias_add(x) 67 | return x 68 | 69 | 70 | def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'): 71 | with tf.variable_scope(scope_name): 72 | embeddings = tf.get_variable( 73 | name=var_name, 74 | shape=[n_class, h_dim], 75 | regularizer=slim.l2_regularizer(1e-6)) 76 | return embeddings 77 | 78 | 79 | def _encoder(self, x, is_training): 80 | n_layer = len(self.arch['encoder']['output']) 81 | subnet = self.arch['encoder'] 82 | 83 | with slim.arg_scope( 84 | [slim.batch_norm], 85 | scale=True, scope='BN', 86 | updates_collections=None, 87 | decay=0.9, epsilon=1e-5, # [TODO] Test these hyper-parameters 88 | is_training=is_training): 89 | with slim.arg_scope( 90 | [slim.conv2d], 91 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 92 | normalizer_fn=slim.batch_norm, 93 | activation_fn=lrelu): 94 | 95 | for i in range(n_layer): 96 | x = slim.conv2d( 97 | x, 98 | subnet['output'][i], 99 | subnet['kernel'][i], 100 | subnet['stride'][i]) 101 | tf.summary.image( 102 | 'down-sample{:d}'.format(i), 103 | tf.transpose(x[:, :, :, 0:3], [2, 1, 0, 3])) 104 | 105 | x = slim.flatten(x) 106 | 107 | with slim.arg_scope( 108 | [slim.fully_connected], 109 | num_outputs=self.arch['z_dim'], 110 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 111 | normalizer_fn=None, 112 | activation_fn=None): 113 | z_mu = slim.fully_connected(x) 114 | z_lv = slim.fully_connected(x) 115 | return z_mu, z_lv 116 | 117 | 118 | def _generator(self, z, y, is_training): 119 | ''' In this version, we only generate the target, so `y` is useless ''' 120 | subnet = self.arch['generator'] 121 | n_layer = len(subnet['output']) 122 | h, w, c = subnet['hwc'] 123 | 124 | y = tf.nn.embedding_lookup(self.y_emb, y) 125 | 126 | x = self._merge([z, y], subnet['merge_dim']) 127 | x = lrelu(x) 128 | with 
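`_unit_embedding` keeps every class vector on the unit sphere by l2-normalizing the embedding table before lookup, so label embeddings differ only in direction, not magnitude. The equivalent NumPy sketch:

```
import numpy as np

def unit_embedding(n_class, h_dim, rng=np.random.default_rng(0)):
    emb = rng.standard_normal((n_class, h_dim))
    return emb / np.linalg.norm(emb, axis=-1, keepdims=True)  # unit-norm rows

emb = unit_embedding(n_class=10, h_dim=64)
y_vec = emb[np.array([3, 7])]            # embedding lookup
print(np.linalg.norm(y_vec, axis=-1))    # [1. 1.]
```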
slim.arg_scope( 129 | [slim.batch_norm], 130 | scale=True, scope='BN', 131 | updates_collections=None, 132 | decay=0.9, epsilon=1e-5, 133 | is_training=is_training): 134 | 135 | x = slim.fully_connected( 136 | x, 137 | h * w * c, 138 | normalizer_fn=slim.batch_norm, 139 | activation_fn=lrelu) 140 | 141 | x = tf.reshape(x, [-1, h, w, c]) 142 | 143 | with slim.arg_scope( 144 | [slim.conv2d_transpose], 145 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 146 | normalizer_fn=slim.batch_norm, 147 | activation_fn=lrelu): 148 | 149 | for i in range(n_layer -1): 150 | x = slim.conv2d_transpose( 151 | x, 152 | subnet['output'][i], 153 | subnet['kernel'][i], 154 | subnet['stride'][i] 155 | # normalizer_fn=None 156 | ) 157 | 158 | # Don't apply BN for the last layer of G 159 | x = slim.conv2d_transpose( 160 | x, 161 | subnet['output'][-1], 162 | subnet['kernel'][-1], 163 | subnet['stride'][-1], 164 | normalizer_fn=None, 165 | activation_fn=None) 166 | 167 | # pdb.set_trace() 168 | logit = x 169 | x = tf.nn.tanh(logit) 170 | return x, logit 171 | 172 | 173 | def _discriminator(self, x, is_training): 174 | # def _discriminator(self, x, is_training): 175 | ''' Note: In this version, `y` is useless ''' 176 | subnet = self.arch['discriminator'] 177 | n_layer = len(subnet['output']) 178 | 179 | # y_dim = self.arch['y_dim'] 180 | # h, w, _ = self.arch['hwc'] 181 | # y_emb = self._l2_regularized_embedding(y_dim, h * w, 'y_embedding_disc_in') 182 | # y_vec = tf.nn.embedding_lookup(y_emb, y) 183 | # y_vec = tf.reshape(y_vec, [-1, h, w, 1]) 184 | 185 | intermediate = list() 186 | intermediate.append(x) 187 | 188 | # x = tf.concat(3, [x, y_vec]) # inject y into x 189 | with slim.arg_scope( 190 | [slim.batch_norm], 191 | scale=True, scope='BN', 192 | updates_collections=None, 193 | decay=0.9, epsilon=1e-5, 194 | is_training=is_training): 195 | with slim.arg_scope( 196 | [slim.conv2d], 197 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 198 | normalizer_fn=slim.batch_norm, 199 | activation_fn=lrelu): 200 | 201 | # Radford: [do] not applying batchnorm to the discriminator input layer 202 | x = slim.conv2d( 203 | x, 204 | subnet['output'][0], 205 | subnet['kernel'][0], 206 | subnet['stride'][0], 207 | normalizer_fn=None) 208 | intermediate.append(x) 209 | for i in range(1, n_layer): 210 | x = slim.conv2d( 211 | x, 212 | subnet['output'][i], 213 | subnet['kernel'][i], 214 | subnet['stride'][i]) 215 | intermediate.append(x) 216 | tf.summary.image( 217 | 'upsampling{:d}'.format(i), 218 | tf.transpose(x[:, :, :, 0:3], [2, 1, 0, 3])) 219 | 220 | # Don't apply BN for the last layer 221 | x = slim.flatten(x) 222 | h = slim.flatten(intermediate[subnet['feature_layer'] - 1]) 223 | 224 | # y_vec = tf.nn.embedding_lookup(self.y_emb, y) 225 | 226 | # x = self._merge([x], subnet['merge_dim']) 227 | # x = lrelu(x) 228 | 229 | # x = slim.fully_connected( 230 | # x, 231 | # subnet['merge_dim'], 232 | # weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 233 | # activation_fn=lrelu) 234 | 235 | x = slim.fully_connected( 236 | x, 237 | 1, 238 | weights_regularizer=slim.l2_regularizer(subnet['l2-reg']), 239 | activation_fn=None) 240 | 241 | return x, h # no explicit `sigmoid` 242 | 243 | 244 | def loss(self, x_s, y_s, x_t, y_t): 245 | ''' ''' 246 | # def thresholding_add(x, v, minval=-1., maxval=1.): 247 | # x_v = x + v 248 | # x_v = tf.maximum(x_v, minval) 249 | # x_v = tf.minimum(x_v, maxval) 250 | # return x_v 251 | 252 | def circuit_loop(x, y): 253 | # v = decay * tf.random_normal(shape=tf.shape(x)) 
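The discriminator above keeps every intermediate activation and returns the one selected by `subnet['feature_layer']` alongside the raw score, so the VAE reconstruction term can be measured in that learned feature space (the VAE/GAN trick). The selection logic reduced to plain Python:

```
def discriminate(x, layers, feature_layer):
    """Apply `layers` in turn, remember every activation, return (score, tapped feature)."""
    intermediate = [x]
    for layer in layers:
        x = layer(x)
        intermediate.append(x)
    return x, intermediate[feature_layer - 1]   # same indexing as the model code

score, feat = discriminate(1.0, [lambda v: v + 1, lambda v: v * 2], feature_layer=2)
print(score, feat)   # 4.0 2.0
```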
254 | 255 | z_mu, z_lv = self._encode(x, is_training=self.is_training) 256 | z = GaussianSampleLayer(z_mu, z_lv) 257 | 258 | x_logit, x_feature = self._discriminate( 259 | x, is_training=self.is_training) 260 | 261 | xh, xh_sig_logit = self._generate(z, y, is_training=self.is_training) 262 | 263 | zh_mu, zh_lv = self._encode(xh, is_training=self.is_training) 264 | 265 | xh_logit, xh_feature = self._discriminate( 266 | xh, is_training=self.is_training) 267 | 268 | return dict( 269 | z=z, 270 | z_mu=z_mu, 271 | z_lv=z_lv, 272 | # y_logit=y_logit, 273 | xh=xh, 274 | xh_sig_logit=xh_sig_logit, 275 | # xo=xo, 276 | x_logit=x_logit, 277 | x_feature=x_feature, 278 | zh_mu=zh_mu, 279 | zh_lv=zh_lv, 280 | # yh_logit=yh_logit, 281 | xh_logit=xh_logit, 282 | xh_feature=xh_feature, 283 | # xr=xr 284 | ) 285 | 286 | s = circuit_loop(x_s, y_s) 287 | t = circuit_loop(x_t, y_t) 288 | s2t = circuit_loop(x_s, y_t) 289 | 290 | with tf.name_scope('loss'): 291 | def mean_sigmoid_cross_entropy_with_logits(logit, truth): 292 | ''' 293 | truth: 0. or 1. 294 | ''' 295 | return tf.reduce_mean( 296 | tf.nn.sigmoid_cross_entropy_with_logits( 297 | logit, 298 | truth * tf.ones_like(logit))) 299 | 300 | loss = dict() 301 | 302 | # Parallel 303 | loss['reconst_t'] = \ 304 | tf.reduce_mean(t['x_logit']) \ 305 | - tf.reduce_mean(t['xh_logit']) 306 | 307 | # Parallel 308 | loss['reconst_s'] = \ 309 | tf.reduce_mean(s['x_logit']) \ 310 | - tf.reduce_mean(s['xh_logit']) 311 | 312 | # Non-parallel 313 | loss['conv_s2t'] = \ 314 | tf.reduce_mean(t['x_logit']) \ 315 | - tf.reduce_mean(s2t['xh_logit']) 316 | 317 | # Non-parallel: s v. t 318 | loss['real_s_t'] = \ 319 | tf.reduce_mean(t['x_logit']) \ 320 | - tf.reduce_mean(s['x_logit']) 321 | 322 | # That's why I only take the last term into consideration 323 | loss['WGAN'] = loss['conv_s2t'] 324 | 325 | # VAE's Kullback-Leibler Divergence 326 | loss['KL(z)'] = \ 327 | tf.reduce_mean( 328 | GaussianKLD( 329 | s['z_mu'], s['z_lv'], 330 | tf.zeros_like(s['z_mu']), tf.zeros_like(s['z_lv']))) +\ 331 | tf.reduce_mean( 332 | GaussianKLD( 333 | t['z_mu'], t['z_lv'], 334 | tf.zeros_like(t['z_mu']), tf.zeros_like(t['z_lv']))) 335 | loss['KL(z)'] /= 2.0 336 | 337 | # VAE's Reconstruction Neg. Log-Likelihood (on the 'feature' space of Dx) 338 | loss['Dis'] = \ 339 | tf.reduce_mean( 340 | GaussianLogDensity( 341 | slim.flatten(x_t), 342 | slim.flatten(t['xh']), 343 | tf.zeros_like(slim.flatten(x_t)))) +\ 344 | tf.reduce_mean( 345 | GaussianLogDensity( 346 | slim.flatten(x_s), 347 | slim.flatten(s['xh']), 348 | tf.zeros_like(slim.flatten(x_s)))) 349 | loss['Dis'] /= - 2.0 350 | 351 | # loss['Dis'] = tf.reduce_mean( 352 | # tf.reduce_sum( 353 | # tf.nn.sigmoid_cross_entropy_with_logits( 354 | # slim.flatten(t['xh_sig_logit']), 355 | # slim.flatten(x_t * .5 + .5)) +\ 356 | # tf.nn.sigmoid_cross_entropy_with_logits( 357 | # slim.flatten(s['xh_sig_logit']), 358 | # slim.flatten(x_s * .5 + .5)), 359 | # -1)) 360 | # # Note: Please remember to apply reduce_sum along the last dim 361 | # # (here, it's not 1 as is in D/G loss.) 362 | 363 | # loss['Dis'] = tf.reduce_mean( 364 | # (s2t['xh_logit'] + s['xh_logit'] + t['xh_logit']) / 3.) 365 | 366 | # # Use 'feature' item instead of raw input because the former is 'flattened' 367 | # loss['Dis'] = tf.reduce_mean( 368 | # tf.reduce_sum(tf.square(s['x_feature'] - s['xh_feature']), -1) +\ 369 | # tf.reduce_sum(tf.square(t['x_feature'] - t['xh_feature']), -1)) 370 | 371 | # [TODO] Maybe I should normalize (divide by 2 or 3) the above costs. 
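`loss['KL(z)']` above calls `GaussianKLD(mu, lv, 0, 0)`, the diagonal-Gaussian KL against a standard-normal prior. With zero prior mean and unit prior variance, the general formula collapses to the familiar VAE term, sketched here:

```
import numpy as np

def kld_to_standard_normal(mu, log_var):
    """KL( N(mu, exp(log_var)) || N(0, I) ) = 0.5 * sum(exp(lv) + mu^2 - 1 - lv)."""
    return 0.5 * np.sum(np.exp(log_var) + mu ** 2 - 1. - log_var, axis=-1)

print(kld_to_standard_normal(np.zeros(64), np.zeros(64)))   # 0.0 exactly at the prior
```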
372 | 373 | # For summaries 374 | with tf.name_scope('Summary'): 375 | # tf.summary.scalar('DKL_x', loss['KL(x)']) 376 | tf.summary.scalar('DKL_z', loss['KL(z)']) 377 | tf.summary.scalar('MMSE', loss['Dis']) 378 | 379 | # tf.summary.scalar('D_real', loss['D_real']) 380 | # tf.summary.scalar('G_fake', loss['G_fake']) 381 | 382 | tf.summary.scalar('WGAN', loss['WGAN']) 383 | tf.summary.scalar('WGAN-s', loss['reconst_s']) 384 | tf.summary.scalar('WGAN-t', loss['reconst_t']) 385 | tf.summary.scalar('WGAN-s2t', loss['conv_s2t']) 386 | tf.summary.scalar('WGAN-t-s', loss['real_s_t']) 387 | 388 | 389 | tf.summary.histogram('y', tf.concat(0, [y_t, y_s])) 390 | 391 | # tf.summary.histogram('yh_s', tf.argmax(s['yh_logit'], 1)) 392 | # tf.summary.histogram('yh_t', tf.argmax(t['yh_logit'], 1)) 393 | 394 | tf.summary.histogram('z', tf.concat(0, [s['z'], t['z']])) 395 | tf.summary.histogram('z_s', s['z']) 396 | tf.summary.histogram('z_t', t['z']) 397 | 398 | tf.summary.histogram('z_mu', tf.concat(0, [s['z_mu'], t['z_mu']])) 399 | tf.summary.histogram('z_mu_s', s['z_mu']) 400 | tf.summary.histogram('z_mu_t', t['z_mu']) 401 | 402 | tf.summary.histogram('z_lv', tf.concat(0, [s['z_lv'], t['z_lv']])) 403 | tf.summary.histogram('z_lv_s', s['z_lv']) 404 | tf.summary.histogram('z_lv_t', t['z_lv']) 405 | 406 | # tf.summary.histogram('D_t_t', tf.nn.sigmoid(x_t_from_t_logit)) 407 | # tf.summary.histogram('D_t_s', tf.nn.sigmoid(x_t_from_s_logit)) 408 | # tf.summary.histogram('D_t', tf.nn.sigmoid(x_t_logit)) 409 | # tf.summary.histogram('D_s', tf.nn.sigmoid(x_s_logit)) 410 | 411 | 412 | tf.summary.histogram('logit_t_from_t', t['xh_logit']) 413 | tf.summary.histogram('logit_t_from_s', s2t['xh_logit']) 414 | tf.summary.histogram('logit_t', t['x_logit']) 415 | # tf.summary.histogram('logit_s', s['x_logit']) 416 | 417 | # tf.summary.histogram( 418 | # 'logit_s_True_and_Fake)', 419 | # tf.concat(0, [x_s_from_s_logit, x_s_logit])) 420 | tf.summary.histogram( 421 | 'logit_t_True_FromT_FromS', 422 | tf.concat(0, [t['x_logit'], t['xh_logit'], s2t['xh_logit']])) 423 | tf.summary.histogram( 424 | 'logit_s_v_sh', 425 | tf.concat(0, [s['x_logit'], s['xh_logit']])) 426 | tf.summary.histogram( 427 | 'logit_t_v_th', 428 | tf.concat(0, [t['x_logit'], t['xh_logit']])) 429 | # # tf.image_summary("G", xh) 430 | 431 | # tf.summary.scalar('Ent_on_y', loss['y']) 432 | # tf.summary.scalar('Diff_zs_zt', loss['z']) 433 | return loss 434 | 435 | # def sample(self, z=128): 436 | # ''' Generate fake samples given `z` 437 | # if z is not given or is an `int`, 438 | # this fcn generates (z=128) samples 439 | # ''' 440 | # z = tf.random_uniform( 441 | # shape=[z, self.arch['z_dim']], 442 | # minval=-1.0, 443 | # maxval=1.0, 444 | # name='z_test') 445 | # return self._generate(z, is_training=False) 446 | 447 | def encode(self, x): 448 | z_mu, z_lv = self._encode(x, is_training=False) 449 | return dict(mu=z_mu, log_var=z_lv) 450 | 451 | def decode(self, z, y, tanh=False): 452 | # if tanh: 453 | # return self._generate(z, y, is_training=False) 454 | # else: 455 | # return self._generate(z, y, is_training=False) 456 | xh, _ = self._generate(z, y, is_training=False) 457 | # tf.summary.image('xh', tf.transpose(xh, [2, 1, 0, 3])) 458 | # return self._filter(xh, is_training=False) 459 | return xh 460 | 461 | def discriminate(self, x): 462 | ''' 463 | To estimate the EMD, we need D to assign a score per sample. 
464 | *The batches can be of different sizes 465 | ''' 466 | # s_true, _ = self._discriminate(x_true, is_training=False) 467 | # s_fake, _ = self._discriminate(x_fake, is_training=False) 468 | # return s_true, s_fake 469 | s, _ = self._discriminate(x, is_training=False) 470 | return s 471 | 472 | # def classify(self, x): 473 | # return self._classify(tf.nn.softmax(x), is_training=False) 474 | 475 | # def interpolate(self, x1, x2, n): 476 | # ''' Interpolation from the latent space ''' 477 | # x1 = tf.expand_dims(x1, 0) 478 | # x2 = tf.expand_dims(x2, 0) 479 | # z1, _ = self._encode(x1, is_training=False) 480 | # z2, _ = self._encode(x2, is_training=False) 481 | # a = tf.reshape(tf.linspace(0., 1., n), [n, 1]) 482 | 483 | # z1 = tf.matmul(1. - a, z1) 484 | # z2 = tf.matmul(a, z2) 485 | # z = tf.nn.tanh(tf.add(z1, z2)) # Gaussian-to-Uniform 486 | # xh = self._generate(z, is_training=False) 487 | # xh = tf.concat(0, [x1, xh, x2]) 488 | # return xh 489 | --------------------------------------------------------------------------------