├── ae.py ├── cvae.py ├── dae.py ├── dae_cnn.py ├── model_dae_cnn.png ├── sae.py ├── stacked_dae.py ├── vae.py └── vae_cnn.py /ae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 传统的自编码器(AE) 3 | 1、压缩结构,784-》32-》784 4 | 2、编解码器均为一层全连接网络 5 | ''' 6 | from keras.layers import Input, Dense 7 | from keras.models import Model, load_model 8 | import os 9 | 10 | # 指定gpu 11 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 12 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | ##### 完整的自编码器模型构建 ##### 18 | # 编码潜在空间表征维度 19 | encoding_dim = 32 20 | # 自编码输入 21 | input_img = Input(shape=(784,)) 22 | # 使用一个全连接网络来搭建编码器 23 | encoded = Dense(encoding_dim, activation='relu')(input_img) 24 | # 使用一个全连接网络来对编码器进行解码 25 | decoded = Dense(784, activation='sigmoid')(encoded) 26 | # 构建keras模型 27 | autoencoder = Model(input=input_img, output=decoded) 28 | 29 | ##### 也可以把编码器和解码器当做单独的模型来使用 ##### 30 | # 编码器模型 31 | encoder = Model(input=input_img, output=encoded) 32 | # 解码器模型 33 | encoded_input = Input(shape=(encoding_dim,)) 34 | decoded_layer = autoencoder.layers[-1] 35 | decoder = Model(input=encoded_input, output=decoded_layer(encoded_input)) 36 | 37 | ##### 对自编码器模型进行编译并使用mnist数据集进行训练 ##### 38 | # 编译模型 39 | autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') 40 | # 准备mnist数据 41 | from keras.datasets import mnist 42 | import numpy as np 43 | (x_train, _), (x_test, _) = mnist.load_data(path='mnist.npz') 44 | x_train = x_train.astype('float32')/255. 45 | x_test = x_test.astype('float32')/255. 
46 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 47 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 48 | # 训练 49 | autoencoder.fit(x_train, x_train, nb_epoch=50, batch_size=256, shuffle=True, validation_data=(x_test, x_test)) 50 | # 保存模型 51 | autoencoder.save('./model/model_ae') 52 | 53 | new_model = load_model('./model/model_ae') 54 | ##### 对原始输入图像和自编码器训练后重构的图像进行可视化 ##### 55 | import matplotlib.pyplot as plt 56 | 57 | decoded_imgs = new_model.predict(x_test) 58 | n = 10 59 | plt.figure(figsize=(20, 4)) 60 | for i in range(1, n): 61 | # 展示原始图像 62 | ax = plt.subplot(2, n, i) 63 | plt.imshow(x_test[i].reshape(28, 28)) 64 | plt.gray() 65 | ax.get_xaxis().set_visible(False) 66 | ax.get_yaxis().set_visible(False) 67 | # 展示自编码器重构后的图像 68 | ax = plt.subplot(2, n, i + n) 69 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 70 | plt.gray() 71 | ax.get_xaxis().set_visible(False) 72 | ax.get_yaxis().set_visible(False) 73 | 74 | plt.show() 75 | -------------------------------------------------------------------------------- /cvae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 条件变分自编码器 3 | 标签类别信息作为额外的输入 4 | 在生成图像时可指定类别来生成特定类别的图像 5 | ''' 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | from scipy.stats import norm 9 | 10 | from keras.layers import Input, Dense, Lambda 11 | from keras.models import Model, load_model 12 | from keras import backend as K 13 | from keras.datasets import mnist 14 | from keras.utils import to_categorical 15 | import os 16 | 17 | # 指定gpu 18 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 19 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 20 | 21 | ##### 设置网络参数 ##### 22 | batch_size = 2048 23 | original_dim = 784 24 | latent_dim = 2 25 | intermediate_dim = 256 26 | epochs = 100 27 | num_classes = 10 28 | 29 | ##### 加载mnist数据集 30 | (x_train, y_train_), (x_test, y_test_) = mnist.load_data('mnist.npz') 31 | x_train = x_train.astype('float32')/255. 
x_test = x_test.astype('float32')/255.
# Flatten the images and one-hot encode the labels.
x_train = x_train.reshape((len(x_train), -1))
x_test = x_test.reshape((len(x_test), -1))
y_train = to_categorical(y_train_, num_classes)
y_test = to_categorical(y_test_, num_classes)

x = Input(shape=(original_dim,))
h = Dense(intermediate_dim, activation='relu')(x)

# Mean and log-variance of p(Z|X).
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

# Class input; a linear layer maps each one-hot label directly to the
# latent mean of that class.
y = Input(shape=(num_classes,))
yh = Dense(latent_dim)(y)


def sampling(args):
    """Reparameterisation trick: draw z = mean + std * noise.

    :param args: pair ``(z_mean, z_log_var)`` of tensors.
    :return: a tensor sampled around ``z_mean`` with standard deviation
        ``exp(z_log_var / 2)``, using unit-normal noise.
    """
    mean, log_var = args
    noise = K.random_normal(shape=K.shape(mean))
    std = K.exp(log_var / 2)
    return mean + noise * std


# Reparameterisation layer: equivalent to injecting noise into the code.
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder layers (kept as layer objects so the generator can reuse them).
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# Build the model.
vae = Model([x, y], [x_decoded_mean, yh])

# Reconstruction loss, summed over the pixels of each sample.
xent_loss = K.sum(K.binary_crossentropy(x, x_decoded_mean), axis=-1)

# KL loss. Compared with a plain VAE, K.square(z_mean) becomes
# K.square(z_mean - yh), which pulls each latent code towards the mean of
# its own class.
kl_terms = 1 + z_log_var - K.square(z_mean - yh) - K.exp(z_log_var)
kl_loss = -0.5 * K.sum(kl_terms, axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
vae.summary()

# Train. The loss was attached with add_loss, so no targets are passed.
vae.fit(
    [x_train, y_train],
    shuffle=True,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=([x_test, y_test], None)
)

# Build the encoder and look at how the digits spread over the latent space.
encoder = Model(x, z_mean)

x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test_)
plt.colorbar() 93 | plt.show() 94 | 95 | # 构建生成器 96 | decoder_input = Input(shape=(latent_dim,)) 97 | _h_decoded = decoder_h(decoder_input) 98 | _x_decoded_mean = decoder_mean(_h_decoded) 99 | generator = Model(decoder_input, _x_decoded_mean) 100 | 101 | # 输出每个类的均值向量 102 | mu = Model(y, yh) 103 | mu = mu.predict(np.eye(num_classes)) 104 | 105 | # 观察能否通过控制隐变量的均值来输出特定类别的数字 106 | n = 15 107 | digit_size = 28 108 | figure = np.zeros((digit_size * n, digit_size * n)) 109 | 110 | output_digit = 9 # 指定输出数字 111 | 112 | # 用正态分布的分位数来构建隐变量对 113 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) + mu[output_digit][1] 114 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) + mu[output_digit][0] 115 | 116 | for i, yi in enumerate(grid_x): 117 | for j, xi in enumerate(grid_y): 118 | z_sample = np.array([[xi, yi]]) 119 | x_decoded = generator.predict(z_sample) 120 | digit = x_decoded[0].reshape(digit_size, digit_size) 121 | figure[i*digit_size:(i+1)*digit_size, j*digit_size:(j+1)*digit_size] = digit 122 | 123 | plt.figure(figsize=(10, 10)) 124 | plt.imshow(figure, cmap='Greys_r') 125 | plt.show() -------------------------------------------------------------------------------- /dae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 降噪自编码器(DAE) 3 | 1、给原始图像加上随机噪声,噪声呈高斯分布 4 | 2、编解码器均为一层全连接层 5 | ''' 6 | from keras.layers import Input, Dense 7 | from keras.models import Model, load_model 8 | import os 9 | 10 | # 指定gpu 11 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 12 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | ##### 完整的自编码器模型构建 ##### 18 | # 编码潜在空间表征维度 19 | encoding_dim = 32 20 | # 自编码输入 21 | input_img = Input(shape=(784,)) 22 | # 使用一个全连接网络来搭建编码器 23 | encoded = Dense(encoding_dim, activation='relu')(input_img) 24 | # 使用一个全连接网络来对编码器进行解码 25 | decoded = Dense(784, activation='sigmoid')(encoded) 26 | # 构建keras模型 27 | autoencoder = Model(input=input_img, output=decoded) 28 | 
##### The encoder and decoder can also be used as standalone models #####
# Encoder model (positional arguments instead of the legacy
# `input=` / `output=` keywords).
encoder = Model(input_img, encoded)
# Decoder model: reuse the (shared-weight) last layer of the autoencoder.
encoded_input = Input(shape=(encoding_dim,))
decoded_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoded_layer(encoded_input))

##### Compile the autoencoder and train it on MNIST #####
# Compile the model.
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
# Prepare the MNIST data: scale to [0, 1] and flatten to 784-vectors.
from keras.datasets import mnist
import numpy as np
(x_train, _), (x_test, _) = mnist.load_data(path='mnist.npz')
x_train = x_train.astype('float32')/255.
x_test = x_test.astype('float32')/255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# Corrupt the data with additive Gaussian noise, then clip back to [0, 1].
noise_factor = 0.5
x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)
54 | 55 | # 训练 56 | autoencoder.fit(x_train_noisy, x_train, nb_epoch=50, batch_size=256, shuffle=True, validation_data=(x_test_noisy, x_test)) 57 | # 保存模型 58 | autoencoder.save('./model/model_dae') 59 | 60 | new_model = load_model('./model/model_dae') 61 | ##### 对原始输入图像和自编码器训练后重构的图像进行可视化 ##### 62 | import matplotlib.pyplot as plt 63 | 64 | decoded_imgs = new_model.predict(x_test_noisy) 65 | n = 10 66 | plt.figure(figsize=(20, 4)) 67 | for i in range(1, n): 68 | # 展示原始图像 69 | ax = plt.subplot(2, n, i) 70 | plt.imshow(x_test_noisy[i].reshape(28, 28)) 71 | plt.gray() 72 | ax.get_xaxis().set_visible(False) 73 | ax.get_yaxis().set_visible(False) 74 | # 展示自编码器重构后的图像 75 | ax = plt.subplot(2, n, i + n) 76 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 77 | plt.gray() 78 | ax.get_xaxis().set_visible(False) 79 | ax.get_yaxis().set_visible(False) 80 | 81 | plt.show() -------------------------------------------------------------------------------- /dae_cnn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 降噪自编码器(DAE) 3 | 1、给原始图像加上随机噪声,噪声呈高斯分布 4 | 2、编解码器均为CNN 5 | ''' 6 | from keras.layers import Input, Dense, UpSampling2D 7 | from keras.layers import Convolution2D, MaxPooling2D 8 | from keras.models import Model, load_model 9 | import matplotlib.pyplot as plt 10 | import matplotlib.image as mpimg 11 | from keras.utils import plot_model 12 | import os 13 | # 指定gpu 14 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 15 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 16 | os.environ["PATH"] += os.pathsep + 'D:/Program Files/Graphviz2.38/bin/' 17 | 18 | # 输入维度 19 | input_img = Input(shape=(28, 28, 1)) 20 | # 基于卷积和池化的编码器 21 | x = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(input_img) 22 | x = MaxPooling2D((2, 2), border_mode='same')(x) 23 | x = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(x) 24 | encoded = MaxPooling2D((2, 2), border_mode='same')(x) 25 | # 基于卷积核上采样的解码器 26 | x = Convolution2D(32, 3, 3, 
activation='relu', border_mode='same')(encoded) 27 | x = UpSampling2D((2, 2))(x) 28 | x = Convolution2D(32, 3, 3, activation='relu', border_mode='same')(x) 29 | x = UpSampling2D((2, 2))(x) 30 | decoded = Convolution2D(1, 3, 3, activation='sigmoid', border_mode='same')(x) 31 | # 搭建模型并编译 32 | autoencoder = Model(input_img, decoded) 33 | autoencoder.summary() 34 | autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') 35 | 36 | # 准备加了噪声的mnist数据 37 | from keras.datasets import mnist 38 | import numpy as np 39 | 40 | (x_train, _), (x_test, _) = mnist.load_data('mnist.npz') 41 | x_train = x_train.astype('float32') / 255. 42 | x_test = x_test.astype('float32') / 255. 43 | x_train = np.reshape(x_train, (len(x_train), 28, 28, 1)) 44 | x_test = np.reshape(x_test, (len(x_test), 28, 28, 1)) 45 | # 给数据添加噪声 46 | noise_factor = 0.5 47 | x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape) 48 | x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape) 49 | 50 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 51 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 
52 | 53 | # 对噪声数据进行自编码训练 54 | autoencoder.fit(x_train_noisy, x_train, 55 | nb_epoch=10, 56 | batch_size=512, 57 | shuffle=True, 58 | validation_data=(x_test_noisy, x_test)) 59 | 60 | # 保存模型 61 | autoencoder.save('./model/model_dae_cnn') 62 | 63 | # 模型画图 64 | plot_model(autoencoder, to_file='model_dae_cnn.png', show_shapes=True) 65 | model_img = mpimg.imread('model_dae_cnn.png') 66 | plt.imshow(model_img) 67 | plt.axis('off') 68 | plt.show() 69 | 70 | # 加载模型 71 | new_model = load_model('./model/model_dae_cnn') 72 | ##### 对原始输入图像和自编码器训练后重构的图像进行可视化 ##### 73 | import matplotlib.pyplot as plt 74 | 75 | decoded_imgs = new_model.predict(x_test_noisy) 76 | n = 10 77 | plt.figure(figsize=(20, 4)) 78 | for i in range(1, n): 79 | # 展示原始图像 80 | ax = plt.subplot(2, n, i) 81 | plt.imshow(x_test_noisy[i].reshape(28, 28)) 82 | plt.gray() 83 | ax.get_xaxis().set_visible(False) 84 | ax.get_yaxis().set_visible(False) 85 | # 展示自编码器重构后的图像 86 | ax = plt.subplot(2, n, i + n) 87 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 88 | plt.gray() 89 | ax.get_xaxis().set_visible(False) 90 | ax.get_yaxis().set_visible(False) 91 | 92 | plt.show() 93 | 94 | 95 | -------------------------------------------------------------------------------- /model_dae_cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CLaraRR/autoencoder_practice/269f1719731b806cd1f2ed9905ad858ce9c8d5d9/model_dae_cnn.png -------------------------------------------------------------------------------- /sae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 稀疏自编码器(Sparse AutoEncoder) 3 | 使用KL散度对神经元稀疏化 4 | ''' 5 | from keras.layers import Input, Dense 6 | from keras.models import Model, load_model 7 | from keras.datasets import mnist 8 | from keras import backend as K 9 | from keras import regularizers 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import os 13 | # 指定gpu 14 | # 
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 15 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 16 | 17 | ##### 设置网络参数 ##### 18 | p = 0.05 # 使大部分神经元的激活值(所有神经元的激活值的平均值)接近这个p值 19 | beta = 3 # 控制KL散度所占的比重 20 | input_dim = 784 21 | encoding_dim = 30 22 | lambda_val = 0.001 # weight decay 23 | epochs = 400 24 | batch_size = 2048 25 | 26 | 27 | 28 | #### 准备mnist数据 ###### 29 | (x_train, y_train_), (x_test, y_test_) = mnist.load_data('mnist.npz') 30 | x_train = x_train.astype('float32')/255. 31 | x_test = x_test.astype('float32')/255. 32 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 33 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 34 | 35 | ##### 定义网络 ###### 36 | input_img = Input(shape=(input_dim,)) 37 | 38 | # 自定义正则项函数, 计算KL散度 39 | def sparse_reg(activity_matrix): 40 | activity_matrix = K.softmax(activity_matrix, axis=0) # 把激活值先用softmax归一化 41 | p_hat = K.mean(activity_matrix, axis=0) # 将第j个神经元在batch_size个输入下所有的输出激活值取平均 42 | print('p_hat=', p_hat) 43 | KLD = p*(K.log(p/p_hat))+(1-p)*(K.log((1-p)/(1-p_hat))) # 计算KL散度 44 | print('KLD=', KLD) 45 | return beta*K.sum(KLD) # 所有神经元的KL散度相加并乘以beta 46 | 47 | encoded = Dense( 48 | encoding_dim, 49 | activation='relu', 50 | kernel_regularizer=regularizers.l2(lambda_val/2), 51 | activity_regularizer=sparse_reg 52 | )(input_img) 53 | 54 | decoded = Dense( 55 | input_dim, activation='sigmoid', 56 | kernel_regularizer=regularizers.l2(lambda_val/2), 57 | activity_regularizer=sparse_reg 58 | )(encoded) 59 | # sae模型 60 | sae = Model(input_img, decoded) 61 | 62 | 63 | # encoder模型 64 | encoder = Model(input_img, encoded) 65 | 66 | # decoder模型 67 | decoded_input = Input(shape=(encoding_dim,)) 68 | decoder_layer = sae.layers[-1](decoded_input) 69 | decoder = Model(decoded_input, decoder_layer) 70 | 71 | sae.compile(optimizer='adam', loss='binary_crossentropy') 72 | sae.summary() 73 | # 开始训练 74 | sae.fit( 75 | x_train, 76 | x_train, 77 | epochs=epochs, 78 | batch_size=batch_size, 79 | shuffle=True, 80 | 
validation_data=(x_test, x_test) 81 | ) 82 | 83 | # 用测试数据集看看重构的效果 84 | encoded_imgs = encoder.predict(x_test) 85 | decoded_imgs = decoder.predict(encoded_imgs) 86 | 87 | n = 10 88 | plt.figure(figsize=(20, 4)) 89 | for i in range(1, n): 90 | # 展示原始图像 91 | ax = plt.subplot(2, n, i) 92 | plt.imshow(x_test[i].reshape(28, 28)) 93 | plt.gray() 94 | ax.get_xaxis().set_visible(False) 95 | ax.get_yaxis().set_visible(False) 96 | # 展示自编码器重构后的图像 97 | ax = plt.subplot(2, n, i + n) 98 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 99 | plt.gray() 100 | ax.get_xaxis().set_visible(False) 101 | ax.get_yaxis().set_visible(False) 102 | plt.show() -------------------------------------------------------------------------------- /stacked_dae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 堆叠降噪自编码器(Stacked Denoising AutoEncoder) 3 | 编解码各2层,且维数基于中心对称 4 | 各层维数:784->256->64->256->784 5 | 逐个预训练4个autoencoder:使用前一个训练好的ae的encoder输出作为下一个ae的输入来训练当前ae 6 | 堆叠ae:取前面预训练好的4个ae的encoder层前后连接起来,形成最终stacked ae的模型结构 7 | 堆叠ae的训练:使用预训练好的参数初始化stacked ae,然后进行全局训练优化 8 | ''' 9 | from keras.layers import Input, Dense 10 | from keras.models import Model, load_model 11 | from keras.datasets import mnist 12 | from keras import backend as K 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import os 16 | # 指定gpu 17 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 18 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 19 | 20 | ##### 设置网络参数 ##### 21 | epochs_layer = 100 22 | epochs_whole = 200 23 | batch_size = 256 24 | origin_dim = 784 25 | h_dim1 = 256 26 | h_dim2 = 64 27 | 28 | 29 | ##### 准备mnist数据 ###### 30 | (x_train, _), (x_test, _) = mnist.load_data(path='mnist.npz') 31 | x_train = x_train.astype('float32')/255. 32 | x_test = x_test.astype('float32')/255. 
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
# Corrupt the data with additive Gaussian noise, then clip back to [0, 1].
noise_factor = 0.2
x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape)
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)


##### A single autoencoder #####
class AutoEncoderLayer():
    """One-hidden-layer autoencoder: input_dim -> output_dim -> input_dim.

    After construction the instance exposes ``input`` (the Keras input
    tensor), ``encode_layer`` / ``decode_layer`` (the two Dense layers),
    ``encoded`` / ``decoded`` (their output tensors), ``encoder`` (model from
    input to code) and ``autoencoder`` (model from input to reconstruction).
    """

    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.build()

    def build(self):
        """Create the layers and the encoder / autoencoder models."""
        self.input = Input(shape=(self.input_dim,))
        self.encode_layer = Dense(self.output_dim, activation='sigmoid')
        self.encoded = self.encode_layer(self.input)
        self.encoder = Model(self.input, self.encoded)

        self.decode_layer = Dense(self.input_dim, activation='sigmoid')
        self.decoded = self.decode_layer(self.encoded)

        self.autoencoder = Model(self.input, self.decoded)


# Build the stacked DAE.
class StackedAutoEncoder():
    """Chain the encode layers of several AutoEncoderLayer objects.

    The layer objects are reused, so the stacked model shares weights with
    the individual autoencoders and therefore starts from whatever they have
    already learned.
    """

    def __init__(self, layer_list):
        self.layer_list = layer_list
        self.build()

    def build(self):
        """Build ``self.model`` from the first input through every encode layer.

        The chain length comes from ``self.layer_list`` itself rather than
        from a module-level ``num_layers`` global, so the class works for any
        number of layers and does not depend on definition order.
        """
        out = self.layer_list[0].encoded
        for ae in self.layer_list[1:]:
            out = ae.encode_layer(out)
        self.model = Model(self.layer_list[0].input, out)


def train_layers(encoder_list=None, layer=None, epochs=None, batch_size=None):
    '''
    Pre-training, one layer at a time: AE number ``layer`` is trained on the
    output of the already trained encoders that precede it.
    :param encoder_list: list of AutoEncoderLayer objects, in stacking order
    :param layer: index of the autoencoder to train
    :param epochs: number of training epochs
    :param batch_size: mini-batch size
    :return: None
    '''
    # Forward the noisy training data through the encoders in front of
    # `layer`; layer 0 has none, so the slice is empty and `out` stays as is.
    out = x_train_noisy
    origin = x_train
    for trained in encoder_list[:layer]:
        out = trained.encoder.predict(out)

    current = encoder_list[layer].autoencoder
    current.summary()
    current.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Layer 0 denoises (noisy input -> clean image); deeper layers learn to
    # reconstruct their own input.
    current.fit(
        out,
        origin if layer == 0 else out,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=2
    )


def train_whole(sae=None, epochs=None, batch_size=None):
    '''
    Fine-tune the whole stacked AE, whose layers carry the pre-trained
    parameters.
    :param sae: StackedAutoEncoder whose ``model`` is trained
    :param epochs: number of training epochs
    :param batch_size: mini-batch size
    :return: None
    '''
    sae.model.summary()
    sae.model.compile(optimizer='adadelta', loss='binary_crossentropy')
    sae.model.fit(
        x_train_noisy,
        x_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_test_noisy, x_test),
        verbose=2
    )


# A 5-layer stacked AE needs 4 autoencoders; instantiate them.
num_layers = 5
encoder_1 = AutoEncoderLayer(origin_dim, h_dim1)
encoder_2 = AutoEncoderLayer(h_dim1, h_dim2)
decoder_3 = AutoEncoderLayer(h_dim2, h_dim1)
decoder_4 = AutoEncoderLayer(h_dim1, origin_dim)
autoencoder_list = [encoder_1, encoder_2, decoder_3, decoder_4]

# Pre-train every layer in order.
print("Pre training:")
for level in range(num_layers - 1):
    print("level:", level)
    train_layers(encoder_list=autoencoder_list, layer=level, epochs=epochs_layer, batch_size=batch_size)


# Build the stacked DAE from the four trained AEs.
stacked_ae = StackedAutoEncoder(autoencoder_list)
print("Whole training:")
# Global fine-tuning.
train_whole(sae=stacked_ae, epochs=epochs_whole, batch_size=batch_size)


##### Show the reconstructions of the stacked DAE #####
decoded_imgs = stacked_ae.model.predict(x_test_noisy)
n = 10
plt.figure(figsize=(20, 4)) 156 | for i in range(1, n): 157 | # 展示原始图像 158 | ax = plt.subplot(2, n, i) 159 | plt.imshow(x_test_noisy[i].reshape(28, 28)) 160 | plt.gray() 161 | ax.get_xaxis().set_visible(False) 162 | ax.get_yaxis().set_visible(False) 163 | # 展示自编码器重构后的图像 164 | ax = plt.subplot(2, n, i + n) 165 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 166 | plt.gray() 167 | ax.get_xaxis().set_visible(False) 168 | ax.get_yaxis().set_visible(False) 169 | 170 | plt.show() -------------------------------------------------------------------------------- /vae.py: -------------------------------------------------------------------------------- 1 | ''' 2 | 变分自编码器(VAE):VAE不是将输入图像压缩伟潜在空间的编码, 3 | 而是将图像转换为最常见的两个统计分布参数——均值和标准差, 4 | 然后使用这两个参数来从分布中进行随机采样得到隐变量, 5 | 对隐变量进行解码重构即可。 6 | ''' 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | from scipy.stats import norm 10 | from keras.layers import Input, Dense, Lambda 11 | from keras.models import Model, load_model 12 | from keras import backend as K 13 | from keras import metrics 14 | from keras.datasets import mnist 15 | from keras.utils import to_categorical 16 | import os 17 | 18 | # 指定gpu 19 | # os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 20 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 21 | 22 | ##### 设置模型相关参数 ##### 23 | batch_size = 256 24 | original_dim = 784 25 | latent_dim = 2 26 | intermediate_dim = 256 27 | epochs = 50 28 | epsilon_std = 1.0 29 | num_classes = 10 30 | 31 | ##### 加载mnist数据集 ##### 32 | (x_train, y_train_), (x_test, y_test_) = mnist.load_data('mnist.npz') 33 | x_train = x_train.astype('float32')/255. 34 | x_test = x_test.astype('float32')/255. 
35 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 36 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 37 | y_train = to_categorical(y_train_, num_classes) 38 | y_test = to_categorical(y_test_, num_classes) 39 | 40 | ##### 建立计算均值和方差的编码网络 ##### 41 | x = Input(shape=(original_dim,)) 42 | h = Dense(intermediate_dim, activation='relu')(x) 43 | # 算p(Z|X)的均值和方差 44 | z_mean = Dense(latent_dim)(h) 45 | z_log_var = Dense(latent_dim)(h) 46 | 47 | ##### 定义参数复现技巧函数和抽样层 ##### 48 | # 参数复现技巧 49 | def sampling(args): 50 | z_mean, z_log_var = args 51 | epsilon = K.random_normal( 52 | shape=(K.shape(z_mean)), 53 | mean=0., 54 | stddev=epsilon_std 55 | ) 56 | return z_mean + epsilon*K.exp(z_log_var/2) 57 | # 重参数层,相当于给输入加入噪声 58 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 59 | 60 | ##### 定义模型解码部分(生成器) ##### 61 | # 解码层 62 | decoder_h = Dense(intermediate_dim, activation='relu') 63 | decoder_mean = Dense(original_dim, activation='sigmoid') 64 | h_decoded = decoder_h(z) 65 | x_decoded_mean = decoder_mean(h_decoded) 66 | 67 | ##### 接下来实例化三个模型 ##### 68 | # 1、一个端到端的自动编码器,用于完成输入信号的重构 69 | vae = Model(x, x_decoded_mean) 70 | # 2、一个用于将输入空间映射为隐空间的编码器 71 | encoder = Model(x, z_mean) 72 | # 3、一个利用隐空间的分布产生的样本点生成对应的重构样本的生成器 73 | decoder_input = Input(shape=(latent_dim,)) 74 | _h_decoded = decoder_h(decoder_input) 75 | _x_decoded_mean = decoder_mean(_h_decoded) 76 | generator = Model(decoder_input, _x_decoded_mean) 77 | 78 | ##### 定义VAE损失函数并进行训练 ##### 79 | # xent_loss是重构损失,kl_loss是KL loss 80 | xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean) 81 | kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 82 | vae_loss = K.mean(xent_loss + kl_loss) 83 | # add_loss是新增的方法,用于更灵活的添加各种loss 84 | vae.add_loss(vae_loss) 85 | vae.compile(optimizer='rmsprop', loss=None) 86 | vae.summary() 87 | 88 | # 开始训练 89 | vae.fit( 90 | x_train, 91 | shuffle=True, 92 | epochs=epochs, 93 | 
batch_size=batch_size, 94 | validation_data=(x_test, None) 95 | ) 96 | # 保存模型 97 | vae.save('./model/model_vae') 98 | encoder.save('./model/model_vae_encoder') 99 | generator.save('./model/model_vae_generator') 100 | 101 | ##### 测试一下模型的重构效果 ##### 102 | decoded_imgs = vae.predict(x_test) 103 | n = 10 104 | plt.figure(figsize=(20, 4)) 105 | for i in range(1, n): 106 | # 展示原始图像 107 | ax = plt.subplot(2, n, i) 108 | plt.imshow(x_test[i].reshape(28, 28)) 109 | plt.gray() 110 | ax.get_xaxis().set_visible(False) 111 | ax.get_yaxis().set_visible(False) 112 | # 展示自编码器重构后的图像 113 | ax = plt.subplot(2, n, i + n) 114 | plt.imshow(decoded_imgs[i].reshape(28, 28)) 115 | plt.gray() 116 | ax.get_xaxis().set_visible(False) 117 | ax.get_yaxis().set_visible(False) 118 | 119 | plt.show() 120 | 121 | 122 | 123 | ##### 测试模型的生成能力(从隐空间采样然后利用训练好的生成器生成) ##### 124 | # VAE是一个生成模型,可以用它来生成新数字 125 | # 可以从隐平面上采样一些点,然后生成对应的显变量,即MNIST的数字 126 | # 观察隐变量的两个维度变化是如何影响输出结果的 127 | n = 15 128 | # figure with 15*15 digits 129 | digit_size = 28 130 | figure = cnp.zeros((digit_size*n, digit_size*n)) 131 | # 用正态分布的分位数来构建隐变量对 132 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 133 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 134 | for i, yi in enumerate(grid_x): 135 | for j, xi in enumerate(grid_y): 136 | z_sample = np.array([[xi, yi]]) 137 | x_decoded = generator.predict(z_sample) 138 | digit = x_decoded[0].reshape(digit_size, digit_size) 139 | figure[i*digit_size:(i+1)*digit_size, j*digit_size:(j+1)*digit_size] = digit 140 | 141 | plt.figure(figsize=(10, 10)) 142 | plt.imshow(figure, cmap='Greys_r') 143 | plt.show() -------------------------------------------------------------------------------- /vae_cnn.py: -------------------------------------------------------------------------------- 1 | ''' 2 | VAE,CNN版本 3 | 4 | ''' 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from scipy.stats import norm 8 | 9 | from keras.layers import Dense, Input 10 | from keras.layers import Convolution2D, 
Flatten, Lambda 11 | from keras.layers import Reshape, Conv2DTranspose 12 | from keras.models import Model, load_model 13 | from keras import backend as K 14 | from keras.datasets import mnist 15 | import os 16 | 17 | # 指定gpu 18 | os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 19 | os.environ['CUDA_VISIBLE_DEVICES'] = "0" 20 | 21 | ##### 加载mnist数据集 ##### 22 | (x_train, y_train_), (x_test, y_test_) = mnist.load_data('mnist.npz') 23 | image_size = x_train.shape[1] 24 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) 25 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) 26 | x_train = x_train.astype('float32')/255. 27 | x_test = x_test.astype('float32')/255. 28 | 29 | ##### 设置网络参数 ##### 30 | input_shape = (image_size, image_size, 1) 31 | batch_size = 256 32 | kernel_size = 3 33 | filters = 16 34 | latent_dim = 2 # 隐变量取2维是为了方便后面画图 35 | epochs = 30 36 | 37 | ##### 建立计算均值和方差的编码网络 ##### 38 | x_in = Input(shape=input_shape) 39 | x = x_in 40 | for i in range(2): 41 | filters *=2 42 | x = Convolution2D( 43 | filters=filters, 44 | kernel_size=kernel_size, 45 | activation='relu', 46 | strides=2, 47 | padding='same', 48 | )(x) 49 | 50 | # 备份当前shape,等下构建decoder的时候要用 51 | shape = K.int_shape(x) 52 | 53 | x = Flatten()(x) # 把多维的数据变为一维 54 | x = Dense(16, activation='relu')(x) 55 | # 算p(Z|X)的均值和方差 56 | z_mean = Dense(latent_dim)(x) 57 | z_log_var = Dense(latent_dim)(x) 58 | 59 | ##### 定义参数复现技巧函数和抽样层 ##### 60 | # 重参数技巧 61 | def sampling(args): 62 | z_mean, z_log_var = args 63 | epsilon = K.random_normal(shape=K.shape(z_mean)) 64 | return z_mean + epsilon * K.exp(z_log_var/2) 65 | # 重参数层,相当于给输入加入噪声 66 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 67 | 68 | ##### 定义模型解码部分(生成器) ##### 69 | # 先搭建一个独立的模型,再调用模型 70 | latent_inputs = Input(shape=(latent_dim,)) 71 | x = Dense(shape[1]*shape[2]*shape[3], activation='relu')(latent_inputs) 72 | x = Reshape((shape[1], shape[2], shape[3]))(x) # 把数据变回原来的维数 73 | 74 | for i in range(2): 75 | x = 
Conv2DTranspose( 76 | filters=filters, 77 | kernel_size=kernel_size, 78 | activation='relu', 79 | strides=2, 80 | padding='same' 81 | )(x) 82 | filters //=2 83 | 84 | outputs = Conv2DTranspose( 85 | filters=1, 86 | kernel_size=kernel_size, 87 | activation='sigmoid', 88 | padding='same' 89 | )(x) 90 | 91 | # 搭建为一个独立的encoder 92 | encoder = Model(x_in, z_mean) 93 | # 独立的decoder 94 | decoder = Model(latent_inputs, outputs) 95 | x_out = decoder(z) 96 | # 完整的vae 97 | vae = Model(x_in, x_out) 98 | 99 | # xent_loss是重构loss, kl_loss是KL loss 100 | xent_loss = K.sum(K.binary_crossentropy(x_in, x_out), axis=[1, 2, 3]) 101 | kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 102 | vae_loss = K.mean(xent_loss + kl_loss) 103 | 104 | vae.add_loss(vae_loss) # 往模型加入自定义的loss 105 | 106 | vae.compile(optimizer='rmsprop') 107 | vae.summary() 108 | 109 | # 开始训练 110 | vae.fit( 111 | x_train, 112 | shuffle=True, 113 | epochs=epochs, 114 | batch_size=batch_size, 115 | validation_data=(x_test, None) 116 | ) 117 | vae.save('./model/model_vae_cnn') 118 | encoder.save('./model/model_vae_cnn_encoder') 119 | decoder.save('./model/model_vae_cnn_decoder') 120 | 121 | # 观察各个数字在隐空间的分布 122 | encoder_model = load_model('./model/model_vae_cnn_encoder', custom_objects={'sampling':sampling}) 123 | x_test_encoded = encoder_model.predict(x_test, batch_size=batch_size) 124 | plt.figure(figsize=(6,6)) 125 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test_) 126 | plt.colorbar() 127 | plt.show() 128 | 129 | # 观察隐变量的两个维度变化是如何影响输出结果的 130 | decoder_model = load_model('./model/model_vae_generator', custom_objects={'sampling':sampling}) 131 | n = 15 132 | digit_size = 28 133 | figure = np.zeros((digit_size * n, digit_size * n)) 134 | 135 | # 用正态分布的分位数来构建隐变量对 136 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 137 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 138 | 139 | for i, yi in enumerate(grid_x): 140 | for j, xi in enumerate(grid_y): 141 | z_sample = 
np.array([[xi, yi]]) 142 | x_decoded = decoder_model.predict(z_sample) 143 | digit = x_decoded[0].reshape(digit_size, digit_size) 144 | figure[i * digit_size:(i + 1) * digit_size, j * digit_size:(j + 1) * digit_size] = digit 145 | 146 | plt.figure(figsize=(10, 10)) 147 | plt.imshow(figure, cmap='Greys_r') 148 | plt.show() 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | --------------------------------------------------------------------------------