├── Experiments ├── 1024.ipynb ├── Keras_GAN.ipynb ├── LSTM_PTB.ipynb ├── Res │ ├── 1.md │ ├── Synced.jpg │ ├── gan_tf_keras1.png │ ├── gan_tf_keras2.png │ ├── gan_tf_keras3.png │ ├── gan_tf_keras4.png │ ├── gan_tf_keras5.png │ ├── gan_tf_keras6.png │ └── gan_tf_keras7.png ├── Synced.py ├── Transformer_synced.ipynb ├── pytorch_TCN.ipynb ├── swish_test.ipynb ├── tf_CNN_Tutorial.ipynb ├── tf_GAN.ipynb ├── tf_Keras_CNN.ipynb ├── tf_LeNet5.ipynb ├── tf_orginal_CapsNet.ipynb └── tf_trial_1.ipynb └── README.md /Experiments/1024.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 13 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 14 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 15 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 16 | "After 0 training step(s), validation accuracy using average model is 0.1408 \n", 17 | "After 1000 training step(s), validation accuracy using average model is 0.9406 \n", 18 | "After 2000 training step(s), validation accuracy using average model is 0.9556 \n", 19 | "After 3000 training step(s), validation accuracy using average model is 0.9616 \n", 20 | "After 4000 training step(s), validation accuracy using average model is 0.9682 \n", 21 | "After 5000 training step(s), validation accuracy using average model is 0.9694 \n", 22 | "After 6000 training step(s), validation accuracy using average model is 0.97 \n", 23 | "After 7000 training step(s), validation accuracy using average model is 0.9712 \n", 24 | "After 8000 training step(s), validation accuracy using average model is 0.9708 \n", 25 | "After 9000 training step(s), validation accuracy using average model is 0.9716 \n", 26 | "After 10000 training step(s), test accuracy using average model is 0.9697\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import tensorflow as tf\n", 32 | "from tensorflow.examples.tutorials.mnist import input_data\n", 33 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 34 | "\n", 35 | "\n", 36 | "INPUT_NODE = 784 \n", 37 | "OUTPUT_NODE = 10 \n", 38 | "LAYER1_NODE = 1024 \n", 39 | "LAYER2_NODE = 512 \n", 40 | "LAYER3_NODE = 256 \n", 41 | "LAYER4_NODE = 128\n", 42 | "LAYER5_NODE = 64 \n", 43 | "LAYER6_NODE = 64\n", 44 | "LAYER7_NODE = 128 \n", 45 | "LAYER8_NODE = 256 \n", 46 | "LAYER9_NODE = 512\n", 47 | "LAYER10_NODE = 1024 \n", 48 | " \n", 49 | "BATCH_SIZE = 50 \n", 50 | "\n", 51 | "# 模型相关的参数\n", 52 | "LEARNING_RATE_BASE = 0.008 \n", 53 | "LEARNING_RATE_DECAY = 0.99 \n", 54 | "REGULARAZTION_RATE = 0.0001 \n", 55 | "TRAINING_STEPS = 10000 \n", 56 | "MOVING_AVERAGE_DECAY = 0.99 \n", 57 | "\n", 58 | "def inference(input_tensor, avg_class, W, B):\n", 59 | " # 不使用滑动平均类\n", 60 | " if avg_class == None:\n", 61 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, W[0]) + B[0])\n", 62 | " layer2 = tf.nn.relu(tf.matmul(layer1, W[1]) + B[1])\n", 63 | " layer3 = tf.nn.relu(tf.matmul(layer2, W[2]) + B[2])\n", 64 | " layer4 = tf.nn.relu(tf.matmul(layer3, W[3]) + B[3])\n", 65 | " layer5 = tf.nn.relu(tf.matmul(layer4, W[4]) + B[4])\n", 66 | " layer6 = tf.nn.relu(tf.matmul(layer5, W[5]) + B[5])\n", 67 | " layer7 = tf.nn.relu(tf.matmul(layer6, W[6]) + B[6])\n", 68 | " layer8 = tf.nn.relu(tf.matmul(layer7, W[7]) + B[7])\n", 69 | " layer9 = tf.nn.relu(tf.matmul(layer8, W[8]) + 
B[8])\n", 70 | " layer10 = tf.nn.relu(tf.matmul(layer9, W[9]) + B[9])\n", 71 | " return tf.matmul(layer10, W[10]) + B[10]\n", 72 | " \n", 73 | " else:\n", 74 | " \n", 75 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0]))\n", 76 | " layer2 = tf.nn.relu(tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1]))\n", 77 | " layer3 = tf.nn.relu(tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2]))\n", 78 | " layer4 = tf.nn.relu(tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3]))\n", 79 | " layer5 = tf.nn.relu(tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4]))\n", 80 | " layer6 = tf.nn.relu(tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5]))\n", 81 | " layer7 = tf.nn.relu(tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6]))\n", 82 | " layer8 = tf.nn.relu(tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7]))\n", 83 | " layer9 = tf.nn.relu(tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8]))\n", 84 | " layer10 = tf.nn.relu(tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9]))\n", 85 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 86 | " \n", 87 | "def train(mnist):\n", 88 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 89 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 90 | " \n", 91 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 92 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 93 | " \n", 94 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 95 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 96 | " \n", 97 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 98 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 99 | " \n", 100 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 101 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 102 | " \n", 103 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 104 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 105 | " \n", 106 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 107 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 108 | " \n", 109 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 110 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 111 | " \n", 112 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 113 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 114 | " \n", 115 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 116 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 117 | " \n", 118 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 119 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 120 | " \n", 121 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 122 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 123 | " \n", 124 | " W=[weights1, weights2, weights3, weights4, weights5, 
weights6, weights7, weights8, weights9, weights10, weights11]\n", 125 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 126 | " \n", 127 | " # 计算不含滑动平均类的前向传播结果\n", 128 | " y = inference(x, None, W, B)\n", 129 | " \n", 130 | " # 定义训练轮数及相关的滑动平均类 \n", 131 | " global_step = tf.Variable(0, trainable=False)\n", 132 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 133 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 134 | " average_y = inference(x, variable_averages, W, B)\n", 135 | " \n", 136 | " # 计算交叉熵及其平均值\n", 137 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 138 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 139 | " \n", 140 | " # 损失函数的计算\n", 141 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 142 | " regularaztion = regularizer(W[0]) \n", 143 | " for i in range(1,11):\n", 144 | " regularazation=regularaztion + regularizer(W[i]) \n", 145 | " loss = cross_entropy_mean + regularaztion\n", 146 | " \n", 147 | " # 设置指数衰减的学习率。\n", 148 | " learning_rate = tf.train.exponential_decay(\n", 149 | " LEARNING_RATE_BASE,\n", 150 | " global_step,\n", 151 | " mnist.train.num_examples / BATCH_SIZE,\n", 152 | " LEARNING_RATE_DECAY,\n", 153 | " staircase=True)\n", 154 | " \n", 155 | " # 优化损失函数\n", 156 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 157 | " \n", 158 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 159 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 160 | " train_op = tf.no_op(name='train')\n", 161 | "\n", 162 | " # 计算正确率\n", 163 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 164 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 165 | " \n", 166 | " # 初始化会话并开始训练过程。\n", 167 | " with tf.Session() as sess:\n", 168 | " tf.global_variables_initializer().run()\n", 169 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 170 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 171 | " \n", 172 | " # 循环的训练神经网络。\n", 173 | " for i in range(TRAINING_STEPS):\n", 174 | " if i % 1000 == 0:\n", 175 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 176 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 177 | " \n", 178 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 179 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 180 | "\n", 181 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 182 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 183 | "\n", 184 | "train(mnist)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.5.4" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 
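Note on the loss construction in the cell above: the accumulation loop assigns to `regularazation` while both the initial value and the final `loss` use `regularaztion`, so only `W[0]` ever contributes an L2 penalty. A minimal corrected sketch of the intended accumulation, keeping the notebook's TF 1.x API and variable names (illustrative, not a verbatim excerpt):

```python
# Accumulate the L2 penalty over all eleven weight matrices, then add it to
# the cross-entropy term. Note the same variable name on both sides of the
# assignment inside the loop.
regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
regularization = regularizer(W[0])
for i in range(1, 11):
    regularization = regularization + regularizer(W[i])
loss = cross_entropy_mean + regularization
```

Independent of that fix, the moving-average evaluation works as reported: `tf.train.ExponentialMovingAverage` keeps a shadow copy of every trainable variable, and `inference(x, variable_averages, W, B)` reads those shadow values, which is why the printed validation accuracy is described as "using average model".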
219 | -------------------------------------------------------------------------------- /Experiments/Keras_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from keras.models import Sequential\n", 20 | "from keras.layers import Dense\n", 21 | "from keras.layers import Reshape\n", 22 | "from keras.layers.core import Activation\n", 23 | "from keras.layers.normalization import BatchNormalization\n", 24 | "from keras.layers.convolutional import UpSampling2D\n", 25 | "from keras.layers.convolutional import Conv2D, MaxPooling2D\n", 26 | "from keras.layers.core import Flatten\n", 27 | "from keras.optimizers import SGD\n", 28 | "from keras.datasets import mnist\n", 29 | "import numpy as np\n", 30 | "from PIL import Image\n", 31 | "import argparse\n", 32 | "import math" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "整个训练过程可以说判别器 D 和生成器 G 对价值函数 V(G,D) 进行了极小极大化博弈:\n", 40 | "![gan_tf_keras1.png](./Res/gan_tf_keras1.png)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### 最优生成器\n", 48 | "\n", 49 | "因为最优的判别器D(x)=P_data/(P_data+P_G),我们将其代入V(G,D)可得:\n", 50 | "![gan_tf_keras2.png](./Res/gan_tf_keras2.png)\n", 51 | "该积分进行变换得:\n", 52 | "![gan_tf_keras3.png](./Res/gan_tf_keras3.png)\n", 53 | "假设存在两个分布 P 和 Q,且这两个分布的平均分布 M=(P+Q)/2,那么这两个分布之间的 JS 散度为 P 与 M 之间的 KL 散度加上 Q 与 M 之间的 KL 散度再除以 2;因此可化为:\n", 54 | "![gan_tf_keras4.png](./Res/gan_tf_keras4.png)\n", 55 | "\n", 56 | "JS 散度的取值为 0 到 log2。若两个分布完全没有交集,那么 JS 散度取最大值 log2;若两个分布完全一样,那么 JS 散度取最小值 0。当 P_G=P_data 时,JSD(P_data||P_G) 为 0。综上所述,生成分布当且仅当等于真实数据分布式时,我们可以取得最优生成器。" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def generator_model():\n", 68 | " #下面搭建生成器的架构,首先导入序贯模型(sequential),即多个网络层的线性堆叠\n", 69 | " model = Sequential()\n", 70 | " #添加一个全连接层,输入为100维向量,输出为1024维\n", 71 | " model.add(Dense(input_dim=100, output_dim=1024))\n", 72 | " #添加一个激活函数tanh\n", 73 | " model.add(Activation('tanh'))\n", 74 | " #添加一个全连接层,输出为128×7×7维度\n", 75 | " model.add(Dense(128*7*7))\n", 76 | " #添加一个批量归一化层,该层在每个batch上将前一层的激活值重新规范化,即使得其输出数据的均值接近0,其标准差接近1\n", 77 | " model.add(BatchNormalization())\n", 78 | " model.add(Activation('tanh'))\n", 79 | " #Reshape层用来将输入shape转换为特定的shape,将含有128*7*7个元素的向量转化为7×7×128张量\n", 80 | " model.add(Reshape((7, 7, 128), input_shape=(128*7*7,)))\n", 81 | " #2维上采样层,即将数据的行和列分别重复2次\n", 82 | " model.add(UpSampling2D(size=(2, 2)))\n", 83 | " #添加一个2维卷积层,卷积核大小为5×5,激活函数为tanh,共64个卷积核,并采用padding以保持图像尺寸不变\n", 84 | " model.add(Conv2D(64, (5, 5), padding='same'))\n", 85 | " model.add(Activation('tanh'))\n", 86 | " model.add(UpSampling2D(size=(2, 2)))\n", 87 | " #卷积核设为1即输出图像的维度\n", 88 | " model.add(Conv2D(1, (5, 5), padding='same'))\n", 89 | " model.add(Activation('tanh'))\n", 90 | " return model" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### 最优判别器\n", 98 | "\n", 99 | "原论文中价值函数可写为在 x 上的积分,即将数学期望展开为积分形式:\n", 100 | "![gan_tf_keras5.png](./Res/gan_tf_keras5.png)\n", 101 | "其实求积分的最大值可以转化为求被积函数的最大值。而求被积函数的最大值是为了求得最优判别器 D,因此不涉及判别器的项都可以看作为常数项。\n", 102 | "若令判别器 D(x) 等于 y,那么被积函数可以写为:\n", 
103 | "![gan_tf_keras6.png](./Res/gan_tf_keras6.png)\n", 104 | "为了找到最优的极值点,如果 a+b≠0,我们可以用以下一阶导求解:\n", 105 | "![gan_tf_keras6.png](./Res/gan_tf_keras7.png)\n", 106 | "因此,最优判别器D(x)=P_data/(P_data+P_G)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def discriminator_model():\n", 118 | " #下面搭建判别器架构,同样采用序贯模型\n", 119 | " model = Sequential()\n", 120 | " \n", 121 | " #添加2维卷积层,卷积核大小为5×5,激活函数为tanh,输入shape在‘channels_first’模式下为(samples,channels,rows,cols)\n", 122 | " #在‘channels_last’模式下为(samples,rows,cols,channels),输出为64维\n", 123 | " model.add(\n", 124 | " Conv2D(64, (5, 5),\n", 125 | " padding='same',\n", 126 | " input_shape=(28, 28, 1))\n", 127 | " )\n", 128 | " model.add(Activation('tanh'))\n", 129 | " #为空域信号施加最大值池化,pool_size取(2,2)代表使图片在两个维度上均变为原长的一半\n", 130 | " model.add(MaxPooling2D(pool_size=(2, 2)))\n", 131 | " model.add(Conv2D(128, (5, 5)))\n", 132 | " model.add(Activation('tanh'))\n", 133 | " model.add(MaxPooling2D(pool_size=(2, 2)))\n", 134 | " #Flatten层把多维输入一维化,常用在从卷积层到全连接层的过渡\n", 135 | " model.add(Flatten())\n", 136 | " model.add(Dense(1024))\n", 137 | " model.add(Activation('tanh'))\n", 138 | " #一个结点进行二值分类,并采用sigmoid函数的输出作为概念\n", 139 | " model.add(Dense(1))\n", 140 | " model.add(Activation('sigmoid'))\n", 141 | " return model" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 4, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "def generator_containing_discriminator(g, d):\n", 153 | " #将前面定义的生成器架构和判别器架构组拼接成一个大的神经网络,用于判别生成的图片\n", 154 | " model = Sequential()\n", 155 | " #先添加生成器架构,再令d不可训练,即固定d\n", 156 | " #因此在给定d的情况下训练生成器,即通过将生成的结果投入到判别器进行辨别而优化生成器\n", 157 | " model.add(g)\n", 158 | " d.trainable = False\n", 159 | " model.add(d)\n", 160 | " return model" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 5, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "def combine_images(generated_images):\n", 172 | " #生成图片拼接\n", 173 | " num = generated_images.shape[0]\n", 174 | " width = int(math.sqrt(num))\n", 175 | " height = int(math.ceil(float(num)/width))\n", 176 | " shape = generated_images.shape[1:3]\n", 177 | " image = np.zeros((height*shape[0], width*shape[1]),\n", 178 | " dtype=generated_images.dtype)\n", 179 | " for index, img in enumerate(generated_images):\n", 180 | " i = int(index/width)\n", 181 | " j = index % width\n", 182 | " image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \\\n", 183 | " img[:, :, 0]\n", 184 | " return image" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### 对于每一次迭代:\n", 192 | "\n", 193 | "- 从真实数据分布 P_data 抽取 m 个样本\n", 194 | "- 从先验分布 P_prior(z) 抽取 m 个噪声样本\n", 195 | "- 将噪声样本投入 G 而生成数据,即x^tilde = G(Z^i);通过最大化 V 的近似而更新判别器参数θ_d\n", 196 | "\n", 197 | "以上是学习判别器 D 的过程。因为学习 D 的过程是计算 JS 散度的过程,并且我们希望能最大化价值函数,所以该步骤会重复 k 次。\n", 198 | "\n", 199 | "- 从先验分布 P_prior(z) 中抽取另外 m 个噪声样本 {z^1,...,z^m}\n", 200 | "- 通过极小化 V^tilde 而更新生成器参数θ_g" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 6, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "def train(BATCH_SIZE):\n", 212 | " \n", 213 | " # 国内好像不能直接导入数据集,我们试了几次都不行,后来将数据集下载到本地'~/.keras/datasets/',也就是当前目录(我的是用户文件夹下)下的.keras文件夹中。\n", 214 | " #下载的地址为:https://s3.amazonaws.com/img-datasets/mnist.npz\n", 215 | " (X_train, 
y_train), (X_test, y_test) = mnist.load_data()\n", 216 | " #iamge_data_format选择\"channels_last\"或\"channels_first\",该选项指定了Keras将要使用的维度顺序。\n", 217 | " #\"channels_first\"假定2D数据的维度顺序为(channels, rows, cols),3D数据的维度顺序为(channels, conv_dim1, conv_dim2, conv_dim3)\n", 218 | " \n", 219 | " #转换字段类型,并将数据导入变量中\n", 220 | " X_train = (X_train.astype(np.float32) - 127.5)/127.5\n", 221 | " X_train = X_train[:, :, :, None]\n", 222 | " X_test = X_test[:, :, :, None]\n", 223 | " # X_train = X_train.reshape((X_train.shape, 1) + X_train.shape[1:])\n", 224 | " \n", 225 | " #将定义好的模型架构赋值给特定的变量\n", 226 | " d = discriminator_model()\n", 227 | " g = generator_model()\n", 228 | " d_on_g = generator_containing_discriminator(g, d)\n", 229 | " \n", 230 | " #定义生成器模型判别器模型更新所使用的优化算法及超参数\n", 231 | " d_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)\n", 232 | " g_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)\n", 233 | " \n", 234 | " #编译三个神经网络并设置损失函数和优化算法,其中损失函数都是用的是二元分类交叉熵函数。编译是用来配置模型学习过程的\n", 235 | " g.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 236 | " d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)\n", 237 | " \n", 238 | " #前一个架构在固定判别器的情况下训练了生成器,所以在训练判别器之前先要设定其为可训练。\n", 239 | " d.trainable = True\n", 240 | " d.compile(loss='binary_crossentropy', optimizer=d_optim)\n", 241 | " \n", 242 | " #下面在满足epoch条件下进行训练\n", 243 | " for epoch in range(30):\n", 244 | " print(\"Epoch is\", epoch)\n", 245 | " \n", 246 | " #计算一个epoch所需要的迭代数量,即训练样本数除批量大小数的值取整;其中shape[0]就是读取矩阵第一维度的长度\n", 247 | " print(\"Number of batches\", int(X_train.shape[0]/BATCH_SIZE))\n", 248 | " \n", 249 | " #在一个epoch内进行迭代训练\n", 250 | " for index in range(int(X_train.shape[0]/BATCH_SIZE)):\n", 251 | " \n", 252 | " #随机生成的噪声服从均匀分布,且采样下界为-1、采样上界为1,输出BATCH_SIZE×100个样本;即抽取一个批量的随机样本\n", 253 | " noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))\n", 254 | " \n", 255 | " #抽取一个批量的真实图片\n", 256 | " image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]\n", 257 | " \n", 258 | " #生成的图片使用生成器对随机噪声进行推断;verbose为日志显示,0为不在标准输出流输出日志信息,1为输出进度条记录\n", 259 | " generated_images = g.predict(noise, verbose=0)\n", 260 | " \n", 261 | " #每经过100次迭代输出一张生成的图片\n", 262 | " if index % 100 == 0:\n", 263 | " image = combine_images(generated_images)\n", 264 | " image = image*127.5+127.5\n", 265 | " Image.fromarray(image.astype(np.uint8)).save(\n", 266 | " \"./GAN/\"+str(epoch)+\"_\"+str(index)+\".png\")\n", 267 | " \n", 268 | " #将真实的图片和生成的图片以多维数组的形式拼接在一起,真实图片在上,生成图片在下\n", 269 | " X = np.concatenate((image_batch, generated_images))\n", 270 | " \n", 271 | " #生成图片真假标签,即一个包含两倍批量大小的列表;前一个批量大小都是1,代表真实图片,后一个批量大小都是0,代表伪造图片\n", 272 | " y = [1] * BATCH_SIZE + [0] * BATCH_SIZE\n", 273 | " \n", 274 | " #判别器的损失;在一个batch的数据上进行一次参数更新\n", 275 | " d_loss = d.train_on_batch(X, y)\n", 276 | " print(\"batch %d d_loss : %f\" % (index, d_loss))\n", 277 | " \n", 278 | " #随机生成的噪声服从均匀分布\n", 279 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))\n", 280 | " \n", 281 | " #固定判别器\n", 282 | " d.trainable = False\n", 283 | " \n", 284 | " #计算生成器损失;在一个batch的数据上进行一次参数更新\n", 285 | " g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)\n", 286 | " \n", 287 | " #令判别器可训练\n", 288 | " d.trainable = True\n", 289 | " print(\"batch %d g_loss : %f\" % (index, g_loss))\n", 290 | " \n", 291 | " #每100次迭代保存一次生成器和判别器的权重\n", 292 | " if index % 100 == 9:\n", 293 | " g.save_weights('generator', True)\n", 294 | " d.save_weights('discriminator', True)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 22, 300 | "metadata": { 301 | "collapsed": true 302 | }, 303 | "outputs": 
[], 304 | "source": [ 305 | "def generate(BATCH_SIZE, nice= False ):\n", 306 | " #训练完模型后,可以运行该函数生成图片\n", 307 | " g = generator_model()\n", 308 | " g.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 309 | " g.load_weights('generator')\n", 310 | " if nice:\n", 311 | " d = discriminator_model()\n", 312 | " d.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 313 | " d.load_weights('discriminator')\n", 314 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))\n", 315 | " generated_images = g.predict(noise, verbose=1)\n", 316 | " d_pret = d.predict(generated_images, verbose=1)\n", 317 | " index = np.arange(0, BATCH_SIZE*20)\n", 318 | " index.resize((BATCH_SIZE*20, 1))\n", 319 | " pre_with_index = list(np.append(d_pret, index, axis=1))\n", 320 | " pre_with_index.sort(key=lambda x: x[0], reverse=True)\n", 321 | " nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)\n", 322 | " nice_images = nice_images[:, :, :, None]\n", 323 | " for i in range(BATCH_SIZE):\n", 324 | " idx = int(pre_with_index[i][1])\n", 325 | " nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]\n", 326 | " image = combine_images(nice_images)\n", 327 | " else:\n", 328 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))\n", 329 | " generated_images = g.predict(noise, verbose=0)\n", 330 | " image = combine_images(generated_images)\n", 331 | " image = image*127.5+127.5\n", 332 | " Image.fromarray(image.astype(np.uint8)).save(\n", 333 | " \"./GAN/generated_image.png\")" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 29, 339 | "metadata": { 340 | "collapsed": false 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stderr", 345 | "output_type": "stream", 346 | "text": [ 347 | "C:\\Users\\Horatio\\AppData\\Local\\conda\\conda\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:3: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(units=1024, input_dim=100)`\n", 348 | " This is separate from the ipykernel package so we can avoid doing imports until\n" 349 | ] 350 | }, 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "128/132 [============================>.] 
- ETA: 0s\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "generate(132)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": { 367 | "collapsed": true 368 | }, 369 | "outputs": [], 370 | "source": [] 371 | } 372 | ], 373 | "metadata": { 374 | "kernelspec": { 375 | "display_name": "Python 3", 376 | "language": "python", 377 | "name": "python3" 378 | }, 379 | "language_info": { 380 | "codemirror_mode": { 381 | "name": "ipython", 382 | "version": 3 383 | }, 384 | "file_extension": ".py", 385 | "mimetype": "text/x-python", 386 | "name": "python", 387 | "nbconvert_exporter": "python", 388 | "pygments_lexer": "ipython3", 389 | "version": "3.5.3" 390 | } 391 | }, 392 | "nbformat": 4, 393 | "nbformat_minor": 2 394 | } 395 | -------------------------------------------------------------------------------- /Experiments/LSTM_PTB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | " from ._conv import register_converters as _register_converters\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "#reader.py\n", 20 | "from __future__ import absolute_import\n", 21 | "from __future__ import division\n", 22 | "from __future__ import print_function\n", 23 | "\n", 24 | "import collections\n", 25 | "import os\n", 26 | "import sys\n", 27 | "\n", 28 | "import tensorflow as tf\n", 29 | "\n", 30 | "Py3 = sys.version_info[0] == 3\n", 31 | "\n", 32 | "def _read_words(filename):\n", 33 | " with tf.gfile.GFile(filename, \"r\") as f:\n", 34 | " if Py3:\n", 35 | " return f.read().replace(\"\\n\", \"\").split()\n", 36 | " else:\n", 37 | " return f.read().decode(\"utf-8\").replace(\"\\n\", \"\").split()\n", 38 | "\n", 39 | "\n", 40 | "def _build_vocab(filename):\n", 41 | " data = _read_words(filename)\n", 42 | "\n", 43 | " counter = collections.Counter(data)\n", 44 | " count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))\n", 45 | "\n", 46 | " words, _ = list(zip(*count_pairs))\n", 47 | " word_to_id = dict(zip(words, range(len(words))))\n", 48 | "\n", 49 | " return word_to_id\n", 50 | "\n", 51 | "\n", 52 | "def _file_to_word_ids(filename, word_to_id):\n", 53 | " data = _read_words(filename)\n", 54 | " return [word_to_id[word] for word in data if word in word_to_id]\n", 55 | "\n", 56 | "\n", 57 | "def ptb_raw_data(data_path=None):\n", 58 | " \"\"\"Load PTB raw data from data directory \"data_path\".\n", 59 | " Reads PTB text files, converts strings to integer ids,\n", 60 | " and performs mini-batching of the inputs.\n", 61 | " The PTB dataset comes from Tomas Mikolov's webpage:\n", 62 | " http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n", 63 | " Args:\n", 64 | " data_path: string path to the directory where simple-examples.tgz has\n", 65 | " been extracted.\n", 66 | " Returns:\n", 67 | " tuple (train_data, valid_data, test_data, vocabulary)\n", 68 | " where each of the data objects can be passed to PTBIterator.\n", 69 | " \"\"\"\n", 70 | "\n", 71 | " train_path = os.path.join(data_path, 
\"ptb.train.txt\")\n", 72 | " valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n", 73 | " test_path = os.path.join(data_path, \"ptb.test.txt\")\n", 74 | "\n", 75 | " word_to_id = _build_vocab(train_path)\n", 76 | " train_data = _file_to_word_ids(train_path, word_to_id)\n", 77 | " valid_data = _file_to_word_ids(valid_path, word_to_id)\n", 78 | " test_data = _file_to_word_ids(test_path, word_to_id)\n", 79 | " vocabulary = len(word_to_id)\n", 80 | " return train_data, valid_data, test_data, vocabulary\n", 81 | "\n", 82 | "\n", 83 | "def ptb_producer(raw_data, batch_size, num_steps, name=None):\n", 84 | " \"\"\"Iterate on the raw PTB data.\n", 85 | " This chunks up raw_data into batches of examples and returns Tensors that\n", 86 | " are drawn from these batches.\n", 87 | " Args:\n", 88 | " raw_data: one of the raw data outputs from ptb_raw_data.\n", 89 | " batch_size: int, the batch size.\n", 90 | " num_steps: int, the number of unrolls.\n", 91 | " name: the name of this operation (optional).\n", 92 | " Returns:\n", 93 | " A pair of Tensors, each shaped [batch_size, num_steps]. The second element\n", 94 | " of the tuple is the same data time-shifted to the right by one.\n", 95 | " Raises:\n", 96 | " tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.\n", 97 | " \"\"\"\n", 98 | " with tf.name_scope(name, \"PTBProducer\", [raw_data, batch_size, num_steps]):\n", 99 | " raw_data = tf.convert_to_tensor(raw_data, name=\"raw_data\", dtype=tf.int32)\n", 100 | "\n", 101 | " data_len = tf.size(raw_data)\n", 102 | " batch_len = data_len // batch_size\n", 103 | " data = tf.reshape(raw_data[0 : batch_size * batch_len],\n", 104 | " [batch_size, batch_len])\n", 105 | "\n", 106 | " epoch_size = (batch_len - 1) // num_steps\n", 107 | " assertion = tf.assert_positive(\n", 108 | " epoch_size,\n", 109 | " message=\"epoch_size == 0, decrease batch_size or num_steps\")\n", 110 | " with tf.control_dependencies([assertion]):\n", 111 | " epoch_size = tf.identity(epoch_size, name=\"epoch_size\")\n", 112 | "\n", 113 | " i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()\n", 114 | " x = tf.strided_slice(data, [0, i * num_steps],\n", 115 | " [batch_size, (i + 1) * num_steps])\n", 116 | " x.set_shape([batch_size, num_steps])\n", 117 | " y = tf.strided_slice(data, [0, i * num_steps + 1],\n", 118 | " [batch_size, (i + 1) * num_steps + 1])\n", 119 | " y.set_shape([batch_size, num_steps])\n", 120 | " return x, y" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 2, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "import tensorflow as tf\n", 132 | "import numpy as np\n", 133 | "# 运行上面的reader.py\n", 134 | "\n", 135 | "data_path = './data/PTB-dataset-Tomas-Mikolov/data'\n", 136 | "# 隐藏层单元数与LSTM层级数\n", 137 | "hidden_size = 200\n", 138 | "num_layers = 2\n", 139 | "#词典规模\n", 140 | "vocab_size = 10000\n", 141 | "\n", 142 | "learning_rate = 1.0\n", 143 | "train_batch_size = 16\n", 144 | "# 训练数据截断长度\n", 145 | "train_num_step = 32\n", 146 | "\n", 147 | "# 在测试时不需要使用截断,测试数据为一个超长序列\n", 148 | "eval_batch_size = 1\n", 149 | "eval_num_step = 1\n", 150 | "num_epoch = 3\n", 151 | "#结点不被Dropout的概率\n", 152 | "keep_prob = 0.5\n", 153 | "\n", 154 | "# 用于控制梯度爆炸的参数\n", 155 | "max_grad_norm = 5" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 3, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# 通过ptbmodel 的类描述模型\n", 167 | "class 
PTBModel(object):\n", 168 | " def __init__(self, is_training, batch_size, num_steps):\n", 169 | " # 记录使用的Batch大小和截断长度\n", 170 | " self.batch_size = batch_size\n", 171 | " self.num_steps = num_steps\n", 172 | "\n", 173 | " # 定义输入层,维度为批量大小×截断长度\n", 174 | " self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])\n", 175 | " # 定义预期输出\n", 176 | " self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])\n", 177 | "\n", 178 | " # 定义使用LSTM结构为循环体,带Dropout的深度RNN\n", 179 | " lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)\n", 180 | " if is_training:\n", 181 | " lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)\n", 182 | " cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)\n", 183 | "\n", 184 | " # 初始化状态为0\n", 185 | " self.initial_state = cell.zero_state(batch_size, tf.float32)\n", 186 | "\n", 187 | " # 将单词ID转换为单词向量,embedding的维度为vocab_size*hidden_size\n", 188 | " embedding = tf.get_variable('embedding', [vocab_size, hidden_size])\n", 189 | " # 将一个批量内的单词ID转化为词向量,转化后的输入维度为批量大小×截断长度×隐藏单元数\n", 190 | " inputs = tf.nn.embedding_lookup(embedding, self.input_data)\n", 191 | "\n", 192 | " # 只在训练时使用Dropout\n", 193 | " if is_training: inputs = tf.nn.dropout(inputs, keep_prob)\n", 194 | "\n", 195 | " # 定义输出列表,这里先将不同时刻LSTM的输出收集起来,再通过全连接层得到最终输出\n", 196 | " outputs = []\n", 197 | " # state 储存不同批量中LSTM的状态,初始为0\n", 198 | " state = self.initial_state\n", 199 | " with tf.variable_scope('RNN'):\n", 200 | " for time_step in range(num_steps):\n", 201 | " if time_step > 0: tf.get_variable_scope().reuse_variables()\n", 202 | " # 从输入数据获取当前时间步的输入与前一时间步的状态,并传入LSTM结构\n", 203 | " cell_output, state = cell(inputs[:, time_step, :], state)\n", 204 | " # 将当前输出加入输出队列\n", 205 | " outputs.append(cell_output)\n", 206 | "\n", 207 | " # 将输出队列展开成[batch,hidden*num_step]的形状,再reshape为[batch*num_step, hidden]\n", 208 | " output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_size])\n", 209 | "\n", 210 | " # 将LSTM的输出传入全连接层以生成最后的预测结果。最后结果在每时刻上都是长度为vocab_size的张量\n", 211 | " # 且经过softmax层后表示下一个位置不同词的概率\n", 212 | " weight = tf.get_variable('weight', [hidden_size, vocab_size])\n", 213 | " bias = tf.get_variable('bias', [vocab_size])\n", 214 | " logits = tf.matmul(output, weight) + bias\n", 215 | "\n", 216 | " # 定义交叉熵损失函数,一个序列的交叉熵之和\n", 217 | " loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(\n", 218 | " [logits], # 预测的结果\n", 219 | " [tf.reshape(self.targets, [-1])], # 期望正确的结果,这里将[batch_size, num_steps]压缩为一维张量\n", 220 | " [tf.ones([batch_size * num_steps], dtype=tf.float32)]) # 损失的权重,所有为1表明不同批量和时刻的重要程度一样\n", 221 | "\n", 222 | " # 计算每个批量的平均损失\n", 223 | " self.cost = tf.reduce_sum(loss) / batch_size\n", 224 | " self.final_state = state\n", 225 | "\n", 226 | " # 只在训练模型时定义反向传播操作\n", 227 | " if not is_training: return\n", 228 | " trainable_variable = tf.trainable_variables()\n", 229 | "\n", 230 | " # 控制梯度爆炸问题\n", 231 | " grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_variable), max_grad_norm)\n", 232 | " # 如果需要使用Adam作为优化器,可以改为tf.train.AdamOptimizer(learning_rate),学习率需要降低至0.001左右\n", 233 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", 234 | " # 定义训练步骤\n", 235 | " self.train_op = optimizer.apply_gradients(zip(grads, trainable_variable))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 4, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def run_epoch(session, model, data, train_op, output_log, epoch_size):\n", 247 | " total_costs = 0.0\n", 248 | " iters = 0\n", 249 
| " state = session.run(model.initial_state)\n", 250 | "\n", 251 | " # # 使用当前数据训练或测试模型\n", 252 | " for step in range(epoch_size):\n", 253 | " x, y = session.run(data)\n", 254 | " # 在当前批量上运行train_op并计算损失值,交叉熵计算的是下一个单词为给定单词的概率\n", 255 | " cost, state, _ = session.run([model.cost, model.final_state, train_op],\n", 256 | " {model.input_data: x, model.targets: y, model.initial_state: state})\n", 257 | " # 将不同时刻和批量的概率就可得到困惑度的对数形式,将这个和做指数运算就可得到困惑度\n", 258 | " total_costs += cost\n", 259 | " iters += model.num_steps\n", 260 | "\n", 261 | " # 只在训练时输出日志\n", 262 | " if output_log and step % 100 == 0:\n", 263 | " print(\"After %d steps, perplexity is %.3f\" % (step, np.exp(total_costs / iters)))\n", 264 | " return np.exp(total_costs / iters)\n", 265 | " " 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 5, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "In iteration: 1\n", 280 | "After 0 steps, perplexity is 9997.503\n", 281 | "After 100 steps, perplexity is 1430.310\n", 282 | "After 200 steps, perplexity is 1019.812\n", 283 | "After 300 steps, perplexity is 884.877\n", 284 | "After 400 steps, perplexity is 770.536\n", 285 | "After 500 steps, perplexity is 685.862\n", 286 | "After 600 steps, perplexity is 627.773\n", 287 | "After 700 steps, perplexity is 584.603\n", 288 | "After 800 steps, perplexity is 550.645\n", 289 | "After 900 steps, perplexity is 522.333\n", 290 | "After 1000 steps, perplexity is 497.754\n", 291 | "After 1100 steps, perplexity is 477.317\n", 292 | "After 1200 steps, perplexity is 459.825\n", 293 | "After 1300 steps, perplexity is 446.264\n", 294 | "After 1400 steps, perplexity is 433.608\n", 295 | "After 1500 steps, perplexity is 423.149\n", 296 | "After 1600 steps, perplexity is 409.957\n", 297 | "After 1700 steps, perplexity is 400.390\n", 298 | "After 1800 steps, perplexity is 393.291\n", 299 | "Epoch: 1 Validation Perplexity: 239.055\n", 300 | "In iteration: 2\n", 301 | "After 0 steps, perplexity is 422.337\n", 302 | "After 100 steps, perplexity is 285.401\n", 303 | "After 200 steps, perplexity is 266.674\n", 304 | "After 300 steps, perplexity is 273.085\n", 305 | "After 400 steps, perplexity is 267.213\n", 306 | "After 500 steps, perplexity is 257.644\n", 307 | "After 600 steps, perplexity is 252.940\n", 308 | "After 700 steps, perplexity is 249.936\n", 309 | "After 800 steps, perplexity is 248.188\n", 310 | "After 900 steps, perplexity is 246.797\n", 311 | "After 1000 steps, perplexity is 244.862\n", 312 | "After 1100 steps, perplexity is 243.292\n", 313 | "After 1200 steps, perplexity is 241.627\n", 314 | "After 1300 steps, perplexity is 241.469\n", 315 | "After 1400 steps, perplexity is 240.662\n", 316 | "After 1500 steps, perplexity is 240.259\n", 317 | "After 1600 steps, perplexity is 237.447\n", 318 | "After 1700 steps, perplexity is 236.469\n", 319 | "After 1800 steps, perplexity is 236.575\n", 320 | "Epoch: 2 Validation Perplexity: 196.142\n", 321 | "In iteration: 3\n", 322 | "After 0 steps, perplexity is 353.527\n", 323 | "After 100 steps, perplexity is 236.278\n", 324 | "After 200 steps, perplexity is 221.385\n", 325 | "After 300 steps, perplexity is 228.476\n", 326 | "After 400 steps, perplexity is 224.843\n", 327 | "After 500 steps, perplexity is 217.486\n", 328 | "After 600 steps, perplexity is 214.905\n", 329 | "After 700 steps, perplexity is 213.104\n", 330 | "After 800 steps, perplexity is 212.414\n", 331 | "After 900 
steps, perplexity is 211.959\n", 332 | "After 1000 steps, perplexity is 210.966\n", 333 | "After 1100 steps, perplexity is 210.475\n", 334 | "After 1200 steps, perplexity is 209.590\n", 335 | "After 1300 steps, perplexity is 200.107\n", 336 | "After 1400 steps, perplexity is 200.050\n", 337 | "After 1500 steps, perplexity is 193.237\n", 338 | "After 1600 steps, perplexity is 192.123\n", 339 | "After 1700 steps, perplexity is 180.731\n", 340 | "After 1800 steps, perplexity is 183.199\n", 341 | "Epoch: 3 Validation Perplexity: 169.009\n", 342 | "Test Perplexity: 142.681\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "def main():\n", 348 | " train_data, valid_data, test_data, _ = ptb_raw_data(data_path)\n", 349 | "\n", 350 | " # 计算一个epoch需要训练的次数\n", 351 | " train_data_len = len(train_data)\n", 352 | " train_batch_len = train_data_len // train_batch_size\n", 353 | " train_epoch_size = (train_batch_len - 1) // train_num_step\n", 354 | "\n", 355 | " valid_data_len = len(valid_data)\n", 356 | " valid_batch_len = valid_data_len // eval_batch_size\n", 357 | " valid_epoch_size = (valid_batch_len - 1) // eval_num_step\n", 358 | "\n", 359 | " test_data_len = len(test_data)\n", 360 | " test_batch_len = test_data_len // eval_batch_size\n", 361 | " test_epoch_size = (test_batch_len - 1) // eval_num_step\n", 362 | "\n", 363 | " initializer = tf.random_uniform_initializer(-0.05, 0.05)\n", 364 | " with tf.variable_scope(\"language_model\", reuse=None, initializer=initializer):\n", 365 | " train_model = PTBModel(True, train_batch_size, train_num_step)\n", 366 | "\n", 367 | " with tf.variable_scope(\"language_model\", reuse=True, initializer=initializer):\n", 368 | " eval_model = PTBModel(False, eval_batch_size, eval_num_step)\n", 369 | "\n", 370 | " # 训练模型。\n", 371 | " with tf.Session() as session:\n", 372 | " tf.global_variables_initializer().run()\n", 373 | "\n", 374 | " train_queue = ptb_producer(train_data, train_model.batch_size, train_model.num_steps)\n", 375 | " eval_queue = ptb_producer(valid_data, eval_model.batch_size, eval_model.num_steps)\n", 376 | " test_queue = ptb_producer(test_data, eval_model.batch_size, eval_model.num_steps)\n", 377 | "\n", 378 | " coord = tf.train.Coordinator()\n", 379 | " threads = tf.train.start_queue_runners(sess=session, coord=coord)\n", 380 | "\n", 381 | " for i in range(num_epoch):\n", 382 | " print(\"In iteration: %d\" % (i + 1))\n", 383 | " run_epoch(session, train_model, train_queue, train_model.train_op, True, train_epoch_size)\n", 384 | "\n", 385 | " valid_perplexity = run_epoch(session, eval_model, eval_queue, tf.no_op(), False, valid_epoch_size)\n", 386 | " print(\"Epoch: %d Validation Perplexity: %.3f\" % (i + 1, valid_perplexity))\n", 387 | "\n", 388 | " test_perplexity = run_epoch(session, eval_model, test_queue, tf.no_op(), False, test_epoch_size)\n", 389 | " print(\"Test Perplexity: %.3f\" % test_perplexity)\n", 390 | "\n", 391 | " coord.request_stop()\n", 392 | " coord.join(threads)\n", 393 | "\n", 394 | "if __name__ == \"__main__\":\n", 395 | " main()" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": true 403 | }, 404 | "outputs": [], 405 | "source": [] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 
| "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.5.3" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 2 429 | } 430 | -------------------------------------------------------------------------------- /Experiments/Res/1.md: -------------------------------------------------------------------------------- 1 | all you need is here, Jupyter 2 | -------------------------------------------------------------------------------- /Experiments/Res/Synced.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/Synced.jpg -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras1.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras2.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras3.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras4.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras5.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras6.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras7.png -------------------------------------------------------------------------------- /Experiments/Synced.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Source Code:https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/Synced.py 3 | import tensorflow as tf 4 | import numpy as np 5 | import tensorflow.contrib.slim as slim 6 | 
import urllib.request 7 | 8 | OUTPUT_NODE = 36 9 | IMAGE_SIZE = 200 10 | NUM_CHANNELS = 1 11 | CONV1_SIZE = 2 12 | CONV2_SIZE = 3 13 | FC_SIZE = 512 14 | 15 | w = 0.44480515 16 | W = np.array([[57, 20.5, -19.33333206, -5.75, -7.20000076, -13.16666603], 17 | [2., 21.5, 7., -3.75, -8., -12.83333397], 18 | [2., 28., 7., -22., -9.20000076, -13.83333397], 19 | [88., 20.5, -19.33333206, -5.75, -8., -24.66666603], 20 | [67., 25., 6.66666794, -0.75, -10.60000038, -12.], 21 | [2., 26., 2.33333206, -1.5, -6.79999924, -9.83333397]]).astype(np.float32) 22 | 23 | 24 | def inference(input_tensor): 25 | with tf.variable_scope('layer1-conv1'): 26 | conv1_weights = tf.get_variable('weight', [CONV1_SIZE, CONV1_SIZE, 1, 1], 27 | initializer=tf.constant_initializer(W[0:2, 0:2])) 28 | conv1_biases = tf.get_variable('bias', [1], initializer=tf.constant_initializer(0.0)) 29 | conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 30 | relu1 = tf.nn.sigmoid(tf.nn.bias_add(conv1, conv1_biases)) 31 | 32 | with tf.name_scope('layer2-pool1'): 33 | pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 34 | 35 | with tf.variable_scope('layer3-conv2'): 36 | conv2_weights = tf.get_variable('weight', [CONV2_SIZE, CONV2_SIZE, 1, 1], 37 | initializer=tf.constant_initializer(W[0:3, 0:3])) 38 | conv2_biases = tf.get_variable('bias', [1], initializer=tf.constant_initializer(0.0)) 39 | conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 40 | relu2 = tf.nn.sigmoid(tf.nn.bias_add(conv2, conv2_biases)) 41 | 42 | with tf.name_scope('layer4-pool2'): 43 | pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 44 | 45 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], activation_fn=tf.nn.sigmoid, stride=1, 46 | padding='SAME'): 47 | with tf.variable_scope('layer5-Inception_v3-Module'): 48 | with tf.variable_scope('Branch_0'): 49 | branch_0 = slim.conv2d(pool2, 1, [1, 1], 50 | weights_initializer=tf.constant_initializer(W[3:4, 3:4]), scope='Ince_0') 51 | with tf.variable_scope('Branch_1'): 52 | branch_1 = slim.conv2d(pool2, 1, [1, 1], 53 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_1_1') 54 | branch_1 = tf.concat([slim.conv2d(branch_1, 32, [1, 3], 55 | weights_initializer=tf.constant_initializer(W[3:4, 1:4]), 56 | scope='Ince_1_2a'), 57 | slim.conv2d(branch_1, 32, [3, 1], 58 | weights_initializer=tf.constant_initializer(W[1:4, 3:4]), 59 | scope='Ince_1_2b')], 3) 60 | with tf.variable_scope('Branch_2'): 61 | branch_2 = slim.conv2d(pool2, 1, [1, 1], 62 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_2_1') 63 | branch_2 = slim.conv2d(branch_2, 1, [3, 3], 64 | weights_initializer=tf.constant_initializer(W[0:3, 0:3]), scope='Ince_2_2') 65 | branch_2 = tf.concat([slim.conv2d(branch_2, 1, [1, 3], 66 | weights_initializer=tf.constant_initializer(W[0:1, 0:3]), 67 | scope='Ince_2_3a'), 68 | slim.conv2d(branch_2, 1, [3, 1], 69 | weights_initializer=tf.constant_initializer(W[0:3, 0:1]), 70 | scope='Ince_2_3b')], 3) 71 | with tf.variable_scope('Branch_3'): 72 | # branch_3 = slim.avg_pool2d(pool2, [3, 3],scope='Ince_3_1') 73 | branch_3 = slim.conv2d(pool2, 1, [1, 1], 74 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_3_2') 75 | inception = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) 76 | 77 | inception_shape = inception.get_shape().as_list() 78 | nodes = inception_shape[1] * inception_shape[2] * inception_shape[3] 79 | 
reshaped = tf.reshape(inception, [1, nodes]) 80 | 81 | with tf.variable_scope('layer6-fc1'): 82 | fc1_weights = tf.get_variable('weight', [nodes, FC_SIZE], 83 | initializer=tf.truncated_normal_initializer(stddev=3, seed=3), trainable=False) 84 | fc1_biases = tf.get_variable('bias', [FC_SIZE], initializer=tf.constant_initializer(-10.0)) 85 | fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases) 86 | 87 | with tf.variable_scope('layer7-fc2'): 88 | fc2_weights = tf.get_variable('weight', [FC_SIZE, OUTPUT_NODE], 89 | initializer=tf.constant_initializer(0.0001)) 90 | fc2_biases = tf.get_variable('bias', [OUTPUT_NODE], initializer=tf.constant_initializer(-11.0)) 91 | secret = tf.matmul(fc1, fc2_weights) + fc2_biases 92 | return secret 93 | 94 | 95 | def synced(image): 96 | img_data = tf.image.decode_jpeg(image) 97 | resized = tf.image.resize_images(img_data, [IMAGE_SIZE, IMAGE_SIZE], method=1) 98 | img_gray = tf.reshape(tf.image.rgb_to_grayscale(resized), [1, IMAGE_SIZE, IMAGE_SIZE, 1]) 99 | img_norm = tf.cast(img_gray / 128 - 1, dtype=tf.float32) 100 | 101 | y_hat = tf.reshape(inference(img_norm), [6, 6]) - w 102 | y_norm = tf.matmul(W + 30, y_hat + tf.cast(tf.diag([1, 2, 3, 4, 5, 6]), dtype=tf.float32)) 103 | y_int = tf.reshape(tf.cast(y_norm, dtype=tf.int16), [1, 36]) 104 | c = [] 105 | 106 | with tf.Session() as sess: 107 | sess.run(tf.global_variables_initializer()) 108 | y = sess.run(y_int) 109 | for i in range(OUTPUT_NODE): 110 | c.append(chr(abs(y[0][i]))) 111 | print("".join(c)) 112 | 113 | 114 | def main(argv=None): 115 | urllib.request.urlretrieve( 116 | 'https://image.jiqizhixin.com/uploads/editor/051635e7-a31d-44d8-a97e-b34da37ddbbc/82418Synced.jpg', 117 | 'Synced.jpg') 118 | 119 | # 本宝宝只对 Synced 图像感兴趣,其它图片一概不理~ 120 | img_raw = tf.gfile.FastGFile("./Synced.jpg", "rb").read() 121 | synced(img_raw) 122 | 123 | 124 | if __name__ == '__main__': 125 | tf.app.run() 126 | -------------------------------------------------------------------------------- /Experiments/pytorch_TCN.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"TCN.ipynb","version":"0.3.2","views":{},"default_view":{},"provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"w_1TesnEXIeI","colab_type":"text"},"cell_type":"markdown","source":["TCN原论文:An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling,https://arxiv.org/abs/1803.01271\n","官方实现地址:https://github.com/locuslab/TCN"]},{"metadata":{"id":"d0x7S8ufXIeJ","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0},"base_uri":"https://localhost:8080/","height":35},"outputId":"c28bfb8f-71b2-4f7c-dc47-05fae0a3dbf8","executionInfo":{"status":"ok","timestamp":1523528323720,"user_tz":-480,"elapsed":1665,"user":{"displayName":"Horatio J.S.Y","photoUrl":"//lh5.googleusercontent.com/-OyzfCRPtjb8/AAAAAAAAAAI/AAAAAAAAABE/e0LwcAUesbs/s50-c-k-no/photo.jpg","userId":"109157966837900544651"}}},"cell_type":"code","source":["!git clone https://github.com/HoratioJSY/tcn.git"],"execution_count":2,"outputs":[{"output_type":"stream","text":["fatal: destination path 'tcn' already exists and is not an empty directory.\r\n"],"name":"stdout"}]},{"metadata":{"id":"xiYn_TgxXIeM","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# http://pytorch.org/\n","from os 
import path\n","from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag\n","platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())\n","\n","accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'\n","\n","!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision\n","import torch"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8mY1bJNXXIeR","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["import os\n","import torch\n","from torch import nn\n","from torch.autograd import Variable\n","import pickle\n","from torch.nn.utils import weight_norm\n","import argparse\n","import time\n","import math\n","import torch.optim as optim"],"execution_count":0,"outputs":[]},{"metadata":{"id":"9i7QA4CxXIeT","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# utils.py,关于数据与词汇库等预定义的类与方法\n","\n","\"\"\"\n","Note: The meaning of batch_size in PTB is different from that in MNIST example. In MNIST,\n","batch_size is the # of sample data that is considered in each iteration; in PTB, however,\n","it is the number of segments to speed up computation. \n","The goal of PTB is to train a language model to predict the next word.\n","\"\"\"\n","\n","def data_generator(data_path):\n"," corpus = Corpus(data_path)\n"," pickle.dump(corpus, open(data_path + '/corpus', 'wb'))\n"," return corpus\n","\n","class Dictionary(object):\n"," def __init__(self):\n"," self.word2idx = {}\n"," self.idx2word = []\n","\n"," def add_word(self, word):\n"," if word not in self.word2idx:\n"," self.idx2word.append(word)\n"," self.word2idx[word] = len(self.idx2word) - 1\n"," return self.word2idx[word]\n","\n"," def __len__(self):\n"," return len(self.idx2word)\n","\n","class Corpus(object):\n"," def __init__(self, path):\n"," self.dictionary = Dictionary()\n"," self.train = self.tokenize(os.path.join(path, 'train.txt'))\n"," self.valid = self.tokenize(os.path.join(path, 'valid.txt'))\n"," self.test = self.tokenize(os.path.join(path, 'test.txt'))\n","\n"," def tokenize(self, path):\n"," \"\"\"Tokenizes a text file.\"\"\"\n"," assert os.path.exists(path)\n"," # Add words to the dictionary\n"," with open(path, 'r') as f:\n"," tokens = 0\n"," for line in f:\n"," words = line.split() + ['']\n"," tokens += len(words)\n"," for word in words:\n"," self.dictionary.add_word(word)\n","\n"," # Tokenize file content\n"," with open(path, 'r') as f:\n"," ids = torch.LongTensor(tokens)\n"," token = 0\n"," for line in f:\n"," words = line.split() + ['']\n"," for word in words:\n"," ids[token] = self.dictionary.word2idx[word]\n"," token += 1\n","\n"," return ids\n","\n","def batchify(data, batch_size, cuda):\n"," \"\"\"The output should have size [L x batch_size], where L could be a long sequence length\"\"\"\n"," # Work out how cleanly we can divide the dataset into batch_size parts (i.e. 
continuous seqs).\n"," nbatch = data.size(0) // batch_size\n"," # Trim off any extra elements that wouldn't cleanly fit (remainders).\n"," data = data.narrow(0, 0, nbatch * batch_size)\n"," # Evenly divide the data across the batch_size batches.\n"," data = data.view(batch_size, -1)\n"," if cuda:\n"," data = data.cuda()\n"," return data\n","\n","\n","def get_batch(source, i, seq_len, seq_le=None, evaluation=False):\n"," seq_le = min(seq_le if seq_le else seq_len, source.size(1) - 1 - i)\n"," data = Variable(source[:, i:i+seq_le], volatile=evaluation)\n"," target = Variable(source[:, i+1:i+1+seq_le]) # CAUTION: This is un-flattened!\n"," return data, target"],"execution_count":0,"outputs":[]},{"metadata":{"id":"F5WJz8jHXIeW","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# tcn.py\n","\n","# 定义实现因果卷积的类(继承自类nn.Module),其中super(Chomp1d, self).__init__()表示对继承自父类的属性进行初始化。\n","class Chomp1d(nn.Module):\n"," def __init__(self, chomp_size):\n"," super(Chomp1d, self).__init__()\n"," self.chomp_size = chomp_size\n","\n"," # 通过增加Padding的方式并对卷积后的张量做切片而实现因果卷积\n"," # tensor.contiguous()会返回有连续内存的相同张量\n"," def forward(self, x):\n"," return x[:, :, :-self.chomp_size].contiguous()\n","\n","\n","# 定义残差块,即两个一维卷积与恒等映射\n","class TemporalBlock(nn.Module):\n"," def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):\n"," super(TemporalBlock, self).__init__()\n","\n"," # 定义第一个空洞卷积层\n"," self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,\n"," stride=stride, padding=padding, dilation=dilation))\n"," # 根据第一个卷积层的输出与padding大小实现因果卷积\n"," self.chomp1 = Chomp1d(padding)\n"," # 添加激活函数与dropout正则化方法完成第一个卷积\n"," self.relu1 = nn.ReLU()\n"," self.dropout1 = nn.Dropout2d(dropout)\n","\n"," # 堆叠同样结构的第二个卷积层\n"," self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,\n"," stride=stride, padding=padding, dilation=dilation))\n"," self.chomp2 = Chomp1d(padding)\n"," self.relu2 = nn.ReLU()\n"," self.dropout2 = nn.Dropout2d(dropout)\n","\n"," # 将卷积模块的所有组建通过Sequential方法依次堆叠在一起\n"," self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,\n"," self.conv2, self.chomp2, self.relu2, self.dropout2)\n","\n"," # padding保证了输入序列与输出序列的长度相等,但卷积前的通道数与卷积后的通道数不一定一样。\n"," # 如果通道数不一样,那么需要对输入x做一个逐元素的一维卷积以使得它的纬度与前面两个卷积相等。\n"," self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None\n"," self.relu = nn.ReLU()\n"," self.init_weights()\n","\n"," # 初始化为从均值为0,标准差为0.01的正态分布中采样的随机值\n"," def init_weights(self):\n"," self.conv1.weight.data.normal_(0, 0.01)\n"," self.conv2.weight.data.normal_(0, 0.01)\n"," if self.downsample is not None:\n"," self.downsample.weight.data.normal_(0, 0.01)\n","\n"," # 结合卷积与输入的恒等映射(或输入的逐元素卷积),并投入ReLU 激活函数完成残差模块\n"," def forward(self, x):\n"," out = self.net(x)\n"," res = x if self.downsample is None else self.downsample(x)\n"," return self.relu(out + res)\n","\n","\n","# 定义时间卷积网络的架构\n","class TemporalConvNet(nn.Module):\n"," def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):\n"," super(TemporalConvNet, self).__init__()\n"," layers = []\n","\n"," # num_channels为各层卷积运算的输出通道数或卷积核数量,它的长度即需要执行的卷积层数量\n"," num_levels = len(num_channels)\n"," # 空洞卷积的扩张系数若随着网络层级的增加而成指数级增加,则可以增大感受野并不丢弃任何输入序列的元素\n"," # dilation_size根据层级数成指数增加,并从num_channels中抽取每一个残差模块的输入通道数与输出通道数\n"," for i in range(num_levels):\n"," dilation_size = 2 ** i\n"," in_channels = num_inputs if i == 0 else num_channels[i - 1]\n"," out_channels = num_channels[i]\n"," layers += 
[TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,\n"," padding=(kernel_size - 1) * dilation_size, dropout=dropout)]\n"," # 将所有残差模块堆叠起来组成一个深度卷积网络\n"," self.network = nn.Sequential(*layers)\n","\n"," def forward(self, x):\n"," return self.network(x)\n","\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"e_fhv7CeXIeZ","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# model.py\n","\n","class TCN(nn.Module):\n"," def __init__(self, input_size, output_size, num_channels,\n"," kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):\n"," super(TCN, self).__init__()\n","\n"," # 将一个批量的输入数据(one-hot encoding)送入编码器中成为一个批量的词嵌入向量\n"," # 其中output_size为词汇量,input_size为一个词向量的长度\n"," self.encoder = nn.Embedding(output_size, input_size)\n","\n"," # 构建网络\n"," self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)\n","\n"," # 定义最后线性变换的纬度,即最后一个卷积层的通道数(类似2D卷积中的特征图数)到所有词汇的映射\n"," self.decoder = nn.Linear(num_channels[-1], output_size)\n","\n"," # 是否共享编码器与解码器的权重,默认是共享。共享的话需要保持隐藏单元数等于词嵌入长度,这样预测的向量才可以视为词嵌入向量\n"," if tied_weights:\n"," if num_channels[-1] != input_size:\n"," raise ValueError('When using the tied flag, nhid must be equal to emsize')\n"," self.decoder.weight = self.encoder.weight\n"," print(\"Weight tied\")\n","\n"," # 对输入词嵌入执行Dropout 表示随机从句子中舍弃词,迫使模型不依赖于单个词完成任务\n"," self.drop = nn.Dropout(emb_dropout)\n"," self.emb_dropout = emb_dropout\n"," self.init_weights()\n","\n"," def init_weights(self):\n"," self.encoder.weight.data.normal_(0, 0.01)\n"," self.decoder.bias.data.fill_(0)\n"," self.decoder.weight.data.normal_(0, 0.01)\n","\n"," # 先编码,训练中再随机丢弃词,输入到网络实现推断,最后将推断结果解码为词\n"," def forward(self, input):\n"," \"\"\"Input ought to have dimension (N, C_in, L_in), where L_in is the seq_len; here the input is (N, L, C)\"\"\"\n"," emb = self.drop(self.encoder(input))\n"," y = self.tcn(emb.transpose(1, 2)).transpose(1, 2)\n"," y = self.decoder(y)\n"," return y.contiguous()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"LnWXKa0bXIeb","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0},"base_uri":"https://localhost:8080/","height":1367},"outputId":"aa46b8cf-6dea-462b-949a-0829b2b137d2","executionInfo":{"status":"ok","timestamp":1523528865489,"user_tz":-480,"elapsed":532292,"user":{"displayName":"Horatio J.S.Y","photoUrl":"//lh5.googleusercontent.com/-OyzfCRPtjb8/AAAAAAAAAAI/AAAAAAAAABE/e0LwcAUesbs/s50-c-k-no/photo.jpg","userId":"109157966837900544651"}}},"cell_type":"code","source":["#word_cnn_test.py\n","\n","cuda = True # 是否使用 CUDA\n","data_path = './tcn/data/penn' # 数据集地址\n","batch_size = 16 # 批量大小\n","nhid = 600 # 每层隐藏单元数\n","levels = 4 # 残差模块数\n","emsize = 600 # 词嵌入长度\n","k_size = 3 # 卷积核大小\n","dropout = 0.45 # 应用到网络层级中的随机失活率\n","emb_dropout = 0.25 # 应用到嵌入层的随机失活率\n","tied = True # 是否绑定编码器与解码器的权重\n","lr = 4 # 初始学习率\n","optimization ='SGD'\n","validseqlen = 40 # 验证序列长度\n","seq_len = 80 # 总序列长度\n","log_interval = 100 # 记录日志的间隔\n","clip = 0.35 # 梯度截断,-1表示不采用梯度截断\n","epochs =100 # 训练轮数的上限\n","\n","# Set the random seed manually for reproducibility.\n","torch.manual_seed(1111)\n","if torch.cuda.is_available():\n"," if not cuda:\n"," print(\"WARNING: You have a CUDA device, so you should probably run with --cuda\")\n","\n","corpus = data_generator(data_path)\n","eval_batch_size = 10\n","train_data = batchify(corpus.train, batch_size, cuda)\n","val_data = batchify(corpus.valid, eval_batch_size, cuda)\n","test_data = batchify(corpus.test, 
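# Note on Chomp1d/TemporalBlock above: each Conv1d is padded by (kernel_size - 1) * dilation
# and Chomp1d then slices that many elements off the right, so the output at time t depends
# only on inputs at times <= t (a causal convolution). With the word_cnn_test.py settings
# above (k_size = 3, levels = 4, dilation 2**i, two convs per block) the receptive field is
# 1 + 2*(k - 1)*(2**levels - 1) = 61 time steps. Quick arithmetic check:
k, levels = 3, 4
print(1 + sum(2 * (k - 1) * 2 ** i for i in range(levels)))   # 61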
eval_batch_size, cuda)\n","\n","\n","n_words = len(corpus.dictionary)\n","num_chans = [nhid] * (levels - 1) + [emsize]\n","\n","model = TCN(emsize, n_words, num_chans, dropout=dropout, emb_dropout=emb_dropout, kernel_size=k_size, tied_weights=tied)\n","\n","if cuda:\n"," model.cuda()\n","\n","# May use adaptive softmax to speed up training\n","criterion = nn.CrossEntropyLoss()\n","\n","\n","optimizer = getattr(optim, optimization)(model.parameters(), lr=lr)\n","\n","\n","def evaluate(data_source):\n"," model.eval()\n"," total_loss = 0\n"," processed_data_size = 0\n"," for i in range(0, data_source.size(1) - 1, validseqlen):\n"," if i + seq_len - validseqlen >= data_source.size(1) - 1:\n"," continue\n"," data, targets = get_batch(data_source, i, seq_len, evaluation=True)\n"," output = model(data)\n","\n"," # Discard the effective history, just like in training\n"," eff_history = seq_len - validseqlen\n"," final_output = output[:, eff_history:].contiguous().view(-1, n_words)\n"," final_target = targets[:, eff_history:].contiguous().view(-1)\n","\n"," loss = criterion(final_output, final_target)\n","\n"," # Note that we don't add TAR loss here\n"," total_loss += (data.size(1) - eff_history) * loss.data\n"," processed_data_size += data.size(1) - eff_history\n"," return total_loss[0] / processed_data_size\n","\n","\n","def train():\n"," # Turn on training mode which enables dropout.\n"," global train_data\n"," model.train()\n"," total_loss = 0\n"," start_time = time.time()\n"," for batch_idx, i in enumerate(range(0, train_data.size(1) - 1, validseqlen)):\n"," if i + seq_len - validseqlen >= train_data.size(1) - 1:\n"," continue\n"," data, targets = get_batch(train_data, i, seq_len)\n"," optimizer.zero_grad()\n"," output = model(data)\n","\n"," # Discard the effective history part\n"," eff_history = seq_len - validseqlen\n"," if eff_history < 0:\n"," raise ValueError(\"Valid sequence length must be smaller than sequence length!\")\n"," final_target = targets[:, eff_history:].contiguous().view(-1)\n"," final_output = output[:, eff_history:].contiguous().view(-1, n_words)\n"," loss = criterion(final_output, final_target)\n","\n"," loss.backward()\n"," if clip > 0:\n"," torch.nn.utils.clip_grad_norm(model.parameters(), clip)\n"," optimizer.step()\n","\n"," total_loss += loss.data\n","\n"," if batch_idx % log_interval == 0 and batch_idx > 0:\n"," cur_loss = total_loss[0] / log_interval\n"," elapsed = time.time() - start_time\n"," print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.5f} | ms/batch {:5.5f} | '\n"," 'loss {:5.2f} | ppl {:8.2f}'.format(\n"," epoch, batch_idx, train_data.size(1) // validseqlen, lr,\n"," elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))\n"," total_loss = 0\n"," start_time = time.time()\n","\n","\n","if __name__ == \"__main__\":\n"," best_vloss = 1e8\n","\n"," # At any point you can hit Ctrl + C to break out of training early.\n"," try:\n"," all_vloss = []\n"," for epoch in range(1, epochs+1):\n"," epoch_start_time = time.time()\n"," train()\n"," val_loss = evaluate(val_data)\n"," test_loss = evaluate(test_data)\n","\n"," print('-' * 89)\n"," print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '\n"," 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),\n"," val_loss, math.exp(val_loss)))\n"," print('| end of epoch {:3d} | time: {:5.2f}s | test loss {:5.2f} | '\n"," 'test ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),\n"," test_loss, math.exp(test_loss)))\n"," print('-' * 89)\n","\n"," # Save the model if the 
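# Note on evaluate()/train() above: each step feeds the model a seq_len-token window but only
# the last validseqlen positions contribute to the loss; eff_history = seq_len - validseqlen
# warm-up positions are sliced off, so every scored token has at least eff_history tokens of
# real left context, and consecutive windows overlap by exactly eff_history so the scored
# spans tile the data without double counting. A small sketch of the indexing (the value 200
# below is an arbitrary stand-in for data_source.size(1)):
seq_len, validseqlen = 80, 40
eff_history = seq_len - validseqlen
for i in range(0, 200 - 1, validseqlen):
    if i + seq_len - validseqlen >= 200 - 1:
        continue
    end = min(i + seq_len, 200 - 1)           # get_batch clips the window at the sequence end
    print((i, end), (i + eff_history, end))   # (fed window, scored span)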
validation loss is the best we've seen so far.\n"," if val_loss < best_vloss:\n"," with open(\"model.pt\", 'wb') as f:\n"," print('Save model!\\n')\n"," torch.save(model, f)\n"," best_vloss = val_loss\n","\n"," # Anneal the learning rate if the validation loss plateaus\n"," if epoch > 5 and val_loss >= max(all_vloss[-5:]):\n"," lr = lr / 2.\n"," for param_group in optimizer.param_groups:\n"," param_group['lr'] = lr\n"," all_vloss.append(val_loss)\n","\n"," except KeyboardInterrupt:\n"," print('-' * 89)\n"," print('Exiting from training early')\n","\n"," # Load the best saved model.\n"," with open(\"model.pt\", 'rb') as f:\n"," model = torch.load(f)\n","\n"," # Run on test data.\n"," test_loss = evaluate(test_data)\n"," print('=' * 89)\n"," print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(\n"," test_loss, math.exp(test_loss)))\n"," print('=' * 89)"],"execution_count":8,"outputs":[{"output_type":"stream","text":["Weight tied\n","| epoch 1 | 100/ 1452 batches | lr 4.00000 | ms/batch 118.81662 | loss 7.52 | ppl 1840.81\n","| epoch 1 | 200/ 1452 batches | lr 4.00000 | ms/batch 109.73478 | loss 6.81 | ppl 910.24\n","| epoch 1 | 300/ 1452 batches | lr 4.00000 | ms/batch 109.62409 | loss 6.58 | ppl 717.81\n","| epoch 1 | 400/ 1452 batches | lr 4.00000 | ms/batch 109.41673 | loss 6.37 | ppl 582.09\n","| epoch 1 | 500/ 1452 batches | lr 4.00000 | ms/batch 109.33626 | loss 6.23 | ppl 506.72\n","| epoch 1 | 600/ 1452 batches | lr 4.00000 | ms/batch 109.01555 | loss 6.21 | ppl 498.33\n","| epoch 1 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.83427 | loss 6.12 | ppl 452.68\n","| epoch 1 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.99423 | loss 6.01 | ppl 409.11\n","| epoch 1 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.66103 | loss 5.98 | ppl 396.50\n","| epoch 1 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.71295 | loss 5.93 | ppl 376.19\n","| epoch 1 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.76589 | loss 5.90 | ppl 363.97\n","| epoch 1 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.65258 | loss 5.88 | ppl 357.96\n","| epoch 1 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.92401 | loss 5.76 | ppl 318.15\n","| epoch 1 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.80318 | loss 5.74 | ppl 312.15\n","-----------------------------------------------------------------------------------------\n","| end of epoch 1 | time: 168.27s | valid loss 5.67 | valid ppl 289.70\n","| end of epoch 1 | time: 168.27s | test loss 5.64 | test ppl 280.59\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TCN. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TemporalConvNet. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TemporalBlock. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". 
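# Note on the annealing rule above: from epoch 6 onward, if the current validation loss is no
# better than every one of the last five epochs' losses, the learning rate is halved and the
# new value is written into each optimizer.param_groups entry. Standalone sketch (loss values
# below are made up for illustration):
lr, epoch = 4.0, 6
all_vloss = [5.30, 5.25, 5.22, 5.20, 5.19]
val_loss = 5.31
if epoch > 5 and val_loss >= max(all_vloss[-5:]):
    lr = lr / 2.0
print(lr)   # 2.0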
It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type Chomp1d. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n"],"name":"stderr"},{"output_type":"stream","text":["| epoch 2 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.80678 | loss 5.82 | ppl 336.42\n","| epoch 2 | 200/ 1452 batches | lr 4.00000 | ms/batch 108.55440 | loss 5.70 | ppl 298.02\n","| epoch 2 | 300/ 1452 batches | lr 4.00000 | ms/batch 108.44461 | loss 5.67 | ppl 290.36\n","| epoch 2 | 400/ 1452 batches | lr 4.00000 | ms/batch 108.52798 | loss 5.55 | ppl 258.41\n","| epoch 2 | 500/ 1452 batches | lr 4.00000 | ms/batch 108.56836 | loss 5.53 | ppl 251.28\n","| epoch 2 | 600/ 1452 batches | lr 4.00000 | ms/batch 108.47103 | loss 5.58 | ppl 265.04\n","| epoch 2 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.35539 | loss 5.55 | ppl 257.78\n","| epoch 2 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.45743 | loss 5.51 | ppl 246.27\n","| epoch 2 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.37481 | loss 5.51 | ppl 246.76\n","| epoch 2 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.62212 | loss 5.48 | ppl 240.42\n","| epoch 2 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.54413 | loss 5.50 | ppl 245.82\n","| epoch 2 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.48435 | loss 5.51 | ppl 246.79\n","| epoch 2 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.49503 | loss 5.39 | ppl 218.57\n","| epoch 2 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.47983 | loss 5.42 | ppl 226.72\n","-----------------------------------------------------------------------------------------\n","| end of epoch 2 | time: 166.66s | valid loss 5.39 | valid ppl 218.32\n","| end of epoch 2 | time: 166.66s | test loss 5.34 | test ppl 209.52\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n","| epoch 3 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.54227 | loss 5.51 | ppl 247.27\n","| epoch 3 | 200/ 1452 batches | lr 4.00000 | ms/batch 108.50945 | loss 5.42 | ppl 226.21\n","| epoch 3 | 300/ 1452 batches | lr 4.00000 | ms/batch 108.29926 | loss 5.40 | ppl 222.48\n","| epoch 3 | 400/ 1452 batches | lr 4.00000 | ms/batch 108.35232 | loss 5.29 | ppl 197.79\n","| epoch 3 | 500/ 1452 batches | lr 4.00000 | ms/batch 108.38521 | loss 5.28 | ppl 196.93\n","| epoch 3 | 600/ 1452 batches | lr 4.00000 | ms/batch 108.17204 | loss 5.34 | ppl 208.93\n","| epoch 3 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.27651 | loss 5.33 | ppl 205.63\n","| epoch 3 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.37309 | loss 5.29 | ppl 198.83\n","| epoch 3 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.25190 | loss 5.30 | ppl 199.43\n","| epoch 3 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.25034 | loss 5.28 | ppl 196.01\n","| epoch 3 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.43868 | loss 5.32 | ppl 204.39\n","| epoch 3 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.42956 | loss 5.32 | ppl 204.21\n","| epoch 3 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.51916 | loss 5.19 | ppl 179.70\n","| epoch 3 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.60412 | loss 5.25 | ppl 189.96\n","-----------------------------------------------------------------------------------------\n","| end of epoch 3 | time: 166.44s | valid loss 5.22 | valid ppl 185.76\n","| end of epoch 3 | time: 166.44s | test loss 5.18 | test ppl 
177.27\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n","| epoch 4 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.56948 | loss 5.35 | ppl 209.77\n","-----------------------------------------------------------------------------------------\n","Exiting from training early\n","=========================================================================================\n","| End of training | test loss 5.18 | test ppl 177.27\n","=========================================================================================\n"],"name":"stdout"}]}]} -------------------------------------------------------------------------------- /Experiments/swish_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 15 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 16 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 17 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 18 | "After 0 training step(s), validation accuracy using average model is 0.0592 \n", 19 | "After 1000 training step(s), validation accuracy using average model is 0.9758 \n", 20 | "After 2000 training step(s), validation accuracy using average model is 0.981 \n", 21 | "After 3000 training step(s), validation accuracy using average model is 0.9808 \n", 22 | "After 4000 training step(s), validation accuracy using average model is 0.9808 \n", 23 | "After 5000 training step(s), validation accuracy using average model is 0.9826 \n", 24 | "After 6000 training step(s), validation accuracy using average model is 0.9816 \n", 25 | "After 7000 training step(s), validation accuracy using average model is 0.9818 \n", 26 | "After 8000 training step(s), validation accuracy using average model is 0.9826 \n", 27 | "After 9000 training step(s), validation accuracy using average model is 0.9822 \n", 28 | "After 10000 training step(s), test accuracy using average model is 0.9829\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import tensorflow as tf\n", 34 | "from tensorflow.examples.tutorials.mnist import input_data\n", 35 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 36 | "\n", 37 | "\n", 38 | "INPUT_NODE = 784 \n", 39 | "OUTPUT_NODE = 10 \n", 40 | "LAYER1_NODE = 500 \n", 41 | " \n", 42 | "BATCH_SIZE = 100 \n", 43 | "\n", 44 | "# 模型相关的参数\n", 45 | "LEARNING_RATE_BASE = 0.8 \n", 46 | "LEARNING_RATE_DECAY = 0.99 \n", 47 | "REGULARAZTION_RATE = 0.0001 \n", 48 | "TRAINING_STEPS = 10000 \n", 49 | "MOVING_AVERAGE_DECAY = 0.99 \n", 50 | "\n", 51 | "def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):\n", 52 | " # 不使用滑动平均类\n", 53 | " if avg_class == None:\n", 54 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)\n", 55 | " return tf.matmul(layer1, weights2) + biases2\n", 56 | "\n", 57 | " else:\n", 58 | " \n", 59 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))\n", 60 | " return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2) \n", 61 | " \n", 62 | "def train(mnist):\n", 63 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 64 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], 
name='y-input')\n", 65 | " # 生成隐藏层的参数。\n", 66 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 67 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 68 | " # 生成输出层的参数。\n", 69 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))\n", 70 | " biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 71 | "\n", 72 | " # 计算不含滑动平均类的前向传播结果\n", 73 | " y = inference(x, None, weights1, biases1, weights2, biases2)\n", 74 | " \n", 75 | " # 定义训练轮数及相关的滑动平均类 \n", 76 | " global_step = tf.Variable(0, trainable=False)\n", 77 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 78 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 79 | " average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)\n", 80 | " \n", 81 | " # 计算交叉熵及其平均值\n", 82 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 83 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 84 | " \n", 85 | " # 损失函数的计算\n", 86 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 87 | " regularaztion = regularizer(weights1) + regularizer(weights2)\n", 88 | " loss = cross_entropy_mean + regularaztion\n", 89 | " \n", 90 | " # 设置指数衰减的学习率。\n", 91 | " learning_rate = tf.train.exponential_decay(\n", 92 | " LEARNING_RATE_BASE,\n", 93 | " global_step,\n", 94 | " mnist.train.num_examples / BATCH_SIZE,\n", 95 | " LEARNING_RATE_DECAY,\n", 96 | " staircase=True)\n", 97 | " \n", 98 | " # 优化损失函数\n", 99 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 100 | " \n", 101 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 102 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 103 | " train_op = tf.no_op(name='train')\n", 104 | "\n", 105 | " # 计算正确率\n", 106 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 107 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 108 | " \n", 109 | " # 初始化会话并开始训练过程。\n", 110 | " with tf.Session() as sess:\n", 111 | " tf.global_variables_initializer().run()\n", 112 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 113 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 114 | " \n", 115 | " # 循环的训练神经网络。\n", 116 | " for i in range(TRAINING_STEPS):\n", 117 | " if i % 1000 == 0:\n", 118 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 119 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 120 | " \n", 121 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 122 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 123 | "\n", 124 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 125 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 126 | "\n", 127 | "train(mnist)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 9, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 142 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 143 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 144 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 145 | "After 0 training step(s), 
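# Note on the learning-rate schedule in the cell above: tf.train.exponential_decay with
# staircase=True multiplies the base rate by LEARNING_RATE_DECAY once per full pass of
# decay_steps = mnist.train.num_examples / BATCH_SIZE batches (550 with the usual 55,000-image
# MNIST training split). Written out as plain Python:
def decayed_lr(step, base=0.8, decay=0.99, decay_steps=550):
    return base * decay ** (step // decay_steps)   # staircase=True -> integer division
print(decayed_lr(0), decayed_lr(1000), decayed_lr(10000))   # 0.8, ~0.792, ~0.668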
validation accuracy using average model is 0.0932 \n", 146 | "After 1000 training step(s), validation accuracy using average model is 0.9466 \n", 147 | "After 2000 training step(s), validation accuracy using average model is 0.962 \n", 148 | "After 3000 training step(s), validation accuracy using average model is 0.9662 \n", 149 | "After 4000 training step(s), validation accuracy using average model is 0.967 \n", 150 | "After 5000 training step(s), validation accuracy using average model is 0.9696 \n", 151 | "After 6000 training step(s), validation accuracy using average model is 0.9686 \n", 152 | "After 7000 training step(s), validation accuracy using average model is 0.9698 \n", 153 | "After 8000 training step(s), validation accuracy using average model is 0.97 \n", 154 | "After 9000 training step(s), validation accuracy using average model is 0.97 \n", 155 | "After 10000 training step(s), test accuracy using average model is 0.9666\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "import tensorflow as tf\n", 161 | "from tensorflow.examples.tutorials.mnist import input_data\n", 162 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 163 | "\n", 164 | "\n", 165 | "INPUT_NODE = 784 \n", 166 | "OUTPUT_NODE = 10 \n", 167 | "LAYER1_NODE = 500 \n", 168 | "LAYER2_NODE = 500 \n", 169 | "LAYER3_NODE = 500 \n", 170 | "LAYER4_NODE = 500 \n", 171 | "LAYER5_NODE = 500 \n", 172 | "LAYER6_NODE = 500 \n", 173 | "LAYER7_NODE = 500 \n", 174 | "LAYER8_NODE = 300 \n", 175 | "LAYER9_NODE = 200\n", 176 | "LAYER10_NODE = 100 \n", 177 | " \n", 178 | "BATCH_SIZE = 100 \n", 179 | "\n", 180 | "# 模型相关的参数\n", 181 | "LEARNING_RATE_BASE = 0.008 \n", 182 | "LEARNING_RATE_DECAY = 0.99 \n", 183 | "REGULARAZTION_RATE = 0.0001 \n", 184 | "TRAINING_STEPS = 10000 \n", 185 | "MOVING_AVERAGE_DECAY = 0.99 \n", 186 | "\n", 187 | "def inference(input_tensor, avg_class, W, B):\n", 188 | " # 不使用滑动平均类\n", 189 | " if avg_class == None:\n", 190 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, W[0]) + B[0])\n", 191 | " layer2 = tf.nn.relu(tf.matmul(layer1, W[1]) + B[1])\n", 192 | " layer3 = tf.nn.relu(tf.matmul(layer2, W[2]) + B[2])\n", 193 | " layer4 = tf.nn.relu(tf.matmul(layer3, W[3]) + B[3])\n", 194 | " layer5 = tf.nn.relu(tf.matmul(layer4, W[4]) + B[4])\n", 195 | " layer6 = tf.nn.relu(tf.matmul(layer5, W[5]) + B[5])\n", 196 | " layer7 = tf.nn.relu(tf.matmul(layer6, W[6]) + B[6])\n", 197 | " layer8 = tf.nn.relu(tf.matmul(layer7, W[7]) + B[7])\n", 198 | " layer9 = tf.nn.relu(tf.matmul(layer8, W[8]) + B[8])\n", 199 | " layer10 = tf.nn.relu(tf.matmul(layer9, W[9]) + B[9])\n", 200 | " return tf.matmul(layer10, W[10]) + B[10]\n", 201 | " \n", 202 | " else:\n", 203 | " \n", 204 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0]))\n", 205 | " layer2 = tf.nn.relu(tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1]))\n", 206 | " layer3 = tf.nn.relu(tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2]))\n", 207 | " layer4 = tf.nn.relu(tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3]))\n", 208 | " layer5 = tf.nn.relu(tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4]))\n", 209 | " layer6 = tf.nn.relu(tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5]))\n", 210 | " layer7 = tf.nn.relu(tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6]))\n", 211 | " layer8 = tf.nn.relu(tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7]))\n", 212 | " layer9 = 
tf.nn.relu(tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8]))\n", 213 | " layer10 = tf.nn.relu(tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9]))\n", 214 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 215 | " \n", 216 | "def train(mnist):\n", 217 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 218 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 219 | " \n", 220 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 221 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 222 | " \n", 223 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 224 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 225 | " \n", 226 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 227 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 228 | " \n", 229 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 230 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 231 | " \n", 232 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 233 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 234 | " \n", 235 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 236 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 237 | " \n", 238 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 239 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 240 | " \n", 241 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 242 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 243 | " \n", 244 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 245 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 246 | " \n", 247 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 248 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 249 | " \n", 250 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 251 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 252 | " \n", 253 | " W=[weights1, weights2, weights3, weights4, weights5, weights6, weights7, weights8, weights9, weights10, weights11]\n", 254 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 255 | " \n", 256 | " # 计算不含滑动平均类的前向传播结果\n", 257 | " y = inference(x, None, W, B)\n", 258 | " \n", 259 | " # 定义训练轮数及相关的滑动平均类 \n", 260 | " global_step = tf.Variable(0, trainable=False)\n", 261 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 262 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 263 | " average_y = inference(x, variable_averages, W, B)\n", 264 | " \n", 265 | " # 计算交叉熵及其平均值\n", 266 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 267 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 268 | " \n", 269 | " # 损失函数的计算\n", 270 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 271 | " regularaztion = regularizer(W[0]) 
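# Note on the loop that follows: the accumulated value is assigned to `regularazation` while
# the running total that is read and added to the loss is spelled `regularaztion`, so as
# written only regularizer(W[0]) actually reaches the loss term. A corrected accumulation over
# the weight list would be:
#     regularaztion = regularizer(W[0])
#     for i in range(1, 11):
#         regularaztion = regularaztion + regularizer(W[i])
#     loss = cross_entropy_mean + regularaztion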
\n", 272 | " for i in range(1,11):\n", 273 | " regularazation=regularaztion + regularizer(W[i]) \n", 274 | " loss = cross_entropy_mean + regularaztion\n", 275 | " \n", 276 | " # 设置指数衰减的学习率。\n", 277 | " learning_rate = tf.train.exponential_decay(\n", 278 | " LEARNING_RATE_BASE,\n", 279 | " global_step,\n", 280 | " mnist.train.num_examples / BATCH_SIZE,\n", 281 | " LEARNING_RATE_DECAY,\n", 282 | " staircase=True)\n", 283 | " \n", 284 | " # 优化损失函数\n", 285 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 286 | " \n", 287 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 288 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 289 | " train_op = tf.no_op(name='train')\n", 290 | "\n", 291 | " # 计算正确率\n", 292 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 293 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 294 | " \n", 295 | " # 初始化会话并开始训练过程。\n", 296 | " with tf.Session() as sess:\n", 297 | " tf.global_variables_initializer().run()\n", 298 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 299 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 300 | " \n", 301 | " # 循环的训练神经网络。\n", 302 | " for i in range(TRAINING_STEPS):\n", 303 | " if i % 1000 == 0:\n", 304 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 305 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 306 | " \n", 307 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 308 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 309 | "\n", 310 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 311 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 312 | "\n", 313 | "train(mnist)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 13, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 328 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 329 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 330 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 331 | "After 0 training step(s), validation accuracy using average model is 0.0902 \n", 332 | "After 1000 training step(s), validation accuracy using average model is 0.9516 \n", 333 | "After 2000 training step(s), validation accuracy using average model is 0.9614 \n", 334 | "After 3000 training step(s), validation accuracy using average model is 0.9668 \n", 335 | "After 4000 training step(s), validation accuracy using average model is 0.9688 \n", 336 | "After 5000 training step(s), validation accuracy using average model is 0.971 \n", 337 | "After 6000 training step(s), validation accuracy using average model is 0.9742 \n", 338 | "After 7000 training step(s), validation accuracy using average model is 0.9752 \n", 339 | "After 8000 training step(s), validation accuracy using average model is 0.9738 \n", 340 | "After 9000 training step(s), validation accuracy using average model is 0.975 \n", 341 | "After 10000 training step(s), test accuracy using average model is 0.9722\n" 342 | ] 343 | } 344 | ], 345 | "source": [ 346 | "import tensorflow as tf\n", 347 | "from tensorflow.examples.tutorials.mnist import input_data\n", 348 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", 
one_hot=True)\n", 349 | "\n", 350 | "\n", 351 | "INPUT_NODE = 784 \n", 352 | "OUTPUT_NODE = 10 \n", 353 | "LAYER1_NODE = 500 \n", 354 | "LAYER2_NODE = 500 \n", 355 | "LAYER3_NODE = 500 \n", 356 | "LAYER4_NODE = 500 \n", 357 | "LAYER5_NODE = 500 \n", 358 | "LAYER6_NODE = 500 \n", 359 | "LAYER7_NODE = 500 \n", 360 | "LAYER8_NODE = 300 \n", 361 | "LAYER9_NODE = 200\n", 362 | "LAYER10_NODE = 100 \n", 363 | " \n", 364 | "BATCH_SIZE = 100 \n", 365 | "\n", 366 | "# 模型相关的参数\n", 367 | "LEARNING_RATE_BASE = 0.008 \n", 368 | "LEARNING_RATE_DECAY = 0.99 \n", 369 | "REGULARAZTION_RATE = 0.0001 \n", 370 | "TRAINING_STEPS = 10000 \n", 371 | "MOVING_AVERAGE_DECAY = 0.99 \n", 372 | "\n", 373 | "def inference(input_tensor, avg_class, W, B):\n", 374 | " # 不使用滑动平均类\n", 375 | " if avg_class == None:\n", 376 | " ac_1=tf.matmul(input_tensor, W[0]) + B[0]\n", 377 | " layer1 = ac_1*tf.nn.sigmoid(ac_1)\n", 378 | " ac_2 = tf.matmul(layer1, W[1]) + B[1]\n", 379 | " layer2 = ac_2*tf.nn.sigmoid(ac_2)\n", 380 | " ac_3 = tf.matmul(layer2, W[2]) + B[2]\n", 381 | " layer3 = ac_3*tf.nn.sigmoid(ac_3)\n", 382 | " ac_4 = tf.matmul(layer3, W[3]) + B[3]\n", 383 | " layer4 = ac_4*tf.nn.sigmoid(ac_4)\n", 384 | " ac_5 = tf.matmul(layer4, W[4]) + B[4]\n", 385 | " layer5 = ac_5*tf.nn.sigmoid(ac_5)\n", 386 | " ac_6 = tf.matmul(layer5, W[5]) + B[5]\n", 387 | " layer6 = ac_6*tf.nn.sigmoid(ac_6)\n", 388 | " ac_7 = tf.matmul(layer6, W[6]) + B[6]\n", 389 | " layer7 = ac_7*tf.nn.sigmoid(ac_7)\n", 390 | " ac_8 = tf.matmul(layer7, W[7]) + B[7]\n", 391 | " layer8 = ac_8*tf.nn.sigmoid(ac_8)\n", 392 | " ac_9 = tf.matmul(layer8, W[8]) + B[8]\n", 393 | " layer9 = ac_9*tf.nn.sigmoid(ac_9)\n", 394 | " ac_10 = tf.matmul(layer9, W[9]) + B[9]\n", 395 | " layer10 = ac_10*tf.nn.sigmoid(ac_10)\n", 396 | " return tf.matmul(layer10, W[10]) + B[10]\n", 397 | " \n", 398 | " else:\n", 399 | " ac_1=tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0])\n", 400 | " layer1 = ac_1*tf.nn.sigmoid(ac_1)\n", 401 | " ac_2=tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1])\n", 402 | " layer2 = ac_2*tf.nn.sigmoid(ac_2)\n", 403 | " ac_3=tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2])\n", 404 | " layer3 = ac_3*tf.nn.sigmoid(ac_3)\n", 405 | " ac_4=tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3])\n", 406 | " layer4 = ac_4*tf.nn.sigmoid(ac_4)\n", 407 | " ac_5=tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4])\n", 408 | " layer5 = ac_5*tf.nn.sigmoid(ac_5)\n", 409 | " ac_6=tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5])\n", 410 | " layer6 = ac_6*tf.nn.sigmoid(ac_6)\n", 411 | " ac_7=tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6])\n", 412 | " layer7 = ac_7*tf.nn.sigmoid(ac_7)\n", 413 | " ac_8=tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7])\n", 414 | " layer8 = ac_8*tf.nn.sigmoid(ac_8)\n", 415 | " ac_9=tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8])\n", 416 | " layer9 = ac_9*tf.nn.sigmoid(ac_9)\n", 417 | " ac_10=tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9])\n", 418 | " layer10 = ac_10*tf.nn.sigmoid(ac_10)\n", 419 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 420 | " \n", 421 | "def train(mnist):\n", 422 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 423 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 424 | " \n", 425 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, 
LAYER1_NODE], stddev=0.1))\n", 426 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 427 | " \n", 428 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 429 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 430 | " \n", 431 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 432 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 433 | " \n", 434 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 435 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 436 | " \n", 437 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 438 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 439 | " \n", 440 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 441 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 442 | " \n", 443 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 444 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 445 | " \n", 446 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 447 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 448 | " \n", 449 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 450 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 451 | " \n", 452 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 453 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 454 | " \n", 455 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 456 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 457 | " \n", 458 | " W=[weights1, weights2, weights3, weights4, weights5, weights6, weights7, weights8, weights9, weights10, weights11]\n", 459 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 460 | " \n", 461 | " # 计算不含滑动平均类的前向传播结果\n", 462 | " y = inference(x, None, W, B)\n", 463 | " \n", 464 | " # 定义训练轮数及相关的滑动平均类 \n", 465 | " global_step = tf.Variable(0, trainable=False)\n", 466 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 467 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 468 | " average_y = inference(x, variable_averages, W, B)\n", 469 | " \n", 470 | " # 计算交叉熵及其平均值\n", 471 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 472 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 473 | " \n", 474 | " # 损失函数的计算\n", 475 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 476 | " regularaztion = regularizer(W[0]) \n", 477 | " for i in range(1,11):\n", 478 | " regularazation=regularaztion + regularizer(W[i]) \n", 479 | " loss = cross_entropy_mean + regularaztion\n", 480 | " \n", 481 | " # 设置指数衰减的学习率。\n", 482 | " learning_rate = tf.train.exponential_decay(\n", 483 | " LEARNING_RATE_BASE,\n", 484 | " global_step,\n", 485 | " mnist.train.num_examples / BATCH_SIZE,\n", 486 | " LEARNING_RATE_DECAY,\n", 487 | " staircase=True)\n", 488 | " \n", 489 | " # 优化损失函数\n", 490 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, 
global_step=global_step)\n", 491 | " \n", 492 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 493 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 494 | " train_op = tf.no_op(name='train')\n", 495 | "\n", 496 | " # 计算正确率\n", 497 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 498 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 499 | " \n", 500 | " # 初始化会话并开始训练过程。\n", 501 | " with tf.Session() as sess:\n", 502 | " tf.global_variables_initializer().run()\n", 503 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 504 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 505 | " \n", 506 | " # 循环的训练神经网络。\n", 507 | " for i in range(TRAINING_STEPS):\n", 508 | " if i % 1000 == 0:\n", 509 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 510 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 511 | " \n", 512 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 513 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 514 | "\n", 515 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 516 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 517 | "\n", 518 | "train(mnist)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": { 525 | "collapsed": true 526 | }, 527 | "outputs": [], 528 | "source": [] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.5.3" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /Experiments/tf_CNN_Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "b'Hello, TensorFlow!'\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import tensorflow as tf\n", 20 | "import numpy as np\n", 21 | "hello = tf.constant('Hello, TensorFlow!')\n", 22 | "sess = tf.Session()\n", 23 | "print(sess.run(hello))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import numpy as np\n", 35 | "\n", 36 | "\n", 37 | "a = tf.constant(2, tf.int16)\n", 38 | "b = tf.constant(4, tf.float32)\n", 39 | "c = tf.constant(8, tf.float32)\n", 40 | "\n", 41 | "d = tf.Variable(2, tf.int16)\n", 42 | "e = tf.Variable(4, tf.float32)\n", 43 | "f = tf.Variable(8, tf.float32)\n", 44 | "\n", 45 | "g = tf.constant(np.zeros(shape=(2,2), dtype=np.float32)) #可以正常声明变量\n", 46 | "\n", 47 | "h = tf.zeros([11], tf.int16)\n", 48 | "i = tf.ones([2,2], tf.float32)\n", 49 | "j = tf.zeros([1000,4,3], tf.float64)\n", 50 | "\n", 51 | "k = tf.Variable(tf.zeros([2,2], tf.float32))\n", 52 | "l = tf.Variable(tf.zeros([5,6,5], tf.float32))" 53 | ] 54 | }, 55 | { 56 | "cell_type": 
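# Note on swish_test.ipynb above: the three cells train (1) a single 500-unit ReLU hidden
# layer at base LR 0.8 (test accuracy 0.9829), (2) ten ReLU layers at base LR 0.008 (0.9666),
# and (3) the same ten-layer net with swish written inline as ac * tf.nn.sigmoid(ac) (0.9722).
# A reusable helper, assuming the same TF 1.x graph-mode API the notebooks use:
import tensorflow as tf

def swish(x):
    """Swish activation: f(x) = x * sigmoid(x)."""
    return x * tf.nn.sigmoid(x)

# e.g. layer1 = swish(tf.matmul(input_tensor, W[0]) + B[0])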
"code", 57 | "execution_count": 11, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "\n", 67 | "8\n", 68 | "[[ 0. 0.]\n", 69 | " [ 0. 0.]]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "a = tf.constant(2, tf.int16)\n", 75 | "b = tf.constant(4, tf.float32)\n", 76 | "\n", 77 | "graph = tf.Graph()\n", 78 | "with graph.as_default():\n", 79 | " a = tf.Variable(8, tf.float32)\n", 80 | " b = tf.Variable(tf.zeros([2,2], tf.float32))\n", 81 | " \n", 82 | "with tf.Session(graph=graph) as session:\n", 83 | " tf.global_variables_initializer().run()\n", 84 | " print(f)\n", 85 | " print(session.run(a))\n", 86 | " print(session.run(b))\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Tensor(\"add:0\", shape=(2,), dtype=int32)\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "a=tf.constant([1,2],name=\"a\")\n", 106 | "b=tf.constant([2,4],name=\"b\")\n", 107 | "result = a+b\n", 108 | "print(result)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "[2 4 6 8]\n", 123 | "[2 4 6 8]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "a=tf.constant([1,2,3,4])\n", 129 | "b=tf.constant([1,2,3,4])\n", 130 | "result=a+b\n", 131 | "sess=tf.Session()\n", 132 | "print(sess.run(result))\n", 133 | "sess.close\n", 134 | "\n", 135 | "#输出 [2 4 6 8]\n", 136 | "\n", 137 | "with tf.Session() as sess:\n", 138 | " a=tf.constant([1,2,3,4])\n", 139 | " b=tf.constant([1,2,3,4])\n", 140 | " result=a+b\n", 141 | " print(sess.run(result))\n", 142 | " \n", 143 | "#输出 [2 4 6 8]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 16, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "[[-0.11131823 2.38459873]]\n", 158 | "[[-0.11131823 2.38459873]]\n" 159 | ] 160 | }, 161 | { 162 | "data": { 163 | "text/plain": [ 164 | ">" 165 | ] 166 | }, 167 | "execution_count": 16, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "w1=tf.Variable(tf.random_normal([1,2],stddev=1,seed=1))\n", 174 | "\n", 175 | "#因为需要重复输入x,而每建一个x就会生成一个结点,计算图的效率会低。所以使用占位符\n", 176 | "x=tf.placeholder(tf.float32,shape=(1,2))\n", 177 | "x1=tf.constant([[0.7,0.9]])\n", 178 | "\n", 179 | "a=x+w1\n", 180 | "b=x1+w1\n", 181 | "\n", 182 | "sess=tf.Session()\n", 183 | "sess.run(tf.global_variables_initializer())\n", 184 | "#运行y时将占位符填上,feed_dict为字典,变量名不可变\n", 185 | "y_1=sess.run(a,feed_dict={x:[[0.7,0.9]]})\n", 186 | "y_2=sess.run(b)\n", 187 | "print(y_1)\n", 188 | "print(y_2)\n", 189 | "sess.close" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 13, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "the distance between [[1 2]] and [[15 16]] -> [19.79899]\n", 204 | "the distance between [[3 4]] and [[13 14]] -> [14.142136]\n", 205 | "the distance between [[5 6]] and [[11 12]] -> [8.485281]\n", 206 | "the distance between [[7 8]] and [[ 9 10]] -> [2.8284271]\n" 207 | ] 208 | } 209 | ], 210 | 
"source": [ 211 | "list_of_points1_ = [[1,2], [3,4], [5,6], [7,8]]\n", 212 | "list_of_points2_ = [[15,16], [13,14], [11,12], [9,10]]\n", 213 | "list_of_points1 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points1_])\n", 214 | "list_of_points2 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points2_])\n", 215 | "\n", 216 | "graph = tf.Graph()\n", 217 | "with graph.as_default(): \n", 218 | " \n", 219 | " #我们使用 tf.placeholder() 创建占位符 ,在 session.run() 过程中再投递数据 \n", 220 | " point1 = tf.placeholder(tf.float32, shape=(1, 2))\n", 221 | " point2 = tf.placeholder(tf.float32, shape=(1, 2))\n", 222 | " \n", 223 | " def calculate_eucledian_distance(point1, point2):\n", 224 | " difference = tf.subtract(point1, point2)\n", 225 | " power2 = tf.pow(difference, tf.constant(2.0, shape=(1,2)))\n", 226 | " add = tf.reduce_sum(power2)\n", 227 | " eucledian_distance = tf.sqrt(add)\n", 228 | " return eucledian_distance\n", 229 | " \n", 230 | " dist = calculate_eucledian_distance(point1, point2)\n", 231 | " \n", 232 | "with tf.Session(graph=graph) as session:\n", 233 | " tf.global_variables_initializer().run() \n", 234 | " for ii in range(len(list_of_points1)):\n", 235 | " point1_ = list_of_points1[ii]\n", 236 | " point2_ = list_of_points2[ii]\n", 237 | " \n", 238 | " #使用feed_dict将数据投入到[dist]中\n", 239 | " feed_dict = {point1 : point1_, point2 : point2_}\n", 240 | " distance = session.run([dist], feed_dict=feed_dict)\n", 241 | " print(\"the distance between {} and {} -> {}\".format(point1_, point2_, distance))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 8, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "[[-0.81131822 1.48459876 0.06532937]\n", 256 | " [-2.4427042 0.0992484 0.59122431]]\n", 257 | "[[-0.81131822]\n", 258 | " [ 1.48459876]\n", 259 | " [ 0.06532937]]\n", 260 | "在迭代 0 次后,训练损失为 0.308504\n", 261 | "在迭代 1000 次后,训练损失为 0.0393406\n", 262 | "在迭代 2000 次后,训练损失为 0.0182158\n", 263 | "在迭代 3000 次后,训练损失为 0.0104779\n", 264 | "在迭代 4000 次后,训练损失为 0.00680374\n", 265 | "在迭代 5000 次后,训练损失为 0.00446512\n", 266 | "在迭代 6000 次后,训练损失为 0.00296797\n", 267 | "在迭代 7000 次后,训练损失为 0.00218553\n", 268 | "在迭代 8000 次后,训练损失为 0.00179452\n", 269 | "在迭代 9000 次后,训练损失为 0.0013211\n", 270 | "在迭代 10000 次后,训练损失为 0.000957699\n", 271 | "在迭代 11000 次后,训练损失为 0.00081103\n", 272 | "在迭代 12000 次后,训练损失为 0.000643147\n", 273 | "在迭代 13000 次后,训练损失为 0.00047439\n", 274 | "在迭代 14000 次后,训练损失为 0.00030086\n", 275 | "在迭代 15000 次后,训练损失为 0.000137936\n", 276 | "[[-0.81131822 3.84255528 3.38165283]\n", 277 | " [-2.4427042 1.98635983 3.50722313]]\n", 278 | "[[-0.81131822]\n", 279 | " [ 4.02907705]\n", 280 | " [ 2.60285187]]\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "import tensorflow as tf\n", 286 | "from numpy.random import RandomState\n", 287 | "\n", 288 | "batch_size=10\n", 289 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 290 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 291 | "\n", 292 | "\n", 293 | "# None 可以根据batch 大小确定维度,在shape的一个维度上使用None,方便不大的batch\n", 294 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 295 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 296 | "\n", 297 | "a=tf.nn.relu(tf.matmul(x,w1))\n", 298 | "yhat=tf.nn.relu(tf.matmul(a,w2))\n", 299 | "\n", 300 | "#定义交叉熵为损失函数,训练过程使用Adam算法最小化交叉熵\n", 301 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 302 | 
"train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)\n", 303 | "\n", 304 | "rdm=RandomState(1)\n", 305 | "data_size=512\n", 306 | "\n", 307 | "#生成两个特征,共data_size个样本\n", 308 | "X=rdm.rand(data_size,2)\n", 309 | "#定义规则给出样本标签,所有x1+x2<1的样本认为是正样本,其他为负样本。Y,1为正样本\n", 310 | "Y = [[int(x1+x2 < 1)] for (x1, x2) in X]\n", 311 | "\n", 312 | "with tf.Session() as sess:\n", 313 | " sess.run(tf.global_variables_initializer())\n", 314 | " print(sess.run(w1))\n", 315 | " print(sess.run(w2))\n", 316 | " steps=15001\n", 317 | " for i in range(steps):\n", 318 | " \n", 319 | " #选定每一个批量读取的首尾位置,确保在1个epoch内采样训练\n", 320 | " start = i * batch_size % data_size\n", 321 | " end = min(start + batch_size,data_size)\n", 322 | " sess.run(train_step,feed_dict={x:X[start:end],y:Y[start:end]})\n", 323 | " if i % 1000 == 0:\n", 324 | " training_loss= sess.run(cross_entropy,feed_dict={x:X,y:Y})\n", 325 | " print(\"在迭代 %d 次后,训练损失为 %g\"%(i,training_loss))\n", 326 | " if i == steps-1:\n", 327 | " print(sess.run(w1))\n", 328 | " print(sess.run(w2))#输出更新后的权重矩阵" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "上面的代码定义了一个简单的三层全连接网络(输入层、隐藏层和输出层分别为2、3和2个神经元),隐藏层和输出层的激活函数使用的是ReLU函数。该模型训练的样本总数为512,每次迭代读取的批量为10。这个简单的全连接网络以交叉熵为损失函数,并使用Adam优化算法进行权重更新。\n", 336 | "\n", 337 | "其中需要注意的几个函数如tf.nn.relu()代表调用ReLU激活函数,tf.matmul()为矩阵乘法等。tf.clip_by_value(yhat,1e-10,1.0)这一语句代表的是截断yhat的值,因为这一语句是嵌套在tf.log()函数内的,所以我们需要确保yhat的取值不会导致对数无穷大。\n", 338 | "\n", 339 | "tf.train.AdamOptimizer(learning_rate).minimize(cost_function)是进行训练的函数,其中我们采用的是Adam优化算法更新权重,并且需要提供学习速率和损失函数这两个参数。后面就是生成训练数据,X=rdm.rand(512,2)表示随机生成512个样本,每个样本有两个特征值。最后就是迭代运行了,这里我们计算出每一次迭代抽取数据的起始位置(start)和结束位置(end),并且每一次抽取的数据量为前面我们定义的批量,如果一个epoch最后剩余的数据少于批量大小,那就只是用剩余的数据进行训练。最后两句代码是为了计算训练损失并迭代一些次数后输出训练损失。" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 3, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "import numpy as np\n", 351 | "import pickle\n", 352 | "import json\n", 353 | "import os\n", 354 | "\n", 355 | "#定义一些预处理函数\n", 356 | "\n", 357 | "def flatten_tf_array(array):\n", 358 | " shape = array.get_shape().as_list()\n", 359 | " return tf.reshape(array, [shape[0], shape[1] * shape[2] * shape[3]])\n", 360 | "\n", 361 | "def accuracy(predictions, labels):\n", 362 | " return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])\n", 363 | "\n", 364 | "def randomize(dataset, labels):\n", 365 | " permutation = np.random.permutation(labels.shape[0])\n", 366 | " shuffled_dataset = dataset[permutation, :, :]\n", 367 | " shuffled_labels = labels[permutation]\n", 368 | " return shuffled_dataset, shuffled_labels\n", 369 | "\n", 370 | "def one_hot_encode(np_array):\n", 371 | " return (np.arange(10) == np_array[:,None]).astype(np.float32)\n", 372 | "\n", 373 | "def reformat_data(dataset, labels, image_width, image_height, image_depth):\n", 374 | " np_dataset_ = np.array([np.array(image_data).reshape(image_width, image_height, image_depth) for image_data in dataset])\n", 375 | " np_labels_ = one_hot_encode(np.array(labels, dtype=np.float32))\n", 376 | " np_dataset, np_labels = randomize(np_dataset_, np_labels_)\n", 377 | " return np_dataset, np_labels" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 4, 383 | "metadata": { 384 | "collapsed": false 385 | }, 386 | "outputs": [ 387 | { 388 | "name": "stdout", 389 | "output_type": "stream", 390 | "text": [ 391 | "训练集包含以下标签: [0 1 2 3 4 5 6 7 8 9]\n", 392 | 
"训练集维度 (50000, 32, 32, 3) (50000, 10)\n", 393 | "测试集维度 (10000, 32, 32, 3) (10000, 10)\n" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "cifar10_folder = './data/cifar10/'\n", 399 | "train_datasets = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', ]\n", 400 | "test_dataset = ['test_batch']\n", 401 | "c10_image_height = 32\n", 402 | "c10_image_width = 32\n", 403 | "c10_image_depth = 3\n", 404 | "c10_num_labels = 10\n", 405 | "c10_image_size = 32 #Ahmet Taspinar的代码缺少了这一语句\n", 406 | "\n", 407 | "with open(cifar10_folder + test_dataset[0], 'rb') as f0:\n", 408 | " c10_test_dict = pickle.load(f0, encoding='bytes')\n", 409 | "\n", 410 | "c10_test_dataset, c10_test_labels = c10_test_dict[b'data'], c10_test_dict[b'labels']\n", 411 | "test_dataset_cifar10, test_labels_cifar10 = reformat_data(c10_test_dataset, c10_test_labels, c10_image_size, c10_image_size, c10_image_depth)\n", 412 | "\n", 413 | "c10_train_dataset, c10_train_labels = [], []\n", 414 | "for train_dataset in train_datasets:\n", 415 | " with open(cifar10_folder + train_dataset, 'rb') as f0:\n", 416 | " c10_train_dict = pickle.load(f0, encoding='bytes')\n", 417 | " c10_train_dataset_, c10_train_labels_ = c10_train_dict[b'data'], c10_train_dict[b'labels']\n", 418 | " \n", 419 | " c10_train_dataset.append(c10_train_dataset_)\n", 420 | " c10_train_labels += c10_train_labels_\n", 421 | "\n", 422 | "c10_train_dataset = np.concatenate(c10_train_dataset, axis=0)\n", 423 | "train_dataset_cifar10, train_labels_cifar10 = reformat_data(c10_train_dataset, c10_train_labels, c10_image_size, c10_image_size, c10_image_depth)\n", 424 | "del c10_train_dataset\n", 425 | "del c10_train_labels\n", 426 | "\n", 427 | "print(\"训练集包含以下标签: {}\".format(np.unique(c10_train_dict[b'labels'])))\n", 428 | "print('训练集维度', train_dataset_cifar10.shape, train_labels_cifar10.shape)\n", 429 | "print('测试集维度', test_dataset_cifar10.shape, test_labels_cifar10.shape)\n" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 5, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 444 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 445 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 446 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 447 | ] 448 | } 449 | ], 450 | "source": [ 451 | "from tensorflow.examples.tutorials.mnist import input_data\n", 452 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 5, 458 | "metadata": { 459 | "collapsed": false 460 | }, 461 | "outputs": [ 462 | { 463 | "name": "stdout", 464 | "output_type": "stream", 465 | "text": [ 466 | "Training data size: 55000\n", 467 | "Validating data size: 5000\n", 468 | "Testing data size: 10000\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "print(\"Training data size: \", mnist.train.num_examples) \n", 474 | "print (\"Validating data size: \", mnist.validation.num_examples) \n", 475 | "print (\"Testing data size: \", mnist.test.num_examples) " 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 10, 481 | "metadata": { 482 | "collapsed": false 483 | }, 484 | "outputs": [ 485 | { 486 | "name": "stdout", 487 | "output_type": "stream", 488 | "text": [ 489 | "[ 0. 0. 0. 0. 0. 0. 0.\n", 490 | " 0. 0. 0. 0. 0. 0. 0.\n", 491 | " 0. 0. 0. 0. 0. 0. 
0.\n", 492 | " 0. 0. 0. 0. 0. 0. 0.\n", 493 | " 0. 0. 0. 0. 0. 0. 0.\n", 494 | " 0. 0. 0. 0. 0. 0. 0.\n", 495 | " 0. 0. 0. 0. 0. 0. 0.\n", 496 | " 0. 0. 0. 0. 0. 0. 0.\n", 497 | " 0. 0. 0. 0. 0. 0. 0.\n", 498 | " 0. 0. 0. 0. 0. 0. 0.\n", 499 | " 0. 0. 0. 0. 0. 0. 0.\n", 500 | " 0. 0. 0. 0. 0. 0. 0.\n", 501 | " 0. 0. 0. 0. 0. 0. 0.\n", 502 | " 0. 0. 0. 0. 0. 0. 0.\n", 503 | " 0. 0. 0. 0. 0. 0. 0.\n", 504 | " 0. 0. 0. 0. 0. 0. 0.\n", 505 | " 0. 0. 0. 0. 0. 0. 0.\n", 506 | " 0. 0. 0. 0. 0. 0. 0.\n", 507 | " 0. 0. 0. 0. 0. 0. 0.\n", 508 | " 0. 0. 0. 0. 0. 0. 0.\n", 509 | " 0. 0. 0. 0. 0. 0. 0.\n", 510 | " 0. 0. 0. 0. 0. 0. 0.\n", 511 | " 0. 0. 0. 0. 0. 0. 0.\n", 512 | " 0. 0. 0. 0. 0. 0. 0.\n", 513 | " 0. 0. 0. 0. 0. 0. 0.\n", 514 | " 0. 0. 0. 0. 0. 0. 0.\n", 515 | " 0. 0. 0. 0. 0. 0. 0.\n", 516 | " 0. 0. 0. 0. 0. 0. 0.\n", 517 | " 0. 0. 0. 0. 0. 0. 0.\n", 518 | " 0. 0. 0. 0. 0.38039219 0.37647063\n", 519 | " 0.3019608 0.46274513 0.2392157 0. 0. 0. 0.\n", 520 | " 0. 0. 0. 0. 0. 0. 0.\n", 521 | " 0. 0. 0. 0. 0.35294119 0.5411765\n", 522 | " 0.92156869 0.92156869 0.92156869 0.92156869 0.92156869 0.92156869\n", 523 | " 0.98431379 0.98431379 0.97254908 0.99607849 0.96078438 0.92156869\n", 524 | " 0.74509805 0.08235294 0. 0. 0. 0. 0.\n", 525 | " 0. 0. 0. 0. 0. 0.\n", 526 | " 0.54901963 0.98431379 0.99607849 0.99607849 0.99607849 0.99607849\n", 527 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849\n", 528 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.74117649 0.09019608\n", 529 | " 0. 0. 0. 0. 0. 0. 0.\n", 530 | " 0. 0. 0. 0.88627458 0.99607849 0.81568635\n", 531 | " 0.78039223 0.78039223 0.78039223 0.78039223 0.54509807 0.2392157\n", 532 | " 0.2392157 0.2392157 0.2392157 0.2392157 0.50196081 0.8705883\n", 533 | " 0.99607849 0.99607849 0.74117649 0.08235294 0. 0. 0.\n", 534 | " 0. 0. 0. 0. 0. 0.\n", 535 | " 0.14901961 0.32156864 0.0509804 0. 0. 0. 0.\n", 536 | " 0. 0. 0. 0. 0. 0. 0.\n", 537 | " 0.13333334 0.83529419 0.99607849 0.99607849 0.45098042 0. 0.\n", 538 | " 0. 0. 0. 0. 0. 0. 0.\n", 539 | " 0. 0. 0. 0. 0. 0. 0.\n", 540 | " 0. 0. 0. 0. 0. 0. 0.\n", 541 | " 0. 0.32941177 0.99607849 0.99607849 0.91764712 0. 0.\n", 542 | " 0. 0. 0. 0. 0. 0. 0.\n", 543 | " 0. 0. 0. 0. 0. 0. 0.\n", 544 | " 0. 0. 0. 0. 0. 0. 0.\n", 545 | " 0. 0.32941177 0.99607849 0.99607849 0.91764712 0. 0.\n", 546 | " 0. 0. 0. 0. 0. 0. 0.\n", 547 | " 0. 0. 0. 0. 0. 0. 0.\n", 548 | " 0. 0. 0. 0. 0. 0. 0.\n", 549 | " 0.41568631 0.6156863 0.99607849 0.99607849 0.95294124 0.20000002\n", 550 | " 0. 0. 0. 0. 0. 0. 0.\n", 551 | " 0. 0. 0. 0. 0. 0. 0.\n", 552 | " 0. 0. 0. 0.09803922 0.45882356 0.89411771\n", 553 | " 0.89411771 0.89411771 0.99215692 0.99607849 0.99607849 0.99607849\n", 554 | " 0.99607849 0.94117653 0. 0. 0. 0. 0.\n", 555 | " 0. 0. 0. 0. 0. 0. 0.\n", 556 | " 0. 0. 0. 0.26666668 0.4666667 0.86274517\n", 557 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849\n", 558 | " 0.99607849 0.99607849 0.99607849 0.55686277 0. 0. 0.\n", 559 | " 0. 0. 0. 0. 0. 0. 0.\n", 560 | " 0. 0. 0. 0.14509805 0.73333335 0.99215692\n", 561 | " 0.99607849 0.99607849 0.99607849 0.87450987 0.80784321 0.80784321\n", 562 | " 0.29411766 0.26666668 0.84313732 0.99607849 0.99607849 0.45882356\n", 563 | " 0. 0. 0. 0. 0. 0. 0.\n", 564 | " 0. 0. 0. 0. 0. 0.44313729\n", 565 | " 0.8588236 0.99607849 0.94901967 0.89019614 0.45098042 0.34901962\n", 566 | " 0.12156864 0. 0. 0. 0. 0.7843138\n", 567 | " 0.99607849 0.9450981 0.16078432 0. 0. 0. 0.\n", 568 | " 0. 0. 0. 0. 0. 0. 0.\n", 569 | " 0. 
0.66274512 0.99607849 0.6901961 0.24313727 0. 0.\n", 570 | " 0. 0. 0. 0. 0. 0.18823531\n", 571 | " 0.90588242 0.99607849 0.91764712 0. 0. 0. 0.\n", 572 | " 0. 0. 0. 0. 0. 0. 0.\n", 573 | " 0. 0. 0.07058824 0.48627454 0. 0. 0.\n", 574 | " 0. 0. 0. 0. 0. 0.\n", 575 | " 0.32941177 0.99607849 0.99607849 0.65098041 0. 0. 0.\n", 576 | " 0. 0. 0. 0. 0. 0. 0.\n", 577 | " 0. 0. 0. 0. 0. 0. 0.\n", 578 | " 0. 0. 0. 0. 0. 0. 0.\n", 579 | " 0.54509807 0.99607849 0.9333334 0.22352943 0. 0. 0.\n", 580 | " 0. 0. 0. 0. 0. 0. 0.\n", 581 | " 0. 0. 0. 0. 0. 0. 0.\n", 582 | " 0. 0. 0. 0. 0. 0.\n", 583 | " 0.82352948 0.98039222 0.99607849 0.65882355 0. 0. 0.\n", 584 | " 0. 0. 0. 0. 0. 0. 0.\n", 585 | " 0. 0. 0. 0. 0. 0. 0.\n", 586 | " 0. 0. 0. 0. 0. 0. 0.\n", 587 | " 0.94901967 0.99607849 0.93725497 0.22352943 0. 0. 0.\n", 588 | " 0. 0. 0. 0. 0. 0. 0.\n", 589 | " 0. 0. 0. 0. 0. 0. 0.\n", 590 | " 0. 0. 0. 0. 0. 0.\n", 591 | " 0.34901962 0.98431379 0.9450981 0.33725491 0. 0. 0.\n", 592 | " 0. 0. 0. 0. 0. 0. 0.\n", 593 | " 0. 0. 0. 0. 0. 0. 0.\n", 594 | " 0. 0. 0. 0. 0. 0.\n", 595 | " 0.01960784 0.80784321 0.96470594 0.6156863 0. 0. 0.\n", 596 | " 0. 0. 0. 0. 0. 0. 0.\n", 597 | " 0. 0. 0. 0. 0. 0. 0.\n", 598 | " 0. 0. 0. 0. 0. 0. 0.\n", 599 | " 0.01568628 0.45882356 0.27058825 0. 0. 0. 0.\n", 600 | " 0. 0. 0. 0. 0. 0. 0.\n", 601 | " 0. 0. 0. 0. 0. 0. 0.\n", 602 | " 0. 0. 0. 0. 0. 0. 0.\n", 603 | " 0. 0. 0. 0. 0. 0. 0.\n", 604 | " 0. 0. 0. 0. 0. 0. 0. ]\n", 605 | "[ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", 606 | "[ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", 607 | "[ 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n" 608 | ] 609 | } 610 | ], 611 | "source": [ 612 | "print(mnist.train.images[0]) \n", 613 | "print(mnist.train.labels[0]) \n", 614 | "print(mnist.test.labels[0]) \n", 615 | "print(mnist.validation.labels[0]) " 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 6, 621 | "metadata": { 622 | "collapsed": false 623 | }, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 630 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 631 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 632 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "import tensorflow as tf\n", 638 | "from tensorflow.examples.tutorials.mnist import input_data\n", 639 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 640 | "\n", 641 | "\n", 642 | "INPUT_NODE = 784 \n", 643 | "OUTPUT_NODE = 10 \n", 644 | "LAYER1_NODE = 500 \n", 645 | " \n", 646 | "BATCH_SIZE = 100 \n", 647 | "\n", 648 | "# 模型相关的参数\n", 649 | "LEARNING_RATE_BASE = 0.8 \n", 650 | "LEARNING_RATE_DECAY = 0.99 \n", 651 | "REGULARAZTION_RATE = 0.0001 \n", 652 | "TRAINING_STEPS = 10000 \n", 653 | "MOVING_AVERAGE_DECAY = 0.99 \n", 654 | "\n", 655 | "def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):\n", 656 | " # 使用滑动平均类\n", 657 | " if avg_class == None:\n", 658 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)\n", 659 | " return tf.matmul(layer1, weights2) + biases2\n", 660 | "\n", 661 | " else:\n", 662 | " \n", 663 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))\n", 664 | " return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2) \n", 665 | " \n", 666 | "def train(mnist):\n", 667 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 668 | " y_ = 
tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 669 | " # 生成隐藏层的参数。\n", 670 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 671 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 672 | " # 生成输出层的参数。\n", 673 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))\n", 674 | " biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 675 | "\n", 676 | " # 计算不含滑动平均类的前向传播结果\n", 677 | " y = inference(x, None, weights1, biases1, weights2, biases2)\n", 678 | " \n", 679 | " # 定义训练轮数及相关的滑动平均类 \n", 680 | " global_step = tf.Variable(0, trainable=False)\n", 681 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 682 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 683 | " average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)\n", 684 | " \n", 685 | " # 计算交叉熵及其平均值\n", 686 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 687 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 688 | " \n", 689 | " # 损失函数的计算\n", 690 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 691 | " regularization = regularizer(weights1) + regularizer(weights2)\n", 692 | " loss = cross_entropy_mean + regularization\n", 693 | " \n", 694 | " # 设置指数衰减的学习率。\n", 695 | " learning_rate = tf.train.exponential_decay(\n", 696 | " LEARNING_RATE_BASE,\n", 697 | " global_step,\n", 698 | " mnist.train.num_examples / BATCH_SIZE,\n", 699 | " LEARNING_RATE_DECAY,\n", 700 | " staircase=True)\n", 701 | " \n", 702 | " # 优化损失函数\n", 703 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 704 | " \n", 705 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 706 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 707 | " train_op = tf.no_op(name='train')\n", 708 | "\n", 709 | " # 计算正确率\n", 710 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 711 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 712 | " \n", 713 | " # 初始化会话并开始训练过程。\n", 714 | " with tf.Session() as sess:\n", 715 | " tf.global_variables_initializer().run()\n", 716 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 717 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 718 | " \n", 719 | " # 循环地训练神经网络。\n", 720 | " for i in range(TRAINING_STEPS):\n", 721 | " if i % 1000 == 0:\n", 722 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 723 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 724 | " \n", 725 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 726 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 727 | "\n", 728 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 729 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 8, 735 | "metadata": { 736 | "collapsed": false 737 | }, 738 | "outputs": [ 739 | { 740 | "name": "stdout", 741 | "output_type": "stream", 742 | "text": [ 743 | "After 0 training step(s), validation accuracy using average model is 0.0912 \n", 744 | "After 1000 training step(s), validation accuracy using average model is 0.9756 \n", 745 | "After 2000 training step(s), validation accuracy 
using average model is 0.9794 \n", 746 | "After 3000 training step(s), validation accuracy using average model is 0.9818 \n", 747 | "After 4000 training step(s), validation accuracy using average model is 0.9814 \n", 748 | "After 5000 training step(s), test accuracy using average model is 0.9834\n" 749 | ] 750 | } 751 | ], 752 | "source": [ 753 | "train(mnist)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 7, 759 | "metadata": { 760 | "collapsed": false 761 | }, 762 | "outputs": [ 763 | { 764 | "name": "stdout", 765 | "output_type": "stream", 766 | "text": [ 767 | "After 0 training step(s), validation accuracy using average model is 0.1678 \n", 768 | "After 1000 training step(s), validation accuracy using average model is 0.9766 \n", 769 | "After 2000 training step(s), validation accuracy using average model is 0.981 \n", 770 | "After 3000 training step(s), validation accuracy using average model is 0.9808 \n", 771 | "After 4000 training step(s), validation accuracy using average model is 0.9824 \n", 772 | "After 5000 training step(s), validation accuracy using average model is 0.9822 \n", 773 | "After 6000 training step(s), validation accuracy using average model is 0.983 \n", 774 | "After 7000 training step(s), validation accuracy using average model is 0.9828 \n", 775 | "After 8000 training step(s), validation accuracy using average model is 0.9842 \n", 776 | "After 9000 training step(s), validation accuracy using average model is 0.9834 \n", 777 | "After 10000 training step(s), test accuracy using average model is 0.9851\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "avg_class = None\n", 783 | "train(mnist)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": { 790 | "collapsed": true 791 | }, 792 | "outputs": [], 793 | "source": [] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": { 799 | "collapsed": true 800 | }, 801 | "outputs": [], 802 | "source": [] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 32, 807 | "metadata": { 808 | "collapsed": false 809 | }, 810 | "outputs": [ 811 | { 812 | "name": "stdout", 813 | "output_type": "stream", 814 | "text": [ 815 | "Initialized with learning_rate 0.001\n", 816 | "step 0000 : loss is 057.10, accuracy on training set 6.25 %, accuracy on test set 9.34 %\n", 817 | "step 1000 : loss is 001.63, accuracy on training set 62.50 %, accuracy on test set 34.26 %\n", 818 | "step 2000 : loss is 001.38, accuracy on training set 50.00 %, accuracy on test set 40.97 %\n", 819 | "step 3000 : loss is 001.78, accuracy on training set 31.25 %, accuracy on test set 41.43 %\n", 820 | "step 4000 : loss is 001.41, accuracy on training set 56.25 %, accuracy on test set 42.54 %\n", 821 | "step 5000 : loss is 001.52, accuracy on training set 37.50 %, accuracy on test set 45.20 %\n", 822 | "step 6000 : loss is 001.28, accuracy on training set 56.25 %, accuracy on test set 42.72 %\n", 823 | "step 7000 : loss is 001.29, accuracy on training set 56.25 %, accuracy on test set 46.89 %\n", 824 | "step 8000 : loss is 001.43, accuracy on training set 43.75 %, accuracy on test set 47.97 %\n", 825 | "step 9000 : loss is 001.58, accuracy on training set 31.25 %, accuracy on test set 49.00 %\n", 826 | "step 10000 : loss is 001.41, accuracy on training set 43.75 %, accuracy on test set 49.80 %\n" 827 | ] 828 | } 829 | ], 830 | "source": [ 831 | "LENET5_LIKE_BATCH_SIZE = 32\n", 832 | "LENET5_LIKE_FILTER_SIZE = 5\n", 833 | 
"LENET5_LIKE_FILTER_DEPTH = 16\n", 834 | "LENET5_LIKE_NUM_HIDDEN = 120\n", 835 | "\n", 836 | "def variables_lenet5_like(filter_size = LENET5_LIKE_FILTER_SIZE, \n", 837 | " filter_depth = LENET5_LIKE_FILTER_DEPTH, \n", 838 | " num_hidden = LENET5_LIKE_NUM_HIDDEN,\n", 839 | " image_width = 32, image_height = 32, image_depth = 3, num_labels = 10):\n", 840 | " \n", 841 | " w1 = tf.Variable(tf.truncated_normal([filter_size, filter_size, image_depth, filter_depth], stddev=0.1))\n", 842 | " b1 = tf.Variable(tf.zeros([filter_depth]))\n", 843 | "\n", 844 | " w2 = tf.Variable(tf.truncated_normal([filter_size, filter_size, filter_depth, filter_depth], stddev=0.1))\n", 845 | " b2 = tf.Variable(tf.constant(1.0, shape=[filter_depth]))\n", 846 | " \n", 847 | " w3 = tf.Variable(tf.truncated_normal([(image_width // 4)*(image_height // 4)*filter_depth , num_hidden], stddev=0.1))\n", 848 | " b3 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))\n", 849 | "\n", 850 | " w4 = tf.Variable(tf.truncated_normal([num_hidden, num_hidden], stddev=0.1))\n", 851 | " b4 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))\n", 852 | " \n", 853 | " w5 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))\n", 854 | " b5 = tf.Variable(tf.constant(1.0, shape = [num_labels]))\n", 855 | " variables = {\n", 856 | " 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5,\n", 857 | " 'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5\n", 858 | " }\n", 859 | " return variables\n", 860 | "\n", 861 | "def model_lenet5_like(data, variables):\n", 862 | " layer1_conv = tf.nn.conv2d(data, variables['w1'], [1, 1, 1, 1], padding='SAME')\n", 863 | " layer1_actv = tf.nn.relu(layer1_conv + variables['b1'])\n", 864 | " layer1_pool = tf.nn.avg_pool(layer1_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')\n", 865 | "\n", 866 | " layer2_conv = tf.nn.conv2d(layer1_pool, variables['w2'], [1, 1, 1, 1], padding='SAME')\n", 867 | " layer2_actv = tf.nn.relu(layer2_conv + variables['b2'])\n", 868 | " layer2_pool = tf.nn.avg_pool(layer2_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')\n", 869 | " \n", 870 | " flat_layer = flatten_tf_array(layer2_pool)\n", 871 | " layer3_fccd = tf.matmul(flat_layer, variables['w3']) + variables['b3']\n", 872 | " layer3_actv = tf.nn.relu(layer3_fccd)\n", 873 | " layer3_drop = tf.nn.dropout(layer3_actv, 0.5)\n", 874 | " \n", 875 | " layer4_fccd = tf.matmul(layer3_actv, variables['w4']) + variables['b4']\n", 876 | " layer4_actv = tf.nn.relu(layer4_fccd)\n", 877 | " layer4_drop = tf.nn.dropout(layer4_actv, 0.5)\n", 878 | " \n", 879 | " logits = tf.matmul(layer4_actv, variables['w5']) + variables['b5']\n", 880 | " return logits\n", 881 | "\n", 882 | "\n", 883 | "#Variables used in the constructing and running the graph\n", 884 | "num_steps = 10001\n", 885 | "display_step = 1000\n", 886 | "learning_rate = 0.001\n", 887 | "batch_size = 16\n", 888 | "\n", 889 | "#定义数据的基本信息,传入变量\n", 890 | "image_width = 32\n", 891 | "image_height = 32\n", 892 | "image_depth = 3\n", 893 | "num_labels = 10\n", 894 | "\n", 895 | "\n", 896 | "test_dataset = test_dataset_cifar10\n", 897 | "test_labels = test_labels_cifar10\n", 898 | "train_dataset = train_dataset_cifar10\n", 899 | "train_labels = train_labels_cifar10\n", 900 | "\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "graph = tf.Graph()\n", 905 | "with graph.as_default():\n", 906 | " #1 首先使用占位符定义数据变量的维度\n", 907 | " tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_width, image_height, image_depth))\n", 908 | " tf_train_labels = tf.placeholder(tf.float32, 
shape = (batch_size, num_labels))\n", 909 | " tf_test_dataset = tf.constant(test_dataset, tf.float32)\n", 910 | "\n", 911 | " #2 然后初始化权重矩阵和偏置向量\n", 912 | " variables = variables_lenet5_like(image_width = image_width, image_height=image_height, image_depth = image_depth, num_labels = num_labels)\n", 913 | "\n", 914 | "\n", 915 | " #3 使用模型计算分类\n", 916 | " logits = model_lenet5_like(tf_train_dataset, variables)\n", 917 | "\n", 918 | " #4 使用带softmax的交叉熵函数计算预测标签和真实标签之间的损失函数\n", 919 | " loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))\n", 920 | "\n", 921 | " #5 采用Adam优化算法优化上一步定义的损失函数,给定学习率\n", 922 | " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)\n", 923 | "\n", 924 | " # 执行预测推断\n", 925 | " train_prediction = tf.nn.softmax(logits)\n", 926 | " test_prediction = tf.nn.softmax(model_lenet5_like(tf_test_dataset, variables))\n", 927 | "\n", 928 | "\n", 929 | "with tf.Session(graph=graph) as session:\n", 930 | " #初始化全部变量\n", 931 | " tf.global_variables_initializer().run()\n", 932 | " print('Initialized with learning_rate', learning_rate)\n", 933 | " for step in range(num_steps):\n", 934 | " offset = (step * batch_size) % (train_labels.shape[0] - batch_size)\n", 935 | " batch_data = train_dataset[offset:(offset + batch_size), :, :, :]\n", 936 | " batch_labels = train_labels[offset:(offset + batch_size), :]\n", 937 | " #在每一次批量中,获取当前的训练数据,并传入feed_dict以馈送到占位符中\n", 938 | " feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}\n", 939 | " _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)\n", 940 | " train_accuracy = accuracy(predictions, batch_labels)\n", 941 | " \n", 942 | " if step % display_step == 0:\n", 943 | " test_accuracy = accuracy(test_prediction.eval(), test_labels)\n", 944 | " message = \"step {:04d} : loss is {:06.2f}, accuracy on training set {:02.2f} %, accuracy on test set {:02.2f} %\".format(step, l, train_accuracy, test_accuracy)\n", 945 | " print(message)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": null, 951 | "metadata": { 952 | "collapsed": true 953 | }, 954 | "outputs": [], 955 | "source": [] 956 | } 957 | ], 958 | "metadata": { 959 | "kernelspec": { 960 | "display_name": "Python 3", 961 | "language": "python", 962 | "name": "python3" 963 | }, 964 | "language_info": { 965 | "codemirror_mode": { 966 | "name": "ipython", 967 | "version": 3 968 | }, 969 | "file_extension": ".py", 970 | "mimetype": "text/x-python", 971 | "name": "python", 972 | "nbconvert_exporter": "python", 973 | "pygments_lexer": "ipython3", 974 | "version": "3.5.3" 975 | } 976 | }, 977 | "nbformat": 4, 978 | "nbformat_minor": 2 979 | } 980 | -------------------------------------------------------------------------------- /Experiments/tf_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "from tensorflow.examples.tutorials.mnist import input_data\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import matplotlib.gridspec as gridspec\n", 16 | "import os" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "#该函数将给出权重初始化的方法\n", 28 | "def variable_init(size):\n", 
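" # 标准差按输入维度缩放:stddev = 1/sqrt(in_dim/2),与 He 初始化的 sqrt(2/in_dim) 取值一致,\n",
" # 例如 in_dim=784 时 stddev 约为 0.05,可避免初始前向传播的激活值过大或过小。\n",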
29 | " in_dim = size[0]\n", 30 | "\n", 31 | " #计算随机生成变量所服从的正态分布标准差\n", 32 | " w_stddev = 1. / tf.sqrt(in_dim / 2.)\n", 33 | " return tf.random_normal(shape=size, stddev=w_stddev)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "#定义输入矩阵的占位符,输入层单元为784,None代表批量大小的占位,X代表输入的真实图片。占位符的数值类型为32位浮点型\n", 45 | "X = tf.placeholder(tf.float32, shape=[None, 784])\n", 46 | "\n", 47 | "#定义判别器的权重矩阵和偏置项向量,由此可知判别网络为三层全连接网络\n", 48 | "D_W1 = tf.Variable(variable_init([784, 128]))\n", 49 | "D_b1 = tf.Variable(tf.zeros(shape=[128]))\n", 50 | "\n", 51 | "D_W2 = tf.Variable(variable_init([128, 1]))\n", 52 | "D_b2 = tf.Variable(tf.zeros(shape=[1]))\n", 53 | "\n", 54 | "theta_D = [D_W1, D_W2, D_b1, D_b2]\n", 55 | "\n", 56 | "#定义生成器的输入噪声为100维度的向量组,None根据批量大小确定\n", 57 | "Z = tf.placeholder(tf.float32, shape=[None, 100])\n", 58 | "\n", 59 | "#定义生成器的权重与偏置项。输入层为100个神经元且接受随机噪声,\n", 60 | "#输出层为784个神经元,并输出手写字体图片。生成网络根据原论文为三层全连接网络\n", 61 | "G_W1 = tf.Variable(variable_init([100, 128]))\n", 62 | "G_b1 = tf.Variable(tf.zeros(shape=[128]))\n", 63 | "\n", 64 | "G_W2 = tf.Variable(variable_init([128, 784]))\n", 65 | "G_b2 = tf.Variable(tf.zeros(shape=[784]))\n", 66 | "\n", 67 | "theta_G = [G_W1, G_W2, G_b1, G_b2]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "#定义一个可以生成m*n阶随机矩阵的函数,该矩阵的元素服从均匀分布,随机生成的z就为生成器的输入\n", 79 | "def sample_Z(m, n):\n", 80 | " return np.random.uniform(-1., 1., size=[m, n])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "#定义生成器\n", 92 | "def generator(z):\n", 93 | " \n", 94 | " #第一层先计算 y=z*G_W1+G-b1,然后投入激活函数计算G_h1=ReLU(y),G_h1 为第二次层神经网络的输出激活值\n", 95 | " G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)\n", 96 | " \n", 97 | " #以下两个语句计算第二层传播到第三层的激活结果,第三层的激活结果是含有784个元素的向量,该向量转化28×28就可以表示图像\n", 98 | " G_log_prob = tf.matmul(G_h1, G_W2) + G_b2\n", 99 | " G_prob = tf.nn.sigmoid(G_log_prob)\n", 100 | " return G_prob" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "#定义判别器\n", 112 | "def discriminator(x):\n", 113 | " \n", 114 | " #计算D_h1=ReLU(x*D_W1+D_b1),该层的输入为含784个元素的向量\n", 115 | " D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)\n", 116 | " \n", 117 | " #计算第三层的输出结果。因为使用的是Sigmoid函数,则该输出结果是一个取值为[0,1]间的标量(见上述权重定义)\n", 118 | " #即判别输入的图像到底是真(=1)还是假(=0)\n", 119 | " D_logit = tf.matmul(D_h1, D_W2) + D_b2\n", 120 | " D_prob = tf.nn.sigmoid(D_logit)\n", 121 | " \n", 122 | " #返回判别为真的概率和第三层的输入值,输出D_logit是为了将其输入tf.nn.sigmoid_cross_entropy_with_logits()以构建损失函数\n", 123 | " return D_prob, D_logit" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "#该函数用于输出生成图片\n", 135 | "def plot(samples):\n", 136 | " fig = plt.figure(figsize=(4, 4))\n", 137 | " gs = gridspec.GridSpec(4, 4)\n", 138 | " gs.update(wspace=0.05, hspace=0.05)\n", 139 | "\n", 140 | " for i, sample in enumerate(samples):\n", 141 | " ax = plt.subplot(gs[i])\n", 142 | " plt.axis('off')\n", 143 | " ax.set_xticklabels([])\n", 144 | " ax.set_yticklabels([])\n", 145 | " ax.set_aspect('equal')\n", 146 | " plt.imshow(sample.reshape(28, 28), cmap='Greys_r')\n", 147 | "\n", 148 | 
" return fig\n" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "#### 交叉熵损失函数\n", 156 | "sigmoid_cross_entropy_with_logits函数的输入是logits和targets,logits就是神经网络模型中的 W * X矩阵,且不需要经过Sigmoid激活函数。而targets的shape和logits相同,即正确的标注值。若令x = logits、 z = labels,那么该函数的表达式为z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "#输入随机噪声z而输出生成样本\n", 168 | "G_sample = generator(Z)\n", 169 | "\n", 170 | "#分别输入真实图片和生成的图片,并投入判别器以判断真伪\n", 171 | "D_real, D_logit_real = discriminator(X)\n", 172 | "D_fake, D_logit_fake = discriminator(G_sample)\n", 173 | "\n", 174 | "#以下为原论文的判别器损失和生成器损失,但本实现并没有使用该损失函数\n", 175 | "# D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))\n", 176 | "# G_loss = -tf.reduce_mean(tf.log(D_fake))\n", 177 | "\n", 178 | "# 我们使用交叉熵作为判别器和生成器的损失函数,因为sigmoid_cross_entropy_with_logits内部会对预测输入执行Sigmoid函数,\n", 179 | "#所以我们取判别器最后一层未投入激活函数的值,即D_h1*D_W2+D_b2。\n", 180 | "#tf.ones_like(D_logit_real)创建维度和D_logit_real相等的全是1的标注,真实图片。\n", 181 | "D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))\n", 182 | "D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))\n", 183 | "\n", 184 | "#损失函数为两部分,即E[log(D(x))]+E[log(1-D(G(z)))],将真的判别为假和将假的判别为真\n", 185 | "D_loss = D_loss_real + D_loss_fake\n", 186 | "\n", 187 | "#同样使用交叉熵构建生成器损失函数\n", 188 | "G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))\n", 189 | "\n", 190 | "#定义判别器和生成器的优化方法为Adam算法,关键字var_list表明最小化损失函数所更新的权重矩阵\n", 191 | "D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)\n", 192 | "G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 9, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 207 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 208 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 209 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "#选择训练的批量大小和随机生成噪声的维度\n", 215 | "mb_size = 128\n", 216 | "Z_dim = 100\n", 217 | "\n", 218 | "#读取数据集MNIST,并放在当前目录data文件夹下MNIST文件夹中,如果该地址没有数据,则下载数据至该文件夹\n", 219 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 10, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "Iter: 0\n", 234 | "D loss: 1.671\n", 235 | "G_loss: 1.718\n", 236 | "\n", 237 | "Iter: 2000\n", 238 | "D loss: 0.05008\n", 239 | "G_loss: 4.74\n", 240 | "\n", 241 | "Iter: 4000\n", 242 | "D loss: 0.3667\n", 243 | "G_loss: 4.85\n", 244 | "\n", 245 | "Iter: 6000\n", 246 | "D loss: 0.3974\n", 247 | "G_loss: 4.059\n", 248 | "\n", 249 | "Iter: 8000\n", 250 | "D loss: 0.7007\n", 251 | "G_loss: 2.628\n", 252 | "\n", 253 | "Iter: 10000\n", 254 | "D loss: 0.4421\n", 255 | "G_loss: 3.05\n", 256 | "\n", 257 | "Iter: 12000\n", 258 | "D loss: 0.7872\n", 259 | "G_loss: 2.562\n", 260 | 
"\n", 261 | "Iter: 14000\n", 262 | "D loss: 0.7155\n", 263 | "G_loss: 2.877\n", 264 | "\n", 265 | "Iter: 16000\n", 266 | "D loss: 0.9827\n", 267 | "G_loss: 2.042\n", 268 | "\n", 269 | "Iter: 18000\n", 270 | "D loss: 0.7171\n", 271 | "G_loss: 1.966\n", 272 | "\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "#打开一个会话运行计算图\n", 278 | "sess = tf.Session()\n", 279 | "\n", 280 | "#初始化所有定义的变量\n", 281 | "sess.run(tf.global_variables_initializer())\n", 282 | "\n", 283 | "#如果当前目录下不存在out文件夹,则创建该文件夹\n", 284 | "if not os.path.exists('out/'):\n", 285 | " os.makedirs('out/')\n", 286 | "\n", 287 | "#初始化,并开始迭代训练,100W次\n", 288 | "i = 0\n", 289 | "for it in range(20000):\n", 290 | " \n", 291 | " #每2000次输出一张生成器生成的图片\n", 292 | " if it % 2000 == 0:\n", 293 | " samples = sess.run(G_sample, feed_dict={Z: sample_Z(16, Z_dim)})\n", 294 | "\n", 295 | " fig = plot(samples)\n", 296 | " plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')\n", 297 | " i += 1\n", 298 | " plt.close(fig)\n", 299 | " \n", 300 | " #next_batch抽取下一个批量的图片,该方法返回一个矩阵,即shape=[mb_size,784],每一行是一张图片,共批量大小行\n", 301 | " X_mb, _ = mnist.train.next_batch(mb_size)\n", 302 | " \n", 303 | " #投入数据并根据优化方法迭代一次,计算损失后返回损失值\n", 304 | " _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})\n", 305 | " _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})\n", 306 | "\n", 307 | "\n", 308 | " #每迭代2000次输出迭代数、生成器损失和判别器损失\n", 309 | " if it % 2000 == 0:\n", 310 | " print('Iter: {}'.format(it))\n", 311 | " print('D loss: {:.4}'. format(D_loss_curr))\n", 312 | " print('G_loss: {:.4}'.format(G_loss_curr))\n", 313 | " print()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.5.3" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 | -------------------------------------------------------------------------------- /Experiments/tf_LeNet5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "\n", 13 | "# 配置卷积神经网络的架构参数\n", 14 | "INPUT_NODE = 784\n", 15 | "OUTPUT_NODE = 10\n", 16 | "\n", 17 | "IMAGE_SIZE = 28\n", 18 | "NUM_CHANNELS = 1\n", 19 | "NUM_LABELS = 10\n", 20 | "\n", 21 | "# 第一层卷积层的尺寸和深度\n", 22 | "CONV1_DEEP = 32\n", 23 | "CONV1_SIZE = 5\n", 24 | "# 第二层卷积层的尺寸和深度\n", 25 | "CONV2_DEEP = 64\n", 26 | "CONV2_SIZE = 5\n", 27 | "# 全连接层的结点个数\n", 28 | "FC_SIZE = 512\n", 29 | "\n", 30 | "\n", 31 | "# 定义卷积神经网络的前向传播过程。这里添加了一个新的参数train,用于区分训练过程和测试过程。在这个程序中将用到dropout方法,\n", 32 | "# dropout方法可进一步提升模型的可靠性并防止过拟合,dropout过程只在训练时使用\n", 33 | "def inference(input_tensor, train, regularizer):\n", 34 | " # 声明第一层卷积层的变量并实现前向传播过程。通过使用不同命名空间来隔离不同层的变量,让每一层中的变量命名只需要考虑在当前层的作用,\n", 35 | " # 不需担心重命名的问题。第一层输出为28×28×32的张量\n", 36 | " with tf.variable_scope('layer1-conv1'):\n", 37 | " conv1_weights = 
tf.get_variable('weight', [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],\n", 38 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 39 | " conv1_biases = tf.get_variable('bias', [CONV1_DEEP], initializer=tf.constant_initializer(0.0))\n", 40 | "\n", 41 | " # 使用边长为5,深度为32的卷积核,卷积核的移动步幅为1,且使用0填充\n", 42 | " conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')\n", 43 | " relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))\n", 44 | "\n", 45 | " # 实现第二层池化层的前向传播过程。该最大池化层卷积核边长为2,使用0填充,移动步幅为2.\n", 46 | " # 该层的输入为28×28×32的张量,输出为14×14×32的张量\n", 47 | " with tf.name_scope('layer2-pool1'):\n", 48 | " pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],strides=[1,2,2,1], padding='SAME')\n", 49 | "\n", 50 | " # 声明第三层卷积层的变量并实现前向传播过程,该卷积层的输入为14×14×32的张量,输出为14×14×64的矩阵\n", 51 | " with tf.variable_scope('layer3-conv2'):\n", 52 | " conv2_weights = tf.get_variable('weight', [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],\n", 53 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 54 | " conv2_biases = tf.get_variable('bias', [CONV2_DEEP], initializer=tf.constant_initializer(0.0))\n", 55 | "\n", 56 | " # 使用尺寸为5×5,深度为64的卷积核,卷积核的移动步幅为1,且使用0填充\n", 57 | " conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')\n", 58 | " relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))\n", 59 | "\n", 60 | " # 实现第四层池化层的前向传播过程,输入为14×14×64,输出为7×7×64的张量\n", 61 | " with tf.name_scope('layer4-pool2'):\n", 62 | " pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')\n", 63 | "\n", 64 | " # 将第四层池化层的输出转化为第五层全连接层的输入格式。第四层为7×7×64的张量,第五层输入为向量,所以需要将该张量拉成一个向量\n", 65 | " # pool2.get_shape函数取第四层输出张量的维度,每层的输入输出都为一个BATCH的张量,所以这里得到的维度也包含一个BATCH中数据的数量。\n", 66 | " pool_shape = pool2.get_shape().as_list()\n", 67 | "\n", 68 | " # 计算将张量拉直成向量后的长度,该长度等于张量维度累乘。注意这里的pool_shape[0]为一个batch中数据的个数\n", 69 | " nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]\n", 70 | "\n", 71 | " # 通过tf.reshape函数将第四层的输出变成一个batch的向量\n", 72 | " reshaped = tf.reshape(pool2, [pool_shape[0], nodes])\n", 73 | "\n", 74 | " # 声明第五层全连接层的变量并实现前向传播过程。输入长度为3136的向量,输出长度为512的向量。该层引入了dropout的概念,\n", 75 | " # dropout在训练时随机将部分结点的输出改为0.dropout一般只在全连接层而不是卷积层或池化层使用。\n", 76 | " with tf.variable_scope('layer5-fcl'):\n", 77 | " fc1_weights = tf.get_variable('weight', [nodes, FC_SIZE],\n", 78 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 79 | "\n", 80 | " # 只有全连接层权重需要加入正则化\n", 81 | " if regularizer != None:\n", 82 | " tf.add_to_collection('losses', regularizer(fc1_weights))\n", 83 | " fc1_biases = tf.get_variable('bias', [FC_SIZE], initializer=tf.constant_initializer(0.1))\n", 84 | "\n", 85 | " fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)\n", 86 | " if train: fc1 = tf.nn.dropout(fc1, 0.5)\n", 87 | "\n", 88 | " # 声明第六层全连接层变量并实现前向传播,输入长度为512的向量,输出长度为10的向量。输出通过softmax之后可得到最后的分类结果。\n", 89 | " with tf.variable_scope('layer6-fc2'):\n", 90 | " fc2_weights = tf.get_variable('weight', [FC_SIZE, NUM_LABELS],\n", 91 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 92 | " if regularizer != None:\n", 93 | " tf.add_to_collection('losses', regularizer(fc2_weights))\n", 94 | "\n", 95 | " fc2_biases = tf.get_variable('bias', [NUM_LABELS], initializer=tf.constant_initializer(0.1))\n", 96 | " logit = tf.matmul(fc1, fc2_weights) + fc2_biases\n", 97 | " return logit\n", 98 | "\n" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 
109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 113 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 114 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 115 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 116 | "After 1 training steps, loss on training batch is 15.0016.\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "# -*- coding: utf-8 -*-\n", 122 | "import os\n", 123 | "from tensorflow.examples.tutorials.mnist import input_data\n", 124 | "import numpy as np\n", 125 | "\n", 126 | "# 配置神经网络的参数\n", 127 | "BATCH_SIZE = 8\n", 128 | "LEARNING_RATE_BASE = 0.8\n", 129 | "LEARNING_RATE_DECAY = 0.99\n", 130 | "REGULARIZATION_RATE = 0.0001\n", 131 | "TRAINING_STEPS = 10000\n", 132 | "MOVING_AVERAGE_DECAY = 0.99\n", 133 | "MODEL_SAVE_PATH = \"./model/fcn_mnist\"\n", 134 | "MODEL_NAME = \"fcn_mnist.ckpt\"\n", 135 | "\n", 136 | "\n", 137 | "def train(mnist):\n", 138 | " x = tf.placeholder(tf.float32, [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], name='x-input')\n", 139 | " y = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-output')\n", 140 | "\n", 141 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)\n", 142 | " # 调用推断过程\n", 143 | " y_hat = inference(x, True, regularizer)\n", 144 | " global_step = tf.Variable(0, trainable=False)\n", 145 | "\n", 146 | " # 定义损失函数、学习率、滑动平均操作及训练过程\n", 147 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 148 | " variables_average_op = variable_averages.apply(tf.trainable_variables())\n", 149 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=tf.argmax(y, 1))\n", 150 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 151 | " loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))\n", 152 | " learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE,\n", 153 | " LEARNING_RATE_DECAY)\n", 154 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 155 | "\n", 156 | " with tf.control_dependencies([train_step, variables_average_op]):\n", 157 | " train_op = tf.no_op(name='train')\n", 158 | "\n", 159 | " # 初始化TF持久化类\n", 160 | " saver = tf.train.Saver()\n", 161 | " with tf.Session() as sess:\n", 162 | " sess.run(tf.global_variables_initializer())\n", 163 | "\n", 164 | " # 在训练过程中不再测试模型在验证数据上的表现,验证和测试的过程会有独立的过程完成\n", 165 | " for i in range(TRAINING_STEPS):\n", 166 | " xs, ys = mnist.train.next_batch(BATCH_SIZE)\n", 167 | " reshaped_xs=np.reshape(xs,(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))\n", 168 | " _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y: ys})\n", 169 | "\n", 170 | " # 每1000次迭代保存一次模型\n", 171 | " if i % 1000 == 0:\n", 172 | " # 输出模型在当前训练批量下的损失函数大小\n", 173 | " print('After %d training steps, loss on training batch is %g.' 
% (step, loss_value))\n", 174 | "\n", 175 | " # 保存当前模型,并使用global_step 参数特定地命名\n", 176 | " saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)\n", 177 | "\n", 178 | "\n", 179 | "def main(argv=None):\n", 180 | " mnist = input_data.read_data_sets('./data/MNIST/', one_hot=True)\n", 181 | " train(mnist)\n", 182 | "\n", 183 | "\n", 184 | "if __name__ == '__main__':\n", 185 | " tf.app.run()\n" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": true 193 | }, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.5.3" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /Experiments/tf_orginal_CapsNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#以下代码修改自naturomics的GitHub实现,包含三层CapsNet和后面的重构网络\n", 12 | "#改网络参数比较多,我们后面会只训练测试三层CapsNet。\n", 13 | "\n", 14 | "import tensorflow as tf\n", 15 | "import numpy as np\n", 16 | "import os\n", 17 | "from tqdm import tqdm\n", 18 | "\n", 19 | "epsilon = 1e-9\n", 20 | "batch_size = 8\n", 21 | "epoch = 1\n", 22 | "\n", 23 | "#margin loss 中调节上margin和下margind的权重\n", 24 | "lambda_val = 0.5\n", 25 | "#上margin与下margin的参数值\n", 26 | "m_plus = 0.9\n", 27 | "m_minus = 0.1\n", 28 | "\n", 29 | "# 路由更新c_ij所经过的迭代次数\n", 30 | "iter_routing = 3\n", 31 | "\n", 32 | "# Tensorboard 保存位置\n", 33 | "logdir ='logdir'\n", 34 | "# 数据集路径\n", 35 | "dataset_path = 'data/MNIST'\n", 36 | "is_training= True" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "# 定义加载mnist的函数\n", 48 | "def load_mnist(path, is_training):\n", 49 | "\n", 50 | " #trX将加载储存所有60000张灰度图\n", 51 | " fd = open(os.path.join(path, 'train-images.idx3-ubyte'))\n", 52 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 53 | " trX = loaded[16:].reshape((60000, 28, 28, 1)).astype(np.float)\n", 54 | "\n", 55 | " fd = open(os.path.join(path, 'train-labels.idx1-ubyte'))\n", 56 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 57 | " trY = loaded[8:].reshape((60000)).astype(np.float)\n", 58 | "\n", 59 | " #teX将储存所有一万张测试用的图片\n", 60 | " fd = open(os.path.join(path, 't10k-images.idx3-ubyte'))\n", 61 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 62 | " teX = loaded[16:].reshape((10000, 28, 28, 1)).astype(np.float)\n", 63 | "\n", 64 | " fd = open(os.path.join(path, 't10k-labels.idx1-ubyte'))\n", 65 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 66 | " teY = loaded[8:].reshape((10000)).astype(np.float)\n", 67 | "\n", 68 | " # 将所有训练图片表示为一个4维张量 [60000, 28, 28, 1],其中每个像素值缩放到0和1之间\n", 69 | " trX = tf.convert_to_tensor(trX / 255., tf.float32)\n", 70 | "\n", 71 | " # one hot编码为 [num_samples, 10]\n", 72 | " trY = tf.one_hot(trY, depth=10, axis=1, dtype=tf.float32)\n", 73 | " teY = 
tf.one_hot(teY, depth=10, axis=1, dtype=tf.float32)\n", 74 | "\n", 75 | " # 训练和测试时返回不同的数据\n", 76 | " if is_training:\n", 77 | " return trX, trY\n", 78 | " else:\n", 79 | " return teX / 255., teY\n", 80 | "\n", 81 | "def get_batch_data():\n", 82 | " trX, trY = load_mnist(dataset_path, True)\n", 83 | "\n", 84 | " # 每次产生一个切片\n", 85 | " data_queues = tf.train.slice_input_producer([trX, trY])\n", 86 | "\n", 87 | " # 对队列中的样本进行乱序处理\n", 88 | " X, Y = tf.train.shuffle_batch(data_queues,\n", 89 | " batch_size=batch_size,\n", 90 | " capacity=batch_size * 64,\n", 91 | " min_after_dequeue=batch_size * 32,\n", 92 | " allow_smaller_final_batch=False)\n", 93 | " return (X, Y)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# 通过定义类和对象的方式定义Capssule层级\n", 105 | "class CapsLayer(object):\n", 106 | " ''' Capsule layer 类别参数有:\n", 107 | " Args:\n", 108 | " input: 一个4维张量\n", 109 | " num_outputs: 当前层的Capsule单元数量\n", 110 | " vec_len: 一个Capsule输出向量的长度\n", 111 | " layer_type: 选择'FC' 或 \"CONV\", 以确定是用全连接层还是卷积层\n", 112 | " with_routing: 当前Capsule是否从较低层级中Routing而得出输出向量\n", 113 | "\n", 114 | " Returns:\n", 115 | " 一个四维张量\n", 116 | " '''\n", 117 | " def __init__(self, num_outputs, vec_len, with_routing=True, layer_type='FC'):\n", 118 | " self.num_outputs = num_outputs\n", 119 | " self.vec_len = vec_len\n", 120 | " self.with_routing = with_routing\n", 121 | " self.layer_type = layer_type\n", 122 | "\n", 123 | " def __call__(self, input, kernel_size=None, stride=None):\n", 124 | " '''\n", 125 | " 当“Layer_type”选择的是“CONV”,我们将使用 'kernel_size' 和 'stride'\n", 126 | " '''\n", 127 | "\n", 128 | " # 开始构建卷积层\n", 129 | " if self.layer_type == 'CONV':\n", 130 | " self.kernel_size = kernel_size\n", 131 | " self.stride = stride\n", 132 | "\n", 133 | " # PrimaryCaps层没有Routing过程\n", 134 | " if not self.with_routing:\n", 135 | " # 卷积层为 PrimaryCaps 层(CapsNet第二层), 并将第一层卷积的输出张量作为输入。\n", 136 | " # 输入张量的维度为: [batch_size, 20, 20, 256]\n", 137 | " assert input.get_shape() == [batch_size, 20, 20, 256]\n", 138 | "\n", 139 | " # # 从CapsNet输出向量的每一个分量开始执行卷积,每个分量上执行带32个卷积核的9×9标准卷积\n", 140 | " # capsules = []\n", 141 | " # for i in range(self.vec_len):\n", 142 | " # # 所有Capsule的一个分量,其维度为: [batch_size, 6, 6, 32],即6×6×1×32\n", 143 | " # with tf.variable_scope('ConvUnit_' + str(i)):\n", 144 | " # caps_i = tf.contrib.layers.conv2d(input, self.num_outputs,\n", 145 | " # self.kernel_size, self.stride,\n", 146 | " # padding=\"VALID\")\n", 147 | " #\n", 148 | " # # 将一般卷积的结果张量拉平,并为添加到列表中\n", 149 | " # caps_i = tf.reshape(caps_i, shape=(batch_size, -1, 1, 1))\n", 150 | " # capsules.append(caps_i)\n", 151 | " #\n", 152 | " # # 为将卷积后张量各个分量合并为向量做准备\n", 153 | " # assert capsules[0].get_shape() == [batch_size, 1152, 1, 1]\n", 154 | " #\n", 155 | " # # 合并为PrimaryCaps的输出张量,即6×6×32个长度为8的向量,合并后的维度为 [batch_size, 1152, 8, 1]\n", 156 | " # capsules = tf.concat(capsules, axis=2)\n", 157 | " # # 将每个Capsule 向量投入非线性函数squash进行缩放与激活,第二层输出的向量要经过缩放\n", 158 | " # capsules = squash(capsules)\n", 159 | " # assert capsules.get_shape() == [batch_size, 1152, 8, 1]\n", 160 | " # return(capsules)\n", 161 | "\n", 162 | " # 以下更新后的计算方法\n", 163 | " capsules = tf.contrib.layers.conv2d(input, self.num_outputs * self.vec_len,\n", 164 | " self.kernel_size, self.stride, padding=\"VALID\")\n", 165 | " capsules = tf.reshape(capsules, (batch_size, -1, self.vec_len, 1))\n", 166 | "\n", 167 | " # [batch_size, 1152, 8, 1]\n", 168 | " capsules = squash(capsules)\n", 169 | " assert 
capsules.get_shape() == [batch_size, 1152, 8, 1]\n", 170 | " return (capsules)\n", 171 | "\n", 172 | " if self.layer_type == 'FC':\n", 173 | "\n", 174 | " # DigitCaps 带有Routing过程\n", 175 | " if self.with_routing:\n", 176 | " # CapsNet 的第三层 DigitCaps 层是一个全连接网络\n", 177 | " # 将输入张量重建为 [batch_size, 1152, 1, 8, 1]\n", 178 | " self.input = tf.reshape(input, shape=(batch_size, -1, 1, input.shape[-2].value, 1))\n", 179 | "\n", 180 | " with tf.variable_scope('routing'):\n", 181 | " # 初始化b_IJ的值为零,且维度满足: [1, 1, num_caps_l, num_caps_l_plus_1, 1]\n", 182 | " b_IJ = tf.constant(np.zeros([1, input.shape[1].value, self.num_outputs, 1, 1], dtype=np.float32))\n", 183 | " # 使用定义的Routing过程计算权值更新与s_j\n", 184 | " capsules = routing(self.input, b_IJ)\n", 185 | " # 将s_j投入 squeeze 函数以得出 DigitCaps 层的输出向量\n", 186 | " capsules = tf.squeeze(capsules, axis=1)\n", 187 | "\n", 188 | " return(capsules)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 4, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# 定义路由算法的过程\n", 200 | "def routing(input, b_IJ):\n", 201 | " ''' 路由算法\n", 202 | "\n", 203 | " Args:\n", 204 | " input: 输入张量的维度为 [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]\n", 205 | " 其中num_caps_l为上一层(PrimaryCaps)的Capsule单元数量\n", 206 | " Returns:\n", 207 | " 返回的张量维度为 [batch_size, num_caps_l_plus_1, length(v_j)=16, 1]\n", 208 | " 表征了i+1层的输出向量 `v_j`,num_caps_l_plus_1 为DigitCaps层的输出数\n", 209 | " Notes:\n", 210 | " u_i 表示l层中 capsule i 的输出向量\n", 211 | " v_j 表示l+1层中 capsule j 的输出向量\n", 212 | " '''\n", 213 | "\n", 214 | " # 定义W的张量维度为 [num_caps_j, num_caps_i, len_u_i, len_v_j]\n", 215 | " # W_ij共有1152×10个,每一个的维度为8×16\n", 216 | " W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,\n", 217 | " initializer=tf.random_normal_initializer(stddev=0.01))\n", 218 | "\n", 219 | " # 论文中的 Eq.2, 计算 u_hat\n", 220 | " # 在使用 W 和u_i计算u_hat前,先调整张量维度\n", 221 | " # input => [batch_size, 1152, 10, 8, 1]\n", 222 | " # W => [batch_size, 1152, 10, 8, 16]\n", 223 | " input = tf.tile(input, [1, 1, 10, 1, 1])\n", 224 | " W = tf.tile(W, [batch_size, 1, 1, 1, 1])\n", 225 | " assert input.get_shape() == [batch_size, 1152, 10, 8, 1]\n", 226 | "\n", 227 | " # 因为[8, 16].T x [8, 1] => [16, 1],所以矩阵乘法在最后得出的维度为 [batch_size, 1152, 10, 16, 1]\n", 228 | " u_hat = tf.matmul(W, input, transpose_a=True)\n", 229 | " assert u_hat.get_shape() == [batch_size, 1152, 10, 16, 1]\n", 230 | "\n", 231 | " # 前面是扩展的线性组合,后面是路由的部分,以下开始迭代路由过程更新耦合系数\n", 232 | " # 对应论文中伪代码的第三行\n", 233 | " for r_iter in range(iter_routing):\n", 234 | " with tf.variable_scope('iter_' + str(r_iter)):\n", 235 | " # 原论文伪代码第四行,计算softmax(b_ij)\n", 236 | " # => [1, 1152, 10, 1,1]\n", 237 | " c_IJ = tf.nn.softmax(b_IJ, dim=3)\n", 238 | " c_IJ = tf.tile(c_IJ, [batch_size, 1, 1, 1, 1])\n", 239 | " assert c_IJ.get_shape() == [batch_size, 1152, 10, 1, 1]\n", 240 | "\n", 241 | " # 原论文伪代码第五行,根据更新的c_ij计算s_j\n", 242 | " # 先利用 c_IJ 给 u_hat 加权,即在后两个维度采用对应元素的乘积\n", 243 | " # => [batch_size, 1152, 10, 16, 1]\n", 244 | " s_J = tf.multiply(c_IJ, u_hat)\n", 245 | " # 在第二个维度上求和, 产生的张量维度为 [batch_size, 1, 10, 16, 1]\n", 246 | " s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)\n", 247 | " assert s_J.get_shape() == [batch_size, 1, 10, 16, 1]\n", 248 | "\n", 249 | " # 原论文伪代码的第六行\n", 250 | " # 使用 Eq.1 计算squashing非线性函数\n", 251 | " v_J = squash(s_J)\n", 252 | " assert v_J.get_shape() == [batch_size, 1, 10, 16, 1]\n", 253 | "\n", 254 | " # 原论文伪代码的第七行\n", 255 | " # reshape & tile v_j from [batch_size ,1, 10, 16, 1] to [batch_size, 10, 
1152, 16, 1]\n", 256 | " # then matmul in the last two dim: [16, 1].T x [16, 1] => [1, 1], reduce sum in the\n", 257 | " # batch_size dim, resulting in [1, 1152, 10, 1, 1]\n", 258 | " v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])\n", 259 | " u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)\n", 260 | " assert u_produce_v.get_shape() == [batch_size, 1152, 10, 1, 1]\n", 261 | " b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)\n", 262 | "\n", 263 | " return(v_J)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 5, 269 | "metadata": { 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "def squash(vector):\n", 275 | " ''' 根据原论文中 Eq. 1 定义squashing函数\n", 276 | " Args:\n", 277 | " vector: 一个 5-D 张量,其维度是 [batch_size, 1, num_caps, vec_len, 1],\n", 278 | " Returns:\n", 279 | " 返回一个 5-D 张量,其第四和第五个维度经过了该非线性函数计算\n", 280 | " '''\n", 281 | " vec_squared_norm = tf.reduce_sum(tf.square(vector), -2, keep_dims=True)\n", 282 | " scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)\n", 283 | " vec_squashed = scalar_factor * vector # element-wise\n", 284 | " return(vec_squashed)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": true 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "# 以下定义整个 CapsNet 的架构与正向传播过程\n", 296 | "class CapsNet():\n", 297 | " def __init__(self, is_training=True):\n", 298 | " self.graph = tf.Graph()\n", 299 | " with self.graph.as_default():\n", 300 | " if is_training:\n", 301 | " # 获取一个批量的训练数据\n", 302 | " self.X, self.Y = get_batch_data()\n", 303 | "\n", 304 | " self.build_arch()\n", 305 | " self.loss()\n", 306 | "\n", 307 | " # t_vars = tf.trainable_variables()\n", 308 | " self.optimizer = tf.train.AdamOptimizer()\n", 309 | " self.global_step = tf.Variable(0, name='global_step', trainable=False)\n", 310 | " self.train_op = self.optimizer.minimize(self.total_loss, global_step=self.global_step) # var_list=t_vars)\n", 311 | " else:\n", 312 | " self.X = tf.placeholder(tf.float32,\n", 313 | " shape=(batch_size, 28, 28, 1))\n", 314 | " self.build_arch()\n", 315 | "\n", 316 | " tf.logging.info('Seting up the main structure')\n", 317 | "\n", 318 | " # CapsNet 类中的build_arch方法能构建整个网络的架构\n", 319 | " def build_arch(self):\n", 320 | " # 以下构建第一个常规卷积层\n", 321 | " with tf.variable_scope('Conv1_layer'):\n", 322 | " # 第一个卷积层的输出张量为: [batch_size, 20, 20, 256]\n", 323 | " # 以下卷积输入图像X,采用256个9×9的卷积核,步幅为1,且不使用填充\n", 324 | " conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,\n", 325 | " kernel_size=9, stride=1,\n", 326 | " padding='VALID')\n", 327 | " # 使用 assert 可以在出现错误条件时就返回错误,有助于调试\n", 328 | " assert conv1.get_shape() == [batch_size, 20, 20, 256]\n", 329 | "\n", 330 | " # 以下是原论文中PrimaryCaps层的构建过程,该层的输出维度为 [batch_size, 1152, 8, 1]\n", 331 | " with tf.variable_scope('PrimaryCaps_layer'):\n", 332 | " # 调用前面定义的CapsLayer类构建第二个卷积层,该过程相当于执行八次常规卷积,\n", 333 | " # 然后将各对应位置的元素组合成一个长度为8的向量,这八次常规卷积都是采用32个9×9的卷积核、步幅为2\n", 334 | " primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')\n", 335 | " caps1 = primaryCaps(conv1, kernel_size=9, stride=2)\n", 336 | " assert caps1.get_shape() == [batch_size, 1152, 8, 1]\n", 337 | "\n", 338 | " # 以下构建 DigitCaps 层, 该层返回的张量维度为 [batch_size, 10, 16, 1]\n", 339 | " with tf.variable_scope('DigitCaps_layer'):\n", 340 | " # DigitCaps是最后一层,它返回对应10个类别的向量(每个有16个元素),该层的构建带有Routing过程\n", 341 | " digitCaps = CapsLayer(num_outputs=10, vec_len=16, with_routing=True, 
layer_type='FC')\n", 342 | " self.caps2 = digitCaps(caps1)\n", 343 | "\n", 344 | " # 以下构建论文图2中的解码结构,即由16维向量重构出对应类别的整个图像\n", 345 | " # 1. Do masking, how:\n", 346 | " with tf.variable_scope('Masking'):\n", 347 | " # Method 1. masking with true label, default mode\n", 348 | "\n", 349 | " # mask_with_y是否用真实标签蒙住目标Capsule\n", 350 | " mask_with_y=True\n", 351 | " if mask_with_y:\n", 352 | " self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)\n", 353 | " self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True) + epsilon)\n", 354 | "\n", 355 | " # 通过3个全连接层重构MNIST图像,这三个全连接层的神经元数分别为512、1024、784\n", 356 | " # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]\n", 357 | " with tf.variable_scope('Decoder'):\n", 358 | " vector_j = tf.reshape(self.masked_v, shape=(batch_size, -1))\n", 359 | " fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)\n", 360 | " assert fc1.get_shape() == [batch_size, 512]\n", 361 | " fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)\n", 362 | " assert fc2.get_shape() == [batch_size, 1024]\n", 363 | " self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784, activation_fn=tf.sigmoid)\n", 364 | "\n", 365 | " # 定义 CapsNet 的损失函数,损失函数一共分为衡量 CapsNet准确度的Margin loss\n", 366 | " # 和衡量重构图像准确度的 Reconstruction loss\n", 367 | " def loss(self):\n", 368 | " # 以下先定义重构损失,因为DigitCaps的输出向量长度就为某类别的概率,因此可以借助计算向量长度计算损失\n", 369 | " # [batch_size, 10, 1, 1]\n", 370 | " # max_l = max(0, m_plus-||v_c||)^2\n", 371 | " max_l = tf.square(tf.maximum(0., m_plus - self.v_length))\n", 372 | " # max_r = max(0, ||v_c||-m_minus)^2\n", 373 | " max_r = tf.square(tf.maximum(0., self.v_length - m_minus))\n", 374 | " assert max_l.get_shape() == [batch_size, 10, 1, 1]\n", 375 | "\n", 376 | " # 将当前的维度[batch_size, 10, 1, 1] 转换为10个数字类别的one-hot编码 [batch_size, 10]\n", 377 | " max_l = tf.reshape(max_l, shape=(batch_size, -1))\n", 378 | " max_r = tf.reshape(max_r, shape=(batch_size, -1))\n", 379 | "\n", 380 | " # 计算 T_c: [batch_size, 10],其为分类的指示函数\n", 381 | " # 若令T_c = Y,那么对应元素相乘就是有类别相同才会有非零输出值,T_c 和 Y 都为One-hot编码\n", 382 | " T_c = self.Y\n", 383 | " # [batch_size, 10], 对应元素相乘并构建最后的Margin loss 函数\n", 384 | " L_c = T_c * max_l + lambda_val * (1 - T_c) * max_r\n", 385 | "\n", 386 | " self.margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))\n", 387 | "\n", 388 | " # 以下构建reconstruction loss函数\n", 389 | " # 这一过程的损失函数通过计算FC Sigmoid层的输出像素点与原始图像像素点间的欧几里德距离而构建\n", 390 | " orgin = tf.reshape(self.X, shape=(batch_size, -1))\n", 391 | " squared = tf.square(self.decoded - orgin)\n", 392 | " self.reconstruction_err = tf.reduce_mean(squared)\n", 393 | "\n", 394 | " # 构建总损失函数,Hinton论文将reconstruction loss乘上0.0005\n", 395 | " # 以使它不会主导训练过程中的Margin loss\n", 396 | " self.total_loss = self.margin_loss + 0.0005 * self.reconstruction_err\n", 397 | "\n", 398 | " # 以下输出TensorBoard\n", 399 | " tf.summary.scalar('margin_loss', self.margin_loss)\n", 400 | " tf.summary.scalar('reconstruction_loss', self.reconstruction_err)\n", 401 | " tf.summary.scalar('total_loss', self.total_loss)\n", 402 | " recon_img = tf.reshape(self.decoded, shape=(batch_size, 28, 28, 1))\n", 403 | " tf.summary.image('reconstruction_img', recon_img)\n", 404 | " self.merged_sum = tf.summary.merge_all()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | 
"INFO:tensorflow:Seting up the main structure\n", 419 | "INFO:tensorflow:Graph loaded\n" 420 | ] 421 | } 422 | ], 423 | "source": [ 424 | "if __name__ == \"__main__\":\n", 425 | " # 训练和推断\n", 426 | " capsNet = CapsNet(is_training=is_training)\n", 427 | " tf.logging.info('Graph loaded')\n", 428 | " sv = tf.train.Supervisor(graph=capsNet.graph,\n", 429 | " logdir=logdir,\n", 430 | " save_model_secs=0)\n", 431 | "\n", 432 | " with sv.managed_session() as sess:\n", 433 | " num_batch = int(60000 / batch_size)\n", 434 | " for epoch in range(epoch):\n", 435 | " if sv.should_stop():\n", 436 | " break\n", 437 | " for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):\n", 438 | " sess.run(capsNet.train_op)\n", 439 | "\n", 440 | " global_step = sess.run(capsNet.global_step)\n", 441 | " sv.saver.save(sess, logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))\n", 442 | "\n", 443 | " tf.logging.info('Training done')" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": { 450 | "collapsed": true 451 | }, 452 | "outputs": [], 453 | "source": [] 454 | } 455 | ], 456 | "metadata": { 457 | "kernelspec": { 458 | "display_name": "Python 3", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 3 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython3", 472 | "version": "3.5.3" 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 2 477 | } 478 | -------------------------------------------------------------------------------- /Experiments/tf_trial_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "A=np.array([[11,12,13],[21,22,23],[31,32,33]])\n", 13 | "B=np.ones(shape=(3,3))" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import tensorflow as tf\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 8, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Tensor(\"add:0\", shape=(2,), dtype=int32)\n", 39 | "[3 6]\n" 40 | ] 41 | }, 42 | { 43 | "data": { 44 | "text/plain": [ 45 | ">" 46 | ] 47 | }, 48 | "execution_count": 8, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "a=tf.constant([1,2],name=\"a\")\n", 55 | "b=tf.constant([2,4],name=\"b\")\n", 56 | "result = a+b\n", 57 | "print(result)\n", 58 | "\n", 59 | "#上面只是定义了计算图,并没有运行计算图,所以不会输出运算结果\n", 60 | "\n", 61 | "sess=tf.Session()\n", 62 | "a=sess.run(result)\n", 63 | "print(a)\n", 64 | "sess.close\n", 65 | "\n", 66 | "#打开会话,运行计算图,关闭计算图" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "[ 1 4 9 16]\n", 81 | "[ 1 4 9 16]\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "with tf.Session() as sess:\n", 87 | " a=tf.constant([1,2,3,4])\n", 88 | " b=tf.constant([1,2,3,4])\n", 89 | 
" result=tf.multiply(a,b)\n", 90 | " c=sess.run(result)\n", 91 | " print(c)\n", 92 | " \n", 93 | "print(c)\n", 94 | "\n", 95 | "# with 结束,计算会话自动关闭" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "[[ 3.95757794]]\n", 110 | "[[ 3.95757794]]\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "import tensorflow as tf\n", 116 | "#(2,3,1单元)3层前向神经网络(无激活函数)\n", 117 | "\n", 118 | "#生成服从标准差为1的正态分布的随机变量,作为初始化矩阵\n", 119 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 120 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 121 | "\n", 122 | "x=tf.constant([[0.7,0.9]])\n", 123 | "#矩阵乘法\n", 124 | "a=tf.matmul(x,w1)\n", 125 | "b=tf.matmul(a,w2)\n", 126 | "\n", 127 | "sess=tf.Session()\n", 128 | "#需要运行初始化赋值,前面只是定义,没运算\n", 129 | "sess.run(w1.initializer)\n", 130 | "sess.run(w2.initializer)\n", 131 | "y=sess.run(b)\n", 132 | "\n", 133 | "sess.close()\n", 134 | "print(y)\n", 135 | "\n", 136 | "with tf.Session() as sess:\n", 137 | " w3=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 138 | " w4=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 139 | " x1=tf.constant([[0.7,0.9]])\n", 140 | " a1=tf.matmul(x,w3)\n", 141 | " b1=tf.matmul(a1,w4)\n", 142 | " #可以直接嵌入初始化\n", 143 | " sess.run(tf.global_variables_initializer())\n", 144 | " print(sess.run(b1))\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 11, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "[[ 3.95757794]]\n", 159 | "[[ 3.95757794]\n", 160 | " [ 1.657197 ]\n", 161 | " [ 7.20209646]]\n" 162 | ] 163 | }, 164 | { 165 | "data": { 166 | "text/plain": [ 167 | ">" 168 | ] 169 | }, 170 | "execution_count": 11, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "import tensorflow as tf\n", 177 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 178 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 179 | "\n", 180 | "#因为需要重复输入x,而每建一个x就会生成一个结点,计算图的效率会低。所以使用占位符\n", 181 | "x=tf.placeholder(tf.float32,shape=(1,2))\n", 182 | "x1=tf.placeholder(tf.float32,shape=(3,2))\n", 183 | "a=tf.matmul(x,w1)\n", 184 | "a1=tf.matmul(x1,w1)\n", 185 | "y=tf.matmul(a,w2)\n", 186 | "y1=tf.matmul(a1,w2)\n", 187 | "\n", 188 | "sess=tf.Session()\n", 189 | "sess.run(tf.global_variables_initializer())\n", 190 | "#运行y时将占位符填上,feed_dict为字典,变量名不可变\n", 191 | "y_hat=sess.run(y,feed_dict={x:[[0.7,0.9]]})\n", 192 | "y_hat1=sess.run(y1,feed_dict={x1:[[0.7,0.9],[0.2,0.5],[1,2]]})# batch = 3\n", 193 | "print(y_hat)\n", 194 | "print(y_hat1)\n", 195 | "sess.close\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 2, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "[[-0.81131822 1.48459876 0.06532937]\n", 210 | " [-2.4427042 0.0992484 0.59122431]]\n", 211 | "[[-0.81131822]\n", 212 | " [ 1.48459876]\n", 213 | " [ 0.06532937]]\n", 214 | "在迭代 0 次后,训练损失为 0.047106\n", 215 | "在迭代 1000 次后,训练损失为 0.0114981\n", 216 | "在迭代 2000 次后,训练损失为 0.00481489\n", 217 | "在迭代 3000 次后,训练损失为 0.00285046\n", 218 | "在迭代 4000 次后,训练损失为 0.00207789\n", 219 | "在迭代 5000 次后,训练损失为 0.00158399\n", 220 | "在迭代 6000 次后,训练损失为 0.0011365\n", 221 | "在迭代 7000 次后,训练损失为 
0.000963961\n", 222 | "在迭代 8000 次后,训练损失为 0.000769849\n", 223 | "在迭代 9000 次后,训练损失为 0.000558014\n", 224 | "在迭代 10000 次后,训练损失为 0.000354686\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "import tensorflow as tf\n", 230 | "from numpy.random import RandomState\n", 231 | "\n", 232 | "batch_size=10\n", 233 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 234 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 235 | "\n", 236 | "# None 可以根据batch 大小确定维度,在shape的一个维度上使用None,方便不大的batch\n", 237 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 238 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 239 | "\n", 240 | "a=tf.matmul(x,w1)\n", 241 | "yhat=tf.matmul(a,w2)\n", 242 | "\n", 243 | "#定义交叉熵为损失函数,训练过程使用Adam算法最小化交叉熵\n", 244 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 245 | "train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)\n", 246 | "\n", 247 | "rdm=RandomState(1)\n", 248 | "data_size=516\n", 249 | "\n", 250 | "#生成两个特征,共data_size个样本\n", 251 | "X=rdm.rand(data_size,2)\n", 252 | "#定义规则给出样本标签,所有x1+x2<1的样本认为是正样本,其他为负样本。Y,1为正样本\n", 253 | "Y = [[int(x1+x2 < 1)] for (x1, x2) in X]\n", 254 | "\n", 255 | "with tf.Session() as sess:\n", 256 | " sess.run(tf.global_variables_initializer())\n", 257 | " print(sess.run(w1))\n", 258 | " print(sess.run(w2))\n", 259 | " steps=11000\n", 260 | " for i in range(steps):\n", 261 | " \n", 262 | " #选定每一个批量读取的首尾位置,确保在1个epoch内采样训练\n", 263 | " start = i * batch_size % data_size\n", 264 | " end = min(start + batch_size,data_size)\n", 265 | " sess.run(train_step,feed_dict={x:X[start:end],y:Y[start:end]})\n", 266 | " if i % 1000 == 0:\n", 267 | " training_loss= sess.run(cross_entropy,feed_dict={x:X,y:Y})\n", 268 | " print(\"在迭代 %d 次后,训练损失为 %g\"%(i,training_loss))" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "collapsed": true 275 | }, 276 | "source": [ 277 | "#### 激活函数和偏置项:\n", 278 | "a=tf.nn.relu(tf.matmul(x,w1)+biases1)\n", 279 | "\n", 280 | "yhat=tf.nn.relu(tf.matmul(a,w2)+biases2)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "collapsed": true 287 | }, 288 | "source": [ 289 | "#### 交叉熵函数\n", 290 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 291 | "\n", 292 | "tf.reduce_mean(x)表示计算全局平均值。tf.clip_by_value()函数可以将张量中的数值限制在一个范围内。tf.log()对张量内的所有元素依次求对数。交叉熵函数一般会与softmax回归一起使用,TensorFlow将它们进行了统一封装:cross_entropy=tf.nn.softmax_cross_entropy_with_logits(yhat,y)\n", 293 | "\n" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### 均方误差损失函数\n", 301 | "mse=tf.reduce_mean(tf.square(y-yhat))\n", 302 | "\n", 303 | "tf.select(tf.greater(y,yhat),y-yhat,yhat-y)\n", 304 | "\n", 305 | "tf.greater()的输入是两个张量,比较两个张量中的每一个元素,并返回比较结果(true或false的向量)。tf.select()有三个参数,第一个参数条件为真时选择第二个参数中的值,否则选择第三个参数的值。" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 10, 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "#decayed_learning_rate=learning_rate*decay_rate^(global_steps/decay_steps),指数衰减函数的定义\n", 317 | "\n", 318 | "global_step=tf.Variable(0)\n", 319 | "#使用exponential_decay生成学习速率,因为staircase=tire,每100次迭代,学习率×0.96\n", 320 | "learning_rate=tf.train.exponential_decay(0.1,global_step,100,0.96,staircase=True)\n", 321 | "#在minimize中导入global_step将自动更新\n", 322 | "#learning_step=tf.train.GtadientDescentOptimizer(learning_rate).minimize(loss_function,global_step=global_step)\n" 323 | ] 324 | }, 325 | { 326 | 
"cell_type": "markdown", 327 | "metadata": { 328 | "collapsed": true 329 | }, 330 | "source": [ 331 | "#### 带L2正则化的损失函数\n", 332 | "w=tf.Variable(tf.random_normal([2,1],stddev=1,seed=1))\n", 333 | "\n", 334 | "yhat=tf.matmul(x,w)\n", 335 | "\n", 336 | "loss=tf.reduce_mean(tf.square(y-yhat))+tf.contrib.layers.l2_regularizer(lambda)(w)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 1, 342 | "metadata": { 343 | "collapsed": false 344 | }, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "6.5\n", 351 | "12.75\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "import tensorflow as tf\n", 357 | "w=tf.constant([[1.0,-5.0],[-3.0,4.0]])\n", 358 | "with tf.Session() as sess:\n", 359 | " #L1正则化:(1+5+3+4)×0.5\n", 360 | " print(sess.run(tf.contrib.layers.l1_regularizer(0.5)(w)))\n", 361 | " #L2正则化:(1+25+9+16)/2×0.5,L2正则化会处以2,无偏估计?\n", 362 | " print(sess.run(tf.contrib.layers.l2_regularizer(0.5)(w)))" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "神经网络结构复杂后,定义网络结构的部分和计算损失函数的部分可能不在同一个函数中。所以采用collection在一个计算图中保留一组实体(如张量)。" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 2, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "import tensorflow as tf\n", 381 | "\n", 382 | "#通过集合(collection)计算一个5层神经网络带L2正则化的损失函数\n", 383 | "\n", 384 | "#随机正态初始化一层神经网络的权重,并将权重的L2正则化损失加入名为losses的集合中,返回初始化的权重\n", 385 | "def get_weight(shape,lambd):\n", 386 | " var=tf.Variable(tf.random_normal(shape),dtype=tf.float32)\n", 387 | " \n", 388 | " #tf.add_to_collection函数将新生成变量的L2正则化损失项加入集合,第一个参数为集合名,第二个参数为加入集合的内容。\n", 389 | " tf.add_to_collection('losses',tf.contrib.layers.l2_regularizer(lambd)(var))\n", 390 | " return var\n", 391 | "\n", 392 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 393 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 394 | "batch_size=8\n", 395 | "\n", 396 | "#定义每一层中结点个数和层数\n", 397 | "layer_dimension=[2,10,10,10,1]\n", 398 | "n_layers=len(layer_dimension)\n", 399 | "\n", 400 | "#该变量维护前向传播时最深层的结点,最开始为输入层\n", 401 | "cur_layer=x\n", 402 | "\n", 403 | "#输入层结点个数\n", 404 | "in_dimension=layer_dimension[0]\n", 405 | "\n", 406 | "#通过循环生成5层全连接神经网络\n", 407 | "for i in range(1,n_layers):\n", 408 | " \n", 409 | " #下一层节点数\n", 410 | " out_dimension=layer_dimension[i]\n", 411 | " \n", 412 | " #生成当前层中权重的变量,并将这个变量的L2正则化损失加入计算图上的集合\n", 413 | " #[in_dimension,out_dimension],例第一层到第二层之间的权重维度为2×10\n", 414 | " weight=get_weight([in_dimension,out_dimension],0.001)\n", 415 | " \n", 416 | " #偏置项和后一层维度相等,为什么是wx+0.1,而不是wx+b??\n", 417 | " bias=tf.Variable(tf.constant(0.1,shape=[out_dimension]))\n", 418 | " \n", 419 | " #使用ReLU激活函数,cur_layer储存传播一层后的激活情况,后一层激活函数的输出\n", 420 | " cur_layer=tf.nn.relu(tf.matmul(cur_layer,weight)+bias)\n", 421 | " in_dimension=layer_dimension[i]\n", 422 | " \n", 423 | "mse_loss=tf.reduce_mean(tf.square(y-cur_layer))\n", 424 | "\n", 425 | "#将均方误差函数加入损失集合\n", 426 | "tf.add_to_collection('losses',mse_loss)\n", 427 | "\n", 428 | "#get_collection返回一个列表,这个列表是集合中的所有元素,这些元素就是组成损失函数的误差和正则项,相加得最终损失函数\n", 429 | "loss=tf.add_n(tf.get_collection('losses'))\n", 430 | "\n" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 4, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [ 440 | { 441 | "name": "stdout", 442 | "output_type": "stream", 443 | "text": [ 444 | "0.0\n", 445 | "10.0\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "import tensorflow as tf\n", 
451 | "\n", 452 | "#tf.assign(A, new_number),这个函数的功能主要是把A的值变为new_number\n", 453 | "A=tf.Variable(tf.constant(0.0),dtype=tf.float32)\n", 454 | "with tf.Session() as sess: \n", 455 | " sess.run(tf.global_variables_initializer()) \n", 456 | " print (sess.run(A)) \n", 457 | " sess.run(tf.assign(A, 10)) \n", 458 | " print (sess.run(A)) " 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 2, 464 | "metadata": { 465 | "collapsed": false 466 | }, 467 | "outputs": [ 468 | { 469 | "name": "stdout", 470 | "output_type": "stream", 471 | "text": [ 472 | "[0.0, 0.0]\n", 473 | "[5.0, 4.5]\n", 474 | "[10.0, 4.5549998]\n", 475 | "[10.0, 4.6094499]\n" 476 | ] 477 | } 478 | ], 479 | "source": [ 480 | "import tensorflow as tf\n", 481 | "\n", 482 | "#滑动平均模型\n", 483 | "\n", 484 | "#定义一个变量计算滑动平均,初始值为0,所有需要计算滑动平均的变量必须是实数型\n", 485 | "v1=tf.Variable(0,dtype=tf.float32)\n", 486 | "\n", 487 | "#step变量模拟神经网络中的迭代次数,用于动态控制衰减率\n", 488 | "step=tf.Variable(0,trainable=False)\n", 489 | "\n", 490 | "#定义一个滑动平均的类,初始化时给定了衰减率和控制衰减率的变量step\n", 491 | "ema=tf.train.ExponentialMovingAverage(0.99,step)\n", 492 | "\n", 493 | "#定义一个更新变量的滑动平均操作,给定一个列表,每次执行操作时更新列表所有变量\n", 494 | "maintain_averages_op=ema.apply([v1])\n", 495 | "\n", 496 | "with tf.Session() as sess:\n", 497 | " \n", 498 | " #初始化所有变量\n", 499 | " sess.run(tf.global_variables_initializer())\n", 500 | " \n", 501 | " #通过ema.average(v1)获取滑动平均之后变量的取值。初始化后,v1的值和滑动平均都为0\n", 502 | " print(sess.run([v1,ema.average(v1)]))\n", 503 | " \n", 504 | " #更新变量v1的值为5,tf.assign将数值分配给变量\n", 505 | " sess.run(tf.assign(v1,5))\n", 506 | " \n", 507 | " #更新v1的滑动平均值。衰减率为min{0.99,(1+step)/(10+step)≈0.1}=0.1,所以v1的滑动平均值会更新为 0.1×0+0.9×5=4.5\n", 508 | " sess.run(maintain_averages_op)\n", 509 | " print(sess.run([v1,ema.average(v1)]))\n", 510 | " \n", 511 | " #将迭代设置为10000步\n", 512 | " sess.run(tf.assign(step,10000))\n", 513 | " \n", 514 | " #更新v1的值为10\n", 515 | " sess.run(tf.assign(v1,10))\n", 516 | " \n", 517 | " #更新v1的滑动平均值。衰减率为min{0.99,(1+step)/(10+step)≈0.999}=0.99,所以v1的滑动平均会被更新为0.99×4.5+0.01×10=4.555\n", 518 | " sess.run(maintain_averages_op)\n", 519 | " print(sess.run([v1,ema.average(v1)]))\n", 520 | " \n", 521 | " #再次更新滑动平均值,得到新的滑动平均值为0.99×4.555+0.01×10=4.60945\n", 522 | " sess.run(maintain_averages_op)\n", 523 | " print(sess.run([v1,ema.average(v1)]))\n", 524 | "\n" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": { 530 | "collapsed": true 531 | }, 532 | "source": [ 533 | "### MNIST 手写字体识别" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 3, 539 | "metadata": { 540 | "collapsed": false 541 | }, 542 | "outputs": [ 543 | { 544 | "name": "stdout", 545 | "output_type": "stream", 546 | "text": [ 547 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 548 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 549 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 550 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 551 | "Training data size: 55000\n", 552 | "Validating data size: 5000\n", 553 | "Testing data size: 10000\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "#导入数据\n", 559 | "import tensorflow as tf\n", 560 | "\n", 561 | "#原网站提供了6W张训练图片和1W张测试图片,导入的该工具会从训练图片分出5000张作为验证集\n", 562 | "from tensorflow.examples.tutorials.mnist import input_data\n", 563 | "\n", 564 | "#读取路径为当前路径下的data文件夹下的MNIST文件夹内,如果该文件夹没有,则自动下载数据至该文件夹\n", 565 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 566 | "\n", 567 | "print(\"Training data size: \", mnist.train.num_examples) \n", 568 | "print 
(\"Validating data size: \", mnist.validation.num_examples) \n", 569 | "print (\"Testing data size: \", mnist.test.num_examples) " 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 4, 575 | "metadata": { 576 | "collapsed": false 577 | }, 578 | "outputs": [ 579 | { 580 | "name": "stdout", 581 | "output_type": "stream", 582 | "text": [ 583 | "X shape: (100, 784)\n", 584 | "Y shape: (100, 10)\n" 585 | ] 586 | } 587 | ], 588 | "source": [ 589 | "#为了方便使用SGD,mnist.train.next_batch函数可以从所有训练数据中取一个小批量投入训练\n", 590 | "\n", 591 | "batch_size=100\n", 592 | "\n", 593 | "#从训练集选取batch_size个训练数据\n", 594 | "xs,ys=mnist.train.next_batch(batch_size)\n", 595 | "\n", 596 | "#将图片展开成一个长度为28×28=784的一维数组,一张图片可作为一个特征向量。所以batch为100的矩阵维度为100×784\n", 597 | "print('X shape:',xs.shape)\n", 598 | "print('Y shape:',ys.shape)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 21, 604 | "metadata": { 605 | "collapsed": false 606 | }, 607 | "outputs": [ 608 | { 609 | "name": "stdout", 610 | "output_type": "stream", 611 | "text": [ 612 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 613 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 614 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 615 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 616 | ] 617 | } 618 | ], 619 | "source": [ 620 | "import tensorflow as tf\n", 621 | "from tensorflow.examples.tutorials.mnist import input_data\n", 622 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 623 | "\n", 624 | "\n", 625 | "#输入结点数为像素点数,输出结点数为类别数\n", 626 | "INPUT_NODE=784\n", 627 | "OUTPUT_NODE=10\n", 628 | "\n", 629 | "#一个隐藏层\n", 630 | "LAYER1_NODE=500\n", 631 | "\n", 632 | "#一个批量中的样本量,数据量越小训练过程越接近SGD,数据量越大训练过程越接近梯度下降\n", 633 | "BATCH_SIZE=100\n", 634 | "\n", 635 | "#学习率和学习衰减率\n", 636 | "LEARNING_RATE_BASE=0.8\n", 637 | "LEARNING_RATE_DECAY=0.99\n", 638 | "\n", 639 | "#正则化系数、迭代次数和滑动平均衰减率\n", 640 | "REGULARIZATION_RATE=0.0001\n", 641 | "TRAINING_STEPS=3000\n", 642 | "MOVING_AVERAGE_DECAY=0.99\n", 643 | "\n", 644 | "#定义推断函数,给定所有参数下计算神经网络的前向传播结果。参数avg_class可确定推断中是否使用滑动平均模型\n", 645 | "def inference(input_tensor,avg_class,weights1,biases1,weights2,biases2):\n", 646 | "    \n", 647 | "    #没有提供滑动平均类时,直接使用参数当前的取值\n", 648 | "    if avg_class == None:\n", 649 | "        \n", 650 | "        #计算隐藏层前向传播结果,使用ReLU激活函数\n", 651 | "        layer1=tf.nn.relu(tf.matmul(input_tensor,weights1)+biases1)\n", 652 | "        \n", 653 | "        #计算输出层的前向传播结果\n", 654 | "        return tf.matmul(layer1,weights2)+biases2\n", 655 | "    else:\n", 656 | "        \n", 657 | "        #首先使用avg_class.average函数计算变量的滑动均值,然后计算相应的前向传播结果\n", 658 | "        layer1=tf.nn.relu(tf.matmul(input_tensor,avg_class.average(weights1))+avg_class.average(biases1))\n", 659 | "        return tf.matmul(layer1,avg_class.average(weights2))+avg_class.average(biases2)\n", 660 | "    \n", 661 | "#模型训练函数\n", 662 | "\n", 663 | "def train(mnist):\n", 664 | "    x=tf.placeholder(tf.float32,[None,INPUT_NODE],name='x-input')\n", 665 | "    y=tf.placeholder(tf.float32,[None,OUTPUT_NODE],name='y-input')\n", 666 | "    \n", 667 | "    #生成隐藏层参数\n", 668 | "    weights1=tf.Variable(tf.truncated_normal([INPUT_NODE,LAYER1_NODE],stddev=0.1))\n", 669 | "    biases1=tf.Variable(tf.constant(0.1,shape=[LAYER1_NODE]))\n", 670 | "    \n", 671 | "    #生成输出层参数\n", 672 | "    weights2=tf.Variable(tf.truncated_normal([LAYER1_NODE,OUTPUT_NODE],stddev=0.1))\n", 673 | "    biases2=tf.Variable(tf.constant(0.1,shape=[OUTPUT_NODE]))\n", 674 | "    \n", 675 | "    #计算当前参数下前向传播的结果,这里设为‘None’不会计算滑动平均值\n", 676 | "    y_hat=inference(x,None,weights1,biases1,weights2,biases2)\n", 677 | 
"\n", 678 | "    #定义储存迭代数的变量,这个变量不需要计算滑动平均值,所以这里指定的这个变量为不可训练变量(trainable=False)\n", 679 | "    global_step=tf.Variable(0,trainable=False)\n", 680 | "    \n", 681 | "    #给定滑动平均衰减率和迭代数,初始化滑动平均类。\n", 682 | "    variable_averages=tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step)\n", 683 | "    \n", 684 | "    #在所有代表神经网络参数的变量上使用滑动平均,其他超参数不需要。tf.trainable_variables返回的就是图上的集合GraphKeys.TRAINABLE_VARIABLES中的元素。\n", 685 | "    variables_averages_op=variable_averages.apply(tf.trainable_variables())\n", 686 | "    \n", 687 | "    #计算使用滑动平均后的前向传播结果,滑动平均不会改变变量本身,而是使用影子变量记录滑动平均值,需要使用滑动平均再明确调用average函数\n", 688 | "    average_y_hat=inference(x,variable_averages,weights1,biases1,weights2,biases2)\n", 689 | "    \n", 690 | "    #使用tf.argmax函数得到正确答案对应的类别编号,logits应为前向传播结果y_hat而非标签y\n", 691 | "    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=tf.argmax(y, 1))\n", 692 | "    \n", 693 | "    #计算当前批量中所有样本的交叉熵均值\n", 694 | "    cross_entropy_mean=tf.reduce_mean(cross_entropy)\n", 695 | "    \n", 696 | "    #计算L2正则化损失函数\n", 697 | "    regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)\n", 698 | "    \n", 699 | "    #计算模型的正则化损失,只计算神经网络权重的正则化损失,不使用偏置项\n", 700 | "    regularization=regularizer(weights1)+regularizer(weights2)\n", 701 | "    \n", 702 | "    #总损失函数\n", 703 | "    loss=cross_entropy_mean+regularization\n", 704 | "    \n", 705 | "    #设置指数衰减学习率.基础学习率、当前迭代次数、一个epoch所需要的迭代次数、学习衰减率\n", 706 | "    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,global_step,mnist.train.num_examples/BATCH_SIZE,LEARNING_RATE_DECAY)\n", 707 | "    \n", 708 | "    #使用梯度下降优化算法优化损失函数,损失函数包括交叉熵损失和L2正则化损失\n", 709 | "    train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)\n", 710 | "    \n", 711 | "    #在训练神经网络模型时,每过一遍数据既需要通过反向传播来更新参数,也要更新每个参数的滑动平均值。为了一次完成多个操作\n", 712 | "    #train_op=tf.group(train_step,variables_averages_op)\n", 713 | "    with tf.control_dependencies([train_step,variables_averages_op]):\n", 714 | "        train_op=tf.no_op(name='train')\n", 715 | "    \n", 716 | "    correct_prediction=tf.equal(tf.argmax(average_y_hat,1),tf.argmax(y,1))\n", 717 | "    accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))\n", 718 | "\n", 719 | "    #初始化会话并开始训练过程\n", 720 | "    with tf.Session() as sess:\n", 721 | "        sess.run(tf.global_variables_initializer())\n", 722 | "        validate_feed={x:mnist.validation.images,y:mnist.validation.labels}\n", 723 | "        \n", 724 | "        test_feed={x:mnist.test.images,y:mnist.test.labels}\n", 725 | "        \n", 726 | "        # 循环地训练神经网络。\n", 727 | "        for i in range(TRAINING_STEPS):\n", 728 | "            if i % 1000 == 0:\n", 729 | "                validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 730 | "                print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 731 | "            \n", 732 | "            xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 733 | "            sess.run(train_op,feed_dict={x:xs,y:ys})\n", 734 | "\n", 735 | "        test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 736 | "        print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 737 | "\n", 738 | "    \n", 739 | "    " 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": { 746 | "collapsed": true 747 | }, 748 | "outputs": [], 749 | "source": [ 750 | "avg_class = None\n", 751 | "train(mnist)" 752 | ] 753 | } 754 | ], 755 | "metadata": { 756 | "kernelspec": { 757 | "display_name": "Python 3", 758 | "language": "python", 759 | "name": "python3" 760 | }, 761 | "language_info": { 762 | "codemirror_mode": { 763 | "name": "ipython", 764 | "version": 3 765 | }, 766 | 
"file_extension": ".py", 767 | "mimetype": "text/x-python", 768 | "name": "python", 769 | "nbconvert_exporter": "python", 770 | "pygments_lexer": "ipython3", 771 | "version": "3.5.3" 772 | } 773 | }, 774 | "nbformat": 4, 775 | "nbformat_minor": 2 776 | } 777 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML-Tutorial-Experiment 2 | Coding the Machine Learning Tutorial for Learning to Learn 3 | 4 | - 第一期:[从零开始用TensorFlow搭建卷积神经网络](https://www.jiqizhixin.com/articles/2017-08-29-14)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_CNN_Tutorial.ipynb) 5 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_CNN_Tutorial.ipynb) 6 | - 补充资料:[基础代码解析](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_trial_1.ipynb) 7 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_trial_1.ipynb) 8 | - 补充资料:[Keras构建CNN](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_Keras_CNN.ipynb) 9 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_Keras_CNN.ipynb) 10 | - 补充资料:[TensorFlow构建LeNet-5](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_LeNet5.ipynb) 11 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_LeNet5.ipynb) 12 | - 补充资料:[从DensNet到CliqueNet,探索卷积神经网络架构](https://www.jiqizhixin.com/articles/2018-05-23-6) 13 | - 第二期:[GAN完整理论推导与实现](https://www.jiqizhixin.com/articles/2017-10-1-1)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/Keras_GAN.ipynb) 14 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2FKeras_GAN.ipynb) 15 | - 补充资料:[原版GAN的TensorFlow实现](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_GAN.ipynb) 16 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_GAN.ipynb) 17 | - 第三期:[CapsNet结构解析与实现](https://www.jiqizhixin.com/articles/2017-11-05)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_orginal_CapsNet.ipynb) 18 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_orginal_CapsNet.ipynb) 19 | - 补充资料:[解读官方实现的核心代码](https://www.jiqizhixin.com/articles/capsule-implement-sara-sabour-Feb02) 20 | - 第四期:[RNN与CNN的序列建模](https://www.jiqizhixin.com/articles/2018-04-12-3)--&--[LSTM语言建模](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/LSTM_PTB.ipynb)--&--[TCN官方实现](https://github.com/locuslab/TCN)--&--[TCN语言建模(Colaboratory)](https://colab.research.google.com/drive/1GAXC0j9qzLyQu8G9_P_eHi-TtYm7uhXF) 21 | 
[](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2FLSTM_PTB.ipynb) 22 | - 第五期:[基于Transformer的神经机器翻译](https://www.jiqizhixin.com/articles/Synced-github-implement-project-machine-translation-by-transformer)--&--[Colaboratory实现](https://colab.research.google.com/drive/1Wt9Jwynnki6lipwUcy0Sz5WKG7MYSGs0) 23 | 24 | # 25 | ------ 26 | 为了扩展优秀模型与实现,机器之心将梳理历史优质文章,同时也欢迎各位开发者与研究者提供优质的文章。我们将尝试确定添加的文章都是可复现,且基本无理解性错误的文章,并按以下模型归类。若读者发现这些文章有错误或理解误差,可以在 GitHub 上提 issue,确定后我们将修改文章。 27 | 28 | * 数学与编程基础 29 | * 线性代数 30 | * [教程 | 基础入门:深度学习矩阵运算的概念和代码实现](https://www.jiqizhixin.com/articles/2017-08-07-2) 31 | * 概率与信息论 32 | * [从概率论到多分类问题:综述贝叶斯统计分类](https://www.jiqizhixin.com/articles/2017-09-28) 33 | * 数值计算 34 | * Python基础 35 | * [从变量到封装:一文带你为机器学习打下坚实的Python基础](https://www.jiqizhixin.com/articles/2017-10-13) 36 | * [一文带你了解 Python 集合与基本的集合运算](https://www.jiqizhixin.com/articles/062403) 37 | * NumPy基础 38 | * [搭建模型第一步:你需要预习的 NumPy 基础都在这了](https://www.jiqizhixin.com/articles/070101) 39 | * [从数组到矩阵的迹,NumPy常见使用大总结](https://www.jiqizhixin.com/articles/2017-10-28) 40 | * [数据科学初学者必知的NumPy基础知识](https://www.jiqizhixin.com/articles/2018-04-21-7) 41 | * 一般机器学习 42 | * 入门模型 43 | * 线性回归 44 | * [初学TensorFlow机器学习:如何实现线性回归?](https://www.jiqizhixin.com/articles/2017-05-14-2) 45 | * [Python环境下的8种简单线性回归算法](https://www.jiqizhixin.com/articles/2018-01-01) 46 | * [极简Python带你探索分类与回归的奥秘](https://www.jiqizhixin.com/articles/03132) 47 | * Logistic 回归 48 | * [从原理到应用:简述Logistics回归算法](https://www.jiqizhixin.com/articles/2018-05-13-3) 49 | * [从头开始:用Python实现带随机梯度下降的Logistic回归](https://www.jiqizhixin.com/articles/2017-02-17-5) 50 | * 朴素贝叶斯 51 | * [实践中最广泛应用的分类模型:朴素贝叶斯算法](https://www.jiqizhixin.com/articles/033088) 52 | * 决策树 53 | * 支持向量机 54 | * 聚类方法 55 | * K均值聚类 56 | * 层次聚类 57 | * 降维算法 58 | * PCA 59 | * 自编码器 60 | * t-SNE 61 | * 集成方法 62 | * Staking 63 | * Bagging 64 | * 随机森林 65 | * Boosting 66 | * AdaBoost 67 | * 提升树 68 | * 梯度提升树 69 | * 概率图模型 70 | * 隐马尔科夫模型 71 | * 隐马尔可夫随机场 72 | * 条件随机场 73 | * 半监督学习 74 | * Entropy-based 75 | * Graph-based 76 | * 深度学习 77 | * 最优化方法 78 | * 深度前馈网络 79 | * 深度卷积网络 80 | * 深度循环网络 81 | * 深度生成模型 82 | * PixelRNN/PixelCNN 83 | * VAE 84 | * GAN 85 | 86 | --------------------------------------------------------------------------------
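A quick sanity check for the moving-average cell in Experiments/tf_trial_1.ipynb: the shadow values it prints (4.5, 4.555, 4.60945) can be reproduced with plain Python. The sketch below is illustrative and not part of the original notebooks; it assumes the documented behaviour of `tf.train.ExponentialMovingAverage` when `num_updates` is supplied, namely an effective decay of `min(decay, (1 + num_updates) / (10 + num_updates))` and the update `shadow = decay * shadow + (1 - decay) * value`.

```python
# Plain-Python check of the shadow-variable arithmetic used by
# tf.train.ExponentialMovingAverage in Experiments/tf_trial_1.ipynb.
# Assumption: effective decay = min(decay, (1 + num_updates) / (10 + num_updates)).

def ema_update(shadow, value, decay=0.99, num_updates=None):
    """Return the shadow value after one apply() step."""
    if num_updates is not None:
        decay = min(decay, (1.0 + num_updates) / (10.0 + num_updates))
    return decay * shadow + (1.0 - decay) * value

shadow = 0.0                                          # v1 is initialised to 0
shadow = ema_update(shadow, 5.0, num_updates=0)       # decay = 0.1  -> 4.5
print(shadow)
shadow = ema_update(shadow, 10.0, num_updates=10000)  # decay = 0.99 -> 4.555
print(shadow)
shadow = ema_update(shadow, 10.0, num_updates=10000)  # decay = 0.99 -> 4.60945
print(shadow)
```

Up to floating-point rounding, the three printed values (4.5, 4.555, 4.60945) match the float32 numbers recorded in the notebook output ([5.0, 4.5], [10.0, 4.5549998], [10.0, 4.6094499]), confirming that the decay schedule behaves as the comments in that cell describe.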