├── Experiments ├── 1024.ipynb ├── Keras_GAN.ipynb ├── LSTM_PTB.ipynb ├── Res │ ├── 1.md │ ├── Synced.jpg │ ├── gan_tf_keras1.png │ ├── gan_tf_keras2.png │ ├── gan_tf_keras3.png │ ├── gan_tf_keras4.png │ ├── gan_tf_keras5.png │ ├── gan_tf_keras6.png │ └── gan_tf_keras7.png ├── Synced.py ├── Transformer_synced.ipynb ├── pytorch_TCN.ipynb ├── swish_test.ipynb ├── tf_CNN_Tutorial.ipynb ├── tf_GAN.ipynb ├── tf_Keras_CNN.ipynb ├── tf_LeNet5.ipynb ├── tf_orginal_CapsNet.ipynb └── tf_trial_1.ipynb └── README.md /Experiments/1024.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 13 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 14 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 15 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 16 | "After 0 training step(s), validation accuracy using average model is 0.1408 \n", 17 | "After 1000 training step(s), validation accuracy using average model is 0.9406 \n", 18 | "After 2000 training step(s), validation accuracy using average model is 0.9556 \n", 19 | "After 3000 training step(s), validation accuracy using average model is 0.9616 \n", 20 | "After 4000 training step(s), validation accuracy using average model is 0.9682 \n", 21 | "After 5000 training step(s), validation accuracy using average model is 0.9694 \n", 22 | "After 6000 training step(s), validation accuracy using average model is 0.97 \n", 23 | "After 7000 training step(s), validation accuracy using average model is 0.9712 \n", 24 | "After 8000 training step(s), validation accuracy using average model is 0.9708 \n", 25 | "After 9000 training step(s), validation accuracy using average model is 0.9716 \n", 26 | "After 10000 training step(s), test accuracy using average model is 0.9697\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "import tensorflow as tf\n", 32 | "from tensorflow.examples.tutorials.mnist import input_data\n", 33 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 34 | "\n", 35 | "\n", 36 | "INPUT_NODE = 784 \n", 37 | "OUTPUT_NODE = 10 \n", 38 | "LAYER1_NODE = 1024 \n", 39 | "LAYER2_NODE = 512 \n", 40 | "LAYER3_NODE = 256 \n", 41 | "LAYER4_NODE = 128\n", 42 | "LAYER5_NODE = 64 \n", 43 | "LAYER6_NODE = 64\n", 44 | "LAYER7_NODE = 128 \n", 45 | "LAYER8_NODE = 256 \n", 46 | "LAYER9_NODE = 512\n", 47 | "LAYER10_NODE = 1024 \n", 48 | " \n", 49 | "BATCH_SIZE = 50 \n", 50 | "\n", 51 | "# 模型相关的参数\n", 52 | "LEARNING_RATE_BASE = 0.008 \n", 53 | "LEARNING_RATE_DECAY = 0.99 \n", 54 | "REGULARAZTION_RATE = 0.0001 \n", 55 | "TRAINING_STEPS = 10000 \n", 56 | "MOVING_AVERAGE_DECAY = 0.99 \n", 57 | "\n", 58 | "def inference(input_tensor, avg_class, W, B):\n", 59 | " # 不使用滑动平均类\n", 60 | " if avg_class == None:\n", 61 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, W[0]) + B[0])\n", 62 | " layer2 = tf.nn.relu(tf.matmul(layer1, W[1]) + B[1])\n", 63 | " layer3 = tf.nn.relu(tf.matmul(layer2, W[2]) + B[2])\n", 64 | " layer4 = tf.nn.relu(tf.matmul(layer3, W[3]) + B[3])\n", 65 | " layer5 = tf.nn.relu(tf.matmul(layer4, W[4]) + B[4])\n", 66 | " layer6 = tf.nn.relu(tf.matmul(layer5, W[5]) + B[5])\n", 67 | " layer7 = tf.nn.relu(tf.matmul(layer6, W[6]) + B[6])\n", 68 | " layer8 = tf.nn.relu(tf.matmul(layer7, W[7]) + B[7])\n", 69 | " layer9 = tf.nn.relu(tf.matmul(layer8, W[8]) + 
B[8])\n", 70 | " layer10 = tf.nn.relu(tf.matmul(layer9, W[9]) + B[9])\n", 71 | " return tf.matmul(layer10, W[10]) + B[10]\n", 72 | " \n", 73 | " else:\n", 74 | " \n", 75 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0]))\n", 76 | " layer2 = tf.nn.relu(tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1]))\n", 77 | " layer3 = tf.nn.relu(tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2]))\n", 78 | " layer4 = tf.nn.relu(tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3]))\n", 79 | " layer5 = tf.nn.relu(tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4]))\n", 80 | " layer6 = tf.nn.relu(tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5]))\n", 81 | " layer7 = tf.nn.relu(tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6]))\n", 82 | " layer8 = tf.nn.relu(tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7]))\n", 83 | " layer9 = tf.nn.relu(tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8]))\n", 84 | " layer10 = tf.nn.relu(tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9]))\n", 85 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 86 | " \n", 87 | "def train(mnist):\n", 88 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 89 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 90 | " \n", 91 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 92 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 93 | " \n", 94 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 95 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 96 | " \n", 97 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 98 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 99 | " \n", 100 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 101 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 102 | " \n", 103 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 104 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 105 | " \n", 106 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 107 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 108 | " \n", 109 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 110 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 111 | " \n", 112 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 113 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 114 | " \n", 115 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 116 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 117 | " \n", 118 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 119 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 120 | " \n", 121 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 122 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 123 | " \n", 124 | " W=[weights1, weights2, weights3, weights4, weights5, 
weights6, weights7, weights8, weights9, weights10, weights11]\n", 125 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 126 | " \n", 127 | " # 计算不含滑动平均类的前向传播结果\n", 128 | " y = inference(x, None, W, B)\n", 129 | " \n", 130 | " # 定义训练轮数及相关的滑动平均类 \n", 131 | " global_step = tf.Variable(0, trainable=False)\n", 132 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 133 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 134 | " average_y = inference(x, variable_averages, W, B)\n", 135 | " \n", 136 | " # 计算交叉熵及其平均值\n", 137 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 138 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 139 | " \n", 140 | " # 损失函数的计算\n", 141 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 142 | " regularaztion = regularizer(W[0]) \n", 143 | " for i in range(1,11):\n", 144 | " regularazation=regularaztion + regularizer(W[i]) \n", 145 | " loss = cross_entropy_mean + regularaztion\n", 146 | " \n", 147 | " # 设置指数衰减的学习率。\n", 148 | " learning_rate = tf.train.exponential_decay(\n", 149 | " LEARNING_RATE_BASE,\n", 150 | " global_step,\n", 151 | " mnist.train.num_examples / BATCH_SIZE,\n", 152 | " LEARNING_RATE_DECAY,\n", 153 | " staircase=True)\n", 154 | " \n", 155 | " # 优化损失函数\n", 156 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 157 | " \n", 158 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 159 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 160 | " train_op = tf.no_op(name='train')\n", 161 | "\n", 162 | " # 计算正确率\n", 163 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 164 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 165 | " \n", 166 | " # 初始化会话并开始训练过程。\n", 167 | " with tf.Session() as sess:\n", 168 | " tf.global_variables_initializer().run()\n", 169 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 170 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 171 | " \n", 172 | " # 循环的训练神经网络。\n", 173 | " for i in range(TRAINING_STEPS):\n", 174 | " if i % 1000 == 0:\n", 175 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 176 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 177 | " \n", 178 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 179 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 180 | "\n", 181 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 182 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 183 | "\n", 184 | "train(mnist)" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.5.4" 214 | } 215 | }, 216 | "nbformat": 4, 217 | "nbformat_minor": 2 218 | } 
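Note on the loss construction in the cell above: the accumulation loop assigns to `regularazation` while both the initial value and the final `loss` use `regularaztion`, so only `W[0]` ever contributes an L2 penalty. A minimal corrected sketch of the intended accumulation, keeping the notebook's TF 1.x API and variable names (illustrative, not a verbatim excerpt):

```python
# Accumulate the L2 penalty over all eleven weight matrices, then add it to
# the cross-entropy term. Note the same variable name on both sides of the
# assignment inside the loop.
regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
regularization = regularizer(W[0])
for i in range(1, 11):
    regularization = regularization + regularizer(W[i])
loss = cross_entropy_mean + regularization
```

Independent of that fix, the moving-average evaluation works as reported: `tf.train.ExponentialMovingAverage` keeps a shadow copy of every trainable variable, and `inference(x, variable_averages, W, B)` reads those shadow values, which is why the printed validation accuracy is described as "using average model".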
219 | -------------------------------------------------------------------------------- /Experiments/Keras_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "Using TensorFlow backend.\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from keras.models import Sequential\n", 20 | "from keras.layers import Dense\n", 21 | "from keras.layers import Reshape\n", 22 | "from keras.layers.core import Activation\n", 23 | "from keras.layers.normalization import BatchNormalization\n", 24 | "from keras.layers.convolutional import UpSampling2D\n", 25 | "from keras.layers.convolutional import Conv2D, MaxPooling2D\n", 26 | "from keras.layers.core import Flatten\n", 27 | "from keras.optimizers import SGD\n", 28 | "from keras.datasets import mnist\n", 29 | "import numpy as np\n", 30 | "from PIL import Image\n", 31 | "import argparse\n", 32 | "import math" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "整个训练过程可以说判别器 D 和生成器 G 对价值函数 V(G,D) 进行了极小极大化博弈:\n", 40 | "![gan_tf_keras1.png](./Res/gan_tf_keras1.png)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "### 最优生成器\n", 48 | "\n", 49 | "因为最优的判别器D(x)=P_data/(P_data+P_G),我们将其代入V(G,D)可得:\n", 50 | "![gan_tf_keras2.png](./Res/gan_tf_keras2.png)\n", 51 | "该积分进行变换得:\n", 52 | "![gan_tf_keras3.png](./Res/gan_tf_keras3.png)\n", 53 | "假设存在两个分布 P 和 Q,且这两个分布的平均分布 M=(P+Q)/2,那么这两个分布之间的 JS 散度为 P 与 M 之间的 KL 散度加上 Q 与 M 之间的 KL 散度再除以 2;因此可化为:\n", 54 | "![gan_tf_keras4.png](./Res/gan_tf_keras4.png)\n", 55 | "\n", 56 | "JS 散度的取值为 0 到 log2。若两个分布完全没有交集,那么 JS 散度取最大值 log2;若两个分布完全一样,那么 JS 散度取最小值 0。当 P_G=P_data 时,JSD(P_data||P_G) 为 0。综上所述,生成分布当且仅当等于真实数据分布式时,我们可以取得最优生成器。" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "metadata": { 63 | "collapsed": true 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "def generator_model():\n", 68 | " #下面搭建生成器的架构,首先导入序贯模型(sequential),即多个网络层的线性堆叠\n", 69 | " model = Sequential()\n", 70 | " #添加一个全连接层,输入为100维向量,输出为1024维\n", 71 | " model.add(Dense(input_dim=100, output_dim=1024))\n", 72 | " #添加一个激活函数tanh\n", 73 | " model.add(Activation('tanh'))\n", 74 | " #添加一个全连接层,输出为128×7×7维度\n", 75 | " model.add(Dense(128*7*7))\n", 76 | " #添加一个批量归一化层,该层在每个batch上将前一层的激活值重新规范化,即使得其输出数据的均值接近0,其标准差接近1\n", 77 | " model.add(BatchNormalization())\n", 78 | " model.add(Activation('tanh'))\n", 79 | " #Reshape层用来将输入shape转换为特定的shape,将含有128*7*7个元素的向量转化为7×7×128张量\n", 80 | " model.add(Reshape((7, 7, 128), input_shape=(128*7*7,)))\n", 81 | " #2维上采样层,即将数据的行和列分别重复2次\n", 82 | " model.add(UpSampling2D(size=(2, 2)))\n", 83 | " #添加一个2维卷积层,卷积核大小为5×5,激活函数为tanh,共64个卷积核,并采用padding以保持图像尺寸不变\n", 84 | " model.add(Conv2D(64, (5, 5), padding='same'))\n", 85 | " model.add(Activation('tanh'))\n", 86 | " model.add(UpSampling2D(size=(2, 2)))\n", 87 | " #卷积核设为1即输出图像的维度\n", 88 | " model.add(Conv2D(1, (5, 5), padding='same'))\n", 89 | " model.add(Activation('tanh'))\n", 90 | " return model" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "### 最优判别器\n", 98 | "\n", 99 | "原论文中价值函数可写为在 x 上的积分,即将数学期望展开为积分形式:\n", 100 | "![gan_tf_keras5.png](./Res/gan_tf_keras5.png)\n", 101 | "其实求积分的最大值可以转化为求被积函数的最大值。而求被积函数的最大值是为了求得最优判别器 D,因此不涉及判别器的项都可以看作为常数项。\n", 102 | "若令判别器 D(x) 等于 y,那么被积函数可以写为:\n", 
103 | "![gan_tf_keras6.png](./Res/gan_tf_keras6.png)\n", 104 | "为了找到最优的极值点,如果 a+b≠0,我们可以用以下一阶导求解:\n", 105 | "![gan_tf_keras6.png](./Res/gan_tf_keras7.png)\n", 106 | "因此,最优判别器D(x)=P_data/(P_data+P_G)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 3, 112 | "metadata": { 113 | "collapsed": true 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "def discriminator_model():\n", 118 | " #下面搭建判别器架构,同样采用序贯模型\n", 119 | " model = Sequential()\n", 120 | " \n", 121 | " #添加2维卷积层,卷积核大小为5×5,激活函数为tanh,输入shape在‘channels_first’模式下为(samples,channels,rows,cols)\n", 122 | " #在‘channels_last’模式下为(samples,rows,cols,channels),输出为64维\n", 123 | " model.add(\n", 124 | " Conv2D(64, (5, 5),\n", 125 | " padding='same',\n", 126 | " input_shape=(28, 28, 1))\n", 127 | " )\n", 128 | " model.add(Activation('tanh'))\n", 129 | " #为空域信号施加最大值池化,pool_size取(2,2)代表使图片在两个维度上均变为原长的一半\n", 130 | " model.add(MaxPooling2D(pool_size=(2, 2)))\n", 131 | " model.add(Conv2D(128, (5, 5)))\n", 132 | " model.add(Activation('tanh'))\n", 133 | " model.add(MaxPooling2D(pool_size=(2, 2)))\n", 134 | " #Flatten层把多维输入一维化,常用在从卷积层到全连接层的过渡\n", 135 | " model.add(Flatten())\n", 136 | " model.add(Dense(1024))\n", 137 | " model.add(Activation('tanh'))\n", 138 | " #一个结点进行二值分类,并采用sigmoid函数的输出作为概念\n", 139 | " model.add(Dense(1))\n", 140 | " model.add(Activation('sigmoid'))\n", 141 | " return model" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 4, 147 | "metadata": { 148 | "collapsed": true 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "def generator_containing_discriminator(g, d):\n", 153 | " #将前面定义的生成器架构和判别器架构组拼接成一个大的神经网络,用于判别生成的图片\n", 154 | " model = Sequential()\n", 155 | " #先添加生成器架构,再令d不可训练,即固定d\n", 156 | " #因此在给定d的情况下训练生成器,即通过将生成的结果投入到判别器进行辨别而优化生成器\n", 157 | " model.add(g)\n", 158 | " d.trainable = False\n", 159 | " model.add(d)\n", 160 | " return model" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 5, 166 | "metadata": { 167 | "collapsed": true 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "def combine_images(generated_images):\n", 172 | " #生成图片拼接\n", 173 | " num = generated_images.shape[0]\n", 174 | " width = int(math.sqrt(num))\n", 175 | " height = int(math.ceil(float(num)/width))\n", 176 | " shape = generated_images.shape[1:3]\n", 177 | " image = np.zeros((height*shape[0], width*shape[1]),\n", 178 | " dtype=generated_images.dtype)\n", 179 | " for index, img in enumerate(generated_images):\n", 180 | " i = int(index/width)\n", 181 | " j = index % width\n", 182 | " image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \\\n", 183 | " img[:, :, 0]\n", 184 | " return image" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "#### 对于每一次迭代:\n", 192 | "\n", 193 | "- 从真实数据分布 P_data 抽取 m 个样本\n", 194 | "- 从先验分布 P_prior(z) 抽取 m 个噪声样本\n", 195 | "- 将噪声样本投入 G 而生成数据,即x^tilde = G(Z^i);通过最大化 V 的近似而更新判别器参数θ_d\n", 196 | "\n", 197 | "以上是学习判别器 D 的过程。因为学习 D 的过程是计算 JS 散度的过程,并且我们希望能最大化价值函数,所以该步骤会重复 k 次。\n", 198 | "\n", 199 | "- 从先验分布 P_prior(z) 中抽取另外 m 个噪声样本 {z^1,...,z^m}\n", 200 | "- 通过极小化 V^tilde 而更新生成器参数θ_g" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 6, 206 | "metadata": { 207 | "collapsed": true 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "def train(BATCH_SIZE):\n", 212 | " \n", 213 | " # 国内好像不能直接导入数据集,我们试了几次都不行,后来将数据集下载到本地'~/.keras/datasets/',也就是当前目录(我的是用户文件夹下)下的.keras文件夹中。\n", 214 | " #下载的地址为:https://s3.amazonaws.com/img-datasets/mnist.npz\n", 215 | " (X_train, 
y_train), (X_test, y_test) = mnist.load_data()\n", 216 | " #iamge_data_format选择\"channels_last\"或\"channels_first\",该选项指定了Keras将要使用的维度顺序。\n", 217 | " #\"channels_first\"假定2D数据的维度顺序为(channels, rows, cols),3D数据的维度顺序为(channels, conv_dim1, conv_dim2, conv_dim3)\n", 218 | " \n", 219 | " #转换字段类型,并将数据导入变量中\n", 220 | " X_train = (X_train.astype(np.float32) - 127.5)/127.5\n", 221 | " X_train = X_train[:, :, :, None]\n", 222 | " X_test = X_test[:, :, :, None]\n", 223 | " # X_train = X_train.reshape((X_train.shape, 1) + X_train.shape[1:])\n", 224 | " \n", 225 | " #将定义好的模型架构赋值给特定的变量\n", 226 | " d = discriminator_model()\n", 227 | " g = generator_model()\n", 228 | " d_on_g = generator_containing_discriminator(g, d)\n", 229 | " \n", 230 | " #定义生成器模型判别器模型更新所使用的优化算法及超参数\n", 231 | " d_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)\n", 232 | " g_optim = SGD(lr=0.001, momentum=0.9, nesterov=True)\n", 233 | " \n", 234 | " #编译三个神经网络并设置损失函数和优化算法,其中损失函数都是用的是二元分类交叉熵函数。编译是用来配置模型学习过程的\n", 235 | " g.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 236 | " d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)\n", 237 | " \n", 238 | " #前一个架构在固定判别器的情况下训练了生成器,所以在训练判别器之前先要设定其为可训练。\n", 239 | " d.trainable = True\n", 240 | " d.compile(loss='binary_crossentropy', optimizer=d_optim)\n", 241 | " \n", 242 | " #下面在满足epoch条件下进行训练\n", 243 | " for epoch in range(30):\n", 244 | " print(\"Epoch is\", epoch)\n", 245 | " \n", 246 | " #计算一个epoch所需要的迭代数量,即训练样本数除批量大小数的值取整;其中shape[0]就是读取矩阵第一维度的长度\n", 247 | " print(\"Number of batches\", int(X_train.shape[0]/BATCH_SIZE))\n", 248 | " \n", 249 | " #在一个epoch内进行迭代训练\n", 250 | " for index in range(int(X_train.shape[0]/BATCH_SIZE)):\n", 251 | " \n", 252 | " #随机生成的噪声服从均匀分布,且采样下界为-1、采样上界为1,输出BATCH_SIZE×100个样本;即抽取一个批量的随机样本\n", 253 | " noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))\n", 254 | " \n", 255 | " #抽取一个批量的真实图片\n", 256 | " image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]\n", 257 | " \n", 258 | " #生成的图片使用生成器对随机噪声进行推断;verbose为日志显示,0为不在标准输出流输出日志信息,1为输出进度条记录\n", 259 | " generated_images = g.predict(noise, verbose=0)\n", 260 | " \n", 261 | " #每经过100次迭代输出一张生成的图片\n", 262 | " if index % 100 == 0:\n", 263 | " image = combine_images(generated_images)\n", 264 | " image = image*127.5+127.5\n", 265 | " Image.fromarray(image.astype(np.uint8)).save(\n", 266 | " \"./GAN/\"+str(epoch)+\"_\"+str(index)+\".png\")\n", 267 | " \n", 268 | " #将真实的图片和生成的图片以多维数组的形式拼接在一起,真实图片在上,生成图片在下\n", 269 | " X = np.concatenate((image_batch, generated_images))\n", 270 | " \n", 271 | " #生成图片真假标签,即一个包含两倍批量大小的列表;前一个批量大小都是1,代表真实图片,后一个批量大小都是0,代表伪造图片\n", 272 | " y = [1] * BATCH_SIZE + [0] * BATCH_SIZE\n", 273 | " \n", 274 | " #判别器的损失;在一个batch的数据上进行一次参数更新\n", 275 | " d_loss = d.train_on_batch(X, y)\n", 276 | " print(\"batch %d d_loss : %f\" % (index, d_loss))\n", 277 | " \n", 278 | " #随机生成的噪声服从均匀分布\n", 279 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))\n", 280 | " \n", 281 | " #固定判别器\n", 282 | " d.trainable = False\n", 283 | " \n", 284 | " #计算生成器损失;在一个batch的数据上进行一次参数更新\n", 285 | " g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)\n", 286 | " \n", 287 | " #令判别器可训练\n", 288 | " d.trainable = True\n", 289 | " print(\"batch %d g_loss : %f\" % (index, g_loss))\n", 290 | " \n", 291 | " #每100次迭代保存一次生成器和判别器的权重\n", 292 | " if index % 100 == 9:\n", 293 | " g.save_weights('generator', True)\n", 294 | " d.save_weights('discriminator', True)" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 22, 300 | "metadata": { 301 | "collapsed": true 302 | }, 303 | "outputs": 
[], 304 | "source": [ 305 | "def generate(BATCH_SIZE, nice= False ):\n", 306 | " #训练完模型后,可以运行该函数生成图片\n", 307 | " g = generator_model()\n", 308 | " g.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 309 | " g.load_weights('generator')\n", 310 | " if nice:\n", 311 | " d = discriminator_model()\n", 312 | " d.compile(loss='binary_crossentropy', optimizer=\"SGD\")\n", 313 | " d.load_weights('discriminator')\n", 314 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))\n", 315 | " generated_images = g.predict(noise, verbose=1)\n", 316 | " d_pret = d.predict(generated_images, verbose=1)\n", 317 | " index = np.arange(0, BATCH_SIZE*20)\n", 318 | " index.resize((BATCH_SIZE*20, 1))\n", 319 | " pre_with_index = list(np.append(d_pret, index, axis=1))\n", 320 | " pre_with_index.sort(key=lambda x: x[0], reverse=True)\n", 321 | " nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)\n", 322 | " nice_images = nice_images[:, :, :, None]\n", 323 | " for i in range(BATCH_SIZE):\n", 324 | " idx = int(pre_with_index[i][1])\n", 325 | " nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]\n", 326 | " image = combine_images(nice_images)\n", 327 | " else:\n", 328 | " noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))\n", 329 | " generated_images = g.predict(noise, verbose=0)\n", 330 | " image = combine_images(generated_images)\n", 331 | " image = image*127.5+127.5\n", 332 | " Image.fromarray(image.astype(np.uint8)).save(\n", 333 | " \"./GAN/generated_image.png\")" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 29, 339 | "metadata": { 340 | "collapsed": false 341 | }, 342 | "outputs": [ 343 | { 344 | "name": "stderr", 345 | "output_type": "stream", 346 | "text": [ 347 | "C:\\Users\\Horatio\\AppData\\Local\\conda\\conda\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:3: UserWarning: Update your `Dense` call to the Keras 2 API: `Dense(units=1024, input_dim=100)`\n", 348 | " This is separate from the ipykernel package so we can avoid doing imports until\n" 349 | ] 350 | }, 351 | { 352 | "name": "stdout", 353 | "output_type": "stream", 354 | "text": [ 355 | "128/132 [============================>.] 
- ETA: 0s\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b" 356 | ] 357 | } 358 | ], 359 | "source": [ 360 | "generate(132)" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": null, 366 | "metadata": { 367 | "collapsed": true 368 | }, 369 | "outputs": [], 370 | "source": [] 371 | } 372 | ], 373 | "metadata": { 374 | "kernelspec": { 375 | "display_name": "Python 3", 376 | "language": "python", 377 | "name": "python3" 378 | }, 379 | "language_info": { 380 | "codemirror_mode": { 381 | "name": "ipython", 382 | "version": 3 383 | }, 384 | "file_extension": ".py", 385 | "mimetype": "text/x-python", 386 | "name": "python", 387 | "nbconvert_exporter": "python", 388 | "pygments_lexer": "ipython3", 389 | "version": "3.5.3" 390 | } 391 | }, 392 | "nbformat": 4, 393 | "nbformat_minor": 2 394 | } 395 | -------------------------------------------------------------------------------- /Experiments/LSTM_PTB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | " from ._conv import register_converters as _register_converters\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "#reader.py\n", 20 | "from __future__ import absolute_import\n", 21 | "from __future__ import division\n", 22 | "from __future__ import print_function\n", 23 | "\n", 24 | "import collections\n", 25 | "import os\n", 26 | "import sys\n", 27 | "\n", 28 | "import tensorflow as tf\n", 29 | "\n", 30 | "Py3 = sys.version_info[0] == 3\n", 31 | "\n", 32 | "def _read_words(filename):\n", 33 | " with tf.gfile.GFile(filename, \"r\") as f:\n", 34 | " if Py3:\n", 35 | " return f.read().replace(\"\\n\", \"\").split()\n", 36 | " else:\n", 37 | " return f.read().decode(\"utf-8\").replace(\"\\n\", \"\").split()\n", 38 | "\n", 39 | "\n", 40 | "def _build_vocab(filename):\n", 41 | " data = _read_words(filename)\n", 42 | "\n", 43 | " counter = collections.Counter(data)\n", 44 | " count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))\n", 45 | "\n", 46 | " words, _ = list(zip(*count_pairs))\n", 47 | " word_to_id = dict(zip(words, range(len(words))))\n", 48 | "\n", 49 | " return word_to_id\n", 50 | "\n", 51 | "\n", 52 | "def _file_to_word_ids(filename, word_to_id):\n", 53 | " data = _read_words(filename)\n", 54 | " return [word_to_id[word] for word in data if word in word_to_id]\n", 55 | "\n", 56 | "\n", 57 | "def ptb_raw_data(data_path=None):\n", 58 | " \"\"\"Load PTB raw data from data directory \"data_path\".\n", 59 | " Reads PTB text files, converts strings to integer ids,\n", 60 | " and performs mini-batching of the inputs.\n", 61 | " The PTB dataset comes from Tomas Mikolov's webpage:\n", 62 | " http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n", 63 | " Args:\n", 64 | " data_path: string path to the directory where simple-examples.tgz has\n", 65 | " been extracted.\n", 66 | " Returns:\n", 67 | " tuple (train_data, valid_data, test_data, vocabulary)\n", 68 | " where each of the data objects can be passed to PTBIterator.\n", 69 | " \"\"\"\n", 70 | "\n", 71 | " train_path = os.path.join(data_path, 
\"ptb.train.txt\")\n", 72 | " valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n", 73 | " test_path = os.path.join(data_path, \"ptb.test.txt\")\n", 74 | "\n", 75 | " word_to_id = _build_vocab(train_path)\n", 76 | " train_data = _file_to_word_ids(train_path, word_to_id)\n", 77 | " valid_data = _file_to_word_ids(valid_path, word_to_id)\n", 78 | " test_data = _file_to_word_ids(test_path, word_to_id)\n", 79 | " vocabulary = len(word_to_id)\n", 80 | " return train_data, valid_data, test_data, vocabulary\n", 81 | "\n", 82 | "\n", 83 | "def ptb_producer(raw_data, batch_size, num_steps, name=None):\n", 84 | " \"\"\"Iterate on the raw PTB data.\n", 85 | " This chunks up raw_data into batches of examples and returns Tensors that\n", 86 | " are drawn from these batches.\n", 87 | " Args:\n", 88 | " raw_data: one of the raw data outputs from ptb_raw_data.\n", 89 | " batch_size: int, the batch size.\n", 90 | " num_steps: int, the number of unrolls.\n", 91 | " name: the name of this operation (optional).\n", 92 | " Returns:\n", 93 | " A pair of Tensors, each shaped [batch_size, num_steps]. The second element\n", 94 | " of the tuple is the same data time-shifted to the right by one.\n", 95 | " Raises:\n", 96 | " tf.errors.InvalidArgumentError: if batch_size or num_steps are too high.\n", 97 | " \"\"\"\n", 98 | " with tf.name_scope(name, \"PTBProducer\", [raw_data, batch_size, num_steps]):\n", 99 | " raw_data = tf.convert_to_tensor(raw_data, name=\"raw_data\", dtype=tf.int32)\n", 100 | "\n", 101 | " data_len = tf.size(raw_data)\n", 102 | " batch_len = data_len // batch_size\n", 103 | " data = tf.reshape(raw_data[0 : batch_size * batch_len],\n", 104 | " [batch_size, batch_len])\n", 105 | "\n", 106 | " epoch_size = (batch_len - 1) // num_steps\n", 107 | " assertion = tf.assert_positive(\n", 108 | " epoch_size,\n", 109 | " message=\"epoch_size == 0, decrease batch_size or num_steps\")\n", 110 | " with tf.control_dependencies([assertion]):\n", 111 | " epoch_size = tf.identity(epoch_size, name=\"epoch_size\")\n", 112 | "\n", 113 | " i = tf.train.range_input_producer(epoch_size, shuffle=False).dequeue()\n", 114 | " x = tf.strided_slice(data, [0, i * num_steps],\n", 115 | " [batch_size, (i + 1) * num_steps])\n", 116 | " x.set_shape([batch_size, num_steps])\n", 117 | " y = tf.strided_slice(data, [0, i * num_steps + 1],\n", 118 | " [batch_size, (i + 1) * num_steps + 1])\n", 119 | " y.set_shape([batch_size, num_steps])\n", 120 | " return x, y" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 2, 126 | "metadata": { 127 | "collapsed": true 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "import tensorflow as tf\n", 132 | "import numpy as np\n", 133 | "# 运行上面的reader.py\n", 134 | "\n", 135 | "data_path = './data/PTB-dataset-Tomas-Mikolov/data'\n", 136 | "# 隐藏层单元数与LSTM层级数\n", 137 | "hidden_size = 200\n", 138 | "num_layers = 2\n", 139 | "#词典规模\n", 140 | "vocab_size = 10000\n", 141 | "\n", 142 | "learning_rate = 1.0\n", 143 | "train_batch_size = 16\n", 144 | "# 训练数据截断长度\n", 145 | "train_num_step = 32\n", 146 | "\n", 147 | "# 在测试时不需要使用截断,测试数据为一个超长序列\n", 148 | "eval_batch_size = 1\n", 149 | "eval_num_step = 1\n", 150 | "num_epoch = 3\n", 151 | "#结点不被Dropout的概率\n", 152 | "keep_prob = 0.5\n", 153 | "\n", 154 | "# 用于控制梯度爆炸的参数\n", 155 | "max_grad_norm = 5" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 3, 161 | "metadata": { 162 | "collapsed": true 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "# 通过ptbmodel 的类描述模型\n", 167 | "class 
PTBModel(object):\n", 168 | " def __init__(self, is_training, batch_size, num_steps):\n", 169 | " # 记录使用的Batch大小和截断长度\n", 170 | " self.batch_size = batch_size\n", 171 | " self.num_steps = num_steps\n", 172 | "\n", 173 | " # 定义输入层,维度为批量大小×截断长度\n", 174 | " self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])\n", 175 | " # 定义预期输出\n", 176 | " self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])\n", 177 | "\n", 178 | " # 定义使用LSTM结构为循环体,带Dropout的深度RNN\n", 179 | " lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size)\n", 180 | " if is_training:\n", 181 | " lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)\n", 182 | " cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)\n", 183 | "\n", 184 | " # 初始化状态为0\n", 185 | " self.initial_state = cell.zero_state(batch_size, tf.float32)\n", 186 | "\n", 187 | " # 将单词ID转换为单词向量,embedding的维度为vocab_size*hidden_size\n", 188 | " embedding = tf.get_variable('embedding', [vocab_size, hidden_size])\n", 189 | " # 将一个批量内的单词ID转化为词向量,转化后的输入维度为批量大小×截断长度×隐藏单元数\n", 190 | " inputs = tf.nn.embedding_lookup(embedding, self.input_data)\n", 191 | "\n", 192 | " # 只在训练时使用Dropout\n", 193 | " if is_training: inputs = tf.nn.dropout(inputs, keep_prob)\n", 194 | "\n", 195 | " # 定义输出列表,这里先将不同时刻LSTM的输出收集起来,再通过全连接层得到最终输出\n", 196 | " outputs = []\n", 197 | " # state 储存不同批量中LSTM的状态,初始为0\n", 198 | " state = self.initial_state\n", 199 | " with tf.variable_scope('RNN'):\n", 200 | " for time_step in range(num_steps):\n", 201 | " if time_step > 0: tf.get_variable_scope().reuse_variables()\n", 202 | " # 从输入数据获取当前时间步的输入与前一时间步的状态,并传入LSTM结构\n", 203 | " cell_output, state = cell(inputs[:, time_step, :], state)\n", 204 | " # 将当前输出加入输出队列\n", 205 | " outputs.append(cell_output)\n", 206 | "\n", 207 | " # 将输出队列展开成[batch,hidden*num_step]的形状,再reshape为[batch*num_step, hidden]\n", 208 | " output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_size])\n", 209 | "\n", 210 | " # 将LSTM的输出传入全连接层以生成最后的预测结果。最后结果在每时刻上都是长度为vocab_size的张量\n", 211 | " # 且经过softmax层后表示下一个位置不同词的概率\n", 212 | " weight = tf.get_variable('weight', [hidden_size, vocab_size])\n", 213 | " bias = tf.get_variable('bias', [vocab_size])\n", 214 | " logits = tf.matmul(output, weight) + bias\n", 215 | "\n", 216 | " # 定义交叉熵损失函数,一个序列的交叉熵之和\n", 217 | " loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(\n", 218 | " [logits], # 预测的结果\n", 219 | " [tf.reshape(self.targets, [-1])], # 期望正确的结果,这里将[batch_size, num_steps]压缩为一维张量\n", 220 | " [tf.ones([batch_size * num_steps], dtype=tf.float32)]) # 损失的权重,所有为1表明不同批量和时刻的重要程度一样\n", 221 | "\n", 222 | " # 计算每个批量的平均损失\n", 223 | " self.cost = tf.reduce_sum(loss) / batch_size\n", 224 | " self.final_state = state\n", 225 | "\n", 226 | " # 只在训练模型时定义反向传播操作\n", 227 | " if not is_training: return\n", 228 | " trainable_variable = tf.trainable_variables()\n", 229 | "\n", 230 | " # 控制梯度爆炸问题\n", 231 | " grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, trainable_variable), max_grad_norm)\n", 232 | " # 如果需要使用Adam作为优化器,可以改为tf.train.AdamOptimizer(learning_rate),学习率需要降低至0.001左右\n", 233 | " optimizer = tf.train.GradientDescentOptimizer(learning_rate)\n", 234 | " # 定义训练步骤\n", 235 | " self.train_op = optimizer.apply_gradients(zip(grads, trainable_variable))" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 4, 241 | "metadata": { 242 | "collapsed": true 243 | }, 244 | "outputs": [], 245 | "source": [ 246 | "def run_epoch(session, model, data, train_op, output_log, epoch_size):\n", 247 | " total_costs = 0.0\n", 248 | " iters = 0\n", 249 
| " state = session.run(model.initial_state)\n", 250 | "\n", 251 | " # # 使用当前数据训练或测试模型\n", 252 | " for step in range(epoch_size):\n", 253 | " x, y = session.run(data)\n", 254 | " # 在当前批量上运行train_op并计算损失值,交叉熵计算的是下一个单词为给定单词的概率\n", 255 | " cost, state, _ = session.run([model.cost, model.final_state, train_op],\n", 256 | " {model.input_data: x, model.targets: y, model.initial_state: state})\n", 257 | " # 将不同时刻和批量的概率就可得到困惑度的对数形式,将这个和做指数运算就可得到困惑度\n", 258 | " total_costs += cost\n", 259 | " iters += model.num_steps\n", 260 | "\n", 261 | " # 只在训练时输出日志\n", 262 | " if output_log and step % 100 == 0:\n", 263 | " print(\"After %d steps, perplexity is %.3f\" % (step, np.exp(total_costs / iters)))\n", 264 | " return np.exp(total_costs / iters)\n", 265 | " " 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 5, 271 | "metadata": { 272 | "collapsed": false 273 | }, 274 | "outputs": [ 275 | { 276 | "name": "stdout", 277 | "output_type": "stream", 278 | "text": [ 279 | "In iteration: 1\n", 280 | "After 0 steps, perplexity is 9997.503\n", 281 | "After 100 steps, perplexity is 1430.310\n", 282 | "After 200 steps, perplexity is 1019.812\n", 283 | "After 300 steps, perplexity is 884.877\n", 284 | "After 400 steps, perplexity is 770.536\n", 285 | "After 500 steps, perplexity is 685.862\n", 286 | "After 600 steps, perplexity is 627.773\n", 287 | "After 700 steps, perplexity is 584.603\n", 288 | "After 800 steps, perplexity is 550.645\n", 289 | "After 900 steps, perplexity is 522.333\n", 290 | "After 1000 steps, perplexity is 497.754\n", 291 | "After 1100 steps, perplexity is 477.317\n", 292 | "After 1200 steps, perplexity is 459.825\n", 293 | "After 1300 steps, perplexity is 446.264\n", 294 | "After 1400 steps, perplexity is 433.608\n", 295 | "After 1500 steps, perplexity is 423.149\n", 296 | "After 1600 steps, perplexity is 409.957\n", 297 | "After 1700 steps, perplexity is 400.390\n", 298 | "After 1800 steps, perplexity is 393.291\n", 299 | "Epoch: 1 Validation Perplexity: 239.055\n", 300 | "In iteration: 2\n", 301 | "After 0 steps, perplexity is 422.337\n", 302 | "After 100 steps, perplexity is 285.401\n", 303 | "After 200 steps, perplexity is 266.674\n", 304 | "After 300 steps, perplexity is 273.085\n", 305 | "After 400 steps, perplexity is 267.213\n", 306 | "After 500 steps, perplexity is 257.644\n", 307 | "After 600 steps, perplexity is 252.940\n", 308 | "After 700 steps, perplexity is 249.936\n", 309 | "After 800 steps, perplexity is 248.188\n", 310 | "After 900 steps, perplexity is 246.797\n", 311 | "After 1000 steps, perplexity is 244.862\n", 312 | "After 1100 steps, perplexity is 243.292\n", 313 | "After 1200 steps, perplexity is 241.627\n", 314 | "After 1300 steps, perplexity is 241.469\n", 315 | "After 1400 steps, perplexity is 240.662\n", 316 | "After 1500 steps, perplexity is 240.259\n", 317 | "After 1600 steps, perplexity is 237.447\n", 318 | "After 1700 steps, perplexity is 236.469\n", 319 | "After 1800 steps, perplexity is 236.575\n", 320 | "Epoch: 2 Validation Perplexity: 196.142\n", 321 | "In iteration: 3\n", 322 | "After 0 steps, perplexity is 353.527\n", 323 | "After 100 steps, perplexity is 236.278\n", 324 | "After 200 steps, perplexity is 221.385\n", 325 | "After 300 steps, perplexity is 228.476\n", 326 | "After 400 steps, perplexity is 224.843\n", 327 | "After 500 steps, perplexity is 217.486\n", 328 | "After 600 steps, perplexity is 214.905\n", 329 | "After 700 steps, perplexity is 213.104\n", 330 | "After 800 steps, perplexity is 212.414\n", 331 | "After 900 
steps, perplexity is 211.959\n", 332 | "After 1000 steps, perplexity is 210.966\n", 333 | "After 1100 steps, perplexity is 210.475\n", 334 | "After 1200 steps, perplexity is 209.590\n", 335 | "After 1300 steps, perplexity is 200.107\n", 336 | "After 1400 steps, perplexity is 200.050\n", 337 | "After 1500 steps, perplexity is 193.237\n", 338 | "After 1600 steps, perplexity is 192.123\n", 339 | "After 1700 steps, perplexity is 180.731\n", 340 | "After 1800 steps, perplexity is 183.199\n", 341 | "Epoch: 3 Validation Perplexity: 169.009\n", 342 | "Test Perplexity: 142.681\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "def main():\n", 348 | " train_data, valid_data, test_data, _ = ptb_raw_data(data_path)\n", 349 | "\n", 350 | " # 计算一个epoch需要训练的次数\n", 351 | " train_data_len = len(train_data)\n", 352 | " train_batch_len = train_data_len // train_batch_size\n", 353 | " train_epoch_size = (train_batch_len - 1) // train_num_step\n", 354 | "\n", 355 | " valid_data_len = len(valid_data)\n", 356 | " valid_batch_len = valid_data_len // eval_batch_size\n", 357 | " valid_epoch_size = (valid_batch_len - 1) // eval_num_step\n", 358 | "\n", 359 | " test_data_len = len(test_data)\n", 360 | " test_batch_len = test_data_len // eval_batch_size\n", 361 | " test_epoch_size = (test_batch_len - 1) // eval_num_step\n", 362 | "\n", 363 | " initializer = tf.random_uniform_initializer(-0.05, 0.05)\n", 364 | " with tf.variable_scope(\"language_model\", reuse=None, initializer=initializer):\n", 365 | " train_model = PTBModel(True, train_batch_size, train_num_step)\n", 366 | "\n", 367 | " with tf.variable_scope(\"language_model\", reuse=True, initializer=initializer):\n", 368 | " eval_model = PTBModel(False, eval_batch_size, eval_num_step)\n", 369 | "\n", 370 | " # 训练模型。\n", 371 | " with tf.Session() as session:\n", 372 | " tf.global_variables_initializer().run()\n", 373 | "\n", 374 | " train_queue = ptb_producer(train_data, train_model.batch_size, train_model.num_steps)\n", 375 | " eval_queue = ptb_producer(valid_data, eval_model.batch_size, eval_model.num_steps)\n", 376 | " test_queue = ptb_producer(test_data, eval_model.batch_size, eval_model.num_steps)\n", 377 | "\n", 378 | " coord = tf.train.Coordinator()\n", 379 | " threads = tf.train.start_queue_runners(sess=session, coord=coord)\n", 380 | "\n", 381 | " for i in range(num_epoch):\n", 382 | " print(\"In iteration: %d\" % (i + 1))\n", 383 | " run_epoch(session, train_model, train_queue, train_model.train_op, True, train_epoch_size)\n", 384 | "\n", 385 | " valid_perplexity = run_epoch(session, eval_model, eval_queue, tf.no_op(), False, valid_epoch_size)\n", 386 | " print(\"Epoch: %d Validation Perplexity: %.3f\" % (i + 1, valid_perplexity))\n", 387 | "\n", 388 | " test_perplexity = run_epoch(session, eval_model, test_queue, tf.no_op(), False, test_epoch_size)\n", 389 | " print(\"Test Perplexity: %.3f\" % test_perplexity)\n", 390 | "\n", 391 | " coord.request_stop()\n", 392 | " coord.join(threads)\n", 393 | "\n", 394 | "if __name__ == \"__main__\":\n", 395 | " main()" 396 | ] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": null, 401 | "metadata": { 402 | "collapsed": true 403 | }, 404 | "outputs": [], 405 | "source": [] 406 | } 407 | ], 408 | "metadata": { 409 | "kernelspec": { 410 | "display_name": "Python 3", 411 | "language": "python", 412 | "name": "python3" 413 | }, 414 | "language_info": { 415 | "codemirror_mode": { 416 | "name": "ipython", 417 | "version": 3 418 | }, 419 | "file_extension": ".py", 420 | "mimetype": "text/x-python", 421 
| "name": "python", 422 | "nbconvert_exporter": "python", 423 | "pygments_lexer": "ipython3", 424 | "version": "3.5.3" 425 | } 426 | }, 427 | "nbformat": 4, 428 | "nbformat_minor": 2 429 | } 430 | -------------------------------------------------------------------------------- /Experiments/Res/1.md: -------------------------------------------------------------------------------- 1 | all you need is here, Jupyter 2 | -------------------------------------------------------------------------------- /Experiments/Res/Synced.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/Synced.jpg -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras1.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras2.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras3.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras4.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras5.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras6.png -------------------------------------------------------------------------------- /Experiments/Res/gan_tf_keras7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiqizhixin/ML-Tutorial-Experiment/ce316d55439859e8aaf10903a55b52066e20146c/Experiments/Res/gan_tf_keras7.png -------------------------------------------------------------------------------- /Experiments/Synced.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Source Code:https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/Synced.py 3 | import tensorflow as tf 4 | import numpy as np 5 | import tensorflow.contrib.slim as slim 6 | 
import urllib.request 7 | 8 | OUTPUT_NODE = 36 9 | IMAGE_SIZE = 200 10 | NUM_CHANNELS = 1 11 | CONV1_SIZE = 2 12 | CONV2_SIZE = 3 13 | FC_SIZE = 512 14 | 15 | w = 0.44480515 16 | W = np.array([[57, 20.5, -19.33333206, -5.75, -7.20000076, -13.16666603], 17 | [2., 21.5, 7., -3.75, -8., -12.83333397], 18 | [2., 28., 7., -22., -9.20000076, -13.83333397], 19 | [88., 20.5, -19.33333206, -5.75, -8., -24.66666603], 20 | [67., 25., 6.66666794, -0.75, -10.60000038, -12.], 21 | [2., 26., 2.33333206, -1.5, -6.79999924, -9.83333397]]).astype(np.float32) 22 | 23 | 24 | def inference(input_tensor): 25 | with tf.variable_scope('layer1-conv1'): 26 | conv1_weights = tf.get_variable('weight', [CONV1_SIZE, CONV1_SIZE, 1, 1], 27 | initializer=tf.constant_initializer(W[0:2, 0:2])) 28 | conv1_biases = tf.get_variable('bias', [1], initializer=tf.constant_initializer(0.0)) 29 | conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME') 30 | relu1 = tf.nn.sigmoid(tf.nn.bias_add(conv1, conv1_biases)) 31 | 32 | with tf.name_scope('layer2-pool1'): 33 | pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 34 | 35 | with tf.variable_scope('layer3-conv2'): 36 | conv2_weights = tf.get_variable('weight', [CONV2_SIZE, CONV2_SIZE, 1, 1], 37 | initializer=tf.constant_initializer(W[0:3, 0:3])) 38 | conv2_biases = tf.get_variable('bias', [1], initializer=tf.constant_initializer(0.0)) 39 | conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME') 40 | relu2 = tf.nn.sigmoid(tf.nn.bias_add(conv2, conv2_biases)) 41 | 42 | with tf.name_scope('layer4-pool2'): 43 | pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') 44 | 45 | with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], activation_fn=tf.nn.sigmoid, stride=1, 46 | padding='SAME'): 47 | with tf.variable_scope('layer5-Inception_v3-Module'): 48 | with tf.variable_scope('Branch_0'): 49 | branch_0 = slim.conv2d(pool2, 1, [1, 1], 50 | weights_initializer=tf.constant_initializer(W[3:4, 3:4]), scope='Ince_0') 51 | with tf.variable_scope('Branch_1'): 52 | branch_1 = slim.conv2d(pool2, 1, [1, 1], 53 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_1_1') 54 | branch_1 = tf.concat([slim.conv2d(branch_1, 32, [1, 3], 55 | weights_initializer=tf.constant_initializer(W[3:4, 1:4]), 56 | scope='Ince_1_2a'), 57 | slim.conv2d(branch_1, 32, [3, 1], 58 | weights_initializer=tf.constant_initializer(W[1:4, 3:4]), 59 | scope='Ince_1_2b')], 3) 60 | with tf.variable_scope('Branch_2'): 61 | branch_2 = slim.conv2d(pool2, 1, [1, 1], 62 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_2_1') 63 | branch_2 = slim.conv2d(branch_2, 1, [3, 3], 64 | weights_initializer=tf.constant_initializer(W[0:3, 0:3]), scope='Ince_2_2') 65 | branch_2 = tf.concat([slim.conv2d(branch_2, 1, [1, 3], 66 | weights_initializer=tf.constant_initializer(W[0:1, 0:3]), 67 | scope='Ince_2_3a'), 68 | slim.conv2d(branch_2, 1, [3, 1], 69 | weights_initializer=tf.constant_initializer(W[0:3, 0:1]), 70 | scope='Ince_2_3b')], 3) 71 | with tf.variable_scope('Branch_3'): 72 | # branch_3 = slim.avg_pool2d(pool2, [3, 3],scope='Ince_3_1') 73 | branch_3 = slim.conv2d(pool2, 1, [1, 1], 74 | weights_initializer=tf.constant_initializer(W[4:5, 4:5]), scope='Ince_3_2') 75 | inception = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) 76 | 77 | inception_shape = inception.get_shape().as_list() 78 | nodes = inception_shape[1] * inception_shape[2] * inception_shape[3] 79 | 
reshaped = tf.reshape(inception, [1, nodes]) 80 | 81 | with tf.variable_scope('layer6-fc1'): 82 | fc1_weights = tf.get_variable('weight', [nodes, FC_SIZE], 83 | initializer=tf.truncated_normal_initializer(stddev=3, seed=3), trainable=False) 84 | fc1_biases = tf.get_variable('bias', [FC_SIZE], initializer=tf.constant_initializer(-10.0)) 85 | fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases) 86 | 87 | with tf.variable_scope('layer7-fc2'): 88 | fc2_weights = tf.get_variable('weight', [FC_SIZE, OUTPUT_NODE], 89 | initializer=tf.constant_initializer(0.0001)) 90 | fc2_biases = tf.get_variable('bias', [OUTPUT_NODE], initializer=tf.constant_initializer(-11.0)) 91 | secret = tf.matmul(fc1, fc2_weights) + fc2_biases 92 | return secret 93 | 94 | 95 | def synced(image): 96 | img_data = tf.image.decode_jpeg(image) 97 | resized = tf.image.resize_images(img_data, [IMAGE_SIZE, IMAGE_SIZE], method=1) 98 | img_gray = tf.reshape(tf.image.rgb_to_grayscale(resized), [1, IMAGE_SIZE, IMAGE_SIZE, 1]) 99 | img_norm = tf.cast(img_gray / 128 - 1, dtype=tf.float32) 100 | 101 | y_hat = tf.reshape(inference(img_norm), [6, 6]) - w 102 | y_norm = tf.matmul(W + 30, y_hat + tf.cast(tf.diag([1, 2, 3, 4, 5, 6]), dtype=tf.float32)) 103 | y_int = tf.reshape(tf.cast(y_norm, dtype=tf.int16), [1, 36]) 104 | c = [] 105 | 106 | with tf.Session() as sess: 107 | sess.run(tf.global_variables_initializer()) 108 | y = sess.run(y_int) 109 | for i in range(OUTPUT_NODE): 110 | c.append(chr(abs(y[0][i]))) 111 | print("".join(c)) 112 | 113 | 114 | def main(argv=None): 115 | urllib.request.urlretrieve( 116 | 'https://image.jiqizhixin.com/uploads/editor/051635e7-a31d-44d8-a97e-b34da37ddbbc/82418Synced.jpg', 117 | 'Synced.jpg') 118 | 119 | # 本宝宝只对 Synced 图像感兴趣,其它图片一概不理~ 120 | img_raw = tf.gfile.FastGFile("./Synced.jpg", "rb").read() 121 | synced(img_raw) 122 | 123 | 124 | if __name__ == '__main__': 125 | tf.app.run() 126 | -------------------------------------------------------------------------------- /Experiments/pytorch_TCN.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"TCN.ipynb","version":"0.3.2","views":{},"default_view":{},"provenance":[],"collapsed_sections":[]},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"w_1TesnEXIeI","colab_type":"text"},"cell_type":"markdown","source":["TCN原论文:An Empirical Evaluation of Generic Convolutional and Recurrent Networks for Sequence Modeling,https://arxiv.org/abs/1803.01271\n","官方实现地址:https://github.com/locuslab/TCN"]},{"metadata":{"id":"d0x7S8ufXIeJ","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0},"base_uri":"https://localhost:8080/","height":35},"outputId":"c28bfb8f-71b2-4f7c-dc47-05fae0a3dbf8","executionInfo":{"status":"ok","timestamp":1523528323720,"user_tz":-480,"elapsed":1665,"user":{"displayName":"Horatio J.S.Y","photoUrl":"//lh5.googleusercontent.com/-OyzfCRPtjb8/AAAAAAAAAAI/AAAAAAAAABE/e0LwcAUesbs/s50-c-k-no/photo.jpg","userId":"109157966837900544651"}}},"cell_type":"code","source":["!git clone https://github.com/HoratioJSY/tcn.git"],"execution_count":2,"outputs":[{"output_type":"stream","text":["fatal: destination path 'tcn' already exists and is not an empty directory.\r\n"],"name":"stdout"}]},{"metadata":{"id":"xiYn_TgxXIeM","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# http://pytorch.org/\n","from os 
import path\n","from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag\n","platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())\n","\n","accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'\n","\n","!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision\n","import torch"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8mY1bJNXXIeR","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["import os\n","import torch\n","from torch import nn\n","from torch.autograd import Variable\n","import pickle\n","from torch.nn.utils import weight_norm\n","import argparse\n","import time\n","import math\n","import torch.optim as optim"],"execution_count":0,"outputs":[]},{"metadata":{"id":"9i7QA4CxXIeT","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# utils.py,关于数据与词汇库等预定义的类与方法\n","\n","\"\"\"\n","Note: The meaning of batch_size in PTB is different from that in MNIST example. In MNIST,\n","batch_size is the # of sample data that is considered in each iteration; in PTB, however,\n","it is the number of segments to speed up computation. \n","The goal of PTB is to train a language model to predict the next word.\n","\"\"\"\n","\n","def data_generator(data_path):\n"," corpus = Corpus(data_path)\n"," pickle.dump(corpus, open(data_path + '/corpus', 'wb'))\n"," return corpus\n","\n","class Dictionary(object):\n"," def __init__(self):\n"," self.word2idx = {}\n"," self.idx2word = []\n","\n"," def add_word(self, word):\n"," if word not in self.word2idx:\n"," self.idx2word.append(word)\n"," self.word2idx[word] = len(self.idx2word) - 1\n"," return self.word2idx[word]\n","\n"," def __len__(self):\n"," return len(self.idx2word)\n","\n","class Corpus(object):\n"," def __init__(self, path):\n"," self.dictionary = Dictionary()\n"," self.train = self.tokenize(os.path.join(path, 'train.txt'))\n"," self.valid = self.tokenize(os.path.join(path, 'valid.txt'))\n"," self.test = self.tokenize(os.path.join(path, 'test.txt'))\n","\n"," def tokenize(self, path):\n"," \"\"\"Tokenizes a text file.\"\"\"\n"," assert os.path.exists(path)\n"," # Add words to the dictionary\n"," with open(path, 'r') as f:\n"," tokens = 0\n"," for line in f:\n"," words = line.split() + ['']\n"," tokens += len(words)\n"," for word in words:\n"," self.dictionary.add_word(word)\n","\n"," # Tokenize file content\n"," with open(path, 'r') as f:\n"," ids = torch.LongTensor(tokens)\n"," token = 0\n"," for line in f:\n"," words = line.split() + ['']\n"," for word in words:\n"," ids[token] = self.dictionary.word2idx[word]\n"," token += 1\n","\n"," return ids\n","\n","def batchify(data, batch_size, cuda):\n"," \"\"\"The output should have size [L x batch_size], where L could be a long sequence length\"\"\"\n"," # Work out how cleanly we can divide the dataset into batch_size parts (i.e. 
continuous seqs).\n"," nbatch = data.size(0) // batch_size\n"," # Trim off any extra elements that wouldn't cleanly fit (remainders).\n"," data = data.narrow(0, 0, nbatch * batch_size)\n"," # Evenly divide the data across the batch_size batches.\n"," data = data.view(batch_size, -1)\n"," if cuda:\n"," data = data.cuda()\n"," return data\n","\n","\n","def get_batch(source, i, seq_len, seq_le=None, evaluation=False):\n"," seq_le = min(seq_le if seq_le else seq_len, source.size(1) - 1 - i)\n"," data = Variable(source[:, i:i+seq_le], volatile=evaluation)\n"," target = Variable(source[:, i+1:i+1+seq_le]) # CAUTION: This is un-flattened!\n"," return data, target"],"execution_count":0,"outputs":[]},{"metadata":{"id":"F5WJz8jHXIeW","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# tcn.py\n","\n","# 定义实现因果卷积的类(继承自类nn.Module),其中super(Chomp1d, self).__init__()表示对继承自父类的属性进行初始化。\n","class Chomp1d(nn.Module):\n"," def __init__(self, chomp_size):\n"," super(Chomp1d, self).__init__()\n"," self.chomp_size = chomp_size\n","\n"," # 通过增加Padding的方式并对卷积后的张量做切片而实现因果卷积\n"," # tensor.contiguous()会返回有连续内存的相同张量\n"," def forward(self, x):\n"," return x[:, :, :-self.chomp_size].contiguous()\n","\n","\n","# 定义残差块,即两个一维卷积与恒等映射\n","class TemporalBlock(nn.Module):\n"," def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):\n"," super(TemporalBlock, self).__init__()\n","\n"," # 定义第一个空洞卷积层\n"," self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,\n"," stride=stride, padding=padding, dilation=dilation))\n"," # 根据第一个卷积层的输出与padding大小实现因果卷积\n"," self.chomp1 = Chomp1d(padding)\n"," # 添加激活函数与dropout正则化方法完成第一个卷积\n"," self.relu1 = nn.ReLU()\n"," self.dropout1 = nn.Dropout2d(dropout)\n","\n"," # 堆叠同样结构的第二个卷积层\n"," self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,\n"," stride=stride, padding=padding, dilation=dilation))\n"," self.chomp2 = Chomp1d(padding)\n"," self.relu2 = nn.ReLU()\n"," self.dropout2 = nn.Dropout2d(dropout)\n","\n"," # 将卷积模块的所有组建通过Sequential方法依次堆叠在一起\n"," self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,\n"," self.conv2, self.chomp2, self.relu2, self.dropout2)\n","\n"," # padding保证了输入序列与输出序列的长度相等,但卷积前的通道数与卷积后的通道数不一定一样。\n"," # 如果通道数不一样,那么需要对输入x做一个逐元素的一维卷积以使得它的纬度与前面两个卷积相等。\n"," self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None\n"," self.relu = nn.ReLU()\n"," self.init_weights()\n","\n"," # 初始化为从均值为0,标准差为0.01的正态分布中采样的随机值\n"," def init_weights(self):\n"," self.conv1.weight.data.normal_(0, 0.01)\n"," self.conv2.weight.data.normal_(0, 0.01)\n"," if self.downsample is not None:\n"," self.downsample.weight.data.normal_(0, 0.01)\n","\n"," # 结合卷积与输入的恒等映射(或输入的逐元素卷积),并投入ReLU 激活函数完成残差模块\n"," def forward(self, x):\n"," out = self.net(x)\n"," res = x if self.downsample is None else self.downsample(x)\n"," return self.relu(out + res)\n","\n","\n","# 定义时间卷积网络的架构\n","class TemporalConvNet(nn.Module):\n"," def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):\n"," super(TemporalConvNet, self).__init__()\n"," layers = []\n","\n"," # num_channels为各层卷积运算的输出通道数或卷积核数量,它的长度即需要执行的卷积层数量\n"," num_levels = len(num_channels)\n"," # 空洞卷积的扩张系数若随着网络层级的增加而成指数级增加,则可以增大感受野并不丢弃任何输入序列的元素\n"," # dilation_size根据层级数成指数增加,并从num_channels中抽取每一个残差模块的输入通道数与输出通道数\n"," for i in range(num_levels):\n"," dilation_size = 2 ** i\n"," in_channels = num_inputs if i == 0 else num_channels[i - 1]\n"," out_channels = num_channels[i]\n"," layers += 
[TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,\n"," padding=(kernel_size - 1) * dilation_size, dropout=dropout)]\n"," # 将所有残差模块堆叠起来组成一个深度卷积网络\n"," self.network = nn.Sequential(*layers)\n","\n"," def forward(self, x):\n"," return self.network(x)\n","\n"],"execution_count":0,"outputs":[]},{"metadata":{"id":"e_fhv7CeXIeZ","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0}}},"cell_type":"code","source":["# model.py\n","\n","class TCN(nn.Module):\n"," def __init__(self, input_size, output_size, num_channels,\n"," kernel_size=2, dropout=0.3, emb_dropout=0.1, tied_weights=False):\n"," super(TCN, self).__init__()\n","\n"," # 将一个批量的输入数据(one-hot encoding)送入编码器中成为一个批量的词嵌入向量\n"," # 其中output_size为词汇量,input_size为一个词向量的长度\n"," self.encoder = nn.Embedding(output_size, input_size)\n","\n"," # 构建网络\n"," self.tcn = TemporalConvNet(input_size, num_channels, kernel_size, dropout=dropout)\n","\n"," # 定义最后线性变换的纬度,即最后一个卷积层的通道数(类似2D卷积中的特征图数)到所有词汇的映射\n"," self.decoder = nn.Linear(num_channels[-1], output_size)\n","\n"," # 是否共享编码器与解码器的权重,默认是共享。共享的话需要保持隐藏单元数等于词嵌入长度,这样预测的向量才可以视为词嵌入向量\n"," if tied_weights:\n"," if num_channels[-1] != input_size:\n"," raise ValueError('When using the tied flag, nhid must be equal to emsize')\n"," self.decoder.weight = self.encoder.weight\n"," print(\"Weight tied\")\n","\n"," # 对输入词嵌入执行Dropout 表示随机从句子中舍弃词,迫使模型不依赖于单个词完成任务\n"," self.drop = nn.Dropout(emb_dropout)\n"," self.emb_dropout = emb_dropout\n"," self.init_weights()\n","\n"," def init_weights(self):\n"," self.encoder.weight.data.normal_(0, 0.01)\n"," self.decoder.bias.data.fill_(0)\n"," self.decoder.weight.data.normal_(0, 0.01)\n","\n"," # 先编码,训练中再随机丢弃词,输入到网络实现推断,最后将推断结果解码为词\n"," def forward(self, input):\n"," \"\"\"Input ought to have dimension (N, C_in, L_in), where L_in is the seq_len; here the input is (N, L, C)\"\"\"\n"," emb = self.drop(self.encoder(input))\n"," y = self.tcn(emb.transpose(1, 2)).transpose(1, 2)\n"," y = self.decoder(y)\n"," return y.contiguous()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"LnWXKa0bXIeb","colab_type":"code","colab":{"autoexec":{"startup":false,"wait_interval":0},"base_uri":"https://localhost:8080/","height":1367},"outputId":"aa46b8cf-6dea-462b-949a-0829b2b137d2","executionInfo":{"status":"ok","timestamp":1523528865489,"user_tz":-480,"elapsed":532292,"user":{"displayName":"Horatio J.S.Y","photoUrl":"//lh5.googleusercontent.com/-OyzfCRPtjb8/AAAAAAAAAAI/AAAAAAAAABE/e0LwcAUesbs/s50-c-k-no/photo.jpg","userId":"109157966837900544651"}}},"cell_type":"code","source":["#word_cnn_test.py\n","\n","cuda = True # 是否使用 CUDA\n","data_path = './tcn/data/penn' # 数据集地址\n","batch_size = 16 # 批量大小\n","nhid = 600 # 每层隐藏单元数\n","levels = 4 # 残差模块数\n","emsize = 600 # 词嵌入长度\n","k_size = 3 # 卷积核大小\n","dropout = 0.45 # 应用到网络层级中的随机失活率\n","emb_dropout = 0.25 # 应用到嵌入层的随机失活率\n","tied = True # 是否绑定编码器与解码器的权重\n","lr = 4 # 初始学习率\n","optimization ='SGD'\n","validseqlen = 40 # 验证序列长度\n","seq_len = 80 # 总序列长度\n","log_interval = 100 # 记录日志的间隔\n","clip = 0.35 # 梯度截断,-1表示不采用梯度截断\n","epochs =100 # 训练轮数的上限\n","\n","# Set the random seed manually for reproducibility.\n","torch.manual_seed(1111)\n","if torch.cuda.is_available():\n"," if not cuda:\n"," print(\"WARNING: You have a CUDA device, so you should probably run with --cuda\")\n","\n","corpus = data_generator(data_path)\n","eval_batch_size = 10\n","train_data = batchify(corpus.train, batch_size, cuda)\n","val_data = batchify(corpus.valid, eval_batch_size, cuda)\n","test_data = batchify(corpus.test, 
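# Note on Chomp1d/TemporalBlock above: each Conv1d is padded by (kernel_size - 1) * dilation
# and Chomp1d then slices that many elements off the right, so the output at time t depends
# only on inputs at times <= t (a causal convolution). With the word_cnn_test.py settings
# above (k_size = 3, levels = 4, dilation 2**i, two convs per block) the receptive field is
# 1 + 2*(k - 1)*(2**levels - 1) = 61 time steps. Quick arithmetic check:
k, levels = 3, 4
print(1 + sum(2 * (k - 1) * 2 ** i for i in range(levels)))   # 61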
eval_batch_size, cuda)\n","\n","\n","n_words = len(corpus.dictionary)\n","num_chans = [nhid] * (levels - 1) + [emsize]\n","\n","model = TCN(emsize, n_words, num_chans, dropout=dropout, emb_dropout=emb_dropout, kernel_size=k_size, tied_weights=tied)\n","\n","if cuda:\n"," model.cuda()\n","\n","# May use adaptive softmax to speed up training\n","criterion = nn.CrossEntropyLoss()\n","\n","\n","optimizer = getattr(optim, optimization)(model.parameters(), lr=lr)\n","\n","\n","def evaluate(data_source):\n"," model.eval()\n"," total_loss = 0\n"," processed_data_size = 0\n"," for i in range(0, data_source.size(1) - 1, validseqlen):\n"," if i + seq_len - validseqlen >= data_source.size(1) - 1:\n"," continue\n"," data, targets = get_batch(data_source, i, seq_len, evaluation=True)\n"," output = model(data)\n","\n"," # Discard the effective history, just like in training\n"," eff_history = seq_len - validseqlen\n"," final_output = output[:, eff_history:].contiguous().view(-1, n_words)\n"," final_target = targets[:, eff_history:].contiguous().view(-1)\n","\n"," loss = criterion(final_output, final_target)\n","\n"," # Note that we don't add TAR loss here\n"," total_loss += (data.size(1) - eff_history) * loss.data\n"," processed_data_size += data.size(1) - eff_history\n"," return total_loss[0] / processed_data_size\n","\n","\n","def train():\n"," # Turn on training mode which enables dropout.\n"," global train_data\n"," model.train()\n"," total_loss = 0\n"," start_time = time.time()\n"," for batch_idx, i in enumerate(range(0, train_data.size(1) - 1, validseqlen)):\n"," if i + seq_len - validseqlen >= train_data.size(1) - 1:\n"," continue\n"," data, targets = get_batch(train_data, i, seq_len)\n"," optimizer.zero_grad()\n"," output = model(data)\n","\n"," # Discard the effective history part\n"," eff_history = seq_len - validseqlen\n"," if eff_history < 0:\n"," raise ValueError(\"Valid sequence length must be smaller than sequence length!\")\n"," final_target = targets[:, eff_history:].contiguous().view(-1)\n"," final_output = output[:, eff_history:].contiguous().view(-1, n_words)\n"," loss = criterion(final_output, final_target)\n","\n"," loss.backward()\n"," if clip > 0:\n"," torch.nn.utils.clip_grad_norm(model.parameters(), clip)\n"," optimizer.step()\n","\n"," total_loss += loss.data\n","\n"," if batch_idx % log_interval == 0 and batch_idx > 0:\n"," cur_loss = total_loss[0] / log_interval\n"," elapsed = time.time() - start_time\n"," print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.5f} | ms/batch {:5.5f} | '\n"," 'loss {:5.2f} | ppl {:8.2f}'.format(\n"," epoch, batch_idx, train_data.size(1) // validseqlen, lr,\n"," elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))\n"," total_loss = 0\n"," start_time = time.time()\n","\n","\n","if __name__ == \"__main__\":\n"," best_vloss = 1e8\n","\n"," # At any point you can hit Ctrl + C to break out of training early.\n"," try:\n"," all_vloss = []\n"," for epoch in range(1, epochs+1):\n"," epoch_start_time = time.time()\n"," train()\n"," val_loss = evaluate(val_data)\n"," test_loss = evaluate(test_data)\n","\n"," print('-' * 89)\n"," print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '\n"," 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),\n"," val_loss, math.exp(val_loss)))\n"," print('| end of epoch {:3d} | time: {:5.2f}s | test loss {:5.2f} | '\n"," 'test ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),\n"," test_loss, math.exp(test_loss)))\n"," print('-' * 89)\n","\n"," # Save the model if the 
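# Note on evaluate()/train() above: each step feeds the model a seq_len-token window but only
# the last validseqlen positions contribute to the loss; eff_history = seq_len - validseqlen
# warm-up positions are sliced off, so every scored token has at least eff_history tokens of
# real left context, and consecutive windows overlap by exactly eff_history so the scored
# spans tile the data without double counting. A small sketch of the indexing (the value 200
# below is an arbitrary stand-in for data_source.size(1)):
seq_len, validseqlen = 80, 40
eff_history = seq_len - validseqlen
for i in range(0, 200 - 1, validseqlen):
    if i + seq_len - validseqlen >= 200 - 1:
        continue
    end = min(i + seq_len, 200 - 1)           # get_batch clips the window at the sequence end
    print((i, end), (i + eff_history, end))   # (fed window, scored span)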
validation loss is the best we've seen so far.\n"," if val_loss < best_vloss:\n"," with open(\"model.pt\", 'wb') as f:\n"," print('Save model!\\n')\n"," torch.save(model, f)\n"," best_vloss = val_loss\n","\n"," # Anneal the learning rate if the validation loss plateaus\n"," if epoch > 5 and val_loss >= max(all_vloss[-5:]):\n"," lr = lr / 2.\n"," for param_group in optimizer.param_groups:\n"," param_group['lr'] = lr\n"," all_vloss.append(val_loss)\n","\n"," except KeyboardInterrupt:\n"," print('-' * 89)\n"," print('Exiting from training early')\n","\n"," # Load the best saved model.\n"," with open(\"model.pt\", 'rb') as f:\n"," model = torch.load(f)\n","\n"," # Run on test data.\n"," test_loss = evaluate(test_data)\n"," print('=' * 89)\n"," print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(\n"," test_loss, math.exp(test_loss)))\n"," print('=' * 89)"],"execution_count":8,"outputs":[{"output_type":"stream","text":["Weight tied\n","| epoch 1 | 100/ 1452 batches | lr 4.00000 | ms/batch 118.81662 | loss 7.52 | ppl 1840.81\n","| epoch 1 | 200/ 1452 batches | lr 4.00000 | ms/batch 109.73478 | loss 6.81 | ppl 910.24\n","| epoch 1 | 300/ 1452 batches | lr 4.00000 | ms/batch 109.62409 | loss 6.58 | ppl 717.81\n","| epoch 1 | 400/ 1452 batches | lr 4.00000 | ms/batch 109.41673 | loss 6.37 | ppl 582.09\n","| epoch 1 | 500/ 1452 batches | lr 4.00000 | ms/batch 109.33626 | loss 6.23 | ppl 506.72\n","| epoch 1 | 600/ 1452 batches | lr 4.00000 | ms/batch 109.01555 | loss 6.21 | ppl 498.33\n","| epoch 1 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.83427 | loss 6.12 | ppl 452.68\n","| epoch 1 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.99423 | loss 6.01 | ppl 409.11\n","| epoch 1 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.66103 | loss 5.98 | ppl 396.50\n","| epoch 1 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.71295 | loss 5.93 | ppl 376.19\n","| epoch 1 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.76589 | loss 5.90 | ppl 363.97\n","| epoch 1 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.65258 | loss 5.88 | ppl 357.96\n","| epoch 1 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.92401 | loss 5.76 | ppl 318.15\n","| epoch 1 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.80318 | loss 5.74 | ppl 312.15\n","-----------------------------------------------------------------------------------------\n","| end of epoch 1 | time: 168.27s | valid loss 5.67 | valid ppl 289.70\n","| end of epoch 1 | time: 168.27s | test loss 5.64 | test ppl 280.59\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n"],"name":"stdout"},{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TCN. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TemporalConvNet. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type TemporalBlock. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". 
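# Note on the annealing rule above: from epoch 6 onward, if the current validation loss is no
# better than every one of the last five epochs' losses, the learning rate is halved and the
# new value is written into each optimizer.param_groups entry. Standalone sketch (loss values
# below are made up for illustration):
lr, epoch = 4.0, 6
all_vloss = [5.30, 5.25, 5.22, 5.20, 5.19]
val_loss = 5.31
if epoch > 5 and val_loss >= max(all_vloss[-5:]):
    lr = lr / 2.0
print(lr)   # 2.0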
It won't be checked \"\n","/usr/local/lib/python3.6/dist-packages/torch/serialization.py:158: UserWarning: Couldn't retrieve source code for container of type Chomp1d. It won't be checked for correctness upon loading.\n"," \"type \" + obj.__name__ + \". It won't be checked \"\n"],"name":"stderr"},{"output_type":"stream","text":["| epoch 2 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.80678 | loss 5.82 | ppl 336.42\n","| epoch 2 | 200/ 1452 batches | lr 4.00000 | ms/batch 108.55440 | loss 5.70 | ppl 298.02\n","| epoch 2 | 300/ 1452 batches | lr 4.00000 | ms/batch 108.44461 | loss 5.67 | ppl 290.36\n","| epoch 2 | 400/ 1452 batches | lr 4.00000 | ms/batch 108.52798 | loss 5.55 | ppl 258.41\n","| epoch 2 | 500/ 1452 batches | lr 4.00000 | ms/batch 108.56836 | loss 5.53 | ppl 251.28\n","| epoch 2 | 600/ 1452 batches | lr 4.00000 | ms/batch 108.47103 | loss 5.58 | ppl 265.04\n","| epoch 2 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.35539 | loss 5.55 | ppl 257.78\n","| epoch 2 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.45743 | loss 5.51 | ppl 246.27\n","| epoch 2 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.37481 | loss 5.51 | ppl 246.76\n","| epoch 2 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.62212 | loss 5.48 | ppl 240.42\n","| epoch 2 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.54413 | loss 5.50 | ppl 245.82\n","| epoch 2 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.48435 | loss 5.51 | ppl 246.79\n","| epoch 2 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.49503 | loss 5.39 | ppl 218.57\n","| epoch 2 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.47983 | loss 5.42 | ppl 226.72\n","-----------------------------------------------------------------------------------------\n","| end of epoch 2 | time: 166.66s | valid loss 5.39 | valid ppl 218.32\n","| end of epoch 2 | time: 166.66s | test loss 5.34 | test ppl 209.52\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n","| epoch 3 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.54227 | loss 5.51 | ppl 247.27\n","| epoch 3 | 200/ 1452 batches | lr 4.00000 | ms/batch 108.50945 | loss 5.42 | ppl 226.21\n","| epoch 3 | 300/ 1452 batches | lr 4.00000 | ms/batch 108.29926 | loss 5.40 | ppl 222.48\n","| epoch 3 | 400/ 1452 batches | lr 4.00000 | ms/batch 108.35232 | loss 5.29 | ppl 197.79\n","| epoch 3 | 500/ 1452 batches | lr 4.00000 | ms/batch 108.38521 | loss 5.28 | ppl 196.93\n","| epoch 3 | 600/ 1452 batches | lr 4.00000 | ms/batch 108.17204 | loss 5.34 | ppl 208.93\n","| epoch 3 | 700/ 1452 batches | lr 4.00000 | ms/batch 108.27651 | loss 5.33 | ppl 205.63\n","| epoch 3 | 800/ 1452 batches | lr 4.00000 | ms/batch 108.37309 | loss 5.29 | ppl 198.83\n","| epoch 3 | 900/ 1452 batches | lr 4.00000 | ms/batch 108.25190 | loss 5.30 | ppl 199.43\n","| epoch 3 | 1000/ 1452 batches | lr 4.00000 | ms/batch 108.25034 | loss 5.28 | ppl 196.01\n","| epoch 3 | 1100/ 1452 batches | lr 4.00000 | ms/batch 108.43868 | loss 5.32 | ppl 204.39\n","| epoch 3 | 1200/ 1452 batches | lr 4.00000 | ms/batch 108.42956 | loss 5.32 | ppl 204.21\n","| epoch 3 | 1300/ 1452 batches | lr 4.00000 | ms/batch 108.51916 | loss 5.19 | ppl 179.70\n","| epoch 3 | 1400/ 1452 batches | lr 4.00000 | ms/batch 108.60412 | loss 5.25 | ppl 189.96\n","-----------------------------------------------------------------------------------------\n","| end of epoch 3 | time: 166.44s | valid loss 5.22 | valid ppl 185.76\n","| end of epoch 3 | time: 166.44s | test loss 5.18 | test ppl 
177.27\n","-----------------------------------------------------------------------------------------\n","Save model!\n","\n","| epoch 4 | 100/ 1452 batches | lr 4.00000 | ms/batch 109.56948 | loss 5.35 | ppl 209.77\n","-----------------------------------------------------------------------------------------\n","Exiting from training early\n","=========================================================================================\n","| End of training | test loss 5.18 | test ppl 177.27\n","=========================================================================================\n"],"name":"stdout"}]}]} -------------------------------------------------------------------------------- /Experiments/swish_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 15 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 16 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 17 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 18 | "After 0 training step(s), validation accuracy using average model is 0.0592 \n", 19 | "After 1000 training step(s), validation accuracy using average model is 0.9758 \n", 20 | "After 2000 training step(s), validation accuracy using average model is 0.981 \n", 21 | "After 3000 training step(s), validation accuracy using average model is 0.9808 \n", 22 | "After 4000 training step(s), validation accuracy using average model is 0.9808 \n", 23 | "After 5000 training step(s), validation accuracy using average model is 0.9826 \n", 24 | "After 6000 training step(s), validation accuracy using average model is 0.9816 \n", 25 | "After 7000 training step(s), validation accuracy using average model is 0.9818 \n", 26 | "After 8000 training step(s), validation accuracy using average model is 0.9826 \n", 27 | "After 9000 training step(s), validation accuracy using average model is 0.9822 \n", 28 | "After 10000 training step(s), test accuracy using average model is 0.9829\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import tensorflow as tf\n", 34 | "from tensorflow.examples.tutorials.mnist import input_data\n", 35 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 36 | "\n", 37 | "\n", 38 | "INPUT_NODE = 784 \n", 39 | "OUTPUT_NODE = 10 \n", 40 | "LAYER1_NODE = 500 \n", 41 | " \n", 42 | "BATCH_SIZE = 100 \n", 43 | "\n", 44 | "# 模型相关的参数\n", 45 | "LEARNING_RATE_BASE = 0.8 \n", 46 | "LEARNING_RATE_DECAY = 0.99 \n", 47 | "REGULARAZTION_RATE = 0.0001 \n", 48 | "TRAINING_STEPS = 10000 \n", 49 | "MOVING_AVERAGE_DECAY = 0.99 \n", 50 | "\n", 51 | "def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):\n", 52 | " # 不使用滑动平均类\n", 53 | " if avg_class == None:\n", 54 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)\n", 55 | " return tf.matmul(layer1, weights2) + biases2\n", 56 | "\n", 57 | " else:\n", 58 | " \n", 59 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))\n", 60 | " return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2) \n", 61 | " \n", 62 | "def train(mnist):\n", 63 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 64 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], 
name='y-input')\n", 65 | " # 生成隐藏层的参数。\n", 66 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 67 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 68 | " # 生成输出层的参数。\n", 69 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))\n", 70 | " biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 71 | "\n", 72 | " # 计算不含滑动平均类的前向传播结果\n", 73 | " y = inference(x, None, weights1, biases1, weights2, biases2)\n", 74 | " \n", 75 | " # 定义训练轮数及相关的滑动平均类 \n", 76 | " global_step = tf.Variable(0, trainable=False)\n", 77 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 78 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 79 | " average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)\n", 80 | " \n", 81 | " # 计算交叉熵及其平均值\n", 82 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 83 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 84 | " \n", 85 | " # 损失函数的计算\n", 86 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 87 | " regularaztion = regularizer(weights1) + regularizer(weights2)\n", 88 | " loss = cross_entropy_mean + regularaztion\n", 89 | " \n", 90 | " # 设置指数衰减的学习率。\n", 91 | " learning_rate = tf.train.exponential_decay(\n", 92 | " LEARNING_RATE_BASE,\n", 93 | " global_step,\n", 94 | " mnist.train.num_examples / BATCH_SIZE,\n", 95 | " LEARNING_RATE_DECAY,\n", 96 | " staircase=True)\n", 97 | " \n", 98 | " # 优化损失函数\n", 99 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 100 | " \n", 101 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 102 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 103 | " train_op = tf.no_op(name='train')\n", 104 | "\n", 105 | " # 计算正确率\n", 106 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 107 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 108 | " \n", 109 | " # 初始化会话并开始训练过程。\n", 110 | " with tf.Session() as sess:\n", 111 | " tf.global_variables_initializer().run()\n", 112 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 113 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 114 | " \n", 115 | " # 循环的训练神经网络。\n", 116 | " for i in range(TRAINING_STEPS):\n", 117 | " if i % 1000 == 0:\n", 118 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 119 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 120 | " \n", 121 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 122 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 123 | "\n", 124 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 125 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 126 | "\n", 127 | "train(mnist)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 9, 133 | "metadata": { 134 | "collapsed": false 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 142 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 143 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 144 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 145 | "After 0 training step(s), 
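# Note on the learning-rate schedule in the cell above: tf.train.exponential_decay with
# staircase=True multiplies the base rate by LEARNING_RATE_DECAY once per full pass of
# decay_steps = mnist.train.num_examples / BATCH_SIZE batches (550 with the usual 55,000-image
# MNIST training split). Written out as plain Python:
def decayed_lr(step, base=0.8, decay=0.99, decay_steps=550):
    return base * decay ** (step // decay_steps)   # staircase=True -> integer division
print(decayed_lr(0), decayed_lr(1000), decayed_lr(10000))   # 0.8, ~0.792, ~0.668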
validation accuracy using average model is 0.0932 \n", 146 | "After 1000 training step(s), validation accuracy using average model is 0.9466 \n", 147 | "After 2000 training step(s), validation accuracy using average model is 0.962 \n", 148 | "After 3000 training step(s), validation accuracy using average model is 0.9662 \n", 149 | "After 4000 training step(s), validation accuracy using average model is 0.967 \n", 150 | "After 5000 training step(s), validation accuracy using average model is 0.9696 \n", 151 | "After 6000 training step(s), validation accuracy using average model is 0.9686 \n", 152 | "After 7000 training step(s), validation accuracy using average model is 0.9698 \n", 153 | "After 8000 training step(s), validation accuracy using average model is 0.97 \n", 154 | "After 9000 training step(s), validation accuracy using average model is 0.97 \n", 155 | "After 10000 training step(s), test accuracy using average model is 0.9666\n" 156 | ] 157 | } 158 | ], 159 | "source": [ 160 | "import tensorflow as tf\n", 161 | "from tensorflow.examples.tutorials.mnist import input_data\n", 162 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 163 | "\n", 164 | "\n", 165 | "INPUT_NODE = 784 \n", 166 | "OUTPUT_NODE = 10 \n", 167 | "LAYER1_NODE = 500 \n", 168 | "LAYER2_NODE = 500 \n", 169 | "LAYER3_NODE = 500 \n", 170 | "LAYER4_NODE = 500 \n", 171 | "LAYER5_NODE = 500 \n", 172 | "LAYER6_NODE = 500 \n", 173 | "LAYER7_NODE = 500 \n", 174 | "LAYER8_NODE = 300 \n", 175 | "LAYER9_NODE = 200\n", 176 | "LAYER10_NODE = 100 \n", 177 | " \n", 178 | "BATCH_SIZE = 100 \n", 179 | "\n", 180 | "# 模型相关的参数\n", 181 | "LEARNING_RATE_BASE = 0.008 \n", 182 | "LEARNING_RATE_DECAY = 0.99 \n", 183 | "REGULARAZTION_RATE = 0.0001 \n", 184 | "TRAINING_STEPS = 10000 \n", 185 | "MOVING_AVERAGE_DECAY = 0.99 \n", 186 | "\n", 187 | "def inference(input_tensor, avg_class, W, B):\n", 188 | " # 不使用滑动平均类\n", 189 | " if avg_class == None:\n", 190 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, W[0]) + B[0])\n", 191 | " layer2 = tf.nn.relu(tf.matmul(layer1, W[1]) + B[1])\n", 192 | " layer3 = tf.nn.relu(tf.matmul(layer2, W[2]) + B[2])\n", 193 | " layer4 = tf.nn.relu(tf.matmul(layer3, W[3]) + B[3])\n", 194 | " layer5 = tf.nn.relu(tf.matmul(layer4, W[4]) + B[4])\n", 195 | " layer6 = tf.nn.relu(tf.matmul(layer5, W[5]) + B[5])\n", 196 | " layer7 = tf.nn.relu(tf.matmul(layer6, W[6]) + B[6])\n", 197 | " layer8 = tf.nn.relu(tf.matmul(layer7, W[7]) + B[7])\n", 198 | " layer9 = tf.nn.relu(tf.matmul(layer8, W[8]) + B[8])\n", 199 | " layer10 = tf.nn.relu(tf.matmul(layer9, W[9]) + B[9])\n", 200 | " return tf.matmul(layer10, W[10]) + B[10]\n", 201 | " \n", 202 | " else:\n", 203 | " \n", 204 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0]))\n", 205 | " layer2 = tf.nn.relu(tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1]))\n", 206 | " layer3 = tf.nn.relu(tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2]))\n", 207 | " layer4 = tf.nn.relu(tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3]))\n", 208 | " layer5 = tf.nn.relu(tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4]))\n", 209 | " layer6 = tf.nn.relu(tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5]))\n", 210 | " layer7 = tf.nn.relu(tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6]))\n", 211 | " layer8 = tf.nn.relu(tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7]))\n", 212 | " layer9 = 
tf.nn.relu(tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8]))\n", 213 | " layer10 = tf.nn.relu(tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9]))\n", 214 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 215 | " \n", 216 | "def train(mnist):\n", 217 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 218 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 219 | " \n", 220 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 221 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 222 | " \n", 223 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 224 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 225 | " \n", 226 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 227 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 228 | " \n", 229 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 230 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 231 | " \n", 232 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 233 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 234 | " \n", 235 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 236 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 237 | " \n", 238 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 239 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 240 | " \n", 241 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 242 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 243 | " \n", 244 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 245 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 246 | " \n", 247 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 248 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 249 | " \n", 250 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 251 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 252 | " \n", 253 | " W=[weights1, weights2, weights3, weights4, weights5, weights6, weights7, weights8, weights9, weights10, weights11]\n", 254 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 255 | " \n", 256 | " # 计算不含滑动平均类的前向传播结果\n", 257 | " y = inference(x, None, W, B)\n", 258 | " \n", 259 | " # 定义训练轮数及相关的滑动平均类 \n", 260 | " global_step = tf.Variable(0, trainable=False)\n", 261 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 262 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 263 | " average_y = inference(x, variable_averages, W, B)\n", 264 | " \n", 265 | " # 计算交叉熵及其平均值\n", 266 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 267 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 268 | " \n", 269 | " # 损失函数的计算\n", 270 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 271 | " regularaztion = regularizer(W[0]) 
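# Note on the loop that follows: the accumulated value is assigned to `regularazation` while
# the running total that is read and added to the loss is spelled `regularaztion`, so as
# written only regularizer(W[0]) actually reaches the loss term. A corrected accumulation over
# the weight list would be:
#     regularaztion = regularizer(W[0])
#     for i in range(1, 11):
#         regularaztion = regularaztion + regularizer(W[i])
#     loss = cross_entropy_mean + regularaztion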
\n", 272 | " for i in range(1,11):\n", 273 | " regularazation=regularaztion + regularizer(W[i]) \n", 274 | " loss = cross_entropy_mean + regularaztion\n", 275 | " \n", 276 | " # 设置指数衰减的学习率。\n", 277 | " learning_rate = tf.train.exponential_decay(\n", 278 | " LEARNING_RATE_BASE,\n", 279 | " global_step,\n", 280 | " mnist.train.num_examples / BATCH_SIZE,\n", 281 | " LEARNING_RATE_DECAY,\n", 282 | " staircase=True)\n", 283 | " \n", 284 | " # 优化损失函数\n", 285 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 286 | " \n", 287 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 288 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 289 | " train_op = tf.no_op(name='train')\n", 290 | "\n", 291 | " # 计算正确率\n", 292 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 293 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 294 | " \n", 295 | " # 初始化会话并开始训练过程。\n", 296 | " with tf.Session() as sess:\n", 297 | " tf.global_variables_initializer().run()\n", 298 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 299 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 300 | " \n", 301 | " # 循环的训练神经网络。\n", 302 | " for i in range(TRAINING_STEPS):\n", 303 | " if i % 1000 == 0:\n", 304 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 305 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 306 | " \n", 307 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 308 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 309 | "\n", 310 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 311 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 312 | "\n", 313 | "train(mnist)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": 13, 319 | "metadata": { 320 | "collapsed": false 321 | }, 322 | "outputs": [ 323 | { 324 | "name": "stdout", 325 | "output_type": "stream", 326 | "text": [ 327 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 328 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 329 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 330 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 331 | "After 0 training step(s), validation accuracy using average model is 0.0902 \n", 332 | "After 1000 training step(s), validation accuracy using average model is 0.9516 \n", 333 | "After 2000 training step(s), validation accuracy using average model is 0.9614 \n", 334 | "After 3000 training step(s), validation accuracy using average model is 0.9668 \n", 335 | "After 4000 training step(s), validation accuracy using average model is 0.9688 \n", 336 | "After 5000 training step(s), validation accuracy using average model is 0.971 \n", 337 | "After 6000 training step(s), validation accuracy using average model is 0.9742 \n", 338 | "After 7000 training step(s), validation accuracy using average model is 0.9752 \n", 339 | "After 8000 training step(s), validation accuracy using average model is 0.9738 \n", 340 | "After 9000 training step(s), validation accuracy using average model is 0.975 \n", 341 | "After 10000 training step(s), test accuracy using average model is 0.9722\n" 342 | ] 343 | } 344 | ], 345 | "source": [ 346 | "import tensorflow as tf\n", 347 | "from tensorflow.examples.tutorials.mnist import input_data\n", 348 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", 
one_hot=True)\n", 349 | "\n", 350 | "\n", 351 | "INPUT_NODE = 784 \n", 352 | "OUTPUT_NODE = 10 \n", 353 | "LAYER1_NODE = 500 \n", 354 | "LAYER2_NODE = 500 \n", 355 | "LAYER3_NODE = 500 \n", 356 | "LAYER4_NODE = 500 \n", 357 | "LAYER5_NODE = 500 \n", 358 | "LAYER6_NODE = 500 \n", 359 | "LAYER7_NODE = 500 \n", 360 | "LAYER8_NODE = 300 \n", 361 | "LAYER9_NODE = 200\n", 362 | "LAYER10_NODE = 100 \n", 363 | " \n", 364 | "BATCH_SIZE = 100 \n", 365 | "\n", 366 | "# 模型相关的参数\n", 367 | "LEARNING_RATE_BASE = 0.008 \n", 368 | "LEARNING_RATE_DECAY = 0.99 \n", 369 | "REGULARAZTION_RATE = 0.0001 \n", 370 | "TRAINING_STEPS = 10000 \n", 371 | "MOVING_AVERAGE_DECAY = 0.99 \n", 372 | "\n", 373 | "def inference(input_tensor, avg_class, W, B):\n", 374 | " # 不使用滑动平均类\n", 375 | " if avg_class == None:\n", 376 | " ac_1=tf.matmul(input_tensor, W[0]) + B[0]\n", 377 | " layer1 = ac_1*tf.nn.sigmoid(ac_1)\n", 378 | " ac_2 = tf.matmul(layer1, W[1]) + B[1]\n", 379 | " layer2 = ac_2*tf.nn.sigmoid(ac_2)\n", 380 | " ac_3 = tf.matmul(layer2, W[2]) + B[2]\n", 381 | " layer3 = ac_3*tf.nn.sigmoid(ac_3)\n", 382 | " ac_4 = tf.matmul(layer3, W[3]) + B[3]\n", 383 | " layer4 = ac_4*tf.nn.sigmoid(ac_4)\n", 384 | " ac_5 = tf.matmul(layer4, W[4]) + B[4]\n", 385 | " layer5 = ac_5*tf.nn.sigmoid(ac_5)\n", 386 | " ac_6 = tf.matmul(layer5, W[5]) + B[5]\n", 387 | " layer6 = ac_6*tf.nn.sigmoid(ac_6)\n", 388 | " ac_7 = tf.matmul(layer6, W[6]) + B[6]\n", 389 | " layer7 = ac_7*tf.nn.sigmoid(ac_7)\n", 390 | " ac_8 = tf.matmul(layer7, W[7]) + B[7]\n", 391 | " layer8 = ac_8*tf.nn.sigmoid(ac_8)\n", 392 | " ac_9 = tf.matmul(layer8, W[8]) + B[8]\n", 393 | " layer9 = ac_9*tf.nn.sigmoid(ac_9)\n", 394 | " ac_10 = tf.matmul(layer9, W[9]) + B[9]\n", 395 | " layer10 = ac_10*tf.nn.sigmoid(ac_10)\n", 396 | " return tf.matmul(layer10, W[10]) + B[10]\n", 397 | " \n", 398 | " else:\n", 399 | " ac_1=tf.matmul(input_tensor, avg_class.average(W[0])) + avg_class.average(B[0])\n", 400 | " layer1 = ac_1*tf.nn.sigmoid(ac_1)\n", 401 | " ac_2=tf.matmul(layer1, avg_class.average(W[1])) + avg_class.average(B[1])\n", 402 | " layer2 = ac_2*tf.nn.sigmoid(ac_2)\n", 403 | " ac_3=tf.matmul(layer2, avg_class.average(W[2])) + avg_class.average(B[2])\n", 404 | " layer3 = ac_3*tf.nn.sigmoid(ac_3)\n", 405 | " ac_4=tf.matmul(layer3, avg_class.average(W[3])) + avg_class.average(B[3])\n", 406 | " layer4 = ac_4*tf.nn.sigmoid(ac_4)\n", 407 | " ac_5=tf.matmul(layer4, avg_class.average(W[4])) + avg_class.average(B[4])\n", 408 | " layer5 = ac_5*tf.nn.sigmoid(ac_5)\n", 409 | " ac_6=tf.matmul(layer5, avg_class.average(W[5])) + avg_class.average(B[5])\n", 410 | " layer6 = ac_6*tf.nn.sigmoid(ac_6)\n", 411 | " ac_7=tf.matmul(layer6, avg_class.average(W[6])) + avg_class.average(B[6])\n", 412 | " layer7 = ac_7*tf.nn.sigmoid(ac_7)\n", 413 | " ac_8=tf.matmul(layer7, avg_class.average(W[7])) + avg_class.average(B[7])\n", 414 | " layer8 = ac_8*tf.nn.sigmoid(ac_8)\n", 415 | " ac_9=tf.matmul(layer8, avg_class.average(W[8])) + avg_class.average(B[8])\n", 416 | " layer9 = ac_9*tf.nn.sigmoid(ac_9)\n", 417 | " ac_10=tf.matmul(layer9, avg_class.average(W[9])) + avg_class.average(B[9])\n", 418 | " layer10 = ac_10*tf.nn.sigmoid(ac_10)\n", 419 | " return tf.matmul(layer10, avg_class.average(W[10])) + avg_class.average(B[10]) \n", 420 | " \n", 421 | "def train(mnist):\n", 422 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 423 | " y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 424 | " \n", 425 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, 
LAYER1_NODE], stddev=0.1))\n", 426 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 427 | " \n", 428 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, LAYER2_NODE], stddev=0.1))\n", 429 | " biases2 = tf.Variable(tf.constant(0.1, shape=[ LAYER2_NODE]))\n", 430 | " \n", 431 | " weights3 = tf.Variable(tf.truncated_normal([ LAYER2_NODE, LAYER3_NODE], stddev=0.1))\n", 432 | " biases3 = tf.Variable(tf.constant(0.1, shape=[LAYER3_NODE]))\n", 433 | " \n", 434 | " weights4 = tf.Variable(tf.truncated_normal([LAYER3_NODE, LAYER4_NODE], stddev=0.1))\n", 435 | " biases4 = tf.Variable(tf.constant(0.1, shape=[LAYER4_NODE]))\n", 436 | " \n", 437 | " weights5 = tf.Variable(tf.truncated_normal([LAYER4_NODE, LAYER5_NODE], stddev=0.1))\n", 438 | " biases5 = tf.Variable(tf.constant(0.1, shape=[LAYER5_NODE]))\n", 439 | " \n", 440 | " weights6 = tf.Variable(tf.truncated_normal([LAYER5_NODE, LAYER6_NODE], stddev=0.1))\n", 441 | " biases6 = tf.Variable(tf.constant(0.1, shape=[LAYER6_NODE]))\n", 442 | " \n", 443 | " weights7 = tf.Variable(tf.truncated_normal([LAYER6_NODE, LAYER7_NODE], stddev=0.1))\n", 444 | " biases7 = tf.Variable(tf.constant(0.1, shape=[LAYER7_NODE]))\n", 445 | " \n", 446 | " weights8 = tf.Variable(tf.truncated_normal([LAYER7_NODE, LAYER8_NODE], stddev=0.1))\n", 447 | " biases8 = tf.Variable(tf.constant(0.1, shape=[LAYER8_NODE]))\n", 448 | " \n", 449 | " weights9 = tf.Variable(tf.truncated_normal([LAYER8_NODE, LAYER9_NODE], stddev=0.1))\n", 450 | " biases9 = tf.Variable(tf.constant(0.1, shape=[LAYER9_NODE]))\n", 451 | " \n", 452 | " weights10 = tf.Variable(tf.truncated_normal([LAYER9_NODE, LAYER10_NODE], stddev=0.1))\n", 453 | " biases10 = tf.Variable(tf.constant(0.1, shape=[LAYER10_NODE]))\n", 454 | " \n", 455 | " weights11 = tf.Variable(tf.truncated_normal([LAYER10_NODE, OUTPUT_NODE], stddev=0.1))\n", 456 | " biases11 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 457 | " \n", 458 | " W=[weights1, weights2, weights3, weights4, weights5, weights6, weights7, weights8, weights9, weights10, weights11]\n", 459 | " B=[biases1, biases2, biases3, biases4, biases5, biases6, biases7, biases8, biases9, biases10, biases11]\n", 460 | " \n", 461 | " # 计算不含滑动平均类的前向传播结果\n", 462 | " y = inference(x, None, W, B)\n", 463 | " \n", 464 | " # 定义训练轮数及相关的滑动平均类 \n", 465 | " global_step = tf.Variable(0, trainable=False)\n", 466 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 467 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 468 | " average_y = inference(x, variable_averages, W, B)\n", 469 | " \n", 470 | " # 计算交叉熵及其平均值\n", 471 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 472 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 473 | " \n", 474 | " # 损失函数的计算\n", 475 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 476 | " regularaztion = regularizer(W[0]) \n", 477 | " for i in range(1,11):\n", 478 | " regularazation=regularaztion + regularizer(W[i]) \n", 479 | " loss = cross_entropy_mean + regularaztion\n", 480 | " \n", 481 | " # 设置指数衰减的学习率。\n", 482 | " learning_rate = tf.train.exponential_decay(\n", 483 | " LEARNING_RATE_BASE,\n", 484 | " global_step,\n", 485 | " mnist.train.num_examples / BATCH_SIZE,\n", 486 | " LEARNING_RATE_DECAY,\n", 487 | " staircase=True)\n", 488 | " \n", 489 | " # 优化损失函数\n", 490 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, 
global_step=global_step)\n", 491 | " \n", 492 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 493 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 494 | " train_op = tf.no_op(name='train')\n", 495 | "\n", 496 | " # 计算正确率\n", 497 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 498 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 499 | " \n", 500 | " # 初始化会话并开始训练过程。\n", 501 | " with tf.Session() as sess:\n", 502 | " tf.global_variables_initializer().run()\n", 503 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 504 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 505 | " \n", 506 | " # 循环的训练神经网络。\n", 507 | " for i in range(TRAINING_STEPS):\n", 508 | " if i % 1000 == 0:\n", 509 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 510 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 511 | " \n", 512 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 513 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 514 | "\n", 515 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 516 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 517 | "\n", 518 | "train(mnist)" 519 | ] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "execution_count": null, 524 | "metadata": { 525 | "collapsed": true 526 | }, 527 | "outputs": [], 528 | "source": [] 529 | } 530 | ], 531 | "metadata": { 532 | "kernelspec": { 533 | "display_name": "Python 3", 534 | "language": "python", 535 | "name": "python3" 536 | }, 537 | "language_info": { 538 | "codemirror_mode": { 539 | "name": "ipython", 540 | "version": 3 541 | }, 542 | "file_extension": ".py", 543 | "mimetype": "text/x-python", 544 | "name": "python", 545 | "nbconvert_exporter": "python", 546 | "pygments_lexer": "ipython3", 547 | "version": "3.5.3" 548 | } 549 | }, 550 | "nbformat": 4, 551 | "nbformat_minor": 2 552 | } 553 | -------------------------------------------------------------------------------- /Experiments/tf_CNN_Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "b'Hello, TensorFlow!'\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import tensorflow as tf\n", 20 | "import numpy as np\n", 21 | "hello = tf.constant('Hello, TensorFlow!')\n", 22 | "sess = tf.Session()\n", 23 | "print(sess.run(hello))" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": { 30 | "collapsed": true 31 | }, 32 | "outputs": [], 33 | "source": [ 34 | "import numpy as np\n", 35 | "\n", 36 | "\n", 37 | "a = tf.constant(2, tf.int16)\n", 38 | "b = tf.constant(4, tf.float32)\n", 39 | "c = tf.constant(8, tf.float32)\n", 40 | "\n", 41 | "d = tf.Variable(2, tf.int16)\n", 42 | "e = tf.Variable(4, tf.float32)\n", 43 | "f = tf.Variable(8, tf.float32)\n", 44 | "\n", 45 | "g = tf.constant(np.zeros(shape=(2,2), dtype=np.float32)) #可以正常声明变量\n", 46 | "\n", 47 | "h = tf.zeros([11], tf.int16)\n", 48 | "i = tf.ones([2,2], tf.float32)\n", 49 | "j = tf.zeros([1000,4,3], tf.float64)\n", 50 | "\n", 51 | "k = tf.Variable(tf.zeros([2,2], tf.float32))\n", 52 | "l = tf.Variable(tf.zeros([5,6,5], tf.float32))" 53 | ] 54 | }, 55 | { 56 | "cell_type": 
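# Note on swish_test.ipynb above: the three cells train (1) a single 500-unit ReLU hidden
# layer at base LR 0.8 (test accuracy 0.9829), (2) ten ReLU layers at base LR 0.008 (0.9666),
# and (3) the same ten-layer net with swish written inline as ac * tf.nn.sigmoid(ac) (0.9722).
# A reusable helper, assuming the same TF 1.x graph-mode API the notebooks use:
import tensorflow as tf

def swish(x):
    """Swish activation: f(x) = x * sigmoid(x)."""
    return x * tf.nn.sigmoid(x)

# e.g. layer1 = swish(tf.matmul(input_tensor, W[0]) + B[0])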
"code", 57 | "execution_count": 11, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [ 62 | { 63 | "name": "stdout", 64 | "output_type": "stream", 65 | "text": [ 66 | "\n", 67 | "8\n", 68 | "[[ 0. 0.]\n", 69 | " [ 0. 0.]]\n" 70 | ] 71 | } 72 | ], 73 | "source": [ 74 | "a = tf.constant(2, tf.int16)\n", 75 | "b = tf.constant(4, tf.float32)\n", 76 | "\n", 77 | "graph = tf.Graph()\n", 78 | "with graph.as_default():\n", 79 | " a = tf.Variable(8, tf.float32)\n", 80 | " b = tf.Variable(tf.zeros([2,2], tf.float32))\n", 81 | " \n", 82 | "with tf.Session(graph=graph) as session:\n", 83 | " tf.global_variables_initializer().run()\n", 84 | " print(f)\n", 85 | " print(session.run(a))\n", 86 | " print(session.run(b))\n" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 3, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "Tensor(\"add:0\", shape=(2,), dtype=int32)\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "a=tf.constant([1,2],name=\"a\")\n", 106 | "b=tf.constant([2,4],name=\"b\")\n", 107 | "result = a+b\n", 108 | "print(result)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 6, 114 | "metadata": { 115 | "collapsed": false 116 | }, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "[2 4 6 8]\n", 123 | "[2 4 6 8]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "a=tf.constant([1,2,3,4])\n", 129 | "b=tf.constant([1,2,3,4])\n", 130 | "result=a+b\n", 131 | "sess=tf.Session()\n", 132 | "print(sess.run(result))\n", 133 | "sess.close\n", 134 | "\n", 135 | "#输出 [2 4 6 8]\n", 136 | "\n", 137 | "with tf.Session() as sess:\n", 138 | " a=tf.constant([1,2,3,4])\n", 139 | " b=tf.constant([1,2,3,4])\n", 140 | " result=a+b\n", 141 | " print(sess.run(result))\n", 142 | " \n", 143 | "#输出 [2 4 6 8]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 16, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [ 153 | { 154 | "name": "stdout", 155 | "output_type": "stream", 156 | "text": [ 157 | "[[-0.11131823 2.38459873]]\n", 158 | "[[-0.11131823 2.38459873]]\n" 159 | ] 160 | }, 161 | { 162 | "data": { 163 | "text/plain": [ 164 | ">" 165 | ] 166 | }, 167 | "execution_count": 16, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "w1=tf.Variable(tf.random_normal([1,2],stddev=1,seed=1))\n", 174 | "\n", 175 | "#因为需要重复输入x,而每建一个x就会生成一个结点,计算图的效率会低。所以使用占位符\n", 176 | "x=tf.placeholder(tf.float32,shape=(1,2))\n", 177 | "x1=tf.constant([[0.7,0.9]])\n", 178 | "\n", 179 | "a=x+w1\n", 180 | "b=x1+w1\n", 181 | "\n", 182 | "sess=tf.Session()\n", 183 | "sess.run(tf.global_variables_initializer())\n", 184 | "#运行y时将占位符填上,feed_dict为字典,变量名不可变\n", 185 | "y_1=sess.run(a,feed_dict={x:[[0.7,0.9]]})\n", 186 | "y_2=sess.run(b)\n", 187 | "print(y_1)\n", 188 | "print(y_2)\n", 189 | "sess.close" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 13, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "the distance between [[1 2]] and [[15 16]] -> [19.79899]\n", 204 | "the distance between [[3 4]] and [[13 14]] -> [14.142136]\n", 205 | "the distance between [[5 6]] and [[11 12]] -> [8.485281]\n", 206 | "the distance between [[7 8]] and [[ 9 10]] -> [2.8284271]\n" 207 | ] 208 | } 209 | ], 210 | 
"source": [ 211 | "list_of_points1_ = [[1,2], [3,4], [5,6], [7,8]]\n", 212 | "list_of_points2_ = [[15,16], [13,14], [11,12], [9,10]]\n", 213 | "list_of_points1 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points1_])\n", 214 | "list_of_points2 = np.array([np.array(elem).reshape(1,2) for elem in list_of_points2_])\n", 215 | "\n", 216 | "graph = tf.Graph()\n", 217 | "with graph.as_default(): \n", 218 | " \n", 219 | " #我们使用 tf.placeholder() 创建占位符 ,在 session.run() 过程中再投递数据 \n", 220 | " point1 = tf.placeholder(tf.float32, shape=(1, 2))\n", 221 | " point2 = tf.placeholder(tf.float32, shape=(1, 2))\n", 222 | " \n", 223 | " def calculate_eucledian_distance(point1, point2):\n", 224 | " difference = tf.subtract(point1, point2)\n", 225 | " power2 = tf.pow(difference, tf.constant(2.0, shape=(1,2)))\n", 226 | " add = tf.reduce_sum(power2)\n", 227 | " eucledian_distance = tf.sqrt(add)\n", 228 | " return eucledian_distance\n", 229 | " \n", 230 | " dist = calculate_eucledian_distance(point1, point2)\n", 231 | " \n", 232 | "with tf.Session(graph=graph) as session:\n", 233 | " tf.global_variables_initializer().run() \n", 234 | " for ii in range(len(list_of_points1)):\n", 235 | " point1_ = list_of_points1[ii]\n", 236 | " point2_ = list_of_points2[ii]\n", 237 | " \n", 238 | " #使用feed_dict将数据投入到[dist]中\n", 239 | " feed_dict = {point1 : point1_, point2 : point2_}\n", 240 | " distance = session.run([dist], feed_dict=feed_dict)\n", 241 | " print(\"the distance between {} and {} -> {}\".format(point1_, point2_, distance))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 8, 247 | "metadata": { 248 | "collapsed": false 249 | }, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "[[-0.81131822 1.48459876 0.06532937]\n", 256 | " [-2.4427042 0.0992484 0.59122431]]\n", 257 | "[[-0.81131822]\n", 258 | " [ 1.48459876]\n", 259 | " [ 0.06532937]]\n", 260 | "在迭代 0 次后,训练损失为 0.308504\n", 261 | "在迭代 1000 次后,训练损失为 0.0393406\n", 262 | "在迭代 2000 次后,训练损失为 0.0182158\n", 263 | "在迭代 3000 次后,训练损失为 0.0104779\n", 264 | "在迭代 4000 次后,训练损失为 0.00680374\n", 265 | "在迭代 5000 次后,训练损失为 0.00446512\n", 266 | "在迭代 6000 次后,训练损失为 0.00296797\n", 267 | "在迭代 7000 次后,训练损失为 0.00218553\n", 268 | "在迭代 8000 次后,训练损失为 0.00179452\n", 269 | "在迭代 9000 次后,训练损失为 0.0013211\n", 270 | "在迭代 10000 次后,训练损失为 0.000957699\n", 271 | "在迭代 11000 次后,训练损失为 0.00081103\n", 272 | "在迭代 12000 次后,训练损失为 0.000643147\n", 273 | "在迭代 13000 次后,训练损失为 0.00047439\n", 274 | "在迭代 14000 次后,训练损失为 0.00030086\n", 275 | "在迭代 15000 次后,训练损失为 0.000137936\n", 276 | "[[-0.81131822 3.84255528 3.38165283]\n", 277 | " [-2.4427042 1.98635983 3.50722313]]\n", 278 | "[[-0.81131822]\n", 279 | " [ 4.02907705]\n", 280 | " [ 2.60285187]]\n" 281 | ] 282 | } 283 | ], 284 | "source": [ 285 | "import tensorflow as tf\n", 286 | "from numpy.random import RandomState\n", 287 | "\n", 288 | "batch_size=10\n", 289 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 290 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 291 | "\n", 292 | "\n", 293 | "# None 可以根据batch 大小确定维度,在shape的一个维度上使用None,方便不大的batch\n", 294 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 295 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 296 | "\n", 297 | "a=tf.nn.relu(tf.matmul(x,w1))\n", 298 | "yhat=tf.nn.relu(tf.matmul(a,w2))\n", 299 | "\n", 300 | "#定义交叉熵为损失函数,训练过程使用Adam算法最小化交叉熵\n", 301 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 302 | 
"train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)\n", 303 | "\n", 304 | "rdm=RandomState(1)\n", 305 | "data_size=512\n", 306 | "\n", 307 | "#生成两个特征,共data_size个样本\n", 308 | "X=rdm.rand(data_size,2)\n", 309 | "#定义规则给出样本标签,所有x1+x2<1的样本认为是正样本,其他为负样本。Y,1为正样本\n", 310 | "Y = [[int(x1+x2 < 1)] for (x1, x2) in X]\n", 311 | "\n", 312 | "with tf.Session() as sess:\n", 313 | " sess.run(tf.global_variables_initializer())\n", 314 | " print(sess.run(w1))\n", 315 | " print(sess.run(w2))\n", 316 | " steps=15001\n", 317 | " for i in range(steps):\n", 318 | " \n", 319 | " #选定每一个批量读取的首尾位置,确保在1个epoch内采样训练\n", 320 | " start = i * batch_size % data_size\n", 321 | " end = min(start + batch_size,data_size)\n", 322 | " sess.run(train_step,feed_dict={x:X[start:end],y:Y[start:end]})\n", 323 | " if i % 1000 == 0:\n", 324 | " training_loss= sess.run(cross_entropy,feed_dict={x:X,y:Y})\n", 325 | " print(\"在迭代 %d 次后,训练损失为 %g\"%(i,training_loss))\n", 326 | " if i == steps-1:\n", 327 | " print(sess.run(w1))\n", 328 | " print(sess.run(w2))#输出更新后的权重矩阵" 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": {}, 334 | "source": [ 335 | "上面的代码定义了一个简单的三层全连接网络(输入层、隐藏层和输出层分别为2、3和2个神经元),隐藏层和输出层的激活函数使用的是ReLU函数。该模型训练的样本总数为512,每次迭代读取的批量为10。这个简单的全连接网络以交叉熵为损失函数,并使用Adam优化算法进行权重更新。\n", 336 | "\n", 337 | "其中需要注意的几个函数如tf.nn.relu()代表调用ReLU激活函数,tf.matmul()为矩阵乘法等。tf.clip_by_value(yhat,1e-10,1.0)这一语句代表的是截断yhat的值,因为这一语句是嵌套在tf.log()函数内的,所以我们需要确保yhat的取值不会导致对数无穷大。\n", 338 | "\n", 339 | "tf.train.AdamOptimizer(learning_rate).minimize(cost_function)是进行训练的函数,其中我们采用的是Adam优化算法更新权重,并且需要提供学习速率和损失函数这两个参数。后面就是生成训练数据,X=rdm.rand(512,2)表示随机生成512个样本,每个样本有两个特征值。最后就是迭代运行了,这里我们计算出每一次迭代抽取数据的起始位置(start)和结束位置(end),并且每一次抽取的数据量为前面我们定义的批量,如果一个epoch最后剩余的数据少于批量大小,那就只是用剩余的数据进行训练。最后两句代码是为了计算训练损失并迭代一些次数后输出训练损失。" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 3, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [ 350 | "import numpy as np\n", 351 | "import pickle\n", 352 | "import json\n", 353 | "import os\n", 354 | "\n", 355 | "#定义一些预处理函数\n", 356 | "\n", 357 | "def flatten_tf_array(array):\n", 358 | " shape = array.get_shape().as_list()\n", 359 | " return tf.reshape(array, [shape[0], shape[1] * shape[2] * shape[3]])\n", 360 | "\n", 361 | "def accuracy(predictions, labels):\n", 362 | " return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])\n", 363 | "\n", 364 | "def randomize(dataset, labels):\n", 365 | " permutation = np.random.permutation(labels.shape[0])\n", 366 | " shuffled_dataset = dataset[permutation, :, :]\n", 367 | " shuffled_labels = labels[permutation]\n", 368 | " return shuffled_dataset, shuffled_labels\n", 369 | "\n", 370 | "def one_hot_encode(np_array):\n", 371 | " return (np.arange(10) == np_array[:,None]).astype(np.float32)\n", 372 | "\n", 373 | "def reformat_data(dataset, labels, image_width, image_height, image_depth):\n", 374 | " np_dataset_ = np.array([np.array(image_data).reshape(image_width, image_height, image_depth) for image_data in dataset])\n", 375 | " np_labels_ = one_hot_encode(np.array(labels, dtype=np.float32))\n", 376 | " np_dataset, np_labels = randomize(np_dataset_, np_labels_)\n", 377 | " return np_dataset, np_labels" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 4, 383 | "metadata": { 384 | "collapsed": false 385 | }, 386 | "outputs": [ 387 | { 388 | "name": "stdout", 389 | "output_type": "stream", 390 | "text": [ 391 | "训练集包含以下标签: [0 1 2 3 4 5 6 7 8 9]\n", 392 | 
"训练集维度 (50000, 32, 32, 3) (50000, 10)\n", 393 | "测试集维度 (10000, 32, 32, 3) (10000, 10)\n" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "cifar10_folder = './data/cifar10/'\n", 399 | "train_datasets = ['data_batch_1', 'data_batch_2', 'data_batch_3', 'data_batch_4', 'data_batch_5', ]\n", 400 | "test_dataset = ['test_batch']\n", 401 | "c10_image_height = 32\n", 402 | "c10_image_width = 32\n", 403 | "c10_image_depth = 3\n", 404 | "c10_num_labels = 10\n", 405 | "c10_image_size = 32 #Ahmet Taspinar的代码缺少了这一语句\n", 406 | "\n", 407 | "with open(cifar10_folder + test_dataset[0], 'rb') as f0:\n", 408 | " c10_test_dict = pickle.load(f0, encoding='bytes')\n", 409 | "\n", 410 | "c10_test_dataset, c10_test_labels = c10_test_dict[b'data'], c10_test_dict[b'labels']\n", 411 | "test_dataset_cifar10, test_labels_cifar10 = reformat_data(c10_test_dataset, c10_test_labels, c10_image_size, c10_image_size, c10_image_depth)\n", 412 | "\n", 413 | "c10_train_dataset, c10_train_labels = [], []\n", 414 | "for train_dataset in train_datasets:\n", 415 | " with open(cifar10_folder + train_dataset, 'rb') as f0:\n", 416 | " c10_train_dict = pickle.load(f0, encoding='bytes')\n", 417 | " c10_train_dataset_, c10_train_labels_ = c10_train_dict[b'data'], c10_train_dict[b'labels']\n", 418 | " \n", 419 | " c10_train_dataset.append(c10_train_dataset_)\n", 420 | " c10_train_labels += c10_train_labels_\n", 421 | "\n", 422 | "c10_train_dataset = np.concatenate(c10_train_dataset, axis=0)\n", 423 | "train_dataset_cifar10, train_labels_cifar10 = reformat_data(c10_train_dataset, c10_train_labels, c10_image_size, c10_image_size, c10_image_depth)\n", 424 | "del c10_train_dataset\n", 425 | "del c10_train_labels\n", 426 | "\n", 427 | "print(\"训练集包含以下标签: {}\".format(np.unique(c10_train_dict[b'labels'])))\n", 428 | "print('训练集维度', train_dataset_cifar10.shape, train_labels_cifar10.shape)\n", 429 | "print('测试集维度', test_dataset_cifar10.shape, test_labels_cifar10.shape)\n" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 5, 435 | "metadata": { 436 | "collapsed": false 437 | }, 438 | "outputs": [ 439 | { 440 | "name": "stdout", 441 | "output_type": "stream", 442 | "text": [ 443 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 444 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 445 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 446 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 447 | ] 448 | } 449 | ], 450 | "source": [ 451 | "from tensorflow.examples.tutorials.mnist import input_data\n", 452 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 5, 458 | "metadata": { 459 | "collapsed": false 460 | }, 461 | "outputs": [ 462 | { 463 | "name": "stdout", 464 | "output_type": "stream", 465 | "text": [ 466 | "Training data size: 55000\n", 467 | "Validating data size: 5000\n", 468 | "Testing data size: 10000\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "print(\"Training data size: \", mnist.train.num_examples) \n", 474 | "print (\"Validating data size: \", mnist.validation.num_examples) \n", 475 | "print (\"Testing data size: \", mnist.test.num_examples) " 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 10, 481 | "metadata": { 482 | "collapsed": false 483 | }, 484 | "outputs": [ 485 | { 486 | "name": "stdout", 487 | "output_type": "stream", 488 | "text": [ 489 | "[ 0. 0. 0. 0. 0. 0. 0.\n", 490 | " 0. 0. 0. 0. 0. 0. 0.\n", 491 | " 0. 0. 0. 0. 0. 0. 
0.\n", 492 | " 0. 0. 0. 0. 0. 0. 0.\n", 493 | " 0. 0. 0. 0. 0. 0. 0.\n", 494 | " 0. 0. 0. 0. 0. 0. 0.\n", 495 | " 0. 0. 0. 0. 0. 0. 0.\n", 496 | " 0. 0. 0. 0. 0. 0. 0.\n", 497 | " 0. 0. 0. 0. 0. 0. 0.\n", 498 | " 0. 0. 0. 0. 0. 0. 0.\n", 499 | " 0. 0. 0. 0. 0. 0. 0.\n", 500 | " 0. 0. 0. 0. 0. 0. 0.\n", 501 | " 0. 0. 0. 0. 0. 0. 0.\n", 502 | " 0. 0. 0. 0. 0. 0. 0.\n", 503 | " 0. 0. 0. 0. 0. 0. 0.\n", 504 | " 0. 0. 0. 0. 0. 0. 0.\n", 505 | " 0. 0. 0. 0. 0. 0. 0.\n", 506 | " 0. 0. 0. 0. 0. 0. 0.\n", 507 | " 0. 0. 0. 0. 0. 0. 0.\n", 508 | " 0. 0. 0. 0. 0. 0. 0.\n", 509 | " 0. 0. 0. 0. 0. 0. 0.\n", 510 | " 0. 0. 0. 0. 0. 0. 0.\n", 511 | " 0. 0. 0. 0. 0. 0. 0.\n", 512 | " 0. 0. 0. 0. 0. 0. 0.\n", 513 | " 0. 0. 0. 0. 0. 0. 0.\n", 514 | " 0. 0. 0. 0. 0. 0. 0.\n", 515 | " 0. 0. 0. 0. 0. 0. 0.\n", 516 | " 0. 0. 0. 0. 0. 0. 0.\n", 517 | " 0. 0. 0. 0. 0. 0. 0.\n", 518 | " 0. 0. 0. 0. 0.38039219 0.37647063\n", 519 | " 0.3019608 0.46274513 0.2392157 0. 0. 0. 0.\n", 520 | " 0. 0. 0. 0. 0. 0. 0.\n", 521 | " 0. 0. 0. 0. 0.35294119 0.5411765\n", 522 | " 0.92156869 0.92156869 0.92156869 0.92156869 0.92156869 0.92156869\n", 523 | " 0.98431379 0.98431379 0.97254908 0.99607849 0.96078438 0.92156869\n", 524 | " 0.74509805 0.08235294 0. 0. 0. 0. 0.\n", 525 | " 0. 0. 0. 0. 0. 0.\n", 526 | " 0.54901963 0.98431379 0.99607849 0.99607849 0.99607849 0.99607849\n", 527 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849\n", 528 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.74117649 0.09019608\n", 529 | " 0. 0. 0. 0. 0. 0. 0.\n", 530 | " 0. 0. 0. 0.88627458 0.99607849 0.81568635\n", 531 | " 0.78039223 0.78039223 0.78039223 0.78039223 0.54509807 0.2392157\n", 532 | " 0.2392157 0.2392157 0.2392157 0.2392157 0.50196081 0.8705883\n", 533 | " 0.99607849 0.99607849 0.74117649 0.08235294 0. 0. 0.\n", 534 | " 0. 0. 0. 0. 0. 0.\n", 535 | " 0.14901961 0.32156864 0.0509804 0. 0. 0. 0.\n", 536 | " 0. 0. 0. 0. 0. 0. 0.\n", 537 | " 0.13333334 0.83529419 0.99607849 0.99607849 0.45098042 0. 0.\n", 538 | " 0. 0. 0. 0. 0. 0. 0.\n", 539 | " 0. 0. 0. 0. 0. 0. 0.\n", 540 | " 0. 0. 0. 0. 0. 0. 0.\n", 541 | " 0. 0.32941177 0.99607849 0.99607849 0.91764712 0. 0.\n", 542 | " 0. 0. 0. 0. 0. 0. 0.\n", 543 | " 0. 0. 0. 0. 0. 0. 0.\n", 544 | " 0. 0. 0. 0. 0. 0. 0.\n", 545 | " 0. 0.32941177 0.99607849 0.99607849 0.91764712 0. 0.\n", 546 | " 0. 0. 0. 0. 0. 0. 0.\n", 547 | " 0. 0. 0. 0. 0. 0. 0.\n", 548 | " 0. 0. 0. 0. 0. 0. 0.\n", 549 | " 0.41568631 0.6156863 0.99607849 0.99607849 0.95294124 0.20000002\n", 550 | " 0. 0. 0. 0. 0. 0. 0.\n", 551 | " 0. 0. 0. 0. 0. 0. 0.\n", 552 | " 0. 0. 0. 0.09803922 0.45882356 0.89411771\n", 553 | " 0.89411771 0.89411771 0.99215692 0.99607849 0.99607849 0.99607849\n", 554 | " 0.99607849 0.94117653 0. 0. 0. 0. 0.\n", 555 | " 0. 0. 0. 0. 0. 0. 0.\n", 556 | " 0. 0. 0. 0.26666668 0.4666667 0.86274517\n", 557 | " 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849 0.99607849\n", 558 | " 0.99607849 0.99607849 0.99607849 0.55686277 0. 0. 0.\n", 559 | " 0. 0. 0. 0. 0. 0. 0.\n", 560 | " 0. 0. 0. 0.14509805 0.73333335 0.99215692\n", 561 | " 0.99607849 0.99607849 0.99607849 0.87450987 0.80784321 0.80784321\n", 562 | " 0.29411766 0.26666668 0.84313732 0.99607849 0.99607849 0.45882356\n", 563 | " 0. 0. 0. 0. 0. 0. 0.\n", 564 | " 0. 0. 0. 0. 0. 0.44313729\n", 565 | " 0.8588236 0.99607849 0.94901967 0.89019614 0.45098042 0.34901962\n", 566 | " 0.12156864 0. 0. 0. 0. 0.7843138\n", 567 | " 0.99607849 0.9450981 0.16078432 0. 0. 0. 0.\n", 568 | " 0. 0. 0. 0. 0. 0. 0.\n", 569 | " 0. 
0.66274512 0.99607849 0.6901961 0.24313727 0. 0.\n", 570 | " 0. 0. 0. 0. 0. 0.18823531\n", 571 | " 0.90588242 0.99607849 0.91764712 0. 0. 0. 0.\n", 572 | " 0. 0. 0. 0. 0. 0. 0.\n", 573 | " 0. 0. 0.07058824 0.48627454 0. 0. 0.\n", 574 | " 0. 0. 0. 0. 0. 0.\n", 575 | " 0.32941177 0.99607849 0.99607849 0.65098041 0. 0. 0.\n", 576 | " 0. 0. 0. 0. 0. 0. 0.\n", 577 | " 0. 0. 0. 0. 0. 0. 0.\n", 578 | " 0. 0. 0. 0. 0. 0. 0.\n", 579 | " 0.54509807 0.99607849 0.9333334 0.22352943 0. 0. 0.\n", 580 | " 0. 0. 0. 0. 0. 0. 0.\n", 581 | " 0. 0. 0. 0. 0. 0. 0.\n", 582 | " 0. 0. 0. 0. 0. 0.\n", 583 | " 0.82352948 0.98039222 0.99607849 0.65882355 0. 0. 0.\n", 584 | " 0. 0. 0. 0. 0. 0. 0.\n", 585 | " 0. 0. 0. 0. 0. 0. 0.\n", 586 | " 0. 0. 0. 0. 0. 0. 0.\n", 587 | " 0.94901967 0.99607849 0.93725497 0.22352943 0. 0. 0.\n", 588 | " 0. 0. 0. 0. 0. 0. 0.\n", 589 | " 0. 0. 0. 0. 0. 0. 0.\n", 590 | " 0. 0. 0. 0. 0. 0.\n", 591 | " 0.34901962 0.98431379 0.9450981 0.33725491 0. 0. 0.\n", 592 | " 0. 0. 0. 0. 0. 0. 0.\n", 593 | " 0. 0. 0. 0. 0. 0. 0.\n", 594 | " 0. 0. 0. 0. 0. 0.\n", 595 | " 0.01960784 0.80784321 0.96470594 0.6156863 0. 0. 0.\n", 596 | " 0. 0. 0. 0. 0. 0. 0.\n", 597 | " 0. 0. 0. 0. 0. 0. 0.\n", 598 | " 0. 0. 0. 0. 0. 0. 0.\n", 599 | " 0.01568628 0.45882356 0.27058825 0. 0. 0. 0.\n", 600 | " 0. 0. 0. 0. 0. 0. 0.\n", 601 | " 0. 0. 0. 0. 0. 0. 0.\n", 602 | " 0. 0. 0. 0. 0. 0. 0.\n", 603 | " 0. 0. 0. 0. 0. 0. 0.\n", 604 | " 0. 0. 0. 0. 0. 0. 0. ]\n", 605 | "[ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", 606 | "[ 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]\n", 607 | "[ 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n" 608 | ] 609 | } 610 | ], 611 | "source": [ 612 | "print(mnist.train.images[0]) \n", 613 | "print(mnist.train.labels[0]) \n", 614 | "print(mnist.test.labels[0]) \n", 615 | "print(mnist.validation.labels[0]) " 616 | ] 617 | }, 618 | { 619 | "cell_type": "code", 620 | "execution_count": 6, 621 | "metadata": { 622 | "collapsed": false 623 | }, 624 | "outputs": [ 625 | { 626 | "name": "stdout", 627 | "output_type": "stream", 628 | "text": [ 629 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 630 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 631 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 632 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 633 | ] 634 | } 635 | ], 636 | "source": [ 637 | "import tensorflow as tf\n", 638 | "from tensorflow.examples.tutorials.mnist import input_data\n", 639 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 640 | "\n", 641 | "\n", 642 | "INPUT_NODE = 784 \n", 643 | "OUTPUT_NODE = 10 \n", 644 | "LAYER1_NODE = 500 \n", 645 | " \n", 646 | "BATCH_SIZE = 100 \n", 647 | "\n", 648 | "# 模型相关的参数\n", 649 | "LEARNING_RATE_BASE = 0.8 \n", 650 | "LEARNING_RATE_DECAY = 0.99 \n", 651 | "REGULARAZTION_RATE = 0.0001 \n", 652 | "TRAINING_STEPS = 10000 \n", 653 | "MOVING_AVERAGE_DECAY = 0.99 \n", 654 | "\n", 655 | "def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):\n", 656 | " # 使用滑动平均类\n", 657 | " if avg_class == None:\n", 658 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)\n", 659 | " return tf.matmul(layer1, weights2) + biases2\n", 660 | "\n", 661 | " else:\n", 662 | " \n", 663 | " layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))\n", 664 | " return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2) \n", 665 | " \n", 666 | "def train(mnist):\n", 667 | " x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')\n", 668 | " y_ = 
tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')\n", 669 | " # 生成隐藏层的参数。\n", 670 | " weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))\n", 671 | " biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))\n", 672 | " # 生成输出层的参数。\n", 673 | " weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))\n", 674 | " biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))\n", 675 | "\n", 676 | " # 计算不含滑动平均类的前向传播结果\n", 677 | " y = inference(x, None, weights1, biases1, weights2, biases2)\n", 678 | " \n", 679 | " # 定义训练轮数及相关的滑动平均类 \n", 680 | " global_step = tf.Variable(0, trainable=False)\n", 681 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 682 | " variables_averages_op = variable_averages.apply(tf.trainable_variables())\n", 683 | " average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)\n", 684 | " \n", 685 | " # 计算交叉熵及其平均值\n", 686 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))\n", 687 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 688 | " \n", 689 | " # 损失函数的计算\n", 690 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)\n", 691 | " regularization = regularizer(weights1) + regularizer(weights2)\n", 692 | " loss = cross_entropy_mean + regularization\n", 693 | " \n", 694 | " # 设置指数衰减的学习率。\n", 695 | " learning_rate = tf.train.exponential_decay(\n", 696 | " LEARNING_RATE_BASE,\n", 697 | " global_step,\n", 698 | " mnist.train.num_examples / BATCH_SIZE,\n", 699 | " LEARNING_RATE_DECAY,\n", 700 | " staircase=True)\n", 701 | " \n", 702 | " # 优化损失函数\n", 703 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 704 | " \n", 705 | " # 反向传播更新参数和更新每一个参数的滑动平均值\n", 706 | " with tf.control_dependencies([train_step, variables_averages_op]):\n", 707 | " train_op = tf.no_op(name='train')\n", 708 | "\n", 709 | " # 计算正确率\n", 710 | " correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))\n", 711 | " accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))\n", 712 | " \n", 713 | " # 初始化会话并开始训练过程。\n", 714 | " with tf.Session() as sess:\n", 715 | " tf.global_variables_initializer().run()\n", 716 | " validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}\n", 717 | " test_feed = {x: mnist.test.images, y_: mnist.test.labels} \n", 718 | " \n", 719 | " # 循环地训练神经网络。\n", 720 | " for i in range(TRAINING_STEPS):\n", 721 | " if i % 1000 == 0:\n", 722 | " validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 723 | " print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 724 | " \n", 725 | " xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 726 | " sess.run(train_op,feed_dict={x:xs,y_:ys})\n", 727 | "\n", 728 | " test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 729 | " print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 8, 735 | "metadata": { 736 | "collapsed": false 737 | }, 738 | "outputs": [ 739 | { 740 | "name": "stdout", 741 | "output_type": "stream", 742 | "text": [ 743 | "After 0 training step(s), validation accuracy using average model is 0.0912 \n", 744 | "After 1000 training step(s), validation accuracy using average model is 0.9756 \n", 745 | "After 2000 training step(s), validation accuracy 
using average model is 0.9794 \n", 746 | "After 3000 training step(s), validation accuracy using average model is 0.9818 \n", 747 | "After 4000 training step(s), validation accuracy using average model is 0.9814 \n", 748 | "After 5000 training step(s), test accuracy using average model is 0.9834\n" 749 | ] 750 | } 751 | ], 752 | "source": [ 753 | "train(mnist)" 754 | ] 755 | }, 756 | { 757 | "cell_type": "code", 758 | "execution_count": 7, 759 | "metadata": { 760 | "collapsed": false 761 | }, 762 | "outputs": [ 763 | { 764 | "name": "stdout", 765 | "output_type": "stream", 766 | "text": [ 767 | "After 0 training step(s), validation accuracy using average model is 0.1678 \n", 768 | "After 1000 training step(s), validation accuracy using average model is 0.9766 \n", 769 | "After 2000 training step(s), validation accuracy using average model is 0.981 \n", 770 | "After 3000 training step(s), validation accuracy using average model is 0.9808 \n", 771 | "After 4000 training step(s), validation accuracy using average model is 0.9824 \n", 772 | "After 5000 training step(s), validation accuracy using average model is 0.9822 \n", 773 | "After 6000 training step(s), validation accuracy using average model is 0.983 \n", 774 | "After 7000 training step(s), validation accuracy using average model is 0.9828 \n", 775 | "After 8000 training step(s), validation accuracy using average model is 0.9842 \n", 776 | "After 9000 training step(s), validation accuracy using average model is 0.9834 \n", 777 | "After 10000 training step(s), test accuracy using average model is 0.9851\n" 778 | ] 779 | } 780 | ], 781 | "source": [ 782 | "avg_class = None\n", 783 | "train(mnist)" 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": null, 789 | "metadata": { 790 | "collapsed": true 791 | }, 792 | "outputs": [], 793 | "source": [] 794 | }, 795 | { 796 | "cell_type": "code", 797 | "execution_count": null, 798 | "metadata": { 799 | "collapsed": true 800 | }, 801 | "outputs": [], 802 | "source": [] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 32, 807 | "metadata": { 808 | "collapsed": false 809 | }, 810 | "outputs": [ 811 | { 812 | "name": "stdout", 813 | "output_type": "stream", 814 | "text": [ 815 | "Initialized with learning_rate 0.001\n", 816 | "step 0000 : loss is 057.10, accuracy on training set 6.25 %, accuracy on test set 9.34 %\n", 817 | "step 1000 : loss is 001.63, accuracy on training set 62.50 %, accuracy on test set 34.26 %\n", 818 | "step 2000 : loss is 001.38, accuracy on training set 50.00 %, accuracy on test set 40.97 %\n", 819 | "step 3000 : loss is 001.78, accuracy on training set 31.25 %, accuracy on test set 41.43 %\n", 820 | "step 4000 : loss is 001.41, accuracy on training set 56.25 %, accuracy on test set 42.54 %\n", 821 | "step 5000 : loss is 001.52, accuracy on training set 37.50 %, accuracy on test set 45.20 %\n", 822 | "step 6000 : loss is 001.28, accuracy on training set 56.25 %, accuracy on test set 42.72 %\n", 823 | "step 7000 : loss is 001.29, accuracy on training set 56.25 %, accuracy on test set 46.89 %\n", 824 | "step 8000 : loss is 001.43, accuracy on training set 43.75 %, accuracy on test set 47.97 %\n", 825 | "step 9000 : loss is 001.58, accuracy on training set 31.25 %, accuracy on test set 49.00 %\n", 826 | "step 10000 : loss is 001.41, accuracy on training set 43.75 %, accuracy on test set 49.80 %\n" 827 | ] 828 | } 829 | ], 830 | "source": [ 831 | "LENET5_LIKE_BATCH_SIZE = 32\n", 832 | "LENET5_LIKE_FILTER_SIZE = 5\n", 833 | 
"LENET5_LIKE_FILTER_DEPTH = 16\n", 834 | "LENET5_LIKE_NUM_HIDDEN = 120\n", 835 | "\n", 836 | "def variables_lenet5_like(filter_size = LENET5_LIKE_FILTER_SIZE, \n", 837 | " filter_depth = LENET5_LIKE_FILTER_DEPTH, \n", 838 | " num_hidden = LENET5_LIKE_NUM_HIDDEN,\n", 839 | " image_width = 32, image_height = 32, image_depth = 3, num_labels = 10):\n", 840 | " \n", 841 | " w1 = tf.Variable(tf.truncated_normal([filter_size, filter_size, image_depth, filter_depth], stddev=0.1))\n", 842 | " b1 = tf.Variable(tf.zeros([filter_depth]))\n", 843 | "\n", 844 | " w2 = tf.Variable(tf.truncated_normal([filter_size, filter_size, filter_depth, filter_depth], stddev=0.1))\n", 845 | " b2 = tf.Variable(tf.constant(1.0, shape=[filter_depth]))\n", 846 | " \n", 847 | " w3 = tf.Variable(tf.truncated_normal([(image_width // 4)*(image_height // 4)*filter_depth , num_hidden], stddev=0.1))\n", 848 | " b3 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))\n", 849 | "\n", 850 | " w4 = tf.Variable(tf.truncated_normal([num_hidden, num_hidden], stddev=0.1))\n", 851 | " b4 = tf.Variable(tf.constant(1.0, shape = [num_hidden]))\n", 852 | " \n", 853 | " w5 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))\n", 854 | " b5 = tf.Variable(tf.constant(1.0, shape = [num_labels]))\n", 855 | " variables = {\n", 856 | " 'w1': w1, 'w2': w2, 'w3': w3, 'w4': w4, 'w5': w5,\n", 857 | " 'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4, 'b5': b5\n", 858 | " }\n", 859 | " return variables\n", 860 | "\n", 861 | "def model_lenet5_like(data, variables):\n", 862 | " layer1_conv = tf.nn.conv2d(data, variables['w1'], [1, 1, 1, 1], padding='SAME')\n", 863 | " layer1_actv = tf.nn.relu(layer1_conv + variables['b1'])\n", 864 | " layer1_pool = tf.nn.avg_pool(layer1_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')\n", 865 | "\n", 866 | " layer2_conv = tf.nn.conv2d(layer1_pool, variables['w2'], [1, 1, 1, 1], padding='SAME')\n", 867 | " layer2_actv = tf.nn.relu(layer2_conv + variables['b2'])\n", 868 | " layer2_pool = tf.nn.avg_pool(layer2_actv, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')\n", 869 | " \n", 870 | " flat_layer = flatten_tf_array(layer2_pool)\n", 871 | " layer3_fccd = tf.matmul(flat_layer, variables['w3']) + variables['b3']\n", 872 | " layer3_actv = tf.nn.relu(layer3_fccd)\n", 873 | " layer3_drop = tf.nn.dropout(layer3_actv, 0.5)\n", 874 | " \n", 875 | " layer4_fccd = tf.matmul(layer3_actv, variables['w4']) + variables['b4']\n", 876 | " layer4_actv = tf.nn.relu(layer4_fccd)\n", 877 | " layer4_drop = tf.nn.dropout(layer4_actv, 0.5)\n", 878 | " \n", 879 | " logits = tf.matmul(layer4_actv, variables['w5']) + variables['b5']\n", 880 | " return logits\n", 881 | "\n", 882 | "\n", 883 | "#Variables used in the constructing and running the graph\n", 884 | "num_steps = 10001\n", 885 | "display_step = 1000\n", 886 | "learning_rate = 0.001\n", 887 | "batch_size = 16\n", 888 | "\n", 889 | "#定义数据的基本信息,传入变量\n", 890 | "image_width = 32\n", 891 | "image_height = 32\n", 892 | "image_depth = 3\n", 893 | "num_labels = 10\n", 894 | "\n", 895 | "\n", 896 | "test_dataset = test_dataset_cifar10\n", 897 | "test_labels = test_labels_cifar10\n", 898 | "train_dataset = train_dataset_cifar10\n", 899 | "train_labels = train_labels_cifar10\n", 900 | "\n", 901 | "\n", 902 | "\n", 903 | "\n", 904 | "graph = tf.Graph()\n", 905 | "with graph.as_default():\n", 906 | " #1 首先使用占位符定义数据变量的维度\n", 907 | " tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_width, image_height, image_depth))\n", 908 | " tf_train_labels = tf.placeholder(tf.float32, 
shape = (batch_size, num_labels))\n", 909 | " tf_test_dataset = tf.constant(test_dataset, tf.float32)\n", 910 | "\n", 911 | " #2 然后初始化权重矩阵和偏置向量\n", 912 | " variables = variables_lenet5_like(image_width = image_width, image_height=image_height, image_depth = image_depth, num_labels = num_labels)\n", 913 | "\n", 914 | "\n", 915 | " #3 使用模型计算分类\n", 916 | " logits = model_lenet5_like(tf_train_dataset, variables)\n", 917 | "\n", 918 | " #4 使用带softmax的交叉熵函数计算预测标签和真实标签之间的损失函数\n", 919 | " loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))\n", 920 | "\n", 921 | " #5 采用Adam优化算法优化上一步定义的损失函数,给定学习率\n", 922 | " optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)\n", 923 | "\n", 924 | " # 执行预测推断\n", 925 | " train_prediction = tf.nn.softmax(logits)\n", 926 | " test_prediction = tf.nn.softmax(model_lenet5_like(tf_test_dataset, variables))\n", 927 | "\n", 928 | "\n", 929 | "with tf.Session(graph=graph) as session:\n", 930 | " #初始化全部变量\n", 931 | " tf.global_variables_initializer().run()\n", 932 | " print('Initialized with learning_rate', learning_rate)\n", 933 | " for step in range(num_steps):\n", 934 | " offset = (step * batch_size) % (train_labels.shape[0] - batch_size)\n", 935 | " batch_data = train_dataset[offset:(offset + batch_size), :, :, :]\n", 936 | " batch_labels = train_labels[offset:(offset + batch_size), :]\n", 937 | " #在每一次批量中,获取当前的训练数据,并传入feed_dict以馈送到占位符中\n", 938 | " feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}\n", 939 | " _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)\n", 940 | " train_accuracy = accuracy(predictions, batch_labels)\n", 941 | " \n", 942 | " if step % display_step == 0:\n", 943 | " test_accuracy = accuracy(test_prediction.eval(), test_labels)\n", 944 | " message = \"step {:04d} : loss is {:06.2f}, accuracy on training set {:02.2f} %, accuracy on test set {:02.2f} %\".format(step, l, train_accuracy, test_accuracy)\n", 945 | " print(message)" 946 | ] 947 | }, 948 | { 949 | "cell_type": "code", 950 | "execution_count": null, 951 | "metadata": { 952 | "collapsed": true 953 | }, 954 | "outputs": [], 955 | "source": [] 956 | } 957 | ], 958 | "metadata": { 959 | "kernelspec": { 960 | "display_name": "Python 3", 961 | "language": "python", 962 | "name": "python3" 963 | }, 964 | "language_info": { 965 | "codemirror_mode": { 966 | "name": "ipython", 967 | "version": 3 968 | }, 969 | "file_extension": ".py", 970 | "mimetype": "text/x-python", 971 | "name": "python", 972 | "nbconvert_exporter": "python", 973 | "pygments_lexer": "ipython3", 974 | "version": "3.5.3" 975 | } 976 | }, 977 | "nbformat": 4, 978 | "nbformat_minor": 2 979 | } 980 | -------------------------------------------------------------------------------- /Experiments/tf_GAN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "from tensorflow.examples.tutorials.mnist import input_data\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import matplotlib.gridspec as gridspec\n", 16 | "import os" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": { 23 | "collapsed": true 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "#该函数将给出权重初始化的方法\n", 28 | "def variable_init(size):\n", 
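" # 标准差按输入维度缩放:stddev = 1/sqrt(in_dim/2),与 He 初始化的 sqrt(2/in_dim) 取值一致,\n",
" # 例如 in_dim=784 时 stddev 约为 0.05,可避免初始前向传播的激活值过大或过小。\n",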
29 | " in_dim = size[0]\n", 30 | "\n", 31 | " #计算随机生成变量所服从的正态分布标准差\n", 32 | " w_stddev = 1. / tf.sqrt(in_dim / 2.)\n", 33 | " return tf.random_normal(shape=size, stddev=w_stddev)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 3, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "#定义输入矩阵的占位符,输入层单元为784,None代表批量大小的占位,X代表输入的真实图片。占位符的数值类型为32位浮点型\n", 45 | "X = tf.placeholder(tf.float32, shape=[None, 784])\n", 46 | "\n", 47 | "#定义判别器的权重矩阵和偏置项向量,由此可知判别网络为三层全连接网络\n", 48 | "D_W1 = tf.Variable(variable_init([784, 128]))\n", 49 | "D_b1 = tf.Variable(tf.zeros(shape=[128]))\n", 50 | "\n", 51 | "D_W2 = tf.Variable(variable_init([128, 1]))\n", 52 | "D_b2 = tf.Variable(tf.zeros(shape=[1]))\n", 53 | "\n", 54 | "theta_D = [D_W1, D_W2, D_b1, D_b2]\n", 55 | "\n", 56 | "#定义生成器的输入噪声为100维度的向量组,None根据批量大小确定\n", 57 | "Z = tf.placeholder(tf.float32, shape=[None, 100])\n", 58 | "\n", 59 | "#定义生成器的权重与偏置项。输入层为100个神经元且接受随机噪声,\n", 60 | "#输出层为784个神经元,并输出手写字体图片。生成网络根据原论文为三层全连接网络\n", 61 | "G_W1 = tf.Variable(variable_init([100, 128]))\n", 62 | "G_b1 = tf.Variable(tf.zeros(shape=[128]))\n", 63 | "\n", 64 | "G_W2 = tf.Variable(variable_init([128, 784]))\n", 65 | "G_b2 = tf.Variable(tf.zeros(shape=[784]))\n", 66 | "\n", 67 | "theta_G = [G_W1, G_W2, G_b1, G_b2]" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 4, 73 | "metadata": { 74 | "collapsed": true 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "#定义一个可以生成m*n阶随机矩阵的函数,该矩阵的元素服从均匀分布,随机生成的z就为生成器的输入\n", 79 | "def sample_Z(m, n):\n", 80 | " return np.random.uniform(-1., 1., size=[m, n])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": { 87 | "collapsed": true 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "#定义生成器\n", 92 | "def generator(z):\n", 93 | " \n", 94 | " #第一层先计算 y=z*G_W1+G-b1,然后投入激活函数计算G_h1=ReLU(y),G_h1 为第二次层神经网络的输出激活值\n", 95 | " G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)\n", 96 | " \n", 97 | " #以下两个语句计算第二层传播到第三层的激活结果,第三层的激活结果是含有784个元素的向量,该向量转化28×28就可以表示图像\n", 98 | " G_log_prob = tf.matmul(G_h1, G_W2) + G_b2\n", 99 | " G_prob = tf.nn.sigmoid(G_log_prob)\n", 100 | " return G_prob" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "metadata": { 107 | "collapsed": true 108 | }, 109 | "outputs": [], 110 | "source": [ 111 | "#定义判别器\n", 112 | "def discriminator(x):\n", 113 | " \n", 114 | " #计算D_h1=ReLU(x*D_W1+D_b1),该层的输入为含784个元素的向量\n", 115 | " D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)\n", 116 | " \n", 117 | " #计算第三层的输出结果。因为使用的是Sigmoid函数,则该输出结果是一个取值为[0,1]间的标量(见上述权重定义)\n", 118 | " #即判别输入的图像到底是真(=1)还是假(=0)\n", 119 | " D_logit = tf.matmul(D_h1, D_W2) + D_b2\n", 120 | " D_prob = tf.nn.sigmoid(D_logit)\n", 121 | " \n", 122 | " #返回判别为真的概率和第三层的输入值,输出D_logit是为了将其输入tf.nn.sigmoid_cross_entropy_with_logits()以构建损失函数\n", 123 | " return D_prob, D_logit" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 7, 129 | "metadata": { 130 | "collapsed": true 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "#该函数用于输出生成图片\n", 135 | "def plot(samples):\n", 136 | " fig = plt.figure(figsize=(4, 4))\n", 137 | " gs = gridspec.GridSpec(4, 4)\n", 138 | " gs.update(wspace=0.05, hspace=0.05)\n", 139 | "\n", 140 | " for i, sample in enumerate(samples):\n", 141 | " ax = plt.subplot(gs[i])\n", 142 | " plt.axis('off')\n", 143 | " ax.set_xticklabels([])\n", 144 | " ax.set_yticklabels([])\n", 145 | " ax.set_aspect('equal')\n", 146 | " plt.imshow(sample.reshape(28, 28), cmap='Greys_r')\n", 147 | "\n", 148 | 
" return fig\n" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "#### 交叉熵损失函数\n", 156 | "sigmoid_cross_entropy_with_logits函数的输入是logits和targets,logits就是神经网络模型中的 W * X矩阵,且不需要经过Sigmoid激活函数。而targets的shape和logits相同,即正确的标注值。若令x = logits、 z = labels,那么该函数的表达式为z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 8, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "#输入随机噪声z而输出生成样本\n", 168 | "G_sample = generator(Z)\n", 169 | "\n", 170 | "#分别输入真实图片和生成的图片,并投入判别器以判断真伪\n", 171 | "D_real, D_logit_real = discriminator(X)\n", 172 | "D_fake, D_logit_fake = discriminator(G_sample)\n", 173 | "\n", 174 | "#以下为原论文的判别器损失和生成器损失,但本实现并没有使用该损失函数\n", 175 | "# D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))\n", 176 | "# G_loss = -tf.reduce_mean(tf.log(D_fake))\n", 177 | "\n", 178 | "# 我们使用交叉熵作为判别器和生成器的损失函数,因为sigmoid_cross_entropy_with_logits内部会对预测输入执行Sigmoid函数,\n", 179 | "#所以我们取判别器最后一层未投入激活函数的值,即D_h1*D_W2+D_b2。\n", 180 | "#tf.ones_like(D_logit_real)创建维度和D_logit_real相等的全是1的标注,真实图片。\n", 181 | "D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))\n", 182 | "D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))\n", 183 | "\n", 184 | "#损失函数为两部分,即E[log(D(x))]+E[log(1-D(G(z)))],将真的判别为假和将假的判别为真\n", 185 | "D_loss = D_loss_real + D_loss_fake\n", 186 | "\n", 187 | "#同样使用交叉熵构建生成器损失函数\n", 188 | "G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))\n", 189 | "\n", 190 | "#定义判别器和生成器的优化方法为Adam算法,关键字var_list表明最小化损失函数所更新的权重矩阵\n", 191 | "D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)\n", 192 | "G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 9, 198 | "metadata": { 199 | "collapsed": false 200 | }, 201 | "outputs": [ 202 | { 203 | "name": "stdout", 204 | "output_type": "stream", 205 | "text": [ 206 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 207 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 208 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 209 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 210 | ] 211 | } 212 | ], 213 | "source": [ 214 | "#选择训练的批量大小和随机生成噪声的维度\n", 215 | "mb_size = 128\n", 216 | "Z_dim = 100\n", 217 | "\n", 218 | "#读取数据集MNIST,并放在当前目录data文件夹下MNIST文件夹中,如果该地址没有数据,则下载数据至该文件夹\n", 219 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 10, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "Iter: 0\n", 234 | "D loss: 1.671\n", 235 | "G_loss: 1.718\n", 236 | "\n", 237 | "Iter: 2000\n", 238 | "D loss: 0.05008\n", 239 | "G_loss: 4.74\n", 240 | "\n", 241 | "Iter: 4000\n", 242 | "D loss: 0.3667\n", 243 | "G_loss: 4.85\n", 244 | "\n", 245 | "Iter: 6000\n", 246 | "D loss: 0.3974\n", 247 | "G_loss: 4.059\n", 248 | "\n", 249 | "Iter: 8000\n", 250 | "D loss: 0.7007\n", 251 | "G_loss: 2.628\n", 252 | "\n", 253 | "Iter: 10000\n", 254 | "D loss: 0.4421\n", 255 | "G_loss: 3.05\n", 256 | "\n", 257 | "Iter: 12000\n", 258 | "D loss: 0.7872\n", 259 | "G_loss: 2.562\n", 260 | 
"\n", 261 | "Iter: 14000\n", 262 | "D loss: 0.7155\n", 263 | "G_loss: 2.877\n", 264 | "\n", 265 | "Iter: 16000\n", 266 | "D loss: 0.9827\n", 267 | "G_loss: 2.042\n", 268 | "\n", 269 | "Iter: 18000\n", 270 | "D loss: 0.7171\n", 271 | "G_loss: 1.966\n", 272 | "\n" 273 | ] 274 | } 275 | ], 276 | "source": [ 277 | "#打开一个会话运行计算图\n", 278 | "sess = tf.Session()\n", 279 | "\n", 280 | "#初始化所有定义的变量\n", 281 | "sess.run(tf.global_variables_initializer())\n", 282 | "\n", 283 | "#如果当前目录下不存在out文件夹,则创建该文件夹\n", 284 | "if not os.path.exists('out/'):\n", 285 | " os.makedirs('out/')\n", 286 | "\n", 287 | "#初始化,并开始迭代训练,100W次\n", 288 | "i = 0\n", 289 | "for it in range(20000):\n", 290 | " \n", 291 | " #每2000次输出一张生成器生成的图片\n", 292 | " if it % 2000 == 0:\n", 293 | " samples = sess.run(G_sample, feed_dict={Z: sample_Z(16, Z_dim)})\n", 294 | "\n", 295 | " fig = plot(samples)\n", 296 | " plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')\n", 297 | " i += 1\n", 298 | " plt.close(fig)\n", 299 | " \n", 300 | " #next_batch抽取下一个批量的图片,该方法返回一个矩阵,即shape=[mb_size,784],每一行是一张图片,共批量大小行\n", 301 | " X_mb, _ = mnist.train.next_batch(mb_size)\n", 302 | " \n", 303 | " #投入数据并根据优化方法迭代一次,计算损失后返回损失值\n", 304 | " _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})\n", 305 | " _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})\n", 306 | "\n", 307 | "\n", 308 | " #每迭代2000次输出迭代数、生成器损失和判别器损失\n", 309 | " if it % 2000 == 0:\n", 310 | " print('Iter: {}'.format(it))\n", 311 | " print('D loss: {:.4}'. format(D_loss_curr))\n", 312 | " print('G_loss: {:.4}'.format(G_loss_curr))\n", 313 | " print()" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "metadata": { 320 | "collapsed": true 321 | }, 322 | "outputs": [], 323 | "source": [] 324 | } 325 | ], 326 | "metadata": { 327 | "kernelspec": { 328 | "display_name": "Python 3", 329 | "language": "python", 330 | "name": "python3" 331 | }, 332 | "language_info": { 333 | "codemirror_mode": { 334 | "name": "ipython", 335 | "version": 3 336 | }, 337 | "file_extension": ".py", 338 | "mimetype": "text/x-python", 339 | "name": "python", 340 | "nbconvert_exporter": "python", 341 | "pygments_lexer": "ipython3", 342 | "version": "3.5.3" 343 | } 344 | }, 345 | "nbformat": 4, 346 | "nbformat_minor": 2 347 | } 348 | -------------------------------------------------------------------------------- /Experiments/tf_LeNet5.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import tensorflow as tf\n", 12 | "\n", 13 | "# 配置卷积神经网络的架构参数\n", 14 | "INPUT_NODE = 784\n", 15 | "OUTPUT_NODE = 10\n", 16 | "\n", 17 | "IMAGE_SIZE = 28\n", 18 | "NUM_CHANNELS = 1\n", 19 | "NUM_LABELS = 10\n", 20 | "\n", 21 | "# 第一层卷积层的尺寸和深度\n", 22 | "CONV1_DEEP = 32\n", 23 | "CONV1_SIZE = 5\n", 24 | "# 第二层卷积层的尺寸和深度\n", 25 | "CONV2_DEEP = 64\n", 26 | "CONV2_SIZE = 5\n", 27 | "# 全连接层的结点个数\n", 28 | "FC_SIZE = 512\n", 29 | "\n", 30 | "\n", 31 | "# 定义卷积神经网络的前向传播过程。这里添加了一个新的参数train,用于区分训练过程和测试过程。在这个程序中将用到dropout方法,\n", 32 | "# dropout方法可进一步提升模型的可靠性并防止过拟合,dropout过程只在训练时使用\n", 33 | "def inference(input_tensor, train, regularizer):\n", 34 | " # 声明第一层卷积层的变量并实现前向传播过程。通过使用不同命名空间来隔离不同层的变量,让每一层中的变量命名只需要考虑在当前层的作用,\n", 35 | " # 不需担心重命名的问题。第一层输出为28×28×32的张量\n", 36 | " with tf.variable_scope('layer1-conv1'):\n", 37 | " conv1_weights = 
tf.get_variable('weight', [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],\n", 38 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 39 | " conv1_biases = tf.get_variable('bias', [CONV1_DEEP], initializer=tf.constant_initializer(0.0))\n", 40 | "\n", 41 | " # 使用边长为5,深度为32的卷积核,卷积核的移动步幅为1,且使用0填充\n", 42 | " conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')\n", 43 | " relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))\n", 44 | "\n", 45 | " # 实现第二层池化层的前向传播过程。该最大池化层卷积核边长为2,使用0填充,移动步幅为2.\n", 46 | " # 该层的输入为28×28×32的张量,输出为14×14×32的张量\n", 47 | " with tf.name_scope('layer2-pool1'):\n", 48 | " pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],strides=[1,2,2,1], padding='SAME')\n", 49 | "\n", 50 | " # 声明第三层卷积层的变量并实现前向传播过程,该卷积层的输入为14×14×32的张量,输出为14×14×64的矩阵\n", 51 | " with tf.variable_scope('layer3-conv2'):\n", 52 | " conv2_weights = tf.get_variable('weight', [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],\n", 53 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 54 | " conv2_biases = tf.get_variable('bias', [CONV2_DEEP], initializer=tf.constant_initializer(0.0))\n", 55 | "\n", 56 | " # 使用尺寸为5×5,深度为64的卷积核,卷积核的移动步幅为1,且使用0填充\n", 57 | " conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')\n", 58 | " relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))\n", 59 | "\n", 60 | " # 实现第四层池化层的前向传播过程,输入为14×14×64,输出为7×7×64的张量\n", 61 | " with tf.name_scope('layer4-pool2'):\n", 62 | " pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')\n", 63 | "\n", 64 | " # 将第四层池化层的输出转化为第五层全连接层的输入格式。第四层为7×7×64的张量,第五层输入为向量,所以需要将该张量拉成一个向量\n", 65 | " # pool2.get_shape函数取第四层输出张量的维度,每层的输入输出都为一个BATCH的张量,所以这里得到的维度也包含一个BATCH中数据的数量。\n", 66 | " pool_shape = pool2.get_shape().as_list()\n", 67 | "\n", 68 | " # 计算将张量拉直成向量后的长度,该长度等于张量维度累乘。注意这里的pool_shape[0]为一个batch中数据的个数\n", 69 | " nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]\n", 70 | "\n", 71 | " # 通过tf.reshape函数将第四层的输出变成一个batch的向量\n", 72 | " reshaped = tf.reshape(pool2, [pool_shape[0], nodes])\n", 73 | "\n", 74 | " # 声明第五层全连接层的变量并实现前向传播过程。输入长度为3136的向量,输出长度为512的向量。该层引入了dropout的概念,\n", 75 | " # dropout在训练时随机将部分结点的输出改为0.dropout一般只在全连接层而不是卷积层或池化层使用。\n", 76 | " with tf.variable_scope('layer5-fcl'):\n", 77 | " fc1_weights = tf.get_variable('weight', [nodes, FC_SIZE],\n", 78 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 79 | "\n", 80 | " # 只有全连接层权重需要加入正则化\n", 81 | " if regularizer != None:\n", 82 | " tf.add_to_collection('losses', regularizer(fc1_weights))\n", 83 | " fc1_biases = tf.get_variable('bias', [FC_SIZE], initializer=tf.constant_initializer(0.1))\n", 84 | "\n", 85 | " fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)\n", 86 | " if train: fc1 = tf.nn.dropout(fc1, 0.5)\n", 87 | "\n", 88 | " # 声明第六层全连接层变量并实现前向传播,输入长度为512的向量,输出长度为10的向量。输出通过softmax之后可得到最后的分类结果。\n", 89 | " with tf.variable_scope('layer6-fc2'):\n", 90 | " fc2_weights = tf.get_variable('weight', [FC_SIZE, NUM_LABELS],\n", 91 | " initializer=tf.truncated_normal_initializer(stddev=0.1))\n", 92 | " if regularizer != None:\n", 93 | " tf.add_to_collection('losses', regularizer(fc2_weights))\n", 94 | "\n", 95 | " fc2_biases = tf.get_variable('bias', [NUM_LABELS], initializer=tf.constant_initializer(0.1))\n", 96 | " logit = tf.matmul(fc1, fc2_weights) + fc2_biases\n", 97 | " return logit\n", 98 | "\n" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": { 105 | "collapsed": false 106 | }, 107 | "outputs": [ 108 | { 
109 | "name": "stdout", 110 | "output_type": "stream", 111 | "text": [ 112 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 113 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 114 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 115 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 116 | "After 1 training steps, loss on training batch is 15.0016.\n" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "# -*- coding: utf-8 -*-\n", 122 | "import os\n", 123 | "from tensorflow.examples.tutorials.mnist import input_data\n", 124 | "import numpy as np\n", 125 | "\n", 126 | "# 配置神经网络的参数\n", 127 | "BATCH_SIZE = 8\n", 128 | "LEARNING_RATE_BASE = 0.8\n", 129 | "LEARNING_RATE_DECAY = 0.99\n", 130 | "REGULARIZATION_RATE = 0.0001\n", 131 | "TRAINING_STEPS = 10000\n", 132 | "MOVING_AVERAGE_DECAY = 0.99\n", 133 | "MODEL_SAVE_PATH = \"./model/fcn_mnist\"\n", 134 | "MODEL_NAME = \"fcn_mnist.ckpt\"\n", 135 | "\n", 136 | "\n", 137 | "def train(mnist):\n", 138 | " x = tf.placeholder(tf.float32, [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS], name='x-input')\n", 139 | " y = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-output')\n", 140 | "\n", 141 | " regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)\n", 142 | " # 调用推断过程\n", 143 | " y_hat = inference(x, True, regularizer)\n", 144 | " global_step = tf.Variable(0, trainable=False)\n", 145 | "\n", 146 | " # 定义损失函数、学习率、滑动平均操作及训练过程\n", 147 | " variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)\n", 148 | " variables_average_op = variable_averages.apply(tf.trainable_variables())\n", 149 | " cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=tf.argmax(y, 1))\n", 150 | " cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", 151 | " loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))\n", 152 | " learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, mnist.train.num_examples / BATCH_SIZE,\n", 153 | " LEARNING_RATE_DECAY)\n", 154 | " train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)\n", 155 | "\n", 156 | " with tf.control_dependencies([train_step, variables_average_op]):\n", 157 | " train_op = tf.no_op(name='train')\n", 158 | "\n", 159 | " # 初始化TF持久化类\n", 160 | " saver = tf.train.Saver()\n", 161 | " with tf.Session() as sess:\n", 162 | " sess.run(tf.global_variables_initializer())\n", 163 | "\n", 164 | " # 在训练过程中不再测试模型在验证数据上的表现,验证和测试的过程会有独立的过程完成\n", 165 | " for i in range(TRAINING_STEPS):\n", 166 | " xs, ys = mnist.train.next_batch(BATCH_SIZE)\n", 167 | " reshaped_xs=np.reshape(xs,(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))\n", 168 | " _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y: ys})\n", 169 | "\n", 170 | " # 每1000次迭代保存一次模型\n", 171 | " if i % 1000 == 0:\n", 172 | " # 输出模型在当前训练批量下的损失函数大小\n", 173 | " print('After %d training steps, loss on training batch is %g.' 
% (step, loss_value))\n", 174 | "\n", 175 | " # 保存当前模型,并使用global_step 参数特定地命名\n", 176 | " saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)\n", 177 | "\n", 178 | "\n", 179 | "def main(argv=None):\n", 180 | " mnist = input_data.read_data_sets('./data/MNIST/', one_hot=True)\n", 181 | " train(mnist)\n", 182 | "\n", 183 | "\n", 184 | "if __name__ == '__main__':\n", 185 | " tf.app.run()\n" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": true 193 | }, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | "version": "3.5.3" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /Experiments/tf_orginal_CapsNet.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "#以下代码修改自naturomics的GitHub实现,包含三层CapsNet和后面的重构网络\n", 12 | "#改网络参数比较多,我们后面会只训练测试三层CapsNet。\n", 13 | "\n", 14 | "import tensorflow as tf\n", 15 | "import numpy as np\n", 16 | "import os\n", 17 | "from tqdm import tqdm\n", 18 | "\n", 19 | "epsilon = 1e-9\n", 20 | "batch_size = 8\n", 21 | "epoch = 1\n", 22 | "\n", 23 | "#margin loss 中调节上margin和下margind的权重\n", 24 | "lambda_val = 0.5\n", 25 | "#上margin与下margin的参数值\n", 26 | "m_plus = 0.9\n", 27 | "m_minus = 0.1\n", 28 | "\n", 29 | "# 路由更新c_ij所经过的迭代次数\n", 30 | "iter_routing = 3\n", 31 | "\n", 32 | "# Tensorboard 保存位置\n", 33 | "logdir ='logdir'\n", 34 | "# 数据集路径\n", 35 | "dataset_path = 'data/MNIST'\n", 36 | "is_training= True" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 2, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "# 定义加载mnist的函数\n", 48 | "def load_mnist(path, is_training):\n", 49 | "\n", 50 | " #trX将加载储存所有60000张灰度图\n", 51 | " fd = open(os.path.join(path, 'train-images.idx3-ubyte'))\n", 52 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 53 | " trX = loaded[16:].reshape((60000, 28, 28, 1)).astype(np.float)\n", 54 | "\n", 55 | " fd = open(os.path.join(path, 'train-labels.idx1-ubyte'))\n", 56 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 57 | " trY = loaded[8:].reshape((60000)).astype(np.float)\n", 58 | "\n", 59 | " #teX将储存所有一万张测试用的图片\n", 60 | " fd = open(os.path.join(path, 't10k-images.idx3-ubyte'))\n", 61 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 62 | " teX = loaded[16:].reshape((10000, 28, 28, 1)).astype(np.float)\n", 63 | "\n", 64 | " fd = open(os.path.join(path, 't10k-labels.idx1-ubyte'))\n", 65 | " loaded = np.fromfile(file=fd, dtype=np.uint8)\n", 66 | " teY = loaded[8:].reshape((10000)).astype(np.float)\n", 67 | "\n", 68 | " # 将所有训练图片表示为一个4维张量 [60000, 28, 28, 1],其中每个像素值缩放到0和1之间\n", 69 | " trX = tf.convert_to_tensor(trX / 255., tf.float32)\n", 70 | "\n", 71 | " # one hot编码为 [num_samples, 10]\n", 72 | " trY = tf.one_hot(trY, depth=10, axis=1, dtype=tf.float32)\n", 73 | " teY = 
tf.one_hot(teY, depth=10, axis=1, dtype=tf.float32)\n", 74 | "\n", 75 | " # 训练和测试时返回不同的数据\n", 76 | " if is_training:\n", 77 | " return trX, trY\n", 78 | " else:\n", 79 | " return teX / 255., teY\n", 80 | "\n", 81 | "def get_batch_data():\n", 82 | " trX, trY = load_mnist(dataset_path, True)\n", 83 | "\n", 84 | " # 每次产生一个切片\n", 85 | " data_queues = tf.train.slice_input_producer([trX, trY])\n", 86 | "\n", 87 | " # 对队列中的样本进行乱序处理\n", 88 | " X, Y = tf.train.shuffle_batch(data_queues,\n", 89 | " batch_size=batch_size,\n", 90 | " capacity=batch_size * 64,\n", 91 | " min_after_dequeue=batch_size * 32,\n", 92 | " allow_smaller_final_batch=False)\n", 93 | " return (X, Y)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "# 通过定义类和对象的方式定义Capssule层级\n", 105 | "class CapsLayer(object):\n", 106 | " ''' Capsule layer 类别参数有:\n", 107 | " Args:\n", 108 | " input: 一个4维张量\n", 109 | " num_outputs: 当前层的Capsule单元数量\n", 110 | " vec_len: 一个Capsule输出向量的长度\n", 111 | " layer_type: 选择'FC' 或 \"CONV\", 以确定是用全连接层还是卷积层\n", 112 | " with_routing: 当前Capsule是否从较低层级中Routing而得出输出向量\n", 113 | "\n", 114 | " Returns:\n", 115 | " 一个四维张量\n", 116 | " '''\n", 117 | " def __init__(self, num_outputs, vec_len, with_routing=True, layer_type='FC'):\n", 118 | " self.num_outputs = num_outputs\n", 119 | " self.vec_len = vec_len\n", 120 | " self.with_routing = with_routing\n", 121 | " self.layer_type = layer_type\n", 122 | "\n", 123 | " def __call__(self, input, kernel_size=None, stride=None):\n", 124 | " '''\n", 125 | " 当“Layer_type”选择的是“CONV”,我们将使用 'kernel_size' 和 'stride'\n", 126 | " '''\n", 127 | "\n", 128 | " # 开始构建卷积层\n", 129 | " if self.layer_type == 'CONV':\n", 130 | " self.kernel_size = kernel_size\n", 131 | " self.stride = stride\n", 132 | "\n", 133 | " # PrimaryCaps层没有Routing过程\n", 134 | " if not self.with_routing:\n", 135 | " # 卷积层为 PrimaryCaps 层(CapsNet第二层), 并将第一层卷积的输出张量作为输入。\n", 136 | " # 输入张量的维度为: [batch_size, 20, 20, 256]\n", 137 | " assert input.get_shape() == [batch_size, 20, 20, 256]\n", 138 | "\n", 139 | " # # 从CapsNet输出向量的每一个分量开始执行卷积,每个分量上执行带32个卷积核的9×9标准卷积\n", 140 | " # capsules = []\n", 141 | " # for i in range(self.vec_len):\n", 142 | " # # 所有Capsule的一个分量,其维度为: [batch_size, 6, 6, 32],即6×6×1×32\n", 143 | " # with tf.variable_scope('ConvUnit_' + str(i)):\n", 144 | " # caps_i = tf.contrib.layers.conv2d(input, self.num_outputs,\n", 145 | " # self.kernel_size, self.stride,\n", 146 | " # padding=\"VALID\")\n", 147 | " #\n", 148 | " # # 将一般卷积的结果张量拉平,并为添加到列表中\n", 149 | " # caps_i = tf.reshape(caps_i, shape=(batch_size, -1, 1, 1))\n", 150 | " # capsules.append(caps_i)\n", 151 | " #\n", 152 | " # # 为将卷积后张量各个分量合并为向量做准备\n", 153 | " # assert capsules[0].get_shape() == [batch_size, 1152, 1, 1]\n", 154 | " #\n", 155 | " # # 合并为PrimaryCaps的输出张量,即6×6×32个长度为8的向量,合并后的维度为 [batch_size, 1152, 8, 1]\n", 156 | " # capsules = tf.concat(capsules, axis=2)\n", 157 | " # # 将每个Capsule 向量投入非线性函数squash进行缩放与激活,第二层输出的向量要经过缩放\n", 158 | " # capsules = squash(capsules)\n", 159 | " # assert capsules.get_shape() == [batch_size, 1152, 8, 1]\n", 160 | " # return(capsules)\n", 161 | "\n", 162 | " # 以下更新后的计算方法\n", 163 | " capsules = tf.contrib.layers.conv2d(input, self.num_outputs * self.vec_len,\n", 164 | " self.kernel_size, self.stride, padding=\"VALID\")\n", 165 | " capsules = tf.reshape(capsules, (batch_size, -1, self.vec_len, 1))\n", 166 | "\n", 167 | " # [batch_size, 1152, 8, 1]\n", 168 | " capsules = squash(capsules)\n", 169 | " assert 
capsules.get_shape() == [batch_size, 1152, 8, 1]\n", 170 | " return (capsules)\n", 171 | "\n", 172 | " if self.layer_type == 'FC':\n", 173 | "\n", 174 | " # DigitCaps 带有Routing过程\n", 175 | " if self.with_routing:\n", 176 | " # CapsNet 的第三层 DigitCaps 层是一个全连接网络\n", 177 | " # 将输入张量重建为 [batch_size, 1152, 1, 8, 1]\n", 178 | " self.input = tf.reshape(input, shape=(batch_size, -1, 1, input.shape[-2].value, 1))\n", 179 | "\n", 180 | " with tf.variable_scope('routing'):\n", 181 | " # 初始化b_IJ的值为零,且维度满足: [1, 1, num_caps_l, num_caps_l_plus_1, 1]\n", 182 | " b_IJ = tf.constant(np.zeros([1, input.shape[1].value, self.num_outputs, 1, 1], dtype=np.float32))\n", 183 | " # 使用定义的Routing过程计算权值更新与s_j\n", 184 | " capsules = routing(self.input, b_IJ)\n", 185 | " # 将s_j投入 squeeze 函数以得出 DigitCaps 层的输出向量\n", 186 | " capsules = tf.squeeze(capsules, axis=1)\n", 187 | "\n", 188 | " return(capsules)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 4, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# 定义路由算法的过程\n", 200 | "def routing(input, b_IJ):\n", 201 | " ''' 路由算法\n", 202 | "\n", 203 | " Args:\n", 204 | " input: 输入张量的维度为 [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]\n", 205 | " 其中num_caps_l为上一层(PrimaryCaps)的Capsule单元数量\n", 206 | " Returns:\n", 207 | " 返回的张量维度为 [batch_size, num_caps_l_plus_1, length(v_j)=16, 1]\n", 208 | " 表征了i+1层的输出向量 `v_j`,num_caps_l_plus_1 为DigitCaps层的输出数\n", 209 | " Notes:\n", 210 | " u_i 表示l层中 capsule i 的输出向量\n", 211 | " v_j 表示l+1层中 capsule j 的输出向量\n", 212 | " '''\n", 213 | "\n", 214 | " # 定义W的张量维度为 [num_caps_j, num_caps_i, len_u_i, len_v_j]\n", 215 | " # W_ij共有1152×10个,每一个的维度为8×16\n", 216 | " W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,\n", 217 | " initializer=tf.random_normal_initializer(stddev=0.01))\n", 218 | "\n", 219 | " # 论文中的 Eq.2, 计算 u_hat\n", 220 | " # 在使用 W 和u_i计算u_hat前,先调整张量维度\n", 221 | " # input => [batch_size, 1152, 10, 8, 1]\n", 222 | " # W => [batch_size, 1152, 10, 8, 16]\n", 223 | " input = tf.tile(input, [1, 1, 10, 1, 1])\n", 224 | " W = tf.tile(W, [batch_size, 1, 1, 1, 1])\n", 225 | " assert input.get_shape() == [batch_size, 1152, 10, 8, 1]\n", 226 | "\n", 227 | " # 因为[8, 16].T x [8, 1] => [16, 1],所以矩阵乘法在最后得出的维度为 [batch_size, 1152, 10, 16, 1]\n", 228 | " u_hat = tf.matmul(W, input, transpose_a=True)\n", 229 | " assert u_hat.get_shape() == [batch_size, 1152, 10, 16, 1]\n", 230 | "\n", 231 | " # 前面是扩展的线性组合,后面是路由的部分,以下开始迭代路由过程更新耦合系数\n", 232 | " # 对应论文中伪代码的第三行\n", 233 | " for r_iter in range(iter_routing):\n", 234 | " with tf.variable_scope('iter_' + str(r_iter)):\n", 235 | " # 原论文伪代码第四行,计算softmax(b_ij)\n", 236 | " # => [1, 1152, 10, 1,1]\n", 237 | " c_IJ = tf.nn.softmax(b_IJ, dim=3)\n", 238 | " c_IJ = tf.tile(c_IJ, [batch_size, 1, 1, 1, 1])\n", 239 | " assert c_IJ.get_shape() == [batch_size, 1152, 10, 1, 1]\n", 240 | "\n", 241 | " # 原论文伪代码第五行,根据更新的c_ij计算s_j\n", 242 | " # 先利用 c_IJ 给 u_hat 加权,即在后两个维度采用对应元素的乘积\n", 243 | " # => [batch_size, 1152, 10, 16, 1]\n", 244 | " s_J = tf.multiply(c_IJ, u_hat)\n", 245 | " # 在第二个维度上求和, 产生的张量维度为 [batch_size, 1, 10, 16, 1]\n", 246 | " s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)\n", 247 | " assert s_J.get_shape() == [batch_size, 1, 10, 16, 1]\n", 248 | "\n", 249 | " # 原论文伪代码的第六行\n", 250 | " # 使用 Eq.1 计算squashing非线性函数\n", 251 | " v_J = squash(s_J)\n", 252 | " assert v_J.get_shape() == [batch_size, 1, 10, 16, 1]\n", 253 | "\n", 254 | " # 原论文伪代码的第七行\n", 255 | " # reshape & tile v_j from [batch_size ,1, 10, 16, 1] to [batch_size, 10, 
1152, 16, 1]\n", 256 | " # then matmul in the last two dim: [16, 1].T x [16, 1] => [1, 1], reduce sum in the\n", 257 | " # batch_size dim, resulting in [1, 1152, 10, 1, 1]\n", 258 | " v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])\n", 259 | " u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)\n", 260 | " assert u_produce_v.get_shape() == [batch_size, 1152, 10, 1, 1]\n", 261 | " b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)\n", 262 | "\n", 263 | " return(v_J)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 5, 269 | "metadata": { 270 | "collapsed": true 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "def squash(vector):\n", 275 | " ''' 根据原论文中 Eq. 1 定义squashing函数\n", 276 | " Args:\n", 277 | " vector: 一个 5-D 张量,其维度是 [batch_size, 1, num_caps, vec_len, 1],\n", 278 | " Returns:\n", 279 | " 返回一个 5-D 张量,其第四和第五个维度经过了该非线性函数计算\n", 280 | " '''\n", 281 | " vec_squared_norm = tf.reduce_sum(tf.square(vector), -2, keep_dims=True)\n", 282 | " scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)\n", 283 | " vec_squashed = scalar_factor * vector # element-wise\n", 284 | " return(vec_squashed)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": true 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "# 以下定义整个 CapsNet 的架构与正向传播过程\n", 296 | "class CapsNet():\n", 297 | " def __init__(self, is_training=True):\n", 298 | " self.graph = tf.Graph()\n", 299 | " with self.graph.as_default():\n", 300 | " if is_training:\n", 301 | " # 获取一个批量的训练数据\n", 302 | " self.X, self.Y = get_batch_data()\n", 303 | "\n", 304 | " self.build_arch()\n", 305 | " self.loss()\n", 306 | "\n", 307 | " # t_vars = tf.trainable_variables()\n", 308 | " self.optimizer = tf.train.AdamOptimizer()\n", 309 | " self.global_step = tf.Variable(0, name='global_step', trainable=False)\n", 310 | " self.train_op = self.optimizer.minimize(self.total_loss, global_step=self.global_step) # var_list=t_vars)\n", 311 | " else:\n", 312 | " self.X = tf.placeholder(tf.float32,\n", 313 | " shape=(batch_size, 28, 28, 1))\n", 314 | " self.build_arch()\n", 315 | "\n", 316 | " tf.logging.info('Seting up the main structure')\n", 317 | "\n", 318 | " # CapsNet 类中的build_arch方法能构建整个网络的架构\n", 319 | " def build_arch(self):\n", 320 | " # 以下构建第一个常规卷积层\n", 321 | " with tf.variable_scope('Conv1_layer'):\n", 322 | " # 第一个卷积层的输出张量为: [batch_size, 20, 20, 256]\n", 323 | " # 以下卷积输入图像X,采用256个9×9的卷积核,步幅为1,且不使用填充\n", 324 | " conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,\n", 325 | " kernel_size=9, stride=1,\n", 326 | " padding='VALID')\n", 327 | " # 使用 assert 可以在出现错误条件时就返回错误,有助于调试\n", 328 | " assert conv1.get_shape() == [batch_size, 20, 20, 256]\n", 329 | "\n", 330 | " # 以下是原论文中PrimaryCaps层的构建过程,该层的输出维度为 [batch_size, 1152, 8, 1]\n", 331 | " with tf.variable_scope('PrimaryCaps_layer'):\n", 332 | " # 调用前面定义的CapsLayer类构建第二个卷积层,该过程相当于执行八次常规卷积,\n", 333 | " # 然后将各对应位置的元素组合成一个长度为8的向量,这八次常规卷积都是采用32个9×9的卷积核、步幅为2\n", 334 | " primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')\n", 335 | " caps1 = primaryCaps(conv1, kernel_size=9, stride=2)\n", 336 | " assert caps1.get_shape() == [batch_size, 1152, 8, 1]\n", 337 | "\n", 338 | " # 以下构建 DigitCaps 层, 该层返回的张量维度为 [batch_size, 10, 16, 1]\n", 339 | " with tf.variable_scope('DigitCaps_layer'):\n", 340 | " # DigitCaps是最后一层,它返回对应10个类别的向量(每个有16个元素),该层的构建带有Routing过程\n", 341 | " digitCaps = CapsLayer(num_outputs=10, vec_len=16, with_routing=True, 
layer_type='FC')\n", 342 | " self.caps2 = digitCaps(caps1)\n", 343 | "\n", 344 | " # 以下构建论文图2中的解码结构,即由16维向量重构出对应类别的整个图像\n", 345 | " # 1. Do masking, how:\n", 346 | " with tf.variable_scope('Masking'):\n", 347 | " # Method 1. masking with true label, default mode\n", 348 | "\n", 349 | " # mask_with_y是否用真实标签蒙住目标Capsule\n", 350 | " mask_with_y=True\n", 351 | " if mask_with_y:\n", 352 | " self.masked_v = tf.matmul(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)), transpose_a=True)\n", 353 | " self.v_length = tf.sqrt(tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True) + epsilon)\n", 354 | "\n", 355 | " # 通过3个全连接层重构MNIST图像,这三个全连接层的神经元数分别为512、1024、784\n", 356 | " # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]\n", 357 | " with tf.variable_scope('Decoder'):\n", 358 | " vector_j = tf.reshape(self.masked_v, shape=(batch_size, -1))\n", 359 | " fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)\n", 360 | " assert fc1.get_shape() == [batch_size, 512]\n", 361 | " fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)\n", 362 | " assert fc2.get_shape() == [batch_size, 1024]\n", 363 | " self.decoded = tf.contrib.layers.fully_connected(fc2, num_outputs=784, activation_fn=tf.sigmoid)\n", 364 | "\n", 365 | " # 定义 CapsNet 的损失函数,损失函数一共分为衡量 CapsNet准确度的Margin loss\n", 366 | " # 和衡量重构图像准确度的 Reconstruction loss\n", 367 | " def loss(self):\n", 368 | " # 以下先定义重构损失,因为DigitCaps的输出向量长度就为某类别的概率,因此可以借助计算向量长度计算损失\n", 369 | " # [batch_size, 10, 1, 1]\n", 370 | " # max_l = max(0, m_plus-||v_c||)^2\n", 371 | " max_l = tf.square(tf.maximum(0., m_plus - self.v_length))\n", 372 | " # max_r = max(0, ||v_c||-m_minus)^2\n", 373 | " max_r = tf.square(tf.maximum(0., self.v_length - m_minus))\n", 374 | " assert max_l.get_shape() == [batch_size, 10, 1, 1]\n", 375 | "\n", 376 | " # 将当前的维度[batch_size, 10, 1, 1] 转换为10个数字类别的one-hot编码 [batch_size, 10]\n", 377 | " max_l = tf.reshape(max_l, shape=(batch_size, -1))\n", 378 | " max_r = tf.reshape(max_r, shape=(batch_size, -1))\n", 379 | "\n", 380 | " # 计算 T_c: [batch_size, 10],其为分类的指示函数\n", 381 | " # 若令T_c = Y,那么对应元素相乘就是有类别相同才会有非零输出值,T_c 和 Y 都为One-hot编码\n", 382 | " T_c = self.Y\n", 383 | " # [batch_size, 10], 对应元素相乘并构建最后的Margin loss 函数\n", 384 | " L_c = T_c * max_l + lambda_val * (1 - T_c) * max_r\n", 385 | "\n", 386 | " self.margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))\n", 387 | "\n", 388 | " # 以下构建reconstruction loss函数\n", 389 | " # 这一过程的损失函数通过计算FC Sigmoid层的输出像素点与原始图像像素点间的欧几里德距离而构建\n", 390 | " orgin = tf.reshape(self.X, shape=(batch_size, -1))\n", 391 | " squared = tf.square(self.decoded - orgin)\n", 392 | " self.reconstruction_err = tf.reduce_mean(squared)\n", 393 | "\n", 394 | " # 构建总损失函数,Hinton论文将reconstruction loss乘上0.0005\n", 395 | " # 以使它不会主导训练过程中的Margin loss\n", 396 | " self.total_loss = self.margin_loss + 0.0005 * self.reconstruction_err\n", 397 | "\n", 398 | " # 以下输出TensorBoard\n", 399 | " tf.summary.scalar('margin_loss', self.margin_loss)\n", 400 | " tf.summary.scalar('reconstruction_loss', self.reconstruction_err)\n", 401 | " tf.summary.scalar('total_loss', self.total_loss)\n", 402 | " recon_img = tf.reshape(self.decoded, shape=(batch_size, 28, 28, 1))\n", 403 | " tf.summary.image('reconstruction_img', recon_img)\n", 404 | " self.merged_sum = tf.summary.merge_all()" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": { 411 | "collapsed": false 412 | }, 413 | "outputs": [ 414 | { 415 | "name": "stdout", 416 | "output_type": "stream", 417 | "text": [ 418 | 
"INFO:tensorflow:Seting up the main structure\n", 419 | "INFO:tensorflow:Graph loaded\n" 420 | ] 421 | } 422 | ], 423 | "source": [ 424 | "if __name__ == \"__main__\":\n", 425 | " # 训练和推断\n", 426 | " capsNet = CapsNet(is_training=is_training)\n", 427 | " tf.logging.info('Graph loaded')\n", 428 | " sv = tf.train.Supervisor(graph=capsNet.graph,\n", 429 | " logdir=logdir,\n", 430 | " save_model_secs=0)\n", 431 | "\n", 432 | " with sv.managed_session() as sess:\n", 433 | " num_batch = int(60000 / batch_size)\n", 434 | " for epoch in range(epoch):\n", 435 | " if sv.should_stop():\n", 436 | " break\n", 437 | " for step in tqdm(range(num_batch), total=num_batch, ncols=70, leave=False, unit='b'):\n", 438 | " sess.run(capsNet.train_op)\n", 439 | "\n", 440 | " global_step = sess.run(capsNet.global_step)\n", 441 | " sv.saver.save(sess, logdir + '/model_epoch_%04d_step_%02d' % (epoch, global_step))\n", 442 | "\n", 443 | " tf.logging.info('Training done')" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": null, 449 | "metadata": { 450 | "collapsed": true 451 | }, 452 | "outputs": [], 453 | "source": [] 454 | } 455 | ], 456 | "metadata": { 457 | "kernelspec": { 458 | "display_name": "Python 3", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 3 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython3", 472 | "version": "3.5.3" 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 2 477 | } 478 | -------------------------------------------------------------------------------- /Experiments/tf_trial_1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "A=np.array([[11,12,13],[21,22,23],[31,32,33]])\n", 13 | "B=np.ones(shape=(3,3))" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "import tensorflow as tf\n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 8, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [ 34 | { 35 | "name": "stdout", 36 | "output_type": "stream", 37 | "text": [ 38 | "Tensor(\"add:0\", shape=(2,), dtype=int32)\n", 39 | "[3 6]\n" 40 | ] 41 | }, 42 | { 43 | "data": { 44 | "text/plain": [ 45 | ">" 46 | ] 47 | }, 48 | "execution_count": 8, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "a=tf.constant([1,2],name=\"a\")\n", 55 | "b=tf.constant([2,4],name=\"b\")\n", 56 | "result = a+b\n", 57 | "print(result)\n", 58 | "\n", 59 | "#上面只是定义了计算图,并没有运行计算图,所以不会输出运算结果\n", 60 | "\n", 61 | "sess=tf.Session()\n", 62 | "a=sess.run(result)\n", 63 | "print(a)\n", 64 | "sess.close\n", 65 | "\n", 66 | "#打开会话,运行计算图,关闭计算图" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 9, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "[ 1 4 9 16]\n", 81 | "[ 1 4 9 16]\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "with tf.Session() as sess:\n", 87 | " a=tf.constant([1,2,3,4])\n", 88 | " b=tf.constant([1,2,3,4])\n", 89 | 
" result=tf.multiply(a,b)\n", 90 | " c=sess.run(result)\n", 91 | " print(c)\n", 92 | " \n", 93 | "print(c)\n", 94 | "\n", 95 | "# with 结束,计算会话自动关闭" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 10, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "[[ 3.95757794]]\n", 110 | "[[ 3.95757794]]\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "import tensorflow as tf\n", 116 | "#(2,3,1单元)3层前向神经网络(无激活函数)\n", 117 | "\n", 118 | "#生成服从标准差为1的正态分布的随机变量,作为初始化矩阵\n", 119 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 120 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 121 | "\n", 122 | "x=tf.constant([[0.7,0.9]])\n", 123 | "#矩阵乘法\n", 124 | "a=tf.matmul(x,w1)\n", 125 | "b=tf.matmul(a,w2)\n", 126 | "\n", 127 | "sess=tf.Session()\n", 128 | "#需要运行初始化赋值,前面只是定义,没运算\n", 129 | "sess.run(w1.initializer)\n", 130 | "sess.run(w2.initializer)\n", 131 | "y=sess.run(b)\n", 132 | "\n", 133 | "sess.close()\n", 134 | "print(y)\n", 135 | "\n", 136 | "with tf.Session() as sess:\n", 137 | " w3=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 138 | " w4=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 139 | " x1=tf.constant([[0.7,0.9]])\n", 140 | " a1=tf.matmul(x,w3)\n", 141 | " b1=tf.matmul(a1,w4)\n", 142 | " #可以直接嵌入初始化\n", 143 | " sess.run(tf.global_variables_initializer())\n", 144 | " print(sess.run(b1))\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 11, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "[[ 3.95757794]]\n", 159 | "[[ 3.95757794]\n", 160 | " [ 1.657197 ]\n", 161 | " [ 7.20209646]]\n" 162 | ] 163 | }, 164 | { 165 | "data": { 166 | "text/plain": [ 167 | ">" 168 | ] 169 | }, 170 | "execution_count": 11, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "import tensorflow as tf\n", 177 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 178 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 179 | "\n", 180 | "#因为需要重复输入x,而每建一个x就会生成一个结点,计算图的效率会低。所以使用占位符\n", 181 | "x=tf.placeholder(tf.float32,shape=(1,2))\n", 182 | "x1=tf.placeholder(tf.float32,shape=(3,2))\n", 183 | "a=tf.matmul(x,w1)\n", 184 | "a1=tf.matmul(x1,w1)\n", 185 | "y=tf.matmul(a,w2)\n", 186 | "y1=tf.matmul(a1,w2)\n", 187 | "\n", 188 | "sess=tf.Session()\n", 189 | "sess.run(tf.global_variables_initializer())\n", 190 | "#运行y时将占位符填上,feed_dict为字典,变量名不可变\n", 191 | "y_hat=sess.run(y,feed_dict={x:[[0.7,0.9]]})\n", 192 | "y_hat1=sess.run(y1,feed_dict={x1:[[0.7,0.9],[0.2,0.5],[1,2]]})# batch = 3\n", 193 | "print(y_hat)\n", 194 | "print(y_hat1)\n", 195 | "sess.close\n" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 2, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [ 205 | { 206 | "name": "stdout", 207 | "output_type": "stream", 208 | "text": [ 209 | "[[-0.81131822 1.48459876 0.06532937]\n", 210 | " [-2.4427042 0.0992484 0.59122431]]\n", 211 | "[[-0.81131822]\n", 212 | " [ 1.48459876]\n", 213 | " [ 0.06532937]]\n", 214 | "在迭代 0 次后,训练损失为 0.047106\n", 215 | "在迭代 1000 次后,训练损失为 0.0114981\n", 216 | "在迭代 2000 次后,训练损失为 0.00481489\n", 217 | "在迭代 3000 次后,训练损失为 0.00285046\n", 218 | "在迭代 4000 次后,训练损失为 0.00207789\n", 219 | "在迭代 5000 次后,训练损失为 0.00158399\n", 220 | "在迭代 6000 次后,训练损失为 0.0011365\n", 221 | "在迭代 7000 次后,训练损失为 
0.000963961\n", 222 | "在迭代 8000 次后,训练损失为 0.000769849\n", 223 | "在迭代 9000 次后,训练损失为 0.000558014\n", 224 | "在迭代 10000 次后,训练损失为 0.000354686\n" 225 | ] 226 | } 227 | ], 228 | "source": [ 229 | "import tensorflow as tf\n", 230 | "from numpy.random import RandomState\n", 231 | "\n", 232 | "batch_size=10\n", 233 | "w1=tf.Variable(tf.random_normal([2,3],stddev=1,seed=1))\n", 234 | "w2=tf.Variable(tf.random_normal([3,1],stddev=1,seed=1))\n", 235 | "\n", 236 | "# None 可以根据batch 大小确定维度,在shape的一个维度上使用None,方便不大的batch\n", 237 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 238 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 239 | "\n", 240 | "a=tf.matmul(x,w1)\n", 241 | "yhat=tf.matmul(a,w2)\n", 242 | "\n", 243 | "#定义交叉熵为损失函数,训练过程使用Adam算法最小化交叉熵\n", 244 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 245 | "train_step=tf.train.AdamOptimizer(0.001).minimize(cross_entropy)\n", 246 | "\n", 247 | "rdm=RandomState(1)\n", 248 | "data_size=516\n", 249 | "\n", 250 | "#生成两个特征,共data_size个样本\n", 251 | "X=rdm.rand(data_size,2)\n", 252 | "#定义规则给出样本标签,所有x1+x2<1的样本认为是正样本,其他为负样本。Y,1为正样本\n", 253 | "Y = [[int(x1+x2 < 1)] for (x1, x2) in X]\n", 254 | "\n", 255 | "with tf.Session() as sess:\n", 256 | " sess.run(tf.global_variables_initializer())\n", 257 | " print(sess.run(w1))\n", 258 | " print(sess.run(w2))\n", 259 | " steps=11000\n", 260 | " for i in range(steps):\n", 261 | " \n", 262 | " #选定每一个批量读取的首尾位置,确保在1个epoch内采样训练\n", 263 | " start = i * batch_size % data_size\n", 264 | " end = min(start + batch_size,data_size)\n", 265 | " sess.run(train_step,feed_dict={x:X[start:end],y:Y[start:end]})\n", 266 | " if i % 1000 == 0:\n", 267 | " training_loss= sess.run(cross_entropy,feed_dict={x:X,y:Y})\n", 268 | " print(\"在迭代 %d 次后,训练损失为 %g\"%(i,training_loss))" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": { 274 | "collapsed": true 275 | }, 276 | "source": [ 277 | "#### 激活函数和偏置项:\n", 278 | "a=tf.nn.relu(tf.matmul(x,w1)+biases1)\n", 279 | "\n", 280 | "yhat=tf.nn.relu(tf.matmul(a,w2)+biases2)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "collapsed": true 287 | }, 288 | "source": [ 289 | "#### 交叉熵函数\n", 290 | "cross_entropy=-tf.reduce_mean(y*tf.log(tf.clip_by_value(yhat,1e-10,1.0)))\n", 291 | "\n", 292 | "tf.reduce_mean(x)表示计算全局平均值。tf.clip_by_value()函数可以将张量中的数值限制在一个范围内。tf.log()对张量内的所有元素依次求对数。交叉熵函数一般会与softmax回归一起使用,TensorFlow将它们进行了统一封装:cross_entropy=tf.nn.softmax_cross_entropy_with_logits(yhat,y)\n", 293 | "\n" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "#### 均方误差损失函数\n", 301 | "mse=tf.reduce_mean(tf.square(y-yhat))\n", 302 | "\n", 303 | "tf.select(tf.greater(y,yhat),y-yhat,yhat-y)\n", 304 | "\n", 305 | "tf.greater()的输入是两个张量,比较两个张量中的每一个元素,并返回比较结果(true或false的向量)。tf.select()有三个参数,第一个参数条件为真时选择第二个参数中的值,否则选择第三个参数的值。" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 10, 311 | "metadata": { 312 | "collapsed": true 313 | }, 314 | "outputs": [], 315 | "source": [ 316 | "#decayed_learning_rate=learning_rate*decay_rate^(global_steps/decay_steps),指数衰减函数的定义\n", 317 | "\n", 318 | "global_step=tf.Variable(0)\n", 319 | "#使用exponential_decay生成学习速率,因为staircase=tire,每100次迭代,学习率×0.96\n", 320 | "learning_rate=tf.train.exponential_decay(0.1,global_step,100,0.96,staircase=True)\n", 321 | "#在minimize中导入global_step将自动更新\n", 322 | "#learning_step=tf.train.GtadientDescentOptimizer(learning_rate).minimize(loss_function,global_step=global_step)\n" 323 | ] 324 | }, 325 | { 326 | 
"cell_type": "markdown", 327 | "metadata": { 328 | "collapsed": true 329 | }, 330 | "source": [ 331 | "#### 带L2正则化的损失函数\n", 332 | "w=tf.Variable(tf.random_normal([2,1],stddev=1,seed=1))\n", 333 | "\n", 334 | "yhat=tf.matmul(x,w)\n", 335 | "\n", 336 | "loss=tf.reduce_mean(tf.square(y-yhat))+tf.contrib.layers.l2_regularizer(lambda)(w)" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 1, 342 | "metadata": { 343 | "collapsed": false 344 | }, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "6.5\n", 351 | "12.75\n" 352 | ] 353 | } 354 | ], 355 | "source": [ 356 | "import tensorflow as tf\n", 357 | "w=tf.constant([[1.0,-5.0],[-3.0,4.0]])\n", 358 | "with tf.Session() as sess:\n", 359 | " #L1正则化:(1+5+3+4)×0.5\n", 360 | " print(sess.run(tf.contrib.layers.l1_regularizer(0.5)(w)))\n", 361 | " #L2正则化:(1+25+9+16)/2×0.5,L2正则化会处以2,无偏估计?\n", 362 | " print(sess.run(tf.contrib.layers.l2_regularizer(0.5)(w)))" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "神经网络结构复杂后,定义网络结构的部分和计算损失函数的部分可能不在同一个函数中。所以采用collection在一个计算图中保留一组实体(如张量)。" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 2, 375 | "metadata": { 376 | "collapsed": true 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "import tensorflow as tf\n", 381 | "\n", 382 | "#通过集合(collection)计算一个5层神经网络带L2正则化的损失函数\n", 383 | "\n", 384 | "#随机正态初始化一层神经网络的权重,并将权重的L2正则化损失加入名为losses的集合中,返回初始化的权重\n", 385 | "def get_weight(shape,lambd):\n", 386 | " var=tf.Variable(tf.random_normal(shape),dtype=tf.float32)\n", 387 | " \n", 388 | " #tf.add_to_collection函数将新生成变量的L2正则化损失项加入集合,第一个参数为集合名,第二个参数为加入集合的内容。\n", 389 | " tf.add_to_collection('losses',tf.contrib.layers.l2_regularizer(lambd)(var))\n", 390 | " return var\n", 391 | "\n", 392 | "x=tf.placeholder(tf.float32,shape=(None,2))\n", 393 | "y=tf.placeholder(tf.float32,shape=(None,1))\n", 394 | "batch_size=8\n", 395 | "\n", 396 | "#定义每一层中结点个数和层数\n", 397 | "layer_dimension=[2,10,10,10,1]\n", 398 | "n_layers=len(layer_dimension)\n", 399 | "\n", 400 | "#该变量维护前向传播时最深层的结点,最开始为输入层\n", 401 | "cur_layer=x\n", 402 | "\n", 403 | "#输入层结点个数\n", 404 | "in_dimension=layer_dimension[0]\n", 405 | "\n", 406 | "#通过循环生成5层全连接神经网络\n", 407 | "for i in range(1,n_layers):\n", 408 | " \n", 409 | " #下一层节点数\n", 410 | " out_dimension=layer_dimension[i]\n", 411 | " \n", 412 | " #生成当前层中权重的变量,并将这个变量的L2正则化损失加入计算图上的集合\n", 413 | " #[in_dimension,out_dimension],例第一层到第二层之间的权重维度为2×10\n", 414 | " weight=get_weight([in_dimension,out_dimension],0.001)\n", 415 | " \n", 416 | " #偏置项和后一层维度相等,为什么是wx+0.1,而不是wx+b??\n", 417 | " bias=tf.Variable(tf.constant(0.1,shape=[out_dimension]))\n", 418 | " \n", 419 | " #使用ReLU激活函数,cur_layer储存传播一层后的激活情况,后一层激活函数的输出\n", 420 | " cur_layer=tf.nn.relu(tf.matmul(cur_layer,weight)+bias)\n", 421 | " in_dimension=layer_dimension[i]\n", 422 | " \n", 423 | "mse_loss=tf.reduce_mean(tf.square(y-cur_layer))\n", 424 | "\n", 425 | "#将均方误差函数加入损失集合\n", 426 | "tf.add_to_collection('losses',mse_loss)\n", 427 | "\n", 428 | "#get_collection返回一个列表,这个列表是集合中的所有元素,这些元素就是组成损失函数的误差和正则项,相加得最终损失函数\n", 429 | "loss=tf.add_n(tf.get_collection('losses'))\n", 430 | "\n" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 4, 436 | "metadata": { 437 | "collapsed": false 438 | }, 439 | "outputs": [ 440 | { 441 | "name": "stdout", 442 | "output_type": "stream", 443 | "text": [ 444 | "0.0\n", 445 | "10.0\n" 446 | ] 447 | } 448 | ], 449 | "source": [ 450 | "import tensorflow as tf\n", 
451 | "\n", 452 | "#tf.assign(A, new_number),这个函数的功能主要是把A的值变为new_number\n", 453 | "A=tf.Variable(tf.constant(0.0),dtype=tf.float32)\n", 454 | "with tf.Session() as sess: \n", 455 | " sess.run(tf.global_variables_initializer()) \n", 456 | " print (sess.run(A)) \n", 457 | " sess.run(tf.assign(A, 10)) \n", 458 | " print (sess.run(A)) " 459 | ] 460 | }, 461 | { 462 | "cell_type": "code", 463 | "execution_count": 2, 464 | "metadata": { 465 | "collapsed": false 466 | }, 467 | "outputs": [ 468 | { 469 | "name": "stdout", 470 | "output_type": "stream", 471 | "text": [ 472 | "[0.0, 0.0]\n", 473 | "[5.0, 4.5]\n", 474 | "[10.0, 4.5549998]\n", 475 | "[10.0, 4.6094499]\n" 476 | ] 477 | } 478 | ], 479 | "source": [ 480 | "import tensorflow as tf\n", 481 | "\n", 482 | "#滑动平均模型\n", 483 | "\n", 484 | "#定义一个变量计算滑动平均,初始值为0,所有需要计算滑动平均的变量必须是实数型\n", 485 | "v1=tf.Variable(0,dtype=tf.float32)\n", 486 | "\n", 487 | "#step变量模拟神经网络中的迭代次数,用于动态控制衰减率\n", 488 | "step=tf.Variable(0,trainable=False)\n", 489 | "\n", 490 | "#定义一个滑动平均的类,初始化时给定了衰减率和控制衰减率的变量step\n", 491 | "ema=tf.train.ExponentialMovingAverage(0.99,step)\n", 492 | "\n", 493 | "#定义一个更新变量的滑动平均操作,给定一个列表,每次执行操作时更新列表所有变量\n", 494 | "maintain_averages_op=ema.apply([v1])\n", 495 | "\n", 496 | "with tf.Session() as sess:\n", 497 | " \n", 498 | " #初始化所有变量\n", 499 | " sess.run(tf.global_variables_initializer())\n", 500 | " \n", 501 | " #通过ema.average(v1)获取滑动平均之后变量的取值。初始化后,v1的值和滑动平均都为0\n", 502 | " print(sess.run([v1,ema.average(v1)]))\n", 503 | " \n", 504 | " #更新变量v1的值为5,tf.assign将数值分配给变量\n", 505 | " sess.run(tf.assign(v1,5))\n", 506 | " \n", 507 | " #更新v1的滑动平均值。衰减率为min{0.99,(1+step)/(10+step)≈0.1}=0.1,所以v1的滑动平均值会更新为 0.1×0+0.9×5=4.5\n", 508 | " sess.run(maintain_averages_op)\n", 509 | " print(sess.run([v1,ema.average(v1)]))\n", 510 | " \n", 511 | " #将迭代设置为10000步\n", 512 | " sess.run(tf.assign(step,10000))\n", 513 | " \n", 514 | " #更新v1的值为10\n", 515 | " sess.run(tf.assign(v1,10))\n", 516 | " \n", 517 | " #更新v1的滑动平均值。衰减率为min{0.99,(1+step)/(10+step)≈0.999}=0.99,所以v1的滑动平均会被更新为0.99×4.5+0.01×10=4.555\n", 518 | " sess.run(maintain_averages_op)\n", 519 | " print(sess.run([v1,ema.average(v1)]))\n", 520 | " \n", 521 | " #再次更新滑动平均值,得到新的滑动平均值为0.99×4.555+0.01×10=4.60945\n", 522 | " sess.run(maintain_averages_op)\n", 523 | " print(sess.run([v1,ema.average(v1)]))\n", 524 | "\n" 525 | ] 526 | }, 527 | { 528 | "cell_type": "markdown", 529 | "metadata": { 530 | "collapsed": true 531 | }, 532 | "source": [ 533 | "### MNIST 手写字体识别" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 3, 539 | "metadata": { 540 | "collapsed": false 541 | }, 542 | "outputs": [ 543 | { 544 | "name": "stdout", 545 | "output_type": "stream", 546 | "text": [ 547 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 548 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 549 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 550 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n", 551 | "Training data size: 55000\n", 552 | "Validating data size: 5000\n", 553 | "Testing data size: 10000\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "#导入数据\n", 559 | "import tensorflow as tf\n", 560 | "\n", 561 | "#原网站提供了6W张训练图片和1W张测试图片,导入的该工具会从训练图片分出5000张作为验证集\n", 562 | "from tensorflow.examples.tutorials.mnist import input_data\n", 563 | "\n", 564 | "#读取路径为当前路径下的data文件夹下的MNIST文件夹内,如果该文件夹没有,则自动下载数据至该文件夹\n", 565 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 566 | "\n", 567 | "print(\"Training data size: \", mnist.train.num_examples) \n", 568 | "print 
(\"Validating data size: \", mnist.validation.num_examples) \n", 569 | "print (\"Testing data size: \", mnist.test.num_examples) " 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 4, 575 | "metadata": { 576 | "collapsed": false 577 | }, 578 | "outputs": [ 579 | { 580 | "name": "stdout", 581 | "output_type": "stream", 582 | "text": [ 583 | "X shape: (100, 784)\n", 584 | "Y shape: (100, 10)\n" 585 | ] 586 | } 587 | ], 588 | "source": [ 589 | "#为了方便使用SGD,mnist.train.next_batch函数可以从所有训练数据中取一个小批量投入训练\n", 590 | "\n", 591 | "batch_size=100\n", 592 | "\n", 593 | "#从训练集选取batch_size个训练数据\n", 594 | "xs,ys=mnist.train.next_batch(batch_size)\n", 595 | "\n", 596 | "#将图片展开成一个长度为28×28=784的一维数组,一张图片可作为一个特征向量。所以batch为100的矩阵维度为100×784\n", 597 | "print('X shape:',xs.shape)\n", 598 | "print('Y shape:',ys.shape)" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": 21, 604 | "metadata": { 605 | "collapsed": false 606 | }, 607 | "outputs": [ 608 | { 609 | "name": "stdout", 610 | "output_type": "stream", 611 | "text": [ 612 | "Extracting ./data/MNIST/train-images-idx3-ubyte.gz\n", 613 | "Extracting ./data/MNIST/train-labels-idx1-ubyte.gz\n", 614 | "Extracting ./data/MNIST/t10k-images-idx3-ubyte.gz\n", 615 | "Extracting ./data/MNIST/t10k-labels-idx1-ubyte.gz\n" 616 | ] 617 | } 618 | ], 619 | "source": [ 620 | "import tensorflow as tf\n", 621 | "from tensorflow.examples.tutorials.mnist import input_data\n", 622 | "mnist = input_data.read_data_sets(\"./data/MNIST/\", one_hot=True)\n", 623 | "\n", 624 | "\n", 625 | "#输入结点数为像素点数,输出结点数为类别数\n", 626 | "INPUT_NODE=784\n", 627 | "OUTPUT_NODE=10\n", 628 | "\n", 629 | "#一个隐藏层\n", 630 | "LAYER1_NODE=500\n", 631 | "\n", 632 | "#一个批量中的样本量,数据量越小训练过程越接近SGD,数据量越大训练过程越接近梯度下降\n", 633 | "BATCH_SIZE=100\n", 634 | "\n", 635 | "#学习率和学习衰减率\n", 636 | "LEARNING_RATE_BASE=0.8\n", 637 | "LEARNING_RATE_DECAY=0.99\n", 638 | "\n", 639 | "#正则化系数、迭代次数和滑动平均衰减率\n", 640 | "REGULARIZATION_RATE=0.0001\n", 641 | "TRAINING_STEPS=3000\n", 642 | "MOVING_AVERAGE_DECAY=0.99\n", 643 | "\n", 644 | "#定义推断函数,给定所有参数下计算神经网络的前向传播结果。参数avg_class可确定推断中是否使用滑动平均模型\n", 645 | "def inference(input_tensor,avg_class,weights1,biases1,weights2,biases2):\n", 646 | "    \n", 647 | "    #没有提供滑动平均类时,直接使用参数当前的取值\n", 648 | "    if avg_class == None:\n", 649 | "        \n", 650 | "        #计算隐藏层前向传播结果,使用ReLU激活函数\n", 651 | "        layer1=tf.nn.relu(tf.matmul(input_tensor,weights1)+biases1)\n", 652 | "        \n", 653 | "        #计算输出层的前向传播结果\n", 654 | "        return tf.matmul(layer1,weights2)+biases2\n", 655 | "    else:\n", 656 | "        \n", 657 | "        #首先使用avg_class.average函数计算变量的滑动均值,然后计算相应的前向传播结果\n", 658 | "        layer1=tf.nn.relu(tf.matmul(input_tensor,avg_class.average(weights1))+avg_class.average(biases1))\n", 659 | "        return tf.matmul(layer1,avg_class.average(weights2))+avg_class.average(biases2)\n", 660 | "    \n", 661 | "#模型训练函数\n", 662 | "\n", 663 | "def train(mnist):\n", 664 | "    x=tf.placeholder(tf.float32,[None,INPUT_NODE],name='x-input')\n", 665 | "    y=tf.placeholder(tf.float32,[None,OUTPUT_NODE],name='y-input')\n", 666 | "    \n", 667 | "    #生成隐藏层参数\n", 668 | "    weights1=tf.Variable(tf.truncated_normal([INPUT_NODE,LAYER1_NODE],stddev=0.1))\n", 669 | "    biases1=tf.Variable(tf.constant(0.1,shape=[LAYER1_NODE]))\n", 670 | "    \n", 671 | "    #生成输出层参数\n", 672 | "    weights2=tf.Variable(tf.truncated_normal([LAYER1_NODE,OUTPUT_NODE],stddev=0.1))\n", 673 | "    biases2=tf.Variable(tf.constant(0.1,shape=[OUTPUT_NODE]))\n", 674 | "    \n", 675 | "    #计算当前参数下前向传播的结果,这里设为‘None’不会计算滑动平均值\n", 676 | "    y_hat=inference(x,None,weights1,biases1,weights2,biases2)\n", 677 | 
"\n", 678 | "    #定义储存迭代数的变量,这个变量不需要计算滑动平均值,所以这里指定的这个变量为不可训练变量(trainable=False)\n", 679 | "    global_step=tf.Variable(0,trainable=False)\n", 680 | "    \n", 681 | "    #给定滑动平均衰减率和迭代数,初始化滑动平均类。\n", 682 | "    variable_averages=tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step)\n", 683 | "    \n", 684 | "    #在所有代表神经网络参数的变量上使用滑动平均,其他超参数不需要。tf.trainable_variables返回的就是图上的集合GraphKeys.TRAINABLE_VARIABLES中的元素。\n", 685 | "    variables_averages_op=variable_averages.apply(tf.trainable_variables())\n", 686 | "    \n", 687 | "    #计算使用滑动平均后的前向传播结果,滑动平均不会改变变量本身,而是使用影子变量记录滑动平均值,需要使用滑动平均再明确调用average函数\n", 688 | "    average_y_hat=inference(x,variable_averages,weights1,biases1,weights2,biases2)\n", 689 | "    \n", 690 | "    #使用tf.argmax函数得到正确答案对应的类别编号,logits应为前向传播结果y_hat而非标签y\n", 691 | "    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_hat, labels=tf.argmax(y, 1))\n", 692 | "    \n", 693 | "    #计算当前批量中所有样本的交叉熵均值\n", 694 | "    cross_entropy_mean=tf.reduce_mean(cross_entropy)\n", 695 | "    \n", 696 | "    #计算L2正则化损失函数\n", 697 | "    regularizer=tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)\n", 698 | "    \n", 699 | "    #计算模型的正则化损失,只计算神经网络权重的正则化损失,不使用偏置项\n", 700 | "    regularization=regularizer(weights1)+regularizer(weights2)\n", 701 | "    \n", 702 | "    #总损失函数\n", 703 | "    loss=cross_entropy_mean+regularization\n", 704 | "    \n", 705 | "    #设置指数衰减学习率.基础学习率、当前迭代次数、一个epoch所需要的迭代次数、学习衰减率\n", 706 | "    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE,global_step,mnist.train.num_examples/BATCH_SIZE,LEARNING_RATE_DECAY)\n", 707 | "    \n", 708 | "    #使用梯度下降优化算法优化损失函数,损失函数包括交叉熵损失和L2正则化损失\n", 709 | "    train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step)\n", 710 | "    \n", 711 | "    #在训练神经网络模型时,每过一遍数据既需要通过反向传播来更新参数,也要更新每个参数的滑动平均值。为了一次完成多个操作\n", 712 | "    #train_op=tf.group(train_step,variables_averages_op)\n", 713 | "    with tf.control_dependencies([train_step,variables_averages_op]):\n", 714 | "        train_op=tf.no_op(name='train')\n", 715 | "    \n", 716 | "    correct_prediction=tf.equal(tf.argmax(average_y_hat,1),tf.argmax(y,1))\n", 717 | "    accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))\n", 718 | "\n", 719 | "    #初始化会话并开始训练过程\n", 720 | "    with tf.Session() as sess:\n", 721 | "        sess.run(tf.global_variables_initializer())\n", 722 | "        validate_feed={x:mnist.validation.images,y:mnist.validation.labels}\n", 723 | "        \n", 724 | "        test_feed={x:mnist.test.images,y:mnist.test.labels}\n", 725 | "        \n", 726 | "        # 循环地训练神经网络。\n", 727 | "        for i in range(TRAINING_STEPS):\n", 728 | "            if i % 1000 == 0:\n", 729 | "                validate_acc = sess.run(accuracy, feed_dict=validate_feed)\n", 730 | "                print(\"After %d training step(s), validation accuracy using average model is %g \" % (i, validate_acc))\n", 731 | "            \n", 732 | "            xs,ys=mnist.train.next_batch(BATCH_SIZE)\n", 733 | "            sess.run(train_op,feed_dict={x:xs,y:ys})\n", 734 | "\n", 735 | "        test_acc=sess.run(accuracy,feed_dict=test_feed)\n", 736 | "        print((\"After %d training step(s), test accuracy using average model is %g\" %(TRAINING_STEPS, test_acc)))\n", 737 | "\n", 738 | "    \n", 739 | "    " 740 | ] 741 | }, 742 | { 743 | "cell_type": "code", 744 | "execution_count": null, 745 | "metadata": { 746 | "collapsed": true 747 | }, 748 | "outputs": [], 749 | "source": [ 750 | "avg_class = None\n", 751 | "train(mnist)" 752 | ] 753 | } 754 | ], 755 | "metadata": { 756 | "kernelspec": { 757 | "display_name": "Python 3", 758 | "language": "python", 759 | "name": "python3" 760 | }, 761 | "language_info": { 762 | "codemirror_mode": { 763 | "name": "ipython", 764 | "version": 3 765 | }, 766 | 
"file_extension": ".py", 767 | "mimetype": "text/x-python", 768 | "name": "python", 769 | "nbconvert_exporter": "python", 770 | "pygments_lexer": "ipython3", 771 | "version": "3.5.3" 772 | } 773 | }, 774 | "nbformat": 4, 775 | "nbformat_minor": 2 776 | } 777 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ML-Tutorial-Experiment 2 | Coding the Machine Learning Tutorial for Learning to Learn 3 | 4 | - 第一期:[从零开始用TensorFlow搭建卷积神经网络](https://www.jiqizhixin.com/articles/2017-08-29-14)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_CNN_Tutorial.ipynb) 5 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_CNN_Tutorial.ipynb) 6 | - 补充资料:[基础代码解析](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_trial_1.ipynb) 7 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_trial_1.ipynb) 8 | - 补充资料:[Keras构建CNN](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_Keras_CNN.ipynb) 9 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_Keras_CNN.ipynb) 10 | - 补充资料:[TensorFlow构建LeNet-5](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_LeNet5.ipynb) 11 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_LeNet5.ipynb) 12 | - 补充资料:[从DensNet到CliqueNet,探索卷积神经网络架构](https://www.jiqizhixin.com/articles/2018-05-23-6) 13 | - 第二期:[GAN完整理论推导与实现](https://www.jiqizhixin.com/articles/2017-10-1-1)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/Keras_GAN.ipynb) 14 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2FKeras_GAN.ipynb) 15 | - 补充资料:[原版GAN的TensorFlow实现](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_GAN.ipynb) 16 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_GAN.ipynb) 17 | - 第三期:[CapsNet结构解析与实现](https://www.jiqizhixin.com/articles/2017-11-05)--&--[文章代码](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/tf_orginal_CapsNet.ipynb) 18 | [](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2Ftf_orginal_CapsNet.ipynb) 19 | - 补充资料:[解读官方实现的核心代码](https://www.jiqizhixin.com/articles/capsule-implement-sara-sabour-Feb02) 20 | - 第四期:[RNN与CNN的序列建模](https://www.jiqizhixin.com/articles/2018-04-12-3)--&--[LSTM语言建模](https://github.com/jiqizhixin/ML-Tutorial-Experiment/blob/master/Experiments/LSTM_PTB.ipynb)--&--[TCN官方实现](https://github.com/locuslab/TCN)--&--[TCN语言建模(Colaboratory)](https://colab.research.google.com/drive/1GAXC0j9qzLyQu8G9_P_eHi-TtYm7uhXF) 21 | 
[](https://beta.deepnote.org/launch?template=data-science&url=https%3A%2F%2Fgithub.com%2Fjiqizhixin%2FML-Tutorial-Experiment%2Fblob%2Fmaster%2FExperiments%2FLSTM_PTB.ipynb) 22 | - 第五期:[基于Transformer的神经机器翻译](https://www.jiqizhixin.com/articles/Synced-github-implement-project-machine-translation-by-transformer)--&--[Colaboratory实现](https://colab.research.google.com/drive/1Wt9Jwynnki6lipwUcy0Sz5WKG7MYSGs0) 23 | 24 | # 25 | ------ 26 | 为了扩展优秀模型与实现,机器之心将梳理历史优质文章,同时也欢迎各位开发者与研究者提供优质的文章。我们将尝试确定添加的文章都是可复现,且基本无理解性错误的文章,并按以下模型归类。若读者发现这些文章有错误或理解误差,可以在 GitHub 上提 issue,确定后我们将修改文章。 27 | 28 | * 数学与编程基础 29 | * 线性代数 30 | * [教程 | 基础入门:深度学习矩阵运算的概念和代码实现](https://www.jiqizhixin.com/articles/2017-08-07-2) 31 | * 概率与信息论 32 | * [从概率论到多分类问题:综述贝叶斯统计分类](https://www.jiqizhixin.com/articles/2017-09-28) 33 | * 数值计算 34 | * Python基础 35 | * [从变量到封装:一文带你为机器学习打下坚实的Python基础](https://www.jiqizhixin.com/articles/2017-10-13) 36 | * [一文带你了解 Python 集合与基本的集合运算](https://www.jiqizhixin.com/articles/062403) 37 | * NumPy基础 38 | * [搭建模型第一步:你需要预习的 NumPy 基础都在这了](https://www.jiqizhixin.com/articles/070101) 39 | * [从数组到矩阵的迹,NumPy常见使用大总结](https://www.jiqizhixin.com/articles/2017-10-28) 40 | * [数据科学初学者必知的NumPy基础知识](https://www.jiqizhixin.com/articles/2018-04-21-7) 41 | * 一般机器学习 42 | * 入门模型 43 | * 线性回归 44 | * [初学TensorFlow机器学习:如何实现线性回归?](https://www.jiqizhixin.com/articles/2017-05-14-2) 45 | * [Python环境下的8种简单线性回归算法](https://www.jiqizhixin.com/articles/2018-01-01) 46 | * [极简Python带你探索分类与回归的奥秘](https://www.jiqizhixin.com/articles/03132) 47 | * Logistic 回归 48 | * [从原理到应用:简述Logistics回归算法](https://www.jiqizhixin.com/articles/2018-05-13-3) 49 | * [从头开始:用Python实现带随机梯度下降的Logistic回归](https://www.jiqizhixin.com/articles/2017-02-17-5) 50 | * 朴素贝叶斯 51 | * [实践中最广泛应用的分类模型:朴素贝叶斯算法](https://www.jiqizhixin.com/articles/033088) 52 | * 决策树 53 | * 支持向量机 54 | * 聚类方法 55 | * K均值聚类 56 | * 层次聚类 57 | * 降维算法 58 | * PCA 59 | * 自编码器 60 | * t-SNE 61 | * 集成方法 62 | * Staking 63 | * Bagging 64 | * 随机森林 65 | * Boosting 66 | * AdaBoost 67 | * 提升树 68 | * 梯度提升树 69 | * 概率图模型 70 | * 隐马尔科夫模型 71 | * 隐马尔可夫随机场 72 | * 条件随机场 73 | * 半监督学习 74 | * Entropy-based 75 | * Graph-based 76 | * 深度学习 77 | * 最优化方法 78 | * 深度前馈网络 79 | * 深度卷积网络 80 | * 深度循环网络 81 | * 深度生成模型 82 | * PixelRNN/PixelCNN 83 | * VAE 84 | * GAN 85 | 86 | --------------------------------------------------------------------------------
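A quick sanity check for the moving-average cell in Experiments/tf_trial_1.ipynb: the shadow values it prints (4.5, 4.555, 4.60945) can be reproduced with plain Python. The sketch below is illustrative and not part of the original notebooks; it assumes the documented behaviour of `tf.train.ExponentialMovingAverage` when `num_updates` is supplied, namely an effective decay of `min(decay, (1 + num_updates) / (10 + num_updates))` and the update `shadow = decay * shadow + (1 - decay) * value`.

```python
# Plain-Python check of the shadow-variable arithmetic used by
# tf.train.ExponentialMovingAverage in Experiments/tf_trial_1.ipynb.
# Assumption: effective decay = min(decay, (1 + num_updates) / (10 + num_updates)).

def ema_update(shadow, value, decay=0.99, num_updates=None):
    """Return the shadow value after one apply() step."""
    if num_updates is not None:
        decay = min(decay, (1.0 + num_updates) / (10.0 + num_updates))
    return decay * shadow + (1.0 - decay) * value

shadow = 0.0                                          # v1 is initialised to 0
shadow = ema_update(shadow, 5.0, num_updates=0)       # decay = 0.1  -> 4.5
print(shadow)
shadow = ema_update(shadow, 10.0, num_updates=10000)  # decay = 0.99 -> 4.555
print(shadow)
shadow = ema_update(shadow, 10.0, num_updates=10000)  # decay = 0.99 -> 4.60945
print(shadow)
```

Up to floating-point rounding, the three printed values (4.5, 4.555, 4.60945) match the float32 numbers recorded in the notebook output ([5.0, 4.5], [10.0, 4.5549998], [10.0, 4.6094499]), confirming that the decay schedule behaves as the comments in that cell describe.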