├── .idea
├── NeuralNetworksGetStarted.iml
├── deployment.xml
├── dictionaries
│ └── mtianyan.xml
├── inspectionProfiles
│ └── Project_Default.xml
├── misc.xml
├── modules.xml
├── other.xml
└── vcs.xml
├── 1-numpy_basic
└── 1-numpy_basic.py
├── 2-feedforward_neural_network
├── 2-11 stochasticgradient_descent.py
├── 2-12 neural_network_mnist.py
├── 2-7 forward_propagation.py
├── 2-9 back_propagation.py
└── 2.1-simple_network.py
├── 3-improve_neural_network_efficiency
├── 3-10 cross_entropy.py
├── 3-11 save_load_model.py
├── 3-12 neural_network_mnist_v2.py
├── 3-5 initialization_parameters.py
└── 3-7 L2_regularization.py
├── 4-convolutional_neural_network
├── 4-10 SoftmaxLayer.py
├── 4-11 ConvolutionalNeuralNetwork_mnist(gpu).py
├── 4-2 neural_network_mnist_v3(gpu).py
└── 4-6 ConvPoolLayer.py
├── 5-8 tensorflow_mnist_code(official)
└── examples
│ └── tutorials
│ └── mnist
│ ├── BUILD
│ ├── __init__.py
│ ├── fully_connected_feed.py
│ ├── input_data.py
│ ├── mnist.py
│ ├── mnist_deep.py
│ ├── mnist_softmax.py
│ ├── mnist_softmax_xla.py
│ └── mnist_with_summaries.py
├── 5-tensorflow_and_tensorboard
├── 5-1 TensorFlow_Get_Started.py
├── 5-4 tf_LinearRegression.py
├── 5-6 TensorBoard_Get_Started.py
└── 5-7 save_load_model_v2(tf).py
├── 6-cnn_image_classification(CIFAR-10)
└── 6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)
│ └── tutorials
│ └── image
│ └── cifar10
│ ├── BUILD
│ ├── README.md
│ ├── __init__.py
│ ├── cifar10.py
│ ├── cifar10_eval.py
│ ├── cifar10_input.py
│ ├── cifar10_input_test.py
│ ├── cifar10_multi_gpu_train.py
│ └── cifar10_train.py
├── 7-caffe_and_keras
├── 7-1 caffe-master
│ └── examples
│ │ └── cifar10
│ │ ├── cifar10_full.prototxt
│ │ ├── cifar10_full_sigmoid_solver.prototxt
│ │ ├── cifar10_full_sigmoid_solver_bn.prototxt
│ │ ├── cifar10_full_sigmoid_train_test.prototxt
│ │ ├── cifar10_full_sigmoid_train_test_bn.prototxt
│ │ ├── cifar10_full_solver.prototxt
│ │ ├── cifar10_full_solver_lr1.prototxt
│ │ ├── cifar10_full_solver_lr2.prototxt
│ │ ├── cifar10_full_train_test.prototxt
│ │ ├── cifar10_quick.prototxt
│ │ ├── cifar10_quick_solver.prototxt
│ │ ├── cifar10_quick_solver_lr1.prototxt
│ │ ├── cifar10_quick_train_test.prototxt
│ │ ├── convert_cifar_data.cpp
│ │ ├── create_cifar10.bat
│ │ ├── readme.md
│ │ ├── train_full.sh
│ │ ├── train_full_sigmoid.sh
│ │ ├── train_full_sigmoid_bn.sh
│ │ └── train_quick.sh
└── 7-3 keras-master
│ └── examples
│ ├── README.md
│ ├── addition_rnn.py
│ ├── antirectifier.py
│ ├── babi_memnn.py
│ ├── babi_rnn.py
│ ├── cifar10_cnn.py
│ ├── cifar10_cnn_capsule.py
│ ├── cifar10_cnn_tfaugment2d.py
│ ├── cifar10_resnet.py
│ ├── conv_filter_visualization.py
│ ├── conv_lstm.py
│ ├── deep_dream.py
│ ├── image_ocr.py
│ ├── imdb_bidirectional_lstm.py
│ ├── imdb_cnn.py
│ ├── imdb_cnn_lstm.py
│ ├── imdb_fasttext.py
│ ├── imdb_lstm.py
│ ├── lstm_seq2seq.py
│ ├── lstm_seq2seq_restore.py
│ ├── lstm_stateful.py
│ ├── lstm_text_generation.py
│ ├── mnist_acgan.py
│ ├── mnist_cnn.py
│ ├── mnist_dataset_api.py
│ ├── mnist_denoising_autoencoder.py
│ ├── mnist_hierarchical_rnn.py
│ ├── mnist_irnn.py
│ ├── mnist_mlp.py
│ ├── mnist_net2net.py
│ ├── mnist_siamese.py
│ ├── mnist_sklearn_wrapper.py
│ ├── mnist_swwae.py
│ ├── mnist_tfrecord.py
│ ├── mnist_transfer_cnn.py
│ ├── neural_doodle.py
│ ├── neural_style_transfer.py
│ ├── pretrained_word_embeddings.py
│ ├── reuters_mlp.py
│ ├── reuters_mlp_relu_vs_selu.py
│ ├── saved_models
│ └── keras_cifar10_trained_model.h5
│ ├── variational_autoencoder.py
│ └── variational_autoencoder_deconv.py
├── mnist_data
└── mnist.pkl.gz
└── utils
└── mnist_loader.py
/.idea/ (PyCharm project configuration: NeuralNetworksGetStarted.iml, deployment.xml, dictionaries/mtianyan.xml, inspectionProfiles/Project_Default.xml, misc.xml, modules.xml, other.xml, vcs.xml):
--------------------------------------------------------------------------------
[XML content not captured in this export; dictionaries/mtianyan.xml holds the spell-check words "mtianyan" and "randn".]
--------------------------------------------------------------------------------
/1-numpy_basic/1-numpy_basic.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/3/19 0019 23:54'
3 |
4 | import numpy as np
5 |
6 | a = np.array([2, 3, 4])
7 | print(a)
8 | # Element data type
9 | print(a.dtype)
10 | # Shape of the array: (3,), a 1-D array with 3 elements
11 | print(a.shape)
12 | # Number of dimensions: 1
13 | print(a.ndim)
14 | # Number of elements in the array
15 | print(a.size)
16 | print("*********************************")
17 |
18 | b = np.array([[1, 2], [3, 4]])
19 | print(b)
20 | # Element data type
21 | print(b.dtype)
22 | # Shape of the array: (2, 2), two rows and two columns
23 | print(b.shape)
24 | # Number of dimensions: 2
25 | print(b.ndim)
26 | # Number of elements in the array
27 | print(b.size)
28 | print("*********************************")
29 |
30 | c = np.array([[1, 2], [3, 4]], dtype=float)
31 | print(c)
32 | print("*********************************")
33 |
34 | # np.zeros creates a matrix of zeros
35 | d = np.zeros((3, 4))
36 | print(d)
37 | print("*********************************")
38 |
39 | # np.ones creates a matrix of ones; every element is initialized to 1.0
40 | e = np.ones((3, 4))
41 | print(e)
42 | print("*********************************")
43 |
44 | # First create a 2x3 array
45 | b = np.ones((2, 3))
46 | print(b)
47 | # reshape it into a 3x2 array
48 | print(b.reshape(3, 2))
49 | print("*********************************")
50 |
51 | # How to combine two arrays
52 |
53 | # 1 - scalar multiplication
54 | a = np.ones((3, 4))
55 | # Multiply every element of a by 2 and assign the result to b
56 | b = a * 2
57 | print(a)
58 | print(b)
59 | print("*********************************")
60 |
61 | # 2 - horizontal stacking:
62 | # Note: the arrays must be passed as a single tuple; np.hstack(a, b) will not stack them as intended
63 | print(np.hstack((a, b)))
64 | print("*********************************")
65 |
66 | # 3 - vertical stacking
67 | print(np.vstack((a, b)))
68 |
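One detail worth noting from the listing above: a shape (3,) array and a shape (3, 1) column behave differently under np.dot and broadcasting, which is why the network code in the later chapters always builds inputs and biases as explicit (n, 1) columns. A minimal sketch (not part of the repo):

import numpy as np

w = np.random.randn(2, 3)            # weight matrix of a layer with 3 inputs and 2 neurons
b = np.random.randn(2, 1)            # bias as an explicit column vector

x_flat = np.array([1.0, 2.0, 3.0])   # shape (3,)
x_col = x_flat.reshape(3, 1)         # shape (3, 1)

print((np.dot(w, x_flat) + b).shape)  # (2, 2): broadcasting silently produces the wrong shape
print((np.dot(w, x_col) + b).shape)   # (2, 1): the shape the Network classes expect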
--------------------------------------------------------------------------------
/2-feedforward_neural_network/2-11 stochasticgradient_descent.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/3/31 0031 16:06'
3 |
4 | import random
5 | import numpy as np
6 |
7 |
8 | class Network(object):
9 |     def __init__(self, sizes):
10 |         # Number of layers in the network
11 |         self.num_layers = len(sizes)
12 |         # Number of neurons in each layer
13 |         self.sizes = sizes
14 |         # Initialize the biases of each layer
15 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
16 |         # Initialize the weights of each layer
17 |         self.weights = [np.random.randn(y, x)
18 |                         for x, y in zip(sizes[:-1], sizes[1:])]
19 |
20 |     # Stochastic gradient descent
21 |     def SGD(self, training_data, epochs, mini_batch_size, eta):
22 |         # Total number of training examples
23 |         n = len(training_data)
24 |
25 |         # Start training: loop over the epochs
26 |         for j in range(epochs):
27 |             # Shuffle the training data
28 |             random.shuffle(training_data)
29 |
30 |             # Split the data into mini-batches
31 |             mini_batches = [training_data[k:k + mini_batch_size]
32 |                             for k in range(0, n, mini_batch_size)]
33 |
34 |             # Train on each mini-batch
35 |             for mini_batch in mini_batches:
36 |                 self.update_mini_batch(mini_batch, eta)
37 |
38 |             print("Epoch {0} complete".format(j))
39 |
40 |     # Update the parameters from one mini-batch
41 |     def update_mini_batch(self, mini_batch, eta):
42 |         # Accumulators for the partial derivatives of each layer
43 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
44 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
45 |
46 |         # Process every example in the mini-batch
47 |         for x, y in mini_batch:
48 |             delta_nabla_b, delta_nabla_w = self.update(x, y)
49 |
50 |             # Accumulate the per-layer gradients from this example
51 |             nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
52 |             nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
53 |
54 |         # Update the weights and biases: w = w - (eta / m) * nabla_w, with m the mini-batch size
55 |         self.weights = [w - (eta / len(mini_batch)) * nw
56 |                         for w, nw in zip(self.weights, nabla_w)]
57 |         self.biases = [b - (eta / len(mini_batch)) * nb
58 |                        for b, nb in zip(self.biases, nabla_b)]
59 |
60 |     # Forward pass followed by backpropagation for a single example
61 |     def update(self, x, y):
62 |         # Per-layer partial derivatives
63 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
64 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
65 |
66 |         activation = x
67 |
68 |         # Store the activations a = sigmoid(z) of every layer
69 |         activations = [x]
70 |
71 |         # Store z = wx + b for every layer
72 |         zs = []
73 |         # Forward pass
74 |         for b, w in zip(self.biases, self.weights):
75 |             # Compute z for this layer
76 |             z = np.dot(w, activation) + b
77 |
78 |             # Store z for this layer
79 |             zs.append(z)
80 |
81 |             # Compute a for this layer
82 |             activation = sigmoid(z)
83 |
84 |             # Store a for this layer
85 |             activations.append(activation)
86 |
87 |         # Backward pass
88 |         # Error of the output layer
89 |         delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
90 |
91 |         # Derivatives of the last layer's weights and biases
92 |         nabla_b[-1] = delta
93 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
94 |
95 |         # Derivatives of the weights and biases from the second-to-last layer back to the first
96 |         for l in range(2, self.num_layers):
97 |             z = zs[-l]
98 |
99 |             sp = sigmoid_prime(z)
100 |
101 |             # Error of the current layer
102 |             delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
103 |
104 |             # Derivatives of the current layer's biases and weights
105 |             nabla_b[-l] = delta
106 |             nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
107 |
108 |         return (nabla_b, nabla_w)
109 |
110 |     def cost_derivative(self, output_activation, y):
111 |         return (output_activation - y)
112 |
113 |
114 | def sigmoid(z):
115 |     return 1.0 / (1.0 + np.exp(-z))
116 |
117 |
118 | def sigmoid_prime(z):
119 |     return sigmoid(z) * (1 - sigmoid(z))
120 |
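This file has no driver of its own. A quick way to smoke-test the class above is a toy regression problem; the following sketch is not part of the repo, and the data shapes are chosen only to match the (n, 1) column-vector convention used throughout:

import numpy as np

net = Network([3, 4, 1])
# 200 hypothetical examples: 3 random inputs mapped to 1 random target each
training_data = [(np.random.randn(3, 1), np.random.randn(1, 1)) for _ in range(200)]
net.SGD(training_data, epochs=5, mini_batch_size=10, eta=3.0)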
--------------------------------------------------------------------------------
/2-feedforward_neural_network/2-12 neural_network_mnist.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/3/31 0031 16:23'
3 |
4 | import random
5 | import numpy as np
6 |
7 |
8 | class Network(object):
9 | def __init__(self, sizes):
10 | # 网络层数
11 | self.num_layers = len(sizes)
12 | # 网络每层神经元个数
13 | self.sizes = sizes
14 | # 初始化每层的偏置
15 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
16 | # 初始化每层的权重
17 | self.weights = [np.random.randn(y, x)
18 | for x, y in zip(sizes[:-1], sizes[1:])]
19 |
20 | def feedforward(self, a):
21 | for b, w in zip(self.biases, self.weights):
22 | a = sigmoid(np.dot(w, a) + b)
23 | return a
24 |
25 | # 随机梯度下降
26 | def SGD(self, training_data, epochs, mini_batch_size, eta,
27 | test_data=None):
28 | if test_data:
29 | n_test = len(test_data)
30 | # 训练数据总个数
31 | n = len(training_data)
32 |
33 | # 开始训练 循环每一个epochs
34 | for j in range(epochs):
35 | # 洗牌 打乱训练数据
36 | random.shuffle(training_data)
37 |
38 | # mini_batch
39 | mini_batches = [training_data[k:k + mini_batch_size]
40 | for k in range(0, n, mini_batch_size)]
41 |
42 | # 训练mini_batch
43 | for mini_batch in mini_batches:
44 | self.update_mini_batch(mini_batch, eta)
45 |
46 | # 测试集上的表现
47 | if test_data:
48 | print("Epoch {0}: {1} / {2}".format(
49 | j, self.evaluate(test_data), n_test))
50 | print("Epoch {0} complete".format(j))
51 |
52 | # 更新mini_batch
53 | def update_mini_batch(self, mini_batch, eta):
54 | # 保存每层偏倒
55 | nabla_b = [np.zeros(b.shape) for b in self.biases]
56 | nabla_w = [np.zeros(w.shape) for w in self.weights]
57 |
58 | # 训练每一个mini_batch
59 | for x, y in mini_batch:
60 | delta_nable_b, delta_nabla_w = self.update(x, y)
61 |
62 | # 保存一次训练网络中每层的偏倒
63 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)]
64 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
65 |
66 | # 更新权重和偏置 Wn+1 = wn - eta * nw
67 | self.weights = [w - (eta / len(mini_batch)) * nw
68 | for w, nw in zip(self.weights, nabla_w)]
69 | self.biases = [b - (eta / len(mini_batch)) * nb
70 | for b, nb in zip(self.biases, nabla_b)]
71 |
72 | # 前向传播
73 | def update(self, x, y):
74 | # 保存每层偏倒
75 | nabla_b = [np.zeros(b.shape) for b in self.biases]
76 | nabla_w = [np.zeros(w.shape) for w in self.weights]
77 |
78 | activation = x
79 |
80 | # 保存每一层的激励值a=sigmoid(z)
81 | activations = [x]
82 |
83 | # 保存每一层的z=wx+b
84 | zs = []
85 | # 前向传播
86 | for b, w in zip(self.biases, self.weights):
87 | # 计算每层的z
88 | z = np.dot(w, activation) + b
89 |
90 | # 保存每层的z
91 | zs.append(z)
92 |
93 | # 计算每层的a
94 | activation = sigmoid(z)
95 |
96 | # 保存每一层的a
97 | activations.append(activation)
98 |
99 | # 反向更新了
100 | # 计算最后一层的误差
101 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
102 |
103 | # 最后一层权重和偏置的倒数
104 | nabla_b[-1] = delta
105 | nabla_w[-1] = np.dot(delta, activations[-2].transpose())
106 |
107 | # 倒数第二层一直到第一层 权重和偏置的倒数
108 | for l in range(2, self.num_layers):
109 | z = zs[-l]
110 |
111 | sp = sigmoid_prime(z)
112 |
113 | # 当前层的误差
114 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
115 |
116 | # 当前层偏置和权重的倒数
117 | nabla_b[-l] = delta
118 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
119 |
120 | return (nabla_b, nabla_w)
121 |
122 | def evaluate(self, test_data):
123 | test_results = [(np.argmax(self.feedforward(x)), y)
124 | for (x, y) in test_data]
125 | return sum(int(x == y) for (x, y) in test_results)
126 |
127 | def cost_derivative(self, output_activation, y):
128 | return (output_activation - y)
129 |
130 |
131 | def sigmoid(z):
132 | return 1.0 / (1.0 + np.exp(-z))
133 |
134 |
135 | def sigmoid_prime(z):
136 | return sigmoid(z) * (1 - sigmoid(z))
137 |
138 |
139 | if __name__ == '__main__':
140 | import mnist_loader
141 |
142 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper()
143 |
144 | net = Network([784, 30, 10])
145 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data)
146 |
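The __main__ block above does `import mnist_loader`, which only works if `utils/mnist_loader.py` (listed in the tree) is importable. One way to run it from the repo root, assuming that layout:

import sys
sys.path.append('./utils')   # make utils/mnist_loader.py importable

import mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

Whether load_data_wrapper() then finds mnist_data/mnist.pkl.gz depends on the paths inside mnist_loader.py, which is not shown in this section.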
--------------------------------------------------------------------------------
/2-feedforward_neural_network/2-7 forward_propagation.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 |
4 |
5 | class Network(object):
6 |     """Neural network class"""
7 |
8 |     def __init__(self, sizes):
9 |         """
10 |         Constructor.
11 |         :param sizes: list, e.g. [3, 2, 1]: 3 neurons in the input layer, 2 in the hidden layer, 1 in the output layer; it defines how many layers there are and how many neurons each layer has.
12 |         """
13 |         # Number of layers in the network
14 |         self.num_layers = len(sizes)
15 |         # Number of neurons in each layer
16 |         self.sizes = sizes
17 |         # Initialize the biases b of each layer
18 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
19 |         '''
20 |         Equivalent to the line above:
21 |         self.biases = []
22 |         for y in sizes[1:]:
23 |             self.biases.append(np.random.randn(y, 1))  # [one (2,1) array, one (1,1) array]
24 |
25 |         sizes[1:] with sizes=[3,2,1] keeps only 2 and 1: y is 2 on the first pass and 1 on the second, one bias vector for input->hidden and one for hidden->output.
26 |         np.random.randn draws from the standard normal distribution to build a (y, 1) array, i.e. one (2,1) and one (1,1) array: two biases from input to hidden, one bias from hidden to output.
27 |         self.biases.append(np.random.randn(y, 1))
28 |         '''
29 |
30 |         # Initialize the weights w of each layer
31 |         self.weights = [np.random.randn(y, x)
32 |                         for x, y in zip(sizes[:-1], sizes[1:])]
33 |         '''
34 |         Equivalent to the line above:
35 |         self.weights = []
36 |         for x, y in zip(sizes[:-1], sizes[1:]):
37 |             self.weights.append(np.random.randn(y, x))  # 6 connections (a 2x3 matrix) from input to hidden; 2 connections (a 1x2 matrix) from hidden to output
38 |         '''
39 |
40 |     def update(self, x, y):
41 |         """Forward propagation (this lesson only runs the network forward)."""
42 |         # The training input is the activation of the input layer
43 |         activation = x
44 |
45 |         # Store the activation a = sigmoid(z), z = wx + b, of every layer
46 |         # For layer 0 (the input layer) the input itself is the activation
47 |         activations = [x]
48 |
49 |         # zs stores z = wx + b for every layer
50 |         zs = []
51 |
52 |         # Forward pass
53 |         # Loop over every layer, taking that layer's bias and weight matrix together
54 |         for b, w in zip(self.biases, self.weights):
55 |             # Compute z for this layer
56 |             # np.dot multiplies the two arrays: for 2-D arrays it is matrix multiplication,
57 |             # for 1-D arrays it is the inner product of two vectors
58 |             z = np.dot(w, activation) + b
59 |
60 |             # Store z for this layer
61 |             zs.append(z)
62 |
63 |             # Compute this layer's output after the activation function
64 |             activation = sigmoid(z)
65 |
66 |             # Store a for this layer
67 |             activations.append(activation)
68 |
69 |
70 | def sigmoid(z):
71 |     return 1.0 / (1.0 + np.exp(-z))
72 |
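The update method above builds zs and activations but deliberately stops before backpropagation and returns nothing, so there is little to observe by calling it directly. A small driver (not in the repo) that exercises just the forward pass with the same conventions:

import numpy as np

net = Network([3, 2, 1])
x = np.random.randn(3, 1)                 # one input example as a column vector
a = x
for b, w in zip(net.biases, net.weights):
    a = sigmoid(np.dot(w, a) + b)         # same computation update() performs layer by layer
print(a.shape)                            # (1, 1): the activation of the single output neuron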
--------------------------------------------------------------------------------
/2-feedforward_neural_network/2-9 back_propagation.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/3/29 0029 22:20'
3 | import random
4 | import numpy as np
5 |
6 |
7 | class Network(object):
8 |     """Neural network class"""
9 |
10 |     def __init__(self, sizes):
11 |         """
12 |         Constructor.
13 |         :param sizes: list, e.g. [3, 2, 1]: 3 neurons in the input layer, 2 in the hidden layer, 1 in the output layer; it defines how many layers there are and how many neurons each layer has.
14 |         """
15 |         # Number of layers in the network
16 |         self.num_layers = len(sizes)
17 |         # Number of neurons in each layer
18 |         self.sizes = sizes
19 |         # Initialize the biases b of each layer
20 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
21 |         '''
22 |         Equivalent to the line above:
23 |         self.biases = []
24 |         for y in sizes[1:]:
25 |             self.biases.append(np.random.randn(y, 1))  # [one (2,1) array, one (1,1) array]
26 |
27 |         sizes[1:] with sizes=[3,2,1] keeps only 2 and 1: y is 2 on the first pass and 1 on the second, one bias vector for input->hidden and one for hidden->output.
28 |         np.random.randn draws from the standard normal distribution to build a (y, 1) array, i.e. one (2,1) and one (1,1) array: two biases from input to hidden, one bias from hidden to output.
29 |         self.biases.append(np.random.randn(y, 1))
30 |         '''
31 |
32 |         # Initialize the weights w of each layer
33 |         self.weights = [np.random.randn(y, x)
34 |                         for x, y in zip(sizes[:-1], sizes[1:])]
35 |         '''
36 |         Equivalent to the line above:
37 |         self.weights = []
38 |         for x, y in zip(sizes[:-1], sizes[1:]):
39 |             self.weights.append(np.random.randn(y, x))  # 6 connections (a 2x3 matrix) from input to hidden; 2 connections (a 1x2 matrix) from hidden to output
40 |         '''
41 |
42 |     # Gradient descent
43 |     def GD(self, training_data, epochs, eta):
44 |         # Start training: loop over the epochs
45 |         for j in range(epochs):
46 |             # Shuffle the training data
47 |             random.shuffle(training_data)
48 |
49 |             # Backward pass: accumulators for the partial derivatives of each layer
50 |             # Take each layer's biases, read their shape, and create zero matrices of that shape
51 |             nabla_b = [np.zeros(b.shape) for b in self.biases]
52 |             nabla_w = [np.zeros(w.shape) for w in self.weights]
53 |
54 |             # Train on every example
55 |             for x, y in training_data:
56 |                 delta_nabla_b, delta_nabla_w = self.update(x, y)
57 |
58 |                 # Accumulate the per-layer gradients from this example
59 |                 nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
60 |                 nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
61 |
62 |             # Update the weights and biases: w = w - eta * nabla_w
63 |             self.weights = [w - (eta) * nw
64 |                             for w, nw in zip(self.weights, nabla_w)]
65 |             self.biases = [b - (eta) * nb
66 |                            for b, nb in zip(self.biases, nabla_b)]
67 |
68 |             print("Epoch {0} complete".format(j))
69 |
70 |     # Forward pass followed by backpropagation for a single example
71 |     def update(self, x, y):
72 |         # Per-layer partial derivatives
73 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
74 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
75 |
76 |         activation = x
77 |
78 |         # Store the activations a = sigmoid(z) of every layer
79 |         activations = [x]
80 |
81 |         # Store z = wx + b for every layer
82 |         zs = []
83 |         # Forward pass
84 |         for b, w in zip(self.biases, self.weights):
85 |             # Compute z for this layer
86 |             z = np.dot(w, activation) + b
87 |
88 |             # Store z for this layer
89 |             zs.append(z)
90 |
91 |             # Compute a for this layer
92 |             activation = sigmoid(z)
93 |
94 |             # Store a for this layer
95 |             activations.append(activation)
96 |
97 |         # Backward pass: start from the last layer
98 |         # Error of the output layer
99 |         delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
100 |
101 |         # Derivatives of the last layer's weights and biases
102 |         # dloss/db = delta
103 |         # dloss/dw = activation of the second-to-last layer times delta
104 |         nabla_b[-1] = delta
105 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
106 |
107 |         # Derivatives of the weights and biases from the second-to-last layer back to the first
108 |         for l in range(2, self.num_layers):
109 |             # zs[-2] is the second-to-last layer
110 |             z = zs[-l]
111 |
112 |             # sigma'(z) for this layer
113 |             sp = sigmoid_prime(z)
114 |
115 |             # Error of the current layer: the delta_h formula, the next layer's weights times the next layer's error, multiplied elementwise by sigma'(z) of this layer
116 |             delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
117 |
118 |             # Derivatives of the current layer's biases and weights
119 |             nabla_b[-l] = delta
120 |             # Current layer's error times the previous layer's activation; -l-1 is the previous layer
121 |             nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
122 |
123 |         # Return the derivatives of the biases and weights of every layer
124 |         return (nabla_b, nabla_w)
125 |
126 |     @staticmethod
127 |     def cost_derivative(output_activation, y):
128 |         return output_activation - y
129 |
130 |
131 | def sigmoid(z):
132 |     return 1.0 / (1.0 + np.exp(-z))
133 |
134 |
135 | def sigmoid_prime(z):
136 |     return sigmoid(z) * (1 - sigmoid(z))
137 |
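For reference, the four quantities computed in update() above are the standard backpropagation equations for the quadratic cost $C = \tfrac{1}{2}\lVert a^L - y \rVert^2$, with $\delta^l$ the error of layer $l$, $a^l$ its activation, $z^l = w^l a^{l-1} + b^l$, and $\odot$ the elementwise product:

$$
\begin{aligned}
\delta^L &= (a^L - y) \odot \sigma'(z^L), \\
\delta^l &= \bigl((w^{l+1})^{\mathsf T}\, \delta^{l+1}\bigr) \odot \sigma'(z^l), \\
\frac{\partial C}{\partial b^l} &= \delta^l, \qquad
\frac{\partial C}{\partial w^l} = \delta^l \,(a^{l-1})^{\mathsf T}.
\end{aligned}
$$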
--------------------------------------------------------------------------------
/2-feedforward_neural_network/2.1-simple_network.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Network(object):
5 |     """Neural network class"""
6 |
7 |     def __init__(self, sizes):
8 |         """
9 |         Constructor.
10 |         :param sizes: list, e.g. [3, 2, 1]: 3 neurons in the input layer, 2 in the hidden layer, 1 in the output layer; it defines how many layers there are and how many neurons each layer has.
11 |         """
12 |         # Number of layers in the network
13 |         self.num_layers = len(sizes)
14 |         # Number of neurons in each layer
15 |         self.sizes = sizes
16 |         # Initialize the biases b of each layer
17 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
18 |         '''
19 |         Equivalent to the line above:
20 |         self.biases = []
21 |         for y in sizes[1:]:
22 |             self.biases.append(np.random.randn(y, 1))  # [one (2,1) array, one (1,1) array]
23 |
24 |         sizes[1:] with sizes=[3,2,1] keeps only 2 and 1: y is 2 on the first pass and 1 on the second, one bias vector for input->hidden and one for hidden->output.
25 |         np.random.randn draws from the standard normal distribution to build a (y, 1) array, i.e. one (2,1) and one (1,1) array: two biases from input to hidden, one bias from hidden to output.
26 |         self.biases.append(np.random.randn(y, 1))
27 |         '''
28 |
29 |         # Initialize the weights w of each layer
30 |         self.weights = [np.random.randn(y, x)
31 |                         for x, y in zip(sizes[:-1], sizes[1:])]
32 |         '''
33 |         Equivalent to the line above:
34 |         self.weights = []
35 |         for x, y in zip(sizes[:-1], sizes[1:]):
36 |             self.weights.append(np.random.randn(y, x))  # 6 connections (a 2x3 matrix) from input to hidden; 2 connections (a 1x2 matrix) from hidden to output
37 |         '''
38 |
39 |
40 | def sigmoid(z):
41 |     """Sigmoid activation function: 1 / (1 + e^(-z))"""
42 |     return 1.0 / (1.0 + np.exp(-z))
43 |
44 |
45 | if __name__ == '__main__':
46 |     net = Network([3, 2, 1])
47 |     print("Number of layers (excluding the input layer): ", net.num_layers - 1)
48 |     print("Network structure: ", net.sizes)
49 |     print("*" * 20)
50 |     print("Input-to-hidden biases: ", net.biases[0])
51 |     print("Hidden-to-output biases: ", net.biases[1])
52 |     print("*" * 20)
53 |     print("Input-to-hidden weights: ", net.weights[0])
54 |     print("Hidden-to-output weights: ", net.weights[1])
55 |
--------------------------------------------------------------------------------
/3-improve_neural_network_efficiency/3-10 cross_entropy.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/3 0003 16:26'
3 |
4 | import random
5 | import numpy as np
6 |
7 |
8 | class QuadraticCost(object):
9 | @staticmethod
10 | def fn(a, y):
11 | return 0.5 * np.linalg.norm(a - y) ** 2
12 |
13 | @staticmethod
14 | def delta(z, a, y):
15 | return (a - y) * sigmoid_prime(z)
16 |
17 |
18 | class CrossEntropyCost(object):
19 | '''
20 | >>>import numpy as np
21 | >>> a = np.array([[np.nan,np.inf],\
22 | ... [-np.nan,-np.inf]])
23 | >>> a
24 | array([[ nan, inf],
25 | [ nan, -inf]])
26 | >>> np.nan_to_num(a)
27 | array([[ 0.00000000e+000, 1.79769313e+308],
28 | [ 0.00000000e+000, -1.79769313e+308]])
29 | '''
30 |
31 | @staticmethod
32 | def fn(a, y):
33 | return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
34 |
35 | @staticmethod
36 | def delta(z, a, y):
37 | return (a - y)
38 |
39 |
40 | class Network(object):
41 | def __init__(self, sizes, cost=CrossEntropyCost):
42 | # 网络层数
43 | self.num_layers = len(sizes)
44 | # 网络每层神经元个数
45 | self.sizes = sizes
46 | # 初始化每层的偏置和权重
47 | self.default_weight_initializer()
48 | # 损失函数
49 | self.cost = cost
50 |
51 | def default_weight_initializer(self):
52 | # 初始化每层的偏置
53 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
54 | # 初始化每层的权重
55 | self.weights = [np.random.randn(y, x) / np.sqrt(x)
56 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
57 |
58 | def large_weight_initializer(self):
59 | # 初始化每层的偏置
60 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
61 | # 初始化每层的权重
62 | self.weights = [np.random.randn(y, x)
63 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
64 |
65 | def feedforward(self, a):
66 | for b, w in zip(self.biases, self.weights):
67 | a = sigmoid(np.dot(w, a) + b)
68 | return a
69 |
70 | # 随机梯度下降
71 | def SGD(self, training_data, epochs, mini_batch_size, eta,
72 | lmbda=0.0,
73 | test_data=None):
74 | if test_data:
75 | n_test = len(test_data)
76 | # 训练数据总个数
77 | n = len(training_data)
78 |
79 | # 开始训练 循环每一个epochs
80 | for j in range(epochs):
81 | # 洗牌 打乱训练数据
82 | random.shuffle(training_data)
83 |
84 | # mini_batch
85 | mini_batches = [training_data[k:k + mini_batch_size]
86 | for k in range(0, n, mini_batch_size)]
87 |
88 | # 训练mini_batch
89 | for mini_batch in mini_batches:
90 | self.update_mini_batch(mini_batch, eta, lmbda, n)
91 |
92 | if test_data:
93 | print("Epoch {0}: {1} / {2}".format(
94 | j, self.evaluate(test_data), n_test))
95 | print("Epoch {0} complete".format(j))
96 |
97 | # 更新mini_batch
98 | def update_mini_batch(self, mini_batch, eta, lmbda, n):
99 | # 保存每层偏倒
100 | nabla_b = [np.zeros(b.shape) for b in self.biases]
101 | nabla_w = [np.zeros(w.shape) for w in self.weights]
102 |
103 | # 训练每一个mini_batch
104 | for x, y in mini_batch:
105 | delta_nable_b, delta_nabla_w = self.update(x, y)
106 |
107 | # 保存一次训练网络中每层的偏倒
108 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)]
109 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
110 |
111 | # 更新权重和偏置 Wn+1 = wn - eta * nw
112 | self.weights = [(1 - eta * (lmbda / n)) * w - (eta / len(mini_batch)) * nw
113 | for w, nw in zip(self.weights, nabla_w)]
114 | self.biases = [b - (eta / len(mini_batch)) * nb
115 | for b, nb in zip(self.biases, nabla_b)]
116 |
117 | # 前向传播
118 | def update(self, x, y):
119 | # 保存每层偏倒
120 | nabla_b = [np.zeros(b.shape) for b in self.biases]
121 | nabla_w = [np.zeros(w.shape) for w in self.weights]
122 |
123 | activation = x
124 |
125 | # 保存每一层的激励值a=sigmoid(z)
126 | activations = [x]
127 |
128 | # 保存每一层的z=wx+b
129 | zs = []
130 | # 前向传播
131 | for b, w in zip(self.biases, self.weights):
132 | # 计算每层的z
133 | z = np.dot(w, activation) + b
134 |
135 | # 保存每层的z
136 | zs.append(z)
137 |
138 | # 计算每层的a
139 | activation = sigmoid(z)
140 |
141 | # 保存每一层的a
142 | activations.append(activation)
143 |
144 | # 反向更新了
145 | # 计算最后一层的误差
146 | delta = (self.cost).delta(zs[-1], activations[-1], y)
147 |
148 | # 最后一层权重和偏置的倒数
149 | nabla_b[-1] = delta
150 | nabla_w[-1] = np.dot(delta, activations[-2].transpose())
151 |
152 | # 倒数第二层一直到第一层 权重和偏置的倒数
153 | for l in range(2, self.num_layers):
154 | z = zs[-l]
155 |
156 | sp = sigmoid_prime(z)
157 |
158 | # 当前层的误差
159 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
160 |
161 | # 当前层偏置和权重的倒数
162 | nabla_b[-l] = delta
163 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
164 |
165 | return (nabla_b, nabla_w)
166 |
167 | def evaluate(self, test_data):
168 | test_results = [(np.argmax(self.feedforward(x)), y)
169 | for (x, y) in test_data]
170 | return sum(int(x == y) for (x, y) in test_results)
171 |
172 | def cost_derivative(self, output_activation, y):
173 | return (output_activation - y)
174 |
175 |
176 | def sigmoid(z):
177 | return 1.0 / (1.0 + np.exp(-z))
178 |
179 |
180 | def sigmoid_prime(z):
181 | return sigmoid(z) * (1 - sigmoid(z))
182 |
183 |
184 | if __name__ == '__main__':
185 | import mnist_loader
186 |
187 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper()
188 |
189 | net = Network([784, 30, 10])
190 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data)
191 |
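The two cost classes above implement, for a single example, the quadratic cost and the cross-entropy cost; np.nan_to_num in CrossEntropyCost.fn keeps 0*log(0) terms from producing NaN. The practical point of CrossEntropyCost.delta is that the sigma'(z) factor cancels, so learning does not slow down when the output neuron saturates:

$$
C_{\text{CE}} = -\sum_j \bigl[\, y_j \ln a_j + (1 - y_j)\ln(1 - a_j) \,\bigr], \qquad
\delta^L = a^L - y,
$$

versus the quadratic cost's $\delta^L = (a^L - y) \odot \sigma'(z^L)$, which is what QuadraticCost.delta returns.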
--------------------------------------------------------------------------------
/3-improve_neural_network_efficiency/3-5 initialization_parameters.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/3/31 0031 19:15'
3 | import random
4 | import numpy as np
5 |
6 |
7 | class Network(object):
8 | def __init__(self, sizes):
9 | # 网络层数
10 | self.num_layers = len(sizes)
11 | # 网络每层神经元个数
12 | self.sizes = sizes
13 | # 初始化每层的偏置和权重
14 | self.default_weight_initializer()
15 |
16 | def default_weight_initializer(self):
17 | # 初始化每层的偏置
18 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
19 | # 初始化每层的权重
20 | self.weights = [np.random.randn(y, x) / np.sqrt(x)
21 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
22 |
23 | def large_weight_initializer(self):
24 | # 初始化每层的偏置
25 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
26 | # 初始化每层的权重
27 | self.weights = [np.random.randn(y, x)
28 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
29 |
30 | def feedforward(self, a):
31 | for b, w in zip(self.biases, self.weights):
32 | a = sigmoid(np.dot(w, a) + b)
33 | return a
34 |
35 | # 随机梯度下降
36 | def SGD(self, training_data, epochs, mini_batch_size, eta,
37 | test_data=None):
38 | if test_data:
39 | n_test = len(test_data)
40 | # 训练数据总个数
41 | n = len(training_data)
42 |
43 | # 开始训练 循环每一个epochs
44 | for j in range(epochs):
45 | # 洗牌 打乱训练数据
46 | random.shuffle(training_data)
47 |
48 | # mini_batch
49 | mini_batches = [training_data[k:k + mini_batch_size]
50 | for k in range(0, n, mini_batch_size)]
51 |
52 | # 训练mini_batch
53 | for mini_batch in mini_batches:
54 | self.update_mini_batch(mini_batch, eta)
55 |
56 | if test_data:
57 | print("Epoch {0}: {1} / {2}".format(
58 | j, self.evaluate(test_data), n_test))
59 | print("Epoch {0} complete".format(j))
60 |
61 | # 更新mini_batch
62 | def update_mini_batch(self, mini_batch, eta):
63 | # 保存每层偏倒
64 | nabla_b = [np.zeros(b.shape) for b in self.biases]
65 | nabla_w = [np.zeros(w.shape) for w in self.weights]
66 |
67 | # 训练每一个mini_batch
68 | for x, y in mini_batch:
69 | delta_nable_b, delta_nabla_w = self.update(x, y)
70 |
71 | # 保存一次训练网络中每层的偏倒
72 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)]
73 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
74 |
75 | # 更新权重和偏置 Wn+1 = wn - eta * nw
76 | self.weights = [w - (eta / len(mini_batch)) * nw
77 | for w, nw in zip(self.weights, nabla_w)]
78 | self.biases = [b - (eta / len(mini_batch)) * nb
79 | for b, nb in zip(self.biases, nabla_b)]
80 |
81 | # 前向传播
82 | def update(self, x, y):
83 | # 保存每层偏倒
84 | nabla_b = [np.zeros(b.shape) for b in self.biases]
85 | nabla_w = [np.zeros(w.shape) for w in self.weights]
86 |
87 | activation = x
88 |
89 | # 保存每一层的激励值a=sigmoid(z)
90 | activations = [x]
91 |
92 | # 保存每一层的z=wx+b
93 | zs = []
94 | # 前向传播
95 | for b, w in zip(self.biases, self.weights):
96 | # 计算每层的z
97 | z = np.dot(w, activation) + b
98 |
99 | # 保存每层的z
100 | zs.append(z)
101 |
102 | # 计算每层的a
103 | activation = sigmoid(z)
104 |
105 | # 保存每一层的a
106 | activations.append(activation)
107 |
108 | # 反向更新了
109 | # 计算最后一层的误差
110 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
111 |
112 | # 最后一层权重和偏置的倒数
113 | nabla_b[-1] = delta
114 | nabla_w[-1] = np.dot(delta, activations[-2].transpose())
115 |
116 | # 倒数第二层一直到第一层 权重和偏置的倒数
117 | for l in range(2, self.num_layers):
118 | z = zs[-l]
119 |
120 | sp = sigmoid_prime(z)
121 |
122 | # 当前层的误差
123 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
124 |
125 | # 当前层偏置和权重的倒数
126 | nabla_b[-l] = delta
127 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
128 |
129 | return (nabla_b, nabla_w)
130 |
131 | def evaluate(self, test_data):
132 | test_results = [(np.argmax(self.feedforward(x)), y)
133 | for (x, y) in test_data]
134 | return sum(int(x == y) for (x, y) in test_results)
135 |
136 | def cost_derivative(self, output_activation, y):
137 | return (output_activation - y)
138 |
139 |
140 | def sigmoid(z):
141 | return 1.0 / (1.0 + np.exp(-z))
142 |
143 |
144 | def sigmoid_prime(z):
145 | return sigmoid(z) * (1 - sigmoid(z))
146 |
147 |
148 | if __name__ == '__main__':
149 | import mnist_loader
150 |
151 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper()
152 |
153 | net = Network([784, 30, 10])
154 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data)
155 |
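default_weight_initializer above draws each weight from a standard normal and divides by sqrt(x), where x is the fan-in of the layer, i.e. $w \sim \mathcal{N}(0,\, 1/n_{\text{in}})$. The rough reasoning: for a neuron with input activations of order one,

$$
\operatorname{Var}(z) = \operatorname{Var}\Bigl(\sum_{k=1}^{n_{\text{in}}} w_k a_k + b\Bigr) \approx n_{\text{in}} \cdot \frac{1}{n_{\text{in}}} + 1,
$$

which stays of order one regardless of the fan-in, instead of growing like $n_{\text{in}}$ as it would with unit-variance weights, so the sigmoid neurons start out unsaturated. large_weight_initializer keeps the old $\mathcal{N}(0,1)$ weights for comparison.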
--------------------------------------------------------------------------------
/3-improve_neural_network_efficiency/3-7 L2_regularization.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/3 0003 13:40'
3 |
4 | import random
5 | import numpy as np
6 |
7 |
8 | class Network(object):
9 | def __init__(self, sizes):
10 | # 网络层数
11 | self.num_layers = len(sizes)
12 | # 网络每层神经元个数
13 | self.sizes = sizes
14 | # 初始化每层的偏置和权重
15 | self.default_weight_initializer()
16 |
17 | def default_weight_initializer(self):
18 | # 初始化每层的偏置
19 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
20 | # 初始化每层的权重
21 | self.weights = [np.random.randn(y, x) / np.sqrt(x)
22 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
23 |
24 | def large_weight_initializer(self):
25 | # 初始化每层的偏置
26 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]]
27 | # 初始化每层的权重
28 | self.weights = [np.random.randn(y, x)
29 | for x, y in zip(self.sizes[:-1], self.sizes[1:])]
30 |
31 | def feedforward(self, a):
32 | for b, w in zip(self.biases, self.weights):
33 | a = sigmoid(np.dot(w, a) + b)
34 | return a
35 |
36 | # 随机梯度下降
37 | def SGD(self, training_data, epochs, mini_batch_size, eta,
38 | lmbda=0.0,
39 | test_data=None):
40 | if test_data: n_test = len(test_data)
41 | # 训练数据总个数
42 | n = len(training_data)
43 |
44 | # 开始训练 循环每一个epochs
45 | for j in range(epochs):
46 | # 洗牌 打乱训练数据
47 | random.shuffle(training_data)
48 |
49 | # mini_batch
50 | mini_batches = [training_data[k:k + mini_batch_size]
51 | for k in range(0, n, mini_batch_size)]
52 |
53 | # 训练mini_batch
54 | for mini_batch in mini_batches:
55 | self.update_mini_batch(mini_batch, eta, lmbda, n)
56 |
57 | if test_data:
58 | print("Epoch {0}: {1} / {2}".format(
59 | j, self.evaluate(test_data), n_test))
60 | print("Epoch {0} complete".format(j))
61 |
62 | # 更新mini_batch
63 | def update_mini_batch(self, mini_batch, eta, lmbda, n):
64 | # 保存每层偏倒
65 | nabla_b = [np.zeros(b.shape) for b in self.biases]
66 | nabla_w = [np.zeros(w.shape) for w in self.weights]
67 |
68 | # 训练每一个mini_batch
69 | for x, y in mini_batch:
70 | delta_nable_b, delta_nabla_w = self.update(x, y)
71 |
72 | # 保存一次训练网络中每层的偏倒
73 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)]
74 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
75 |
76 | # 更新权重和偏置 Wn+1 = wn - eta * nw
77 | self.weights = [(1 - eta * (lmbda / n)) * w - (eta / len(mini_batch)) * nw
78 | for w, nw in zip(self.weights, nabla_w)]
79 | self.biases = [b - (eta / len(mini_batch)) * nb
80 | for b, nb in zip(self.biases, nabla_b)]
81 |
82 | # 前向传播
83 | def update(self, x, y):
84 | # 保存每层偏倒
85 | nabla_b = [np.zeros(b.shape) for b in self.biases]
86 | nabla_w = [np.zeros(w.shape) for w in self.weights]
87 |
88 | activation = x
89 |
90 | # 保存每一层的激励值a=sigmoid(z)
91 | activations = [x]
92 |
93 | # 保存每一层的z=wx+b
94 | zs = []
95 | # 前向传播
96 | for b, w in zip(self.biases, self.weights):
97 | # 计算每层的z
98 | z = np.dot(w, activation) + b
99 |
100 | # 保存每层的z
101 | zs.append(z)
102 |
103 | # 计算每层的a
104 | activation = sigmoid(z)
105 |
106 | # 保存每一层的a
107 | activations.append(activation)
108 |
109 | # 反向更新了
110 | # 计算最后一层的误差
111 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
112 |
113 | # 最后一层权重和偏置的倒数
114 | nabla_b[-1] = delta
115 | nabla_w[-1] = np.dot(delta, activations[-2].transpose())
116 |
117 | # 倒数第二层一直到第一层 权重和偏置的倒数
118 | for l in range(2, self.num_layers):
119 | z = zs[-l]
120 |
121 | sp = sigmoid_prime(z)
122 |
123 | # 当前层的误差
124 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
125 |
126 | # 当前层偏置和权重的倒数
127 | nabla_b[-l] = delta
128 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
129 |
130 | return (nabla_b, nabla_w)
131 |
132 | def evaluate(self, test_data):
133 | test_results = [(np.argmax(self.feedforward(x)), y)
134 | for (x, y) in test_data]
135 | return sum(int(x == y) for (x, y) in test_results)
136 |
137 | def cost_derivative(self, output_activation, y):
138 | return (output_activation - y)
139 |
140 |
141 | def sigmoid(z):
142 | return 1.0 / (1.0 + np.exp(-z))
143 |
144 |
145 | def sigmoid_prime(z):
146 | return sigmoid(z) * (1 - sigmoid(z))
147 |
148 |
149 | if __name__ == '__main__':
150 | import mnist_loader
151 |
152 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper()
153 |
154 | net = Network([784, 30, 10])
155 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data)
156 |
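Relative to the chapter 2 network, the substantive change above is the weight update in update_mini_batch, which implements L2 ("weight decay") regularization:

$$
C = C_0 + \frac{\lambda}{2n}\sum_w w^2
\quad\Longrightarrow\quad
w \leftarrow \Bigl(1 - \frac{\eta\lambda}{n}\Bigr) w - \frac{\eta}{m}\sum_x \frac{\partial C_x}{\partial w},
$$

where $n$ is the training-set size, $m$ the mini-batch size, and the biases are left unregularized, exactly as in the code. Note that the __main__ block calls SGD without a lmbda argument, so lmbda defaults to 0.0 and regularization is effectively switched off in that run.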
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | # Example TensorFlow models for MNIST used in tutorials
3 |
4 | licenses(["notice"]) # Apache 2.0
5 |
6 | exports_files(["LICENSE"])
7 |
8 | load("//tensorflow:tensorflow.bzl", "py_test")
9 |
10 | py_library(
11 | name = "package",
12 | srcs = [
13 | "__init__.py",
14 | ],
15 | srcs_version = "PY2AND3",
16 | visibility = ["//tensorflow:__subpackages__"],
17 | deps = [
18 | ":input_data",
19 | ":mnist",
20 | ],
21 | )
22 |
23 | py_library(
24 | name = "input_data",
25 | srcs = ["input_data.py"],
26 | srcs_version = "PY2AND3",
27 | visibility = ["//visibility:public"],
28 | deps = [
29 | "//tensorflow:tensorflow_py",
30 | "//tensorflow/contrib/learn/python/learn/datasets",
31 | "//third_party/py/numpy",
32 | "@six_archive//:six",
33 | ],
34 | )
35 |
36 | py_library(
37 | name = "mnist",
38 | srcs = [
39 | "mnist.py",
40 | ],
41 | srcs_version = "PY2AND3",
42 | visibility = ["//visibility:public"],
43 | deps = [
44 | "//tensorflow:tensorflow_py",
45 | ],
46 | )
47 |
48 | py_binary(
49 | name = "fully_connected_feed",
50 | srcs = [
51 | "fully_connected_feed.py",
52 | ],
53 | srcs_version = "PY2AND3",
54 | deps = [
55 | ":input_data",
56 | ":mnist",
57 | "//tensorflow:tensorflow_py",
58 | ],
59 | )
60 |
61 | py_binary(
62 | name = "mnist_with_summaries",
63 | srcs = [
64 | "mnist_with_summaries.py",
65 | ],
66 | srcs_version = "PY2AND3",
67 | deps = [
68 | ":input_data",
69 | "//tensorflow:tensorflow_py",
70 | ],
71 | )
72 |
73 | py_binary(
74 | name = "mnist_softmax",
75 | srcs = [
76 | "mnist_softmax.py",
77 | ],
78 | srcs_version = "PY2AND3",
79 | deps = [
80 | ":input_data",
81 | "//tensorflow:tensorflow_py",
82 | ],
83 | )
84 |
85 | py_binary(
86 | name = "mnist_deep",
87 | srcs = [
88 | "mnist_deep.py",
89 | ],
90 | srcs_version = "PY2AND3",
91 | deps = [
92 | ":input_data",
93 | "//tensorflow:tensorflow_py",
94 | ],
95 | )
96 |
97 | py_test(
98 | name = "fully_connected_feed_test",
99 | size = "small",
100 | srcs = [
101 | "fully_connected_feed.py",
102 | ],
103 | args = [
104 | "--fake_data",
105 | "--max_steps=10",
106 | ],
107 | main = "fully_connected_feed.py",
108 | srcs_version = "PY2AND3",
109 | deps = [
110 | ":input_data",
111 | ":mnist",
112 | "//tensorflow:tensorflow_py",
113 | ],
114 | )
115 |
116 | py_test(
117 | name = "mnist_with_summaries_test",
118 | size = "small",
119 | srcs = [
120 | "mnist_with_summaries.py",
121 | ],
122 | args = [
123 | "--fake_data",
124 | "--max_steps=10",
125 | "--learning_rate=0.00",
126 | ],
127 | main = "mnist_with_summaries.py",
128 | srcs_version = "PY2AND3",
129 | tags = ["notsan"], # http://b/29184009
130 | deps = [
131 | ":input_data",
132 | "//tensorflow:tensorflow_py",
133 | ],
134 | )
135 |
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Imports mnist tutorial libraries used by tutorial examples."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | from tensorflow.examples.tutorials.mnist import input_data
22 | from tensorflow.examples.tutorials.mnist import mnist
23 |
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/input_data.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Functions for downloading and reading MNIST mnist_data."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | # pylint: disable=unused-import
22 | import gzip
23 | import os
24 | import tempfile
25 |
26 | import numpy
27 | from six.moves import urllib
28 | from six.moves import xrange # pylint: disable=redefined-builtin
29 | import tensorflow as tf
30 | from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
31 | # pylint: enable=unused-import
32 |
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Builds the MNIST network.
17 |
18 | Implements the inference/loss/training pattern for model building.
19 |
20 | 1. inference() - Builds the model as far as required for running the network
21 | forward to make predictions.
22 | 2. loss() - Adds to the inference model the layers required to generate loss.
23 | 3. training() - Adds to the loss model the Ops required to generate and
24 | apply gradients.
25 |
26 | This file is used by the various "fully_connected_*.py" files and not meant to
27 | be run.
28 | """
29 | from __future__ import absolute_import
30 | from __future__ import division
31 | from __future__ import print_function
32 |
33 | import math
34 |
35 | import tensorflow as tf
36 |
37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9.
38 | NUM_CLASSES = 10
39 |
40 | # The MNIST images are always 28x28 pixels.
41 | IMAGE_SIZE = 28
42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
43 |
44 |
45 | def inference(images, hidden1_units, hidden2_units):
46 | """Build the MNIST model up to where it may be used for inference.
47 |
48 | Args:
49 | images: Images placeholder, from inputs().
50 | hidden1_units: Size of the first hidden layer.
51 | hidden2_units: Size of the second hidden layer.
52 |
53 | Returns:
54 | softmax_linear: Output tensor with the computed logits.
55 | """
56 | # Hidden 1
57 | with tf.name_scope('hidden1'):
58 | weights = tf.Variable(
59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
61 | name='weights')
62 | biases = tf.Variable(tf.zeros([hidden1_units]),
63 | name='biases')
64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
65 | # Hidden 2
66 | with tf.name_scope('hidden2'):
67 | weights = tf.Variable(
68 | tf.truncated_normal([hidden1_units, hidden2_units],
69 | stddev=1.0 / math.sqrt(float(hidden1_units))),
70 | name='weights')
71 | biases = tf.Variable(tf.zeros([hidden2_units]),
72 | name='biases')
73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
74 | # Linear
75 | with tf.name_scope('softmax_linear'):
76 | weights = tf.Variable(
77 | tf.truncated_normal([hidden2_units, NUM_CLASSES],
78 | stddev=1.0 / math.sqrt(float(hidden2_units))),
79 | name='weights')
80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]),
81 | name='biases')
82 | logits = tf.matmul(hidden2, weights) + biases
83 | return logits
84 |
85 |
86 | def loss(logits, labels):
87 | """Calculates the loss from the logits and the labels.
88 |
89 | Args:
90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
91 | labels: Labels tensor, int32 - [batch_size].
92 |
93 | Returns:
94 | loss: Loss tensor of type float.
95 | """
96 | labels = tf.to_int64(labels)
97 | return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
98 |
99 |
100 | def training(loss, learning_rate):
101 | """Sets up the training Ops.
102 |
103 | Creates a summarizer to track the loss over time in TensorBoard.
104 |
105 | Creates an optimizer and applies the gradients to all trainable variables.
106 |
107 | The Op returned by this function is what must be passed to the
108 | `sess.run()` call to cause the model to train.
109 |
110 | Args:
111 | loss: Loss tensor, from loss().
112 | learning_rate: The learning rate to use for gradient descent.
113 |
114 | Returns:
115 | train_op: The Op for training.
116 | """
117 | # Add a scalar summary for the snapshot loss.
118 | tf.summary.scalar('loss', loss)
119 | # Create the gradient descent optimizer with the given learning rate.
120 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
121 | # Create a variable to track the global step.
122 | global_step = tf.Variable(0, name='global_step', trainable=False)
123 | # Use the optimizer to apply the gradients that minimize the loss
124 | # (and also increment the global step counter) as a single training step.
125 | train_op = optimizer.minimize(loss, global_step=global_step)
126 | return train_op
127 |
128 |
129 | def evaluation(logits, labels):
130 | """Evaluate the quality of the logits at predicting the label.
131 |
132 | Args:
133 | logits: Logits tensor, float - [batch_size, NUM_CLASSES].
134 | labels: Labels tensor, int32 - [batch_size], with values in the
135 | range [0, NUM_CLASSES).
136 |
137 | Returns:
138 | A scalar int32 tensor with the number of examples (out of batch_size)
139 | that were predicted correctly.
140 | """
141 | # For a classifier model, we can use the in_top_k Op.
142 | # It returns a bool tensor with shape [batch_size] that is true for
143 | # the examples where the label is in the top k (here k=1)
144 | # of all logits for that example.
145 | correct = tf.nn.in_top_k(logits, labels, 1)
146 | # Return the number of true entries.
147 | return tf.reduce_sum(tf.cast(correct, tf.int32))
148 |
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist_softmax.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """A very simple MNIST classifier.
16 |
17 | See extensive documentation at
18 | https://www.tensorflow.org/get_started/mnist/beginners
19 | """
20 | from __future__ import absolute_import
21 | from __future__ import division
22 | from __future__ import print_function
23 |
24 | import argparse
25 | import sys
26 |
27 | from tensorflow.examples.tutorials.mnist import input_data
28 |
29 | import tensorflow as tf
30 |
31 | FLAGS = None
32 |
33 |
34 | def main(_):
35 | # Import mnist_data
36 | mnist = input_data.read_data_sets(FLAGS.data_dir)
37 |
38 | # Create the model
39 | x = tf.placeholder(tf.float32, [None, 784])
40 | W = tf.Variable(tf.zeros([784, 10]))
41 | b = tf.Variable(tf.zeros([10]))
42 | y = tf.matmul(x, W) + b
43 |
44 | # Define loss and optimizer
45 | y_ = tf.placeholder(tf.int64, [None])
46 |
47 | # The raw formulation of cross-entropy,
48 | #
49 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
50 | # reduction_indices=[1]))
51 | #
52 | # can be numerically unstable.
53 | #
54 | # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
55 | # outputs of 'y', and then average across the batch.
56 | cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
57 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
58 |
59 | sess = tf.InteractiveSession()
60 | tf.global_variables_initializer().run()
61 | # Train
62 | for _ in range(1000):
63 | batch_xs, batch_ys = mnist.train.next_batch(100)
64 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
65 |
66 | # Test trained model
67 | correct_prediction = tf.equal(tf.argmax(y, 1), y_)
68 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
69 | print(sess.run(
70 | accuracy, feed_dict={
71 | x: mnist.test.images,
72 | y_: mnist.test.labels
73 | }))
74 |
75 |
76 | if __name__ == '__main__':
77 | parser = argparse.ArgumentParser()
78 | parser.add_argument(
79 | '--data_dir',
80 | type=str,
81 | default='/tmp/tensorflow/mnist/input_data',
82 | help='Directory for storing input mnist_data')
83 | FLAGS, unparsed = parser.parse_known_args()
84 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
85 |
--------------------------------------------------------------------------------
/5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist_softmax_xla.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Simple MNIST classifier example with JIT XLA and timelines.
16 |
17 | """
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import argparse
23 | import sys
24 |
25 | import tensorflow as tf
26 |
27 | from tensorflow.examples.tutorials.mnist import input_data
28 | from tensorflow.python.client import timeline
29 |
30 | FLAGS = None
31 |
32 |
33 | def main(_):
34 | # Import mnist_data
35 | mnist = input_data.read_data_sets(FLAGS.data_dir)
36 |
37 | # Create the model
38 | x = tf.placeholder(tf.float32, [None, 784])
39 | w = tf.Variable(tf.zeros([784, 10]))
40 | b = tf.Variable(tf.zeros([10]))
41 | y = tf.matmul(x, w) + b
42 |
43 | # Define loss and optimizer
44 | y_ = tf.placeholder(tf.int64, [None])
45 |
46 | # The raw formulation of cross-entropy,
47 | #
48 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
49 | # reduction_indices=[1]))
50 | #
51 | # can be numerically unstable.
52 | #
53 | # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
54 | # logit outputs of 'y', and then average across the batch.
55 | cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
56 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
57 |
58 | config = tf.ConfigProto()
59 | jit_level = 0
60 | if FLAGS.xla:
61 | # Turns on XLA JIT compilation.
62 | jit_level = tf.OptimizerOptions.ON_1
63 |
64 | config.graph_options.optimizer_options.global_jit_level = jit_level
65 | run_metadata = tf.RunMetadata()
66 | sess = tf.Session(config=config)
67 | tf.global_variables_initializer().run(session=sess)
68 | # Train
69 | train_loops = 1000
70 | for i in range(train_loops):
71 | batch_xs, batch_ys = mnist.train.next_batch(100)
72 |
73 | # Create a timeline for the last loop and export to json to view with
74 | # chrome://tracing/.
75 | if i == train_loops - 1:
76 | sess.run(train_step,
77 | feed_dict={x: batch_xs,
78 | y_: batch_ys},
79 | options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
80 | run_metadata=run_metadata)
81 | trace = timeline.Timeline(step_stats=run_metadata.step_stats)
82 | with open('timeline.ctf.json', 'w') as trace_file:
83 | trace_file.write(trace.generate_chrome_trace_format())
84 | else:
85 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
86 |
87 | # Test trained model
88 | correct_prediction = tf.equal(tf.argmax(y, 1), y_)
89 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
90 | print(sess.run(accuracy,
91 | feed_dict={x: mnist.test.images,
92 | y_: mnist.test.labels}))
93 | sess.close()
94 |
95 |
96 | if __name__ == '__main__':
97 | parser = argparse.ArgumentParser()
98 | parser.add_argument(
99 | '--data_dir',
100 | type=str,
101 | default='/tmp/tensorflow/mnist/input_data',
102 | help='Directory for storing input mnist_data')
103 | parser.add_argument(
104 | '--xla', type=bool, default=True, help='Turn xla via JIT on')
105 | FLAGS, unparsed = parser.parse_known_args()
106 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
107 |
--------------------------------------------------------------------------------
/5-tensorflow_and_tensorboard/5-1 TensorFlow_Get_Started.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/4 0004 17:47'
3 | import tensorflow as tf
4 |
5 | # # Define constant ops
6 | # a = tf.constant(2)
7 | # b = tf.constant(3)
8 | #
9 | # # Launch the default graph with a session
10 | # with tf.Session() as sess:
11 | #     print("a=2, b=3")
12 | #     print("Adding the constants: %i" % sess.run(a + b))
13 | #     print("Multiplying the constants: %i" % sess.run(a * b))
14 |
15 |
16 | # # Define two placeholder ops
17 | # a = tf.placeholder(tf.int16)
18 | # b = tf.placeholder(tf.int16)
19 | #
20 | # # Define two ops: addition and multiplication
21 | # add = tf.add(a, b)
22 | # mul = tf.multiply(a, b)
23 | #
24 | # with tf.Session() as sess:
25 | #     print("Addition: %i" % sess.run(add, feed_dict={a: 2, b: 3}))
26 | #     print("Multiplication: %i" % sess.run(mul, feed_dict={a: 2, b: 3}))
27 |
28 |
29 | # 1x2 constant matrix op
30 | matrix1 = tf.constant([[3., 3.]])
31 |
32 | # 2x1 constant matrix op
33 | matrix2 = tf.constant([[2.], [2.]])
34 |
35 | # Matrix multiplication op
36 | product = tf.matmul(matrix1, matrix2)
37 |
38 | with tf.Session() as sess:
39 |     result = sess.run(product)
40 |     print(type(result))
41 |     print(result)
42 |
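The script above (and the rest of chapter 5) uses the TensorFlow 1.x graph-and-session API; tf.Session and tf.placeholder no longer exist at the top level in TensorFlow 2.x. If you are running TF 2.x, one common workaround is the compat layer; a sketch, not part of the repo:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()   # restore graph/session semantics

matrix1 = tf.constant([[3., 3.]])
matrix2 = tf.constant([[2.], [2.]])
product = tf.matmul(matrix1, matrix2)

with tf.Session() as sess:
    print(sess.run(product))   # [[12.]]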
--------------------------------------------------------------------------------
/5-tensorflow_and_tensorboard/5-4 tf_LinearRegression.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/4 0004 18:27'
3 | import tensorflow as tf
4 | import numpy
5 | import matplotlib.pyplot as plt
6 | from numpy import random
7 |
8 | # 训练参数
9 | learning_rate = 0.01
10 | training_epochs = 1000
11 | display_step = 50
12 |
13 | # 训练数据
14 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
15 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
16 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
17 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
18 | # Total number of training samples
19 | n_samples = train_X.shape[0]
20 |
21 | # Define two placeholder ops
22 | X = tf.placeholder("float")
23 | Y = tf.placeholder("float")
24 |
25 | # Initialize the model's weights and bias
26 | W = tf.Variable(random.random(), name="weight")
27 | b = tf.Variable(random.random(), name="bias")
28 |
29 | # Build the linear model
30 | pred = tf.add(tf.multiply(X, W), b)
31 |
32 | # Mean squared error
33 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
34 |
35 | # 'x' is [[1, 1, 1]
36 | # [1, 1, 1]]
37 | # tf.reduce_sum(x) ==> 6
38 | # tf.reduce_sum(x, 0) ==> [2, 2, 2]
39 | # tf.reduce_sum(x, 1) ==> [3, 3]
40 | # Gradient descent
41 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
42 |
43 | # Initialize all variables
44 | init = tf.global_variables_initializer()
45 |
46 | # Launch the default graph with a session
47 | with tf.Session() as sess:
48 | sess.run(init)
49 |
50 | # Start training
51 | for epoch in range(training_epochs):
52 | for (x, y) in zip(train_X, train_Y):
53 | sess.run(optimizer, feed_dict={X: x, Y: y})
54 |
55 | # Print progress every display_step epochs
56 | if (epoch + 1) % display_step == 0:
57 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
58 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \
59 | "W=", sess.run(W), "b=", sess.run(b))
60 |
61 | print("Optimization Finished!")
62 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
63 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
64 |
65 | # Plot the results
66 | plt.plot(train_X, train_Y, 'ro', label='Original data')
67 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
68 | plt.legend()
69 | plt.savefig('linear_train.png')
70 | plt.show()
71 |
72 | # Test data
73 | test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])
74 | test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])
75 |
76 | print("Testing... (Mean square loss Comparison)")
77 | testing_cost = sess.run(
78 | tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]),
79 | feed_dict={X: test_X, Y: test_Y}) # same function as cost above
80 | print("Testing cost=", testing_cost)
81 | print("Absolute mean square loss difference:", abs(
82 | training_cost - testing_cost))
83 |
84 | plt.plot(test_X, test_Y, 'bo', label='Testing data')
85 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
86 | plt.legend()
87 | plt.savefig('linear_test.png')
88 | plt.show()
89 |
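Since the model is a one-variable linear fit, the gradient-descent result can be sanity-checked against the closed-form least-squares solution; a short NumPy sketch (not part of the original script):

import numpy

train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
                         7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
                         2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])

# Degree-1 polynomial fit returns [slope, intercept]; the trained W and b should land close to these.
w_ls, b_ls = numpy.polyfit(train_X, train_Y, 1)
print("least-squares W =", w_ls, "b =", b_ls)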
--------------------------------------------------------------------------------
/5-tensorflow_and_tensorboard/5-6 TensorBoard_Get_Started.py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/4 0004 20:02'
3 | import tensorflow as tf
4 | import numpy
5 | from numpy import random
6 |
7 | # Training parameters
8 | learning_rate = 0.01
9 | training_epochs = 1000
10 | display_step = 50
11 | logs_path = './example'
12 |
13 | # Training data
14 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
15 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
16 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
17 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
18 | n_samples = train_X.shape[0]
19 |
20 | # Define two placeholder ops
21 | X = tf.placeholder("float")
22 | Y = tf.placeholder("float")
23 |
24 | # Initialize the model's weights and bias
25 | W = tf.Variable(random.random(), name="weight")
26 | b = tf.Variable(random.random(), name="bias")
27 |
28 | # Build the linear model
29 | pred = tf.add(tf.multiply(X, W), b)
30 |
31 | # Mean squared error
32 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
33 |
34 | # 'x' is [[1, 1, 1]
35 | # [1, 1, 1]]
36 | # tf.reduce_sum(x) ==> 6
37 | # tf.reduce_sum(x, 0) ==> [2, 2, 2]
38 | # tf.reduce_sum(x, 1) ==> [3, 3]
39 | # Gradient descent
40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
41 |
42 | # Initialize all variables
43 | init = tf.global_variables_initializer()
44 |
45 | # Create a summary to monitor the loss value
46 | tf.summary.scalar("loss", cost)
47 | merged_summary_op = tf.summary.merge_all()
48 |
49 | # Launch the default graph with a session
50 | with tf.Session() as sess:
51 | sess.run(init)
52 |
53 | # Writer that logs the graph and summary data to the log directory
54 | summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
55 |
56 | # Start training
57 | for epoch in range(training_epochs):
58 | for (x, y) in zip(train_X, train_Y):
59 | sess.run(optimizer, feed_dict={X: x, Y: y})
60 |
61 | # Print progress every display_step epochs
62 | if (epoch + 1) % display_step == 0:
63 | c, summary = sess.run([cost, merged_summary_op], feed_dict={X: train_X, Y: train_Y})
64 | summary_writer.add_summary(summary, epoch * n_samples)
65 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \
66 | "W=", sess.run(W), "b=", sess.run(b))
67 |
68 | print("Optimization Finished!")
69 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
70 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
71 |
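Any scalar tensor can be logged the same way as the loss above; a minimal self-contained sketch that also tracks a weight and a bias (the variables and initial values here are illustrative, not the ones trained above):

import tensorflow as tf

W = tf.Variable(0.3, name="weight")
b = tf.Variable(0.1, name="bias")

tf.summary.scalar("weight", W)
tf.summary.scalar("bias", b)
merged = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Point TensorBoard at this directory (the script above uses './example') to view the curves.
    writer = tf.summary.FileWriter('./example', graph=tf.get_default_graph())
    writer.add_summary(sess.run(merged), 0)
    writer.close()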
--------------------------------------------------------------------------------
/5-tensorflow_and_tensorboard/5-7 save_load_model_v2(tf).py:
--------------------------------------------------------------------------------
1 | __author__ = 'mtianyan'
2 | __date__ = '2018/4/5 0005 16:31'
3 | import tensorflow as tf
4 | import numpy
5 | import matplotlib.pyplot as plt
6 | from numpy import random
7 |
8 | # Training parameters
9 | learning_rate = 0.01
10 | training_epochs = 1000
11 | display_step = 50
12 | model_path = "./tmp/model.ckpt"
13 |
14 | # Training data
15 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167,
16 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1])
17 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221,
18 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3])
19 | n_samples = train_X.shape[0]
20 |
21 | # Define two placeholder ops
22 | X = tf.placeholder("float")
23 | Y = tf.placeholder("float")
24 |
25 | # Initialize the model's weights and bias
26 | W = tf.Variable(random.random(), name="weight")
27 | b = tf.Variable(random.random(), name="bias")
28 |
29 | # Build the linear model
30 | pred = tf.add(tf.multiply(X, W), b)
31 |
32 | # Mean squared error
33 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
34 |
35 | # 'x' is [[1, 1, 1]
36 | # [1, 1, 1]]
37 | # tf.reduce_sum(x) ==> 6
38 | # tf.reduce_sum(x, 0) ==> [2, 2, 2]
39 | # tf.reduce_sum(x, 1) ==> [3, 3]
40 | # Gradient descent
41 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
42 |
43 | # Initialize all variables
44 | init = tf.global_variables_initializer()
45 |
46 | saver = tf.train.Saver()
47 |
48 | print("Starting 1st session...")
49 | # Launch the default graph with a session
50 | with tf.Session() as sess:
51 | sess.run(init)
52 |
53 | # Start training (first 200 epochs)
54 | for epoch in range(200):
55 | for (x, y) in zip(train_X, train_Y):
56 | sess.run(optimizer, feed_dict={X: x, Y: y})
57 |
58 | # Print progress every display_step epochs
59 | if (epoch + 1) % display_step == 0:
60 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
61 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \
62 | "W=", sess.run(W), "b=", sess.run(b))
63 |
64 | print("Optimization Finished!")
65 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
66 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
67 |
68 | # Save model weights to disk
69 | save_path = saver.save(sess, model_path)
70 | print("Model saved in file: %s" % save_path)
71 |
72 | print("Starting 2st session...")
73 | # 使用session 启用默认图
74 | with tf.Session() as sess:
75 | sess.run(init)
76 |
77 | # Restore model weights from previously saved model
78 | saver.restore(sess, model_path)
79 | print("Model restored from file: %s" % save_path)
80 |
81 | # Resume training for the remaining epochs
82 | for epoch in range(training_epochs - 200):
83 | for (x, y) in zip(train_X, train_Y):
84 | sess.run(optimizer, feed_dict={X: x, Y: y})
85 |
86 | # Print progress every display_step epochs
87 | if (epoch + 1) % display_step == 0:
88 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
89 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \
90 | "W=", sess.run(W), "b=", sess.run(b))
91 |
92 | print("Optimization Finished!")
93 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
94 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n')
95 |
96 | # Plot the results
97 | plt.plot(train_X, train_Y, 'ro', label='Original data')
98 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line')
99 | plt.legend()
100 | plt.savefig('save_restore_linear_train.png')
101 | plt.show()
102 |
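The checkpoint written above can also be restored for inference only, without resuming training; a minimal sketch (assumes the graph-building code above has already run in the current process, so saver, model_path, W, b, pred and X exist):

with tf.Session() as sess:
    saver.restore(sess, model_path)
    print("restored W =", sess.run(W), "b =", sess.run(b))
    # Predict with the restored line for a new input value.
    print("prediction for X=5.0:", sess.run(pred, feed_dict={X: 5.0}))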
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | # Example TensorFlow models for CIFAR-10
3 |
4 | licenses(["notice"]) # Apache 2.0
5 |
6 | exports_files(["LICENSE"])
7 |
8 | py_library(
9 | name = "cifar10_input",
10 | srcs = ["cifar10_input.py"],
11 | srcs_version = "PY2AND3",
12 | visibility = ["//tensorflow:internal"],
13 | deps = [
14 | "//tensorflow:tensorflow_py",
15 | ],
16 | )
17 |
18 | py_test(
19 | name = "cifar10_input_test",
20 | size = "small",
21 | srcs = ["cifar10_input_test.py"],
22 | srcs_version = "PY2AND3",
23 | deps = [
24 | ":cifar10_input",
25 | "//tensorflow:tensorflow_py",
26 | "//tensorflow/python:framework_test_lib",
27 | "//tensorflow/python:platform_test",
28 | ],
29 | )
30 |
31 | py_library(
32 | name = "cifar10",
33 | srcs = ["cifar10.py"],
34 | srcs_version = "PY2AND3",
35 | deps = [
36 | ":cifar10_input",
37 | "//tensorflow:tensorflow_py",
38 | ],
39 | )
40 |
41 | py_binary(
42 | name = "cifar10_eval",
43 | srcs = [
44 | "cifar10_eval.py",
45 | ],
46 | srcs_version = "PY2AND3",
47 | visibility = ["//tensorflow:__subpackages__"],
48 | deps = [
49 | ":cifar10",
50 | ],
51 | )
52 |
53 | py_binary(
54 | name = "cifar10_train",
55 | srcs = [
56 | "cifar10_train.py",
57 | ],
58 | srcs_version = "PY2AND3",
59 | visibility = ["//tensorflow:__subpackages__"],
60 | deps = [
61 | ":cifar10",
62 | ],
63 | )
64 |
65 | py_binary(
66 | name = "cifar10_multi_gpu_train",
67 | srcs = [
68 | "cifar10_multi_gpu_train.py",
69 | ],
70 | srcs_version = "PY2AND3",
71 | visibility = ["//tensorflow:__subpackages__"],
72 | deps = [
73 | ":cifar10",
74 | ],
75 | )
76 |
77 | filegroup(
78 | name = "all_files",
79 | srcs = glob(
80 | ["**/*"],
81 | exclude = [
82 | "**/METADATA",
83 | "**/OWNERS",
84 | ],
85 | ),
86 | visibility = ["//tensorflow:__subpackages__"],
87 | )
88 |
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/README.md:
--------------------------------------------------------------------------------
1 | **NOTE: For users interested in multi-GPU, we recommend looking at the newer [cifar10_estimator](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator) example instead.**
2 |
3 | ---
4 |
5 | CIFAR-10 is a common benchmark in machine learning for image recognition.
6 |
7 | http://www.cs.toronto.edu/~kriz/cifar.html
8 |
9 | Code in this directory demonstrates how to use TensorFlow to train and evaluate a convolutional neural network (CNN) on both CPU and GPU. We also demonstrate how to train a CNN over multiple GPUs.
10 |
11 | Detailed instructions on how to get started available at:
12 |
13 | http://tensorflow.org/tutorials/deep_cnn/
14 |
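For a quick local check of the data pipeline before launching a full training run, the helper functions used by cifar10_train.py can be called directly; a minimal sketch (assumes this directory is on the Python path and the default data flags):

import cifar10

cifar10.maybe_download_and_extract()         # fetches the CIFAR-10 binaries if needed
images, labels = cifar10.distorted_inputs()  # augmented training batches
print(images.get_shape(), labels.get_shape())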
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Makes helper libraries available in the cifar10 package."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | import cifar10
22 | import cifar10_input
23 |
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_eval.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Evaluation for CIFAR-10.
17 |
18 | Accuracy:
19 | cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs
20 | of data) as judged by cifar10_eval.py.
21 |
22 | Speed:
23 | On a single Tesla K40, cifar10_train.py processes a single batch of 128 images
24 | in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86%
25 | accuracy after 100K steps in 8 hours of training time.
26 |
27 | Usage:
28 | Please see the tutorial and website for how to download the CIFAR-10
29 | data set, compile the program and train the model.
30 |
31 | http://tensorflow.org/tutorials/deep_cnn/
32 | """
33 | from __future__ import absolute_import
34 | from __future__ import division
35 | from __future__ import print_function
36 |
37 | from datetime import datetime
38 | import math
39 | import time
40 |
41 | import numpy as np
42 | import tensorflow as tf
43 |
44 | import cifar10
45 |
46 | FLAGS = tf.app.flags.FLAGS
47 |
48 | tf.app.flags.DEFINE_string('eval_dir', '/tmp/cifar10_eval',
49 | """Directory where to write event logs.""")
50 | tf.app.flags.DEFINE_string('eval_data', 'test',
51 | """Either 'test' or 'train_eval'.""")
52 | tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/cifar10_train',
53 | """Directory where to read model checkpoints.""")
54 | tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5,
55 | """How often to run the eval.""")
56 | tf.app.flags.DEFINE_integer('num_examples', 10000,
57 | """Number of examples to run.""")
58 | tf.app.flags.DEFINE_boolean('run_once', False,
59 | """Whether to run eval only once.""")
60 |
61 |
62 | def eval_once(saver, summary_writer, top_k_op, summary_op):
63 | """Run Eval once.
64 |
65 | Args:
66 | saver: Saver.
67 | summary_writer: Summary writer.
68 | top_k_op: Top K op.
69 | summary_op: Summary op.
70 | """
71 | with tf.Session() as sess:
72 | ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
73 | if ckpt and ckpt.model_checkpoint_path:
74 | # Restores from checkpoint
75 | saver.restore(sess, ckpt.model_checkpoint_path)
76 | # Assuming model_checkpoint_path looks something like:
77 | # /my-favorite-path/cifar10_train/model.ckpt-0,
78 | # extract global_step from it.
79 | global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
80 | else:
81 | print('No checkpoint file found')
82 | return
83 |
84 | # Start the queue runners.
85 | coord = tf.train.Coordinator()
86 | try:
87 | threads = []
88 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
89 | threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
90 | start=True))
91 |
92 | num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
93 | true_count = 0 # Counts the number of correct predictions.
94 | total_sample_count = num_iter * FLAGS.batch_size
95 | step = 0
96 | while step < num_iter and not coord.should_stop():
97 | predictions = sess.run([top_k_op])
98 | true_count += np.sum(predictions)
99 | step += 1
100 |
101 | # Compute precision @ 1.
102 | precision = true_count / total_sample_count
103 | print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
104 |
105 | summary = tf.Summary()
106 | summary.ParseFromString(sess.run(summary_op))
107 | summary.value.add(tag='Precision @ 1', simple_value=precision)
108 | summary_writer.add_summary(summary, global_step)
109 | except Exception as e: # pylint: disable=broad-except
110 | coord.request_stop(e)
111 |
112 | coord.request_stop()
113 | coord.join(threads, stop_grace_period_secs=10)
114 |
115 |
116 | def evaluate():
117 | """Eval CIFAR-10 for a number of steps."""
118 | with tf.Graph().as_default() as g:
119 | # Get images and labels for CIFAR-10.
120 | eval_data = FLAGS.eval_data == 'test'
121 | images, labels = cifar10.inputs(eval_data=eval_data)
122 |
123 | # Build a Graph that computes the logits predictions from the
124 | # inference model.
125 | logits = cifar10.inference(images)
126 |
127 | # Calculate predictions.
128 | top_k_op = tf.nn.in_top_k(logits, labels, 1)
129 |
130 | # Restore the moving average version of the learned variables for eval.
131 | variable_averages = tf.train.ExponentialMovingAverage(
132 | cifar10.MOVING_AVERAGE_DECAY)
133 | variables_to_restore = variable_averages.variables_to_restore()
134 | saver = tf.train.Saver(variables_to_restore)
135 |
136 | # Build the summary operation based on the TF collection of Summaries.
137 | summary_op = tf.summary.merge_all()
138 |
139 | summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)
140 |
141 | while True:
142 | eval_once(saver, summary_writer, top_k_op, summary_op)
143 | if FLAGS.run_once:
144 | break
145 | time.sleep(FLAGS.eval_interval_secs)
146 |
147 |
148 | def main(argv=None): # pylint: disable=unused-argument
149 | cifar10.maybe_download_and_extract()
150 | if tf.gfile.Exists(FLAGS.eval_dir):
151 | tf.gfile.DeleteRecursively(FLAGS.eval_dir)
152 | tf.gfile.MakeDirs(FLAGS.eval_dir)
153 | evaluate()
154 |
155 |
156 | if __name__ == '__main__':
157 | tf.app.run()
158 |
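The precision @ 1 reported by eval_once is simply the fraction of correct top-1 predictions accumulated over all evaluated batches; a small NumPy sketch of the same computation (the predictions array is a made-up example of what top_k_op returns for one batch):

import numpy as np

predictions = np.array([True, False, True, True, False, True, True, True])
precision = np.sum(predictions) / predictions.size
print('precision @ 1 = %.3f' % precision)  # 0.750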
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_input_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Tests for cifar10 input."""
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 |
24 | import tensorflow as tf
25 |
26 | import cifar10_input
27 |
28 |
29 | class CIFAR10InputTest(tf.test.TestCase):
30 |
31 | def _record(self, label, red, green, blue):
32 | image_size = 32 * 32
33 | record = bytes(bytearray([label] + [red] * image_size +
34 | [green] * image_size + [blue] * image_size))
35 | expected = [[[red, green, blue]] * 32] * 32
36 | return record, expected
37 |
38 | def testSimple(self):
39 | labels = [9, 3, 0]
40 | records = [self._record(labels[0], 0, 128, 255),
41 | self._record(labels[1], 255, 0, 1),
42 | self._record(labels[2], 254, 255, 0)]
43 | contents = b"".join([record for record, _ in records])
44 | expected = [expected for _, expected in records]
45 | filename = os.path.join(self.get_temp_dir(), "cifar")
46 | open(filename, "wb").write(contents)
47 |
48 | with self.test_session() as sess:
49 | q = tf.FIFOQueue(99, [tf.string], shapes=())
50 | q.enqueue([filename]).run()
51 | q.close().run()
52 | result = cifar10_input.read_cifar10(q)
53 |
54 | for i in range(3):
55 | key, label, uint8image = sess.run([
56 | result.key, result.label, result.uint8image])
57 | self.assertEqual("%s:%d" % (filename, i), tf.compat.as_text(key))
58 | self.assertEqual(labels[i], label)
59 | self.assertAllEqual(expected[i], uint8image)
60 |
61 | with self.assertRaises(tf.errors.OutOfRangeError):
62 | sess.run([result.key, result.uint8image])
63 |
64 |
65 | if __name__ == "__main__":
66 | tf.test.main()
67 |
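The on-disk record layout that _record builds above is one label byte followed by 1024 red, 1024 green and 1024 blue bytes per 32x32 image; a small NumPy sketch of decoding such a record (the pixel values are made up):

import numpy as np

image_size = 32 * 32
record = bytes(bytearray([9] + [0] * image_size + [128] * image_size + [255] * image_size))

label = record[0]  # 9 (indexing a bytes object in Python 3 yields an int)
planes = np.frombuffer(record[1:], dtype=np.uint8).reshape(3, 32, 32)  # [depth, height, width]
image = planes.transpose(1, 2, 0)                                      # [height, width, depth]
print(label, image[0, 0])  # 9 [  0 128 255]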
--------------------------------------------------------------------------------
/6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_train.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """A binary to train CIFAR-10 using a single GPU.
17 |
18 | Accuracy:
19 | cifar10_train.py achieves ~86% accuracy after 100K steps (256 epochs of
20 | data) as judged by cifar10_eval.py.
21 |
22 | Speed: With batch_size 128.
23 |
24 | System | Step Time (sec/batch) | Accuracy
25 | ------------------------------------------------------------------
26 | 1 Tesla K20m | 0.35-0.60 | ~86% at 60K steps (5 hours)
27 | 1 Tesla K40m | 0.25-0.35 | ~86% at 100K steps (4 hours)
28 |
29 | Usage:
30 | Please see the tutorial and website for how to download the CIFAR-10
31 | data set, compile the program and train the model.
32 |
33 | http://tensorflow.org/tutorials/deep_cnn/
34 | """
35 | from __future__ import absolute_import
36 | from __future__ import division
37 | from __future__ import print_function
38 |
39 | from datetime import datetime
40 | import time
41 |
42 | import tensorflow as tf
43 |
44 | import cifar10
45 |
46 | FLAGS = tf.app.flags.FLAGS
47 |
48 | tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',
49 | """Directory where to write event logs """
50 | """and checkpoint.""")
51 | tf.app.flags.DEFINE_integer('max_steps', 1000000,
52 | """Number of batches to run.""")
53 | tf.app.flags.DEFINE_boolean('log_device_placement', False,
54 | """Whether to log device placement.""")
55 | tf.app.flags.DEFINE_integer('log_frequency', 10,
56 | """How often to log results to the console.""")
57 |
58 |
59 | def train():
60 | """Train CIFAR-10 for a number of steps."""
61 | with tf.Graph().as_default():
62 | global_step = tf.train.get_or_create_global_step()
63 |
64 | # Get images and labels for CIFAR-10.
65 | # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
66 | # GPU and resulting in a slow down.
67 | with tf.device('/cpu:0'):
68 | images, labels = cifar10.distorted_inputs()
69 |
70 | # Build a Graph that computes the logits predictions from the
71 | # inference model.
72 | logits = cifar10.inference(images)
73 |
74 | # Calculate loss.
75 | loss = cifar10.loss(logits, labels)
76 |
77 | # Build a Graph that trains the model with one batch of examples and
78 | # updates the model parameters.
79 | train_op = cifar10.train(loss, global_step)
80 |
81 | class _LoggerHook(tf.train.SessionRunHook):
82 | """Logs loss and runtime."""
83 |
84 | def begin(self):
85 | self._step = -1
86 | self._start_time = time.time()
87 |
88 | def before_run(self, run_context):
89 | self._step += 1
90 | return tf.train.SessionRunArgs(loss) # Asks for loss value.
91 |
92 | def after_run(self, run_context, run_values):
93 | if self._step % FLAGS.log_frequency == 0:
94 | current_time = time.time()
95 | duration = current_time - self._start_time
96 | self._start_time = current_time
97 |
98 | loss_value = run_values.results
99 | examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
100 | sec_per_batch = float(duration / FLAGS.log_frequency)
101 |
102 | format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
103 | 'sec/batch)')
104 | print (format_str % (datetime.now(), self._step, loss_value,
105 | examples_per_sec, sec_per_batch))
106 |
107 | with tf.train.MonitoredTrainingSession(
108 | checkpoint_dir=FLAGS.train_dir,
109 | hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
110 | tf.train.NanTensorHook(loss),
111 | _LoggerHook()],
112 | config=tf.ConfigProto(
113 | log_device_placement=FLAGS.log_device_placement)) as mon_sess:
114 | while not mon_sess.should_stop():
115 | mon_sess.run(train_op)
116 |
117 |
118 | def main(argv=None): # pylint: disable=unused-argument
119 | cifar10.maybe_download_and_extract()
120 | if tf.gfile.Exists(FLAGS.train_dir):
121 | tf.gfile.DeleteRecursively(FLAGS.train_dir)
122 | tf.gfile.MakeDirs(FLAGS.train_dir)
123 | train()
124 |
125 |
126 | if __name__ == '__main__':
127 | tf.app.run()
128 |
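The throughput figures printed by _LoggerHook come from a simple rate calculation over the last log_frequency steps; a standalone sketch of the same arithmetic (the sleep stands in for actually running the steps):

import time

log_frequency = 10
batch_size = 128

start = time.time()
time.sleep(0.5)  # stand-in for running log_frequency training steps
duration = time.time() - start

examples_per_sec = log_frequency * batch_size / duration
sec_per_batch = duration / log_frequency
print('%.1f examples/sec; %.3f sec/batch' % (examples_per_sec, sec_per_batch))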
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_full_deploy"
2 | # N.B. input image must be in CIFAR-10 format
3 | # as described at http://www.cs.toronto.edu/~kriz/cifar.html
4 | layer {
5 | name: "data"
6 | type: "Input"
7 | top: "data"
8 | input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } }
9 | }
10 | layer {
11 | name: "conv1"
12 | type: "Convolution"
13 | bottom: "data"
14 | top: "conv1"
15 | param {
16 | lr_mult: 1
17 | }
18 | param {
19 | lr_mult: 2
20 | }
21 | convolution_param {
22 | num_output: 32
23 | pad: 2
24 | kernel_size: 5
25 | stride: 1
26 | }
27 | }
28 | layer {
29 | name: "pool1"
30 | type: "Pooling"
31 | bottom: "conv1"
32 | top: "pool1"
33 | pooling_param {
34 | pool: MAX
35 | kernel_size: 3
36 | stride: 2
37 | }
38 | }
39 | layer {
40 | name: "relu1"
41 | type: "ReLU"
42 | bottom: "pool1"
43 | top: "pool1"
44 | }
45 | layer {
46 | name: "norm1"
47 | type: "LRN"
48 | bottom: "pool1"
49 | top: "norm1"
50 | lrn_param {
51 | local_size: 3
52 | alpha: 5e-05
53 | beta: 0.75
54 | norm_region: WITHIN_CHANNEL
55 | }
56 | }
57 | layer {
58 | name: "conv2"
59 | type: "Convolution"
60 | bottom: "norm1"
61 | top: "conv2"
62 | param {
63 | lr_mult: 1
64 | }
65 | param {
66 | lr_mult: 2
67 | }
68 | convolution_param {
69 | num_output: 32
70 | pad: 2
71 | kernel_size: 5
72 | stride: 1
73 | }
74 | }
75 | layer {
76 | name: "relu2"
77 | type: "ReLU"
78 | bottom: "conv2"
79 | top: "conv2"
80 | }
81 | layer {
82 | name: "pool2"
83 | type: "Pooling"
84 | bottom: "conv2"
85 | top: "pool2"
86 | pooling_param {
87 | pool: AVE
88 | kernel_size: 3
89 | stride: 2
90 | }
91 | }
92 | layer {
93 | name: "norm2"
94 | type: "LRN"
95 | bottom: "pool2"
96 | top: "norm2"
97 | lrn_param {
98 | local_size: 3
99 | alpha: 5e-05
100 | beta: 0.75
101 | norm_region: WITHIN_CHANNEL
102 | }
103 | }
104 | layer {
105 | name: "conv3"
106 | type: "Convolution"
107 | bottom: "norm2"
108 | top: "conv3"
109 | convolution_param {
110 | num_output: 64
111 | pad: 2
112 | kernel_size: 5
113 | stride: 1
114 | }
115 | }
116 | layer {
117 | name: "relu3"
118 | type: "ReLU"
119 | bottom: "conv3"
120 | top: "conv3"
121 | }
122 | layer {
123 | name: "pool3"
124 | type: "Pooling"
125 | bottom: "conv3"
126 | top: "pool3"
127 | pooling_param {
128 | pool: AVE
129 | kernel_size: 3
130 | stride: 2
131 | }
132 | }
133 | layer {
134 | name: "ip1"
135 | type: "InnerProduct"
136 | bottom: "pool3"
137 | top: "ip1"
138 | param {
139 | lr_mult: 1
140 | decay_mult: 250
141 | }
142 | param {
143 | lr_mult: 2
144 | decay_mult: 0
145 | }
146 | inner_product_param {
147 | num_output: 10
148 | }
149 | }
150 | layer {
151 | name: "prob"
152 | type: "Softmax"
153 | bottom: "ip1"
154 | top: "prob"
155 | }
156 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_solver.prototxt:
--------------------------------------------------------------------------------
1 | # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
2 | # then another factor of 10 after 10 more epochs (5000 iters)
3 |
4 | # The train/test net protocol buffer definition
5 | net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt"
6 | # test_iter specifies how many forward passes the test should carry out.
7 | # In the case of CIFAR10, we have test batch size 1000 and 10 test iterations,
8 | # covering the full 10,000 testing images.
9 | test_iter: 10
10 | # Carry out testing every 1000 training iterations.
11 | test_interval: 1000
12 | # The base learning rate, momentum and the weight decay of the network.
13 | base_lr: 0.001
14 | momentum: 0.9
15 | #weight_decay: 0.004
16 | # The learning rate policy
17 | lr_policy: "step"
18 | gamma: 1
19 | stepsize: 5000
20 | # Display every 100 iterations
21 | display: 100
22 | # The maximum number of iterations
23 | max_iter: 60000
24 | # snapshot intermediate results
25 | snapshot: 10000
26 | snapshot_prefix: "examples/cifar10_full_sigmoid"
27 | # solver mode: CPU or GPU
28 | solver_mode: GPU
29 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt:
--------------------------------------------------------------------------------
1 | # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
2 | # then another factor of 10 after 10 more epochs (5000 iters)
3 |
4 | # The train/test net protocol buffer definition
5 | net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt"
6 | # test_iter specifies how many forward passes the test should carry out.
7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
8 | # covering the full 10,000 testing images.
9 | test_iter: 10
10 | # Carry out testing every 1000 training iterations.
11 | test_interval: 1000
12 | # The base learning rate, momentum and the weight decay of the network.
13 | base_lr: 0.001
14 | momentum: 0.9
15 | #weight_decay: 0.004
16 | # The learning rate policy
17 | lr_policy: "step"
18 | gamma: 1
19 | stepsize: 5000
20 | # Display every 100 iterations
21 | display: 100
22 | # The maximum number of iterations
23 | max_iter: 60000
24 | # snapshot intermediate results
25 | snapshot: 10000
26 | snapshot_prefix: "examples/cifar10_full_sigmoid_bn"
27 | # solver mode: CPU or GPU
28 | solver_mode: GPU
29 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_full"
2 | layer {
3 | name: "cifar"
4 | type: "Data"
5 | top: "data"
6 | top: "label"
7 | include {
8 | phase: TRAIN
9 | }
10 | transform_param {
11 | mean_file: "examples/cifar10/mean.binaryproto"
12 | }
13 | data_param {
14 | source: "examples/cifar10/cifar10_train_lmdb"
15 | batch_size: 111
16 | backend: LMDB
17 | }
18 | }
19 | layer {
20 | name: "cifar"
21 | type: "Data"
22 | top: "data"
23 | top: "label"
24 | include {
25 | phase: TEST
26 | }
27 | transform_param {
28 | mean_file: "examples/cifar10/mean.binaryproto"
29 | }
30 | data_param {
31 | source: "examples/cifar10/cifar10_test_lmdb"
32 | batch_size: 1000
33 | backend: LMDB
34 | }
35 | }
36 | layer {
37 | name: "conv1"
38 | type: "Convolution"
39 | bottom: "data"
40 | top: "conv1"
41 | param {
42 | lr_mult: 1
43 | }
44 | param {
45 | lr_mult: 2
46 | }
47 | convolution_param {
48 | num_output: 32
49 | pad: 2
50 | kernel_size: 5
51 | stride: 1
52 | weight_filler {
53 | type: "gaussian"
54 | std: 0.0001
55 | }
56 | bias_filler {
57 | type: "constant"
58 | }
59 | }
60 | }
61 | layer {
62 | name: "pool1"
63 | type: "Pooling"
64 | bottom: "conv1"
65 | top: "pool1"
66 | pooling_param {
67 | pool: MAX
68 | kernel_size: 3
69 | stride: 2
70 | }
71 | }
72 |
73 |
74 |
75 | layer {
76 | name: "Sigmoid1"
77 | type: "Sigmoid"
78 | bottom: "pool1"
79 | top: "Sigmoid1"
80 | }
81 |
82 | layer {
83 | name: "conv2"
84 | type: "Convolution"
85 | bottom: "Sigmoid1"
86 | top: "conv2"
87 | param {
88 | lr_mult: 1
89 | }
90 | param {
91 | lr_mult: 2
92 | }
93 | convolution_param {
94 | num_output: 32
95 | pad: 2
96 | kernel_size: 5
97 | stride: 1
98 | weight_filler {
99 | type: "gaussian"
100 | std: 0.01
101 | }
102 | bias_filler {
103 | type: "constant"
104 | }
105 | }
106 | }
107 |
108 |
109 | layer {
110 | name: "Sigmoid2"
111 | type: "Sigmoid"
112 | bottom: "conv2"
113 | top: "Sigmoid2"
114 | }
115 | layer {
116 | name: "pool2"
117 | type: "Pooling"
118 | bottom: "Sigmoid2"
119 | top: "pool2"
120 | pooling_param {
121 | pool: AVE
122 | kernel_size: 3
123 | stride: 2
124 | }
125 | }
126 | layer {
127 | name: "conv3"
128 | type: "Convolution"
129 | bottom: "pool2"
130 | top: "conv3"
131 | convolution_param {
132 | num_output: 64
133 | pad: 2
134 | kernel_size: 5
135 | stride: 1
136 | weight_filler {
137 | type: "gaussian"
138 | std: 0.01
139 | }
140 | bias_filler {
141 | type: "constant"
142 | }
143 | }
144 | param {
145 | lr_mult: 1
146 | }
147 | param {
148 | lr_mult: 1
149 | }
150 |
151 | }
152 |
153 | layer {
154 | name: "Sigmoid3"
155 | type: "Sigmoid"
156 | bottom: "conv3"
157 | top: "Sigmoid3"
158 | }
159 |
160 | layer {
161 | name: "pool3"
162 | type: "Pooling"
163 | bottom: "Sigmoid3"
164 | top: "pool3"
165 | pooling_param {
166 | pool: AVE
167 | kernel_size: 3
168 | stride: 2
169 | }
170 | }
171 |
172 | layer {
173 | name: "ip1"
174 | type: "InnerProduct"
175 | bottom: "pool3"
176 | top: "ip1"
177 | param {
178 | lr_mult: 1
179 | decay_mult: 0
180 | }
181 | param {
182 | lr_mult: 2
183 | decay_mult: 0
184 | }
185 | inner_product_param {
186 | num_output: 10
187 | weight_filler {
188 | type: "gaussian"
189 | std: 0.01
190 | }
191 | bias_filler {
192 | type: "constant"
193 | }
194 | }
195 | }
196 | layer {
197 | name: "accuracy"
198 | type: "Accuracy"
199 | bottom: "ip1"
200 | bottom: "label"
201 | top: "accuracy"
202 | include {
203 | phase: TEST
204 | }
205 | }
206 | layer {
207 | name: "loss"
208 | type: "SoftmaxWithLoss"
209 | bottom: "ip1"
210 | bottom: "label"
211 | top: "loss"
212 | }
213 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_full"
2 | layer {
3 | name: "cifar"
4 | type: "Data"
5 | top: "data"
6 | top: "label"
7 | include {
8 | phase: TRAIN
9 | }
10 | transform_param {
11 | mean_file: "examples/cifar10/mean.binaryproto"
12 | }
13 | data_param {
14 | source: "examples/cifar10/cifar10_train_lmdb"
15 | batch_size: 100
16 | backend: LMDB
17 | }
18 | }
19 | layer {
20 | name: "cifar"
21 | type: "Data"
22 | top: "data"
23 | top: "label"
24 | include {
25 | phase: TEST
26 | }
27 | transform_param {
28 | mean_file: "examples/cifar10/mean.binaryproto"
29 | }
30 | data_param {
31 | source: "examples/cifar10/cifar10_test_lmdb"
32 | batch_size: 1000
33 | backend: LMDB
34 | }
35 | }
36 | layer {
37 | name: "conv1"
38 | type: "Convolution"
39 | bottom: "data"
40 | top: "conv1"
41 | param {
42 | lr_mult: 1
43 | }
44 | convolution_param {
45 | num_output: 32
46 | pad: 2
47 | kernel_size: 5
48 | stride: 1
49 | bias_term: false
50 | weight_filler {
51 | type: "gaussian"
52 | std: 0.0001
53 | }
54 | }
55 | }
56 | layer {
57 | name: "pool1"
58 | type: "Pooling"
59 | bottom: "conv1"
60 | top: "pool1"
61 | pooling_param {
62 | pool: MAX
63 | kernel_size: 3
64 | stride: 2
65 | }
66 | }
67 |
68 | layer {
69 | name: "bn1"
70 | type: "BatchNorm"
71 | bottom: "pool1"
72 | top: "bn1"
73 | param {
74 | lr_mult: 0
75 | }
76 | param {
77 | lr_mult: 0
78 | }
79 | param {
80 | lr_mult: 0
81 | }
82 | }
83 |
84 | layer {
85 | name: "Sigmoid1"
86 | type: "Sigmoid"
87 | bottom: "bn1"
88 | top: "Sigmoid1"
89 | }
90 |
91 | layer {
92 | name: "conv2"
93 | type: "Convolution"
94 | bottom: "Sigmoid1"
95 | top: "conv2"
96 | param {
97 | lr_mult: 1
98 | }
99 | convolution_param {
100 | num_output: 32
101 | pad: 2
102 | kernel_size: 5
103 | stride: 1
104 | bias_term: false
105 | weight_filler {
106 | type: "gaussian"
107 | std: 0.01
108 | }
109 | }
110 | }
111 |
112 | layer {
113 | name: "bn2"
114 | type: "BatchNorm"
115 | bottom: "conv2"
116 | top: "bn2"
117 | param {
118 | lr_mult: 0
119 | }
120 | param {
121 | lr_mult: 0
122 | }
123 | param {
124 | lr_mult: 0
125 | }
126 | }
127 |
128 | layer {
129 | name: "Sigmoid2"
130 | type: "Sigmoid"
131 | bottom: "bn2"
132 | top: "Sigmoid2"
133 | }
134 | layer {
135 | name: "pool2"
136 | type: "Pooling"
137 | bottom: "Sigmoid2"
138 | top: "pool2"
139 | pooling_param {
140 | pool: AVE
141 | kernel_size: 3
142 | stride: 2
143 | }
144 | }
145 | layer {
146 | name: "conv3"
147 | type: "Convolution"
148 | bottom: "pool2"
149 | top: "conv3"
150 | param {
151 | lr_mult: 1
152 | }
153 | convolution_param {
154 | num_output: 64
155 | pad: 2
156 | kernel_size: 5
157 | stride: 1
158 | bias_term: false
159 | weight_filler {
160 | type: "gaussian"
161 | std: 0.01
162 | }
163 | }
164 | }
165 |
166 | layer {
167 | name: "bn3"
168 | type: "BatchNorm"
169 | bottom: "conv3"
170 | top: "bn3"
171 | param {
172 | lr_mult: 0
173 | }
174 | param {
175 | lr_mult: 0
176 | }
177 | param {
178 | lr_mult: 0
179 | }
180 | }
181 |
182 | layer {
183 | name: "Sigmoid3"
184 | type: "Sigmoid"
185 | bottom: "bn3"
186 | top: "Sigmoid3"
187 | }
188 | layer {
189 | name: "pool3"
190 | type: "Pooling"
191 | bottom: "Sigmoid3"
192 | top: "pool3"
193 | pooling_param {
194 | pool: AVE
195 | kernel_size: 3
196 | stride: 2
197 | }
198 | }
199 |
200 | layer {
201 | name: "ip1"
202 | type: "InnerProduct"
203 | bottom: "pool3"
204 | top: "ip1"
205 | param {
206 | lr_mult: 1
207 | decay_mult: 1
208 | }
209 | param {
210 | lr_mult: 1
211 | decay_mult: 0
212 | }
213 | inner_product_param {
214 | num_output: 10
215 | weight_filler {
216 | type: "gaussian"
217 | std: 0.01
218 | }
219 | bias_filler {
220 | type: "constant"
221 | }
222 | }
223 | }
224 | layer {
225 | name: "accuracy"
226 | type: "Accuracy"
227 | bottom: "ip1"
228 | bottom: "label"
229 | top: "accuracy"
230 | include {
231 | phase: TEST
232 | }
233 | }
234 | layer {
235 | name: "loss"
236 | type: "SoftmaxWithLoss"
237 | bottom: "ip1"
238 | bottom: "label"
239 | top: "loss"
240 | }
241 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver.prototxt:
--------------------------------------------------------------------------------
1 | # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
2 | # then another factor of 10 after 10 more epochs (5000 iters)
3 |
4 | # The train/test net protocol buffer definition
5 | net: "examples/cifar10/cifar10_full_train_test.prototxt"
6 | # test_iter specifies how many forward passes the test should carry out.
7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
8 | # covering the full 10,000 testing images.
9 | test_iter: 100
10 | # Carry out testing every 1000 training iterations.
11 | test_interval: 1000
12 | # The base learning rate, momentum and the weight decay of the network.
13 | base_lr: 0.001
14 | momentum: 0.9
15 | weight_decay: 0.004
16 | # The learning rate policy
17 | lr_policy: "fixed"
18 | # Display every 200 iterations
19 | display: 200
20 | # The maximum number of iterations
21 | max_iter: 60000
22 | # snapshot intermediate results
23 | snapshot: 10000
24 | snapshot_format: HDF5
25 | snapshot_prefix: "examples/cifar10/cifar10_full"
26 | # solver mode: CPU or GPU
27 | solver_mode: GPU
28 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver_lr1.prototxt:
--------------------------------------------------------------------------------
1 | # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
2 | # then another factor of 10 after 10 more epochs (5000 iters)
3 |
4 | # The train/test net protocol buffer definition
5 | net: "examples/cifar10/cifar10_full_train_test.prototxt"
6 | # test_iter specifies how many forward passes the test should carry out.
7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
8 | # covering the full 10,000 testing images.
9 | test_iter: 100
10 | # Carry out testing every 1000 training iterations.
11 | test_interval: 1000
12 | # The base learning rate, momentum and the weight decay of the network.
13 | base_lr: 0.0001
14 | momentum: 0.9
15 | weight_decay: 0.004
16 | # The learning rate policy
17 | lr_policy: "fixed"
18 | # Display every 200 iterations
19 | display: 200
20 | # The maximum number of iterations
21 | max_iter: 65000
22 | # snapshot intermediate results
23 | snapshot: 5000
24 | snapshot_format: HDF5
25 | snapshot_prefix: "examples/cifar10/cifar10_full"
26 | # solver mode: CPU or GPU
27 | solver_mode: GPU
28 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver_lr2.prototxt:
--------------------------------------------------------------------------------
1 | # reduce learning rate after 120 epochs (60000 iters) by a factor of 10
2 | # then another factor of 10 after 10 more epochs (5000 iters)
3 |
4 | # The train/test net protocol buffer definition
5 | net: "examples/cifar10/cifar10_full_train_test.prototxt"
6 | # test_iter specifies how many forward passes the test should carry out.
7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
8 | # covering the full 10,000 testing images.
9 | test_iter: 100
10 | # Carry out testing every 1000 training iterations.
11 | test_interval: 1000
12 | # The base learning rate, momentum and the weight decay of the network.
13 | base_lr: 0.00001
14 | momentum: 0.9
15 | weight_decay: 0.004
16 | # The learning rate policy
17 | lr_policy: "fixed"
18 | # Display every 200 iterations
19 | display: 200
20 | # The maximum number of iterations
21 | max_iter: 70000
22 | # snapshot intermediate results
23 | snapshot: 5000
24 | snapshot_format: HDF5
25 | snapshot_prefix: "examples/cifar10/cifar10_full"
26 | # solver mode: CPU or GPU
27 | solver_mode: GPU
28 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_train_test.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_full"
2 | layer {
3 | name: "cifar"
4 | type: "Data"
5 | top: "data"
6 | top: "label"
7 | include {
8 | phase: TRAIN
9 | }
10 | transform_param {
11 | mean_file: "examples/cifar10/mean.binaryproto"
12 | }
13 | data_param {
14 | source: "examples/cifar10/cifar10_train_lmdb"
15 | batch_size: 100
16 | backend: LMDB
17 | }
18 | }
19 | layer {
20 | name: "cifar"
21 | type: "Data"
22 | top: "data"
23 | top: "label"
24 | include {
25 | phase: TEST
26 | }
27 | transform_param {
28 | mean_file: "examples/cifar10/mean.binaryproto"
29 | }
30 | data_param {
31 | source: "examples/cifar10/cifar10_test_lmdb"
32 | batch_size: 100
33 | backend: LMDB
34 | }
35 | }
36 | layer {
37 | name: "conv1"
38 | type: "Convolution"
39 | bottom: "data"
40 | top: "conv1"
41 | param {
42 | lr_mult: 1
43 | }
44 | param {
45 | lr_mult: 2
46 | }
47 | convolution_param {
48 | num_output: 32
49 | pad: 2
50 | kernel_size: 5
51 | stride: 1
52 | weight_filler {
53 | type: "gaussian"
54 | std: 0.0001
55 | }
56 | bias_filler {
57 | type: "constant"
58 | }
59 | }
60 | }
61 | layer {
62 | name: "pool1"
63 | type: "Pooling"
64 | bottom: "conv1"
65 | top: "pool1"
66 | pooling_param {
67 | pool: MAX
68 | kernel_size: 3
69 | stride: 2
70 | }
71 | }
72 | layer {
73 | name: "relu1"
74 | type: "ReLU"
75 | bottom: "pool1"
76 | top: "pool1"
77 | }
78 | layer {
79 | name: "norm1"
80 | type: "LRN"
81 | bottom: "pool1"
82 | top: "norm1"
83 | lrn_param {
84 | local_size: 3
85 | alpha: 5e-05
86 | beta: 0.75
87 | norm_region: WITHIN_CHANNEL
88 | }
89 | }
90 | layer {
91 | name: "conv2"
92 | type: "Convolution"
93 | bottom: "norm1"
94 | top: "conv2"
95 | param {
96 | lr_mult: 1
97 | }
98 | param {
99 | lr_mult: 2
100 | }
101 | convolution_param {
102 | num_output: 32
103 | pad: 2
104 | kernel_size: 5
105 | stride: 1
106 | weight_filler {
107 | type: "gaussian"
108 | std: 0.01
109 | }
110 | bias_filler {
111 | type: "constant"
112 | }
113 | }
114 | }
115 | layer {
116 | name: "relu2"
117 | type: "ReLU"
118 | bottom: "conv2"
119 | top: "conv2"
120 | }
121 | layer {
122 | name: "pool2"
123 | type: "Pooling"
124 | bottom: "conv2"
125 | top: "pool2"
126 | pooling_param {
127 | pool: AVE
128 | kernel_size: 3
129 | stride: 2
130 | }
131 | }
132 | layer {
133 | name: "norm2"
134 | type: "LRN"
135 | bottom: "pool2"
136 | top: "norm2"
137 | lrn_param {
138 | local_size: 3
139 | alpha: 5e-05
140 | beta: 0.75
141 | norm_region: WITHIN_CHANNEL
142 | }
143 | }
144 | layer {
145 | name: "conv3"
146 | type: "Convolution"
147 | bottom: "norm2"
148 | top: "conv3"
149 | convolution_param {
150 | num_output: 64
151 | pad: 2
152 | kernel_size: 5
153 | stride: 1
154 | weight_filler {
155 | type: "gaussian"
156 | std: 0.01
157 | }
158 | bias_filler {
159 | type: "constant"
160 | }
161 | }
162 | }
163 | layer {
164 | name: "relu3"
165 | type: "ReLU"
166 | bottom: "conv3"
167 | top: "conv3"
168 | }
169 | layer {
170 | name: "pool3"
171 | type: "Pooling"
172 | bottom: "conv3"
173 | top: "pool3"
174 | pooling_param {
175 | pool: AVE
176 | kernel_size: 3
177 | stride: 2
178 | }
179 | }
180 | layer {
181 | name: "ip1"
182 | type: "InnerProduct"
183 | bottom: "pool3"
184 | top: "ip1"
185 | param {
186 | lr_mult: 1
187 | decay_mult: 250
188 | }
189 | param {
190 | lr_mult: 2
191 | decay_mult: 0
192 | }
193 | inner_product_param {
194 | num_output: 10
195 | weight_filler {
196 | type: "gaussian"
197 | std: 0.01
198 | }
199 | bias_filler {
200 | type: "constant"
201 | }
202 | }
203 | }
204 | layer {
205 | name: "accuracy"
206 | type: "Accuracy"
207 | bottom: "ip1"
208 | bottom: "label"
209 | top: "accuracy"
210 | include {
211 | phase: TEST
212 | }
213 | }
214 | layer {
215 | name: "loss"
216 | type: "SoftmaxWithLoss"
217 | bottom: "ip1"
218 | bottom: "label"
219 | top: "loss"
220 | }
221 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_quick_test"
2 | layer {
3 | name: "data"
4 | type: "Input"
5 | top: "data"
6 | input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } }
7 | }
8 | layer {
9 | name: "conv1"
10 | type: "Convolution"
11 | bottom: "data"
12 | top: "conv1"
13 | param {
14 | lr_mult: 1
15 | }
16 | param {
17 | lr_mult: 2
18 | }
19 | convolution_param {
20 | num_output: 32
21 | pad: 2
22 | kernel_size: 5
23 | stride: 1
24 | }
25 | }
26 | layer {
27 | name: "pool1"
28 | type: "Pooling"
29 | bottom: "conv1"
30 | top: "pool1"
31 | pooling_param {
32 | pool: MAX
33 | kernel_size: 3
34 | stride: 2
35 | }
36 | }
37 | layer {
38 | name: "relu1"
39 | type: "ReLU"
40 | bottom: "pool1"
41 | top: "pool1"
42 | }
43 | layer {
44 | name: "conv2"
45 | type: "Convolution"
46 | bottom: "pool1"
47 | top: "conv2"
48 | param {
49 | lr_mult: 1
50 | }
51 | param {
52 | lr_mult: 2
53 | }
54 | convolution_param {
55 | num_output: 32
56 | pad: 2
57 | kernel_size: 5
58 | stride: 1
59 | }
60 | }
61 | layer {
62 | name: "relu2"
63 | type: "ReLU"
64 | bottom: "conv2"
65 | top: "conv2"
66 | }
67 | layer {
68 | name: "pool2"
69 | type: "Pooling"
70 | bottom: "conv2"
71 | top: "pool2"
72 | pooling_param {
73 | pool: AVE
74 | kernel_size: 3
75 | stride: 2
76 | }
77 | }
78 | layer {
79 | name: "conv3"
80 | type: "Convolution"
81 | bottom: "pool2"
82 | top: "conv3"
83 | param {
84 | lr_mult: 1
85 | }
86 | param {
87 | lr_mult: 2
88 | }
89 | convolution_param {
90 | num_output: 64
91 | pad: 2
92 | kernel_size: 5
93 | stride: 1
94 | }
95 | }
96 | layer {
97 | name: "relu3"
98 | type: "ReLU"
99 | bottom: "conv3"
100 | top: "conv3"
101 | }
102 | layer {
103 | name: "pool3"
104 | type: "Pooling"
105 | bottom: "conv3"
106 | top: "pool3"
107 | pooling_param {
108 | pool: AVE
109 | kernel_size: 3
110 | stride: 2
111 | }
112 | }
113 | layer {
114 | name: "ip1"
115 | type: "InnerProduct"
116 | bottom: "pool3"
117 | top: "ip1"
118 | param {
119 | lr_mult: 1
120 | }
121 | param {
122 | lr_mult: 2
123 | }
124 | inner_product_param {
125 | num_output: 64
126 | }
127 | }
128 | layer {
129 | name: "ip2"
130 | type: "InnerProduct"
131 | bottom: "ip1"
132 | top: "ip2"
133 | param {
134 | lr_mult: 1
135 | }
136 | param {
137 | lr_mult: 2
138 | }
139 | inner_product_param {
140 | num_output: 10
141 | }
142 | }
143 | layer {
144 | name: "prob"
145 | type: "Softmax"
146 | bottom: "ip2"
147 | top: "prob"
148 | }
149 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_solver.prototxt:
--------------------------------------------------------------------------------
1 | # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10
2 |
3 | # The train/test net protocol buffer definition
4 | net: "./cifar10_quick_train_test.prototxt"
5 | # test_iter specifies how many forward passes the test should carry out.
6 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
7 | # covering the full 10,000 testing images.
8 | test_iter: 100
9 | # Carry out testing every 500 training iterations.
10 | test_interval: 500
11 | # The base learning rate, momentum and the weight decay of the network.
12 | base_lr: 0.001
13 | momentum: 0.9
14 | weight_decay: 0.004
15 | # The learning rate policy
16 | lr_policy: "fixed"
17 | # Display every 100 iterations
18 | display: 100
19 | # The maximum number of iterations
20 | max_iter: 4000
21 | # snapshot intermediate results
22 | snapshot: 4000
23 | snapshot_prefix: "./cifar10_quick"
24 | # solver mode: CPU or GPU
25 | solver_mode: GPU
26 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_solver_lr1.prototxt:
--------------------------------------------------------------------------------
1 | # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10
2 |
3 | # The train/test net protocol buffer definition
4 | net: "examples/cifar10/cifar10_quick_train_test.prototxt"
5 | # test_iter specifies how many forward passes the test should carry out.
6 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations,
7 | # covering the full 10,000 testing images.
8 | test_iter: 100
9 | # Carry out testing every 500 training iterations.
10 | test_interval: 500
11 | # The base learning rate, momentum and the weight decay of the network.
12 | base_lr: 0.0001
13 | momentum: 0.9
14 | weight_decay: 0.004
15 | # The learning rate policy
16 | lr_policy: "fixed"
17 | # Display every 100 iterations
18 | display: 100
19 | # The maximum number of iterations
20 | max_iter: 5000
21 | # snapshot intermediate results
22 | snapshot: 5000
23 | snapshot_format: HDF5
24 | snapshot_prefix: "examples/cifar10/cifar10_quick"
25 | # solver mode: CPU or GPU
26 | solver_mode: GPU
27 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_train_test.prototxt:
--------------------------------------------------------------------------------
1 | name: "CIFAR10_quick"
2 | layer {
3 | name: "cifar"
4 | type: "Data"
5 | top: "data"
6 | top: "label"
7 | include {
8 | phase: TRAIN
9 | }
10 | transform_param {
11 | mean_file: "examples/cifar10/mean.binaryproto"
12 | }
13 | data_param {
14 | source: "examples/cifar10/cifar10_train_lmdb"
15 | batch_size: 100
16 | backend: LMDB
17 | }
18 | }
19 | layer {
20 | name: "cifar"
21 | type: "Data"
22 | top: "data"
23 | top: "label"
24 | include {
25 | phase: TEST
26 | }
27 | transform_param {
28 | mean_file: "examples/cifar10/mean.binaryproto"
29 | }
30 | data_param {
31 | source: "examples/cifar10/cifar10_test_lmdb"
32 | batch_size: 100
33 | backend: LMDB
34 | }
35 | }
36 | layer {
37 | name: "conv1"
38 | type: "Convolution"
39 | bottom: "data"
40 | top: "conv1"
41 | param {
42 | lr_mult: 1
43 | }
44 | param {
45 | lr_mult: 2
46 | }
47 | convolution_param {
48 | num_output: 32
49 | pad: 2
50 | kernel_size: 5
51 | stride: 1
52 | weight_filler {
53 | type: "gaussian"
54 | std: 0.0001
55 | }
56 | bias_filler {
57 | type: "constant"
58 | }
59 | }
60 | }
61 | layer {
62 | name: "pool1"
63 | type: "Pooling"
64 | bottom: "conv1"
65 | top: "pool1"
66 | pooling_param {
67 | pool: MAX
68 | kernel_size: 3
69 | stride: 2
70 | }
71 | }
72 | layer {
73 | name: "relu1"
74 | type: "ReLU"
75 | bottom: "pool1"
76 | top: "pool1"
77 | }
78 | layer {
79 | name: "conv2"
80 | type: "Convolution"
81 | bottom: "pool1"
82 | top: "conv2"
83 | param {
84 | lr_mult: 1
85 | }
86 | param {
87 | lr_mult: 2
88 | }
89 | convolution_param {
90 | num_output: 32
91 | pad: 2
92 | kernel_size: 5
93 | stride: 1
94 | weight_filler {
95 | type: "gaussian"
96 | std: 0.01
97 | }
98 | bias_filler {
99 | type: "constant"
100 | }
101 | }
102 | }
103 | layer {
104 | name: "relu2"
105 | type: "ReLU"
106 | bottom: "conv2"
107 | top: "conv2"
108 | }
109 | layer {
110 | name: "pool2"
111 | type: "Pooling"
112 | bottom: "conv2"
113 | top: "pool2"
114 | pooling_param {
115 | pool: AVE
116 | kernel_size: 3
117 | stride: 2
118 | }
119 | }
120 | layer {
121 | name: "conv3"
122 | type: "Convolution"
123 | bottom: "pool2"
124 | top: "conv3"
125 | param {
126 | lr_mult: 1
127 | }
128 | param {
129 | lr_mult: 2
130 | }
131 | convolution_param {
132 | num_output: 64
133 | pad: 2
134 | kernel_size: 5
135 | stride: 1
136 | weight_filler {
137 | type: "gaussian"
138 | std: 0.01
139 | }
140 | bias_filler {
141 | type: "constant"
142 | }
143 | }
144 | }
145 | layer {
146 | name: "relu3"
147 | type: "ReLU"
148 | bottom: "conv3"
149 | top: "conv3"
150 | }
151 | layer {
152 | name: "pool3"
153 | type: "Pooling"
154 | bottom: "conv3"
155 | top: "pool3"
156 | pooling_param {
157 | pool: AVE
158 | kernel_size: 3
159 | stride: 2
160 | }
161 | }
162 | layer {
163 | name: "ip1"
164 | type: "InnerProduct"
165 | bottom: "pool3"
166 | top: "ip1"
167 | param {
168 | lr_mult: 1
169 | }
170 | param {
171 | lr_mult: 2
172 | }
173 | inner_product_param {
174 | num_output: 64
175 | weight_filler {
176 | type: "gaussian"
177 | std: 0.1
178 | }
179 | bias_filler {
180 | type: "constant"
181 | }
182 | }
183 | }
184 | layer {
185 | name: "ip2"
186 | type: "InnerProduct"
187 | bottom: "ip1"
188 | top: "ip2"
189 | param {
190 | lr_mult: 1
191 | }
192 | param {
193 | lr_mult: 2
194 | }
195 | inner_product_param {
196 | num_output: 10
197 | weight_filler {
198 | type: "gaussian"
199 | std: 0.1
200 | }
201 | bias_filler {
202 | type: "constant"
203 | }
204 | }
205 | }
206 | layer {
207 | name: "accuracy"
208 | type: "Accuracy"
209 | bottom: "ip2"
210 | bottom: "label"
211 | top: "accuracy"
212 | include {
213 | phase: TEST
214 | }
215 | }
216 | layer {
217 | name: "loss"
218 | type: "SoftmaxWithLoss"
219 | bottom: "ip2"
220 | bottom: "label"
221 | top: "loss"
222 | }
223 |
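Editorial note: for readers who reach the Keras material in 7-3 first, the CIFAR10_quick network defined above maps roughly onto the following Keras sketch. This is an approximation only (padding behaviour and pooling rounding differ slightly between Caffe and Keras), and it is not a file from caffe-master.

    # Rough Keras equivalent of CIFAR10_quick (editorial sketch, not part of the repo).
    from keras.models import Sequential
    from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Activation

    model = Sequential()
    # conv1 (32 filters, 5x5, pad 2) -> 3x3/2 max pooling -> ReLU, as in the prototxt
    model.add(Conv2D(32, (5, 5), padding='same', input_shape=(32, 32, 3)))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Activation('relu'))
    # conv2 (32 filters, 5x5) -> ReLU -> 3x3/2 average pooling
    model.add(Conv2D(32, (5, 5), padding='same', activation='relu'))
    model.add(AveragePooling2D(pool_size=(3, 3), strides=(2, 2)))
    # conv3 (64 filters, 5x5) -> ReLU -> 3x3/2 average pooling
    model.add(Conv2D(64, (5, 5), padding='same', activation='relu'))
    model.add(AveragePooling2D(pool_size=(3, 3), strides=(2, 2)))
    # ip1 (64 units) and ip2 (10 classes); SoftmaxWithLoss becomes categorical_crossentropy
    model.add(Flatten())
    model.add(Dense(64))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])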
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/convert_cifar_data.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // This script converts the CIFAR dataset to the leveldb format used
3 | // by caffe to perform classification.
4 | // Usage:
5 | // convert_cifar_data input_folder output_db_file
6 | // The CIFAR dataset could be downloaded at
7 | // http://www.cs.toronto.edu/~kriz/cifar.html
8 |
  9 | #include <fstream>  // NOLINT(readability/streams)
 10 | #include <string>
11 |
12 | #include "boost/scoped_ptr.hpp"
13 | #include "glog/logging.h"
14 | #include "google/protobuf/text_format.h"
15 | #include "stdint.h"
16 |
17 | #include "caffe/proto/caffe.pb.h"
18 | #include "caffe/util/db.hpp"
19 | #include "caffe/util/format.hpp"
20 |
21 | using caffe::Datum;
22 | using boost::scoped_ptr;
23 | using std::string;
24 | namespace db = caffe::db;
25 |
26 | const int kCIFARSize = 32;
27 | const int kCIFARImageNBytes = 3072;
28 | const int kCIFARBatchSize = 10000;
29 | const int kCIFARTrainBatches = 5;
30 |
31 | void read_image(std::ifstream* file, int* label, char* buffer) {
32 | char label_char;
33 | file->read(&label_char, 1);
34 | *label = label_char;
35 | file->read(buffer, kCIFARImageNBytes);
36 | return;
37 | }
38 |
39 | void convert_dataset(const string& input_folder, const string& output_folder,
40 | const string& db_type) {
 41 |   scoped_ptr<db::DB> train_db(db::GetDB(db_type));
42 | train_db->Open(output_folder + "/cifar10_train_" + db_type, db::NEW);
 43 |   scoped_ptr<db::Transaction> txn(train_db->NewTransaction());
44 | // Data buffer
45 | int label;
46 | char str_buffer[kCIFARImageNBytes];
47 | Datum datum;
48 | datum.set_channels(3);
49 | datum.set_height(kCIFARSize);
50 | datum.set_width(kCIFARSize);
51 |
52 | LOG(INFO) << "Writing Training data";
53 | for (int fileid = 0; fileid < kCIFARTrainBatches; ++fileid) {
54 | // Open files
55 | LOG(INFO) << "Training Batch " << fileid + 1;
56 | string batchFileName = input_folder + "/data_batch_"
57 | + caffe::format_int(fileid+1) + ".bin";
58 | std::ifstream data_file(batchFileName.c_str(),
59 | std::ios::in | std::ios::binary);
60 | CHECK(data_file) << "Unable to open train file #" << fileid + 1;
61 | for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) {
62 | read_image(&data_file, &label, str_buffer);
63 | datum.set_label(label);
64 | datum.set_data(str_buffer, kCIFARImageNBytes);
65 | string out;
66 | CHECK(datum.SerializeToString(&out));
67 | txn->Put(caffe::format_int(fileid * kCIFARBatchSize + itemid, 5), out);
68 | }
69 | }
70 | txn->Commit();
71 | train_db->Close();
72 |
73 | LOG(INFO) << "Writing Testing data";
 74 |   scoped_ptr<db::DB> test_db(db::GetDB(db_type));
75 | test_db->Open(output_folder + "/cifar10_test_" + db_type, db::NEW);
76 | txn.reset(test_db->NewTransaction());
77 | // Open files
78 | std::ifstream data_file((input_folder + "/test_batch.bin").c_str(),
79 | std::ios::in | std::ios::binary);
80 | CHECK(data_file) << "Unable to open test file.";
81 | for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) {
82 | read_image(&data_file, &label, str_buffer);
83 | datum.set_label(label);
84 | datum.set_data(str_buffer, kCIFARImageNBytes);
85 | string out;
86 | CHECK(datum.SerializeToString(&out));
87 | txn->Put(caffe::format_int(itemid, 5), out);
88 | }
89 | txn->Commit();
90 | test_db->Close();
91 | }
92 |
93 | int main(int argc, char** argv) {
94 | FLAGS_alsologtostderr = 1;
95 |
96 | if (argc != 4) {
97 | printf("This script converts the CIFAR dataset to the leveldb format used\n"
98 | "by caffe to perform classification.\n"
99 | "Usage:\n"
100 | " convert_cifar_data input_folder output_folder db_type\n"
101 | "Where the input folder should contain the binary batch files.\n"
102 | "The CIFAR dataset could be downloaded at\n"
103 | " http://www.cs.toronto.edu/~kriz/cifar.html\n"
104 | "You should gunzip them after downloading.\n");
105 | } else {
106 | google::InitGoogleLogging(argv[0]);
107 | convert_dataset(string(argv[1]), string(argv[2]), string(argv[3]));
108 | }
109 | return 0;
110 | }
111 |
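Editorial companion to read_image() above (not part of caffe-master): the same CIFAR-10 binary record layout read in Python. Each record in data_batch_*.bin is one label byte followed by 3072 image bytes (3 x 32 x 32, channel-major), which is exactly what the converter copies into a Datum.

    # Editorial sketch: read a CIFAR-10 .bin batch file; layout mirrors read_image().
    import numpy as np

    def read_cifar10_bin(path):
        # Each record: 1 label byte + 3072 image bytes (3 channels x 32 x 32, row-major per channel).
        raw = np.fromfile(path, dtype=np.uint8).reshape(-1, 1 + 3072)
        labels = raw[:, 0].astype(np.int64)
        images = raw[:, 1:].reshape(-1, 3, 32, 32)  # CHW order, as stored in the Datum
        return images, labels

    # images, labels = read_cifar10_bin('cifar-10-batches-bin/data_batch_1.bin')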
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/create_cifar10.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | echo Creating leveldb...
 3 | if exist cifar10-leveldb (
 4 |   rmdir cifar10-leveldb /s /q
 5 | )
 6 | mkdir cifar10-leveldb
 7 | SET DATA="./cifar-10-batches-bin"
 8 | SET GLOG_logtostderr=1
 9 | REM convert_cifar_data expects: input_folder output_folder db_type
10 | "convert_cifar_data.exe" %DATA% ./cifar10-leveldb leveldb
11 | echo Computing image mean...
12 | REM the converter writes the training DB to cifar10_train_leveldb
13 | "compute_image_mean.exe" ./cifar10-leveldb/cifar10_train_leveldb mean.binaryproto
14 | echo Done.
15 | pause
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/readme.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: CIFAR-10 tutorial
3 | category: example
4 | description: Train and test Caffe on CIFAR-10 data.
5 | include_in_docs: true
6 | priority: 5
7 | ---
8 |
9 | Alex's CIFAR-10 tutorial, Caffe style
10 | =====================================
11 |
12 | Alex Krizhevsky's [cuda-convnet](https://code.google.com/p/cuda-convnet/) details the model definitions, parameters, and training procedure for good performance on CIFAR-10. This example reproduces his results in Caffe.
13 |
14 | We will assume that you have Caffe successfully compiled. If not, please refer to the [Installation page](/installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`.
15 |
16 | We thank @chyojn for the pull request that defined the model schemas and solver configurations.
17 |
18 | *This example is a work-in-progress. It would be nice to further explain details of the network and training choices and benchmark the full training.*
19 |
20 | Prepare the Dataset
21 | -------------------
22 |
23 | You will first need to download and convert the data format from the [CIFAR-10 website](http://www.cs.toronto.edu/~kriz/cifar.html). To do this, simply run the following commands:
24 |
25 | cd $CAFFE_ROOT
26 | ./data/cifar10/get_cifar10.sh
27 | ./examples/cifar10/create_cifar10.sh
28 |
 29 | If it complains that `wget` or `gunzip` is not installed, install the missing tool. After running the script there should be the dataset, `./cifar10-leveldb`, and the dataset image mean, `./mean.binaryproto`.
30 |
31 | The Model
32 | ---------
33 |
34 | The CIFAR-10 model is a CNN that composes layers of convolution, pooling, rectified linear unit (ReLU) nonlinearities, and local contrast normalization with a linear classifier on top of it all. We have defined the model in the `CAFFE_ROOT/examples/cifar10` directory's `cifar10_quick_train_test.prototxt`.
35 |
36 | Training and Testing the "Quick" Model
37 | --------------------------------------
38 |
39 | Training the model is simple after you have written the network definition protobuf and solver protobuf files (refer to [MNIST Tutorial](../examples/mnist.html)). Simply run `train_quick.sh`, or the following command directly:
40 |
41 | cd $CAFFE_ROOT
42 | ./examples/cifar10/train_quick.sh
43 |
44 | `train_quick.sh` is a simple script, so have a look inside. The main tool for training is `caffe` with the `train` action, and the solver protobuf text file as its argument.
45 |
46 | When you run the code, you will see a lot of messages flying by like this:
47 |
48 | I0317 21:52:48.945710 2008298256 net.cpp:74] Creating Layer conv1
49 | I0317 21:52:48.945716 2008298256 net.cpp:84] conv1 <- data
50 | I0317 21:52:48.945725 2008298256 net.cpp:110] conv1 -> conv1
51 | I0317 21:52:49.298691 2008298256 net.cpp:125] Top shape: 100 32 32 32 (3276800)
52 | I0317 21:52:49.298719 2008298256 net.cpp:151] conv1 needs backward computation.
53 |
54 | These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start:
55 |
56 | I0317 21:52:49.309370 2008298256 net.cpp:166] Network initialization done.
57 | I0317 21:52:49.309376 2008298256 net.cpp:167] Memory required for Data 23790808
58 | I0317 21:52:49.309422 2008298256 solver.cpp:36] Solver scaffolding done.
59 | I0317 21:52:49.309447 2008298256 solver.cpp:47] Solving CIFAR10_quick_train
60 |
61 | Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this:
62 |
63 | I0317 21:53:12.179772 2008298256 solver.cpp:208] Iteration 100, lr = 0.001
64 | I0317 21:53:12.185698 2008298256 solver.cpp:65] Iteration 100, loss = 1.73643
65 | ...
66 | I0317 21:54:41.150030 2008298256 solver.cpp:87] Iteration 500, Testing net
67 | I0317 21:54:47.129461 2008298256 solver.cpp:114] Test score #0: 0.5504
68 | I0317 21:54:47.129500 2008298256 solver.cpp:114] Test score #1: 1.27805
69 |
 70 | For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training loss. For the output of the testing phase, **score 0 is the accuracy**, and **score 1 is the test loss**.
71 |
72 | And after making yourself a cup of coffee, you are done!
73 |
74 | I0317 22:12:19.666914 2008298256 solver.cpp:87] Iteration 5000, Testing net
75 | I0317 22:12:25.580330 2008298256 solver.cpp:114] Test score #0: 0.7533
76 | I0317 22:12:25.580379 2008298256 solver.cpp:114] Test score #1: 0.739837
77 | I0317 22:12:25.587262 2008298256 solver.cpp:130] Snapshotting to cifar10_quick_iter_5000
78 | I0317 22:12:25.590215 2008298256 solver.cpp:137] Snapshotting solver state to cifar10_quick_iter_5000.solverstate
79 | I0317 22:12:25.592813 2008298256 solver.cpp:81] Optimization Done.
80 |
81 | Our model achieved ~75% test accuracy. The model parameters are stored in binary protobuf format in
82 |
83 | cifar10_quick_iter_5000
84 |
85 | which is ready-to-deploy in CPU or GPU mode! Refer to the `CAFFE_ROOT/examples/cifar10/cifar10_quick.prototxt` for the deployment model definition that can be called on new data.
86 |
87 | Why train on a GPU?
88 | -------------------
89 |
90 | CIFAR-10, while still small, has enough data to make GPU training attractive.
91 |
 92 | To compare CPU vs. GPU training speed, simply change one line in each of the `cifar*solver.prototxt` files:
93 |
94 | # solver mode: CPU or GPU
95 | solver_mode: CPU
96 |
97 | and you will be using CPU for training.
98 |
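Editorial note on deployment: the tutorial above stops at the snapshot. A minimal pycaffe sketch for running the trained quick model on new images might look like the following. The file names are assumptions (depending on your Caffe version the snapshot may or may not carry a `.caffemodel` extension), and the input is expected to already be mean-subtracted with `mean.binaryproto`.

    # Editorial sketch (not part of the tutorial): classify one image with the trained quick model.
    import numpy as np
    import caffe

    caffe.set_mode_cpu()  # or caffe.set_mode_gpu()
    net = caffe.Net('examples/cifar10/cifar10_quick.prototxt',               # deploy definition
                    'examples/cifar10/cifar10_quick_iter_5000.caffemodel',   # assumed snapshot name
                    caffe.TEST)

    # image: float32 array of shape (3, 32, 32), already mean-subtracted
    image = np.zeros((3, 32, 32), dtype=np.float32)
    net.blobs['data'].reshape(1, 3, 32, 32)
    net.blobs['data'].data[0] = image
    probs = net.forward()['prob'][0]   # 'prob' is the softmax output blob in the deploy definition
    print('predicted class:', probs.argmax())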
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | set -e
3 |
4 | TOOLS=./build/tools
5 |
6 | $TOOLS/caffe train \
7 | --solver=examples/cifar10/cifar10_full_solver.prototxt $@
8 |
9 | # reduce learning rate by factor of 10
10 | $TOOLS/caffe train \
11 | --solver=examples/cifar10/cifar10_full_solver_lr1.prototxt \
12 | --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate $@
13 |
14 | # reduce learning rate by factor of 10
15 | $TOOLS/caffe train \
16 | --solver=examples/cifar10/cifar10_full_solver_lr2.prototxt \
17 | --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate $@
18 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full_sigmoid.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | set -e
3 |
4 | TOOLS=./build/tools
5 |
6 | $TOOLS/caffe train \
7 | --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt $@
8 |
9 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full_sigmoid_bn.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | set -e
3 |
4 | TOOLS=./build/tools
5 |
6 | $TOOLS/caffe train \
7 | --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt $@
8 |
9 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_quick.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env sh
2 | set -e
3 |
4 | caffe train --solver=./cifar10_quick_solver.prototxt $@
5 |
6 | # reduce learning rate by factor of 10 after 8 epochs
7 | caffe train \
8 | --solver=./cifar10_quick_solver_lr1.prototxt \
9 | --snapshot=./cifar10_quick_iter_4000.solverstate $@
10 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/README.md:
--------------------------------------------------------------------------------
1 | # Keras examples directory
2 |
3 | ## Vision models examples
4 |
5 | [mnist_mlp.py](mnist_mlp.py)
6 | Trains a simple deep multi-layer perceptron on the MNIST dataset.
7 |
8 | [mnist_cnn.py](mnist_cnn.py)
9 | Trains a simple convnet on the MNIST dataset.
10 |
11 | [cifar10_cnn.py](cifar10_cnn.py)
12 | Trains a simple deep CNN on the CIFAR10 small images dataset.
13 |
14 | [cifar10_resnet.py](cifar10_resnet.py)
15 | Trains a ResNet on the CIFAR10 small images dataset.
16 |
17 | [conv_lstm.py](conv_lstm.py)
18 | Demonstrates the use of a convolutional LSTM network.
19 |
20 | [image_ocr.py](image_ocr.py)
21 | Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR).
22 |
23 | [mnist_acgan.py](mnist_acgan.py)
 24 | Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset.
25 |
26 | [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py)
27 | Trains a Hierarchical RNN (HRNN) to classify MNIST digits.
28 |
29 | [mnist_siamese.py](mnist_siamese.py)
30 | Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset.
31 |
32 | [mnist_swwae.py](mnist_swwae.py)
33 | Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset.
34 |
35 | [mnist_transfer_cnn.py](mnist_transfer_cnn.py)
36 | Transfer learning toy example.
37 |
38 | ----
39 |
40 | ## Text & sequences examples
41 |
42 | [addition_rnn.py](addition_rnn.py)
43 | Implementation of sequence to sequence learning for performing addition of two numbers (as strings).
44 |
45 | [babi_rnn.py](babi_rnn.py)
46 | Trains a two-branch recurrent network on the bAbI dataset for reading comprehension.
47 |
48 | [babi_memnn.py](babi_memnn.py)
49 | Trains a memory network on the bAbI dataset for reading comprehension.
50 |
51 | [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py)
52 | Trains a Bidirectional LSTM on the IMDB sentiment classification task.
53 |
54 | [imdb_cnn.py](imdb_cnn.py)
55 | Demonstrates the use of Convolution1D for text classification.
56 |
57 | [imdb_cnn_lstm.py](imdb_cnn_lstm.py)
58 | Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task.
59 |
60 | [imdb_fasttext.py](imdb_fasttext.py)
61 | Trains a FastText model on the IMDB sentiment classification task.
62 |
63 | [imdb_lstm.py](imdb_lstm.py)
64 | Trains an LSTM model on the IMDB sentiment classification task.
65 |
66 | [lstm_stateful.py](lstm_stateful.py)
67 | Demonstrates how to use stateful RNNs to model long sequences efficiently.
68 |
69 | [pretrained_word_embeddings.py](pretrained_word_embeddings.py)
70 | Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset.
71 |
72 | [reuters_mlp.py](reuters_mlp.py)
 73 | Trains and evaluates a simple MLP on the Reuters newswire topic classification task.
74 |
75 | ----
76 |
77 | ## Generative models examples
78 |
79 | [lstm_text_generation.py](lstm_text_generation.py)
80 | Generates text from Nietzsche's writings.
81 |
82 | [conv_filter_visualization.py](conv_filter_visualization.py)
83 | Visualization of the filters of VGG16, via gradient ascent in input space.
84 |
85 | [deep_dream.py](deep_dream.py)
86 | Deep Dreams in Keras.
87 |
88 | [neural_doodle.py](neural_doodle.py)
89 | Neural doodle.
90 |
91 | [neural_style_transfer.py](neural_style_transfer.py)
92 | Neural style transfer.
93 |
94 | [variational_autoencoder.py](variational_autoencoder.py)
95 | Demonstrates how to build a variational autoencoder.
96 |
97 | [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py)
98 | Demonstrates how to build a variational autoencoder with Keras using deconvolution layers.
99 |
100 | ----
101 |
102 | ## Examples demonstrating specific Keras functionality
103 |
104 | [antirectifier.py](antirectifier.py)
105 | Demonstrates how to write custom layers for Keras.
106 |
107 | [mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py)
108 | Demonstrates how to use the sklearn wrapper.
109 |
110 | [mnist_irnn.py](mnist_irnn.py)
111 | Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al.
112 |
113 | [mnist_net2net.py](mnist_net2net.py)
114 | Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer".
115 |
116 | [reuters_mlp_relu_vs_selu.py](reuters_mlp_relu_vs_selu.py)
117 | Compares self-normalizing MLPs with regular MLPs.
118 |
119 | [mnist_tfrecord.py](mnist_tfrecord.py)
120 | MNIST dataset with TFRecords, the standard TensorFlow data format.
121 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/antirectifier.py:
--------------------------------------------------------------------------------
  1 | '''This example demonstrates how to write custom layers for Keras.
2 |
3 | We build a custom activation layer called 'Antirectifier',
4 | which modifies the shape of the tensor that passes through it.
5 | We need to specify two methods: `compute_output_shape` and `call`.
6 |
7 | Note that the same result can also be achieved via a Lambda layer.
8 |
9 | Because our custom layer is written with primitives from the Keras
10 | backend (`K`), our code can run both on TensorFlow and Theano.
11 | '''
12 |
13 | from __future__ import print_function
14 | import keras
15 | from keras.models import Sequential
16 | from keras import layers
17 | from keras.datasets import mnist
18 | from keras import backend as K
19 |
20 |
21 | class Antirectifier(layers.Layer):
22 | '''This is the combination of a sample-wise
23 | L2 normalization with the concatenation of the
24 | positive part of the input with the negative part
25 | of the input. The result is a tensor of samples that are
26 | twice as large as the input samples.
27 |
28 | It can be used in place of a ReLU.
29 |
30 | # Input shape
31 | 2D tensor of shape (samples, n)
32 |
33 | # Output shape
34 | 2D tensor of shape (samples, 2*n)
35 |
36 | # Theoretical justification
37 | When applying ReLU, assuming that the distribution
38 | of the previous output is approximately centered around 0.,
39 | you are discarding half of your input. This is inefficient.
40 |
 41 |     Antirectifier returns all-positive outputs like ReLU,
 42 |     without discarding any data.
 43 | 
 44 |     Tests on MNIST show that Antirectifier can train networks
 45 |     with half as many parameters yet with comparable
 46 |     classification accuracy to an equivalent ReLU-based network.
47 | '''
48 |
49 | def compute_output_shape(self, input_shape):
50 | shape = list(input_shape)
51 | assert len(shape) == 2 # only valid for 2D tensors
52 | shape[-1] *= 2
53 | return tuple(shape)
54 |
55 | def call(self, inputs):
56 | inputs -= K.mean(inputs, axis=1, keepdims=True)
57 | inputs = K.l2_normalize(inputs, axis=1)
58 | pos = K.relu(inputs)
59 | neg = K.relu(-inputs)
60 | return K.concatenate([pos, neg], axis=1)
61 |
62 | # global parameters
63 | batch_size = 128
64 | num_classes = 10
65 | epochs = 40
66 |
 67 | # the data, split between train and test sets
68 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
69 |
70 | x_train = x_train.reshape(60000, 784)
71 | x_test = x_test.reshape(10000, 784)
72 | x_train = x_train.astype('float32')
73 | x_test = x_test.astype('float32')
74 | x_train /= 255
75 | x_test /= 255
76 | print(x_train.shape[0], 'train samples')
77 | print(x_test.shape[0], 'test samples')
78 |
79 | # convert class vectors to binary class matrices
80 | y_train = keras.utils.to_categorical(y_train, num_classes)
81 | y_test = keras.utils.to_categorical(y_test, num_classes)
82 |
83 | # build the model
84 | model = Sequential()
85 | model.add(layers.Dense(256, input_shape=(784,)))
86 | model.add(Antirectifier())
87 | model.add(layers.Dropout(0.1))
88 | model.add(layers.Dense(256))
89 | model.add(Antirectifier())
90 | model.add(layers.Dropout(0.1))
91 | model.add(layers.Dense(num_classes))
92 | model.add(layers.Activation('softmax'))
93 |
94 | # compile the model
95 | model.compile(loss='categorical_crossentropy',
96 | optimizer='rmsprop',
97 | metrics=['accuracy'])
98 |
99 | # train the model
100 | model.fit(x_train, y_train,
101 | batch_size=batch_size,
102 | epochs=epochs,
103 | verbose=1,
104 | validation_data=(x_test, y_test))
105 |
106 | # next, compare with an equivalent network
107 | # with 2x bigger Dense layers and ReLU
108 |
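Editorial note: the docstring mentions that the same behaviour can also be obtained with a Lambda layer instead of a custom Layer subclass. A minimal sketch of that variant follows (not part of the example); `antirectifier` and `antirectifier_output_shape` are names introduced here for illustration.

    # Editorial sketch: the Antirectifier expressed as a Lambda layer.
    from keras import layers
    from keras import backend as K

    def antirectifier(x):
        x -= K.mean(x, axis=1, keepdims=True)
        x = K.l2_normalize(x, axis=1)
        return K.concatenate([K.relu(x), K.relu(-x)], axis=1)

    def antirectifier_output_shape(input_shape):
        shape = list(input_shape)
        shape[-1] *= 2
        return tuple(shape)

    # Used in place of `model.add(Antirectifier())`:
    # model.add(layers.Lambda(antirectifier, output_shape=antirectifier_output_shape))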
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/cifar10_cnn.py:
--------------------------------------------------------------------------------
1 | '''Train a simple deep CNN on the CIFAR10 small images dataset.
2 |
3 | It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs.
4 | (it's still underfitting at that point, though).
5 | '''
6 |
7 | from __future__ import print_function
8 | import keras
9 | from keras.datasets import cifar10
10 | from keras.preprocessing.image import ImageDataGenerator
11 | from keras.models import Sequential
12 | from keras.layers import Dense, Dropout, Activation, Flatten
13 | from keras.layers import Conv2D, MaxPooling2D
14 | import os
15 |
16 | batch_size = 32
17 | num_classes = 10
18 | epochs = 100
19 | data_augmentation = True
20 | num_predictions = 20
21 | save_dir = os.path.join(os.getcwd(), 'saved_models')
22 | model_name = 'keras_cifar10_trained_model.h5'
23 |
 24 | # The data, split between train and test sets:
25 | (x_train, y_train), (x_test, y_test) = cifar10.load_data()
26 | print('x_train shape:', x_train.shape)
27 | print(x_train.shape[0], 'train samples')
28 | print(x_test.shape[0], 'test samples')
29 |
30 | # Convert class vectors to binary class matrices.
31 | y_train = keras.utils.to_categorical(y_train, num_classes)
32 | y_test = keras.utils.to_categorical(y_test, num_classes)
33 |
34 | model = Sequential()
35 | model.add(Conv2D(32, (3, 3), padding='same',
36 | input_shape=x_train.shape[1:]))
37 | model.add(Activation('relu'))
38 | model.add(Conv2D(32, (3, 3)))
39 | model.add(Activation('relu'))
40 | model.add(MaxPooling2D(pool_size=(2, 2)))
41 | model.add(Dropout(0.25))
42 |
43 | model.add(Conv2D(64, (3, 3), padding='same'))
44 | model.add(Activation('relu'))
45 | model.add(Conv2D(64, (3, 3)))
46 | model.add(Activation('relu'))
47 | model.add(MaxPooling2D(pool_size=(2, 2)))
48 | model.add(Dropout(0.25))
49 |
50 | model.add(Flatten())
51 | model.add(Dense(512))
52 | model.add(Activation('relu'))
53 | model.add(Dropout(0.5))
54 | model.add(Dense(num_classes))
55 | model.add(Activation('softmax'))
56 |
57 | # initiate RMSprop optimizer
58 | opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
59 |
60 | # Let's train the model using RMSprop
61 | model.compile(loss='categorical_crossentropy',
62 | optimizer=opt,
63 | metrics=['accuracy'])
64 |
65 | x_train = x_train.astype('float32')
66 | x_test = x_test.astype('float32')
67 | x_train /= 255
68 | x_test /= 255
69 |
70 | if not data_augmentation:
 71 |     print('Not using data augmentation.')
72 | model.fit(x_train, y_train,
73 | batch_size=batch_size,
74 | epochs=epochs,
75 | validation_data=(x_test, y_test),
76 | shuffle=True)
77 | else:
 78 |     print('Using real-time data augmentation.')
 79 |     # This will do preprocessing and real-time data augmentation:
80 | datagen = ImageDataGenerator(
81 | featurewise_center=False, # set input mean to 0 over the dataset
82 | samplewise_center=False, # set each sample mean to 0
83 | featurewise_std_normalization=False, # divide inputs by std of the dataset
84 | samplewise_std_normalization=False, # divide each input by its std
85 | zca_whitening=False, # apply ZCA whitening
86 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180)
87 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
88 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height)
89 | horizontal_flip=True, # randomly flip images
90 | vertical_flip=False) # randomly flip images
91 |
92 | # Compute quantities required for feature-wise normalization
93 | # (std, mean, and principal components if ZCA whitening is applied).
94 | datagen.fit(x_train)
95 |
96 | # Fit the model on the batches generated by datagen.flow().
97 | model.fit_generator(datagen.flow(x_train, y_train,
98 | batch_size=batch_size),
99 | epochs=epochs,
100 | validation_data=(x_test, y_test),
101 | workers=4,
102 | steps_per_epoch=100)
103 |
104 | # Save model and weights
105 | if not os.path.isdir(save_dir):
106 | os.makedirs(save_dir)
107 | model_path = os.path.join(save_dir, model_name)
108 | model.save(model_path)
109 | print('Saved trained model at %s ' % model_path)
110 |
111 | # Score trained model.
112 | scores = model.evaluate(x_test, y_test, verbose=1)
113 | print('Test loss:', scores[0])
114 | print('Test accuracy:', scores[1])
115 |
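Editorial remark on the augmented branch above: with `steps_per_epoch=100` and `batch_size=32`, each "epoch" of `fit_generator` draws only 3,200 augmented images, far fewer than the 50,000 CIFAR-10 training images. If the intent is to cycle through the whole training set once per epoch, a common choice is the following sketch (not part of the example):

    # Editorial sketch: one full pass over the training data per epoch.
    steps_per_epoch = len(x_train) // batch_size   # 50000 // 32 = 1562 steps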
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/conv_filter_visualization.py:
--------------------------------------------------------------------------------
1 | '''Visualization of the filters of VGG16, via gradient ascent in input space.
2 |
3 | This script can run on CPU in a few minutes.
4 |
5 | Results example: http://i.imgur.com/4nj4KjN.jpg
6 | '''
7 | from __future__ import print_function
8 |
9 | from scipy.misc import imsave
10 | import numpy as np
11 | import time
12 | from keras.applications import vgg16
13 | from keras import backend as K
14 |
15 | # dimensions of the generated pictures for each filter.
16 | img_width = 128
17 | img_height = 128
18 |
19 | # the name of the layer we want to visualize
20 | # (see model definition at keras/applications/vgg16.py)
21 | layer_name = 'block5_conv1'
22 |
23 | # util function to convert a tensor into a valid image
24 |
25 |
26 | def deprocess_image(x):
27 | # normalize tensor: center on 0., ensure std is 0.1
28 | x -= x.mean()
29 | x /= (x.std() + K.epsilon())
30 | x *= 0.1
31 |
32 | # clip to [0, 1]
33 | x += 0.5
34 | x = np.clip(x, 0, 1)
35 |
36 | # convert to RGB array
37 | x *= 255
38 | if K.image_data_format() == 'channels_first':
39 | x = x.transpose((1, 2, 0))
40 | x = np.clip(x, 0, 255).astype('uint8')
41 | return x
42 |
43 | # build the VGG16 network with ImageNet weights
44 | model = vgg16.VGG16(weights='imagenet', include_top=False)
45 | print('Model loaded.')
46 |
47 | model.summary()
48 |
49 | # this is the placeholder for the input images
50 | input_img = model.input
51 |
52 | # get the symbolic outputs of each "key" layer (we gave them unique names).
53 | layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
54 |
55 |
56 | def normalize(x):
57 | # utility function to normalize a tensor by its L2 norm
58 | return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())
59 |
60 |
61 | kept_filters = []
62 | for filter_index in range(200):
63 | # we only scan through the first 200 filters,
64 | # but there are actually 512 of them
65 | print('Processing filter %d' % filter_index)
66 | start_time = time.time()
67 |
68 | # we build a loss function that maximizes the activation
69 | # of the nth filter of the layer considered
70 | layer_output = layer_dict[layer_name].output
71 | if K.image_data_format() == 'channels_first':
72 | loss = K.mean(layer_output[:, filter_index, :, :])
73 | else:
74 | loss = K.mean(layer_output[:, :, :, filter_index])
75 |
76 | # we compute the gradient of the input picture wrt this loss
77 | grads = K.gradients(loss, input_img)[0]
78 |
79 | # normalization trick: we normalize the gradient
80 | grads = normalize(grads)
81 |
82 | # this function returns the loss and grads given the input picture
83 | iterate = K.function([input_img], [loss, grads])
84 |
85 | # step size for gradient ascent
86 | step = 1.
87 |
88 | # we start from a gray image with some random noise
89 | if K.image_data_format() == 'channels_first':
90 | input_img_data = np.random.random((1, 3, img_width, img_height))
91 | else:
92 | input_img_data = np.random.random((1, img_width, img_height, 3))
93 | input_img_data = (input_img_data - 0.5) * 20 + 128
94 |
95 | # we run gradient ascent for 20 steps
96 | for i in range(20):
97 | loss_value, grads_value = iterate([input_img_data])
98 | input_img_data += grads_value * step
99 |
100 | print('Current loss value:', loss_value)
101 | if loss_value <= 0.:
102 | # some filters get stuck to 0, we can skip them
103 | break
104 |
105 | # decode the resulting input image
106 | if loss_value > 0:
107 | img = deprocess_image(input_img_data[0])
108 | kept_filters.append((img, loss_value))
109 | end_time = time.time()
110 | print('Filter %d processed in %ds' % (filter_index, end_time - start_time))
111 |
112 | # we will stitch the best 64 filters on an 8 x 8 grid.
113 | n = 8
114 |
115 | # the filters that have the highest loss are assumed to be better-looking.
116 | # we will only keep the top 64 filters.
117 | kept_filters.sort(key=lambda x: x[1], reverse=True)
118 | kept_filters = kept_filters[:n * n]
119 |
120 | # build a black picture with enough space for
121 | # our 8 x 8 filters of size 128 x 128, with a 5px margin in between
122 | margin = 5
123 | width = n * img_width + (n - 1) * margin
124 | height = n * img_height + (n - 1) * margin
125 | stitched_filters = np.zeros((width, height, 3))
126 |
127 | # fill the picture with our saved filters
128 | for i in range(n):
129 | for j in range(n):
130 | img, loss = kept_filters[i * n + j]
131 | stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width,
132 | (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img
133 |
134 | # save the result to disk
135 | imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters)
136 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/conv_lstm.py:
--------------------------------------------------------------------------------
1 | """ This script demonstrates the use of a convolutional LSTM network.
2 |
3 | This network is used to predict the next frame of an artificially
4 | generated movie which contains moving squares.
5 | """
6 | from keras.models import Sequential
7 | from keras.layers.convolutional import Conv3D
8 | from keras.layers.convolutional_recurrent import ConvLSTM2D
9 | from keras.layers.normalization import BatchNormalization
10 | import numpy as np
11 | import pylab as plt
12 |
 13 | # We create a layer which takes as input movies of shape
14 | # (n_frames, width, height, channels) and returns a movie
15 | # of identical shape.
16 |
17 | seq = Sequential()
18 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
19 | input_shape=(None, 40, 40, 1),
20 | padding='same', return_sequences=True))
21 | seq.add(BatchNormalization())
22 |
23 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
24 | padding='same', return_sequences=True))
25 | seq.add(BatchNormalization())
26 |
27 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
28 | padding='same', return_sequences=True))
29 | seq.add(BatchNormalization())
30 |
31 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
32 | padding='same', return_sequences=True))
33 | seq.add(BatchNormalization())
34 |
35 | seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),
36 | activation='sigmoid',
37 | padding='same', data_format='channels_last'))
38 | seq.compile(loss='binary_crossentropy', optimizer='adadelta')
39 |
40 |
 41 | # Artificial data generation:
42 | # Generate movies with 3 to 7 moving squares inside.
43 | # The squares are of shape 1x1 or 2x2 pixels,
44 | # which move linearly over time.
45 | # For convenience we first create movies with bigger width and height (80x80)
46 | # and at the end we select a 40x40 window.
47 |
48 | def generate_movies(n_samples=1200, n_frames=15):
49 | row = 80
50 | col = 80
51 | noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)
52 | shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
53 | dtype=np.float)
54 |
55 | for i in range(n_samples):
56 | # Add 3 to 7 moving squares
57 | n = np.random.randint(3, 8)
58 |
59 | for j in range(n):
60 | # Initial position
61 | xstart = np.random.randint(20, 60)
62 | ystart = np.random.randint(20, 60)
63 | # Direction of motion
64 | directionx = np.random.randint(0, 3) - 1
65 | directiony = np.random.randint(0, 3) - 1
66 |
67 | # Size of the square
68 | w = np.random.randint(2, 4)
69 |
70 | for t in range(n_frames):
71 | x_shift = xstart + directionx * t
72 | y_shift = ystart + directiony * t
73 | noisy_movies[i, t, x_shift - w: x_shift + w,
74 | y_shift - w: y_shift + w, 0] += 1
75 |
76 | # Make it more robust by adding noise.
77 | # The idea is that if during inference,
78 | # the value of the pixel is not exactly one,
79 | # we need to train the network to be robust and still
80 | # consider it as a pixel belonging to a square.
81 | if np.random.randint(0, 2):
82 | noise_f = (-1)**np.random.randint(0, 2)
83 | noisy_movies[i, t,
84 | x_shift - w - 1: x_shift + w + 1,
85 | y_shift - w - 1: y_shift + w + 1,
86 | 0] += noise_f * 0.1
87 |
88 | # Shift the ground truth by 1
89 | x_shift = xstart + directionx * (t + 1)
90 | y_shift = ystart + directiony * (t + 1)
91 | shifted_movies[i, t, x_shift - w: x_shift + w,
92 | y_shift - w: y_shift + w, 0] += 1
93 |
94 | # Cut to a 40x40 window
95 | noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
96 | shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
97 | noisy_movies[noisy_movies >= 1] = 1
98 | shifted_movies[shifted_movies >= 1] = 1
99 | return noisy_movies, shifted_movies
100 |
101 | # Train the network
102 | noisy_movies, shifted_movies = generate_movies(n_samples=1200)
103 | seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
104 | epochs=300, validation_split=0.05)
105 |
106 | # Testing the network on one movie
107 | # feed it with the first 7 positions and then
108 | # predict the new positions
109 | which = 1004
110 | track = noisy_movies[which][:7, ::, ::, ::]
111 |
112 | for j in range(16):
113 | new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
114 | new = new_pos[::, -1, ::, ::, ::]
115 | track = np.concatenate((track, new), axis=0)
116 |
117 |
118 | # And then compare the predictions
119 | # to the ground truth
120 | track2 = noisy_movies[which][::, ::, ::, ::]
121 | for i in range(15):
122 | fig = plt.figure(figsize=(10, 5))
123 |
124 | ax = fig.add_subplot(121)
125 |
126 | if i >= 7:
127 | ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
128 | else:
129 | ax.text(1, 3, 'Initial trajectory', fontsize=20)
130 |
131 | toplot = track[i, ::, ::, 0]
132 |
133 | plt.imshow(toplot)
134 | ax = fig.add_subplot(122)
135 | plt.text(1, 3, 'Ground truth', fontsize=20)
136 |
137 | toplot = track2[i, ::, ::, 0]
138 | if i >= 2:
139 | toplot = shifted_movies[which][i - 1, ::, ::, 0]
140 |
141 | plt.imshow(toplot)
142 | plt.savefig('%i_animate.png' % (i + 1))
143 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/imdb_bidirectional_lstm.py:
--------------------------------------------------------------------------------
1 | '''Trains a Bidirectional LSTM on the IMDB sentiment classification task.
2 |
3 | Output after 4 epochs on CPU: ~0.8146
4 | Time per epoch on CPU (Core i7): ~150s.
5 | '''
6 |
7 | from __future__ import print_function
8 | import numpy as np
9 |
10 | from keras.preprocessing import sequence
11 | from keras.models import Sequential
12 | from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
13 | from keras.datasets import imdb
14 |
15 |
16 | max_features = 20000
17 | # cut texts after this number of words
18 | # (among top max_features most common words)
19 | maxlen = 100
20 | batch_size = 32
21 |
 22 | print('Loading data...')
23 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
24 | print(len(x_train), 'train sequences')
25 | print(len(x_test), 'test sequences')
26 |
27 | print('Pad sequences (samples x time)')
28 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
29 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
30 | print('x_train shape:', x_train.shape)
31 | print('x_test shape:', x_test.shape)
32 | y_train = np.array(y_train)
33 | y_test = np.array(y_test)
34 |
35 | model = Sequential()
36 | model.add(Embedding(max_features, 128, input_length=maxlen))
37 | model.add(Bidirectional(LSTM(64)))
38 | model.add(Dropout(0.5))
39 | model.add(Dense(1, activation='sigmoid'))
40 |
41 | # try using different optimizers and different optimizer configs
42 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
43 |
44 | print('Train...')
45 | model.fit(x_train, y_train,
46 | batch_size=batch_size,
47 | epochs=4,
48 | validation_data=[x_test, y_test])
49 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/imdb_cnn.py:
--------------------------------------------------------------------------------
1 | '''This example demonstrates the use of Convolution1D for text classification.
2 |
3 | Gets to 0.89 test accuracy after 2 epochs.
4 | 90s/epoch on Intel i5 2.4Ghz CPU.
5 | 10s/epoch on Tesla K40 GPU.
6 | '''
7 | from __future__ import print_function
8 |
9 | from keras.preprocessing import sequence
10 | from keras.models import Sequential
11 | from keras.layers import Dense, Dropout, Activation
12 | from keras.layers import Embedding
13 | from keras.layers import Conv1D, GlobalMaxPooling1D
14 | from keras.datasets import imdb
15 |
16 | # set parameters:
17 | max_features = 5000
18 | maxlen = 400
19 | batch_size = 32
20 | embedding_dims = 50
21 | filters = 250
22 | kernel_size = 3
23 | hidden_dims = 250
24 | epochs = 2
25 |
 26 | print('Loading data...')
27 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
28 | print(len(x_train), 'train sequences')
29 | print(len(x_test), 'test sequences')
30 |
31 | print('Pad sequences (samples x time)')
32 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
33 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
34 | print('x_train shape:', x_train.shape)
35 | print('x_test shape:', x_test.shape)
36 |
37 | print('Build model...')
38 | model = Sequential()
39 |
40 | # we start off with an efficient embedding layer which maps
41 | # our vocab indices into embedding_dims dimensions
42 | model.add(Embedding(max_features,
43 | embedding_dims,
44 | input_length=maxlen))
45 | model.add(Dropout(0.2))
46 |
47 | # we add a Convolution1D, which will learn filters
48 | # word group filters of size filter_length:
49 | model.add(Conv1D(filters,
50 | kernel_size,
51 | padding='valid',
52 | activation='relu',
53 | strides=1))
54 | # we use max pooling:
55 | model.add(GlobalMaxPooling1D())
56 |
57 | # We add a vanilla hidden layer:
58 | model.add(Dense(hidden_dims))
59 | model.add(Dropout(0.2))
60 | model.add(Activation('relu'))
61 |
62 | # We project onto a single unit output layer, and squash it with a sigmoid:
63 | model.add(Dense(1))
64 | model.add(Activation('sigmoid'))
65 |
66 | model.compile(loss='binary_crossentropy',
67 | optimizer='adam',
68 | metrics=['accuracy'])
69 | model.fit(x_train, y_train,
70 | batch_size=batch_size,
71 | epochs=epochs,
72 | validation_data=(x_test, y_test))
73 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/imdb_cnn_lstm.py:
--------------------------------------------------------------------------------
1 | '''Train a recurrent convolutional network on the IMDB sentiment
2 | classification task.
3 |
4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU.
5 | '''
6 | from __future__ import print_function
7 |
8 | from keras.preprocessing import sequence
9 | from keras.models import Sequential
10 | from keras.layers import Dense, Dropout, Activation
11 | from keras.layers import Embedding
12 | from keras.layers import LSTM
13 | from keras.layers import Conv1D, MaxPooling1D
14 | from keras.datasets import imdb
15 |
16 | # Embedding
17 | max_features = 20000
18 | maxlen = 100
19 | embedding_size = 128
20 |
21 | # Convolution
22 | kernel_size = 5
23 | filters = 64
24 | pool_size = 4
25 |
26 | # LSTM
27 | lstm_output_size = 70
28 |
29 | # Training
30 | batch_size = 30
31 | epochs = 2
32 |
33 | '''
34 | Note:
35 | batch_size is highly sensitive.
36 | Only 2 epochs are needed as the dataset is very small.
37 | '''
38 |
 39 | print('Loading data...')
40 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
41 | print(len(x_train), 'train sequences')
42 | print(len(x_test), 'test sequences')
43 |
44 | print('Pad sequences (samples x time)')
45 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
46 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
47 | print('x_train shape:', x_train.shape)
48 | print('x_test shape:', x_test.shape)
49 |
50 | print('Build model...')
51 |
52 | model = Sequential()
53 | model.add(Embedding(max_features, embedding_size, input_length=maxlen))
54 | model.add(Dropout(0.25))
55 | model.add(Conv1D(filters,
56 | kernel_size,
57 | padding='valid',
58 | activation='relu',
59 | strides=1))
60 | model.add(MaxPooling1D(pool_size=pool_size))
61 | model.add(LSTM(lstm_output_size))
62 | model.add(Dense(1))
63 | model.add(Activation('sigmoid'))
64 |
65 | model.compile(loss='binary_crossentropy',
66 | optimizer='adam',
67 | metrics=['accuracy'])
68 |
69 | print('Train...')
70 | model.fit(x_train, y_train,
71 | batch_size=batch_size,
72 | epochs=epochs,
73 | validation_data=(x_test, y_test))
74 | score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
75 | print('Test score:', score)
76 | print('Test accuracy:', acc)
77 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/imdb_fasttext.py:
--------------------------------------------------------------------------------
1 | '''This example demonstrates the use of fasttext for text classification
2 |
3 | Based on Joulin et al's paper:
4 |
5 | Bags of Tricks for Efficient Text Classification
6 | https://arxiv.org/abs/1607.01759
7 |
8 | Results on IMDB datasets with uni and bi-gram embeddings:
9 | Uni-gram: 0.8813 test accuracy after 5 epochs. 8s/epoch on i7 cpu.
10 | Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTx 980M gpu.
11 | '''
12 |
13 | from __future__ import print_function
14 | import numpy as np
15 |
16 | from keras.preprocessing import sequence
17 | from keras.models import Sequential
18 | from keras.layers import Dense
19 | from keras.layers import Embedding
20 | from keras.layers import GlobalAveragePooling1D
21 | from keras.datasets import imdb
22 |
23 |
24 | def create_ngram_set(input_list, ngram_value=2):
25 | """
26 | Extract a set of n-grams from a list of integers.
27 |
28 | >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2)
29 | {(4, 9), (4, 1), (1, 4), (9, 4)}
30 |
31 | >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=3)
32 | [(1, 4, 9), (4, 9, 4), (9, 4, 1), (4, 1, 4)]
33 | """
34 | return set(zip(*[input_list[i:] for i in range(ngram_value)]))
35 |
36 |
37 | def add_ngram(sequences, token_indice, ngram_range=2):
38 | """
39 | Augment the input list of list (sequences) by appending n-grams values.
40 |
41 | Example: adding bi-gram
42 | >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
43 | >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017}
44 | >>> add_ngram(sequences, token_indice, ngram_range=2)
45 | [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42]]
46 |
47 | Example: adding tri-gram
48 | >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]]
49 | >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017, (7, 9, 2): 2018}
50 | >>> add_ngram(sequences, token_indice, ngram_range=3)
51 | [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42, 2018]]
52 | """
53 | new_sequences = []
54 | for input_list in sequences:
55 | new_list = input_list[:]
56 | for ngram_value in range(2, ngram_range + 1):
57 | for i in range(len(new_list) - ngram_value + 1):
58 | ngram = tuple(new_list[i:i + ngram_value])
59 | if ngram in token_indice:
60 | new_list.append(token_indice[ngram])
61 | new_sequences.append(new_list)
62 |
63 | return new_sequences
64 |
65 | # Set parameters:
66 | # ngram_range = 2 will add bi-grams features
67 | ngram_range = 1
68 | max_features = 20000
69 | maxlen = 400
70 | batch_size = 32
71 | embedding_dims = 50
72 | epochs = 5
73 |
 74 | print('Loading data...')
75 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
76 | print(len(x_train), 'train sequences')
77 | print(len(x_test), 'test sequences')
78 | print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
79 | print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))
80 |
81 | if ngram_range > 1:
82 | print('Adding {}-gram features'.format(ngram_range))
83 | # Create set of unique n-gram from the training set.
84 | ngram_set = set()
85 | for input_list in x_train:
86 | for i in range(2, ngram_range + 1):
87 | set_of_ngram = create_ngram_set(input_list, ngram_value=i)
88 | ngram_set.update(set_of_ngram)
89 |
90 | # Dictionary mapping n-gram token to a unique integer.
91 | # Integer values are greater than max_features in order
92 | # to avoid collision with existing features.
93 | start_index = max_features + 1
94 | token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
95 | indice_token = {token_indice[k]: k for k in token_indice}
96 |
97 | # max_features is the highest integer that could be found in the dataset.
98 | max_features = np.max(list(indice_token.keys())) + 1
99 |
100 | # Augmenting x_train and x_test with n-grams features
101 | x_train = add_ngram(x_train, token_indice, ngram_range)
102 | x_test = add_ngram(x_test, token_indice, ngram_range)
103 | print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
104 | print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))
105 |
106 | print('Pad sequences (samples x time)')
107 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
108 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
109 | print('x_train shape:', x_train.shape)
110 | print('x_test shape:', x_test.shape)
111 |
112 | print('Build model...')
113 | model = Sequential()
114 |
115 | # we start off with an efficient embedding layer which maps
116 | # our vocab indices into embedding_dims dimensions
117 | model.add(Embedding(max_features,
118 | embedding_dims,
119 | input_length=maxlen))
120 |
121 | # we add a GlobalAveragePooling1D, which will average the embeddings
122 | # of all words in the document
123 | model.add(GlobalAveragePooling1D())
124 |
125 | # We project onto a single unit output layer, and squash it with a sigmoid:
126 | model.add(Dense(1, activation='sigmoid'))
127 |
128 | model.compile(loss='binary_crossentropy',
129 | optimizer='adam',
130 | metrics=['accuracy'])
131 |
132 | model.fit(x_train, y_train,
133 | batch_size=batch_size,
134 | epochs=epochs,
135 | validation_data=(x_test, y_test))
136 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/imdb_lstm.py:
--------------------------------------------------------------------------------
1 | '''Trains an LSTM model on the IMDB sentiment classification task.
2 |
3 | The dataset is actually too small for LSTM to be of any advantage
4 | compared to simpler, much faster methods such as TF-IDF + LogReg.
5 |
6 | # Notes
7 |
8 | - RNNs are tricky. Choice of batch size is important,
9 | choice of loss and optimizer is critical, etc.
10 | Some configurations won't converge.
11 |
12 | - LSTM loss decrease patterns during training can be quite different
13 | from what you see with CNNs/MLPs/etc.
14 | '''
15 | from __future__ import print_function
16 |
17 | from keras.preprocessing import sequence
18 | from keras.models import Sequential
19 | from keras.layers import Dense, Embedding
20 | from keras.layers import LSTM
21 | from keras.datasets import imdb
22 |
23 | max_features = 20000
24 | maxlen = 80 # cut texts after this number of words (among top max_features most common words)
25 | batch_size = 32
26 |
 27 | print('Loading data...')
28 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
29 | print(len(x_train), 'train sequences')
30 | print(len(x_test), 'test sequences')
31 |
32 | print('Pad sequences (samples x time)')
33 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
34 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
35 | print('x_train shape:', x_train.shape)
36 | print('x_test shape:', x_test.shape)
37 |
38 | print('Build model...')
39 | model = Sequential()
40 | model.add(Embedding(max_features, 128))
41 | model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
42 | model.add(Dense(1, activation='sigmoid'))
43 |
44 | # try using different optimizers and different optimizer configs
45 | model.compile(loss='binary_crossentropy',
46 | optimizer='adam',
47 | metrics=['accuracy'])
48 |
49 | print('Train...')
50 | model.fit(x_train, y_train,
51 | batch_size=batch_size,
52 | epochs=15,
53 | validation_data=(x_test, y_test))
54 | score, acc = model.evaluate(x_test, y_test,
55 | batch_size=batch_size)
56 | print('Test score:', score)
57 | print('Test accuracy:', acc)
58 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/lstm_text_generation.py:
--------------------------------------------------------------------------------
1 | '''Example script to generate text from Nietzsche's writings.
2 |
3 | At least 20 epochs are required before the generated text
4 | starts sounding coherent.
5 |
6 | It is recommended to run this script on GPU, as recurrent
7 | networks are quite computationally intensive.
8 |
  9 | If you try this script on new data, make sure your corpus
10 | has at least ~100k characters. ~1M is better.
11 | '''
12 |
13 | from __future__ import print_function
14 | from keras.callbacks import LambdaCallback
15 | from keras.models import Sequential
16 | from keras.layers import Dense, Activation
17 | from keras.layers import LSTM
18 | from keras.optimizers import RMSprop
19 | from keras.utils.data_utils import get_file
20 | import numpy as np
21 | import random
22 | import sys
23 | import io
24 |
25 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
26 | with io.open(path, encoding='utf-8') as f:
27 | text = f.read().lower()
28 | print('corpus length:', len(text))
29 |
30 | chars = sorted(list(set(text)))
31 | print('total chars:', len(chars))
32 | char_indices = dict((c, i) for i, c in enumerate(chars))
33 | indices_char = dict((i, c) for i, c in enumerate(chars))
34 |
35 | # cut the text in semi-redundant sequences of maxlen characters
36 | maxlen = 40
37 | step = 3
38 | sentences = []
39 | next_chars = []
40 | for i in range(0, len(text) - maxlen, step):
41 | sentences.append(text[i: i + maxlen])
42 | next_chars.append(text[i + maxlen])
43 | print('nb sequences:', len(sentences))
44 |
45 | print('Vectorization...')
46 | x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
47 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
48 | for i, sentence in enumerate(sentences):
49 | for t, char in enumerate(sentence):
50 | x[i, t, char_indices[char]] = 1
51 | y[i, char_indices[next_chars[i]]] = 1
52 |
53 |
54 | # build the model: a single LSTM
55 | print('Build model...')
56 | model = Sequential()
57 | model.add(LSTM(128, input_shape=(maxlen, len(chars))))
58 | model.add(Dense(len(chars)))
59 | model.add(Activation('softmax'))
60 |
61 | optimizer = RMSprop(lr=0.01)
62 | model.compile(loss='categorical_crossentropy', optimizer=optimizer)
63 |
64 |
65 | def sample(preds, temperature=1.0):
66 | # helper function to sample an index from a probability array
67 | preds = np.asarray(preds).astype('float64')
68 | preds = np.log(preds) / temperature
69 | exp_preds = np.exp(preds)
70 | preds = exp_preds / np.sum(exp_preds)
71 | probas = np.random.multinomial(1, preds, 1)
72 | return np.argmax(probas)
73 |
74 |
75 | def on_epoch_end(epoch, logs):
76 | # Function invoked at end of each epoch. Prints generated text.
77 | print()
78 | print('----- Generating text after Epoch: %d' % epoch)
79 |
80 | start_index = random.randint(0, len(text) - maxlen - 1)
81 | for diversity in [0.2, 0.5, 1.0, 1.2]:
82 | print('----- diversity:', diversity)
83 |
84 | generated = ''
85 | sentence = text[start_index: start_index + maxlen]
86 | generated += sentence
87 | print('----- Generating with seed: "' + sentence + '"')
88 | sys.stdout.write(generated)
89 |
90 | for i in range(400):
91 | x_pred = np.zeros((1, maxlen, len(chars)))
92 | for t, char in enumerate(sentence):
93 | x_pred[0, t, char_indices[char]] = 1.
94 |
95 | preds = model.predict(x_pred, verbose=0)[0]
96 | next_index = sample(preds, diversity)
97 | next_char = indices_char[next_index]
98 |
99 | generated += next_char
100 | sentence = sentence[1:] + next_char
101 |
102 | sys.stdout.write(next_char)
103 | sys.stdout.flush()
104 | print()
105 |
106 | print_callback = LambdaCallback(on_epoch_end=on_epoch_end)
107 |
108 | model.fit(x, y,
109 | batch_size=128,
110 | epochs=60,
111 | callbacks=[print_callback])
112 |
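Editorial illustration of the `diversity` (temperature) parameter in `sample()` above: the predicted log-probabilities are divided by the temperature before renormalizing, so values below 1 sharpen the distribution toward the argmax and values above 1 flatten it. A tiny sketch (not part of the example):

    # Editorial sketch: effect of temperature on a toy distribution.
    import numpy as np

    p = np.array([0.6, 0.3, 0.1])
    for temperature in (0.2, 0.5, 1.0, 1.2):
        q = np.exp(np.log(p) / temperature)
        q /= q.sum()
        print(temperature, q.round(3))   # lower temperature -> more peaked on the argmax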
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_cnn.py:
--------------------------------------------------------------------------------
1 | '''Trains a simple convnet on the MNIST dataset.
2 |
3 | Gets to 99.25% test accuracy after 12 epochs
4 | (there is still a lot of margin for parameter tuning).
5 | 16 seconds per epoch on a GRID K520 GPU.
6 | '''
7 |
8 | from __future__ import print_function
9 | import keras
10 | from keras.datasets import mnist
11 | from keras.models import Sequential
12 | from keras.layers import Dense, Dropout, Flatten
13 | from keras.layers import Conv2D, MaxPooling2D
14 | from keras import backend as K
15 |
16 | batch_size = 128
17 | num_classes = 10
18 | epochs = 12
19 |
20 | # input image dimensions
21 | img_rows, img_cols = 28, 28
22 |
 23 | # the data, split between train and test sets
24 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
25 |
26 | if K.image_data_format() == 'channels_first':
27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
29 | input_shape = (1, img_rows, img_cols)
30 | else:
31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
33 | input_shape = (img_rows, img_cols, 1)
34 |
35 | x_train = x_train.astype('float32')
36 | x_test = x_test.astype('float32')
37 | x_train /= 255
38 | x_test /= 255
39 | print('x_train shape:', x_train.shape)
40 | print(x_train.shape[0], 'train samples')
41 | print(x_test.shape[0], 'test samples')
42 |
43 | # convert class vectors to binary class matrices
44 | y_train = keras.utils.to_categorical(y_train, num_classes)
45 | y_test = keras.utils.to_categorical(y_test, num_classes)
46 |
47 | model = Sequential()
48 | model.add(Conv2D(32, kernel_size=(3, 3),
49 | activation='relu',
50 | input_shape=input_shape))
51 | model.add(Conv2D(64, (3, 3), activation='relu'))
52 | model.add(MaxPooling2D(pool_size=(2, 2)))
53 | model.add(Dropout(0.25))
54 | model.add(Flatten())
55 | model.add(Dense(128, activation='relu'))
56 | model.add(Dropout(0.5))
57 | model.add(Dense(num_classes, activation='softmax'))
58 |
59 | model.compile(loss=keras.losses.categorical_crossentropy,
60 | optimizer=keras.optimizers.Adadelta(),
61 | metrics=['accuracy'])
62 |
63 | model.fit(x_train, y_train,
64 | batch_size=batch_size,
65 | epochs=epochs,
66 | verbose=1,
67 | validation_data=(x_test, y_test))
68 | score = model.evaluate(x_test, y_test, verbose=0)
69 | print('Test loss:', score[0])
70 | print('Test accuracy:', score[1])
71 |
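Once training finishes, the same model object can be used for inference. A small follow-up sketch (not part of the original example) that classifies the first test image, assuming the variables above are still in scope:

import numpy as np

probs = model.predict(x_test[:1])              # shape (1, 10): softmax score per digit
print('predicted digit:', np.argmax(probs[0]))
print('true digit:', np.argmax(y_test[0]))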
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_dataset_api.py:
--------------------------------------------------------------------------------
1 | '''MNIST classification with TensorFlow's Dataset API.
2 |
3 | Introduced in TensorFlow 1.3, the Dataset API is now the
4 | standard method for loading data into TensorFlow models.
5 | A Dataset is a sequence of elements, which are themselves
6 | composed of tf.Tensor components. For more details, see:
7 | https://www.tensorflow.org/programmers_guide/datasets
8 |
9 | To use this with Keras, we make a dataset out of elements
10 | of the form (input batch, output batch). From there, we
11 | create a one-shot iterator and a graph node corresponding
12 | to its get_next() method. Its components are then provided
13 | to the network's Input layer and the Model.compile() method,
14 | respectively.
15 |
16 | Note that from TensorFlow 1.4, tf.contrib.data is deprecated
17 | and tf.data is preferred. See the release notes for details.
18 |
19 | This example is intended to closely follow the
20 | mnist_tfrecord.py example.
21 | '''
22 | import numpy as np
23 | import os
24 | import tempfile
25 |
26 | import keras
27 | from keras import backend as K
28 | from keras import layers
29 | from keras.datasets import mnist
30 |
31 | import tensorflow as tf
32 | from tensorflow.contrib.data import Dataset
33 |
34 |
35 | if K.backend() != 'tensorflow':
36 | raise RuntimeError('This example can only run with the TensorFlow backend,'
37 | ' because it requires the Dataset API, which is not'
38 | ' supported on other platforms.')
39 |
40 |
41 | def cnn_layers(inputs):
42 | x = layers.Conv2D(32, (3, 3),
43 | activation='relu', padding='valid')(inputs)
44 | x = layers.MaxPooling2D(pool_size=(2, 2))(x)
45 | x = layers.Conv2D(64, (3, 3), activation='relu')(x)
46 | x = layers.MaxPooling2D(pool_size=(2, 2))(x)
47 | x = layers.Flatten()(x)
48 | x = layers.Dense(512, activation='relu')(x)
49 | x = layers.Dropout(0.5)(x)
50 | predictions = layers.Dense(num_classes,
51 | activation='softmax',
52 | name='x_train_out')(x)
53 | return predictions
54 |
55 |
56 | batch_size = 128
57 | buffer_size = 10000
58 | steps_per_epoch = int(np.ceil(60000 / float(batch_size))) # = 469
59 | epochs = 5
60 | num_classes = 10
61 |
62 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
63 | x_train = x_train.astype(np.float32) / 255
64 | x_train = np.expand_dims(x_train, -1)
65 | y_train = tf.one_hot(y_train, num_classes)
66 |
67 | # Create the dataset and its associated one-shot iterator.
68 | dataset = Dataset.from_tensor_slices((x_train, y_train))
69 | dataset = dataset.repeat()
70 | dataset = dataset.shuffle(buffer_size)
71 | dataset = dataset.batch(batch_size)
72 | iterator = dataset.make_one_shot_iterator()
73 |
74 | # Model creation using tensors from the get_next() graph node.
75 | inputs, targets = iterator.get_next()
76 | model_input = layers.Input(tensor=inputs)
77 | model_output = cnn_layers(model_input)
78 | train_model = keras.models.Model(inputs=model_input, outputs=model_output)
79 |
80 | train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5),
81 | loss='categorical_crossentropy',
82 | metrics=['accuracy'],
83 | target_tensors=[targets])
84 | train_model.summary()
85 |
86 | train_model.fit(epochs=epochs,
87 | steps_per_epoch=steps_per_epoch)
88 |
89 | # Save the model weights.
90 | weight_path = os.path.join(tempfile.gettempdir(), 'saved_wt.h5')
91 | train_model.save_weights(weight_path)
92 |
93 | # Clean up the TF session.
94 | K.clear_session()
95 |
96 | # Second session to test loading trained model without tensors.
97 | x_test = x_test.astype(np.float32)
98 | x_test = np.expand_dims(x_test, -1)
99 |
100 | x_test_inp = layers.Input(shape=x_test.shape[1:])
101 | test_out = cnn_layers(x_test_inp)
102 | test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out)
103 |
104 | test_model.load_weights(weight_path)
105 | test_model.compile(optimizer='rmsprop',
106 | loss='sparse_categorical_crossentropy',
107 | metrics=['accuracy'])
108 | test_model.summary()
109 |
110 | loss, acc = test_model.evaluate(x_test, y_test, num_classes)
111 | print('\nTest accuracy: {0}'.format(acc))
112 |
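The docstring notes that tf.contrib.data was deprecated in favor of tf.data from TensorFlow 1.4. A sketch of the equivalent pipeline built on tf.data (assuming the same x_train, y_train, buffer_size and batch_size as above; the rest of the script is unchanged):

dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
dataset = dataset.repeat().shuffle(buffer_size).batch(batch_size)
iterator = dataset.make_one_shot_iterator()
inputs, targets = iterator.get_next()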
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_denoising_autoencoder.py:
--------------------------------------------------------------------------------
1 | '''Trains a denoising autoencoder on MNIST dataset.
2 |
3 | Denoising is one of the classic applications of autoencoders.
4 | The denoising process removes unwanted noise that corrupted the
5 | true signal.
6 |
7 | Noise + Data ---> Denoising Autoencoder ---> Data
8 |
9 | Given a training dataset of corrupted data as input and
10 | true signal as output, a denoising autoencoder can recover the
11 | hidden structure to generate clean data.
12 |
13 | This example has a modular design. The encoder, decoder and autoencoder
14 | are 3 models that share weights. For example, after training the
15 | autoencoder, the encoder can be used to generate latent vectors
16 | of input data for low-dim visualization like PCA or TSNE.
17 | '''
18 |
19 | from __future__ import absolute_import
20 | from __future__ import division
21 | from __future__ import print_function
22 | import keras
23 | from keras.layers import Activation, Dense, Input
24 | from keras.layers import Conv2D, Flatten
25 | from keras.layers import Reshape, Conv2DTranspose
26 | from keras.models import Model
27 | from keras import backend as K
28 | from keras.datasets import mnist
29 | import numpy as np
30 | import matplotlib.pyplot as plt
31 | from PIL import Image
32 |
33 | np.random.seed(1337)
34 |
35 | # MNIST dataset
36 | (x_train, _), (x_test, _) = mnist.load_data()
37 |
38 | image_size = x_train.shape[1]
39 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
40 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
41 | x_train = x_train.astype('float32') / 255
42 | x_test = x_test.astype('float32') / 255
43 |
44 | # Generate corrupted MNIST images by adding noise with normal dist
45 | # centered at 0.5 and std=0.5
46 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape)
47 | x_train_noisy = x_train + noise
48 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape)
49 | x_test_noisy = x_test + noise
50 |
51 | x_train_noisy = np.clip(x_train_noisy, 0., 1.)
52 | x_test_noisy = np.clip(x_test_noisy, 0., 1.)
53 |
54 | # Network parameters
55 | input_shape = (image_size, image_size, 1)
56 | batch_size = 128
57 | kernel_size = 3
58 | latent_dim = 16
59 | # Encoder/Decoder number of CNN layers and filters per layer
60 | layer_filters = [32, 64]
61 |
62 | # Build the Autoencoder Model
63 | # First build the Encoder Model
64 | inputs = Input(shape=input_shape, name='encoder_input')
65 | x = inputs
66 | # Stack of Conv2D blocks
67 | # Notes:
68 | # 1) Use Batch Normalization before ReLU on deep networks
69 | # 2) Use MaxPooling2D as alternative to strides>1
70 | # - faster but not as good as strides>1
71 | for filters in layer_filters:
72 | x = Conv2D(filters=filters,
73 | kernel_size=kernel_size,
74 | strides=2,
75 | activation='relu',
76 | padding='same')(x)
77 |
78 | # Shape info needed to build Decoder Model
79 | shape = K.int_shape(x)
80 |
81 | # Generate the latent vector
82 | x = Flatten()(x)
83 | latent = Dense(latent_dim, name='latent_vector')(x)
84 |
85 | # Instantiate Encoder Model
86 | encoder = Model(inputs, latent, name='encoder')
87 | encoder.summary()
88 |
89 | # Build the Decoder Model
90 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input')
91 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
92 | x = Reshape((shape[1], shape[2], shape[3]))(x)
93 |
94 | # Stack of Transposed Conv2D blocks
95 | # Notes:
96 | # 1) Use Batch Normalization before ReLU on deep networks
97 | # 2) Use UpSampling2D as alternative to strides>1
98 | # - faster but not as good as strides>1
99 | for filters in layer_filters[::-1]:
100 | x = Conv2DTranspose(filters=filters,
101 | kernel_size=kernel_size,
102 | strides=2,
103 | activation='relu',
104 | padding='same')(x)
105 |
106 | x = Conv2DTranspose(filters=1,
107 | kernel_size=kernel_size,
108 | padding='same')(x)
109 |
110 | outputs = Activation('sigmoid', name='decoder_output')(x)
111 |
112 | # Instantiate Decoder Model
113 | decoder = Model(latent_inputs, outputs, name='decoder')
114 | decoder.summary()
115 |
116 | # Autoencoder = Encoder + Decoder
117 | # Instantiate Autoencoder Model
118 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
119 | autoencoder.summary()
120 |
121 | autoencoder.compile(loss='mse', optimizer='adam')
122 |
123 | # Train the autoencoder
124 | autoencoder.fit(x_train_noisy,
125 | x_train,
126 | validation_data=(x_test_noisy, x_test),
127 | epochs=30,
128 | batch_size=batch_size)
129 |
130 | # Predict the Autoencoder output from corrupted test images
131 | x_decoded = autoencoder.predict(x_test_noisy)
132 |
133 | # Display a grid of original, corrupted and denoised test images
134 | rows, cols = 10, 30
135 | num = rows * cols
136 | imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]])
137 | imgs = imgs.reshape((rows * 3, cols, image_size, image_size))
138 | imgs = np.vstack(np.split(imgs, rows, axis=1))
139 | imgs = imgs.reshape((rows * 3, -1, image_size, image_size))
140 | imgs = np.vstack([np.hstack(i) for i in imgs])
141 | imgs = (imgs * 255).astype(np.uint8)
142 | plt.figure()
143 | plt.axis('off')
144 | plt.title('Original images: top rows, '
145 | 'Corrupted Input: middle rows, '
146 | 'Denoised Input: bottom rows')
147 | plt.imshow(imgs, interpolation='none', cmap='gray')
148 | Image.fromarray(imgs).save('corrupted_and_denoised.png')
149 | plt.show()
150 |
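As the docstring says, the encoder shares its weights with the trained autoencoder, so after training it can project images into the 16-dimensional latent space on its own, e.g. as input for PCA or t-SNE. A minimal sketch, assuming the variables above are still in scope:

latent_vectors = encoder.predict(x_test, batch_size=batch_size)
print(latent_vectors.shape)  # (10000, 16) with latent_dim = 16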
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_hierarchical_rnn.py:
--------------------------------------------------------------------------------
1 | """Example of using Hierarchical RNN (HRNN) to classify MNIST digits.
2 |
3 | HRNNs can learn across multiple levels
4 | of temporal hierarchy over a complex sequence.
5 | Usually, the first recurrent layer of an HRNN
6 | encodes a sentence (e.g. of word vectors)
7 | into a sentence vector.
8 | The second recurrent layer then encodes a sequence of
9 | such vectors (encoded by the first layer) into a document vector.
10 | This document vector is considered to preserve both
11 | the word-level and sentence-level structure of the context.
12 |
13 | # References
14 |
15 | - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057)
16 | Encodes paragraphs and documents with HRNN.
17 | Results have shown that HRNN outperforms standard
18 | RNNs and may play some role in more sophisticated generation tasks like
19 | summarization or question answering.
20 | - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714)
21 | Achieved state-of-the-art results on
22 | skeleton based action recognition with 3 levels
23 | of bidirectional HRNN combined with fully connected layers.
24 |
25 | In the below MNIST example the first LSTM layer first encodes every
26 | column of pixels of shape (28, 1) to a column vector of shape (128,).
27 | The second LSTM layer then encodes these 28 column vectors of shape (28, 128)
28 | into an image vector representing the whole image.
29 | A final Dense layer is added for prediction.
30 |
31 | After 5 epochs: train acc: 0.9858, val acc: 0.9864
32 | """
33 | from __future__ import print_function
34 |
35 | import keras
36 | from keras.datasets import mnist
37 | from keras.models import Model
38 | from keras.layers import Input, Dense, TimeDistributed
39 | from keras.layers import LSTM
40 |
41 | # Training parameters.
42 | batch_size = 32
43 | num_classes = 10
44 | epochs = 5
45 |
46 | # Embedding dimensions.
47 | row_hidden = 128
48 | col_hidden = 128
49 |
50 | # The data, split between train and test sets.
51 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
52 |
53 | # Reshapes data to 4D for Hierarchical RNN.
54 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
55 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
56 | x_train = x_train.astype('float32')
57 | x_test = x_test.astype('float32')
58 | x_train /= 255
59 | x_test /= 255
60 | print('x_train shape:', x_train.shape)
61 | print(x_train.shape[0], 'train samples')
62 | print(x_test.shape[0], 'test samples')
63 |
64 | # Converts class vectors to binary class matrices.
65 | y_train = keras.utils.to_categorical(y_train, num_classes)
66 | y_test = keras.utils.to_categorical(y_test, num_classes)
67 |
68 | row, col, pixel = x_train.shape[1:]
69 |
70 | # 4D input.
71 | x = Input(shape=(row, col, pixel))
72 |
73 | # Encodes a row of pixels using TimeDistributed Wrapper.
74 | encoded_rows = TimeDistributed(LSTM(row_hidden))(x)
75 |
76 | # Encodes columns of encoded rows.
77 | encoded_columns = LSTM(col_hidden)(encoded_rows)
78 |
79 | # Final predictions and model.
80 | prediction = Dense(num_classes, activation='softmax')(encoded_columns)
81 | model = Model(x, prediction)
82 | model.compile(loss='categorical_crossentropy',
83 | optimizer='rmsprop',
84 | metrics=['accuracy'])
85 |
86 | # Training.
87 | model.fit(x_train, y_train,
88 | batch_size=batch_size,
89 | epochs=epochs,
90 | verbose=1,
91 | validation_data=(x_test, y_test))
92 |
93 | # Evaluation.
94 | scores = model.evaluate(x_test, y_test, verbose=0)
95 | print('Test loss:', scores[0])
96 | print('Test accuracy:', scores[1])
97 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_irnn.py:
--------------------------------------------------------------------------------
1 | '''This is a reproduction of the IRNN experiment
2 | with pixel-by-pixel sequential MNIST in
3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units"
4 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton
5 |
6 | arxiv:1504.00941v2 [cs.NE] 7 Apr 2015
7 | http://arxiv.org/pdf/1504.00941v2.pdf
8 |
9 | Optimizer is replaced with RMSprop which yields more stable and steady
10 | improvement.
11 |
12 | Reaches 0.93 train/test accuracy after 900 epochs
13 | (which roughly corresponds to 1687500 steps in the original paper.)
14 | '''
15 |
16 | from __future__ import print_function
17 |
18 | import keras
19 | from keras.datasets import mnist
20 | from keras.models import Sequential
21 | from keras.layers import Dense, Activation
22 | from keras.layers import SimpleRNN
23 | from keras import initializers
24 | from keras.optimizers import RMSprop
25 |
26 | batch_size = 32
27 | num_classes = 10
28 | epochs = 200
29 | hidden_units = 100
30 |
31 | learning_rate = 1e-6
32 | clip_norm = 1.0
33 |
34 | # the data, split between train and test sets
35 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
36 |
37 | x_train = x_train.reshape(x_train.shape[0], -1, 1)
38 | x_test = x_test.reshape(x_test.shape[0], -1, 1)
39 | x_train = x_train.astype('float32')
40 | x_test = x_test.astype('float32')
41 | x_train /= 255
42 | x_test /= 255
43 | print('x_train shape:', x_train.shape)
44 | print(x_train.shape[0], 'train samples')
45 | print(x_test.shape[0], 'test samples')
46 |
47 | # convert class vectors to binary class matrices
48 | y_train = keras.utils.to_categorical(y_train, num_classes)
49 | y_test = keras.utils.to_categorical(y_test, num_classes)
50 |
51 | print('Evaluate IRNN...')
52 | model = Sequential()
53 | model.add(SimpleRNN(hidden_units,
54 | kernel_initializer=initializers.RandomNormal(stddev=0.001),
55 | recurrent_initializer=initializers.Identity(gain=1.0),
56 | activation='relu',
57 | input_shape=x_train.shape[1:]))
58 | model.add(Dense(num_classes))
59 | model.add(Activation('softmax'))
60 | rmsprop = RMSprop(lr=learning_rate)
61 | model.compile(loss='categorical_crossentropy',
62 | optimizer=rmsprop,
63 | metrics=['accuracy'])
64 |
65 | model.fit(x_train, y_train,
66 | batch_size=batch_size,
67 | epochs=epochs,
68 | verbose=1,
69 | validation_data=(x_test, y_test))
70 |
71 | scores = model.evaluate(x_test, y_test, verbose=0)
72 | print('IRNN test score:', scores[0])
73 | print('IRNN test accuracy:', scores[1])
74 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_mlp.py:
--------------------------------------------------------------------------------
1 | '''Trains a simple deep NN on the MNIST dataset.
2 |
3 | Gets to 98.40% test accuracy after 20 epochs
4 | (there is *a lot* of margin for parameter tuning).
5 | 2 seconds per epoch on a K520 GPU.
6 | '''
7 |
8 | from __future__ import print_function
9 |
10 | import keras
11 | from keras.datasets import mnist
12 | from keras.models import Sequential
13 | from keras.layers import Dense, Dropout
14 | from keras.optimizers import RMSprop
15 |
16 | batch_size = 128
17 | num_classes = 10
18 | epochs = 20
19 |
20 | # the data, split between train and test sets
21 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
22 |
23 | x_train = x_train.reshape(60000, 784)
24 | x_test = x_test.reshape(10000, 784)
25 | x_train = x_train.astype('float32')
26 | x_test = x_test.astype('float32')
27 | x_train /= 255
28 | x_test /= 255
29 | print(x_train.shape[0], 'train samples')
30 | print(x_test.shape[0], 'test samples')
31 |
32 | # convert class vectors to binary class matrices
33 | y_train = keras.utils.to_categorical(y_train, num_classes)
34 | y_test = keras.utils.to_categorical(y_test, num_classes)
35 |
36 | model = Sequential()
37 | model.add(Dense(512, activation='relu', input_shape=(784,)))
38 | model.add(Dropout(0.2))
39 | model.add(Dense(512, activation='relu'))
40 | model.add(Dropout(0.2))
41 | model.add(Dense(num_classes, activation='softmax'))
42 |
43 | model.summary()
44 |
45 | model.compile(loss='categorical_crossentropy',
46 | optimizer=RMSprop(),
47 | metrics=['accuracy'])
48 |
49 | history = model.fit(x_train, y_train,
50 | batch_size=batch_size,
51 | epochs=epochs,
52 | verbose=1,
53 | validation_data=(x_test, y_test))
54 | score = model.evaluate(x_test, y_test, verbose=0)
55 | print('Test loss:', score[0])
56 | print('Test accuracy:', score[1])
57 |
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_siamese.py:
--------------------------------------------------------------------------------
1 | '''Trains a Siamese MLP on pairs of digits from the MNIST dataset.
2 |
3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the
4 | output of the shared network and by optimizing the contrastive loss (see paper
5 | for more details).
6 |
7 | # References
8 |
9 | - Dimensionality Reduction by Learning an Invariant Mapping
10 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
11 |
12 | Gets to 97.2% test accuracy after 20 epochs.
13 | 2 seconds per epoch on a Titan X Maxwell GPU
14 | '''
15 | from __future__ import absolute_import
16 | from __future__ import print_function
17 | import numpy as np
18 |
19 | import random
20 | from keras.datasets import mnist
21 | from keras.models import Model
22 | from keras.layers import Input, Flatten, Dense, Dropout, Lambda
23 | from keras.optimizers import RMSprop
24 | from keras import backend as K
25 |
26 | num_classes = 10
27 | epochs = 20
28 |
29 |
30 | def euclidean_distance(vects):
31 | x, y = vects
32 | return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
33 |
34 |
35 | def eucl_dist_output_shape(shapes):
36 | shape1, shape2 = shapes
37 | return (shape1[0], 1)
38 |
39 |
40 | def contrastive_loss(y_true, y_pred):
41 | '''Contrastive loss from Hadsell-et-al.'06
42 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
43 | '''
44 | margin = 1
45 | return K.mean(y_true * K.square(y_pred) +
46 | (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
47 |
48 |
49 | def create_pairs(x, digit_indices):
50 | '''Positive and negative pair creation.
51 | Alternates between positive and negative pairs.
52 | '''
53 | pairs = []
54 | labels = []
55 | n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1
56 | for d in range(num_classes):
57 | for i in range(n):
58 | z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
59 | pairs += [[x[z1], x[z2]]]
60 | inc = random.randrange(1, num_classes)
61 | dn = (d + inc) % num_classes
62 | z1, z2 = digit_indices[d][i], digit_indices[dn][i]
63 | pairs += [[x[z1], x[z2]]]
64 | labels += [1, 0]
65 | return np.array(pairs), np.array(labels)
66 |
67 |
68 | def create_base_network(input_shape):
69 | '''Base network to be shared (eq. to feature extraction).
70 | '''
71 | input = Input(shape=input_shape)
72 | x = Flatten()(input)
73 | x = Dense(128, activation='relu')(x)
74 | x = Dropout(0.1)(x)
75 | x = Dense(128, activation='relu')(x)
76 | x = Dropout(0.1)(x)
77 | x = Dense(128, activation='relu')(x)
78 | return Model(input, x)
79 |
80 |
81 | def compute_accuracy(y_true, y_pred):
82 | '''Compute classification accuracy with a fixed threshold on distances.
83 | '''
84 | pred = y_pred.ravel() < 0.5
85 | return np.mean(pred == y_true)
86 |
87 |
88 | def accuracy(y_true, y_pred):
89 | '''Compute classification accuracy with a fixed threshold on distances.
90 | '''
91 | return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
92 |
93 |
94 | # the data, split between train and test sets
95 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
96 | x_train = x_train.astype('float32')
97 | x_test = x_test.astype('float32')
98 | x_train /= 255
99 | x_test /= 255
100 | input_shape = x_train.shape[1:]
101 |
102 | # create training+test positive and negative pairs
103 | digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)]
104 | tr_pairs, tr_y = create_pairs(x_train, digit_indices)
105 |
106 | digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)]
107 | te_pairs, te_y = create_pairs(x_test, digit_indices)
108 |
109 | # network definition
110 | base_network = create_base_network(input_shape)
111 |
112 | input_a = Input(shape=input_shape)
113 | input_b = Input(shape=input_shape)
114 |
115 | # because we re-use the same instance `base_network`,
116 | # the weights of the network
117 | # will be shared across the two branches
118 | processed_a = base_network(input_a)
119 | processed_b = base_network(input_b)
120 |
121 | distance = Lambda(euclidean_distance,
122 | output_shape=eucl_dist_output_shape)([processed_a, processed_b])
123 |
124 | model = Model([input_a, input_b], distance)
125 |
126 | # train
127 | rms = RMSprop()
128 | model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
129 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y,
130 | batch_size=128,
131 | epochs=epochs,
132 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y))
133 |
134 | # compute final accuracy on training and test sets
135 | y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
136 | tr_acc = compute_accuracy(tr_y, y_pred)
137 | y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
138 | te_acc = compute_accuracy(te_y, y_pred)
139 |
140 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
141 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
142 |
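For intuition, contrastive_loss() can be restated in plain numpy: positive pairs (y=1) are penalized by their squared distance, while negative pairs (y=0) are only penalized while their distance is still inside the margin. A standalone sketch (hypothetical helper, not part of the example):

import numpy as np

def contrastive_loss_np(y_true, dist, margin=1.0):
    # same formula as the Keras version above, on plain arrays
    return np.mean(y_true * dist ** 2 +
                   (1 - y_true) * np.maximum(margin - dist, 0) ** 2)

print(contrastive_loss_np(np.array([1.0]), np.array([0.1])))  # 0.01: close positive pair, small loss
print(contrastive_loss_np(np.array([0.0]), np.array([0.1])))  # 0.81: close negative pair, large loss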
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_sklearn_wrapper.py:
--------------------------------------------------------------------------------
1 | '''Example of how to use sklearn wrapper
2 |
3 | Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find the best model
4 | '''
5 |
6 | from __future__ import print_function
7 |
8 | import keras
9 | from keras.datasets import mnist
10 | from keras.models import Sequential
11 | from keras.layers import Dense, Dropout, Activation, Flatten
12 | from keras.layers import Conv2D, MaxPooling2D
13 | from keras.wrappers.scikit_learn import KerasClassifier
14 | from keras import backend as K
15 | from sklearn.model_selection import GridSearchCV
16 |
17 |
18 | num_classes = 10
19 |
20 | # input image dimensions
21 | img_rows, img_cols = 28, 28
22 |
23 | # load training data and do basic data normalization
24 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
25 |
26 | if K.image_data_format() == 'channels_first':
27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
29 | input_shape = (1, img_rows, img_cols)
30 | else:
31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
33 | input_shape = (img_rows, img_cols, 1)
34 |
35 | x_train = x_train.astype('float32')
36 | x_test = x_test.astype('float32')
37 | x_train /= 255
38 | x_test /= 255
39 |
40 | # convert class vectors to binary class matrices
41 | y_train = keras.utils.to_categorical(y_train, num_classes)
42 | y_test = keras.utils.to_categorical(y_test, num_classes)
43 |
44 |
45 | def make_model(dense_layer_sizes, filters, kernel_size, pool_size):
46 | '''Creates model comprised of 2 convolutional layers followed by dense layers
47 |
48 | dense_layer_sizes: List of layer sizes.
49 | This list has one number for each layer
50 | filters: Number of convolutional filters in each convolutional layer
51 | kernel_size: Convolutional kernel size
52 | pool_size: Size of pooling area for max pooling
53 | '''
54 |
55 | model = Sequential()
56 | model.add(Conv2D(filters, kernel_size,
57 | padding='valid',
58 | input_shape=input_shape))
59 | model.add(Activation('relu'))
60 | model.add(Conv2D(filters, kernel_size))
61 | model.add(Activation('relu'))
62 | model.add(MaxPooling2D(pool_size=pool_size))
63 | model.add(Dropout(0.25))
64 |
65 | model.add(Flatten())
66 | for layer_size in dense_layer_sizes:
67 | model.add(Dense(layer_size))
68 | model.add(Activation('relu'))
69 | model.add(Dropout(0.5))
70 | model.add(Dense(num_classes))
71 | model.add(Activation('softmax'))
72 |
73 | model.compile(loss='categorical_crossentropy',
74 | optimizer='adadelta',
75 | metrics=['accuracy'])
76 |
77 | return model
78 |
79 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
80 | my_classifier = KerasClassifier(make_model, batch_size=32)
81 | validator = GridSearchCV(my_classifier,
82 | param_grid={'dense_layer_sizes': dense_size_candidates,
83 | # epochs is avail for tuning even when not
84 | # an argument to model building function
85 | 'epochs': [3, 6],
86 | 'filters': [8],
87 | 'kernel_size': [3],
88 | 'pool_size': [2]},
89 | scoring='neg_log_loss',
90 | n_jobs=1)
91 | validator.fit(x_train, y_train)
92 |
93 | print('The parameters of the best model are: ')
94 | print(validator.best_params_)
95 |
96 | # validator.best_estimator_ returns sklearn-wrapped version of best model.
97 | # validator.best_estimator_.model returns the (unwrapped) keras model
98 | best_model = validator.best_estimator_.model
99 | metric_names = best_model.metrics_names
100 | metric_values = best_model.evaluate(x_test, y_test)
101 | for metric, value in zip(metric_names, metric_values):
102 | print(metric, ': ', value)
103 |
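validator.best_params_ holds the winning hyper-parameter combination, so it can be fed back into make_model to retrain a fresh model on the full training set. A sketch, assuming the variables above are still in scope:

best = validator.best_params_
final_model = make_model(dense_layer_sizes=best['dense_layer_sizes'],
                         filters=best['filters'],
                         kernel_size=best['kernel_size'],
                         pool_size=best['pool_size'])
final_model.fit(x_train, y_train, batch_size=32, epochs=best['epochs'])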
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/mnist_transfer_cnn.py:
--------------------------------------------------------------------------------
1 | '''Transfer learning toy example.
2 |
3 | 1 - Train a simple convnet on the first five digits [0..4] of the MNIST dataset.
4 | 2 - Freeze convolutional layers and fine-tune dense layers
5 | for the classification of digits [5..9].
6 |
7 | Gets to 99.8% test accuracy after 5 epochs
8 | for the first five digits classifier
9 | and 99.2% for the last five digits after transfer + fine-tuning.
10 | '''
11 |
12 | from __future__ import print_function
13 |
14 | import datetime
15 | import keras
16 | from keras.datasets import mnist
17 | from keras.models import Sequential
18 | from keras.layers import Dense, Dropout, Activation, Flatten
19 | from keras.layers import Conv2D, MaxPooling2D
20 | from keras import backend as K
21 |
22 | now = datetime.datetime.now
23 |
24 | batch_size = 128
25 | num_classes = 5
26 | epochs = 5
27 |
28 | # input image dimensions
29 | img_rows, img_cols = 28, 28
30 | # number of convolutional filters to use
31 | filters = 32
32 | # size of pooling area for max pooling
33 | pool_size = 2
34 | # convolution kernel size
35 | kernel_size = 3
36 |
37 | if K.image_data_format() == 'channels_first':
38 | input_shape = (1, img_rows, img_cols)
39 | else:
40 | input_shape = (img_rows, img_cols, 1)
41 |
42 |
43 | def train_model(model, train, test, num_classes):
44 | x_train = train[0].reshape((train[0].shape[0],) + input_shape)
45 | x_test = test[0].reshape((test[0].shape[0],) + input_shape)
46 | x_train = x_train.astype('float32')
47 | x_test = x_test.astype('float32')
48 | x_train /= 255
49 | x_test /= 255
50 | print('x_train shape:', x_train.shape)
51 | print(x_train.shape[0], 'train samples')
52 | print(x_test.shape[0], 'test samples')
53 |
54 | # convert class vectors to binary class matrices
55 | y_train = keras.utils.to_categorical(train[1], num_classes)
56 | y_test = keras.utils.to_categorical(test[1], num_classes)
57 |
58 | model.compile(loss='categorical_crossentropy',
59 | optimizer='adadelta',
60 | metrics=['accuracy'])
61 |
62 | t = now()
63 | model.fit(x_train, y_train,
64 | batch_size=batch_size,
65 | epochs=epochs,
66 | verbose=1,
67 | validation_data=(x_test, y_test))
68 | print('Training time: %s' % (now() - t))
69 | score = model.evaluate(x_test, y_test, verbose=0)
70 | print('Test score:', score[0])
71 | print('Test accuracy:', score[1])
72 |
73 |
74 | # the data, split between train and test sets
75 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
76 |
77 | # create two datasets one with digits below 5 and one with 5 and above
78 | x_train_lt5 = x_train[y_train < 5]
79 | y_train_lt5 = y_train[y_train < 5]
80 | x_test_lt5 = x_test[y_test < 5]
81 | y_test_lt5 = y_test[y_test < 5]
82 |
83 | x_train_gte5 = x_train[y_train >= 5]
84 | y_train_gte5 = y_train[y_train >= 5] - 5
85 | x_test_gte5 = x_test[y_test >= 5]
86 | y_test_gte5 = y_test[y_test >= 5] - 5
87 |
88 | # define two groups of layers: feature (convolutions) and classification (dense)
89 | feature_layers = [
90 | Conv2D(filters, kernel_size,
91 | padding='valid',
92 | input_shape=input_shape),
93 | Activation('relu'),
94 | Conv2D(filters, kernel_size),
95 | Activation('relu'),
96 | MaxPooling2D(pool_size=pool_size),
97 | Dropout(0.25),
98 | Flatten(),
99 | ]
100 |
101 | classification_layers = [
102 | Dense(128),
103 | Activation('relu'),
104 | Dropout(0.5),
105 | Dense(num_classes),
106 | Activation('softmax')
107 | ]
108 |
109 | # create complete model
110 | model = Sequential(feature_layers + classification_layers)
111 |
112 | # train model for 5-digit classification [0..4]
113 | train_model(model,
114 | (x_train_lt5, y_train_lt5),
115 | (x_test_lt5, y_test_lt5), num_classes)
116 |
117 | # freeze feature layers and rebuild model
118 | for l in feature_layers:
119 | l.trainable = False
120 |
121 | # transfer: train dense layers for new classification task [5..9]
122 | train_model(model,
123 | (x_train_gte5, y_train_gte5),
124 | (x_test_gte5, y_test_gte5), num_classes)
125 |
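A quick way to confirm that the freeze took effect before the second call to train_model (a sketch, not in the script): Keras tracks trainable and frozen parameters separately, so only the dense classification layers should still contribute trainable weights.

print(len(model.trainable_weights))      # kernels/biases of the dense layers only
print(len(model.non_trainable_weights))  # kernels/biases of the frozen conv layers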
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/pretrained_word_embeddings.py:
--------------------------------------------------------------------------------
1 | '''This script loads pre-trained word embeddings (GloVe embeddings)
2 | into a frozen Keras Embedding layer, and uses it to
3 | train a text classification model on the 20 Newsgroup dataset
4 | (classification of newsgroup messages into 20 different categories).
5 |
6 | GloVe embedding data can be found at:
7 | http://nlp.stanford.edu/data/glove.6B.zip
8 | (source page: http://nlp.stanford.edu/projects/glove/)
9 |
10 | 20 Newsgroup data can be found at:
11 | http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
12 | '''
13 |
14 | from __future__ import print_function
15 |
16 | import os
17 | import sys
18 | import numpy as np
19 | from keras.preprocessing.text import Tokenizer
20 | from keras.preprocessing.sequence import pad_sequences
21 | from keras.utils import to_categorical
22 | from keras.layers import Dense, Input, GlobalMaxPooling1D
23 | from keras.layers import Conv1D, MaxPooling1D, Embedding
24 | from keras.models import Model
25 |
26 |
27 | BASE_DIR = ''
28 | GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B')
29 | TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup')
30 | MAX_SEQUENCE_LENGTH = 1000
31 | MAX_NUM_WORDS = 20000
32 | EMBEDDING_DIM = 100
33 | VALIDATION_SPLIT = 0.2
34 |
35 | # first, build index mapping words in the embeddings set
36 | # to their embedding vector
37 |
38 | print('Indexing word vectors.')
39 |
40 | embeddings_index = {}
41 | with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f:
42 | for line in f:
43 | values = line.split()
44 | word = values[0]
45 | coefs = np.asarray(values[1:], dtype='float32')
46 | embeddings_index[word] = coefs
47 |
48 | print('Found %s word vectors.' % len(embeddings_index))
49 |
50 | # second, prepare text samples and their labels
51 | print('Processing text dataset')
52 |
53 | texts = [] # list of text samples
54 | labels_index = {} # dictionary mapping label name to numeric id
55 | labels = [] # list of label ids
56 | for name in sorted(os.listdir(TEXT_DATA_DIR)):
57 | path = os.path.join(TEXT_DATA_DIR, name)
58 | if os.path.isdir(path):
59 | label_id = len(labels_index)
60 | labels_index[name] = label_id
61 | for fname in sorted(os.listdir(path)):
62 | if fname.isdigit():
63 | fpath = os.path.join(path, fname)
64 | args = {} if sys.version_info < (3,) else {'encoding': 'latin-1'}
65 | with open(fpath, **args) as f:
66 | t = f.read()
67 | i = t.find('\n\n') # skip header
68 | if 0 < i:
69 | t = t[i:]
70 | texts.append(t)
71 | labels.append(label_id)
72 |
73 | print('Found %s texts.' % len(texts))
74 |
75 | # finally, vectorize the text samples into a 2D integer tensor
76 | tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
77 | tokenizer.fit_on_texts(texts)
78 | sequences = tokenizer.texts_to_sequences(texts)
79 |
80 | word_index = tokenizer.word_index
81 | print('Found %s unique tokens.' % len(word_index))
82 |
83 | data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
84 |
85 | labels = to_categorical(np.asarray(labels))
86 | print('Shape of data tensor:', data.shape)
87 | print('Shape of label tensor:', labels.shape)
88 |
89 | # split the data into a training set and a validation set
90 | indices = np.arange(data.shape[0])
91 | np.random.shuffle(indices)
92 | data = data[indices]
93 | labels = labels[indices]
94 | num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
95 |
96 | x_train = data[:-num_validation_samples]
97 | y_train = labels[:-num_validation_samples]
98 | x_val = data[-num_validation_samples:]
99 | y_val = labels[-num_validation_samples:]
100 |
101 | print('Preparing embedding matrix.')
102 |
103 | # prepare embedding matrix
104 | num_words = min(MAX_NUM_WORDS, len(word_index) + 1)
105 | embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
106 | for word, i in word_index.items():
107 | if i >= MAX_NUM_WORDS:
108 | continue
109 | embedding_vector = embeddings_index.get(word)
110 | if embedding_vector is not None:
111 | # words not found in embedding index will be all-zeros.
112 | embedding_matrix[i] = embedding_vector
113 |
114 | # load pre-trained word embeddings into an Embedding layer
115 | # note that we set trainable = False so as to keep the embeddings fixed
116 | embedding_layer = Embedding(num_words,
117 | EMBEDDING_DIM,
118 | weights=[embedding_matrix],
119 | input_length=MAX_SEQUENCE_LENGTH,
120 | trainable=False)
121 |
122 | print('Training model.')
123 |
124 | # train a 1D convnet with global maxpooling
125 | sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
126 | embedded_sequences = embedding_layer(sequence_input)
127 | x = Conv1D(128, 5, activation='relu')(embedded_sequences)
128 | x = MaxPooling1D(5)(x)
129 | x = Conv1D(128, 5, activation='relu')(x)
130 | x = MaxPooling1D(5)(x)
131 | x = Conv1D(128, 5, activation='relu')(x)
132 | x = GlobalMaxPooling1D()(x)
133 | x = Dense(128, activation='relu')(x)
134 | preds = Dense(len(labels_index), activation='softmax')(x)
135 |
136 | model = Model(sequence_input, preds)
137 | model.compile(loss='categorical_crossentropy',
138 | optimizer='rmsprop',
139 | metrics=['acc'])
140 |
141 | model.fit(x_train, y_train,
142 | batch_size=128,
143 | epochs=10,
144 | validation_data=(x_val, y_val))
145 |
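Each word that appears both in the tokenizer vocabulary (within MAX_NUM_WORDS) and in the GloVe index gets its GloVe vector as a row of embedding_matrix; every other row stays zero, and trainable=False keeps all rows fixed during training. A small check sketch ('computer' is just an assumed example word; any sufficiently frequent word works):

import numpy as np

idx = word_index['computer']
if idx < MAX_NUM_WORDS and 'computer' in embeddings_index:
    print(np.allclose(embedding_matrix[idx], embeddings_index['computer']))  # True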
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/reuters_mlp.py:
--------------------------------------------------------------------------------
1 | '''Trains and evaluates a simple MLP
2 | on the Reuters newswire topic classification task.
3 | '''
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import keras
8 | from keras.datasets import reuters
9 | from keras.models import Sequential
10 | from keras.layers import Dense, Dropout, Activation
11 | from keras.preprocessing.text import Tokenizer
12 |
13 | max_words = 1000
14 | batch_size = 32
15 | epochs = 5
16 |
17 | print('Loading data...')
18 | (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,
19 | test_split=0.2)
20 | print(len(x_train), 'train sequences')
21 | print(len(x_test), 'test sequences')
22 |
23 | num_classes = np.max(y_train) + 1
24 | print(num_classes, 'classes')
25 |
26 | print('Vectorizing sequence data...')
27 | tokenizer = Tokenizer(num_words=max_words)
28 | x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
29 | x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
30 | print('x_train shape:', x_train.shape)
31 | print('x_test shape:', x_test.shape)
32 |
33 | print('Convert class vector to binary class matrix '
34 | '(for use with categorical_crossentropy)')
35 | y_train = keras.utils.to_categorical(y_train, num_classes)
36 | y_test = keras.utils.to_categorical(y_test, num_classes)
37 | print('y_train shape:', y_train.shape)
38 | print('y_test shape:', y_test.shape)
39 |
40 | print('Building model...')
41 | model = Sequential()
42 | model.add(Dense(512, input_shape=(max_words,)))
43 | model.add(Activation('relu'))
44 | model.add(Dropout(0.5))
45 | model.add(Dense(num_classes))
46 | model.add(Activation('softmax'))
47 |
48 | model.compile(loss='categorical_crossentropy',
49 | optimizer='adam',
50 | metrics=['accuracy'])
51 |
52 | history = model.fit(x_train, y_train,
53 | batch_size=batch_size,
54 | epochs=epochs,
55 | verbose=1,
56 | validation_split=0.1)
57 | score = model.evaluate(x_test, y_test,
58 | batch_size=batch_size, verbose=1)
59 | print('Test score:', score[0])
60 | print('Test accuracy:', score[1])
61 |
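sequences_to_matrix with mode='binary' turns each list of word ids into a fixed-length bag-of-words row: column j is 1 if word id j occurs in the sequence, which is why the Dense layer above takes an input of shape (max_words,). A tiny standalone sketch:

from keras.preprocessing.text import Tokenizer

tok = Tokenizer(num_words=10)
print(tok.sequences_to_matrix([[2, 5, 5]], mode='binary'))
# [[0. 0. 1. 0. 0. 1. 0. 0. 0. 0.]]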
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/saved_models/keras_cifar10_trained_model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mtianyan/NeuralNetworksGetStarted/bee9ba10531b00ddef5ec45f419707139bd4814e/7-caffe_and_keras/7-3 keras-master/examples/saved_models/keras_cifar10_trained_model.h5
--------------------------------------------------------------------------------
/7-caffe_and_keras/7-3 keras-master/examples/variational_autoencoder.py:
--------------------------------------------------------------------------------
1 | '''This script demonstrates how to build a variational autoencoder with Keras.
2 |
3 | #Reference
4 |
5 | - Auto-Encoding Variational Bayes
6 | https://arxiv.org/abs/1312.6114
7 | '''
8 | from __future__ import print_function
9 |
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | from scipy.stats import norm
13 |
14 | from keras.layers import Input, Dense, Lambda
15 | from keras.models import Model
16 | from keras import backend as K
17 | from keras import metrics
18 | from keras.datasets import mnist
19 |
20 | batch_size = 100
21 | original_dim = 784
22 | latent_dim = 2
23 | intermediate_dim = 256
24 | epochs = 50
25 | epsilon_std = 1.0
26 |
27 |
28 | x = Input(shape=(original_dim,))
29 | h = Dense(intermediate_dim, activation='relu')(x)
30 | z_mean = Dense(latent_dim)(h)
31 | z_log_var = Dense(latent_dim)(h)
32 |
33 |
34 | def sampling(args):
35 | z_mean, z_log_var = args
36 | epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0.,
37 | stddev=epsilon_std)
38 | return z_mean + K.exp(z_log_var / 2) * epsilon
39 |
40 | # note that "output_shape" isn't necessary with the TensorFlow backend
41 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
42 |
43 | # we instantiate these layers separately so as to reuse them later
44 | decoder_h = Dense(intermediate_dim, activation='relu')
45 | decoder_mean = Dense(original_dim, activation='sigmoid')
46 | h_decoded = decoder_h(z)
47 | x_decoded_mean = decoder_mean(h_decoded)
48 |
49 | # instantiate VAE model
50 | vae = Model(x, x_decoded_mean)
51 |
52 | # Compute VAE loss
53 | xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
54 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
55 | vae_loss = K.mean(xent_loss + kl_loss)
56 |
57 | vae.add_loss(vae_loss)
58 | vae.compile(optimizer='rmsprop')
59 | vae.summary()
60 |
61 |
62 | # train the VAE on MNIST digits
63 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
64 |
65 | x_train = x_train.astype('float32') / 255.
66 | x_test = x_test.astype('float32') / 255.
67 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
68 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
69 |
70 | vae.fit(x_train,
71 | shuffle=True,
72 | epochs=epochs,
73 | batch_size=batch_size,
74 | validation_data=(x_test, None))
75 |
76 | # build a model to project inputs on the latent space
77 | encoder = Model(x, z_mean)
78 |
79 | # display a 2D plot of the digit classes in the latent space
80 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
81 | plt.figure(figsize=(6, 6))
82 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
83 | plt.colorbar()
84 | plt.show()
85 |
86 | # build a digit generator that can sample from the learned distribution
87 | decoder_input = Input(shape=(latent_dim,))
88 | _h_decoded = decoder_h(decoder_input)
89 | _x_decoded_mean = decoder_mean(_h_decoded)
90 | generator = Model(decoder_input, _x_decoded_mean)
91 |
92 | # display a 2D manifold of the digits
93 | n = 15 # figure with 15x15 digits
94 | digit_size = 28
95 | figure = np.zeros((digit_size * n, digit_size * n))
96 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian
97 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian
98 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n))
99 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n))
100 |
101 | for i, yi in enumerate(grid_x):
102 | for j, xi in enumerate(grid_y):
103 | z_sample = np.array([[xi, yi]])
104 | x_decoded = generator.predict(z_sample)
105 | digit = x_decoded[0].reshape(digit_size, digit_size)
106 | figure[i * digit_size: (i + 1) * digit_size,
107 | j * digit_size: (j + 1) * digit_size] = digit
108 |
109 | plt.figure(figsize=(10, 10))
110 | plt.imshow(figure, cmap='Greys_r')
111 | plt.show()
112 |
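The kl_loss term above is the closed-form KL divergence between the encoder's Gaussian q(z|x) = N(z_mean, exp(z_log_var)) and the standard normal prior, summed over the latent dimensions. A tiny numpy sketch of the same quantity for made-up values:

import numpy as np

z_mean_ex = np.array([0.5, -0.2])
z_log_var_ex = np.array([0.0, 0.1])
kl = -0.5 * np.sum(1 + z_log_var_ex - z_mean_ex ** 2 - np.exp(z_log_var_ex))
print(kl)  # ~0.15; always >= 0, and 0 only when the posterior equals the prior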
--------------------------------------------------------------------------------
/mnist_data/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mtianyan/NeuralNetworksGetStarted/bee9ba10531b00ddef5ec45f419707139bd4814e/mnist_data/mnist.pkl.gz
--------------------------------------------------------------------------------
/utils/mnist_loader.py:
--------------------------------------------------------------------------------
1 | """
2 | mnist_loader
3 | ~~~~~~~~~~~~
4 |
5 | A library to load the MNIST image data. For details of the data
6 | structures that are returned, see the doc strings for ``load_data``
7 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the
8 | function usually called by our neural network code.
9 | """
10 |
11 | #### Libraries
12 | # Standard library
13 | import pickle
14 | import gzip
15 |
16 | # Third-party libraries
17 | import numpy as np
18 |
19 |
20 | def load_data():
21 | """Return the MNIST mnist_data as a tuple containing the training mnist_data,
22 | the validation mnist_data, and the test mnist_data.
23 |
24 | The ``training_data`` is returned as a tuple with two entries.
25 | The first entry contains the actual training images. This is a
26 | numpy ndarray with 50,000 entries. Each entry is, in turn, a
27 | numpy ndarray with 784 values, representing the 28 * 28 = 784
28 | pixels in a single MNIST image.
29 |
30 | The second entry in the ``training_data`` tuple is a numpy ndarray
31 | containing 50,000 entries. Those entries are just the digit
32 | values (0...9) for the corresponding images contained in the first
33 | entry of the tuple.
34 |
35 | The ``validation_data`` and ``test_data`` are similar, except
36 | each contains only 10,000 images.
37 |
38 | This is a nice data format, but for use in neural networks it's
39 | helpful to modify the format of the ``training_data`` a little.
40 | That's done in the wrapper function ``load_data_wrapper()``, see
41 | below.
42 | """
43 | f = gzip.open('./mnist_data/mnist.pkl.gz', 'rb')
44 | training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
45 | f.close()
46 | return (training_data, validation_data, test_data)
47 |
48 |
49 | def load_data_wrapper():
50 | """Return a tuple containing ``(training_data, validation_data,
51 | test_data)``. Based on ``load_data``, but the format is more
52 | convenient for use in our implementation of neural networks.
53 |
54 | In particular, ``training_data`` is a list containing 50,000
55 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
56 | containing the input image. ``y`` is a 10-dimensional
57 | numpy.ndarray representing the unit vector corresponding to the
58 | correct digit for ``x``.
59 |
60 | ``validation_data`` and ``test_data`` are lists containing 10,000
61 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
62 | numpy.ndarray containing the input image, and ``y`` is the
63 | corresponding classification, i.e., the digit values (integers)
64 | corresponding to ``x``.
65 |
66 | Obviously, this means we're using slightly different formats for
67 | the training data and the validation / test data. These formats
68 | turn out to be the most convenient for use in our neural network
69 | code."""
70 | tr_d, va_d, te_d = load_data()
71 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
72 | training_results = [vectorized_result(y) for y in tr_d[1]]
73 | training_data = list(zip(training_inputs, training_results))
74 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
75 | validation_data = list(zip(validation_inputs, va_d[1]))
76 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
77 | test_data = list(zip(test_inputs, te_d[1]))
78 | return (training_data, validation_data, test_data)
79 |
80 |
81 | def vectorized_result(j):
82 | """Return a 10-dimensional unit vector with a 1.0 in the jth
83 | position and zeroes elsewhere. This is used to convert a digit
84 | (0...9) into a corresponding desired output from the neural
85 | network."""
86 | e = np.zeros((10, 1))
87 | e[j] = 1.0
88 | return e
89 |
90 |
91 | def load_data_wrapper2():
92 | """Return a tuple containing ``(training_data, validation_data,
93 | test_data)``. Based on ``load_data``, but the format is more
94 | convenient for use in our implementation of neural networks.
95 |
96 | In particular, ``training_data`` is a list containing 50,000
97 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
98 | containing the input image. ``y`` is a 10-dimensional
99 | numpy.ndarray representing the unit vector corresponding to the
100 | correct digit for ``x``.
101 |
102 | ``validation_data`` and ``test_data`` are lists containing 10,000
103 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
104 | numpy.ndarray containing the input image, and ``y`` is the
105 | corresponding classification, i.e., the digit values (integers)
106 | corresponding to ``x``.
107 |
108 | Obviously, this means we're using slightly different formats for
109 | the training data and the validation / test data. These formats
110 | turn out to be the most convenient for use in our neural network
111 | code."""
112 | tr_d, va_d, te_d = load_data()
113 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
114 | training_results = [vectorized_result(y) for y in tr_d[1]]
115 | training_data = zip(training_inputs, training_results)
116 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
117 | validation_data = zip(validation_inputs, va_d[1])
118 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
119 | test_data = zip(test_inputs, te_d[1])
120 | return (training_inputs, training_results, validation_data, test_data)
121 |
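Typical usage, assuming the script is run from the repository root so that ./mnist_data/mnist.pkl.gz resolves (a sketch; the exact import path depends on how the calling script is laid out):

from utils import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
print(len(training_data), len(validation_data), len(test_data))  # 50000 10000 10000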
--------------------------------------------------------------------------------