├── .idea ├── NeuralNetworksGetStarted.iml ├── deployment.xml ├── dictionaries │ └── mtianyan.xml ├── inspectionProfiles │ └── Project_Default.xml ├── misc.xml ├── modules.xml ├── other.xml └── vcs.xml ├── 1-numpy_basic └── 1-numpy_basic.py ├── 2-feedforward_neural_network ├── 2-11 stochasticgradient_descent.py ├── 2-12 neural_network_mnist.py ├── 2-7 forward_propagation.py ├── 2-9 back_propagation.py └── 2.1-simple_network.py ├── 3-improve_neural_network_efficiency ├── 3-10 cross_entropy.py ├── 3-11 save_load_model.py ├── 3-12 neural_network_mnist_v2.py ├── 3-5 initialization_parameters.py └── 3-7 L2_regularization.py ├── 4-convolutional_neural_network ├── 4-10 SoftmaxLayer.py ├── 4-11 ConvolutionalNeuralNetwork_mnist(gpu).py ├── 4-2 neural_network_mnist_v3(gpu).py └── 4-6 ConvPoolLayer.py ├── 5-8 tensorflow_mnist_code(official) └── examples │ └── tutorials │ └── mnist │ ├── BUILD │ ├── __init__.py │ ├── fully_connected_feed.py │ ├── input_data.py │ ├── mnist.py │ ├── mnist_deep.py │ ├── mnist_softmax.py │ ├── mnist_softmax_xla.py │ └── mnist_with_summaries.py ├── 5-tensorflow_and_tensorboard ├── 5-1 TensorFlow_Get_Started.py ├── 5-4 tf_LinearRegression.py ├── 5-6 TensorBoard_Get_Started.py └── 5-7 save_load_model_v2(tf).py ├── 6-cnn_image_classification(CIFAR-10) └── 6-2 tensorflow_model_image_cifar10(single gpu& multi gpu) │ └── tutorials │ └── image │ └── cifar10 │ ├── BUILD │ ├── README.md │ ├── __init__.py │ ├── cifar10.py │ ├── cifar10_eval.py │ ├── cifar10_input.py │ ├── cifar10_input_test.py │ ├── cifar10_multi_gpu_train.py │ └── cifar10_train.py ├── 7-caffe_and_keras ├── 7-1 caffe-master │ └── examples │ │ └── cifar10 │ │ ├── cifar10_full.prototxt │ │ ├── cifar10_full_sigmoid_solver.prototxt │ │ ├── cifar10_full_sigmoid_solver_bn.prototxt │ │ ├── cifar10_full_sigmoid_train_test.prototxt │ │ ├── cifar10_full_sigmoid_train_test_bn.prototxt │ │ ├── cifar10_full_solver.prototxt │ │ ├── cifar10_full_solver_lr1.prototxt │ │ ├── cifar10_full_solver_lr2.prototxt │ │ ├── cifar10_full_train_test.prototxt │ │ ├── cifar10_quick.prototxt │ │ ├── cifar10_quick_solver.prototxt │ │ ├── cifar10_quick_solver_lr1.prototxt │ │ ├── cifar10_quick_train_test.prototxt │ │ ├── convert_cifar_data.cpp │ │ ├── create_cifar10.bat │ │ ├── readme.md │ │ ├── train_full.sh │ │ ├── train_full_sigmoid.sh │ │ ├── train_full_sigmoid_bn.sh │ │ └── train_quick.sh └── 7-3 keras-master │ └── examples │ ├── README.md │ ├── addition_rnn.py │ ├── antirectifier.py │ ├── babi_memnn.py │ ├── babi_rnn.py │ ├── cifar10_cnn.py │ ├── cifar10_cnn_capsule.py │ ├── cifar10_cnn_tfaugment2d.py │ ├── cifar10_resnet.py │ ├── conv_filter_visualization.py │ ├── conv_lstm.py │ ├── deep_dream.py │ ├── image_ocr.py │ ├── imdb_bidirectional_lstm.py │ ├── imdb_cnn.py │ ├── imdb_cnn_lstm.py │ ├── imdb_fasttext.py │ ├── imdb_lstm.py │ ├── lstm_seq2seq.py │ ├── lstm_seq2seq_restore.py │ ├── lstm_stateful.py │ ├── lstm_text_generation.py │ ├── mnist_acgan.py │ ├── mnist_cnn.py │ ├── mnist_dataset_api.py │ ├── mnist_denoising_autoencoder.py │ ├── mnist_hierarchical_rnn.py │ ├── mnist_irnn.py │ ├── mnist_mlp.py │ ├── mnist_net2net.py │ ├── mnist_siamese.py │ ├── mnist_sklearn_wrapper.py │ ├── mnist_swwae.py │ ├── mnist_tfrecord.py │ ├── mnist_transfer_cnn.py │ ├── neural_doodle.py │ ├── neural_style_transfer.py │ ├── pretrained_word_embeddings.py │ ├── reuters_mlp.py │ ├── reuters_mlp_relu_vs_selu.py │ ├── saved_models │ └── keras_cifar10_trained_model.h5 │ ├── variational_autoencoder.py │ └── variational_autoencoder_deconv.py ├── mnist_data └── 
mnist.pkl.gz └── utils └── mnist_loader.py /.idea/NeuralNetworksGetStarted.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | 14 | 17 | -------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/dictionaries/mtianyan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | mtianyan 5 | randn 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/other.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /1-numpy_basic/1-numpy_basic.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/3/19 0019 23:54' 3 | 4 | import numpy as np 5 | 6 | a = np.array([2, 3, 4]) 7 | print(a) 8 | # 元素数据类型 9 | print(a.dtype) 10 | # 数组的维度(3,) 一行三列 11 | print(a.shape) 12 | # 数组的维数 一维 13 | print(a.ndim) 14 | # 数组的元素个数 15 | print(a.size) 16 | print("*********************************") 17 | 18 | b = np.array([[1, 2], [3, 4]]) 19 | print(b) 20 | # 元素数据类型 21 | print(b.dtype) 22 | # 数组的维度(2,2) 两行两列 23 | print(b.shape) 24 | # 数组的维数 一维 25 | print(b.ndim) 26 | # 数组的元素个数 27 | print(b.size) 28 | print("*********************************") 29 | 30 | c = np.array([[1, 2], [3, 4]], dtype=float) 31 | print(c) 32 | print("*********************************") 33 | 34 | # np.zeros创建零矩阵 35 | d = np.zeros((3, 4)) 36 | print(d) 37 | print("*********************************") 38 | 39 | # np.ones创建全1矩阵,每个元素初始化为1.0 40 | e = np.ones((3, 4)) 41 | print(e) 42 | print("*********************************") 43 | 44 | # 首先创建一个两行三列的数组 45 | b = np.ones((2, 3)) 46 | print(b) 47 | # reshape成三行两列的数组 48 | print(b.reshape(3, 2)) 49 | print("*********************************") 50 | 51 | # 如何组合两个数组 52 | 53 | # 1-数乘 54 | a = np.ones((3, 4)) 55 | # a中的每一项都乘以2,然后赋值给b 56 | b = a * 2 57 | print(a) 58 | print(b) 59 | print("*********************************") 60 | 61 | # 2-水平合并: 62 | # 注意传入参数为元组,否则传入a,b不报错也没有结果 63 | print(np.hstack((a, b))) 64 | print("*********************************") 65 | 66 | # 3-垂直合并 67 | print(np.vstack((a, b))) 68 | -------------------------------------------------------------------------------- /2-feedforward_neural_network/2-11 
stochasticgradient_descent.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/3/31 0031 16:06' 3 | 4 | import random 5 | import numpy as np 6 | 7 | 8 | class Network(object): 9 | def __init__(self, sizes): 10 | # 网络层数 11 | self.num_layers = len(sizes) 12 | # 网络每层神经元个数 13 | self.sizes = sizes 14 | # 初始化每层的偏置 15 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 16 | # 初始化每层的权重 17 | self.weights = [np.random.randn(y, x) 18 | for x, y in zip(sizes[:-1], sizes[1:])] 19 | 20 | # 随机梯度下降 21 | def SGD(self, training_data, epochs, mini_batch_size, eta): 22 | # 取出训练数据总个数 23 | n = len(training_data) 24 | 25 | # 开始训练 循环每一个epochs 26 | for j in range(epochs): 27 | # 洗牌 打乱训练数据 28 | random.shuffle(training_data) 29 | 30 | # mini_batch 31 | mini_batches = [training_data[k:k + mini_batch_size] 32 | for k in range(0, n, mini_batch_size)] 33 | 34 | # 训练mini_batch 35 | for mini_batch in mini_batches: 36 | self.update_mini_batch(mini_batch, eta) 37 | 38 | print("Epoch {0} complete".format(j)) 39 | 40 | # 更新mini_batch 41 | def update_mini_batch(self, mini_batch, eta): 42 | # 保存每层偏倒 43 | nabla_b = [np.zeros(b.shape) for b in self.biases] 44 | nabla_w = [np.zeros(w.shape) for w in self.weights] 45 | 46 | # 训练每一个mini_batch 47 | for x, y in mini_batch: 48 | delta_nable_b, delta_nabla_w = self.update(x, y) 49 | 50 | # 保存一次训练网络中每层的偏倒 51 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 52 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 53 | 54 | # 更新权重和偏置 Wn+1 = wn - eta * nw 55 | self.weights = [w - (eta / len(mini_batch)) * nw 56 | for w, nw in zip(self.weights, nabla_w)] 57 | self.biases = [b - (eta / len(mini_batch)) * nb 58 | for b, nb in zip(self.biases, nabla_b)] 59 | 60 | # 前向传播 61 | def update(self, x, y): 62 | # 保存每层偏倒 63 | nabla_b = [np.zeros(b.shape) for b in self.biases] 64 | nabla_w = [np.zeros(w.shape) for w in self.weights] 65 | 66 | activation = x 67 | 68 | # 保存每一层的激励值a=sigmoid(z) 69 | activations = [x] 70 | 71 | # 保存每一层的z=wx+b 72 | zs = [] 73 | # 前向传播 74 | for b, w in zip(self.biases, self.weights): 75 | # 计算每层的z 76 | z = np.dot(w, activation) + b 77 | 78 | # 保存每层的z 79 | zs.append(z) 80 | 81 | # 计算每层的a 82 | activation = sigmoid(z) 83 | 84 | # 保存每一层的a 85 | activations.append(activation) 86 | 87 | # 反向更新了 88 | # 计算最后一层的误差 89 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1]) 90 | 91 | # 最后一层权重和偏置的倒数 92 | nabla_b[-1] = delta 93 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 94 | 95 | # 倒数第二层一直到第一层 权重和偏置的倒数 96 | for l in range(2, self.num_layers): 97 | z = zs[-l] 98 | 99 | sp = sigmoid_prime(z) 100 | 101 | # 当前层的误差 102 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 103 | 104 | # 当前层偏置和权重的倒数 105 | nabla_b[-l] = delta 106 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 107 | 108 | return (nabla_b, nabla_w) 109 | 110 | def cost_derivative(self, output_activation, y): 111 | return (output_activation - y) 112 | 113 | 114 | def sigmoid(z): 115 | return 1.0 / (1.0 + np.exp(-z)) 116 | 117 | 118 | def sigmoid_prime(z): 119 | return sigmoid(z) * (1 - sigmoid(z)) 120 | -------------------------------------------------------------------------------- /2-feedforward_neural_network/2-12 neural_network_mnist.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/3/31 0031 16:23' 3 | 4 | import random 5 | import numpy as np 6 | 7 | 8 | class 
Network(object): 9 | def __init__(self, sizes): 10 | # 网络层数 11 | self.num_layers = len(sizes) 12 | # 网络每层神经元个数 13 | self.sizes = sizes 14 | # 初始化每层的偏置 15 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 16 | # 初始化每层的权重 17 | self.weights = [np.random.randn(y, x) 18 | for x, y in zip(sizes[:-1], sizes[1:])] 19 | 20 | def feedforward(self, a): 21 | for b, w in zip(self.biases, self.weights): 22 | a = sigmoid(np.dot(w, a) + b) 23 | return a 24 | 25 | # 随机梯度下降 26 | def SGD(self, training_data, epochs, mini_batch_size, eta, 27 | test_data=None): 28 | if test_data: 29 | n_test = len(test_data) 30 | # 训练数据总个数 31 | n = len(training_data) 32 | 33 | # 开始训练 循环每一个epochs 34 | for j in range(epochs): 35 | # 洗牌 打乱训练数据 36 | random.shuffle(training_data) 37 | 38 | # mini_batch 39 | mini_batches = [training_data[k:k + mini_batch_size] 40 | for k in range(0, n, mini_batch_size)] 41 | 42 | # 训练mini_batch 43 | for mini_batch in mini_batches: 44 | self.update_mini_batch(mini_batch, eta) 45 | 46 | # 测试集上的表现 47 | if test_data: 48 | print("Epoch {0}: {1} / {2}".format( 49 | j, self.evaluate(test_data), n_test)) 50 | print("Epoch {0} complete".format(j)) 51 | 52 | # 更新mini_batch 53 | def update_mini_batch(self, mini_batch, eta): 54 | # 保存每层偏倒 55 | nabla_b = [np.zeros(b.shape) for b in self.biases] 56 | nabla_w = [np.zeros(w.shape) for w in self.weights] 57 | 58 | # 训练每一个mini_batch 59 | for x, y in mini_batch: 60 | delta_nable_b, delta_nabla_w = self.update(x, y) 61 | 62 | # 保存一次训练网络中每层的偏倒 63 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 64 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 65 | 66 | # 更新权重和偏置 Wn+1 = wn - eta * nw 67 | self.weights = [w - (eta / len(mini_batch)) * nw 68 | for w, nw in zip(self.weights, nabla_w)] 69 | self.biases = [b - (eta / len(mini_batch)) * nb 70 | for b, nb in zip(self.biases, nabla_b)] 71 | 72 | # 前向传播 73 | def update(self, x, y): 74 | # 保存每层偏倒 75 | nabla_b = [np.zeros(b.shape) for b in self.biases] 76 | nabla_w = [np.zeros(w.shape) for w in self.weights] 77 | 78 | activation = x 79 | 80 | # 保存每一层的激励值a=sigmoid(z) 81 | activations = [x] 82 | 83 | # 保存每一层的z=wx+b 84 | zs = [] 85 | # 前向传播 86 | for b, w in zip(self.biases, self.weights): 87 | # 计算每层的z 88 | z = np.dot(w, activation) + b 89 | 90 | # 保存每层的z 91 | zs.append(z) 92 | 93 | # 计算每层的a 94 | activation = sigmoid(z) 95 | 96 | # 保存每一层的a 97 | activations.append(activation) 98 | 99 | # 反向更新了 100 | # 计算最后一层的误差 101 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1]) 102 | 103 | # 最后一层权重和偏置的倒数 104 | nabla_b[-1] = delta 105 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 106 | 107 | # 倒数第二层一直到第一层 权重和偏置的倒数 108 | for l in range(2, self.num_layers): 109 | z = zs[-l] 110 | 111 | sp = sigmoid_prime(z) 112 | 113 | # 当前层的误差 114 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 115 | 116 | # 当前层偏置和权重的倒数 117 | nabla_b[-l] = delta 118 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 119 | 120 | return (nabla_b, nabla_w) 121 | 122 | def evaluate(self, test_data): 123 | test_results = [(np.argmax(self.feedforward(x)), y) 124 | for (x, y) in test_data] 125 | return sum(int(x == y) for (x, y) in test_results) 126 | 127 | def cost_derivative(self, output_activation, y): 128 | return (output_activation - y) 129 | 130 | 131 | def sigmoid(z): 132 | return 1.0 / (1.0 + np.exp(-z)) 133 | 134 | 135 | def sigmoid_prime(z): 136 | return sigmoid(z) * (1 - sigmoid(z)) 137 | 138 | 139 | if __name__ == '__main__': 140 | import mnist_loader 141 | 142 | 
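# Assumption (utils/mnist_loader.py is not reproduced above): load_data_wrapper() presumably returns the
# usual wrapper format for this network -- training pairs (x, y) with x a 784x1 column vector and y a 10x1
# one-hot vector, which is what cost_derivative() above subtracts from the output activation -- while
# validation_data and test_data carry plain integer labels, since evaluate() compares them directly
# against np.argmax(self.feedforward(x)).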
traning_data, validation_data, test_data = mnist_loader.load_data_wrapper() 143 | 144 | net = Network([784, 30, 10]) 145 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data) 146 | -------------------------------------------------------------------------------- /2-feedforward_neural_network/2-7 forward_propagation.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | 4 | 5 | class Network(object): 6 | """神经网络类""" 7 | 8 | def __init__(self, sizes): 9 | """ 10 | 初始化构造方法 11 | :param sizes: 列表; 如[3,2,1] 定义输入层有3个神经元,隐藏层2个,输出层1个;这定义总共有多少层,每一层有多少个神经元。 12 | """ 13 | # 网络层数: 一共有多少层 14 | self.num_layers = len(sizes) 15 | # 每层神经元的个数 16 | self.sizes = sizes 17 | # 初始化每层的偏置 b 18 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 19 | ''' 20 | 上面这行代码的等价写法 21 | self.biases = [] 22 | for y in sizes[1:]: 23 | self.biases.append(np.random.randn(y, 1)) # [一个(2,1), 一个(1,1)] 24 | 25 | size[1: ]; sizes=[3,2,1]; 我们只取2,1两个值,第一次循环时y为2,第二次为1, 表示输入到隐藏, 隐藏到输出,一共两种偏置。 26 | random.randn使用标准正态分布来初始化一个数组,,初始化一个y乘以1的数组,即初始化一个(2,1)的和一个(1,1)的,从输入层到隐藏层有两个偏置,隐藏层到输出层有一个偏置 27 | self.biases.append(np.random.randn(y, 1)) 28 | ''' 29 | 30 | # 初始化每层的权重 w 31 | self.weights = [np.random.randn(y, x) 32 | for x, y in zip(sizes[:-1], sizes[1:])] 33 | ''' 34 | 上面这行代码的等价写法 35 | self.weights = [] 36 | for x, y in zip(sizes[:-1], sizes[1:]): 37 | self.weights.append(np.random.randn(y, x)) # 输入层到隐藏层的连线总共有6条(2,3); 隐藏层到输出层的连线有2条(1,2) 38 | ''' 39 | 40 | def update(self, x, y): 41 | """ 前向传播 过程""" 42 | # 传入输入的训练数据, 43 | activation = x 44 | 45 | # 保存每一层的激励值a=sigmoid(z) z=wx+b 46 | # 第0层(输入层)时输入数据就是它的激励值 47 | activations = [x] 48 | 49 | # zs用于保存每一层的z=wx+b 50 | zs = [] 51 | 52 | # 前向传播 53 | # 使用for循环遍历每一层的偏置与权重:同时取第一层的偏置和权重 54 | for b, w in zip(self.biases, self.weights): 55 | # 计算每层的z 56 | # dot是点乘方法: 把两个数组进行点乘,对于二维数组相当于矩阵乘法。 57 | # 一维数组相当于向量的内积 58 | z = np.dot(w, activation) + b 59 | 60 | # 保存每层的z 61 | zs.append(z) 62 | 63 | # 计算每层经过激活函数后的输出 64 | activation = sigmoid(z) 65 | 66 | # 保存每一层的a 67 | activations.append(activation) 68 | 69 | 70 | def sigmoid(z): 71 | return 1.0 / (1.0 + np.exp(-z)) 72 | -------------------------------------------------------------------------------- /2-feedforward_neural_network/2-9 back_propagation.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/3/29 0029 22:20' 3 | import random 4 | import numpy as np 5 | 6 | 7 | class Network(object): 8 | """神经网络类""" 9 | 10 | def __init__(self, sizes): 11 | """ 12 | 初始化构造方法 13 | :param sizes: 列表; 如[3,2,1] 定义输入层有3个神经元,隐藏层2个,输出层1个;这定义总共有多少层,每一层有多少个神经元。 14 | """ 15 | # 网络层数: 一共有多少层 16 | self.num_layers = len(sizes) 17 | # 每层神经元的个数 18 | self.sizes = sizes 19 | # 初始化每层的偏置 b 20 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 21 | ''' 22 | 上面这行代码的等价写法 23 | self.biases = [] 24 | for y in sizes[1:]: 25 | self.biases.append(np.random.randn(y, 1)) # [一个(2,1), 一个(1,1)] 26 | 27 | size[1: ]; sizes=[3,2,1]; 我们只取2,1两个值,第一次循环时y为2,第二次为1, 表示输入到隐藏, 隐藏到输出,一共两种偏置。 28 | random.randn使用标准正态分布来初始化一个数组,,初始化一个y乘以1的数组,即初始化一个(2,1)的和一个(1,1)的,从输入层到隐藏层有两个偏置,隐藏层到输出层有一个偏置 29 | self.biases.append(np.random.randn(y, 1)) 30 | ''' 31 | 32 | # 初始化每层的权重 w 33 | self.weights = [np.random.randn(y, x) 34 | for x, y in zip(sizes[:-1], sizes[1:])] 35 | ''' 36 | 上面这行代码的等价写法 37 | self.weights = [] 38 | for x, y in zip(sizes[:-1], sizes[1:]): 39 | self.weights.append(np.random.randn(y, x)) # 输入层到隐藏层的连线总共有6条(2,3); 隐藏层到输出层的连线有2条(1,2) 40 | ''' 41 | 42 | 
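# The GD method below implements plain gradient descent: per-example gradients returned by self.update(x, y)
# are summed into nabla_b / nabla_w, and the weight step uses eta directly (w - eta * nw), whereas the
# mini-batch SGD in 2-11 / 2-12 scales the step by eta / len(mini_batch).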
# 梯度下降 43 | def GD(self, training_data, epochs, eta): 44 | # 开始训练 循环每一个epochs 45 | for j in range(epochs): 46 | # 洗牌 打乱训练数据 47 | random.shuffle(training_data) 48 | 49 | # 反向: 保存每层偏导 50 | # 反向: 取到每一层的偏置值,取到它的形状,以这个形状创建零矩阵 51 | nabla_b = [np.zeros(b.shape) for b in self.biases] 52 | nabla_w = [np.zeros(w.shape) for w in self.weights] 53 | 54 | # 训练每一个数据 55 | for x, y in training_data: 56 | delta_nable_b, delta_nabla_w = self.update(x, y) 57 | 58 | # 保存一次训练网络中每层的偏倒 59 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 60 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 61 | 62 | # 更新权重和偏置 Wn+1 = wn - eta * nw 63 | self.weights = [w - (eta) * nw 64 | for w, nw in zip(self.weights, nabla_w)] 65 | self.biases = [b - (eta) * nb 66 | for b, nb in zip(self.biases, nabla_b)] 67 | 68 | print("Epoch {0} complete".format(j)) 69 | 70 | # 前向传播 71 | def update(self, x, y): 72 | # 保存每层偏倒 73 | nabla_b = [np.zeros(b.shape) for b in self.biases] 74 | nabla_w = [np.zeros(w.shape) for w in self.weights] 75 | 76 | activation = x 77 | 78 | # 保存每一层的激励值a=sigmoid(z) 79 | activations = [x] 80 | 81 | # 保存每一层的z=wx+b 82 | zs = [] 83 | # 前向传播 84 | for b, w in zip(self.biases, self.weights): 85 | # 计算每层的z 86 | z = np.dot(w, activation) + b 87 | 88 | # 保存每层的z 89 | zs.append(z) 90 | 91 | # 计算每层的a 92 | activation = sigmoid(z) 93 | 94 | # 保存每一层的a 95 | activations.append(activation) 96 | 97 | # 反向更新了: 从倒数第一层开始 98 | # 计算最后一层的误差 99 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1]) 100 | 101 | # 最后一层权重和偏置的倒数 102 | # 偏loos/偏b = delta 103 | # 偏loss/偏w = 倒数第二层y 乘以 delta 104 | nabla_b[-1] = delta 105 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 106 | 107 | # 倒数第二层一直到第一层 权重和偏置的倒数 108 | for l in range(2, self.num_layers): 109 | # zs[-2]倒数第二层 110 | z = zs[-l] 111 | 112 | # 计算倒数第二层的偏导 113 | sp = sigmoid_prime(z) 114 | 115 | # 当前层的误差: delta_h公式 上一层的w乘以上一层的误差,点乘于本层计算出来的z 116 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 117 | 118 | # 当前层偏置和权重的倒数 119 | nabla_b[-l] = delta 120 | # 当前层误差乘以前一层y -l-1前一层 121 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 122 | 123 | # 返回当前层的偏置和权重的导数 124 | return (nabla_b, nabla_w) 125 | 126 | @staticmethod 127 | def cost_derivative(output_activation, y): 128 | return output_activation - y 129 | 130 | 131 | def sigmoid(z): 132 | return 1.0 / (1.0 + np.exp(-z)) 133 | 134 | 135 | def sigmoid_prime(z): 136 | return sigmoid(z) * (1 - sigmoid(z)) 137 | -------------------------------------------------------------------------------- /2-feedforward_neural_network/2.1-simple_network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Network(object): 5 | """神经网络类""" 6 | 7 | def __init__(self, sizes): 8 | """ 9 | 初始化构造方法 10 | :param sizes: 列表; 如[3,2,1] 定义输入层有3个神经元,隐藏层2个,输出层1个;这定义总共有多少层,每一层有多少个神经元。 11 | """ 12 | # 网络层数: 一共有多少层 13 | self.num_layers = len(sizes) 14 | # 每层神经元的个数 15 | self.sizes = sizes 16 | # 初始化每层的偏置 b 17 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 18 | ''' 19 | 上面这行代码的等价写法 20 | self.biases = [] 21 | for y in sizes[1:]: 22 | self.biases.append(np.random.randn(y, 1)) # [一个(2,1), 一个(1,1)] 23 | 24 | size[1: ]; sizes=[3,2,1]; 我们只取2,1两个值,第一次循环时y为2,第二次为1, 表示输入到隐藏, 隐藏到输出,一共两种偏置。 25 | random.randn使用标准正态分布来初始化一个数组,,初始化一个y乘以1的数组,即初始化一个(2,1)的和一个(1,1)的,从输入层到隐藏层有两个偏置,隐藏层到输出层有一个偏置 26 | self.biases.append(np.random.randn(y, 1)) 27 | ''' 28 | 29 | # 初始化每层的权重 w 30 | self.weights = [np.random.randn(y, x) 31 | for x, y in zip(sizes[:-1], 
sizes[1:])] 32 | ''' 33 | 上面这行代码的等价写法 34 | self.weights = [] 35 | for x, y in zip(sizes[:-1], sizes[1:]): 36 | self.weights.append(np.random.randn(y, x)) # 输入层到隐藏层的连线总共有6条(2,3); 隐藏层到输出层的连线有2条(1,2) 37 | ''' 38 | 39 | 40 | def sigmoid(z): 41 | """sigmoid激励函数(1/1+e的-z次方)""" 42 | return 1.0 / (1.0 + np.exp(-z)) 43 | 44 | 45 | if __name__ == '__main__': 46 | net = Network([3, 2, 1]) 47 | print("网络层数: ", net.num_layers - 1) 48 | print("网络结构: ", net.sizes) 49 | print("*" * 20) 50 | print("输入到隐藏层偏置: ", net.biases[0]) 51 | print("隐藏到输出层偏置: ", net.biases[1]) 52 | print("*" * 20) 53 | print("输入到隐藏层权重: ", net.weights[0]) 54 | print("隐藏到输出层权重: ", net.weights[1]) 55 | -------------------------------------------------------------------------------- /3-improve_neural_network_efficiency/3-10 cross_entropy.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/3 0003 16:26' 3 | 4 | import random 5 | import numpy as np 6 | 7 | 8 | class QuadraticCost(object): 9 | @staticmethod 10 | def fn(a, y): 11 | return 0.5 * np.linalg.norm(a - y) ** 2 12 | 13 | @staticmethod 14 | def delta(z, a, y): 15 | return (a - y) * sigmoid_prime(z) 16 | 17 | 18 | class CrossEntropyCost(object): 19 | ''' 20 | >>>import numpy as np 21 | >>> a = np.array([[np.nan,np.inf],\ 22 | ... [-np.nan,-np.inf]]) 23 | >>> a 24 | array([[ nan, inf], 25 | [ nan, -inf]]) 26 | >>> np.nan_to_num(a) 27 | array([[ 0.00000000e+000, 1.79769313e+308], 28 | [ 0.00000000e+000, -1.79769313e+308]]) 29 | ''' 30 | 31 | @staticmethod 32 | def fn(a, y): 33 | return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a))) 34 | 35 | @staticmethod 36 | def delta(z, a, y): 37 | return (a - y) 38 | 39 | 40 | class Network(object): 41 | def __init__(self, sizes, cost=CrossEntropyCost): 42 | # 网络层数 43 | self.num_layers = len(sizes) 44 | # 网络每层神经元个数 45 | self.sizes = sizes 46 | # 初始化每层的偏置和权重 47 | self.default_weight_initializer() 48 | # 损失函数 49 | self.cost = cost 50 | 51 | def default_weight_initializer(self): 52 | # 初始化每层的偏置 53 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 54 | # 初始化每层的权重 55 | self.weights = [np.random.randn(y, x) / np.sqrt(x) 56 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 57 | 58 | def large_weight_initializer(self): 59 | # 初始化每层的偏置 60 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 61 | # 初始化每层的权重 62 | self.weights = [np.random.randn(y, x) 63 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 64 | 65 | def feedforward(self, a): 66 | for b, w in zip(self.biases, self.weights): 67 | a = sigmoid(np.dot(w, a) + b) 68 | return a 69 | 70 | # 随机梯度下降 71 | def SGD(self, training_data, epochs, mini_batch_size, eta, 72 | lmbda=0.0, 73 | test_data=None): 74 | if test_data: 75 | n_test = len(test_data) 76 | # 训练数据总个数 77 | n = len(training_data) 78 | 79 | # 开始训练 循环每一个epochs 80 | for j in range(epochs): 81 | # 洗牌 打乱训练数据 82 | random.shuffle(training_data) 83 | 84 | # mini_batch 85 | mini_batches = [training_data[k:k + mini_batch_size] 86 | for k in range(0, n, mini_batch_size)] 87 | 88 | # 训练mini_batch 89 | for mini_batch in mini_batches: 90 | self.update_mini_batch(mini_batch, eta, lmbda, n) 91 | 92 | if test_data: 93 | print("Epoch {0}: {1} / {2}".format( 94 | j, self.evaluate(test_data), n_test)) 95 | print("Epoch {0} complete".format(j)) 96 | 97 | # 更新mini_batch 98 | def update_mini_batch(self, mini_batch, eta, lmbda, n): 99 | # 保存每层偏倒 100 | nabla_b = [np.zeros(b.shape) for b in self.biases] 101 | nabla_w = [np.zeros(w.shape) for w in 
self.weights] 102 | 103 | # 训练每一个mini_batch 104 | for x, y in mini_batch: 105 | delta_nable_b, delta_nabla_w = self.update(x, y) 106 | 107 | # 保存一次训练网络中每层的偏倒 108 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 109 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 110 | 111 | # 更新权重和偏置 Wn+1 = wn - eta * nw 112 | self.weights = [(1 - eta * (lmbda / n)) * w - (eta / len(mini_batch)) * nw 113 | for w, nw in zip(self.weights, nabla_w)] 114 | self.biases = [b - (eta / len(mini_batch)) * nb 115 | for b, nb in zip(self.biases, nabla_b)] 116 | 117 | # 前向传播 118 | def update(self, x, y): 119 | # 保存每层偏倒 120 | nabla_b = [np.zeros(b.shape) for b in self.biases] 121 | nabla_w = [np.zeros(w.shape) for w in self.weights] 122 | 123 | activation = x 124 | 125 | # 保存每一层的激励值a=sigmoid(z) 126 | activations = [x] 127 | 128 | # 保存每一层的z=wx+b 129 | zs = [] 130 | # 前向传播 131 | for b, w in zip(self.biases, self.weights): 132 | # 计算每层的z 133 | z = np.dot(w, activation) + b 134 | 135 | # 保存每层的z 136 | zs.append(z) 137 | 138 | # 计算每层的a 139 | activation = sigmoid(z) 140 | 141 | # 保存每一层的a 142 | activations.append(activation) 143 | 144 | # 反向更新了 145 | # 计算最后一层的误差 146 | delta = (self.cost).delta(zs[-1], activations[-1], y) 147 | 148 | # 最后一层权重和偏置的倒数 149 | nabla_b[-1] = delta 150 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 151 | 152 | # 倒数第二层一直到第一层 权重和偏置的倒数 153 | for l in range(2, self.num_layers): 154 | z = zs[-l] 155 | 156 | sp = sigmoid_prime(z) 157 | 158 | # 当前层的误差 159 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 160 | 161 | # 当前层偏置和权重的倒数 162 | nabla_b[-l] = delta 163 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 164 | 165 | return (nabla_b, nabla_w) 166 | 167 | def evaluate(self, test_data): 168 | test_results = [(np.argmax(self.feedforward(x)), y) 169 | for (x, y) in test_data] 170 | return sum(int(x == y) for (x, y) in test_results) 171 | 172 | def cost_derivative(self, output_activation, y): 173 | return (output_activation - y) 174 | 175 | 176 | def sigmoid(z): 177 | return 1.0 / (1.0 + np.exp(-z)) 178 | 179 | 180 | def sigmoid_prime(z): 181 | return sigmoid(z) * (1 - sigmoid(z)) 182 | 183 | 184 | if __name__ == '__main__': 185 | import mnist_loader 186 | 187 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper() 188 | 189 | net = Network([784, 30, 10]) 190 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data) 191 | -------------------------------------------------------------------------------- /3-improve_neural_network_efficiency/3-5 initialization_parameters.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/3/31 0031 19:15' 3 | import random 4 | import numpy as np 5 | 6 | 7 | class Network(object): 8 | def __init__(self, sizes): 9 | # 网络层数 10 | self.num_layers = len(sizes) 11 | # 网络每层神经元个数 12 | self.sizes = sizes 13 | # 初始化每层的偏置和权重 14 | self.default_weight_initializer() 15 | 16 | def default_weight_initializer(self): 17 | # 初始化每层的偏置 18 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 19 | # 初始化每层的权重 20 | self.weights = [np.random.randn(y, x) / np.sqrt(x) 21 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 22 | 23 | def large_weight_initializer(self): 24 | # 初始化每层的偏置 25 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 26 | # 初始化每层的权重 27 | self.weights = [np.random.randn(y, x) 28 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 29 | 30 | def feedforward(self, a): 31 | for b, w in 
zip(self.biases, self.weights): 32 | a = sigmoid(np.dot(w, a) + b) 33 | return a 34 | 35 | # 随机梯度下降 36 | def SGD(self, training_data, epochs, mini_batch_size, eta, 37 | test_data=None): 38 | if test_data: 39 | n_test = len(test_data) 40 | # 训练数据总个数 41 | n = len(training_data) 42 | 43 | # 开始训练 循环每一个epochs 44 | for j in range(epochs): 45 | # 洗牌 打乱训练数据 46 | random.shuffle(training_data) 47 | 48 | # mini_batch 49 | mini_batches = [training_data[k:k + mini_batch_size] 50 | for k in range(0, n, mini_batch_size)] 51 | 52 | # 训练mini_batch 53 | for mini_batch in mini_batches: 54 | self.update_mini_batch(mini_batch, eta) 55 | 56 | if test_data: 57 | print("Epoch {0}: {1} / {2}".format( 58 | j, self.evaluate(test_data), n_test)) 59 | print("Epoch {0} complete".format(j)) 60 | 61 | # 更新mini_batch 62 | def update_mini_batch(self, mini_batch, eta): 63 | # 保存每层偏倒 64 | nabla_b = [np.zeros(b.shape) for b in self.biases] 65 | nabla_w = [np.zeros(w.shape) for w in self.weights] 66 | 67 | # 训练每一个mini_batch 68 | for x, y in mini_batch: 69 | delta_nable_b, delta_nabla_w = self.update(x, y) 70 | 71 | # 保存一次训练网络中每层的偏倒 72 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 73 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 74 | 75 | # 更新权重和偏置 Wn+1 = wn - eta * nw 76 | self.weights = [w - (eta / len(mini_batch)) * nw 77 | for w, nw in zip(self.weights, nabla_w)] 78 | self.biases = [b - (eta / len(mini_batch)) * nb 79 | for b, nb in zip(self.biases, nabla_b)] 80 | 81 | # 前向传播 82 | def update(self, x, y): 83 | # 保存每层偏倒 84 | nabla_b = [np.zeros(b.shape) for b in self.biases] 85 | nabla_w = [np.zeros(w.shape) for w in self.weights] 86 | 87 | activation = x 88 | 89 | # 保存每一层的激励值a=sigmoid(z) 90 | activations = [x] 91 | 92 | # 保存每一层的z=wx+b 93 | zs = [] 94 | # 前向传播 95 | for b, w in zip(self.biases, self.weights): 96 | # 计算每层的z 97 | z = np.dot(w, activation) + b 98 | 99 | # 保存每层的z 100 | zs.append(z) 101 | 102 | # 计算每层的a 103 | activation = sigmoid(z) 104 | 105 | # 保存每一层的a 106 | activations.append(activation) 107 | 108 | # 反向更新了 109 | # 计算最后一层的误差 110 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1]) 111 | 112 | # 最后一层权重和偏置的倒数 113 | nabla_b[-1] = delta 114 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 115 | 116 | # 倒数第二层一直到第一层 权重和偏置的倒数 117 | for l in range(2, self.num_layers): 118 | z = zs[-l] 119 | 120 | sp = sigmoid_prime(z) 121 | 122 | # 当前层的误差 123 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 124 | 125 | # 当前层偏置和权重的倒数 126 | nabla_b[-l] = delta 127 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 128 | 129 | return (nabla_b, nabla_w) 130 | 131 | def evaluate(self, test_data): 132 | test_results = [(np.argmax(self.feedforward(x)), y) 133 | for (x, y) in test_data] 134 | return sum(int(x == y) for (x, y) in test_results) 135 | 136 | def cost_derivative(self, output_activation, y): 137 | return (output_activation - y) 138 | 139 | 140 | def sigmoid(z): 141 | return 1.0 / (1.0 + np.exp(-z)) 142 | 143 | 144 | def sigmoid_prime(z): 145 | return sigmoid(z) * (1 - sigmoid(z)) 146 | 147 | 148 | if __name__ == '__main__': 149 | import mnist_loader 150 | 151 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper() 152 | 153 | net = Network([784, 30, 10]) 154 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data) 155 | -------------------------------------------------------------------------------- /3-improve_neural_network_efficiency/3-7 L2_regularization.py: 
-------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/3 0003 13:40' 3 | 4 | import random 5 | import numpy as np 6 | 7 | 8 | class Network(object): 9 | def __init__(self, sizes): 10 | # 网络层数 11 | self.num_layers = len(sizes) 12 | # 网络每层神经元个数 13 | self.sizes = sizes 14 | # 初始化每层的偏置和权重 15 | self.default_weight_initializer() 16 | 17 | def default_weight_initializer(self): 18 | # 初始化每层的偏置 19 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 20 | # 初始化每层的权重 21 | self.weights = [np.random.randn(y, x) / np.sqrt(x) 22 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 23 | 24 | def large_weight_initializer(self): 25 | # 初始化每层的偏置 26 | self.biases = [np.random.randn(y, 1) for y in self.sizes[1:]] 27 | # 初始化每层的权重 28 | self.weights = [np.random.randn(y, x) 29 | for x, y in zip(self.sizes[:-1], self.sizes[1:])] 30 | 31 | def feedforward(self, a): 32 | for b, w in zip(self.biases, self.weights): 33 | a = sigmoid(np.dot(w, a) + b) 34 | return a 35 | 36 | # 随机梯度下降 37 | def SGD(self, training_data, epochs, mini_batch_size, eta, 38 | lmbda=0.0, 39 | test_data=None): 40 | if test_data: n_test = len(test_data) 41 | # 训练数据总个数 42 | n = len(training_data) 43 | 44 | # 开始训练 循环每一个epochs 45 | for j in range(epochs): 46 | # 洗牌 打乱训练数据 47 | random.shuffle(training_data) 48 | 49 | # mini_batch 50 | mini_batches = [training_data[k:k + mini_batch_size] 51 | for k in range(0, n, mini_batch_size)] 52 | 53 | # 训练mini_batch 54 | for mini_batch in mini_batches: 55 | self.update_mini_batch(mini_batch, eta, lmbda, n) 56 | 57 | if test_data: 58 | print("Epoch {0}: {1} / {2}".format( 59 | j, self.evaluate(test_data), n_test)) 60 | print("Epoch {0} complete".format(j)) 61 | 62 | # 更新mini_batch 63 | def update_mini_batch(self, mini_batch, eta, lmbda, n): 64 | # 保存每层偏倒 65 | nabla_b = [np.zeros(b.shape) for b in self.biases] 66 | nabla_w = [np.zeros(w.shape) for w in self.weights] 67 | 68 | # 训练每一个mini_batch 69 | for x, y in mini_batch: 70 | delta_nable_b, delta_nabla_w = self.update(x, y) 71 | 72 | # 保存一次训练网络中每层的偏倒 73 | nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nable_b)] 74 | nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 75 | 76 | # 更新权重和偏置 Wn+1 = wn - eta * nw 77 | self.weights = [(1 - eta * (lmbda / n)) * w - (eta / len(mini_batch)) * nw 78 | for w, nw in zip(self.weights, nabla_w)] 79 | self.biases = [b - (eta / len(mini_batch)) * nb 80 | for b, nb in zip(self.biases, nabla_b)] 81 | 82 | # 前向传播 83 | def update(self, x, y): 84 | # 保存每层偏倒 85 | nabla_b = [np.zeros(b.shape) for b in self.biases] 86 | nabla_w = [np.zeros(w.shape) for w in self.weights] 87 | 88 | activation = x 89 | 90 | # 保存每一层的激励值a=sigmoid(z) 91 | activations = [x] 92 | 93 | # 保存每一层的z=wx+b 94 | zs = [] 95 | # 前向传播 96 | for b, w in zip(self.biases, self.weights): 97 | # 计算每层的z 98 | z = np.dot(w, activation) + b 99 | 100 | # 保存每层的z 101 | zs.append(z) 102 | 103 | # 计算每层的a 104 | activation = sigmoid(z) 105 | 106 | # 保存每一层的a 107 | activations.append(activation) 108 | 109 | # 反向更新了 110 | # 计算最后一层的误差 111 | delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1]) 112 | 113 | # 最后一层权重和偏置的倒数 114 | nabla_b[-1] = delta 115 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 116 | 117 | # 倒数第二层一直到第一层 权重和偏置的倒数 118 | for l in range(2, self.num_layers): 119 | z = zs[-l] 120 | 121 | sp = sigmoid_prime(z) 122 | 123 | # 当前层的误差 124 | delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp 125 | 126 | # 当前层偏置和权重的倒数 127 | nabla_b[-l] = delta 
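# Negative indices count back from the output layer; activations[-l - 1] is the activation feeding the
# current layer, so the line below is the usual dC/dW = delta * a_prev^T for that layer.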
128 | nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose()) 129 | 130 | return (nabla_b, nabla_w) 131 | 132 | def evaluate(self, test_data): 133 | test_results = [(np.argmax(self.feedforward(x)), y) 134 | for (x, y) in test_data] 135 | return sum(int(x == y) for (x, y) in test_results) 136 | 137 | def cost_derivative(self, output_activation, y): 138 | return (output_activation - y) 139 | 140 | 141 | def sigmoid(z): 142 | return 1.0 / (1.0 + np.exp(-z)) 143 | 144 | 145 | def sigmoid_prime(z): 146 | return sigmoid(z) * (1 - sigmoid(z)) 147 | 148 | 149 | if __name__ == '__main__': 150 | import mnist_loader 151 | 152 | traning_data, validation_data, test_data = mnist_loader.load_data_wrapper() 153 | 154 | net = Network([784, 30, 10]) 155 | net.SGD(traning_data, 30, 10, 0.5, test_data=test_data) 156 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Example TensorFlow models for MNIST used in tutorials 3 | 4 | licenses(["notice"]) # Apache 2.0 5 | 6 | exports_files(["LICENSE"]) 7 | 8 | load("//tensorflow:tensorflow.bzl", "py_test") 9 | 10 | py_library( 11 | name = "package", 12 | srcs = [ 13 | "__init__.py", 14 | ], 15 | srcs_version = "PY2AND3", 16 | visibility = ["//tensorflow:__subpackages__"], 17 | deps = [ 18 | ":input_data", 19 | ":mnist", 20 | ], 21 | ) 22 | 23 | py_library( 24 | name = "input_data", 25 | srcs = ["input_data.py"], 26 | srcs_version = "PY2AND3", 27 | visibility = ["//visibility:public"], 28 | deps = [ 29 | "//tensorflow:tensorflow_py", 30 | "//tensorflow/contrib/learn/python/learn/datasets", 31 | "//third_party/py/numpy", 32 | "@six_archive//:six", 33 | ], 34 | ) 35 | 36 | py_library( 37 | name = "mnist", 38 | srcs = [ 39 | "mnist.py", 40 | ], 41 | srcs_version = "PY2AND3", 42 | visibility = ["//visibility:public"], 43 | deps = [ 44 | "//tensorflow:tensorflow_py", 45 | ], 46 | ) 47 | 48 | py_binary( 49 | name = "fully_connected_feed", 50 | srcs = [ 51 | "fully_connected_feed.py", 52 | ], 53 | srcs_version = "PY2AND3", 54 | deps = [ 55 | ":input_data", 56 | ":mnist", 57 | "//tensorflow:tensorflow_py", 58 | ], 59 | ) 60 | 61 | py_binary( 62 | name = "mnist_with_summaries", 63 | srcs = [ 64 | "mnist_with_summaries.py", 65 | ], 66 | srcs_version = "PY2AND3", 67 | deps = [ 68 | ":input_data", 69 | "//tensorflow:tensorflow_py", 70 | ], 71 | ) 72 | 73 | py_binary( 74 | name = "mnist_softmax", 75 | srcs = [ 76 | "mnist_softmax.py", 77 | ], 78 | srcs_version = "PY2AND3", 79 | deps = [ 80 | ":input_data", 81 | "//tensorflow:tensorflow_py", 82 | ], 83 | ) 84 | 85 | py_binary( 86 | name = "mnist_deep", 87 | srcs = [ 88 | "mnist_deep.py", 89 | ], 90 | srcs_version = "PY2AND3", 91 | deps = [ 92 | ":input_data", 93 | "//tensorflow:tensorflow_py", 94 | ], 95 | ) 96 | 97 | py_test( 98 | name = "fully_connected_feed_test", 99 | size = "small", 100 | srcs = [ 101 | "fully_connected_feed.py", 102 | ], 103 | args = [ 104 | "--fake_data", 105 | "--max_steps=10", 106 | ], 107 | main = "fully_connected_feed.py", 108 | srcs_version = "PY2AND3", 109 | deps = [ 110 | ":input_data", 111 | ":mnist", 112 | "//tensorflow:tensorflow_py", 113 | ], 114 | ) 115 | 116 | py_test( 117 | name = "mnist_with_summaries_test", 118 | size = "small", 119 | srcs = [ 120 | "mnist_with_summaries.py", 121 | ], 122 | args = [ 123 | "--fake_data", 124 | "--max_steps=10", 125 | "--learning_rate=0.00", 126 | ], 
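# The args above (--fake_data, --max_steps=10, --learning_rate=0.00) suggest this py_test is a quick
# smoke test of the summaries pipeline rather than a real training run.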
127 | main = "mnist_with_summaries.py", 128 | srcs_version = "PY2AND3", 129 | tags = ["notsan"], # http://b/29184009 130 | deps = [ 131 | ":input_data", 132 | "//tensorflow:tensorflow_py", 133 | ], 134 | ) 135 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Imports mnist tutorial libraries used by tutorial examples.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensorflow.examples.tutorials.mnist import input_data 22 | from tensorflow.examples.tutorials.mnist import mnist 23 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/input_data.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Functions for downloading and reading MNIST mnist_data.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | # pylint: disable=unused-import 22 | import gzip 23 | import os 24 | import tempfile 25 | 26 | import numpy 27 | from six.moves import urllib 28 | from six.moves import xrange # pylint: disable=redefined-builtin 29 | import tensorflow as tf 30 | from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets 31 | # pylint: enable=unused-import 32 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Builds the MNIST network. 17 | 18 | Implements the inference/loss/training pattern for model building. 19 | 20 | 1. inference() - Builds the model as far as required for running the network 21 | forward to make predictions. 22 | 2. loss() - Adds to the inference model the layers required to generate loss. 23 | 3. training() - Adds to the loss model the Ops required to generate and 24 | apply gradients. 25 | 26 | This file is used by the various "fully_connected_*.py" files and not meant to 27 | be run. 28 | """ 29 | from __future__ import absolute_import 30 | from __future__ import division 31 | from __future__ import print_function 32 | 33 | import math 34 | 35 | import tensorflow as tf 36 | 37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 38 | NUM_CLASSES = 10 39 | 40 | # The MNIST images are always 28x28 pixels. 41 | IMAGE_SIZE = 28 42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 43 | 44 | 45 | def inference(images, hidden1_units, hidden2_units): 46 | """Build the MNIST model up to where it may be used for inference. 47 | 48 | Args: 49 | images: Images placeholder, from inputs(). 50 | hidden1_units: Size of the first hidden layer. 51 | hidden2_units: Size of the second hidden layer. 52 | 53 | Returns: 54 | softmax_linear: Output tensor with the computed logits. 55 | """ 56 | # Hidden 1 57 | with tf.name_scope('hidden1'): 58 | weights = tf.Variable( 59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units], 60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))), 61 | name='weights') 62 | biases = tf.Variable(tf.zeros([hidden1_units]), 63 | name='biases') 64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases) 65 | # Hidden 2 66 | with tf.name_scope('hidden2'): 67 | weights = tf.Variable( 68 | tf.truncated_normal([hidden1_units, hidden2_units], 69 | stddev=1.0 / math.sqrt(float(hidden1_units))), 70 | name='weights') 71 | biases = tf.Variable(tf.zeros([hidden2_units]), 72 | name='biases') 73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) 74 | # Linear 75 | with tf.name_scope('softmax_linear'): 76 | weights = tf.Variable( 77 | tf.truncated_normal([hidden2_units, NUM_CLASSES], 78 | stddev=1.0 / math.sqrt(float(hidden2_units))), 79 | name='weights') 80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]), 81 | name='biases') 82 | logits = tf.matmul(hidden2, weights) + biases 83 | return logits 84 | 85 | 86 | def loss(logits, labels): 87 | """Calculates the loss from the logits and the labels. 88 | 89 | Args: 90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 91 | labels: Labels tensor, int32 - [batch_size]. 92 | 93 | Returns: 94 | loss: Loss tensor of type float. 95 | """ 96 | labels = tf.to_int64(labels) 97 | return tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) 98 | 99 | 100 | def training(loss, learning_rate): 101 | """Sets up the training Ops. 102 | 103 | Creates a summarizer to track the loss over time in TensorBoard. 
104 | 105 | Creates an optimizer and applies the gradients to all trainable variables. 106 | 107 | The Op returned by this function is what must be passed to the 108 | `sess.run()` call to cause the model to train. 109 | 110 | Args: 111 | loss: Loss tensor, from loss(). 112 | learning_rate: The learning rate to use for gradient descent. 113 | 114 | Returns: 115 | train_op: The Op for training. 116 | """ 117 | # Add a scalar summary for the snapshot loss. 118 | tf.summary.scalar('loss', loss) 119 | # Create the gradient descent optimizer with the given learning rate. 120 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 121 | # Create a variable to track the global step. 122 | global_step = tf.Variable(0, name='global_step', trainable=False) 123 | # Use the optimizer to apply the gradients that minimize the loss 124 | # (and also increment the global step counter) as a single training step. 125 | train_op = optimizer.minimize(loss, global_step=global_step) 126 | return train_op 127 | 128 | 129 | def evaluation(logits, labels): 130 | """Evaluate the quality of the logits at predicting the label. 131 | 132 | Args: 133 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 134 | labels: Labels tensor, int32 - [batch_size], with values in the 135 | range [0, NUM_CLASSES). 136 | 137 | Returns: 138 | A scalar int32 tensor with the number of examples (out of batch_size) 139 | that were predicted correctly. 140 | """ 141 | # For a classifier model, we can use the in_top_k Op. 142 | # It returns a bool tensor with shape [batch_size] that is true for 143 | # the examples where the label is in the top k (here k=1) 144 | # of all logits for that example. 145 | correct = tf.nn.in_top_k(logits, labels, 1) 146 | # Return the number of true entries. 147 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 148 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist_softmax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """A very simple MNIST classifier. 
16 | 17 | See extensive documentation at 18 | https://www.tensorflow.org/get_started/mnist/beginners 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import argparse 25 | import sys 26 | 27 | from tensorflow.examples.tutorials.mnist import input_data 28 | 29 | import tensorflow as tf 30 | 31 | FLAGS = None 32 | 33 | 34 | def main(_): 35 | # Import mnist_data 36 | mnist = input_data.read_data_sets(FLAGS.data_dir) 37 | 38 | # Create the model 39 | x = tf.placeholder(tf.float32, [None, 784]) 40 | W = tf.Variable(tf.zeros([784, 10])) 41 | b = tf.Variable(tf.zeros([10])) 42 | y = tf.matmul(x, W) + b 43 | 44 | # Define loss and optimizer 45 | y_ = tf.placeholder(tf.int64, [None]) 46 | 47 | # The raw formulation of cross-entropy, 48 | # 49 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), 50 | # reduction_indices=[1])) 51 | # 52 | # can be numerically unstable. 53 | # 54 | # So here we use tf.losses.sparse_softmax_cross_entropy on the raw 55 | # outputs of 'y', and then average across the batch. 56 | cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y) 57 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 58 | 59 | sess = tf.InteractiveSession() 60 | tf.global_variables_initializer().run() 61 | # Train 62 | for _ in range(1000): 63 | batch_xs, batch_ys = mnist.train.next_batch(100) 64 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 65 | 66 | # Test trained model 67 | correct_prediction = tf.equal(tf.argmax(y, 1), y_) 68 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 69 | print(sess.run( 70 | accuracy, feed_dict={ 71 | x: mnist.test.images, 72 | y_: mnist.test.labels 73 | })) 74 | 75 | 76 | if __name__ == '__main__': 77 | parser = argparse.ArgumentParser() 78 | parser.add_argument( 79 | '--data_dir', 80 | type=str, 81 | default='/tmp/tensorflow/mnist/input_data', 82 | help='Directory for storing input mnist_data') 83 | FLAGS, unparsed = parser.parse_known_args() 84 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 85 | -------------------------------------------------------------------------------- /5-8 tensorflow_mnist_code(official)/examples/tutorials/mnist/mnist_softmax_xla.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Simple MNIST classifier example with JIT XLA and timelines. 
16 | 17 | """ 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import argparse 23 | import sys 24 | 25 | import tensorflow as tf 26 | 27 | from tensorflow.examples.tutorials.mnist import input_data 28 | from tensorflow.python.client import timeline 29 | 30 | FLAGS = None 31 | 32 | 33 | def main(_): 34 | # Import mnist_data 35 | mnist = input_data.read_data_sets(FLAGS.data_dir) 36 | 37 | # Create the model 38 | x = tf.placeholder(tf.float32, [None, 784]) 39 | w = tf.Variable(tf.zeros([784, 10])) 40 | b = tf.Variable(tf.zeros([10])) 41 | y = tf.matmul(x, w) + b 42 | 43 | # Define loss and optimizer 44 | y_ = tf.placeholder(tf.int64, [None]) 45 | 46 | # The raw formulation of cross-entropy, 47 | # 48 | # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), 49 | # reduction_indices=[1])) 50 | # 51 | # can be numerically unstable. 52 | # 53 | # So here we use tf.losses.sparse_softmax_cross_entropy on the raw 54 | # logit outputs of 'y', and then average across the batch. 55 | cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y) 56 | train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy) 57 | 58 | config = tf.ConfigProto() 59 | jit_level = 0 60 | if FLAGS.xla: 61 | # Turns on XLA JIT compilation. 62 | jit_level = tf.OptimizerOptions.ON_1 63 | 64 | config.graph_options.optimizer_options.global_jit_level = jit_level 65 | run_metadata = tf.RunMetadata() 66 | sess = tf.Session(config=config) 67 | tf.global_variables_initializer().run(session=sess) 68 | # Train 69 | train_loops = 1000 70 | for i in range(train_loops): 71 | batch_xs, batch_ys = mnist.train.next_batch(100) 72 | 73 | # Create a timeline for the last loop and export to json to view with 74 | # chrome://tracing/. 
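# Capturing the trace only on the final iteration presumably keeps the timeline representative of
# steady-state step time (graph setup and any XLA JIT compilation happen on earlier steps); the
# timeline.ctf.json written below can then be loaded from a chrome://tracing tab.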
75 | if i == train_loops - 1: 76 | sess.run(train_step, 77 | feed_dict={x: batch_xs, 78 | y_: batch_ys}, 79 | options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), 80 | run_metadata=run_metadata) 81 | trace = timeline.Timeline(step_stats=run_metadata.step_stats) 82 | with open('timeline.ctf.json', 'w') as trace_file: 83 | trace_file.write(trace.generate_chrome_trace_format()) 84 | else: 85 | sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys}) 86 | 87 | # Test trained model 88 | correct_prediction = tf.equal(tf.argmax(y, 1), y_) 89 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 90 | print(sess.run(accuracy, 91 | feed_dict={x: mnist.test.images, 92 | y_: mnist.test.labels})) 93 | sess.close() 94 | 95 | 96 | if __name__ == '__main__': 97 | parser = argparse.ArgumentParser() 98 | parser.add_argument( 99 | '--data_dir', 100 | type=str, 101 | default='/tmp/tensorflow/mnist/input_data', 102 | help='Directory for storing input mnist_data') 103 | parser.add_argument( 104 | '--xla', type=bool, default=True, help='Turn xla via JIT on') 105 | FLAGS, unparsed = parser.parse_known_args() 106 | tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) 107 | -------------------------------------------------------------------------------- /5-tensorflow_and_tensorboard/5-1 TensorFlow_Get_Started.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/4 0004 17:47' 3 | import tensorflow as tf 4 | 5 | # # 定义常量op 6 | # a = tf.constant(2) 7 | # b = tf.constant(3) 8 | # 9 | # # 使用seesion 启动默认图 10 | # with tf.Session() as sess: 11 | # print("a=2, b=3") 12 | # print("常量相加: %i" % sess.run(a + b)) 13 | # print("常量相乘: %i" % sess.run(a * b)) 14 | 15 | 16 | # # 定义连个变量op占位符 17 | # a = tf.placeholder(tf.int16) 18 | # b = tf.placeholder(tf.int16) 19 | # 20 | # # 定义2个op操作 加法 乘法 21 | # add = tf.add(a, b) 22 | # mul = tf.multiply(a, b) 23 | # 24 | # with tf.Session() as sess: 25 | # print("加法:%i" % sess.run(add, feed_dict={a: 2, b: 3})) 26 | # print("乘法:%i" % sess.run(mul, feed_dict={a: 2, b: 3})) 27 | 28 | 29 | # 1x2 矩阵常量op 30 | matrix1 = tf.constant([[3., 3.]]) 31 | 32 | # 2x1 矩阵常量op 33 | matrix2 = tf.constant([[2.], [2.]]) 34 | 35 | # 矩阵乘op 36 | product = tf.matmul(matrix1, matrix2) 37 | 38 | with tf.Session() as sess: 39 | result = sess.run(product) 40 | print(type(result)) 41 | print(result) 42 | -------------------------------------------------------------------------------- /5-tensorflow_and_tensorboard/5-4 tf_LinearRegression.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/4 0004 18:27' 3 | import tensorflow as tf 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | from numpy import random 7 | 8 | # 训练参数 9 | learning_rate = 0.01 10 | training_epochs = 1000 11 | display_step = 50 12 | 13 | # 训练数据 14 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 15 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1]) 16 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 17 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3]) 18 | # 总共有多少个训练数据 19 | n_samples = train_X.shape[0] 20 | 21 | # 定义两个变量op占位符 22 | X = tf.placeholder("float") 23 | Y = tf.placeholder("float") 24 | 25 | # 初始化模型里所有的w和b 26 | W = tf.Variable(random.random(), name="weight") 27 | b = tf.Variable(random.random(), name="bias") 28 | 29 | # 构造线性模型 30 | pred = tf.add(tf.multiply(X, W), b) 31 | 32 | # 
Mean squared error 33 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples) 34 | 35 | # 'x' is [[1, 1, 1] 36 | # [1, 1, 1]] 37 | # tf.reduce_sum(x) ==> 6 38 | # tf.reduce_sum(x, 0) ==> [2, 2, 2] 39 | # tf.reduce_sum(x, 1) ==> [3, 3] 40 | # Gradient descent 41 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 42 | 43 | # Initialize all variables 44 | init = tf.global_variables_initializer() 45 | 46 | # Launch the default graph in a session 47 | with tf.Session() as sess: 48 | sess.run(init) 49 | 50 | # Start training 51 | for epoch in range(training_epochs): 52 | for (x, y) in zip(train_X, train_Y): 53 | sess.run(optimizer, feed_dict={X: x, Y: y}) 54 | 55 | # Print the result every display_step epochs 56 | if (epoch + 1) % display_step == 0: 57 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 58 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \ 59 | "W=", sess.run(W), "b=", sess.run(b)) 60 | 61 | print("Optimization Finished!") 62 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 63 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 64 | 65 | # Plot the fitted line 66 | plt.plot(train_X, train_Y, 'ro', label='Original data') 67 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 68 | plt.legend() 69 | plt.savefig('linear_train.png') 70 | plt.show() 71 | 72 | # Test data 73 | test_X = numpy.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1]) 74 | test_Y = numpy.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03]) 75 | 76 | print("Testing... (Mean square loss Comparison)") 77 | testing_cost = sess.run( 78 | tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * test_X.shape[0]), 79 | feed_dict={X: test_X, Y: test_Y}) # same function as cost above 80 | print("Testing cost=", testing_cost) 81 | print("Absolute mean square loss difference:", abs( 82 | training_cost - testing_cost)) 83 | 84 | plt.plot(test_X, test_Y, 'bo', label='Testing data') 85 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 86 | plt.legend() 87 | plt.savefig('linear_test.png') 88 | plt.show() 89 | -------------------------------------------------------------------------------- /5-tensorflow_and_tensorboard/5-6 TensorBoard_Get_Started.py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/4 0004 20:02' 3 | import tensorflow as tf 4 | import numpy 5 | from numpy import random 6 | 7 | # Training parameters 8 | learning_rate = 0.01 9 | training_epochs = 1000 10 | display_step = 50 11 | logs_path = './example' 12 | 13 | # Training data 14 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 15 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1]) 16 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 17 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3]) 18 | n_samples = train_X.shape[0] 19 | 20 | # Define two placeholder ops 21 | X = tf.placeholder("float") 22 | Y = tf.placeholder("float") 23 | 24 | # Initialize the model's w and b 25 | W = tf.Variable(random.random(), name="weight") 26 | b = tf.Variable(random.random(), name="bias") 27 | 28 | # Build the linear model 29 | pred = tf.add(tf.multiply(X, W), b) 30 | 31 | # Mean squared error 32 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples) 33 | 34 | # 'x' is [[1, 1, 1] 35 | # [1, 1, 1]] 36 | # tf.reduce_sum(x) ==> 6 37 | # tf.reduce_sum(x, 0) ==> [2, 2, 2] 38 | # tf.reduce_sum(x, 1) ==> [3, 3] 39 | # Gradient descent 40 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 41 | 42 | # Initialize all variables 43 | init =
tf.global_variables_initializer() 44 | 45 | # Create a summary to monitor the loss 46 | tf.summary.scalar("loss", cost) 47 | merged_summary_op = tf.summary.merge_all() 48 | 49 | # Launch the default graph in a session 50 | with tf.Session() as sess: 51 | sess.run(init) 52 | 53 | # Writer that dumps the recorded summaries to the log directory 54 | summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph()) 55 | 56 | # Start training 57 | for epoch in range(training_epochs): 58 | for (x, y) in zip(train_X, train_Y): 59 | sess.run(optimizer, feed_dict={X: x, Y: y}) 60 | 61 | # Print the result every display_step epochs 62 | if (epoch + 1) % display_step == 0: 63 | c, summary = sess.run([cost, merged_summary_op], feed_dict={X: train_X, Y: train_Y}) 64 | summary_writer.add_summary(summary, epoch * n_samples) 65 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \ 66 | "W=", sess.run(W), "b=", sess.run(b)) 67 | 68 | print("Optimization Finished!") 69 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 70 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 71 | -------------------------------------------------------------------------------- /5-tensorflow_and_tensorboard/5-7 save_load_model_v2(tf).py: -------------------------------------------------------------------------------- 1 | __author__ = 'mtianyan' 2 | __date__ = '2018/4/5 0005 16:31' 3 | import tensorflow as tf 4 | import numpy 5 | import matplotlib.pyplot as plt 6 | from numpy import random 7 | 8 | # Training parameters 9 | learning_rate = 0.01 10 | training_epochs = 1000 11 | display_step = 50 12 | model_path = "./tmp/model.ckpt" 13 | 14 | # Training data 15 | train_X = numpy.asarray([3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 16 | 7.042, 10.791, 5.313, 7.997, 5.654, 9.27, 3.1]) 17 | train_Y = numpy.asarray([1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 18 | 2.827, 3.465, 1.65, 2.904, 2.42, 2.94, 1.3]) 19 | n_samples = train_X.shape[0] 20 | 21 | # Define two placeholder ops 22 | X = tf.placeholder("float") 23 | Y = tf.placeholder("float") 24 | 25 | # Initialize the model's w and b 26 | W = tf.Variable(random.random(), name="weight") 27 | b = tf.Variable(random.random(), name="bias") 28 | 29 | # Build the linear model 30 | pred = tf.add(tf.multiply(X, W), b) 31 | 32 | # Mean squared error 33 | cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples) 34 | 35 | # 'x' is [[1, 1, 1] 36 | # [1, 1, 1]] 37 | # tf.reduce_sum(x) ==> 6 38 | # tf.reduce_sum(x, 0) ==> [2, 2, 2] 39 | # tf.reduce_sum(x, 1) ==> [3, 3] 40 | # Gradient descent 41 | optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost) 42 | 43 | # Initialize all variables 44 | init = tf.global_variables_initializer() 45 | 46 | saver = tf.train.Saver() 47 | 48 | print("Starting 1st session...") 49 | # Launch the default graph in a session 50 | with tf.Session() as sess: 51 | sess.run(init) 52 | 53 | # Start training 54 | for epoch in range(200): 55 | for (x, y) in zip(train_X, train_Y): 56 | sess.run(optimizer, feed_dict={X: x, Y: y}) 57 | 58 | # Print the result every display_step epochs 59 | if (epoch + 1) % display_step == 0: 60 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 61 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \ 62 | "W=", sess.run(W), "b=", sess.run(b)) 63 | 64 | print("Optimization Finished!") 65 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 66 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 67 | 68 | # Save model weights to disk 69 | save_path = saver.save(sess, model_path) 70 | print("Model saved in file: %s" % save_path) 71 | 72 | print("Starting 2nd session...") 73 | # Launch the default graph in a session 74 | with tf.Session() as sess: 75 |
sess.run(init) 76 | 77 | # Restore model weights from previously saved model 78 | saver.restore(sess, model_path) 79 | print("Model restored from file: %s" % save_path) 80 | 81 | # Start training 82 | for epoch in range(training_epochs - 200): 83 | for (x, y) in zip(train_X, train_Y): 84 | sess.run(optimizer, feed_dict={X: x, Y: y}) 85 | 86 | # Print the result every display_step epochs 87 | if (epoch + 1) % display_step == 0: 88 | c = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 89 | print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c), \ 90 | "W=", sess.run(W), "b=", sess.run(b)) 91 | 92 | print("Optimization Finished!") 93 | training_cost = sess.run(cost, feed_dict={X: train_X, Y: train_Y}) 94 | print("Training cost=", training_cost, "W=", sess.run(W), "b=", sess.run(b), '\n') 95 | 96 | # Plot the fitted line 97 | plt.plot(train_X, train_Y, 'ro', label='Original data') 98 | plt.plot(train_X, sess.run(W) * train_X + sess.run(b), label='Fitted line') 99 | plt.legend() 100 | plt.savefig('save_restore_linear_train.png') 101 | plt.show() 102 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/BUILD: -------------------------------------------------------------------------------- 1 | # Description: 2 | # Example TensorFlow models for CIFAR-10 3 | 4 | licenses(["notice"]) # Apache 2.0 5 | 6 | exports_files(["LICENSE"]) 7 | 8 | py_library( 9 | name = "cifar10_input", 10 | srcs = ["cifar10_input.py"], 11 | srcs_version = "PY2AND3", 12 | visibility = ["//tensorflow:internal"], 13 | deps = [ 14 | "//tensorflow:tensorflow_py", 15 | ], 16 | ) 17 | 18 | py_test( 19 | name = "cifar10_input_test", 20 | size = "small", 21 | srcs = ["cifar10_input_test.py"], 22 | srcs_version = "PY2AND3", 23 | deps = [ 24 | ":cifar10_input", 25 | "//tensorflow:tensorflow_py", 26 | "//tensorflow/python:framework_test_lib", 27 | "//tensorflow/python:platform_test", 28 | ], 29 | ) 30 | 31 | py_library( 32 | name = "cifar10", 33 | srcs = ["cifar10.py"], 34 | srcs_version = "PY2AND3", 35 | deps = [ 36 | ":cifar10_input", 37 | "//tensorflow:tensorflow_py", 38 | ], 39 | ) 40 | 41 | py_binary( 42 | name = "cifar10_eval", 43 | srcs = [ 44 | "cifar10_eval.py", 45 | ], 46 | srcs_version = "PY2AND3", 47 | visibility = ["//tensorflow:__subpackages__"], 48 | deps = [ 49 | ":cifar10", 50 | ], 51 | ) 52 | 53 | py_binary( 54 | name = "cifar10_train", 55 | srcs = [ 56 | "cifar10_train.py", 57 | ], 58 | srcs_version = "PY2AND3", 59 | visibility = ["//tensorflow:__subpackages__"], 60 | deps = [ 61 | ":cifar10", 62 | ], 63 | ) 64 | 65 | py_binary( 66 | name = "cifar10_multi_gpu_train", 67 | srcs = [ 68 | "cifar10_multi_gpu_train.py", 69 | ], 70 | srcs_version = "PY2AND3", 71 | visibility = ["//tensorflow:__subpackages__"], 72 | deps = [ 73 | ":cifar10", 74 | ], 75 | ) 76 | 77 | filegroup( 78 | name = "all_files", 79 | srcs = glob( 80 | ["**/*"], 81 | exclude = [ 82 | "**/METADATA", 83 | "**/OWNERS", 84 | ], 85 | ), 86 | visibility = ["//tensorflow:__subpackages__"], 87 | ) 88 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/README.md: -------------------------------------------------------------------------------- 1 | **NOTE: For users interested in multi-GPU, we recommend looking at the newer
[cifar10_estimator](https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10_estimator) example instead.** 2 | 3 | --- 4 | 5 | CIFAR-10 is a common benchmark in machine learning for image recognition. 6 | 7 | http://www.cs.toronto.edu/~kriz/cifar.html 8 | 9 | Code in this directory demonstrates how to use TensorFlow to train and evaluate a convolutional neural network (CNN) on both CPU and GPU. We also demonstrate how to train a CNN over multiple GPUs. 10 | 11 | Detailed instructions on how to get started available at: 12 | 13 | http://tensorflow.org/tutorials/deep_cnn/ 14 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Makes helper libraries available in the cifar10 package.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import cifar10 22 | import cifar10_input 23 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Evaluation for CIFAR-10. 17 | 18 | Accuracy: 19 | cifar10_train.py achieves 83.0% accuracy after 100K steps (256 epochs 20 | of mnist_data) as judged by cifar10_eval.py. 21 | 22 | Speed: 23 | On a single Tesla K40, cifar10_train.py processes a single batch of 128 images 24 | in 0.25-0.35 sec (i.e. 350 - 600 images /sec). The model reaches ~86% 25 | accuracy after 100K steps in 8 hours of training time. 26 | 27 | Usage: 28 | Please see the tutorial and website for how to download the CIFAR-10 29 | mnist_data set, compile the program and train the model. 
30 | 31 | http://tensorflow.org/tutorials/deep_cnn/ 32 | """ 33 | from __future__ import absolute_import 34 | from __future__ import division 35 | from __future__ import print_function 36 | 37 | from datetime import datetime 38 | import math 39 | import time 40 | 41 | import numpy as np 42 | import tensorflow as tf 43 | 44 | import cifar10 45 | 46 | FLAGS = tf.app.flags.FLAGS 47 | 48 | tf.app.flags.DEFINE_string('eval_dir', '/tmp/cifar10_eval', 49 | """Directory where to write event logs.""") 50 | tf.app.flags.DEFINE_string('eval_data', 'test', 51 | """Either 'test' or 'train_eval'.""") 52 | tf.app.flags.DEFINE_string('checkpoint_dir', '/tmp/cifar10_train', 53 | """Directory where to read model checkpoints.""") 54 | tf.app.flags.DEFINE_integer('eval_interval_secs', 60 * 5, 55 | """How often to run the eval.""") 56 | tf.app.flags.DEFINE_integer('num_examples', 10000, 57 | """Number of examples to run.""") 58 | tf.app.flags.DEFINE_boolean('run_once', False, 59 | """Whether to run eval only once.""") 60 | 61 | 62 | def eval_once(saver, summary_writer, top_k_op, summary_op): 63 | """Run Eval once. 64 | 65 | Args: 66 | saver: Saver. 67 | summary_writer: Summary writer. 68 | top_k_op: Top K op. 69 | summary_op: Summary op. 70 | """ 71 | with tf.Session() as sess: 72 | ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) 73 | if ckpt and ckpt.model_checkpoint_path: 74 | # Restores from checkpoint 75 | saver.restore(sess, ckpt.model_checkpoint_path) 76 | # Assuming model_checkpoint_path looks something like: 77 | # /my-favorite-path/cifar10_train/model.ckpt-0, 78 | # extract global_step from it. 79 | global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] 80 | else: 81 | print('No checkpoint file found') 82 | return 83 | 84 | # Start the queue runners. 85 | coord = tf.train.Coordinator() 86 | try: 87 | threads = [] 88 | for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): 89 | threads.extend(qr.create_threads(sess, coord=coord, daemon=True, 90 | start=True)) 91 | 92 | num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size)) 93 | true_count = 0 # Counts the number of correct predictions. 94 | total_sample_count = num_iter * FLAGS.batch_size 95 | step = 0 96 | while step < num_iter and not coord.should_stop(): 97 | predictions = sess.run([top_k_op]) 98 | true_count += np.sum(predictions) 99 | step += 1 100 | 101 | # Compute precision @ 1. 102 | precision = true_count / total_sample_count 103 | print('%s: precision @ 1 = %.3f' % (datetime.now(), precision)) 104 | 105 | summary = tf.Summary() 106 | summary.ParseFromString(sess.run(summary_op)) 107 | summary.value.add(tag='Precision @ 1', simple_value=precision) 108 | summary_writer.add_summary(summary, global_step) 109 | except Exception as e: # pylint: disable=broad-except 110 | coord.request_stop(e) 111 | 112 | coord.request_stop() 113 | coord.join(threads, stop_grace_period_secs=10) 114 | 115 | 116 | def evaluate(): 117 | """Eval CIFAR-10 for a number of steps.""" 118 | with tf.Graph().as_default() as g: 119 | # Get images and labels for CIFAR-10. 120 | eval_data = FLAGS.eval_data == 'test' 121 | images, labels = cifar10.inputs(eval_data=eval_data) 122 | 123 | # Build a Graph that computes the logits predictions from the 124 | # inference model. 125 | logits = cifar10.inference(images) 126 | 127 | # Calculate predictions. 128 | top_k_op = tf.nn.in_top_k(logits, labels, 1) 129 | 130 | # Restore the moving average version of the learned variables for eval. 
131 | variable_averages = tf.train.ExponentialMovingAverage( 132 | cifar10.MOVING_AVERAGE_DECAY) 133 | variables_to_restore = variable_averages.variables_to_restore() 134 | saver = tf.train.Saver(variables_to_restore) 135 | 136 | # Build the summary operation based on the TF collection of Summaries. 137 | summary_op = tf.summary.merge_all() 138 | 139 | summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) 140 | 141 | while True: 142 | eval_once(saver, summary_writer, top_k_op, summary_op) 143 | if FLAGS.run_once: 144 | break 145 | time.sleep(FLAGS.eval_interval_secs) 146 | 147 | 148 | def main(argv=None): # pylint: disable=unused-argument 149 | cifar10.maybe_download_and_extract() 150 | if tf.gfile.Exists(FLAGS.eval_dir): 151 | tf.gfile.DeleteRecursively(FLAGS.eval_dir) 152 | tf.gfile.MakeDirs(FLAGS.eval_dir) 153 | evaluate() 154 | 155 | 156 | if __name__ == '__main__': 157 | tf.app.run() 158 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_input_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """Tests for cifar10 input.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import os 23 | 24 | import tensorflow as tf 25 | 26 | import cifar10_input 27 | 28 | 29 | class CIFAR10InputTest(tf.test.TestCase): 30 | 31 | def _record(self, label, red, green, blue): 32 | image_size = 32 * 32 33 | record = bytes(bytearray([label] + [red] * image_size + 34 | [green] * image_size + [blue] * image_size)) 35 | expected = [[[red, green, blue]] * 32] * 32 36 | return record, expected 37 | 38 | def testSimple(self): 39 | labels = [9, 3, 0] 40 | records = [self._record(labels[0], 0, 128, 255), 41 | self._record(labels[1], 255, 0, 1), 42 | self._record(labels[2], 254, 255, 0)] 43 | contents = b"".join([record for record, _ in records]) 44 | expected = [expected for _, expected in records] 45 | filename = os.path.join(self.get_temp_dir(), "cifar") 46 | open(filename, "wb").write(contents) 47 | 48 | with self.test_session() as sess: 49 | q = tf.FIFOQueue(99, [tf.string], shapes=()) 50 | q.enqueue([filename]).run() 51 | q.close().run() 52 | result = cifar10_input.read_cifar10(q) 53 | 54 | for i in range(3): 55 | key, label, uint8image = sess.run([ 56 | result.key, result.label, result.uint8image]) 57 | self.assertEqual("%s:%d" % (filename, i), tf.compat.as_text(key)) 58 | self.assertEqual(labels[i], label) 59 | self.assertAllEqual(expected[i], uint8image) 60 | 61 | with self.assertRaises(tf.errors.OutOfRangeError): 62 | sess.run([result.key, result.uint8image]) 63 | 64 | 65 | if __name__ == "__main__": 66 | tf.test.main() 67 | -------------------------------------------------------------------------------- /6-cnn_image_classification(CIFAR-10)/6-2 tensorflow_model_image_cifar10(single gpu& multi gpu)/tutorials/image/cifar10/cifar10_train.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A binary to train CIFAR-10 using a single GPU. 17 | 18 | Accuracy: 19 | cifar10_train.py achieves ~86% accuracy after 100K steps (256 epochs of 20 | mnist_data) as judged by cifar10_eval.py. 21 | 22 | Speed: With batch_size 128. 23 | 24 | System | Step Time (sec/batch) | Accuracy 25 | ------------------------------------------------------------------ 26 | 1 Tesla K20m | 0.35-0.60 | ~86% at 60K steps (5 hours) 27 | 1 Tesla K40m | 0.25-0.35 | ~86% at 100K steps (4 hours) 28 | 29 | Usage: 30 | Please see the tutorial and website for how to download the CIFAR-10 31 | mnist_data set, compile the program and train the model. 
32 | 33 | http://tensorflow.org/tutorials/deep_cnn/ 34 | """ 35 | from __future__ import absolute_import 36 | from __future__ import division 37 | from __future__ import print_function 38 | 39 | from datetime import datetime 40 | import time 41 | 42 | import tensorflow as tf 43 | 44 | import cifar10 45 | 46 | FLAGS = tf.app.flags.FLAGS 47 | 48 | tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train', 49 | """Directory where to write event logs """ 50 | """and checkpoint.""") 51 | tf.app.flags.DEFINE_integer('max_steps', 1000000, 52 | """Number of batches to run.""") 53 | tf.app.flags.DEFINE_boolean('log_device_placement', False, 54 | """Whether to log device placement.""") 55 | tf.app.flags.DEFINE_integer('log_frequency', 10, 56 | """How often to log results to the console.""") 57 | 58 | 59 | def train(): 60 | """Train CIFAR-10 for a number of steps.""" 61 | with tf.Graph().as_default(): 62 | global_step = tf.train.get_or_create_global_step() 63 | 64 | # Get images and labels for CIFAR-10. 65 | # Force input pipeline to CPU:0 to avoid operations sometimes ending up on 66 | # GPU and resulting in a slow down. 67 | with tf.device('/cpu:0'): 68 | images, labels = cifar10.distorted_inputs() 69 | 70 | # Build a Graph that computes the logits predictions from the 71 | # inference model. 72 | logits = cifar10.inference(images) 73 | 74 | # Calculate loss. 75 | loss = cifar10.loss(logits, labels) 76 | 77 | # Build a Graph that trains the model with one batch of examples and 78 | # updates the model parameters. 79 | train_op = cifar10.train(loss, global_step) 80 | 81 | class _LoggerHook(tf.train.SessionRunHook): 82 | """Logs loss and runtime.""" 83 | 84 | def begin(self): 85 | self._step = -1 86 | self._start_time = time.time() 87 | 88 | def before_run(self, run_context): 89 | self._step += 1 90 | return tf.train.SessionRunArgs(loss) # Asks for loss value. 91 | 92 | def after_run(self, run_context, run_values): 93 | if self._step % FLAGS.log_frequency == 0: 94 | current_time = time.time() 95 | duration = current_time - self._start_time 96 | self._start_time = current_time 97 | 98 | loss_value = run_values.results 99 | examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration 100 | sec_per_batch = float(duration / FLAGS.log_frequency) 101 | 102 | format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 103 | 'sec/batch)') 104 | print (format_str % (datetime.now(), self._step, loss_value, 105 | examples_per_sec, sec_per_batch)) 106 | 107 | with tf.train.MonitoredTrainingSession( 108 | checkpoint_dir=FLAGS.train_dir, 109 | hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps), 110 | tf.train.NanTensorHook(loss), 111 | _LoggerHook()], 112 | config=tf.ConfigProto( 113 | log_device_placement=FLAGS.log_device_placement)) as mon_sess: 114 | while not mon_sess.should_stop(): 115 | mon_sess.run(train_op) 116 | 117 | 118 | def main(argv=None): # pylint: disable=unused-argument 119 | cifar10.maybe_download_and_extract() 120 | if tf.gfile.Exists(FLAGS.train_dir): 121 | tf.gfile.DeleteRecursively(FLAGS.train_dir) 122 | tf.gfile.MakeDirs(FLAGS.train_dir) 123 | train() 124 | 125 | 126 | if __name__ == '__main__': 127 | tf.app.run() 128 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full_deploy" 2 | # N.B. 
input image must be in CIFAR-10 format 3 | # as described at http://www.cs.toronto.edu/~kriz/cifar.html 4 | layer { 5 | name: "data" 6 | type: "Input" 7 | top: "data" 8 | input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } } 9 | } 10 | layer { 11 | name: "conv1" 12 | type: "Convolution" 13 | bottom: "data" 14 | top: "conv1" 15 | param { 16 | lr_mult: 1 17 | } 18 | param { 19 | lr_mult: 2 20 | } 21 | convolution_param { 22 | num_output: 32 23 | pad: 2 24 | kernel_size: 5 25 | stride: 1 26 | } 27 | } 28 | layer { 29 | name: "pool1" 30 | type: "Pooling" 31 | bottom: "conv1" 32 | top: "pool1" 33 | pooling_param { 34 | pool: MAX 35 | kernel_size: 3 36 | stride: 2 37 | } 38 | } 39 | layer { 40 | name: "relu1" 41 | type: "ReLU" 42 | bottom: "pool1" 43 | top: "pool1" 44 | } 45 | layer { 46 | name: "norm1" 47 | type: "LRN" 48 | bottom: "pool1" 49 | top: "norm1" 50 | lrn_param { 51 | local_size: 3 52 | alpha: 5e-05 53 | beta: 0.75 54 | norm_region: WITHIN_CHANNEL 55 | } 56 | } 57 | layer { 58 | name: "conv2" 59 | type: "Convolution" 60 | bottom: "norm1" 61 | top: "conv2" 62 | param { 63 | lr_mult: 1 64 | } 65 | param { 66 | lr_mult: 2 67 | } 68 | convolution_param { 69 | num_output: 32 70 | pad: 2 71 | kernel_size: 5 72 | stride: 1 73 | } 74 | } 75 | layer { 76 | name: "relu2" 77 | type: "ReLU" 78 | bottom: "conv2" 79 | top: "conv2" 80 | } 81 | layer { 82 | name: "pool2" 83 | type: "Pooling" 84 | bottom: "conv2" 85 | top: "pool2" 86 | pooling_param { 87 | pool: AVE 88 | kernel_size: 3 89 | stride: 2 90 | } 91 | } 92 | layer { 93 | name: "norm2" 94 | type: "LRN" 95 | bottom: "pool2" 96 | top: "norm2" 97 | lrn_param { 98 | local_size: 3 99 | alpha: 5e-05 100 | beta: 0.75 101 | norm_region: WITHIN_CHANNEL 102 | } 103 | } 104 | layer { 105 | name: "conv3" 106 | type: "Convolution" 107 | bottom: "norm2" 108 | top: "conv3" 109 | convolution_param { 110 | num_output: 64 111 | pad: 2 112 | kernel_size: 5 113 | stride: 1 114 | } 115 | } 116 | layer { 117 | name: "relu3" 118 | type: "ReLU" 119 | bottom: "conv3" 120 | top: "conv3" 121 | } 122 | layer { 123 | name: "pool3" 124 | type: "Pooling" 125 | bottom: "conv3" 126 | top: "pool3" 127 | pooling_param { 128 | pool: AVE 129 | kernel_size: 3 130 | stride: 2 131 | } 132 | } 133 | layer { 134 | name: "ip1" 135 | type: "InnerProduct" 136 | bottom: "pool3" 137 | top: "ip1" 138 | param { 139 | lr_mult: 1 140 | decay_mult: 250 141 | } 142 | param { 143 | lr_mult: 2 144 | decay_mult: 0 145 | } 146 | inner_product_param { 147 | num_output: 10 148 | } 149 | } 150 | layer { 151 | name: "prob" 152 | type: "Softmax" 153 | bottom: "ip1" 154 | top: "prob" 155 | } 156 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_solver.prototxt: -------------------------------------------------------------------------------- 1 | # reduce learning rate after 120 epochs (60000 iters) by factor 0f 10 2 | # then another factor of 10 after 10 more epochs (5000 iters) 3 | 4 | # The train/test net protocol buffer definition 5 | net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt" 6 | # test_iter specifies how many forward passes the test should carry out. 7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations, 8 | # covering the full 10,000 testing images. 9 | test_iter: 10 10 | # Carry out testing every 1000 training iterations. 11 | test_interval: 1000 12 | # The base learning rate, momentum and the weight decay of the network. 
13 | base_lr: 0.001 14 | momentum: 0.9 15 | #weight_decay: 0.004 16 | # The learning rate policy 17 | lr_policy: "step" 18 | gamma: 1 19 | stepsize: 5000 20 | # Display every 100 iterations 21 | display: 100 22 | # The maximum number of iterations 23 | max_iter: 60000 24 | # snapshot intermediate results 25 | snapshot: 10000 26 | snapshot_prefix: "examples/cifar10_full_sigmoid" 27 | # solver mode: CPU or GPU 28 | solver_mode: GPU 29 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt: -------------------------------------------------------------------------------- 1 | # reduce learning rate after 120 epochs (60000 iters) by factor 0f 10 2 | # then another factor of 10 after 10 more epochs (5000 iters) 3 | 4 | # The train/test net protocol buffer definition 5 | net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt" 6 | # test_iter specifies how many forward passes the test should carry out. 7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations, 8 | # covering the full 10,000 testing images. 9 | test_iter: 10 10 | # Carry out testing every 1000 training iterations. 11 | test_interval: 1000 12 | # The base learning rate, momentum and the weight decay of the network. 13 | base_lr: 0.001 14 | momentum: 0.9 15 | #weight_decay: 0.004 16 | # The learning rate policy 17 | lr_policy: "step" 18 | gamma: 1 19 | stepsize: 5000 20 | # Display every 100 iterations 21 | display: 100 22 | # The maximum number of iterations 23 | max_iter: 60000 24 | # snapshot intermediate results 25 | snapshot: 10000 26 | snapshot_prefix: "examples/cifar10_full_sigmoid_bn" 27 | # solver mode: CPU or GPU 28 | solver_mode: GPU 29 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_file: "examples/cifar10/mean.binaryproto" 12 | } 13 | data_param { 14 | source: "examples/cifar10/cifar10_train_lmdb" 15 | batch_size: 111 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "cifar" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | mean_file: "examples/cifar10/mean.binaryproto" 29 | } 30 | data_param { 31 | source: "examples/cifar10/cifar10_test_lmdb" 32 | batch_size: 1000 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv1" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 32 49 | pad: 2 50 | kernel_size: 5 51 | stride: 1 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.0001 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | 73 | 74 | 75 | layer { 76 | name: "Sigmoid1" 77 | type: "Sigmoid" 78 | bottom: "pool1" 79 | top: "Sigmoid1" 80 | } 81 | 82 | layer { 83 | name: "conv2" 84 | type: "Convolution" 85 | bottom: "Sigmoid1" 86 | top: "conv2" 87 | param { 88 | lr_mult: 1 89 | } 90 | 
param { 91 | lr_mult: 2 92 | } 93 | convolution_param { 94 | num_output: 32 95 | pad: 2 96 | kernel_size: 5 97 | stride: 1 98 | weight_filler { 99 | type: "gaussian" 100 | std: 0.01 101 | } 102 | bias_filler { 103 | type: "constant" 104 | } 105 | } 106 | } 107 | 108 | 109 | layer { 110 | name: "Sigmoid2" 111 | type: "Sigmoid" 112 | bottom: "conv2" 113 | top: "Sigmoid2" 114 | } 115 | layer { 116 | name: "pool2" 117 | type: "Pooling" 118 | bottom: "Sigmoid2" 119 | top: "pool2" 120 | pooling_param { 121 | pool: AVE 122 | kernel_size: 3 123 | stride: 2 124 | } 125 | } 126 | layer { 127 | name: "conv3" 128 | type: "Convolution" 129 | bottom: "pool2" 130 | top: "conv3" 131 | convolution_param { 132 | num_output: 64 133 | pad: 2 134 | kernel_size: 5 135 | stride: 1 136 | weight_filler { 137 | type: "gaussian" 138 | std: 0.01 139 | } 140 | bias_filler { 141 | type: "constant" 142 | } 143 | } 144 | param { 145 | lr_mult: 1 146 | } 147 | param { 148 | lr_mult: 1 149 | } 150 | 151 | } 152 | 153 | layer { 154 | name: "Sigmoid3" 155 | type: "Sigmoid" 156 | bottom: "conv3" 157 | top: "Sigmoid3" 158 | } 159 | 160 | layer { 161 | name: "pool3" 162 | type: "Pooling" 163 | bottom: "Sigmoid3" 164 | top: "pool3" 165 | pooling_param { 166 | pool: AVE 167 | kernel_size: 3 168 | stride: 2 169 | } 170 | } 171 | 172 | layer { 173 | name: "ip1" 174 | type: "InnerProduct" 175 | bottom: "pool3" 176 | top: "ip1" 177 | param { 178 | lr_mult: 1 179 | decay_mult: 0 180 | } 181 | param { 182 | lr_mult: 2 183 | decay_mult: 0 184 | } 185 | inner_product_param { 186 | num_output: 10 187 | weight_filler { 188 | type: "gaussian" 189 | std: 0.01 190 | } 191 | bias_filler { 192 | type: "constant" 193 | } 194 | } 195 | } 196 | layer { 197 | name: "accuracy" 198 | type: "Accuracy" 199 | bottom: "ip1" 200 | bottom: "label" 201 | top: "accuracy" 202 | include { 203 | phase: TEST 204 | } 205 | } 206 | layer { 207 | name: "loss" 208 | type: "SoftmaxWithLoss" 209 | bottom: "ip1" 210 | bottom: "label" 211 | top: "loss" 212 | } 213 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_file: "examples/cifar10/mean.binaryproto" 12 | } 13 | data_param { 14 | source: "examples/cifar10/cifar10_train_lmdb" 15 | batch_size: 100 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "cifar" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | mean_file: "examples/cifar10/mean.binaryproto" 29 | } 30 | data_param { 31 | source: "examples/cifar10/cifar10_test_lmdb" 32 | batch_size: 1000 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv1" 41 | param { 42 | lr_mult: 1 43 | } 44 | convolution_param { 45 | num_output: 32 46 | pad: 2 47 | kernel_size: 5 48 | stride: 1 49 | bias_term: false 50 | weight_filler { 51 | type: "gaussian" 52 | std: 0.0001 53 | } 54 | } 55 | } 56 | layer { 57 | name: "pool1" 58 | type: "Pooling" 59 | bottom: "conv1" 60 | top: "pool1" 61 | pooling_param { 62 | pool: MAX 63 | kernel_size: 3 64 | stride: 2 65 | } 66 | } 67 | 68 | layer { 69 | name: "bn1" 70 | type: "BatchNorm" 71 | bottom: "pool1" 72 | top: "bn1" 
73 | param { 74 | lr_mult: 0 75 | } 76 | param { 77 | lr_mult: 0 78 | } 79 | param { 80 | lr_mult: 0 81 | } 82 | } 83 | 84 | layer { 85 | name: "Sigmoid1" 86 | type: "Sigmoid" 87 | bottom: "bn1" 88 | top: "Sigmoid1" 89 | } 90 | 91 | layer { 92 | name: "conv2" 93 | type: "Convolution" 94 | bottom: "Sigmoid1" 95 | top: "conv2" 96 | param { 97 | lr_mult: 1 98 | } 99 | convolution_param { 100 | num_output: 32 101 | pad: 2 102 | kernel_size: 5 103 | stride: 1 104 | bias_term: false 105 | weight_filler { 106 | type: "gaussian" 107 | std: 0.01 108 | } 109 | } 110 | } 111 | 112 | layer { 113 | name: "bn2" 114 | type: "BatchNorm" 115 | bottom: "conv2" 116 | top: "bn2" 117 | param { 118 | lr_mult: 0 119 | } 120 | param { 121 | lr_mult: 0 122 | } 123 | param { 124 | lr_mult: 0 125 | } 126 | } 127 | 128 | layer { 129 | name: "Sigmoid2" 130 | type: "Sigmoid" 131 | bottom: "bn2" 132 | top: "Sigmoid2" 133 | } 134 | layer { 135 | name: "pool2" 136 | type: "Pooling" 137 | bottom: "Sigmoid2" 138 | top: "pool2" 139 | pooling_param { 140 | pool: AVE 141 | kernel_size: 3 142 | stride: 2 143 | } 144 | } 145 | layer { 146 | name: "conv3" 147 | type: "Convolution" 148 | bottom: "pool2" 149 | top: "conv3" 150 | param { 151 | lr_mult: 1 152 | } 153 | convolution_param { 154 | num_output: 64 155 | pad: 2 156 | kernel_size: 5 157 | stride: 1 158 | bias_term: false 159 | weight_filler { 160 | type: "gaussian" 161 | std: 0.01 162 | } 163 | } 164 | } 165 | 166 | layer { 167 | name: "bn3" 168 | type: "BatchNorm" 169 | bottom: "conv3" 170 | top: "bn3" 171 | param { 172 | lr_mult: 0 173 | } 174 | param { 175 | lr_mult: 0 176 | } 177 | param { 178 | lr_mult: 0 179 | } 180 | } 181 | 182 | layer { 183 | name: "Sigmoid3" 184 | type: "Sigmoid" 185 | bottom: "bn3" 186 | top: "Sigmoid3" 187 | } 188 | layer { 189 | name: "pool3" 190 | type: "Pooling" 191 | bottom: "Sigmoid3" 192 | top: "pool3" 193 | pooling_param { 194 | pool: AVE 195 | kernel_size: 3 196 | stride: 2 197 | } 198 | } 199 | 200 | layer { 201 | name: "ip1" 202 | type: "InnerProduct" 203 | bottom: "pool3" 204 | top: "ip1" 205 | param { 206 | lr_mult: 1 207 | decay_mult: 1 208 | } 209 | param { 210 | lr_mult: 1 211 | decay_mult: 0 212 | } 213 | inner_product_param { 214 | num_output: 10 215 | weight_filler { 216 | type: "gaussian" 217 | std: 0.01 218 | } 219 | bias_filler { 220 | type: "constant" 221 | } 222 | } 223 | } 224 | layer { 225 | name: "accuracy" 226 | type: "Accuracy" 227 | bottom: "ip1" 228 | bottom: "label" 229 | top: "accuracy" 230 | include { 231 | phase: TEST 232 | } 233 | } 234 | layer { 235 | name: "loss" 236 | type: "SoftmaxWithLoss" 237 | bottom: "ip1" 238 | bottom: "label" 239 | top: "loss" 240 | } 241 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver.prototxt: -------------------------------------------------------------------------------- 1 | # reduce learning rate after 120 epochs (60000 iters) by factor 0f 10 2 | # then another factor of 10 after 10 more epochs (5000 iters) 3 | 4 | # The train/test net protocol buffer definition 5 | net: "examples/cifar10/cifar10_full_train_test.prototxt" 6 | # test_iter specifies how many forward passes the test should carry out. 7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations, 8 | # covering the full 10,000 testing images. 9 | test_iter: 100 10 | # Carry out testing every 1000 training iterations. 
11 | test_interval: 1000 12 | # The base learning rate, momentum and the weight decay of the network. 13 | base_lr: 0.001 14 | momentum: 0.9 15 | weight_decay: 0.004 16 | # The learning rate policy 17 | lr_policy: "fixed" 18 | # Display every 200 iterations 19 | display: 200 20 | # The maximum number of iterations 21 | max_iter: 60000 22 | # snapshot intermediate results 23 | snapshot: 10000 24 | snapshot_format: HDF5 25 | snapshot_prefix: "examples/cifar10/cifar10_full" 26 | # solver mode: CPU or GPU 27 | solver_mode: GPU 28 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver_lr1.prototxt: -------------------------------------------------------------------------------- 1 | # reduce learning rate after 120 epochs (60000 iters) by factor 0f 10 2 | # then another factor of 10 after 10 more epochs (5000 iters) 3 | 4 | # The train/test net protocol buffer definition 5 | net: "examples/cifar10/cifar10_full_train_test.prototxt" 6 | # test_iter specifies how many forward passes the test should carry out. 7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations, 8 | # covering the full 10,000 testing images. 9 | test_iter: 100 10 | # Carry out testing every 1000 training iterations. 11 | test_interval: 1000 12 | # The base learning rate, momentum and the weight decay of the network. 13 | base_lr: 0.0001 14 | momentum: 0.9 15 | weight_decay: 0.004 16 | # The learning rate policy 17 | lr_policy: "fixed" 18 | # Display every 200 iterations 19 | display: 200 20 | # The maximum number of iterations 21 | max_iter: 65000 22 | # snapshot intermediate results 23 | snapshot: 5000 24 | snapshot_format: HDF5 25 | snapshot_prefix: "examples/cifar10/cifar10_full" 26 | # solver mode: CPU or GPU 27 | solver_mode: GPU 28 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_solver_lr2.prototxt: -------------------------------------------------------------------------------- 1 | # reduce learning rate after 120 epochs (60000 iters) by factor 0f 10 2 | # then another factor of 10 after 10 more epochs (5000 iters) 3 | 4 | # The train/test net protocol buffer definition 5 | net: "examples/cifar10/cifar10_full_train_test.prototxt" 6 | # test_iter specifies how many forward passes the test should carry out. 7 | # In the case of CIFAR10, we have test batch size 100 and 100 test iterations, 8 | # covering the full 10,000 testing images. 9 | test_iter: 100 10 | # Carry out testing every 1000 training iterations. 11 | test_interval: 1000 12 | # The base learning rate, momentum and the weight decay of the network. 
13 | base_lr: 0.00001 14 | momentum: 0.9 15 | weight_decay: 0.004 16 | # The learning rate policy 17 | lr_policy: "fixed" 18 | # Display every 200 iterations 19 | display: 200 20 | # The maximum number of iterations 21 | max_iter: 70000 22 | # snapshot intermediate results 23 | snapshot: 5000 24 | snapshot_format: HDF5 25 | snapshot_prefix: "examples/cifar10/cifar10_full" 26 | # solver mode: CPU or GPU 27 | solver_mode: GPU 28 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_full_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_full" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_file: "examples/cifar10/mean.binaryproto" 12 | } 13 | data_param { 14 | source: "examples/cifar10/cifar10_train_lmdb" 15 | batch_size: 100 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "cifar" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | mean_file: "examples/cifar10/mean.binaryproto" 29 | } 30 | data_param { 31 | source: "examples/cifar10/cifar10_test_lmdb" 32 | batch_size: 100 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv1" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 32 49 | pad: 2 50 | kernel_size: 5 51 | stride: 1 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.0001 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "relu1" 74 | type: "ReLU" 75 | bottom: "pool1" 76 | top: "pool1" 77 | } 78 | layer { 79 | name: "norm1" 80 | type: "LRN" 81 | bottom: "pool1" 82 | top: "norm1" 83 | lrn_param { 84 | local_size: 3 85 | alpha: 5e-05 86 | beta: 0.75 87 | norm_region: WITHIN_CHANNEL 88 | } 89 | } 90 | layer { 91 | name: "conv2" 92 | type: "Convolution" 93 | bottom: "norm1" 94 | top: "conv2" 95 | param { 96 | lr_mult: 1 97 | } 98 | param { 99 | lr_mult: 2 100 | } 101 | convolution_param { 102 | num_output: 32 103 | pad: 2 104 | kernel_size: 5 105 | stride: 1 106 | weight_filler { 107 | type: "gaussian" 108 | std: 0.01 109 | } 110 | bias_filler { 111 | type: "constant" 112 | } 113 | } 114 | } 115 | layer { 116 | name: "relu2" 117 | type: "ReLU" 118 | bottom: "conv2" 119 | top: "conv2" 120 | } 121 | layer { 122 | name: "pool2" 123 | type: "Pooling" 124 | bottom: "conv2" 125 | top: "pool2" 126 | pooling_param { 127 | pool: AVE 128 | kernel_size: 3 129 | stride: 2 130 | } 131 | } 132 | layer { 133 | name: "norm2" 134 | type: "LRN" 135 | bottom: "pool2" 136 | top: "norm2" 137 | lrn_param { 138 | local_size: 3 139 | alpha: 5e-05 140 | beta: 0.75 141 | norm_region: WITHIN_CHANNEL 142 | } 143 | } 144 | layer { 145 | name: "conv3" 146 | type: "Convolution" 147 | bottom: "norm2" 148 | top: "conv3" 149 | convolution_param { 150 | num_output: 64 151 | pad: 2 152 | kernel_size: 5 153 | stride: 1 154 | weight_filler { 155 | type: "gaussian" 156 | std: 0.01 157 | } 158 | bias_filler { 159 | type: "constant" 160 | } 161 | } 162 | } 163 | layer { 164 | name: "relu3" 165 | type: "ReLU" 166 | bottom: "conv3" 167 | top: 
"conv3" 168 | } 169 | layer { 170 | name: "pool3" 171 | type: "Pooling" 172 | bottom: "conv3" 173 | top: "pool3" 174 | pooling_param { 175 | pool: AVE 176 | kernel_size: 3 177 | stride: 2 178 | } 179 | } 180 | layer { 181 | name: "ip1" 182 | type: "InnerProduct" 183 | bottom: "pool3" 184 | top: "ip1" 185 | param { 186 | lr_mult: 1 187 | decay_mult: 250 188 | } 189 | param { 190 | lr_mult: 2 191 | decay_mult: 0 192 | } 193 | inner_product_param { 194 | num_output: 10 195 | weight_filler { 196 | type: "gaussian" 197 | std: 0.01 198 | } 199 | bias_filler { 200 | type: "constant" 201 | } 202 | } 203 | } 204 | layer { 205 | name: "accuracy" 206 | type: "Accuracy" 207 | bottom: "ip1" 208 | bottom: "label" 209 | top: "accuracy" 210 | include { 211 | phase: TEST 212 | } 213 | } 214 | layer { 215 | name: "loss" 216 | type: "SoftmaxWithLoss" 217 | bottom: "ip1" 218 | bottom: "label" 219 | top: "loss" 220 | } 221 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_quick_test" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } } 7 | } 8 | layer { 9 | name: "conv1" 10 | type: "Convolution" 11 | bottom: "data" 12 | top: "conv1" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | } 19 | convolution_param { 20 | num_output: 32 21 | pad: 2 22 | kernel_size: 5 23 | stride: 1 24 | } 25 | } 26 | layer { 27 | name: "pool1" 28 | type: "Pooling" 29 | bottom: "conv1" 30 | top: "pool1" 31 | pooling_param { 32 | pool: MAX 33 | kernel_size: 3 34 | stride: 2 35 | } 36 | } 37 | layer { 38 | name: "relu1" 39 | type: "ReLU" 40 | bottom: "pool1" 41 | top: "pool1" 42 | } 43 | layer { 44 | name: "conv2" 45 | type: "Convolution" 46 | bottom: "pool1" 47 | top: "conv2" 48 | param { 49 | lr_mult: 1 50 | } 51 | param { 52 | lr_mult: 2 53 | } 54 | convolution_param { 55 | num_output: 32 56 | pad: 2 57 | kernel_size: 5 58 | stride: 1 59 | } 60 | } 61 | layer { 62 | name: "relu2" 63 | type: "ReLU" 64 | bottom: "conv2" 65 | top: "conv2" 66 | } 67 | layer { 68 | name: "pool2" 69 | type: "Pooling" 70 | bottom: "conv2" 71 | top: "pool2" 72 | pooling_param { 73 | pool: AVE 74 | kernel_size: 3 75 | stride: 2 76 | } 77 | } 78 | layer { 79 | name: "conv3" 80 | type: "Convolution" 81 | bottom: "pool2" 82 | top: "conv3" 83 | param { 84 | lr_mult: 1 85 | } 86 | param { 87 | lr_mult: 2 88 | } 89 | convolution_param { 90 | num_output: 64 91 | pad: 2 92 | kernel_size: 5 93 | stride: 1 94 | } 95 | } 96 | layer { 97 | name: "relu3" 98 | type: "ReLU" 99 | bottom: "conv3" 100 | top: "conv3" 101 | } 102 | layer { 103 | name: "pool3" 104 | type: "Pooling" 105 | bottom: "conv3" 106 | top: "pool3" 107 | pooling_param { 108 | pool: AVE 109 | kernel_size: 3 110 | stride: 2 111 | } 112 | } 113 | layer { 114 | name: "ip1" 115 | type: "InnerProduct" 116 | bottom: "pool3" 117 | top: "ip1" 118 | param { 119 | lr_mult: 1 120 | } 121 | param { 122 | lr_mult: 2 123 | } 124 | inner_product_param { 125 | num_output: 64 126 | } 127 | } 128 | layer { 129 | name: "ip2" 130 | type: "InnerProduct" 131 | bottom: "ip1" 132 | top: "ip2" 133 | param { 134 | lr_mult: 1 135 | } 136 | param { 137 | lr_mult: 2 138 | } 139 | inner_product_param { 140 | num_output: 10 141 | } 142 | } 143 | layer { 144 | name: "prob" 145 | type: "Softmax" 146 | bottom: "ip2" 147 | top: "prob" 148 | } 149 
| -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_solver.prototxt: -------------------------------------------------------------------------------- 1 | # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 2 | 3 | # The train/test net protocol buffer definition 4 | net: "./cifar10_quick_train_test.prototxt" 5 | # test_iter specifies how many forward passes the test should carry out. 6 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 7 | # covering the full 10,000 testing images. 8 | test_iter: 100 9 | # Carry out testing every 500 training iterations. 10 | test_interval: 500 11 | # The base learning rate, momentum and the weight decay of the network. 12 | base_lr: 0.001 13 | momentum: 0.9 14 | weight_decay: 0.004 15 | # The learning rate policy 16 | lr_policy: "fixed" 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 4000 21 | # snapshot intermediate results 22 | snapshot: 4000 23 | snapshot_prefix: "./cifar10_quick" 24 | # solver mode: CPU or GPU 25 | solver_mode: GPU 26 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_solver_lr1.prototxt: -------------------------------------------------------------------------------- 1 | # reduce the learning rate after 8 epochs (4000 iters) by a factor of 10 2 | 3 | # The train/test net protocol buffer definition 4 | net: "examples/cifar10/cifar10_quick_train_test.prototxt" 5 | # test_iter specifies how many forward passes the test should carry out. 6 | # In the case of MNIST, we have test batch size 100 and 100 test iterations, 7 | # covering the full 10,000 testing images. 8 | test_iter: 100 9 | # Carry out testing every 500 training iterations. 10 | test_interval: 500 11 | # The base learning rate, momentum and the weight decay of the network. 
12 | base_lr: 0.0001 13 | momentum: 0.9 14 | weight_decay: 0.004 15 | # The learning rate policy 16 | lr_policy: "fixed" 17 | # Display every 100 iterations 18 | display: 100 19 | # The maximum number of iterations 20 | max_iter: 5000 21 | # snapshot intermediate results 22 | snapshot: 5000 23 | snapshot_format: HDF5 24 | snapshot_prefix: "examples/cifar10/cifar10_quick" 25 | # solver mode: CPU or GPU 26 | solver_mode: GPU 27 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/cifar10_quick_train_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "CIFAR10_quick" 2 | layer { 3 | name: "cifar" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mean_file: "examples/cifar10/mean.binaryproto" 12 | } 13 | data_param { 14 | source: "examples/cifar10/cifar10_train_lmdb" 15 | batch_size: 100 16 | backend: LMDB 17 | } 18 | } 19 | layer { 20 | name: "cifar" 21 | type: "Data" 22 | top: "data" 23 | top: "label" 24 | include { 25 | phase: TEST 26 | } 27 | transform_param { 28 | mean_file: "examples/cifar10/mean.binaryproto" 29 | } 30 | data_param { 31 | source: "examples/cifar10/cifar10_test_lmdb" 32 | batch_size: 100 33 | backend: LMDB 34 | } 35 | } 36 | layer { 37 | name: "conv1" 38 | type: "Convolution" 39 | bottom: "data" 40 | top: "conv1" 41 | param { 42 | lr_mult: 1 43 | } 44 | param { 45 | lr_mult: 2 46 | } 47 | convolution_param { 48 | num_output: 32 49 | pad: 2 50 | kernel_size: 5 51 | stride: 1 52 | weight_filler { 53 | type: "gaussian" 54 | std: 0.0001 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "pool1" 63 | type: "Pooling" 64 | bottom: "conv1" 65 | top: "pool1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "relu1" 74 | type: "ReLU" 75 | bottom: "pool1" 76 | top: "pool1" 77 | } 78 | layer { 79 | name: "conv2" 80 | type: "Convolution" 81 | bottom: "pool1" 82 | top: "conv2" 83 | param { 84 | lr_mult: 1 85 | } 86 | param { 87 | lr_mult: 2 88 | } 89 | convolution_param { 90 | num_output: 32 91 | pad: 2 92 | kernel_size: 5 93 | stride: 1 94 | weight_filler { 95 | type: "gaussian" 96 | std: 0.01 97 | } 98 | bias_filler { 99 | type: "constant" 100 | } 101 | } 102 | } 103 | layer { 104 | name: "relu2" 105 | type: "ReLU" 106 | bottom: "conv2" 107 | top: "conv2" 108 | } 109 | layer { 110 | name: "pool2" 111 | type: "Pooling" 112 | bottom: "conv2" 113 | top: "pool2" 114 | pooling_param { 115 | pool: AVE 116 | kernel_size: 3 117 | stride: 2 118 | } 119 | } 120 | layer { 121 | name: "conv3" 122 | type: "Convolution" 123 | bottom: "pool2" 124 | top: "conv3" 125 | param { 126 | lr_mult: 1 127 | } 128 | param { 129 | lr_mult: 2 130 | } 131 | convolution_param { 132 | num_output: 64 133 | pad: 2 134 | kernel_size: 5 135 | stride: 1 136 | weight_filler { 137 | type: "gaussian" 138 | std: 0.01 139 | } 140 | bias_filler { 141 | type: "constant" 142 | } 143 | } 144 | } 145 | layer { 146 | name: "relu3" 147 | type: "ReLU" 148 | bottom: "conv3" 149 | top: "conv3" 150 | } 151 | layer { 152 | name: "pool3" 153 | type: "Pooling" 154 | bottom: "conv3" 155 | top: "pool3" 156 | pooling_param { 157 | pool: AVE 158 | kernel_size: 3 159 | stride: 2 160 | } 161 | } 162 | layer { 163 | name: "ip1" 164 | type: "InnerProduct" 165 | bottom: "pool3" 166 | top: "ip1" 167 | param { 168 | lr_mult: 1 169 | } 170 | param { 
171 | lr_mult: 2 172 | } 173 | inner_product_param { 174 | num_output: 64 175 | weight_filler { 176 | type: "gaussian" 177 | std: 0.1 178 | } 179 | bias_filler { 180 | type: "constant" 181 | } 182 | } 183 | } 184 | layer { 185 | name: "ip2" 186 | type: "InnerProduct" 187 | bottom: "ip1" 188 | top: "ip2" 189 | param { 190 | lr_mult: 1 191 | } 192 | param { 193 | lr_mult: 2 194 | } 195 | inner_product_param { 196 | num_output: 10 197 | weight_filler { 198 | type: "gaussian" 199 | std: 0.1 200 | } 201 | bias_filler { 202 | type: "constant" 203 | } 204 | } 205 | } 206 | layer { 207 | name: "accuracy" 208 | type: "Accuracy" 209 | bottom: "ip2" 210 | bottom: "label" 211 | top: "accuracy" 212 | include { 213 | phase: TEST 214 | } 215 | } 216 | layer { 217 | name: "loss" 218 | type: "SoftmaxWithLoss" 219 | bottom: "ip2" 220 | bottom: "label" 221 | top: "loss" 222 | } 223 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/convert_cifar_data.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // This script converts the CIFAR dataset to the leveldb format used 3 | // by caffe to perform classification. 4 | // Usage: 5 | // convert_cifar_data input_folder output_db_file 6 | // The CIFAR dataset could be downloaded at 7 | // http://www.cs.toronto.edu/~kriz/cifar.html 8 | 9 | #include // NOLINT(readability/streams) 10 | #include 11 | 12 | #include "boost/scoped_ptr.hpp" 13 | #include "glog/logging.h" 14 | #include "google/protobuf/text_format.h" 15 | #include "stdint.h" 16 | 17 | #include "caffe/proto/caffe.pb.h" 18 | #include "caffe/util/db.hpp" 19 | #include "caffe/util/format.hpp" 20 | 21 | using caffe::Datum; 22 | using boost::scoped_ptr; 23 | using std::string; 24 | namespace db = caffe::db; 25 | 26 | const int kCIFARSize = 32; 27 | const int kCIFARImageNBytes = 3072; 28 | const int kCIFARBatchSize = 10000; 29 | const int kCIFARTrainBatches = 5; 30 | 31 | void read_image(std::ifstream* file, int* label, char* buffer) { 32 | char label_char; 33 | file->read(&label_char, 1); 34 | *label = label_char; 35 | file->read(buffer, kCIFARImageNBytes); 36 | return; 37 | } 38 | 39 | void convert_dataset(const string& input_folder, const string& output_folder, 40 | const string& db_type) { 41 | scoped_ptr train_db(db::GetDB(db_type)); 42 | train_db->Open(output_folder + "/cifar10_train_" + db_type, db::NEW); 43 | scoped_ptr txn(train_db->NewTransaction()); 44 | // Data buffer 45 | int label; 46 | char str_buffer[kCIFARImageNBytes]; 47 | Datum datum; 48 | datum.set_channels(3); 49 | datum.set_height(kCIFARSize); 50 | datum.set_width(kCIFARSize); 51 | 52 | LOG(INFO) << "Writing Training data"; 53 | for (int fileid = 0; fileid < kCIFARTrainBatches; ++fileid) { 54 | // Open files 55 | LOG(INFO) << "Training Batch " << fileid + 1; 56 | string batchFileName = input_folder + "/data_batch_" 57 | + caffe::format_int(fileid+1) + ".bin"; 58 | std::ifstream data_file(batchFileName.c_str(), 59 | std::ios::in | std::ios::binary); 60 | CHECK(data_file) << "Unable to open train file #" << fileid + 1; 61 | for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) { 62 | read_image(&data_file, &label, str_buffer); 63 | datum.set_label(label); 64 | datum.set_data(str_buffer, kCIFARImageNBytes); 65 | string out; 66 | CHECK(datum.SerializeToString(&out)); 67 | txn->Put(caffe::format_int(fileid * kCIFARBatchSize + itemid, 5), out); 68 | } 69 | } 70 | txn->Commit(); 71 | train_db->Close(); 72 | 73 | 
LOG(INFO) << "Writing Testing data"; 74 | scoped_ptr test_db(db::GetDB(db_type)); 75 | test_db->Open(output_folder + "/cifar10_test_" + db_type, db::NEW); 76 | txn.reset(test_db->NewTransaction()); 77 | // Open files 78 | std::ifstream data_file((input_folder + "/test_batch.bin").c_str(), 79 | std::ios::in | std::ios::binary); 80 | CHECK(data_file) << "Unable to open test file."; 81 | for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) { 82 | read_image(&data_file, &label, str_buffer); 83 | datum.set_label(label); 84 | datum.set_data(str_buffer, kCIFARImageNBytes); 85 | string out; 86 | CHECK(datum.SerializeToString(&out)); 87 | txn->Put(caffe::format_int(itemid, 5), out); 88 | } 89 | txn->Commit(); 90 | test_db->Close(); 91 | } 92 | 93 | int main(int argc, char** argv) { 94 | FLAGS_alsologtostderr = 1; 95 | 96 | if (argc != 4) { 97 | printf("This script converts the CIFAR dataset to the leveldb format used\n" 98 | "by caffe to perform classification.\n" 99 | "Usage:\n" 100 | " convert_cifar_data input_folder output_folder db_type\n" 101 | "Where the input folder should contain the binary batch files.\n" 102 | "The CIFAR dataset could be downloaded at\n" 103 | " http://www.cs.toronto.edu/~kriz/cifar.html\n" 104 | "You should gunzip them after downloading.\n"); 105 | } else { 106 | google::InitGoogleLogging(argv[0]); 107 | convert_dataset(string(argv[1]), string(argv[2]), string(argv[3])); 108 | } 109 | return 0; 110 | } 111 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/create_cifar10.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | echo Creating leveldb... 3 | if exist cifar10-leveldb ( 4 | rmdir cifar10-leveldb /s /q 5 | ) else ( 6 | mkdir cifar10-leveldb 7 | ) 8 | SET DATA="./cifar-10-batches-bin" 9 | SET GLOG_logtostderr=1 10 | "convert_cifar_data.exe" %DATA% ./cifar10-leveldb 11 | SET GLOG_logtostderr=1 12 | echo Computing image mean... 13 | "compute_image_mean.exe" ./cifar10-leveldb/cifar-train-leveldb mean.binaryproto 14 | echo Done. 15 | pause -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/readme.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: CIFAR-10 tutorial 3 | category: example 4 | description: Train and test Caffe on CIFAR-10 data. 5 | include_in_docs: true 6 | priority: 5 7 | --- 8 | 9 | Alex's CIFAR-10 tutorial, Caffe style 10 | ===================================== 11 | 12 | Alex Krizhevsky's [cuda-convnet](https://code.google.com/p/cuda-convnet/) details the model definitions, parameters, and training procedure for good performance on CIFAR-10. This example reproduces his results in Caffe. 13 | 14 | We will assume that you have Caffe successfully compiled. If not, please refer to the [Installation page](/installation.html). In this tutorial, we will assume that your caffe installation is located at `CAFFE_ROOT`. 15 | 16 | We thank @chyojn for the pull request that defined the model schemas and solver configurations. 17 | 18 | *This example is a work-in-progress. It would be nice to further explain details of the network and training choices and benchmark the full training.* 19 | 20 | Prepare the Dataset 21 | ------------------- 22 | 23 | You will first need to download and convert the data format from the [CIFAR-10 website](http://www.cs.toronto.edu/~kriz/cifar.html). 
To do this, simply run the following commands: 24 | 25 | cd $CAFFE_ROOT 26 | ./data/cifar10/get_cifar10.sh 27 | ./examples/cifar10/create_cifar10.sh 28 | 29 | If it complains that `wget` or `gunzip` are not installed, you need to install them respectively. After running the script there should be the dataset, `./cifar10-leveldb`, and the data set image mean `./mean.binaryproto`. 30 | 31 | The Model 32 | --------- 33 | 34 | The CIFAR-10 model is a CNN that composes layers of convolution, pooling, rectified linear unit (ReLU) nonlinearities, and local contrast normalization with a linear classifier on top of it all. We have defined the model in the `CAFFE_ROOT/examples/cifar10` directory's `cifar10_quick_train_test.prototxt`. 35 | 36 | Training and Testing the "Quick" Model 37 | -------------------------------------- 38 | 39 | Training the model is simple after you have written the network definition protobuf and solver protobuf files (refer to [MNIST Tutorial](../examples/mnist.html)). Simply run `train_quick.sh`, or the following command directly: 40 | 41 | cd $CAFFE_ROOT 42 | ./examples/cifar10/train_quick.sh 43 | 44 | `train_quick.sh` is a simple script, so have a look inside. The main tool for training is `caffe` with the `train` action, and the solver protobuf text file as its argument. 45 | 46 | When you run the code, you will see a lot of messages flying by like this: 47 | 48 | I0317 21:52:48.945710 2008298256 net.cpp:74] Creating Layer conv1 49 | I0317 21:52:48.945716 2008298256 net.cpp:84] conv1 <- data 50 | I0317 21:52:48.945725 2008298256 net.cpp:110] conv1 -> conv1 51 | I0317 21:52:49.298691 2008298256 net.cpp:125] Top shape: 100 32 32 32 (3276800) 52 | I0317 21:52:49.298719 2008298256 net.cpp:151] conv1 needs backward computation. 53 | 54 | These messages tell you the details about each layer, its connections and its output shape, which may be helpful in debugging. After the initialization, the training will start: 55 | 56 | I0317 21:52:49.309370 2008298256 net.cpp:166] Network initialization done. 57 | I0317 21:52:49.309376 2008298256 net.cpp:167] Memory required for Data 23790808 58 | I0317 21:52:49.309422 2008298256 solver.cpp:36] Solver scaffolding done. 59 | I0317 21:52:49.309447 2008298256 solver.cpp:47] Solving CIFAR10_quick_train 60 | 61 | Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this: 62 | 63 | I0317 21:53:12.179772 2008298256 solver.cpp:208] Iteration 100, lr = 0.001 64 | I0317 21:53:12.185698 2008298256 solver.cpp:65] Iteration 100, loss = 1.73643 65 | ... 66 | I0317 21:54:41.150030 2008298256 solver.cpp:87] Iteration 500, Testing net 67 | I0317 21:54:47.129461 2008298256 solver.cpp:114] Test score #0: 0.5504 68 | I0317 21:54:47.129500 2008298256 solver.cpp:114] Test score #1: 1.27805 69 | 70 | For each training iteration, `lr` is the learning rate of that iteration, and `loss` is the training function. For the output of the testing phase, **score 0 is the accuracy**, and **score 1 is the testing loss function**. 71 | 72 | And after making yourself a cup of coffee, you are done! 
73 | 74 | I0317 22:12:19.666914 2008298256 solver.cpp:87] Iteration 5000, Testing net 75 | I0317 22:12:25.580330 2008298256 solver.cpp:114] Test score #0: 0.7533 76 | I0317 22:12:25.580379 2008298256 solver.cpp:114] Test score #1: 0.739837 77 | I0317 22:12:25.587262 2008298256 solver.cpp:130] Snapshotting to cifar10_quick_iter_5000 78 | I0317 22:12:25.590215 2008298256 solver.cpp:137] Snapshotting solver state to cifar10_quick_iter_5000.solverstate 79 | I0317 22:12:25.592813 2008298256 solver.cpp:81] Optimization Done. 80 | 81 | Our model achieved ~75% test accuracy. The model parameters are stored in binary protobuf format in 82 | 83 | cifar10_quick_iter_5000 84 | 85 | which is ready-to-deploy in CPU or GPU mode! Refer to the `CAFFE_ROOT/examples/cifar10/cifar10_quick.prototxt` for the deployment model definition that can be called on new data. 86 | 87 | Why train on a GPU? 88 | ------------------- 89 | 90 | CIFAR-10, while still small, has enough data to make GPU training attractive. 91 | 92 | To compare CPU vs. GPU training speed, simply change one line in all the `cifar*solver.prototxt`: 93 | 94 | # solver mode: CPU or GPU 95 | solver_mode: CPU 96 | 97 | and you will be using CPU for training. 98 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | TOOLS=./build/tools 5 | 6 | $TOOLS/caffe train \ 7 | --solver=examples/cifar10/cifar10_full_solver.prototxt $@ 8 | 9 | # reduce learning rate by factor of 10 10 | $TOOLS/caffe train \ 11 | --solver=examples/cifar10/cifar10_full_solver_lr1.prototxt \ 12 | --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate $@ 13 | 14 | # reduce learning rate by factor of 10 15 | $TOOLS/caffe train \ 16 | --solver=examples/cifar10/cifar10_full_solver_lr2.prototxt \ 17 | --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate $@ 18 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full_sigmoid.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | TOOLS=./build/tools 5 | 6 | $TOOLS/caffe train \ 7 | --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt $@ 8 | 9 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_full_sigmoid_bn.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | TOOLS=./build/tools 5 | 6 | $TOOLS/caffe train \ 7 | --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt $@ 8 | 9 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-1 caffe-master/examples/cifar10/train_quick.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -e 3 | 4 | caffe train --solver=./cifar10_quick_solver.prototxt $@ 5 | 6 | # reduce learning rate by factor of 10 after 8 epochs 7 | caffe train \ 8 | --solver=./cifar10_quick_solver_lr1.prototxt \ 9 | --snapshot=./cifar10_quick_iter_4000.solverstate $@ 10 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/README.md: 
-------------------------------------------------------------------------------- 1 | # Keras examples directory 2 | 3 | ## Vision models examples 4 | 5 | [mnist_mlp.py](mnist_mlp.py) 6 | Trains a simple deep multi-layer perceptron on the MNIST dataset. 7 | 8 | [mnist_cnn.py](mnist_cnn.py) 9 | Trains a simple convnet on the MNIST dataset. 10 | 11 | [cifar10_cnn.py](cifar10_cnn.py) 12 | Trains a simple deep CNN on the CIFAR10 small images dataset. 13 | 14 | [cifar10_resnet.py](cifar10_resnet.py) 15 | Trains a ResNet on the CIFAR10 small images dataset. 16 | 17 | [conv_lstm.py](conv_lstm.py) 18 | Demonstrates the use of a convolutional LSTM network. 19 | 20 | [image_ocr.py](image_ocr.py) 21 | Trains a convolutional stack followed by a recurrent stack and a CTC logloss function to perform optical character recognition (OCR). 22 | 23 | [mnist_acgan.py](mnist_acgan.py) 24 | Implementation of AC-GAN (Auxiliary Classifier GAN) on the MNIST dataset 25 | 26 | [mnist_hierarchical_rnn.py](mnist_hierarchical_rnn.py) 27 | Trains a Hierarchical RNN (HRNN) to classify MNIST digits. 28 | 29 | [mnist_siamese.py](mnist_siamese.py) 30 | Trains a Siamese multi-layer perceptron on pairs of digits from the MNIST dataset. 31 | 32 | [mnist_swwae.py](mnist_swwae.py) 33 | Trains a Stacked What-Where AutoEncoder built on residual blocks on the MNIST dataset. 34 | 35 | [mnist_transfer_cnn.py](mnist_transfer_cnn.py) 36 | Transfer learning toy example. 37 | 38 | ---- 39 | 40 | ## Text & sequences examples 41 | 42 | [addition_rnn.py](addition_rnn.py) 43 | Implementation of sequence to sequence learning for performing addition of two numbers (as strings). 44 | 45 | [babi_rnn.py](babi_rnn.py) 46 | Trains a two-branch recurrent network on the bAbI dataset for reading comprehension. 47 | 48 | [babi_memnn.py](babi_memnn.py) 49 | Trains a memory network on the bAbI dataset for reading comprehension. 50 | 51 | [imdb_bidirectional_lstm.py](imdb_bidirectional_lstm.py) 52 | Trains a Bidirectional LSTM on the IMDB sentiment classification task. 53 | 54 | [imdb_cnn.py](imdb_cnn.py) 55 | Demonstrates the use of Convolution1D for text classification. 56 | 57 | [imdb_cnn_lstm.py](imdb_cnn_lstm.py) 58 | Trains a convolutional stack followed by a recurrent stack network on the IMDB sentiment classification task. 59 | 60 | [imdb_fasttext.py](imdb_fasttext.py) 61 | Trains a FastText model on the IMDB sentiment classification task. 62 | 63 | [imdb_lstm.py](imdb_lstm.py) 64 | Trains an LSTM model on the IMDB sentiment classification task. 65 | 66 | [lstm_stateful.py](lstm_stateful.py) 67 | Demonstrates how to use stateful RNNs to model long sequences efficiently. 68 | 69 | [pretrained_word_embeddings.py](pretrained_word_embeddings.py) 70 | Loads pre-trained word embeddings (GloVe embeddings) into a frozen Keras Embedding layer, and uses it to train a text classification model on the 20 Newsgroup dataset. 71 | 72 | [reuters_mlp.py](reuters_mlp.py) 73 | Trains and evaluate a simple MLP on the Reuters newswire topic classification task. 74 | 75 | ---- 76 | 77 | ## Generative models examples 78 | 79 | [lstm_text_generation.py](lstm_text_generation.py) 80 | Generates text from Nietzsche's writings. 81 | 82 | [conv_filter_visualization.py](conv_filter_visualization.py) 83 | Visualization of the filters of VGG16, via gradient ascent in input space. 84 | 85 | [deep_dream.py](deep_dream.py) 86 | Deep Dreams in Keras. 87 | 88 | [neural_doodle.py](neural_doodle.py) 89 | Neural doodle. 
90 | 91 | [neural_style_transfer.py](neural_style_transfer.py) 92 | Neural style transfer. 93 | 94 | [variational_autoencoder.py](variational_autoencoder.py) 95 | Demonstrates how to build a variational autoencoder. 96 | 97 | [variational_autoencoder_deconv.py](variational_autoencoder_deconv.py) 98 | Demonstrates how to build a variational autoencoder with Keras using deconvolution layers. 99 | 100 | ---- 101 | 102 | ## Examples demonstrating specific Keras functionality 103 | 104 | [antirectifier.py](antirectifier.py) 105 | Demonstrates how to write custom layers for Keras. 106 | 107 | [mnist_sklearn_wrapper.py](mnist_sklearn_wrapper.py) 108 | Demonstrates how to use the sklearn wrapper. 109 | 110 | [mnist_irnn.py](mnist_irnn.py) 111 | Reproduction of the IRNN experiment with pixel-by-pixel sequential MNIST in "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" by Le et al. 112 | 113 | [mnist_net2net.py](mnist_net2net.py) 114 | Reproduction of the Net2Net experiment with MNIST in "Net2Net: Accelerating Learning via Knowledge Transfer". 115 | 116 | [reuters_mlp_relu_vs_selu.py](reuters_mlp_relu_vs_selu.py) 117 | Compares self-normalizing MLPs with regular MLPs. 118 | 119 | [mnist_tfrecord.py](mnist_tfrecord.py) 120 | MNIST dataset with TFRecords, the standard TensorFlow data format. 121 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/antirectifier.py: -------------------------------------------------------------------------------- 1 | '''The example demonstrates how to write custom layers for Keras. 2 | 3 | We build a custom activation layer called 'Antirectifier', 4 | which modifies the shape of the tensor that passes through it. 5 | We need to specify two methods: `compute_output_shape` and `call`. 6 | 7 | Note that the same result can also be achieved via a Lambda layer. 8 | 9 | Because our custom layer is written with primitives from the Keras 10 | backend (`K`), our code can run both on TensorFlow and Theano. 11 | ''' 12 | 13 | from __future__ import print_function 14 | import keras 15 | from keras.models import Sequential 16 | from keras import layers 17 | from keras.datasets import mnist 18 | from keras import backend as K 19 | 20 | 21 | class Antirectifier(layers.Layer): 22 | '''This is the combination of a sample-wise 23 | L2 normalization with the concatenation of the 24 | positive part of the input with the negative part 25 | of the input. The result is a tensor of samples that are 26 | twice as large as the input samples. 27 | 28 | It can be used in place of a ReLU. 29 | 30 | # Input shape 31 | 2D tensor of shape (samples, n) 32 | 33 | # Output shape 34 | 2D tensor of shape (samples, 2*n) 35 | 36 | # Theoretical justification 37 | When applying ReLU, assuming that the distribution 38 | of the previous output is approximately centered around 0., 39 | you are discarding half of your input. This is inefficient. 40 | 41 | Antirectifier allows to return all-positive outputs like ReLU, 42 | without discarding any mnist_data. 43 | 44 | Tests on MNIST show that Antirectifier allows to train networks 45 | with twice less parameters yet with comparable 46 | classification accuracy as an equivalent ReLU-based network. 
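A tiny worked example: a centered row [1.0, -1.0] is L2-normalized to roughly
[0.707, -0.707] and then mapped to [relu(x), relu(-x)] ~ [0.707, 0.0, 0.0, 0.707],
i.e. n = 2 inputs become 2*n = 4 outputs and no activation mass is discarded.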
47 | ''' 48 | 49 | def compute_output_shape(self, input_shape): 50 | shape = list(input_shape) 51 | assert len(shape) == 2 # only valid for 2D tensors 52 | shape[-1] *= 2 53 | return tuple(shape) 54 | 55 | def call(self, inputs): 56 | inputs -= K.mean(inputs, axis=1, keepdims=True) 57 | inputs = K.l2_normalize(inputs, axis=1) 58 | pos = K.relu(inputs) 59 | neg = K.relu(-inputs) 60 | return K.concatenate([pos, neg], axis=1) 61 | 62 | # global parameters 63 | batch_size = 128 64 | num_classes = 10 65 | epochs = 40 66 | 67 | # the mnist_data, split between train and test sets 68 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 69 | 70 | x_train = x_train.reshape(60000, 784) 71 | x_test = x_test.reshape(10000, 784) 72 | x_train = x_train.astype('float32') 73 | x_test = x_test.astype('float32') 74 | x_train /= 255 75 | x_test /= 255 76 | print(x_train.shape[0], 'train samples') 77 | print(x_test.shape[0], 'test samples') 78 | 79 | # convert class vectors to binary class matrices 80 | y_train = keras.utils.to_categorical(y_train, num_classes) 81 | y_test = keras.utils.to_categorical(y_test, num_classes) 82 | 83 | # build the model 84 | model = Sequential() 85 | model.add(layers.Dense(256, input_shape=(784,))) 86 | model.add(Antirectifier()) 87 | model.add(layers.Dropout(0.1)) 88 | model.add(layers.Dense(256)) 89 | model.add(Antirectifier()) 90 | model.add(layers.Dropout(0.1)) 91 | model.add(layers.Dense(num_classes)) 92 | model.add(layers.Activation('softmax')) 93 | 94 | # compile the model 95 | model.compile(loss='categorical_crossentropy', 96 | optimizer='rmsprop', 97 | metrics=['accuracy']) 98 | 99 | # train the model 100 | model.fit(x_train, y_train, 101 | batch_size=batch_size, 102 | epochs=epochs, 103 | verbose=1, 104 | validation_data=(x_test, y_test)) 105 | 106 | # next, compare with an equivalent network 107 | # with2x bigger Dense layers and ReLU 108 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/cifar10_cnn.py: -------------------------------------------------------------------------------- 1 | '''Train a simple deep CNN on the CIFAR10 small images dataset. 2 | 3 | It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs. 4 | (it's still underfitting at that point, though). 5 | ''' 6 | 7 | from __future__ import print_function 8 | import keras 9 | from keras.datasets import cifar10 10 | from keras.preprocessing.image import ImageDataGenerator 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Activation, Flatten 13 | from keras.layers import Conv2D, MaxPooling2D 14 | import os 15 | 16 | batch_size = 32 17 | num_classes = 10 18 | epochs = 100 19 | data_augmentation = True 20 | num_predictions = 20 21 | save_dir = os.path.join(os.getcwd(), 'saved_models') 22 | model_name = 'keras_cifar10_trained_model.h5' 23 | 24 | # The mnist_data, split between train and test sets: 25 | (x_train, y_train), (x_test, y_test) = cifar10.load_data() 26 | print('x_train shape:', x_train.shape) 27 | print(x_train.shape[0], 'train samples') 28 | print(x_test.shape[0], 'test samples') 29 | 30 | # Convert class vectors to binary class matrices. 
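# e.g. an integer label 3 becomes the one-hot row [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
# matching the 10-way softmax output and the categorical_crossentropy loss used below.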
31 | y_train = keras.utils.to_categorical(y_train, num_classes) 32 | y_test = keras.utils.to_categorical(y_test, num_classes) 33 | 34 | model = Sequential() 35 | model.add(Conv2D(32, (3, 3), padding='same', 36 | input_shape=x_train.shape[1:])) 37 | model.add(Activation('relu')) 38 | model.add(Conv2D(32, (3, 3))) 39 | model.add(Activation('relu')) 40 | model.add(MaxPooling2D(pool_size=(2, 2))) 41 | model.add(Dropout(0.25)) 42 | 43 | model.add(Conv2D(64, (3, 3), padding='same')) 44 | model.add(Activation('relu')) 45 | model.add(Conv2D(64, (3, 3))) 46 | model.add(Activation('relu')) 47 | model.add(MaxPooling2D(pool_size=(2, 2))) 48 | model.add(Dropout(0.25)) 49 | 50 | model.add(Flatten()) 51 | model.add(Dense(512)) 52 | model.add(Activation('relu')) 53 | model.add(Dropout(0.5)) 54 | model.add(Dense(num_classes)) 55 | model.add(Activation('softmax')) 56 | 57 | # initiate RMSprop optimizer 58 | opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) 59 | 60 | # Let's train the model using RMSprop 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=opt, 63 | metrics=['accuracy']) 64 | 65 | x_train = x_train.astype('float32') 66 | x_test = x_test.astype('float32') 67 | x_train /= 255 68 | x_test /= 255 69 | 70 | if not data_augmentation: 71 | print('Not using mnist_data augmentation.') 72 | model.fit(x_train, y_train, 73 | batch_size=batch_size, 74 | epochs=epochs, 75 | validation_data=(x_test, y_test), 76 | shuffle=True) 77 | else: 78 | print('Using real-time mnist_data augmentation.') 79 | # This will do preprocessing and realtime mnist_data augmentation: 80 | datagen = ImageDataGenerator( 81 | featurewise_center=False, # set input mean to 0 over the dataset 82 | samplewise_center=False, # set each sample mean to 0 83 | featurewise_std_normalization=False, # divide inputs by std of the dataset 84 | samplewise_std_normalization=False, # divide each input by its std 85 | zca_whitening=False, # apply ZCA whitening 86 | rotation_range=0, # randomly rotate images in the range (degrees, 0 to 180) 87 | width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) 88 | height_shift_range=0.1, # randomly shift images vertically (fraction of total height) 89 | horizontal_flip=True, # randomly flip images 90 | vertical_flip=False) # randomly flip images 91 | 92 | # Compute quantities required for feature-wise normalization 93 | # (std, mean, and principal components if ZCA whitening is applied). 94 | datagen.fit(x_train) 95 | 96 | # Fit the model on the batches generated by datagen.flow(). 97 | model.fit_generator(datagen.flow(x_train, y_train, 98 | batch_size=batch_size), 99 | epochs=epochs, 100 | validation_data=(x_test, y_test), 101 | workers=4, 102 | steps_per_epoch=100) 103 | 104 | # Save model and weights 105 | if not os.path.isdir(save_dir): 106 | os.makedirs(save_dir) 107 | model_path = os.path.join(save_dir, model_name) 108 | model.save(model_path) 109 | print('Saved trained model at %s ' % model_path) 110 | 111 | # Score trained model. 112 | scores = model.evaluate(x_test, y_test, verbose=1) 113 | print('Test loss:', scores[0]) 114 | print('Test accuracy:', scores[1]) 115 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/conv_filter_visualization.py: -------------------------------------------------------------------------------- 1 | '''Visualization of the filters of VGG16, via gradient ascent in input space. 2 | 3 | This script can run on CPU in a few minutes. 
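Sketch of the idea: for a chosen filter we define a loss L equal to the mean activation
of that filter, then repeatedly update the input image x with x <- x + step * dL/dx
(the gradient is L2-normalized below), so the image evolves towards a pattern that
maximally excites the filter.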
4 | 5 | Results example: http://i.imgur.com/4nj4KjN.jpg 6 | ''' 7 | from __future__ import print_function 8 | 9 | from scipy.misc import imsave 10 | import numpy as np 11 | import time 12 | from keras.applications import vgg16 13 | from keras import backend as K 14 | 15 | # dimensions of the generated pictures for each filter. 16 | img_width = 128 17 | img_height = 128 18 | 19 | # the name of the layer we want to visualize 20 | # (see model definition at keras/applications/vgg16.py) 21 | layer_name = 'block5_conv1' 22 | 23 | # util function to convert a tensor into a valid image 24 | 25 | 26 | def deprocess_image(x): 27 | # normalize tensor: center on 0., ensure std is 0.1 28 | x -= x.mean() 29 | x /= (x.std() + K.epsilon()) 30 | x *= 0.1 31 | 32 | # clip to [0, 1] 33 | x += 0.5 34 | x = np.clip(x, 0, 1) 35 | 36 | # convert to RGB array 37 | x *= 255 38 | if K.image_data_format() == 'channels_first': 39 | x = x.transpose((1, 2, 0)) 40 | x = np.clip(x, 0, 255).astype('uint8') 41 | return x 42 | 43 | # build the VGG16 network with ImageNet weights 44 | model = vgg16.VGG16(weights='imagenet', include_top=False) 45 | print('Model loaded.') 46 | 47 | model.summary() 48 | 49 | # this is the placeholder for the input images 50 | input_img = model.input 51 | 52 | # get the symbolic outputs of each "key" layer (we gave them unique names). 53 | layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) 54 | 55 | 56 | def normalize(x): 57 | # utility function to normalize a tensor by its L2 norm 58 | return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon()) 59 | 60 | 61 | kept_filters = [] 62 | for filter_index in range(200): 63 | # we only scan through the first 200 filters, 64 | # but there are actually 512 of them 65 | print('Processing filter %d' % filter_index) 66 | start_time = time.time() 67 | 68 | # we build a loss function that maximizes the activation 69 | # of the nth filter of the layer considered 70 | layer_output = layer_dict[layer_name].output 71 | if K.image_data_format() == 'channels_first': 72 | loss = K.mean(layer_output[:, filter_index, :, :]) 73 | else: 74 | loss = K.mean(layer_output[:, :, :, filter_index]) 75 | 76 | # we compute the gradient of the input picture wrt this loss 77 | grads = K.gradients(loss, input_img)[0] 78 | 79 | # normalization trick: we normalize the gradient 80 | grads = normalize(grads) 81 | 82 | # this function returns the loss and grads given the input picture 83 | iterate = K.function([input_img], [loss, grads]) 84 | 85 | # step size for gradient ascent 86 | step = 1. 87 | 88 | # we start from a gray image with some random noise 89 | if K.image_data_format() == 'channels_first': 90 | input_img_data = np.random.random((1, 3, img_width, img_height)) 91 | else: 92 | input_img_data = np.random.random((1, img_width, img_height, 3)) 93 | input_img_data = (input_img_data - 0.5) * 20 + 128 94 | 95 | # we run gradient ascent for 20 steps 96 | for i in range(20): 97 | loss_value, grads_value = iterate([input_img_data]) 98 | input_img_data += grads_value * step 99 | 100 | print('Current loss value:', loss_value) 101 | if loss_value <= 0.: 102 | # some filters get stuck to 0, we can skip them 103 | break 104 | 105 | # decode the resulting input image 106 | if loss_value > 0: 107 | img = deprocess_image(input_img_data[0]) 108 | kept_filters.append((img, loss_value)) 109 | end_time = time.time() 110 | print('Filter %d processed in %ds' % (filter_index, end_time - start_time)) 111 | 112 | # we will stich the best 64 filters on a 8 x 8 grid. 
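# With img_width = img_height = 128 and the 5 px margin used below, the stitched canvas
# works out to 8 * 128 + 7 * 5 = 1059 pixels on each side.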
113 | n = 8 114 | 115 | # the filters that have the highest loss are assumed to be better-looking. 116 | # we will only keep the top 64 filters. 117 | kept_filters.sort(key=lambda x: x[1], reverse=True) 118 | kept_filters = kept_filters[:n * n] 119 | 120 | # build a black picture with enough space for 121 | # our 8 x 8 filters of size 128 x 128, with a 5px margin in between 122 | margin = 5 123 | width = n * img_width + (n - 1) * margin 124 | height = n * img_height + (n - 1) * margin 125 | stitched_filters = np.zeros((width, height, 3)) 126 | 127 | # fill the picture with our saved filters 128 | for i in range(n): 129 | for j in range(n): 130 | img, loss = kept_filters[i * n + j] 131 | stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width, 132 | (img_height + margin) * j: (img_height + margin) * j + img_height, :] = img 133 | 134 | # save the result to disk 135 | imsave('stitched_filters_%dx%d.png' % (n, n), stitched_filters) 136 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/conv_lstm.py: -------------------------------------------------------------------------------- 1 | """ This script demonstrates the use of a convolutional LSTM network. 2 | 3 | This network is used to predict the next frame of an artificially 4 | generated movie which contains moving squares. 5 | """ 6 | from keras.models import Sequential 7 | from keras.layers.convolutional import Conv3D 8 | from keras.layers.convolutional_recurrent import ConvLSTM2D 9 | from keras.layers.normalization import BatchNormalization 10 | import numpy as np 11 | import pylab as plt 12 | 13 | # We create a layer which take as input movies of shape 14 | # (n_frames, width, height, channels) and returns a movie 15 | # of identical shape. 16 | 17 | seq = Sequential() 18 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), 19 | input_shape=(None, 40, 40, 1), 20 | padding='same', return_sequences=True)) 21 | seq.add(BatchNormalization()) 22 | 23 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), 24 | padding='same', return_sequences=True)) 25 | seq.add(BatchNormalization()) 26 | 27 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), 28 | padding='same', return_sequences=True)) 29 | seq.add(BatchNormalization()) 30 | 31 | seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), 32 | padding='same', return_sequences=True)) 33 | seq.add(BatchNormalization()) 34 | 35 | seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3), 36 | activation='sigmoid', 37 | padding='same', data_format='channels_last')) 38 | seq.compile(loss='binary_crossentropy', optimizer='adadelta') 39 | 40 | 41 | # Artificial mnist_data generation: 42 | # Generate movies with 3 to 7 moving squares inside. 43 | # The squares are of shape 1x1 or 2x2 pixels, 44 | # which move linearly over time. 45 | # For convenience we first create movies with bigger width and height (80x80) 46 | # and at the end we select a 40x40 window. 
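# (Generating at 80x80 presumably keeps things simple: start positions lie in [20, 60) and each
# square moves at most one pixel per frame for 15 frames, so no square ever runs off the generated
# canvas; the central [20:60, 20:60] crop below then yields the final 40x40 movies.)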
47 | 48 | def generate_movies(n_samples=1200, n_frames=15): 49 | row = 80 50 | col = 80 51 | noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float) 52 | shifted_movies = np.zeros((n_samples, n_frames, row, col, 1), 53 | dtype=np.float) 54 | 55 | for i in range(n_samples): 56 | # Add 3 to 7 moving squares 57 | n = np.random.randint(3, 8) 58 | 59 | for j in range(n): 60 | # Initial position 61 | xstart = np.random.randint(20, 60) 62 | ystart = np.random.randint(20, 60) 63 | # Direction of motion 64 | directionx = np.random.randint(0, 3) - 1 65 | directiony = np.random.randint(0, 3) - 1 66 | 67 | # Size of the square 68 | w = np.random.randint(2, 4) 69 | 70 | for t in range(n_frames): 71 | x_shift = xstart + directionx * t 72 | y_shift = ystart + directiony * t 73 | noisy_movies[i, t, x_shift - w: x_shift + w, 74 | y_shift - w: y_shift + w, 0] += 1 75 | 76 | # Make it more robust by adding noise. 77 | # The idea is that if during inference, 78 | # the value of the pixel is not exactly one, 79 | # we need to train the network to be robust and still 80 | # consider it as a pixel belonging to a square. 81 | if np.random.randint(0, 2): 82 | noise_f = (-1)**np.random.randint(0, 2) 83 | noisy_movies[i, t, 84 | x_shift - w - 1: x_shift + w + 1, 85 | y_shift - w - 1: y_shift + w + 1, 86 | 0] += noise_f * 0.1 87 | 88 | # Shift the ground truth by 1 89 | x_shift = xstart + directionx * (t + 1) 90 | y_shift = ystart + directiony * (t + 1) 91 | shifted_movies[i, t, x_shift - w: x_shift + w, 92 | y_shift - w: y_shift + w, 0] += 1 93 | 94 | # Cut to a 40x40 window 95 | noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::] 96 | shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::] 97 | noisy_movies[noisy_movies >= 1] = 1 98 | shifted_movies[shifted_movies >= 1] = 1 99 | return noisy_movies, shifted_movies 100 | 101 | # Train the network 102 | noisy_movies, shifted_movies = generate_movies(n_samples=1200) 103 | seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10, 104 | epochs=300, validation_split=0.05) 105 | 106 | # Testing the network on one movie 107 | # feed it with the first 7 positions and then 108 | # predict the new positions 109 | which = 1004 110 | track = noisy_movies[which][:7, ::, ::, ::] 111 | 112 | for j in range(16): 113 | new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::]) 114 | new = new_pos[::, -1, ::, ::, ::] 115 | track = np.concatenate((track, new), axis=0) 116 | 117 | 118 | # And then compare the predictions 119 | # to the ground truth 120 | track2 = noisy_movies[which][::, ::, ::, ::] 121 | for i in range(15): 122 | fig = plt.figure(figsize=(10, 5)) 123 | 124 | ax = fig.add_subplot(121) 125 | 126 | if i >= 7: 127 | ax.text(1, 3, 'Predictions !', fontsize=20, color='w') 128 | else: 129 | ax.text(1, 3, 'Initial trajectory', fontsize=20) 130 | 131 | toplot = track[i, ::, ::, 0] 132 | 133 | plt.imshow(toplot) 134 | ax = fig.add_subplot(122) 135 | plt.text(1, 3, 'Ground truth', fontsize=20) 136 | 137 | toplot = track2[i, ::, ::, 0] 138 | if i >= 2: 139 | toplot = shifted_movies[which][i - 1, ::, ::, 0] 140 | 141 | plt.imshow(toplot) 142 | plt.savefig('%i_animate.png' % (i + 1)) 143 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/imdb_bidirectional_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains a Bidirectional LSTM on the IMDB sentiment classification task. 
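The Bidirectional wrapper runs two LSTMs over each review, one left-to-right and one
right-to-left, and by default concatenates their outputs, so the 64-unit LSTM below
yields a 128-dimensional representation of the sequence.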
2 | 3 | Output after 4 epochs on CPU: ~0.8146 4 | Time per epoch on CPU (Core i7): ~150s. 5 | ''' 6 | 7 | from __future__ import print_function 8 | import numpy as np 9 | 10 | from keras.preprocessing import sequence 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional 13 | from keras.datasets import imdb 14 | 15 | 16 | max_features = 20000 17 | # cut texts after this number of words 18 | # (among top max_features most common words) 19 | maxlen = 100 20 | batch_size = 32 21 | 22 | print('Loading mnist_data...') 23 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 24 | print(len(x_train), 'train sequences') 25 | print(len(x_test), 'test sequences') 26 | 27 | print('Pad sequences (samples x time)') 28 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 29 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 30 | print('x_train shape:', x_train.shape) 31 | print('x_test shape:', x_test.shape) 32 | y_train = np.array(y_train) 33 | y_test = np.array(y_test) 34 | 35 | model = Sequential() 36 | model.add(Embedding(max_features, 128, input_length=maxlen)) 37 | model.add(Bidirectional(LSTM(64))) 38 | model.add(Dropout(0.5)) 39 | model.add(Dense(1, activation='sigmoid')) 40 | 41 | # try using different optimizers and different optimizer configs 42 | model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) 43 | 44 | print('Train...') 45 | model.fit(x_train, y_train, 46 | batch_size=batch_size, 47 | epochs=4, 48 | validation_data=[x_test, y_test]) 49 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/imdb_cnn.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of Convolution1D for text classification. 2 | 3 | Gets to 0.89 test accuracy after 2 epochs. 4 | 90s/epoch on Intel i5 2.4Ghz CPU. 5 | 10s/epoch on Tesla K40 GPU. 
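Shape walk-through for the settings below: each padded review of 400 word ids is embedded
to (400, 50); Conv1D with 250 filters of width 3 and 'valid' padding gives (398, 250);
GlobalMaxPooling1D keeps the strongest response of each filter, leaving a 250-dimensional
vector that feeds the hidden and output layers.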
6 | ''' 7 | from __future__ import print_function 8 | 9 | from keras.preprocessing import sequence 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation 12 | from keras.layers import Embedding 13 | from keras.layers import Conv1D, GlobalMaxPooling1D 14 | from keras.datasets import imdb 15 | 16 | # set parameters: 17 | max_features = 5000 18 | maxlen = 400 19 | batch_size = 32 20 | embedding_dims = 50 21 | filters = 250 22 | kernel_size = 3 23 | hidden_dims = 250 24 | epochs = 2 25 | 26 | print('Loading mnist_data...') 27 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 28 | print(len(x_train), 'train sequences') 29 | print(len(x_test), 'test sequences') 30 | 31 | print('Pad sequences (samples x time)') 32 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 33 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 34 | print('x_train shape:', x_train.shape) 35 | print('x_test shape:', x_test.shape) 36 | 37 | print('Build model...') 38 | model = Sequential() 39 | 40 | # we start off with an efficient embedding layer which maps 41 | # our vocab indices into embedding_dims dimensions 42 | model.add(Embedding(max_features, 43 | embedding_dims, 44 | input_length=maxlen)) 45 | model.add(Dropout(0.2)) 46 | 47 | # we add a Convolution1D, which will learn filters 48 | # word group filters of size filter_length: 49 | model.add(Conv1D(filters, 50 | kernel_size, 51 | padding='valid', 52 | activation='relu', 53 | strides=1)) 54 | # we use max pooling: 55 | model.add(GlobalMaxPooling1D()) 56 | 57 | # We add a vanilla hidden layer: 58 | model.add(Dense(hidden_dims)) 59 | model.add(Dropout(0.2)) 60 | model.add(Activation('relu')) 61 | 62 | # We project onto a single unit output layer, and squash it with a sigmoid: 63 | model.add(Dense(1)) 64 | model.add(Activation('sigmoid')) 65 | 66 | model.compile(loss='binary_crossentropy', 67 | optimizer='adam', 68 | metrics=['accuracy']) 69 | model.fit(x_train, y_train, 70 | batch_size=batch_size, 71 | epochs=epochs, 72 | validation_data=(x_test, y_test)) 73 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/imdb_cnn_lstm.py: -------------------------------------------------------------------------------- 1 | '''Train a recurrent convolutional network on the IMDB sentiment 2 | classification task. 3 | 4 | Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU. 5 | ''' 6 | from __future__ import print_function 7 | 8 | from keras.preprocessing import sequence 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Activation 11 | from keras.layers import Embedding 12 | from keras.layers import LSTM 13 | from keras.layers import Conv1D, MaxPooling1D 14 | from keras.datasets import imdb 15 | 16 | # Embedding 17 | max_features = 20000 18 | maxlen = 100 19 | embedding_size = 128 20 | 21 | # Convolution 22 | kernel_size = 5 23 | filters = 64 24 | pool_size = 4 25 | 26 | # LSTM 27 | lstm_output_size = 70 28 | 29 | # Training 30 | batch_size = 30 31 | epochs = 2 32 | 33 | ''' 34 | Note: 35 | batch_size is highly sensitive. 36 | Only 2 epochs are needed as the dataset is very small. 
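Shape walk-through for the settings above: (100,) word ids -> Embedding -> (100, 128)
-> Conv1D(64, kernel 5, 'valid') -> (96, 64) -> MaxPooling1D(4) -> (24, 64)
-> LSTM(70) -> Dense(1) with a sigmoid for the binary sentiment label.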
37 | ''' 38 | 39 | print('Loading mnist_data...') 40 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 41 | print(len(x_train), 'train sequences') 42 | print(len(x_test), 'test sequences') 43 | 44 | print('Pad sequences (samples x time)') 45 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 46 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 47 | print('x_train shape:', x_train.shape) 48 | print('x_test shape:', x_test.shape) 49 | 50 | print('Build model...') 51 | 52 | model = Sequential() 53 | model.add(Embedding(max_features, embedding_size, input_length=maxlen)) 54 | model.add(Dropout(0.25)) 55 | model.add(Conv1D(filters, 56 | kernel_size, 57 | padding='valid', 58 | activation='relu', 59 | strides=1)) 60 | model.add(MaxPooling1D(pool_size=pool_size)) 61 | model.add(LSTM(lstm_output_size)) 62 | model.add(Dense(1)) 63 | model.add(Activation('sigmoid')) 64 | 65 | model.compile(loss='binary_crossentropy', 66 | optimizer='adam', 67 | metrics=['accuracy']) 68 | 69 | print('Train...') 70 | model.fit(x_train, y_train, 71 | batch_size=batch_size, 72 | epochs=epochs, 73 | validation_data=(x_test, y_test)) 74 | score, acc = model.evaluate(x_test, y_test, batch_size=batch_size) 75 | print('Test score:', score) 76 | print('Test accuracy:', acc) 77 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/imdb_fasttext.py: -------------------------------------------------------------------------------- 1 | '''This example demonstrates the use of fasttext for text classification 2 | 3 | Based on Joulin et al's paper: 4 | 5 | Bags of Tricks for Efficient Text Classification 6 | https://arxiv.org/abs/1607.01759 7 | 8 | Results on IMDB datasets with uni and bi-gram embeddings: 9 | Uni-gram: 0.8813 test accuracy after 5 epochs. 8s/epoch on i7 cpu. 10 | Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTx 980M gpu. 11 | ''' 12 | 13 | from __future__ import print_function 14 | import numpy as np 15 | 16 | from keras.preprocessing import sequence 17 | from keras.models import Sequential 18 | from keras.layers import Dense 19 | from keras.layers import Embedding 20 | from keras.layers import GlobalAveragePooling1D 21 | from keras.datasets import imdb 22 | 23 | 24 | def create_ngram_set(input_list, ngram_value=2): 25 | """ 26 | Extract a set of n-grams from a list of integers. 27 | 28 | >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2) 29 | {(4, 9), (4, 1), (1, 4), (9, 4)} 30 | 31 | >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=3) 32 | [(1, 4, 9), (4, 9, 4), (9, 4, 1), (4, 1, 4)] 33 | """ 34 | return set(zip(*[input_list[i:] for i in range(ngram_value)])) 35 | 36 | 37 | def add_ngram(sequences, token_indice, ngram_range=2): 38 | """ 39 | Augment the input list of list (sequences) by appending n-grams values. 
40 | 41 | Example: adding bi-gram 42 | >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]] 43 | >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017} 44 | >>> add_ngram(sequences, token_indice, ngram_range=2) 45 | [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42]] 46 | 47 | Example: adding tri-gram 48 | >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]] 49 | >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017, (7, 9, 2): 2018} 50 | >>> add_ngram(sequences, token_indice, ngram_range=3) 51 | [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42, 2018]] 52 | """ 53 | new_sequences = [] 54 | for input_list in sequences: 55 | new_list = input_list[:] 56 | for ngram_value in range(2, ngram_range + 1): 57 | for i in range(len(new_list) - ngram_value + 1): 58 | ngram = tuple(new_list[i:i + ngram_value]) 59 | if ngram in token_indice: 60 | new_list.append(token_indice[ngram]) 61 | new_sequences.append(new_list) 62 | 63 | return new_sequences 64 | 65 | # Set parameters: 66 | # ngram_range = 2 will add bi-grams features 67 | ngram_range = 1 68 | max_features = 20000 69 | maxlen = 400 70 | batch_size = 32 71 | embedding_dims = 50 72 | epochs = 5 73 | 74 | print('Loading mnist_data...') 75 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 76 | print(len(x_train), 'train sequences') 77 | print(len(x_test), 'test sequences') 78 | print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int))) 79 | print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int))) 80 | 81 | if ngram_range > 1: 82 | print('Adding {}-gram features'.format(ngram_range)) 83 | # Create set of unique n-gram from the training set. 84 | ngram_set = set() 85 | for input_list in x_train: 86 | for i in range(2, ngram_range + 1): 87 | set_of_ngram = create_ngram_set(input_list, ngram_value=i) 88 | ngram_set.update(set_of_ngram) 89 | 90 | # Dictionary mapping n-gram token to a unique integer. 91 | # Integer values are greater than max_features in order 92 | # to avoid collision with existing features. 93 | start_index = max_features + 1 94 | token_indice = {v: k + start_index for k, v in enumerate(ngram_set)} 95 | indice_token = {token_indice[k]: k for k in token_indice} 96 | 97 | # max_features is the highest integer that could be found in the dataset. 
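# (+1 below because the Embedding layer's input_dim must be strictly greater than the
# largest token index it will ever see.)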
98 | max_features = np.max(list(indice_token.keys())) + 1 99 | 100 | # Augmenting x_train and x_test with n-grams features 101 | x_train = add_ngram(x_train, token_indice, ngram_range) 102 | x_test = add_ngram(x_test, token_indice, ngram_range) 103 | print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int))) 104 | print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int))) 105 | 106 | print('Pad sequences (samples x time)') 107 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 108 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 109 | print('x_train shape:', x_train.shape) 110 | print('x_test shape:', x_test.shape) 111 | 112 | print('Build model...') 113 | model = Sequential() 114 | 115 | # we start off with an efficient embedding layer which maps 116 | # our vocab indices into embedding_dims dimensions 117 | model.add(Embedding(max_features, 118 | embedding_dims, 119 | input_length=maxlen)) 120 | 121 | # we add a GlobalAveragePooling1D, which will average the embeddings 122 | # of all words in the document 123 | model.add(GlobalAveragePooling1D()) 124 | 125 | # We project onto a single unit output layer, and squash it with a sigmoid: 126 | model.add(Dense(1, activation='sigmoid')) 127 | 128 | model.compile(loss='binary_crossentropy', 129 | optimizer='adam', 130 | metrics=['accuracy']) 131 | 132 | model.fit(x_train, y_train, 133 | batch_size=batch_size, 134 | epochs=epochs, 135 | validation_data=(x_test, y_test)) 136 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/imdb_lstm.py: -------------------------------------------------------------------------------- 1 | '''Trains an LSTM model on the IMDB sentiment classification task. 2 | 3 | The dataset is actually too small for LSTM to be of any advantage 4 | compared to simpler, much faster methods such as TF-IDF + LogReg. 5 | 6 | # Notes 7 | 8 | - RNNs are tricky. Choice of batch size is important, 9 | choice of loss and optimizer is critical, etc. 10 | Some configurations won't converge. 11 | 12 | - LSTM loss decrease patterns during training can be quite different 13 | from what you see with CNNs/MLPs/etc. 
14 | ''' 15 | from __future__ import print_function 16 | 17 | from keras.preprocessing import sequence 18 | from keras.models import Sequential 19 | from keras.layers import Dense, Embedding 20 | from keras.layers import LSTM 21 | from keras.datasets import imdb 22 | 23 | max_features = 20000 24 | maxlen = 80 # cut texts after this number of words (among top max_features most common words) 25 | batch_size = 32 26 | 27 | print('Loading mnist_data...') 28 | (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) 29 | print(len(x_train), 'train sequences') 30 | print(len(x_test), 'test sequences') 31 | 32 | print('Pad sequences (samples x time)') 33 | x_train = sequence.pad_sequences(x_train, maxlen=maxlen) 34 | x_test = sequence.pad_sequences(x_test, maxlen=maxlen) 35 | print('x_train shape:', x_train.shape) 36 | print('x_test shape:', x_test.shape) 37 | 38 | print('Build model...') 39 | model = Sequential() 40 | model.add(Embedding(max_features, 128)) 41 | model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) 42 | model.add(Dense(1, activation='sigmoid')) 43 | 44 | # try using different optimizers and different optimizer configs 45 | model.compile(loss='binary_crossentropy', 46 | optimizer='adam', 47 | metrics=['accuracy']) 48 | 49 | print('Train...') 50 | model.fit(x_train, y_train, 51 | batch_size=batch_size, 52 | epochs=15, 53 | validation_data=(x_test, y_test)) 54 | score, acc = model.evaluate(x_test, y_test, 55 | batch_size=batch_size) 56 | print('Test score:', score) 57 | print('Test accuracy:', acc) 58 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/lstm_text_generation.py: -------------------------------------------------------------------------------- 1 | '''Example script to generate text from Nietzsche's writings. 2 | 3 | At least 20 epochs are required before the generated text 4 | starts sounding coherent. 5 | 6 | It is recommended to run this script on GPU, as recurrent 7 | networks are quite computationally intensive. 8 | 9 | If you try this script on new mnist_data, make sure your corpus 10 | has at least ~100k characters. ~1M is better. 
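Training frames the problem as next-character prediction: the corpus is cut into
overlapping 40-character windows (one new window every 3 characters), each window is
one-hot encoded as a (40, n_chars) boolean matrix, and the target is the single
character that follows the window.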
11 | ''' 12 | 13 | from __future__ import print_function 14 | from keras.callbacks import LambdaCallback 15 | from keras.models import Sequential 16 | from keras.layers import Dense, Activation 17 | from keras.layers import LSTM 18 | from keras.optimizers import RMSprop 19 | from keras.utils.data_utils import get_file 20 | import numpy as np 21 | import random 22 | import sys 23 | import io 24 | 25 | path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') 26 | with io.open(path, encoding='utf-8') as f: 27 | text = f.read().lower() 28 | print('corpus length:', len(text)) 29 | 30 | chars = sorted(list(set(text))) 31 | print('total chars:', len(chars)) 32 | char_indices = dict((c, i) for i, c in enumerate(chars)) 33 | indices_char = dict((i, c) for i, c in enumerate(chars)) 34 | 35 | # cut the text in semi-redundant sequences of maxlen characters 36 | maxlen = 40 37 | step = 3 38 | sentences = [] 39 | next_chars = [] 40 | for i in range(0, len(text) - maxlen, step): 41 | sentences.append(text[i: i + maxlen]) 42 | next_chars.append(text[i + maxlen]) 43 | print('nb sequences:', len(sentences)) 44 | 45 | print('Vectorization...') 46 | x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) 47 | y = np.zeros((len(sentences), len(chars)), dtype=np.bool) 48 | for i, sentence in enumerate(sentences): 49 | for t, char in enumerate(sentence): 50 | x[i, t, char_indices[char]] = 1 51 | y[i, char_indices[next_chars[i]]] = 1 52 | 53 | 54 | # build the model: a single LSTM 55 | print('Build model...') 56 | model = Sequential() 57 | model.add(LSTM(128, input_shape=(maxlen, len(chars)))) 58 | model.add(Dense(len(chars))) 59 | model.add(Activation('softmax')) 60 | 61 | optimizer = RMSprop(lr=0.01) 62 | model.compile(loss='categorical_crossentropy', optimizer=optimizer) 63 | 64 | 65 | def sample(preds, temperature=1.0): 66 | # helper function to sample an index from a probability array 67 | preds = np.asarray(preds).astype('float64') 68 | preds = np.log(preds) / temperature 69 | exp_preds = np.exp(preds) 70 | preds = exp_preds / np.sum(exp_preds) 71 | probas = np.random.multinomial(1, preds, 1) 72 | return np.argmax(probas) 73 | 74 | 75 | def on_epoch_end(epoch, logs): 76 | # Function invoked at end of each epoch. Prints generated text. 77 | print() 78 | print('----- Generating text after Epoch: %d' % epoch) 79 | 80 | start_index = random.randint(0, len(text) - maxlen - 1) 81 | for diversity in [0.2, 0.5, 1.0, 1.2]: 82 | print('----- diversity:', diversity) 83 | 84 | generated = '' 85 | sentence = text[start_index: start_index + maxlen] 86 | generated += sentence 87 | print('----- Generating with seed: "' + sentence + '"') 88 | sys.stdout.write(generated) 89 | 90 | for i in range(400): 91 | x_pred = np.zeros((1, maxlen, len(chars))) 92 | for t, char in enumerate(sentence): 93 | x_pred[0, t, char_indices[char]] = 1. 
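# 'diversity' is the softmax temperature passed to sample(): dividing the log-probabilities
# by a temperature below 1 sharpens the distribution towards the most likely next character,
# while values above 1 flatten it and yield more varied but noisier text.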
94 | 95 | preds = model.predict(x_pred, verbose=0)[0] 96 | next_index = sample(preds, diversity) 97 | next_char = indices_char[next_index] 98 | 99 | generated += next_char 100 | sentence = sentence[1:] + next_char 101 | 102 | sys.stdout.write(next_char) 103 | sys.stdout.flush() 104 | print() 105 | 106 | print_callback = LambdaCallback(on_epoch_end=on_epoch_end) 107 | 108 | model.fit(x, y, 109 | batch_size=128, 110 | epochs=60, 111 | callbacks=[print_callback]) 112 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_cnn.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple convnet on the MNIST dataset. 2 | 3 | Gets to 99.25% test accuracy after 12 epochs 4 | (there is still a lot of margin for parameter tuning). 5 | 16 seconds per epoch on a GRID K520 GPU. 6 | ''' 7 | 8 | from __future__ import print_function 9 | import keras 10 | from keras.datasets import mnist 11 | from keras.models import Sequential 12 | from keras.layers import Dense, Dropout, Flatten 13 | from keras.layers import Conv2D, MaxPooling2D 14 | from keras import backend as K 15 | 16 | batch_size = 128 17 | num_classes = 10 18 | epochs = 12 19 | 20 | # input image dimensions 21 | img_rows, img_cols = 28, 28 22 | 23 | # the mnist_data, split between train and test sets 24 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 25 | 26 | if K.image_data_format() == 'channels_first': 27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 29 | input_shape = (1, img_rows, img_cols) 30 | else: 31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 33 | input_shape = (img_rows, img_cols, 1) 34 | 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255 38 | x_test /= 255 39 | print('x_train shape:', x_train.shape) 40 | print(x_train.shape[0], 'train samples') 41 | print(x_test.shape[0], 'test samples') 42 | 43 | # convert class vectors to binary class matrices 44 | y_train = keras.utils.to_categorical(y_train, num_classes) 45 | y_test = keras.utils.to_categorical(y_test, num_classes) 46 | 47 | model = Sequential() 48 | model.add(Conv2D(32, kernel_size=(3, 3), 49 | activation='relu', 50 | input_shape=input_shape)) 51 | model.add(Conv2D(64, (3, 3), activation='relu')) 52 | model.add(MaxPooling2D(pool_size=(2, 2))) 53 | model.add(Dropout(0.25)) 54 | model.add(Flatten()) 55 | model.add(Dense(128, activation='relu')) 56 | model.add(Dropout(0.5)) 57 | model.add(Dense(num_classes, activation='softmax')) 58 | 59 | model.compile(loss=keras.losses.categorical_crossentropy, 60 | optimizer=keras.optimizers.Adadelta(), 61 | metrics=['accuracy']) 62 | 63 | model.fit(x_train, y_train, 64 | batch_size=batch_size, 65 | epochs=epochs, 66 | verbose=1, 67 | validation_data=(x_test, y_test)) 68 | score = model.evaluate(x_test, y_test, verbose=0) 69 | print('Test loss:', score[0]) 70 | print('Test accuracy:', score[1]) 71 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_dataset_api.py: -------------------------------------------------------------------------------- 1 | '''MNIST classification with TensorFlow's Dataset API. 
2 | 3 | Introduced in TensorFlow 1.3, the Dataset API is now the 4 | standard method for loading mnist_data into TensorFlow models. 5 | A Dataset is a sequence of elements, which are themselves 6 | composed of tf.Tensor components. For more details, see: 7 | https://www.tensorflow.org/programmers_guide/datasets 8 | 9 | To use this with Keras, we make a dataset out of elements 10 | of the form (input batch, output batch). From there, we 11 | create a one-shot iterator and a graph node corresponding 12 | to its get_next() method. Its components are then provided 13 | to the network's Input layer and the Model.compile() method, 14 | respectively. 15 | 16 | Note that from TensorFlow 1.4, tf.contrib.mnist_data is deprecated 17 | and tf.mnist_data is preferred. See the release notes for details. 18 | 19 | This example is intended to closely follow the 20 | mnist_tfrecord.py example. 21 | ''' 22 | import numpy as np 23 | import os 24 | import tempfile 25 | 26 | import keras 27 | from keras import backend as K 28 | from keras import layers 29 | from keras.datasets import mnist 30 | 31 | import tensorflow as tf 32 | from tensorflow.contrib.data import Dataset 33 | 34 | 35 | if K.backend() != 'tensorflow': 36 | raise RuntimeError('This example can only run with the TensorFlow backend,' 37 | ' because it requires the Datset API, which is not' 38 | ' supported on other platforms.') 39 | 40 | 41 | def cnn_layers(inputs): 42 | x = layers.Conv2D(32, (3, 3), 43 | activation='relu', padding='valid')(inputs) 44 | x = layers.MaxPooling2D(pool_size=(2, 2))(x) 45 | x = layers.Conv2D(64, (3, 3), activation='relu')(x) 46 | x = layers.MaxPooling2D(pool_size=(2, 2))(x) 47 | x = layers.Flatten()(x) 48 | x = layers.Dense(512, activation='relu')(x) 49 | x = layers.Dropout(0.5)(x) 50 | predictions = layers.Dense(num_classes, 51 | activation='softmax', 52 | name='x_train_out')(x) 53 | return predictions 54 | 55 | 56 | batch_size = 128 57 | buffer_size = 10000 58 | steps_per_epoch = int(np.ceil(60000 / float(batch_size))) # = 469 59 | epochs = 5 60 | num_classes = 10 61 | 62 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 63 | x_train = x_train.astype(np.float32) / 255 64 | x_train = np.expand_dims(x_train, -1) 65 | y_train = tf.one_hot(y_train, num_classes) 66 | 67 | # Create the dataset and its associated one-shot iterator. 68 | dataset = Dataset.from_tensor_slices((x_train, y_train)) 69 | dataset = dataset.repeat() 70 | dataset = dataset.shuffle(buffer_size) 71 | dataset = dataset.batch(batch_size) 72 | iterator = dataset.make_one_shot_iterator() 73 | 74 | # Model creation using tensors from the get_next() graph node. 75 | inputs, targets = iterator.get_next() 76 | model_input = layers.Input(tensor=inputs) 77 | model_output = cnn_layers(model_input) 78 | train_model = keras.models.Model(inputs=model_input, outputs=model_output) 79 | 80 | train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5), 81 | loss='categorical_crossentropy', 82 | metrics=['accuracy'], 83 | target_tensors=[targets]) 84 | train_model.summary() 85 | 86 | train_model.fit(epochs=epochs, 87 | steps_per_epoch=steps_per_epoch) 88 | 89 | # Save the model weights. 90 | weight_path = os.path.join(tempfile.gettempdir(), 'saved_wt.h5') 91 | train_model.save_weights(weight_path) 92 | 93 | # Clean up the TF session. 94 | K.clear_session() 95 | 96 | # Second session to test loading trained model without tensors. 
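# The training model above is wired directly to the iterator's tensors, so to evaluate on
# plain NumPy arrays we rebuild the same architecture with a regular placeholder Input,
# load the saved weights, and compile that copy for evaluation.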
97 | x_test = x_test.astype(np.float32) 98 | x_test = np.expand_dims(x_test, -1) 99 | 100 | x_test_inp = layers.Input(shape=x_test.shape[1:]) 101 | test_out = cnn_layers(x_test_inp) 102 | test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out) 103 | 104 | test_model.load_weights(weight_path) 105 | test_model.compile(optimizer='rmsprop', 106 | loss='sparse_categorical_crossentropy', 107 | metrics=['accuracy']) 108 | test_model.summary() 109 | 110 | loss, acc = test_model.evaluate(x_test, y_test, num_classes) 111 | print('\nTest accuracy: {0}'.format(acc)) 112 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_denoising_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''Trains a denoising autoencoder on MNIST dataset. 2 | 3 | Denoising is one of the classic applications of autoencoders. 4 | The denoising process removes unwanted noise that corrupted the 5 | true signal. 6 | 7 | Noise + Data ---> Denoising Autoencoder ---> Data 8 | 9 | Given a training dataset of corrupted mnist_data as input and 10 | true signal as output, a denoising autoencoder can recover the 11 | hidden structure to generate clean mnist_data. 12 | 13 | This example has modular design. The encoder, decoder and autoencoder 14 | are 3 models that share weights. For example, after training the 15 | autoencoder, the encoder can be used to generate latent vectors 16 | of input mnist_data for low-dim visualization like PCA or TSNE. 17 | ''' 18 | 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | import keras 23 | from keras.layers import Activation, Dense, Input 24 | from keras.layers import Conv2D, Flatten 25 | from keras.layers import Reshape, Conv2DTranspose 26 | from keras.models import Model 27 | from keras import backend as K 28 | from keras.datasets import mnist 29 | import numpy as np 30 | import matplotlib.pyplot as plt 31 | from PIL import Image 32 | 33 | np.random.seed(1337) 34 | 35 | # MNIST dataset 36 | (x_train, _), (x_test, _) = mnist.load_data() 37 | 38 | image_size = x_train.shape[1] 39 | x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) 40 | x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) 41 | x_train = x_train.astype('float32') / 255 42 | x_test = x_test.astype('float32') / 255 43 | 44 | # Generate corrupted MNIST images by adding noise with normal dist 45 | # centered at 0.5 and std=0.5 46 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape) 47 | x_train_noisy = x_train + noise 48 | noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape) 49 | x_test_noisy = x_test + noise 50 | 51 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 52 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 
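# Quick sanity check of the corruption step above (illustrative only; the
# exact numbers vary from run to run because the noise is random): after
# adding N(0.5, 0.5) noise and clipping, every pixel lies in [0, 1] and a
# sizeable fraction of pixels saturates at the clip boundaries.
print('noisy range: [%.2f, %.2f]' % (x_train_noisy.min(), x_train_noisy.max()))
print('fraction clipped to 0 or 1: %.3f'
      % np.mean((x_train_noisy == 0.) | (x_train_noisy == 1.)))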
53 | 54 | # Network parameters 55 | input_shape = (image_size, image_size, 1) 56 | batch_size = 128 57 | kernel_size = 3 58 | latent_dim = 16 59 | # Encoder/Decoder number of CNN layers and filters per layer 60 | layer_filters = [32, 64] 61 | 62 | # Build the Autoencoder Model 63 | # First build the Encoder Model 64 | inputs = Input(shape=input_shape, name='encoder_input') 65 | x = inputs 66 | # Stack of Conv2D blocks 67 | # Notes: 68 | # 1) Use Batch Normalization before ReLU on deep networks 69 | # 2) Use MaxPooling2D as alternative to strides>1 70 | # - faster but not as good as strides>1 71 | for filters in layer_filters: 72 | x = Conv2D(filters=filters, 73 | kernel_size=kernel_size, 74 | strides=2, 75 | activation='relu', 76 | padding='same')(x) 77 | 78 | # Shape info needed to build Decoder Model 79 | shape = K.int_shape(x) 80 | 81 | # Generate the latent vector 82 | x = Flatten()(x) 83 | latent = Dense(latent_dim, name='latent_vector')(x) 84 | 85 | # Instantiate Encoder Model 86 | encoder = Model(inputs, latent, name='encoder') 87 | encoder.summary() 88 | 89 | # Build the Decoder Model 90 | latent_inputs = Input(shape=(latent_dim,), name='decoder_input') 91 | x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) 92 | x = Reshape((shape[1], shape[2], shape[3]))(x) 93 | 94 | # Stack of Transposed Conv2D blocks 95 | # Notes: 96 | # 1) Use Batch Normalization before ReLU on deep networks 97 | # 2) Use UpSampling2D as alternative to strides>1 98 | # - faster but not as good as strides>1 99 | for filters in layer_filters[::-1]: 100 | x = Conv2DTranspose(filters=filters, 101 | kernel_size=kernel_size, 102 | strides=2, 103 | activation='relu', 104 | padding='same')(x) 105 | 106 | x = Conv2DTranspose(filters=1, 107 | kernel_size=kernel_size, 108 | padding='same')(x) 109 | 110 | outputs = Activation('sigmoid', name='decoder_output')(x) 111 | 112 | # Instantiate Decoder Model 113 | decoder = Model(latent_inputs, outputs, name='decoder') 114 | decoder.summary() 115 | 116 | # Autoencoder = Encoder + Decoder 117 | # Instantiate Autoencoder Model 118 | autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') 119 | autoencoder.summary() 120 | 121 | autoencoder.compile(loss='mse', optimizer='adam') 122 | 123 | # Train the autoencoder 124 | autoencoder.fit(x_train_noisy, 125 | x_train, 126 | validation_data=(x_test_noisy, x_test), 127 | epochs=30, 128 | batch_size=batch_size) 129 | 130 | # Predict the Autoencoder output from corrupted test images 131 | x_decoded = autoencoder.predict(x_test_noisy) 132 | 133 | # Display the 1st 8 corrupted and denoised images 134 | rows, cols = 10, 30 135 | num = rows * cols 136 | imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]]) 137 | imgs = imgs.reshape((rows * 3, cols, image_size, image_size)) 138 | imgs = np.vstack(np.split(imgs, rows, axis=1)) 139 | imgs = imgs.reshape((rows * 3, -1, image_size, image_size)) 140 | imgs = np.vstack([np.hstack(i) for i in imgs]) 141 | imgs = (imgs * 255).astype(np.uint8) 142 | plt.figure() 143 | plt.axis('off') 144 | plt.title('Original images: top rows, ' 145 | 'Corrupted Input: middle rows, ' 146 | 'Denoised Input: third rows') 147 | plt.imshow(imgs, interpolation='none', cmap='gray') 148 | Image.fromarray(imgs).save('corrupted_and_denoised.png') 149 | plt.show() 150 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_hierarchical_rnn.py: 
-------------------------------------------------------------------------------- 1 | """Example of using Hierarchical RNN (HRNN) to classify MNIST digits. 2 | 3 | HRNNs can learn across multiple levels 4 | of temporal hierarchy over a complex sequence. 5 | Usually, the first recurrent layer of an HRNN 6 | encodes a sentence (e.g. of word vectors) 7 | into a sentence vector. 8 | The second recurrent layer then encodes a sequence of 9 | such vectors (encoded by the first layer) into a document vector. 10 | This document vector is considered to preserve both 11 | the word-level and sentence-level structure of the context. 12 | 13 | # References 14 | 15 | - [A Hierarchical Neural Autoencoder for Paragraphs and Documents](https://arxiv.org/abs/1506.01057) 16 | Encodes paragraphs and documents with HRNN. 17 | Results have shown that HRNN outperforms standard 18 | RNNs and may play some role in more sophisticated generation tasks like 19 | summarization or question answering. 20 | - [Hierarchical recurrent neural network for skeleton based action recognition](http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714) 21 | Achieved state-of-the-art results on 22 | skeleton based action recognition with 3 levels 23 | of bidirectional HRNN combined with fully connected layers. 24 | 25 | In the below MNIST example the first LSTM layer first encodes every 26 | column of pixels of shape (28, 1) to a column vector of shape (128,). 27 | The second LSTM layer encodes then these 28 column vectors of shape (28, 128) 28 | to a image vector representing the whole image. 29 | A final Dense layer is added for prediction. 30 | 31 | After 5 epochs: train acc: 0.9858, val acc: 0.9864 32 | """ 33 | from __future__ import print_function 34 | 35 | import keras 36 | from keras.datasets import mnist 37 | from keras.models import Model 38 | from keras.layers import Input, Dense, TimeDistributed 39 | from keras.layers import LSTM 40 | 41 | # Training parameters. 42 | batch_size = 32 43 | num_classes = 10 44 | epochs = 5 45 | 46 | # Embedding dimensions. 47 | row_hidden = 128 48 | col_hidden = 128 49 | 50 | # The mnist_data, split between train and test sets. 51 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 52 | 53 | # Reshapes mnist_data to 4D for Hierarchical RNN. 54 | x_train = x_train.reshape(x_train.shape[0], 28, 28, 1) 55 | x_test = x_test.reshape(x_test.shape[0], 28, 28, 1) 56 | x_train = x_train.astype('float32') 57 | x_test = x_test.astype('float32') 58 | x_train /= 255 59 | x_test /= 255 60 | print('x_train shape:', x_train.shape) 61 | print(x_train.shape[0], 'train samples') 62 | print(x_test.shape[0], 'test samples') 63 | 64 | # Converts class vectors to binary class matrices. 65 | y_train = keras.utils.to_categorical(y_train, num_classes) 66 | y_test = keras.utils.to_categorical(y_test, num_classes) 67 | 68 | row, col, pixel = x_train.shape[1:] 69 | 70 | # 4D input. 71 | x = Input(shape=(row, col, pixel)) 72 | 73 | # Encodes a row of pixels using TimeDistributed Wrapper. 74 | encoded_rows = TimeDistributed(LSTM(row_hidden))(x) 75 | 76 | # Encodes columns of encoded rows. 77 | encoded_columns = LSTM(col_hidden)(encoded_rows) 78 | 79 | # Final predictions and model. 80 | prediction = Dense(num_classes, activation='softmax')(encoded_columns) 81 | model = Model(x, prediction) 82 | model.compile(loss='categorical_crossentropy', 83 | optimizer='rmsprop', 84 | metrics=['accuracy']) 85 | 86 | # Training. 
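# Optional shape check for the hierarchy described in the docstring (not
# needed for training): the TimeDistributed LSTM maps each of the 28 pixel
# rows, itself a length-28 sequence of single-pixel values, to a 128-d
# vector, and the outer LSTM folds those 28 row vectors into one 128-d
# image vector before the final softmax.
from keras import backend as K
print('encoded_rows shape:   ', K.int_shape(encoded_rows))     # (None, 28, 128)
print('encoded_columns shape:', K.int_shape(encoded_columns))  # (None, 128)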
87 | model.fit(x_train, y_train, 88 | batch_size=batch_size, 89 | epochs=epochs, 90 | verbose=1, 91 | validation_data=(x_test, y_test)) 92 | 93 | # Evaluation. 94 | scores = model.evaluate(x_test, y_test, verbose=0) 95 | print('Test loss:', scores[0]) 96 | print('Test accuracy:', scores[1]) 97 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_irnn.py: -------------------------------------------------------------------------------- 1 | '''This is a reproduction of the IRNN experiment 2 | with pixel-by-pixel sequential MNIST in 3 | "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" 4 | by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton 5 | 6 | arxiv:1504.00941v2 [cs.NE] 7 Apr 2015 7 | http://arxiv.org/pdf/1504.00941v2.pdf 8 | 9 | Optimizer is replaced with RMSprop which yields more stable and steady 10 | improvement. 11 | 12 | Reaches 0.93 train/test accuracy after 900 epochs 13 | (which roughly corresponds to 1687500 steps in the original paper.) 14 | ''' 15 | 16 | from __future__ import print_function 17 | 18 | import keras 19 | from keras.datasets import mnist 20 | from keras.models import Sequential 21 | from keras.layers import Dense, Activation 22 | from keras.layers import SimpleRNN 23 | from keras import initializers 24 | from keras.optimizers import RMSprop 25 | 26 | batch_size = 32 27 | num_classes = 10 28 | epochs = 200 29 | hidden_units = 100 30 | 31 | learning_rate = 1e-6 32 | clip_norm = 1.0 33 | 34 | # the mnist_data, split between train and test sets 35 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 36 | 37 | x_train = x_train.reshape(x_train.shape[0], -1, 1) 38 | x_test = x_test.reshape(x_test.shape[0], -1, 1) 39 | x_train = x_train.astype('float32') 40 | x_test = x_test.astype('float32') 41 | x_train /= 255 42 | x_test /= 255 43 | print('x_train shape:', x_train.shape) 44 | print(x_train.shape[0], 'train samples') 45 | print(x_test.shape[0], 'test samples') 46 | 47 | # convert class vectors to binary class matrices 48 | y_train = keras.utils.to_categorical(y_train, num_classes) 49 | y_test = keras.utils.to_categorical(y_test, num_classes) 50 | 51 | print('Evaluate IRNN...') 52 | model = Sequential() 53 | model.add(SimpleRNN(hidden_units, 54 | kernel_initializer=initializers.RandomNormal(stddev=0.001), 55 | recurrent_initializer=initializers.Identity(gain=1.0), 56 | activation='relu', 57 | input_shape=x_train.shape[1:])) 58 | model.add(Dense(num_classes)) 59 | model.add(Activation('softmax')) 60 | rmsprop = RMSprop(lr=learning_rate) 61 | model.compile(loss='categorical_crossentropy', 62 | optimizer=rmsprop, 63 | metrics=['accuracy']) 64 | 65 | model.fit(x_train, y_train, 66 | batch_size=batch_size, 67 | epochs=epochs, 68 | verbose=1, 69 | validation_data=(x_test, y_test)) 70 | 71 | scores = model.evaluate(x_test, y_test, verbose=0) 72 | print('IRNN test score:', scores[0]) 73 | print('IRNN test accuracy:', scores[1]) 74 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains a simple deep NN on the MNIST dataset. 2 | 3 | Gets to 98.40% test accuracy after 20 epochs 4 | (there is *a lot* of margin for parameter tuning). 5 | 2 seconds per epoch on a K520 GPU. 
6 | ''' 7 | 8 | from __future__ import print_function 9 | 10 | import keras 11 | from keras.datasets import mnist 12 | from keras.models import Sequential 13 | from keras.layers import Dense, Dropout 14 | from keras.optimizers import RMSprop 15 | 16 | batch_size = 128 17 | num_classes = 10 18 | epochs = 20 19 | 20 | # the mnist_data, split between train and test sets 21 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 22 | 23 | x_train = x_train.reshape(60000, 784) 24 | x_test = x_test.reshape(10000, 784) 25 | x_train = x_train.astype('float32') 26 | x_test = x_test.astype('float32') 27 | x_train /= 255 28 | x_test /= 255 29 | print(x_train.shape[0], 'train samples') 30 | print(x_test.shape[0], 'test samples') 31 | 32 | # convert class vectors to binary class matrices 33 | y_train = keras.utils.to_categorical(y_train, num_classes) 34 | y_test = keras.utils.to_categorical(y_test, num_classes) 35 | 36 | model = Sequential() 37 | model.add(Dense(512, activation='relu', input_shape=(784,))) 38 | model.add(Dropout(0.2)) 39 | model.add(Dense(512, activation='relu')) 40 | model.add(Dropout(0.2)) 41 | model.add(Dense(num_classes, activation='softmax')) 42 | 43 | model.summary() 44 | 45 | model.compile(loss='categorical_crossentropy', 46 | optimizer=RMSprop(), 47 | metrics=['accuracy']) 48 | 49 | history = model.fit(x_train, y_train, 50 | batch_size=batch_size, 51 | epochs=epochs, 52 | verbose=1, 53 | validation_data=(x_test, y_test)) 54 | score = model.evaluate(x_test, y_test, verbose=0) 55 | print('Test loss:', score[0]) 56 | print('Test accuracy:', score[1]) 57 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_siamese.py: -------------------------------------------------------------------------------- 1 | '''Trains a Siamese MLP on pairs of digits from the MNIST dataset. 2 | 3 | It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the 4 | output of the shared network and by optimizing the contrastive loss (see paper 5 | for mode details). 6 | 7 | # References 8 | 9 | - Dimensionality Reduction by Learning an Invariant Mapping 10 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 11 | 12 | Gets to 97.2% test accuracy after 20 epochs. 13 | 2 seconds per epoch on a Titan X Maxwell GPU 14 | ''' 15 | from __future__ import absolute_import 16 | from __future__ import print_function 17 | import numpy as np 18 | 19 | import random 20 | from keras.datasets import mnist 21 | from keras.models import Model 22 | from keras.layers import Input, Flatten, Dense, Dropout, Lambda 23 | from keras.optimizers import RMSprop 24 | from keras import backend as K 25 | 26 | num_classes = 10 27 | epochs = 20 28 | 29 | 30 | def euclidean_distance(vects): 31 | x, y = vects 32 | return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon())) 33 | 34 | 35 | def eucl_dist_output_shape(shapes): 36 | shape1, shape2 = shapes 37 | return (shape1[0], 1) 38 | 39 | 40 | def contrastive_loss(y_true, y_pred): 41 | '''Contrastive loss from Hadsell-et-al.'06 42 | http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf 43 | ''' 44 | margin = 1 45 | return K.mean(y_true * K.square(y_pred) + 46 | (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) 47 | 48 | 49 | def create_pairs(x, digit_indices): 50 | '''Positive and negative pair creation. 51 | Alternates between positive and negative pairs. 
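    Concretely, each positive pair joins two consecutive samples of the same
    digit, and each negative pair replaces the second sample with one drawn
    from a randomly chosen different digit, so positives and negatives are
    produced in equal numbers (labels 1 and 0 respectively).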
52 | ''' 53 | pairs = [] 54 | labels = [] 55 | n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1 56 | for d in range(num_classes): 57 | for i in range(n): 58 | z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] 59 | pairs += [[x[z1], x[z2]]] 60 | inc = random.randrange(1, num_classes) 61 | dn = (d + inc) % num_classes 62 | z1, z2 = digit_indices[d][i], digit_indices[dn][i] 63 | pairs += [[x[z1], x[z2]]] 64 | labels += [1, 0] 65 | return np.array(pairs), np.array(labels) 66 | 67 | 68 | def create_base_network(input_shape): 69 | '''Base network to be shared (eq. to feature extraction). 70 | ''' 71 | input = Input(shape=input_shape) 72 | x = Flatten()(input) 73 | x = Dense(128, activation='relu')(x) 74 | x = Dropout(0.1)(x) 75 | x = Dense(128, activation='relu')(x) 76 | x = Dropout(0.1)(x) 77 | x = Dense(128, activation='relu')(x) 78 | return Model(input, x) 79 | 80 | 81 | def compute_accuracy(y_true, y_pred): 82 | '''Compute classification accuracy with a fixed threshold on distances. 83 | ''' 84 | pred = y_pred.ravel() < 0.5 85 | return np.mean(pred == y_true) 86 | 87 | 88 | def accuracy(y_true, y_pred): 89 | '''Compute classification accuracy with a fixed threshold on distances. 90 | ''' 91 | return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) 92 | 93 | 94 | # the mnist_data, split between train and test sets 95 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 96 | x_train = x_train.astype('float32') 97 | x_test = x_test.astype('float32') 98 | x_train /= 255 99 | x_test /= 255 100 | input_shape = x_train.shape[1:] 101 | 102 | # create training+test positive and negative pairs 103 | digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)] 104 | tr_pairs, tr_y = create_pairs(x_train, digit_indices) 105 | 106 | digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)] 107 | te_pairs, te_y = create_pairs(x_test, digit_indices) 108 | 109 | # network definition 110 | base_network = create_base_network(input_shape) 111 | 112 | input_a = Input(shape=input_shape) 113 | input_b = Input(shape=input_shape) 114 | 115 | # because we re-use the same instance `base_network`, 116 | # the weights of the network 117 | # will be shared across the two branches 118 | processed_a = base_network(input_a) 119 | processed_b = base_network(input_b) 120 | 121 | distance = Lambda(euclidean_distance, 122 | output_shape=eucl_dist_output_shape)([processed_a, processed_b]) 123 | 124 | model = Model([input_a, input_b], distance) 125 | 126 | # train 127 | rms = RMSprop() 128 | model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy]) 129 | model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, 130 | batch_size=128, 131 | epochs=epochs, 132 | validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) 133 | 134 | # compute final accuracy on training and test sets 135 | y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) 136 | tr_acc = compute_accuracy(tr_y, y_pred) 137 | y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) 138 | te_acc = compute_accuracy(te_y, y_pred) 139 | 140 | print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) 141 | print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) 142 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_sklearn_wrapper.py: -------------------------------------------------------------------------------- 1 | '''Example of how to use sklearn wrapper 2 | 3 | Builds simple CNN models on MNIST and uses 
sklearn's GridSearchCV to find best model 4 | ''' 5 | 6 | from __future__ import print_function 7 | 8 | import keras 9 | from keras.datasets import mnist 10 | from keras.models import Sequential 11 | from keras.layers import Dense, Dropout, Activation, Flatten 12 | from keras.layers import Conv2D, MaxPooling2D 13 | from keras.wrappers.scikit_learn import KerasClassifier 14 | from keras import backend as K 15 | from sklearn.grid_search import GridSearchCV 16 | 17 | 18 | num_classes = 10 19 | 20 | # input image dimensions 21 | img_rows, img_cols = 28, 28 22 | 23 | # load training mnist_data and do basic mnist_data normalization 24 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 25 | 26 | if K.image_data_format() == 'channels_first': 27 | x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) 28 | x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) 29 | input_shape = (1, img_rows, img_cols) 30 | else: 31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 33 | input_shape = (img_rows, img_cols, 1) 34 | 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255 38 | x_test /= 255 39 | 40 | # convert class vectors to binary class matrices 41 | y_train = keras.utils.to_categorical(y_train, num_classes) 42 | y_test = keras.utils.to_categorical(y_test, num_classes) 43 | 44 | 45 | def make_model(dense_layer_sizes, filters, kernel_size, pool_size): 46 | '''Creates model comprised of 2 convolutional layers followed by dense layers 47 | 48 | dense_layer_sizes: List of layer sizes. 49 | This list has one number for each layer 50 | filters: Number of convolutional filters in each convolutional layer 51 | kernel_size: Convolutional kernel size 52 | pool_size: Size of pooling area for max pooling 53 | ''' 54 | 55 | model = Sequential() 56 | model.add(Conv2D(filters, kernel_size, 57 | padding='valid', 58 | input_shape=input_shape)) 59 | model.add(Activation('relu')) 60 | model.add(Conv2D(filters, kernel_size)) 61 | model.add(Activation('relu')) 62 | model.add(MaxPooling2D(pool_size=pool_size)) 63 | model.add(Dropout(0.25)) 64 | 65 | model.add(Flatten()) 66 | for layer_size in dense_layer_sizes: 67 | model.add(Dense(layer_size)) 68 | model.add(Activation('relu')) 69 | model.add(Dropout(0.5)) 70 | model.add(Dense(num_classes)) 71 | model.add(Activation('softmax')) 72 | 73 | model.compile(loss='categorical_crossentropy', 74 | optimizer='adadelta', 75 | metrics=['accuracy']) 76 | 77 | return model 78 | 79 | dense_size_candidates = [[32], [64], [32, 32], [64, 64]] 80 | my_classifier = KerasClassifier(make_model, batch_size=32) 81 | validator = GridSearchCV(my_classifier, 82 | param_grid={'dense_layer_sizes': dense_size_candidates, 83 | # epochs is avail for tuning even when not 84 | # an argument to model building function 85 | 'epochs': [3, 6], 86 | 'filters': [8], 87 | 'kernel_size': [3], 88 | 'pool_size': [2]}, 89 | scoring='neg_log_loss', 90 | n_jobs=1) 91 | validator.fit(x_train, y_train) 92 | 93 | print('The parameters of the best model are: ') 94 | print(validator.best_params_) 95 | 96 | # validator.best_estimator_ returns sklearn-wrapped version of best model. 
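# (For reference, the wrapper can also be used outside of a grid search; a
# minimal stand-alone sketch, reusing the make_model helper defined above,
# would look like:
#     clf = KerasClassifier(make_model, dense_layer_sizes=[64], filters=8,
#                           kernel_size=3, pool_size=2,
#                           epochs=3, batch_size=32)
#     clf.fit(x_train, y_train)
#     print(clf.score(x_test, y_test))
# KerasClassifier forwards its keyword arguments to make_model and to fit(),
# which is also why 'epochs' can appear in the param_grid above without being
# an argument of make_model.)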
97 | # validator.best_estimator_.model returns the (unwrapped) keras model 98 | best_model = validator.best_estimator_.model 99 | metric_names = best_model.metrics_names 100 | metric_values = best_model.evaluate(x_test, y_test) 101 | for metric, value in zip(metric_names, metric_values): 102 | print(metric, ': ', value) 103 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/mnist_transfer_cnn.py: -------------------------------------------------------------------------------- 1 | '''Transfer learning toy example. 2 | 3 | 1 - Train a simple convnet on the MNIST dataset the first 5 digits [0..4]. 4 | 2 - Freeze convolutional layers and fine-tune dense layers 5 | for the classification of digits [5..9]. 6 | 7 | Get to 99.8% test accuracy after 5 epochs 8 | for the first five digits classifier 9 | and 99.2% for the last five digits after transfer + fine-tuning. 10 | ''' 11 | 12 | from __future__ import print_function 13 | 14 | import datetime 15 | import keras 16 | from keras.datasets import mnist 17 | from keras.models import Sequential 18 | from keras.layers import Dense, Dropout, Activation, Flatten 19 | from keras.layers import Conv2D, MaxPooling2D 20 | from keras import backend as K 21 | 22 | now = datetime.datetime.now 23 | 24 | batch_size = 128 25 | num_classes = 5 26 | epochs = 5 27 | 28 | # input image dimensions 29 | img_rows, img_cols = 28, 28 30 | # number of convolutional filters to use 31 | filters = 32 32 | # size of pooling area for max pooling 33 | pool_size = 2 34 | # convolution kernel size 35 | kernel_size = 3 36 | 37 | if K.image_data_format() == 'channels_first': 38 | input_shape = (1, img_rows, img_cols) 39 | else: 40 | input_shape = (img_rows, img_cols, 1) 41 | 42 | 43 | def train_model(model, train, test, num_classes): 44 | x_train = train[0].reshape((train[0].shape[0],) + input_shape) 45 | x_test = test[0].reshape((test[0].shape[0],) + input_shape) 46 | x_train = x_train.astype('float32') 47 | x_test = x_test.astype('float32') 48 | x_train /= 255 49 | x_test /= 255 50 | print('x_train shape:', x_train.shape) 51 | print(x_train.shape[0], 'train samples') 52 | print(x_test.shape[0], 'test samples') 53 | 54 | # convert class vectors to binary class matrices 55 | y_train = keras.utils.to_categorical(train[1], num_classes) 56 | y_test = keras.utils.to_categorical(test[1], num_classes) 57 | 58 | model.compile(loss='categorical_crossentropy', 59 | optimizer='adadelta', 60 | metrics=['accuracy']) 61 | 62 | t = now() 63 | model.fit(x_train, y_train, 64 | batch_size=batch_size, 65 | epochs=epochs, 66 | verbose=1, 67 | validation_data=(x_test, y_test)) 68 | print('Training time: %s' % (now() - t)) 69 | score = model.evaluate(x_test, y_test, verbose=0) 70 | print('Test score:', score[0]) 71 | print('Test accuracy:', score[1]) 72 | 73 | 74 | # the mnist_data, split between train and test sets 75 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 76 | 77 | # create two datasets one with digits below 5 and one with 5 and above 78 | x_train_lt5 = x_train[y_train < 5] 79 | y_train_lt5 = y_train[y_train < 5] 80 | x_test_lt5 = x_test[y_test < 5] 81 | y_test_lt5 = y_test[y_test < 5] 82 | 83 | x_train_gte5 = x_train[y_train >= 5] 84 | y_train_gte5 = y_train[y_train >= 5] - 5 85 | x_test_gte5 = x_test[y_test >= 5] 86 | y_test_gte5 = y_test[y_test >= 5] - 5 87 | 88 | # define two groups of layers: feature (convolutions) and classification (dense) 89 | feature_layers = [ 90 | Conv2D(filters, kernel_size, 91 
| padding='valid', 92 | input_shape=input_shape), 93 | Activation('relu'), 94 | Conv2D(filters, kernel_size), 95 | Activation('relu'), 96 | MaxPooling2D(pool_size=pool_size), 97 | Dropout(0.25), 98 | Flatten(), 99 | ] 100 | 101 | classification_layers = [ 102 | Dense(128), 103 | Activation('relu'), 104 | Dropout(0.5), 105 | Dense(num_classes), 106 | Activation('softmax') 107 | ] 108 | 109 | # create complete model 110 | model = Sequential(feature_layers + classification_layers) 111 | 112 | # train model for 5-digit classification [0..4] 113 | train_model(model, 114 | (x_train_lt5, y_train_lt5), 115 | (x_test_lt5, y_test_lt5), num_classes) 116 | 117 | # freeze feature layers and rebuild model 118 | for l in feature_layers: 119 | l.trainable = False 120 | 121 | # transfer: train dense layers for new classification task [5..9] 122 | train_model(model, 123 | (x_train_gte5, y_train_gte5), 124 | (x_test_gte5, y_test_gte5), num_classes) 125 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/pretrained_word_embeddings.py: -------------------------------------------------------------------------------- 1 | '''This script loads pre-trained word embeddings (GloVe embeddings) 2 | into a frozen Keras Embedding layer, and uses it to 3 | train a text classification model on the 20 Newsgroup dataset 4 | (classification of newsgroup messages into 20 different categories). 5 | 6 | GloVe embedding mnist_data can be found at: 7 | http://nlp.stanford.edu/mnist_data/glove.6B.zip 8 | (source page: http://nlp.stanford.edu/projects/glove/) 9 | 10 | 20 Newsgroup mnist_data can be found at: 11 | http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/mnist_data/news20.html 12 | ''' 13 | 14 | from __future__ import print_function 15 | 16 | import os 17 | import sys 18 | import numpy as np 19 | from keras.preprocessing.text import Tokenizer 20 | from keras.preprocessing.sequence import pad_sequences 21 | from keras.utils import to_categorical 22 | from keras.layers import Dense, Input, GlobalMaxPooling1D 23 | from keras.layers import Conv1D, MaxPooling1D, Embedding 24 | from keras.models import Model 25 | 26 | 27 | BASE_DIR = '' 28 | GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B') 29 | TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup') 30 | MAX_SEQUENCE_LENGTH = 1000 31 | MAX_NUM_WORDS = 20000 32 | EMBEDDING_DIM = 100 33 | VALIDATION_SPLIT = 0.2 34 | 35 | # first, build index mapping words in the embeddings set 36 | # to their embedding vector 37 | 38 | print('Indexing word vectors.') 39 | 40 | embeddings_index = {} 41 | with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f: 42 | for line in f: 43 | values = line.split() 44 | word = values[0] 45 | coefs = np.asarray(values[1:], dtype='float32') 46 | embeddings_index[word] = coefs 47 | 48 | print('Found %s word vectors.' 
% len(embeddings_index)) 49 | 50 | # second, prepare text samples and their labels 51 | print('Processing text dataset') 52 | 53 | texts = [] # list of text samples 54 | labels_index = {} # dictionary mapping label name to numeric id 55 | labels = [] # list of label ids 56 | for name in sorted(os.listdir(TEXT_DATA_DIR)): 57 | path = os.path.join(TEXT_DATA_DIR, name) 58 | if os.path.isdir(path): 59 | label_id = len(labels_index) 60 | labels_index[name] = label_id 61 | for fname in sorted(os.listdir(path)): 62 | if fname.isdigit(): 63 | fpath = os.path.join(path, fname) 64 | args = {} if sys.version_info < (3,) else {'encoding': 'latin-1'} 65 | with open(fpath, **args) as f: 66 | t = f.read() 67 | i = t.find('\n\n') # skip header 68 | if 0 < i: 69 | t = t[i:] 70 | texts.append(t) 71 | labels.append(label_id) 72 | 73 | print('Found %s texts.' % len(texts)) 74 | 75 | # finally, vectorize the text samples into a 2D integer tensor 76 | tokenizer = Tokenizer(num_words=MAX_NUM_WORDS) 77 | tokenizer.fit_on_texts(texts) 78 | sequences = tokenizer.texts_to_sequences(texts) 79 | 80 | word_index = tokenizer.word_index 81 | print('Found %s unique tokens.' % len(word_index)) 82 | 83 | data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH) 84 | 85 | labels = to_categorical(np.asarray(labels)) 86 | print('Shape of mnist_data tensor:', data.shape) 87 | print('Shape of label tensor:', labels.shape) 88 | 89 | # split the mnist_data into a training set and a validation set 90 | indices = np.arange(data.shape[0]) 91 | np.random.shuffle(indices) 92 | data = data[indices] 93 | labels = labels[indices] 94 | num_validation_samples = int(VALIDATION_SPLIT * data.shape[0]) 95 | 96 | x_train = data[:-num_validation_samples] 97 | y_train = labels[:-num_validation_samples] 98 | x_val = data[-num_validation_samples:] 99 | y_val = labels[-num_validation_samples:] 100 | 101 | print('Preparing embedding matrix.') 102 | 103 | # prepare embedding matrix 104 | num_words = min(MAX_NUM_WORDS, len(word_index) + 1) 105 | embedding_matrix = np.zeros((num_words, EMBEDDING_DIM)) 106 | for word, i in word_index.items(): 107 | if i >= MAX_NUM_WORDS: 108 | continue 109 | embedding_vector = embeddings_index.get(word) 110 | if embedding_vector is not None: 111 | # words not found in embedding index will be all-zeros. 
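        # (and because the Embedding layer below is frozen with trainable=False,
        # those all-zero rows stay zero for the whole run, so out-of-vocabulary
        # words contribute nothing to the convolutions)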
112 | embedding_matrix[i] = embedding_vector 113 | 114 | # load pre-trained word embeddings into an Embedding layer 115 | # note that we set trainable = False so as to keep the embeddings fixed 116 | embedding_layer = Embedding(num_words, 117 | EMBEDDING_DIM, 118 | weights=[embedding_matrix], 119 | input_length=MAX_SEQUENCE_LENGTH, 120 | trainable=False) 121 | 122 | print('Training model.') 123 | 124 | # train a 1D convnet with global maxpooling 125 | sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32') 126 | embedded_sequences = embedding_layer(sequence_input) 127 | x = Conv1D(128, 5, activation='relu')(embedded_sequences) 128 | x = MaxPooling1D(5)(x) 129 | x = Conv1D(128, 5, activation='relu')(x) 130 | x = MaxPooling1D(5)(x) 131 | x = Conv1D(128, 5, activation='relu')(x) 132 | x = GlobalMaxPooling1D()(x) 133 | x = Dense(128, activation='relu')(x) 134 | preds = Dense(len(labels_index), activation='softmax')(x) 135 | 136 | model = Model(sequence_input, preds) 137 | model.compile(loss='categorical_crossentropy', 138 | optimizer='rmsprop', 139 | metrics=['acc']) 140 | 141 | model.fit(x_train, y_train, 142 | batch_size=128, 143 | epochs=10, 144 | validation_data=(x_val, y_val)) 145 | -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/reuters_mlp.py: -------------------------------------------------------------------------------- 1 | '''Trains and evaluate a simple MLP 2 | on the Reuters newswire topic classification task. 3 | ''' 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | import keras 8 | from keras.datasets import reuters 9 | from keras.models import Sequential 10 | from keras.layers import Dense, Dropout, Activation 11 | from keras.preprocessing.text import Tokenizer 12 | 13 | max_words = 1000 14 | batch_size = 32 15 | epochs = 5 16 | 17 | print('Loading mnist_data...') 18 | (x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, 19 | test_split=0.2) 20 | print(len(x_train), 'train sequences') 21 | print(len(x_test), 'test sequences') 22 | 23 | num_classes = np.max(y_train) + 1 24 | print(num_classes, 'classes') 25 | 26 | print('Vectorizing sequence mnist_data...') 27 | tokenizer = Tokenizer(num_words=max_words) 28 | x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') 29 | x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') 30 | print('x_train shape:', x_train.shape) 31 | print('x_test shape:', x_test.shape) 32 | 33 | print('Convert class vector to binary class matrix ' 34 | '(for use with categorical_crossentropy)') 35 | y_train = keras.utils.to_categorical(y_train, num_classes) 36 | y_test = keras.utils.to_categorical(y_test, num_classes) 37 | print('y_train shape:', y_train.shape) 38 | print('y_test shape:', y_test.shape) 39 | 40 | print('Building model...') 41 | model = Sequential() 42 | model.add(Dense(512, input_shape=(max_words,))) 43 | model.add(Activation('relu')) 44 | model.add(Dropout(0.5)) 45 | model.add(Dense(num_classes)) 46 | model.add(Activation('softmax')) 47 | 48 | model.compile(loss='categorical_crossentropy', 49 | optimizer='adam', 50 | metrics=['accuracy']) 51 | 52 | history = model.fit(x_train, y_train, 53 | batch_size=batch_size, 54 | epochs=epochs, 55 | verbose=1, 56 | validation_split=0.1) 57 | score = model.evaluate(x_test, y_test, 58 | batch_size=batch_size, verbose=1) 59 | print('Test score:', score[0]) 60 | print('Test accuracy:', score[1]) 61 | 
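# Aside: what 'binary' vectorization does, on a toy sequence (illustrative).
# Each newswire becomes a fixed-length 0/1 vector that marks which of the
# max_words most frequent word indices occur in it; word order and counts
# are discarded.
toy = tokenizer.sequences_to_matrix([[1, 3, 3, 7]], mode='binary')
print('toy vector shape:', toy.shape)               # (1, 1000)
print('non-zero indices:', np.nonzero(toy[0])[0])   # [1 3 7]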
-------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/saved_models/keras_cifar10_trained_model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mtianyan/NeuralNetworksGetStarted/bee9ba10531b00ddef5ec45f419707139bd4814e/7-caffe_and_keras/7-3 keras-master/examples/saved_models/keras_cifar10_trained_model.h5 -------------------------------------------------------------------------------- /7-caffe_and_keras/7-3 keras-master/examples/variational_autoencoder.py: -------------------------------------------------------------------------------- 1 | '''This script demonstrates how to build a variational autoencoder with Keras. 2 | 3 | #Reference 4 | 5 | - Auto-Encoding Variational Bayes 6 | https://arxiv.org/abs/1312.6114 7 | ''' 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from scipy.stats import norm 13 | 14 | from keras.layers import Input, Dense, Lambda 15 | from keras.models import Model 16 | from keras import backend as K 17 | from keras import metrics 18 | from keras.datasets import mnist 19 | 20 | batch_size = 100 21 | original_dim = 784 22 | latent_dim = 2 23 | intermediate_dim = 256 24 | epochs = 50 25 | epsilon_std = 1.0 26 | 27 | 28 | x = Input(shape=(original_dim,)) 29 | h = Dense(intermediate_dim, activation='relu')(x) 30 | z_mean = Dense(latent_dim)(h) 31 | z_log_var = Dense(latent_dim)(h) 32 | 33 | 34 | def sampling(args): 35 | z_mean, z_log_var = args 36 | epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., 37 | stddev=epsilon_std) 38 | return z_mean + K.exp(z_log_var / 2) * epsilon 39 | 40 | # note that "output_shape" isn't necessary with the TensorFlow backend 41 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var]) 42 | 43 | # we instantiate these layers separately so as to reuse them later 44 | decoder_h = Dense(intermediate_dim, activation='relu') 45 | decoder_mean = Dense(original_dim, activation='sigmoid') 46 | h_decoded = decoder_h(z) 47 | x_decoded_mean = decoder_mean(h_decoded) 48 | 49 | # instantiate VAE model 50 | vae = Model(x, x_decoded_mean) 51 | 52 | # Compute VAE loss 53 | xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean) 54 | kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) 55 | vae_loss = K.mean(xent_loss + kl_loss) 56 | 57 | vae.add_loss(vae_loss) 58 | vae.compile(optimizer='rmsprop') 59 | vae.summary() 60 | 61 | 62 | # train the VAE on MNIST digits 63 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 64 | 65 | x_train = x_train.astype('float32') / 255. 66 | x_test = x_test.astype('float32') / 255. 
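# Background note on the loss defined above (not needed to run the example):
# for a diagonal Gaussian posterior N(mu, sigma^2) and a standard normal
# prior, the KL term has the closed form
#     KL = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
# which is exactly what kl_loss computes with z_log_var standing in for
# log(sigma^2), and sampling() draws z = mu + sigma * epsilon (the
# reparameterization trick) so gradients can flow through the random draw.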
67 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:]))) 68 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:]))) 69 | 70 | vae.fit(x_train, 71 | shuffle=True, 72 | epochs=epochs, 73 | batch_size=batch_size, 74 | validation_data=(x_test, None)) 75 | 76 | # build a model to project inputs on the latent space 77 | encoder = Model(x, z_mean) 78 | 79 | # display a 2D plot of the digit classes in the latent space 80 | x_test_encoded = encoder.predict(x_test, batch_size=batch_size) 81 | plt.figure(figsize=(6, 6)) 82 | plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test) 83 | plt.colorbar() 84 | plt.show() 85 | 86 | # build a digit generator that can sample from the learned distribution 87 | decoder_input = Input(shape=(latent_dim,)) 88 | _h_decoded = decoder_h(decoder_input) 89 | _x_decoded_mean = decoder_mean(_h_decoded) 90 | generator = Model(decoder_input, _x_decoded_mean) 91 | 92 | # display a 2D manifold of the digits 93 | n = 15 # figure with 15x15 digits 94 | digit_size = 28 95 | figure = np.zeros((digit_size * n, digit_size * n)) 96 | # linearly spaced coordinates on the unit square were transformed through the inverse CDF (ppf) of the Gaussian 97 | # to produce values of the latent variables z, since the prior of the latent space is Gaussian 98 | grid_x = norm.ppf(np.linspace(0.05, 0.95, n)) 99 | grid_y = norm.ppf(np.linspace(0.05, 0.95, n)) 100 | 101 | for i, yi in enumerate(grid_x): 102 | for j, xi in enumerate(grid_y): 103 | z_sample = np.array([[xi, yi]]) 104 | x_decoded = generator.predict(z_sample) 105 | digit = x_decoded[0].reshape(digit_size, digit_size) 106 | figure[i * digit_size: (i + 1) * digit_size, 107 | j * digit_size: (j + 1) * digit_size] = digit 108 | 109 | plt.figure(figsize=(10, 10)) 110 | plt.imshow(figure, cmap='Greys_r') 111 | plt.show() 112 | -------------------------------------------------------------------------------- /mnist_data/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mtianyan/NeuralNetworksGetStarted/bee9ba10531b00ddef5ec45f419707139bd4814e/mnist_data/mnist.pkl.gz -------------------------------------------------------------------------------- /utils/mnist_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | mnist_loader 3 | ~~~~~~~~~~~~ 4 | 5 | A library to load the MNIST image mnist_data. For details of the mnist_data 6 | structures that are returned, see the doc strings for ``load_data`` 7 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the 8 | function usually called by our neural network code. 9 | """ 10 | 11 | #### Libraries 12 | # Standard library 13 | import pickle 14 | import gzip 15 | 16 | # Third-party libraries 17 | import numpy as np 18 | 19 | 20 | def load_data(): 21 | """Return the MNIST mnist_data as a tuple containing the training mnist_data, 22 | the validation mnist_data, and the test mnist_data. 23 | 24 | The ``training_data`` is returned as a tuple with two entries. 25 | The first entry contains the actual training images. This is a 26 | numpy ndarray with 50,000 entries. Each entry is, in turn, a 27 | numpy ndarray with 784 values, representing the 28 * 28 = 784 28 | pixels in a single MNIST image. 29 | 30 | The second entry in the ``training_data`` tuple is a numpy ndarray 31 | containing 50,000 entries. Those entries are just the digit 32 | values (0...9) for the corresponding images contained in the first 33 | entry of the tuple. 
34 | 35 | The ``validation_data`` and ``test_data`` are similar, except 36 | each contains only 10,000 images. 37 | 38 | This is a nice mnist_data format, but for use in neural networks it's 39 | helpful to modify the format of the ``training_data`` a little. 40 | That's done in the wrapper function ``load_data_wrapper()``, see 41 | below. 42 | """ 43 | f = gzip.open('./mnist_data/mnist.pkl.gz', 'rb') 44 | training_data, validation_data, test_data = pickle.load(f, encoding='bytes') 45 | f.close() 46 | return (training_data, validation_data, test_data) 47 | 48 | 49 | def load_data_wrapper(): 50 | """Return a tuple containing ``(training_data, validation_data, 51 | test_data)``. Based on ``load_data``, but the format is more 52 | convenient for use in our implementation of neural networks. 53 | 54 | In particular, ``training_data`` is a list containing 50,000 55 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray 56 | containing the input image. ``y`` is a 10-dimensional 57 | numpy.ndarray representing the unit vector corresponding to the 58 | correct digit for ``x``. 59 | 60 | ``validation_data`` and ``test_data`` are lists containing 10,000 61 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional 62 | numpy.ndarry containing the input image, and ``y`` is the 63 | corresponding classification, i.e., the digit values (integers) 64 | corresponding to ``x``. 65 | 66 | Obviously, this means we're using slightly different formats for 67 | the training mnist_data and the validation / test mnist_data. These formats 68 | turn out to be the most convenient for use in our neural network 69 | code.""" 70 | tr_d, va_d, te_d = load_data() 71 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]] 72 | training_results = [vectorized_result(y) for y in tr_d[1]] 73 | training_data = list(zip(training_inputs, training_results)) 74 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]] 75 | validation_data = list(zip(validation_inputs, va_d[1])) 76 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]] 77 | test_data = list(zip(test_inputs, te_d[1])) 78 | return (training_data, validation_data, test_data) 79 | 80 | 81 | def vectorized_result(j): 82 | """Return a 10-dimensional unit vector with a 1.0 in the jth 83 | position and zeroes elsewhere. This is used to convert a digit 84 | (0...9) into a corresponding desired output from the neural 85 | network.""" 86 | e = np.zeros((10, 1)) 87 | e[j] = 1.0 88 | return e 89 | 90 | 91 | def load_data_wrapper2(): 92 | """Return a tuple containing ``(training_data, validation_data, 93 | test_data)``. Based on ``load_data``, but the format is more 94 | convenient for use in our implementation of neural networks. 95 | 96 | In particular, ``training_data`` is a list containing 50,000 97 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray 98 | containing the input image. ``y`` is a 10-dimensional 99 | numpy.ndarray representing the unit vector corresponding to the 100 | correct digit for ``x``. 101 | 102 | ``validation_data`` and ``test_data`` are lists containing 10,000 103 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional 104 | numpy.ndarry containing the input image, and ``y`` is the 105 | corresponding classification, i.e., the digit values (integers) 106 | corresponding to ``x``. 107 | 108 | Obviously, this means we're using slightly different formats for 109 | the training mnist_data and the validation / test mnist_data. 
These formats 110 | turn out to be the most convenient for use in our neural network 111 | code.""" 112 | tr_d, va_d, te_d = load_data() 113 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]] 114 | training_results = [vectorized_result(y) for y in tr_d[1]] 115 | training_data = zip(training_inputs, training_results) 116 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]] 117 | validation_data = zip(validation_inputs, va_d[1]) 118 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]] 119 | test_data = zip(test_inputs, te_d[1]) 120 | return (training_inputs, training_results, validation_data, test_data) 121 | --------------------------------------------------------------------------------
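# Usage sketch for utils/mnist_loader.py above (illustrative; it assumes the
# repository root is the current working directory, since load_data()
# hard-codes the relative path ./mnist_data/mnist.pkl.gz):
#
#     from utils.mnist_loader import load_data_wrapper
#
#     training_data, validation_data, test_data = load_data_wrapper()
#     x, y = training_data[0]
#     print(len(training_data), len(validation_data), len(test_data))   # 50000 10000 10000
#     print(x.shape, y.shape)                                           # (784, 1) (10, 1)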