├── CNNs
├── MobileNet.py
├── ResNet50.py
├── ShuffleNet.py
├── SqueezeNet.py
├── densenet.py
├── mobilenet_v2.py
└── shufflenet_v2.py
├── ObjectDetections
├── SSD
│ ├── SSD_demo.py
│ ├── demo
│ │ ├── README.md
│ │ ├── car2.jpg
│ │ ├── dog.jpg
│ │ ├── eagle.jpg
│ │ ├── horses.jpg
│ │ ├── person.jpg
│ │ └── street.jpg
│ ├── ssd_300_vgg.py
│ ├── ssd_anchors.py
│ ├── ssd_layers.py
│ ├── utils.py
│ ├── var_name.txt
│ └── visualization.py
├── yolo
│ ├── test_images
│ │ ├── car.jpg
│ │ ├── cat.jpg
│ │ └── person.jpg
│ ├── yolo.py
│ └── yolo_tf.py
└── yolo2
│ ├── config.py
│ ├── data
│ └── coco_classes.txt
│ ├── demo.py
│ ├── detect_ops.py
│ ├── loss.py
│ ├── model.png
│ ├── model.py
│ └── utils.py
├── README.md
├── data
└── text.txt
├── examples
├── Resnet
│ └── resent.py
├── VAE
│ ├── README.md
│ ├── img_epoch20.jpg
│ ├── rand_img_epoch_80.jpg
│ ├── vae.ipynb
│ └── vae_mnist.py
├── VGG
│ ├── imagenet_classes.py
│ ├── puzzle.jpeg
│ ├── tiger.jpeg
│ └── vgg16.py
├── cnn_setence_classification
│ ├── data
│ │ ├── rt-polarity.neg
│ │ ├── rt-polarity.pos
│ │ └── test.txt
│ ├── data_helpers.py
│ ├── text_cnn.py
│ └── train_cnn.py
├── gan
│ ├── DCGAN.py
│ ├── GAN_simple.ipynb
│ ├── GAN_simple.py
│ ├── README.md
│ └── epoch3_g_images.png
├── lstm_model_ptb
│ ├── data
│ │ ├── ptb.test.txt
│ │ ├── ptb.train.txt
│ │ └── ptb.valid.txt
│ ├── ptb_lstm_model.py
│ └── reader.py
├── lstm_time_series_regression
│ ├── lstm_regression.py
│ └── lstm_regression_results.png
├── rnn_language_model
│ ├── data
│ │ └── reddit-comments-2015-08.csv
│ ├── input_data_rnn.py
│ ├── rnn_numpy.py
│ └── rnn_tensorflow.py
└── test.py
├── models
├── cnn.py
├── da.py
├── dbn.py
├── gbrbm.py
├── input_data.py
├── logisticRegression.py
├── mlp.py
├── rbm.py
├── sda.py
└── utils.py
├── notes
├── tf_autoencoder.ipynb
└── tf_rnn.ipynb
└── results
├── 10filters_at_epoch_0.png
├── 10filters_at_epoch_1.png
├── 10filters_at_epoch_10.png
├── 10filters_at_epoch_11.png
├── 10filters_at_epoch_12.png
├── 10filters_at_epoch_13.png
├── 10filters_at_epoch_14.png
├── 10filters_at_epoch_2.png
├── 10filters_at_epoch_3.png
├── 10filters_at_epoch_4.png
├── 10filters_at_epoch_5.png
├── 10filters_at_epoch_6.png
├── 10filters_at_epoch_7.png
├── 10filters_at_epoch_8.png
├── 10filters_at_epoch_9.png
├── 10original_and_10samples.png
├── 654362565405877642.jpg
├── DBN_results.png
├── filters_corruption_0.png
├── filters_corruption_30.png
├── new_filters_at_epoch_14.png
├── new_original_and_10samples.png
├── rnn_language_model.png
└── weichat.jpg
/CNNs/MobileNet.py:
--------------------------------------------------------------------------------
1 | """
2 | 2017/11/24 ref:https://github.com/Zehaos/MobileNet/blob/master/nets/mobilenet.py
3 | """
4 | 
5 | import tensorflow as tf
6 | from tensorflow.python.training import moving_averages
7 | 
8 | UPDATE_OPS_COLLECTION = "_update_ops_"
9 | 
10 | # create variable
11 | def create_variable(name, shape, initializer,
12 |     dtype=tf.float32, trainable=True):
13 |     return tf.get_variable(name, shape=shape, dtype=dtype,
14 |         initializer=initializer, trainable=trainable)
15 | 
16 | # batchnorm layer
17 | def batchnorm(inputs, scope, epsilon=1e-05, momentum=0.99, is_training=True):
18 |     inputs_shape = inputs.get_shape().as_list()
19 |     params_shape = inputs_shape[-1:]
20 |     axis = list(range(len(inputs_shape) - 1))
21 | 
22 |     with tf.variable_scope(scope):
23 |         beta = create_variable("beta", params_shape,
24 |             initializer=tf.zeros_initializer())
25 |         gamma = create_variable("gamma", params_shape,
26 |             initializer=tf.ones_initializer())
27 |         # for inference
28 |         moving_mean = create_variable("moving_mean", params_shape,
29 |             initializer=tf.zeros_initializer(), trainable=False)
30 |         moving_variance = create_variable("moving_variance", params_shape,
31 |             initializer=tf.ones_initializer(), trainable=False)
32 |         if is_training:
33 |             mean, variance = tf.nn.moments(inputs, axes=axis)
34 |             update_move_mean = moving_averages.assign_moving_average(moving_mean,
35 |                 mean, decay=momentum)
36 |             update_move_variance = moving_averages.assign_moving_average(moving_variance,
37 |                 variance, decay=momentum)
38 |             # the update ops are only collected here; run them alongside the train op
39 |             tf.add_to_collection(UPDATE_OPS_COLLECTION, update_move_mean)
40 |             tf.add_to_collection(UPDATE_OPS_COLLECTION, update_move_variance)
41 |         else:
42 |             mean, variance = moving_mean, moving_variance
43 |         return tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon)
44 | 
45 | # depthwise conv2d layer
46 | def depthwise_conv2d(inputs, scope, filter_size=3, channel_multiplier=1, strides=1):
47 |     inputs_shape = inputs.get_shape().as_list()
48 |     in_channels = inputs_shape[-1]
49 |     with tf.variable_scope(scope):
50 |         filter = create_variable("filter", shape=[filter_size, filter_size,
51 |             in_channels, channel_multiplier],
52 |             initializer=tf.truncated_normal_initializer(stddev=0.01))
53 | 
54 |         return tf.nn.depthwise_conv2d(inputs, filter, strides=[1, strides, strides, 1],
55 |             padding="SAME", rate=[1, 1])
56 | 
57 | # conv2d layer
58 | def conv2d(inputs, scope, num_filters, filter_size=1, strides=1):
59 |     inputs_shape = inputs.get_shape().as_list()
60 |     in_channels = inputs_shape[-1]
61 |     with tf.variable_scope(scope):
62 |         filter = create_variable("filter", shape=[filter_size, filter_size,
63 |             in_channels, num_filters],
64 |             initializer=tf.truncated_normal_initializer(stddev=0.01))
65 |         return tf.nn.conv2d(inputs, filter, strides=[1, strides, strides, 1],
66 |             padding="SAME")
67 | 
68 | # avg pool layer
69 | def avg_pool(inputs, pool_size, scope):
70 |     with tf.variable_scope(scope):
71 |         return tf.nn.avg_pool(inputs, [1, pool_size, pool_size, 1],
72 |             strides=[1, pool_size, pool_size, 1], padding="VALID")
73 | 
74 | # fully connected layer
75 | def fc(inputs, n_out, scope, use_bias=True):
76 |     inputs_shape = inputs.get_shape().as_list()
77 |     n_in = inputs_shape[-1]
78 |     with tf.variable_scope(scope):
79 |         weight = create_variable("weight", shape=[n_in, n_out],
80 |             initializer=tf.random_normal_initializer(stddev=0.01))
81 |         if use_bias:
82 |             bias = create_variable("bias", shape=[n_out,],
83 |                 initializer=tf.zeros_initializer())
84 |             return tf.nn.xw_plus_b(inputs, weight, bias)
85 |         return tf.matmul(inputs, weight)
86 | 
87 | 
88 | class MobileNet(object):
89 |     def __init__(self, inputs, num_classes=1000, is_training=True,
90 |                  width_multiplier=1, scope="MobileNet"):
91 |         """
92 |         Implementation of MobileNet (ref: https://arxiv.org/abs/1704.04861)
93 |         :param inputs: 4-D Tensor of [batch_size, height, width, channels]
94 |         :param num_classes: number of classes
95 |         :param is_training: Boolean, whether or not the model is training
96 |         :param width_multiplier: float, controls the size of the model
97 |         :param scope: Optional scope for variables
98 |         """
99 |         self.inputs = inputs
100 |         self.num_classes = num_classes
101 |         self.is_training = is_training
102 |         self.width_multiplier = width_multiplier
103 | 
104 |         # construct model
105 |         with tf.variable_scope(scope):
106 |             # conv1
107 |             net = conv2d(inputs, "conv_1", round(32 * width_multiplier), filter_size=3,
108 |                 strides=2)  # ->[N, 112, 112, 32]
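            # Note: only this first layer is a standard convolution; each
            # ds_conv block below is a depthwise 3x3 conv followed by a
            # pointwise 1x1 conv, which needs roughly 8-9x fewer multiply-adds
            # than a standard 3x3 conv of the same width (see the paper above).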
            net = tf.nn.relu(batchnorm(net, "conv_1/bn", is_training=self.is_training))
110 |             net = self._depthwise_separable_conv2d(net, 64, self.width_multiplier,
111 |                 "ds_conv_2")  # ->[N, 112, 112, 64]
112 |             net = self._depthwise_separable_conv2d(net, 128, self.width_multiplier,
113 |                 "ds_conv_3", downsample=True)  # ->[N, 56, 56, 128]
114 |             net = self._depthwise_separable_conv2d(net, 128, self.width_multiplier,
115 |                 "ds_conv_4")  # ->[N, 56, 56, 128]
116 |             net = self._depthwise_separable_conv2d(net, 256, self.width_multiplier,
117 |                 "ds_conv_5", downsample=True)  # ->[N, 28, 28, 256]
118 |             net = self._depthwise_separable_conv2d(net, 256, self.width_multiplier,
119 |                 "ds_conv_6")  # ->[N, 28, 28, 256]
120 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
121 |                 "ds_conv_7", downsample=True)  # ->[N, 14, 14, 512]
122 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
123 |                 "ds_conv_8")  # ->[N, 14, 14, 512]
124 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
125 |                 "ds_conv_9")  # ->[N, 14, 14, 512]
126 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
127 |                 "ds_conv_10")  # ->[N, 14, 14, 512]
128 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
129 |                 "ds_conv_11")  # ->[N, 14, 14, 512]
130 |             net = self._depthwise_separable_conv2d(net, 512, self.width_multiplier,
131 |                 "ds_conv_12")  # ->[N, 14, 14, 512]
132 |             net = self._depthwise_separable_conv2d(net, 1024, self.width_multiplier,
133 |                 "ds_conv_13", downsample=True)  # ->[N, 7, 7, 1024]
134 |             net = self._depthwise_separable_conv2d(net, 1024, self.width_multiplier,
135 |                 "ds_conv_14")  # ->[N, 7, 7, 1024]
136 |             net = avg_pool(net, 7, "avg_pool_15")
137 |             net = tf.squeeze(net, [1, 2], name="SpatialSqueeze")
138 |             self.logits = fc(net, self.num_classes, "fc_16")
139 |             self.predictions = tf.nn.softmax(self.logits)
140 | 
141 |     def _depthwise_separable_conv2d(self, inputs, num_filters, width_multiplier,
142 |                                     scope, downsample=False):
143 |         """depthwise separable convolution 2D function"""
144 |         num_filters = round(num_filters * width_multiplier)
145 |         strides = 2 if downsample else 1
146 | 
147 |         with tf.variable_scope(scope):
148 |             # depthwise conv2d
149 |             dw_conv = depthwise_conv2d(inputs, "depthwise_conv", strides=strides)
150 |             # batchnorm
151 |             bn = batchnorm(dw_conv, "dw_bn", is_training=self.is_training)
152 |             # relu
153 |             relu = tf.nn.relu(bn)
154 |             # pointwise conv2d (1x1)
155 |             pw_conv = conv2d(relu, "pointwise_conv", num_filters)
156 |             # bn
157 |             bn = batchnorm(pw_conv, "pw_bn", is_training=self.is_training)
158 |             return tf.nn.relu(bn)
159 | 
160 | if __name__ == "__main__":
161 |     # test data
162 |     inputs = tf.random_normal(shape=[4, 224, 224, 3])
163 |     mobileNet = MobileNet(inputs)
164 |     writer = tf.summary.FileWriter("./logs", graph=tf.get_default_graph())
165 |     init = tf.global_variables_initializer()
166 |     with tf.Session() as sess:
167 |         sess.run(init)
168 |         pred = sess.run(mobileNet.predictions)
169 |         print(pred.shape)
170 | 
--------------------------------------------------------------------------------
/CNNs/ResNet50.py:
--------------------------------------------------------------------------------
1 | """
2 | ResNet50
3 | 2017/12/06
4 | """
5 | 
6 | import tensorflow as tf
7 | from tensorflow.python.training import moving_averages
8 | 
9 | fc_initializer = tf.contrib.layers.xavier_initializer
10 | conv2d_initializer = tf.contrib.layers.xavier_initializer_conv2d
11 | 
12 | # create weight variable
13 | def create_var(name,
shape, initializer, trainable=True): 14 | return tf.get_variable(name, shape=shape, dtype=tf.float32, 15 | initializer=initializer, trainable=trainable) 16 | 17 | # conv2d layer 18 | def conv2d(x, num_outputs, kernel_size, stride=1, scope="conv2d"): 19 | num_inputs = x.get_shape()[-1] 20 | with tf.variable_scope(scope): 21 | kernel = create_var("kernel", [kernel_size, kernel_size, 22 | num_inputs, num_outputs], 23 | conv2d_initializer()) 24 | return tf.nn.conv2d(x, kernel, strides=[1, stride, stride, 1], 25 | padding="SAME") 26 | 27 | # fully connected layer 28 | def fc(x, num_outputs, scope="fc"): 29 | num_inputs = x.get_shape()[-1] 30 | with tf.variable_scope(scope): 31 | weight = create_var("weight", [num_inputs, num_outputs], 32 | fc_initializer()) 33 | bias = create_var("bias", [num_outputs,], 34 | tf.zeros_initializer()) 35 | return tf.nn.xw_plus_b(x, weight, bias) 36 | 37 | 38 | # batch norm layer 39 | def batch_norm(x, decay=0.999, epsilon=1e-03, is_training=True, 40 | scope="scope"): 41 | x_shape = x.get_shape() 42 | num_inputs = x_shape[-1] 43 | reduce_dims = list(range(len(x_shape) - 1)) 44 | with tf.variable_scope(scope): 45 | beta = create_var("beta", [num_inputs,], 46 | initializer=tf.zeros_initializer()) 47 | gamma = create_var("gamma", [num_inputs,], 48 | initializer=tf.ones_initializer()) 49 | # for inference 50 | moving_mean = create_var("moving_mean", [num_inputs,], 51 | initializer=tf.zeros_initializer(), 52 | trainable=False) 53 | moving_variance = create_var("moving_variance", [num_inputs], 54 | initializer=tf.ones_initializer(), 55 | trainable=False) 56 | if is_training: 57 | mean, variance = tf.nn.moments(x, axes=reduce_dims) 58 | update_move_mean = moving_averages.assign_moving_average(moving_mean, 59 | mean, decay=decay) 60 | update_move_variance = moving_averages.assign_moving_average(moving_variance, 61 | variance, decay=decay) 62 | tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_mean) 63 | tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_move_variance) 64 | else: 65 | mean, variance = moving_mean, moving_variance 66 | return tf.nn.batch_normalization(x, mean, variance, beta, gamma, epsilon) 67 | 68 | 69 | # avg pool layer 70 | def avg_pool(x, pool_size, scope): 71 | with tf.variable_scope(scope): 72 | return tf.nn.avg_pool(x, [1, pool_size, pool_size, 1], 73 | strides=[1, pool_size, pool_size, 1], padding="VALID") 74 | 75 | # max pool layer 76 | def max_pool(x, pool_size, stride, scope): 77 | with tf.variable_scope(scope): 78 | return tf.nn.max_pool(x, [1, pool_size, pool_size, 1], 79 | [1, stride, stride, 1], padding="SAME") 80 | 81 | class ResNet50(object): 82 | def __init__(self, inputs, num_classes=1000, is_training=True, 83 | scope="resnet50"): 84 | self.inputs =inputs 85 | self.is_training = is_training 86 | self.num_classes = num_classes 87 | 88 | with tf.variable_scope(scope): 89 | # construct the model 90 | net = conv2d(inputs, 64, 7, 2, scope="conv1") # -> [batch, 112, 112, 64] 91 | net = tf.nn.relu(batch_norm(net, is_training=self.is_training, scope="bn1")) 92 | net = max_pool(net, 3, 2, scope="maxpool1") # -> [batch, 56, 56, 64] 93 | net = self._block(net, 256, 3, init_stride=1, is_training=self.is_training, 94 | scope="block2") # -> [batch, 56, 56, 256] 95 | net = self._block(net, 512, 4, is_training=self.is_training, scope="block3") 96 | # -> [batch, 28, 28, 512] 97 | net = self._block(net, 1024, 6, is_training=self.is_training, scope="block4") 98 | # -> [batch, 14, 14, 1024] 99 | net = self._block(net, 2048, 3, 
is_training=self.is_training, scope="block5")
100 |             # -> [batch, 7, 7, 2048]
101 |             net = avg_pool(net, 7, scope="avgpool5")  # -> [batch, 1, 1, 2048]
102 |             net = tf.squeeze(net, [1, 2], name="SpatialSqueeze")  # -> [batch, 2048]
103 |             self.logits = fc(net, self.num_classes, "fc6")  # -> [batch, num_classes]
104 |             self.predictions = tf.nn.softmax(self.logits)
105 | 
106 | 
107 |     def _block(self, x, n_out, n, init_stride=2, is_training=True, scope="block"):
108 |         with tf.variable_scope(scope):
109 |             h_out = n_out // 4
110 |             out = self._bottleneck(x, h_out, n_out, stride=init_stride,
111 |                 is_training=is_training, scope="bottleneck1")
112 |             for i in range(1, n):
113 |                 out = self._bottleneck(out, h_out, n_out, is_training=is_training,
114 |                     scope=("bottleneck%s" % (i + 1)))
115 |             return out
116 | 
117 |     def _bottleneck(self, x, h_out, n_out, stride=None, is_training=True, scope="bottleneck"):
118 |         """A residual bottleneck unit"""
119 |         n_in = x.get_shape()[-1]
120 |         if stride is None:
121 |             stride = 1 if n_in == n_out else 2
122 | 
123 |         with tf.variable_scope(scope):
124 |             h = conv2d(x, h_out, 1, stride=stride, scope="conv_1")
125 |             h = batch_norm(h, is_training=is_training, scope="bn_1")
126 |             h = tf.nn.relu(h)
127 |             h = conv2d(h, h_out, 3, stride=1, scope="conv_2")
128 |             h = batch_norm(h, is_training=is_training, scope="bn_2")
129 |             h = tf.nn.relu(h)
130 |             h = conv2d(h, n_out, 1, stride=1, scope="conv_3")
131 |             h = batch_norm(h, is_training=is_training, scope="bn_3")
132 | 
133 |             if n_in != n_out:
134 |                 shortcut = conv2d(x, n_out, 1, stride=stride, scope="conv_4")
135 |                 shortcut = batch_norm(shortcut, is_training=is_training, scope="bn_4")
136 |             else:
137 |                 shortcut = x
138 |             return tf.nn.relu(shortcut + h)
139 | 
140 | if __name__ == "__main__":
141 |     x = tf.random_normal([32, 224, 224, 3])
142 |     resnet50 = ResNet50(x)
143 |     print(resnet50.logits)
--------------------------------------------------------------------------------
/CNNs/ShuffleNet.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of ShuffleNet in PyTorch
3 | """
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from torch.autograd import Variable
8 | 
9 | dtype = torch.FloatTensor
10 | 
11 | def shuffle_channels(x, groups):
12 |     """shuffle channels of a 4-D Tensor"""
13 |     batch_size, channels, height, width = x.size()
14 |     assert channels % groups == 0
15 |     channels_per_group = channels // groups
16 |     # split into groups
17 |     x = x.view(batch_size, groups, channels_per_group,
18 |         height, width)
19 |     # transpose 1, 2 axis
20 |     x = x.transpose(1, 2).contiguous()
21 |     # reshape into original
22 |     x = x.view(batch_size, channels, height, width)
23 |     return x
24 | 
25 | class ShuffleNetUnitA(nn.Module):
26 |     """ShuffleNet unit for stride=1"""
27 |     def __init__(self, in_channels, out_channels, groups=3):
28 |         super(ShuffleNetUnitA, self).__init__()
29 |         assert in_channels == out_channels
30 |         assert out_channels % 4 == 0
31 |         bottleneck_channels = out_channels // 4
32 |         self.groups = groups
33 |         self.group_conv1 = nn.Conv2d(in_channels, bottleneck_channels,
34 |             1, groups=groups, stride=1)
35 |         self.bn2 = nn.BatchNorm2d(bottleneck_channels)
36 |         self.depthwise_conv3 = nn.Conv2d(bottleneck_channels,
37 |             bottleneck_channels,
38 |             3, padding=1, stride=1,
39 |             groups=bottleneck_channels)
40 |         self.bn4 = nn.BatchNorm2d(bottleneck_channels)
41 |         self.group_conv5 = nn.Conv2d(bottleneck_channels, out_channels,
42 |             1, stride=1, groups=groups)
43 |         self.bn6 = nn.BatchNorm2d(out_channels)
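    # The forward pass below runs: 1x1 group conv -> BN + ReLU -> channel
    # shuffle -> 3x3 depthwise conv -> BN -> 1x1 group conv -> BN, and then
    # adds the identity shortcut before the final ReLU.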
44 | 
45 |     def forward(self, x):
46 |         out = self.group_conv1(x)
47 |         out = F.relu(self.bn2(out))
48 |         out = shuffle_channels(out, groups=self.groups)
49 |         out = self.depthwise_conv3(out)
50 |         out = self.bn4(out)
51 |         out = self.group_conv5(out)
52 |         out = self.bn6(out)
53 |         out = F.relu(x + out)
54 |         return out
55 | 
56 | class ShuffleNetUnitB(nn.Module):
57 |     """ShuffleNet unit for stride=2"""
58 |     def __init__(self, in_channels, out_channels, groups=3):
59 |         super(ShuffleNetUnitB, self).__init__()
60 |         out_channels -= in_channels
61 |         assert out_channels % 4 == 0
62 |         bottleneck_channels = out_channels // 4
63 |         self.groups = groups
64 |         self.group_conv1 = nn.Conv2d(in_channels, bottleneck_channels,
65 |             1, groups=groups, stride=1)
66 |         self.bn2 = nn.BatchNorm2d(bottleneck_channels)
67 |         self.depthwise_conv3 = nn.Conv2d(bottleneck_channels,
68 |             bottleneck_channels,
69 |             3, padding=1, stride=2,
70 |             groups=bottleneck_channels)
71 |         self.bn4 = nn.BatchNorm2d(bottleneck_channels)
72 |         self.group_conv5 = nn.Conv2d(bottleneck_channels, out_channels,
73 |             1, stride=1, groups=groups)
74 |         self.bn6 = nn.BatchNorm2d(out_channels)
75 | 
76 |     def forward(self, x):
77 |         out = self.group_conv1(x)
78 |         out = F.relu(self.bn2(out))
79 |         out = shuffle_channels(out, groups=self.groups)
80 |         out = self.depthwise_conv3(out)
81 |         out = self.bn4(out)
82 |         out = self.group_conv5(out)
83 |         out = self.bn6(out)
84 |         x = F.avg_pool2d(x, 3, stride=2, padding=1)
85 |         out = F.relu(torch.cat([x, out], dim=1))
86 |         return out
87 | 
88 | class ShuffleNet(nn.Module):
89 |     """ShuffleNet for groups=3"""
90 |     def __init__(self, groups=3, in_channels=3, num_classes=1000):
91 |         super(ShuffleNet, self).__init__()
92 | 
93 |         self.conv1 = nn.Conv2d(in_channels, 24, 3, stride=2, padding=1)
94 |         stage2_seq = [ShuffleNetUnitB(24, 240, groups=3)] + \
95 |             [ShuffleNetUnitA(240, 240, groups=3) for i in range(3)]
96 |         self.stage2 = nn.Sequential(*stage2_seq)
97 |         stage3_seq = [ShuffleNetUnitB(240, 480, groups=3)] + \
98 |             [ShuffleNetUnitA(480, 480, groups=3) for i in range(7)]
99 |         self.stage3 = nn.Sequential(*stage3_seq)
100 |         stage4_seq = [ShuffleNetUnitB(480, 960, groups=3)] + \
101 |             [ShuffleNetUnitA(960, 960, groups=3) for i in range(3)]
102 |         self.stage4 = nn.Sequential(*stage4_seq)
103 |         self.fc = nn.Linear(960, num_classes)
104 | 
105 |     def forward(self, x):
106 |         net = self.conv1(x)
107 |         net = F.max_pool2d(net, 3, stride=2, padding=1)
108 |         net = self.stage2(net)
109 |         net = self.stage3(net)
110 |         net = self.stage4(net)
111 |         net = F.avg_pool2d(net, 7)
112 |         net = net.view(net.size(0), -1)
113 |         net = self.fc(net)
114 |         probs = F.softmax(net, dim=1)  # class probabilities, not raw logits
115 |         return probs
116 | 
117 | if __name__ == "__main__":
118 |     x = Variable(torch.randn([32, 3, 224, 224]).type(dtype),
119 |         requires_grad=False)
120 |     shuffleNet = ShuffleNet()
121 |     out = shuffleNet(x)
122 |     print(out.size())
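    # A small sanity check of shuffle_channels (a toy example with assumed
    # sizes): a 1x4x1x1 input holding channels [0, 1, 2, 3] with groups=2
    # should interleave the two groups.
    t = torch.arange(4).float().view(1, 4, 1, 1)
    print(shuffle_channels(t, groups=2).view(-1))  # -> tensor([0., 2., 1., 3.])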
--------------------------------------------------------------------------------
/CNNs/SqueezeNet.py:
--------------------------------------------------------------------------------
1 | """
2 | 2017/12/02
3 | """
4 | 
5 | import tensorflow as tf
6 | import numpy as np
7 | 
8 | 
9 | class SqueezeNet(object):
10 |     def __init__(self, inputs, nb_classes=1000, is_training=True):
11 |         # conv1
12 |         net = tf.layers.conv2d(inputs, 96, [7, 7], strides=[2, 2],
13 |             padding="SAME", activation=tf.nn.relu,
14 |             name="conv1")
15 |         # maxpool1
16 |         net = tf.layers.max_pooling2d(net, [3, 3], strides=[2, 2],
17 |             name="maxpool1")
18 |         # fire2
19 |         net = self._fire(net, 16, 64, "fire2")
20 |         # fire3
21 |         net = self._fire(net, 16, 64, "fire3")
22 |         # fire4
23 |         net = self._fire(net, 32, 128, "fire4")
24 |         # maxpool4
25 |         net = tf.layers.max_pooling2d(net, [3, 3], strides=[2, 2],
26 |             name="maxpool4")
27 |         # fire5
28 |         net = self._fire(net, 32, 128, "fire5")
29 |         # fire6
30 |         net = self._fire(net, 48, 192, "fire6")
31 |         # fire7
32 |         net = self._fire(net, 48, 192, "fire7")
33 |         # fire8
34 |         net = self._fire(net, 64, 256, "fire8")
35 |         # maxpool8
36 |         net = tf.layers.max_pooling2d(net, [3, 3], strides=[2, 2],
37 |             name="maxpool8")
38 |         # fire9
39 |         net = self._fire(net, 64, 256, "fire9")
40 |         # dropout
41 |         net = tf.layers.dropout(net, 0.5, training=is_training)
42 |         # conv10 (use nb_classes instead of a hard-coded 1000)
43 |         net = tf.layers.conv2d(net, nb_classes, [1, 1], strides=[1, 1],
44 |             padding="SAME", activation=tf.nn.relu,
45 |             name="conv10")
46 |         # avgpool10
47 |         net = tf.layers.average_pooling2d(net, [13, 13], strides=[1, 1],
48 |             name="avgpool10")
49 |         # squeeze the axis
50 |         net = tf.squeeze(net, axis=[1, 2])
51 | 
52 |         self.logits = net
53 |         self.prediction = tf.nn.softmax(net)
54 | 
55 | 
56 |     def _fire(self, inputs, squeeze_depth, expand_depth, scope):
57 |         with tf.variable_scope(scope):
58 |             squeeze = tf.layers.conv2d(inputs, squeeze_depth, [1, 1],
59 |                 strides=[1, 1], padding="SAME",
60 |                 activation=tf.nn.relu, name="squeeze")
61 |             # expand
62 |             expand_1x1 = tf.layers.conv2d(squeeze, expand_depth, [1, 1],
63 |                 strides=[1, 1], padding="SAME",
64 |                 activation=tf.nn.relu, name="expand_1x1")
65 |             expand_3x3 = tf.layers.conv2d(squeeze, expand_depth, [3, 3],
66 |                 strides=[1, 1], padding="SAME",
67 |                 activation=tf.nn.relu, name="expand_3x3")
68 |             return tf.concat([expand_1x1, expand_3x3], axis=3)
69 | 
70 | 
71 | if __name__ == "__main__":
72 |     inputs = tf.random_normal([32, 224, 224, 3])
73 |     net = SqueezeNet(inputs)
74 |     print(net.prediction)
75 | 
--------------------------------------------------------------------------------
/CNNs/densenet.py:
--------------------------------------------------------------------------------
1 | """
2 | DenseNet, original: https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py
3 | """
4 | import re
5 | from collections import OrderedDict
6 | 
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torch.utils.model_zoo as model_zoo
11 | import torchvision.transforms as transforms
12 | 
13 | from PIL import Image
14 | import numpy as np
15 | 
16 | model_urls = {
17 |     'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
18 |     'densenet169': 'https://download.pytorch.org/models/densenet169-b2777c0a.pth',
19 |     'densenet201': 'https://download.pytorch.org/models/densenet201-c1103571.pth',
20 |     'densenet161': 'https://download.pytorch.org/models/densenet161-8d451a50.pth',
21 | }
22 | 
23 | 
24 | class _DenseLayer(nn.Sequential):
25 |     """Basic unit of DenseBlock (using bottleneck layer)"""
26 |     def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):
27 |         super(_DenseLayer, self).__init__()
28 |         self.add_module("norm1", nn.BatchNorm2d(num_input_features))
29 |         self.add_module("relu1", nn.ReLU(inplace=True))
30 |         self.add_module("conv1", nn.Conv2d(num_input_features, bn_size*growth_rate,
31 |             kernel_size=1, stride=1, bias=False))
32 |         self.add_module("norm2", nn.BatchNorm2d(bn_size*growth_rate))
33 |         self.add_module("relu2", nn.ReLU(inplace=True))
34 |         self.add_module("conv2", nn.Conv2d(bn_size*growth_rate, growth_rate,
35 |             kernel_size=3, stride=1, padding=1, bias=False))
36 |         self.drop_rate = drop_rate
37 | 
38 |     def forward(self, x):
39 |         new_features = super(_DenseLayer, self).forward(x)
40 |         if self.drop_rate > 0:
41 |             new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
42 |         return torch.cat([x, new_features], 1)
43 | 
44 | class _DenseBlock(nn.Sequential):
45 |     """DenseBlock"""
46 |     def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
47 |         super(_DenseBlock, self).__init__()
48 |         for i in range(num_layers):
49 |             layer = _DenseLayer(num_input_features+i*growth_rate, growth_rate, bn_size,
50 |                 drop_rate)
51 |             self.add_module("denselayer%d" % (i+1,), layer)
52 | 
53 | 
54 | class _Transition(nn.Sequential):
55 |     """Transition layer between two adjacent DenseBlocks"""
56 |     def __init__(self, num_input_feature, num_output_features):
57 |         super(_Transition, self).__init__()
58 |         self.add_module("norm", nn.BatchNorm2d(num_input_feature))
59 |         self.add_module("relu", nn.ReLU(inplace=True))
60 |         self.add_module("conv", nn.Conv2d(num_input_feature, num_output_features,
61 |             kernel_size=1, stride=1, bias=False))
62 |         self.add_module("pool", nn.AvgPool2d(2, stride=2))
63 | 
64 | 
65 | class DenseNet(nn.Module):
66 |     """DenseNet-BC model"""
67 |     def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_init_features=64,
68 |         bn_size=4, compression_rate=0.5, drop_rate=0, num_classes=1000):
69 |         """
70 |         :param growth_rate: (int) number of filters used in DenseLayer, `k` in the paper
71 |         :param block_config: (list of 4 ints) number of layers in each DenseBlock
72 |         :param num_init_features: (int) number of filters in the first Conv2d
73 |         :param bn_size: (int) the factor used in the bottleneck layer
74 |         :param compression_rate: (float) the compression rate used in Transition Layer
75 |         :param drop_rate: (float) the drop rate after each DenseLayer
76 |         :param num_classes: (int) number of classes for classification
77 |         """
78 |         super(DenseNet, self).__init__()
79 |         # first Conv2d
80 |         self.features = nn.Sequential(OrderedDict([
81 |             ("conv0", nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)),
82 |             ("norm0", nn.BatchNorm2d(num_init_features)),
83 |             ("relu0", nn.ReLU(inplace=True)),
84 |             ("pool0", nn.MaxPool2d(3, stride=2, padding=1))
85 |         ]))
86 | 
87 |         # DenseBlock
88 |         num_features = num_init_features
89 |         for i, num_layers in enumerate(block_config):
90 |             block = _DenseBlock(num_layers, num_features, bn_size, growth_rate, drop_rate)
91 |             self.features.add_module("denseblock%d" % (i + 1), block)
92 |             num_features += num_layers*growth_rate
93 |             if i != len(block_config) - 1:
94 |                 transition = _Transition(num_features, int(num_features*compression_rate))
95 |                 self.features.add_module("transition%d" % (i + 1), transition)
96 |                 num_features = int(num_features * compression_rate)
97 | 
98 |         # final bn+ReLU
99 |         self.features.add_module("norm5", nn.BatchNorm2d(num_features))
100 |         self.features.add_module("relu5", nn.ReLU(inplace=True))
101 | 
102 |         # classification layer
103 |         self.classifier = nn.Linear(num_features, num_classes)
104 | 
105 |         # params initialization
106 |         for m in self.modules():
107 |             if isinstance(m, nn.Conv2d):
108 |                 nn.init.kaiming_normal_(m.weight)
109 |             elif isinstance(m, nn.BatchNorm2d):
110 |                 nn.init.constant_(m.bias, 0)
111 |                 nn.init.constant_(m.weight, 1)
112 |             elif isinstance(m, nn.Linear):
113 |                 nn.init.constant_(m.bias, 0)
114 | 
115 |     def forward(self, x):
116 |         features = self.features(x)
117 |         out = F.avg_pool2d(features, 7, stride=1).view(features.size(0), -1)
118 |         out = self.classifier(out)
119 |         return out
120 | 
121 | class DenseNet_MNIST(nn.Module):
122 |     """DenseNet for MNIST dataset"""
123 |     def __init__(self, growth_rate=12, block_config=(6, 6, 6), num_init_features=16,
124 |         bn_size=4, compression_rate=0.5, drop_rate=0, num_classes=10):
125 |         """
126 |         :param growth_rate: (int) number of filters used in DenseLayer, `k` in the paper
127 |         :param block_config: (list of 3 ints) number of layers in each DenseBlock
128 |         :param num_init_features: (int) number of filters in the first Conv2d
129 |         :param bn_size: (int) the factor used in the bottleneck layer
130 |         :param compression_rate: (float) the compression rate used in Transition Layer
131 |         :param drop_rate: (float) the drop rate after each DenseLayer
132 |         :param num_classes: (int) number of classes for classification
133 |         """
134 |         super(DenseNet_MNIST, self).__init__()
135 |         # first Conv2d
136 |         self.features = nn.Sequential(OrderedDict([
137 |             ("conv0", nn.Conv2d(1, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)),
138 |             ("norm0", nn.BatchNorm2d(num_init_features)),
139 |             ("relu0", nn.ReLU(inplace=True)),
140 |         ]))
141 | 
142 |         # DenseBlock
143 |         num_features = num_init_features
144 |         for i, num_layers in enumerate(block_config):
145 |             block = _DenseBlock(num_layers, num_features, bn_size, growth_rate, drop_rate)
146 |             self.features.add_module("denseblock%d" % (i + 1), block)
147 |             num_features += num_layers * growth_rate
148 |             if i != len(block_config) - 1:
149 |                 transition = _Transition(num_features, int(num_features * compression_rate))
150 |                 self.features.add_module("transition%d" % (i + 1), transition)
151 |                 num_features = int(num_features * compression_rate)
152 | 
153 |         # final bn+ReLU
154 |         self.features.add_module("norm5", nn.BatchNorm2d(num_features))
155 |         self.features.add_module("relu5", nn.ReLU(inplace=True))
156 | 
157 |         # classification layer
158 |         self.classifier = nn.Linear(num_features, num_classes)
159 | 
160 |         # params initialization
161 |         for m in self.modules():
162 |             if isinstance(m, nn.Conv2d):
163 |                 nn.init.kaiming_normal_(m.weight)
164 |             elif isinstance(m, nn.BatchNorm2d):
165 |                 nn.init.constant_(m.bias, 0)
166 |                 nn.init.constant_(m.weight, 1)
167 |             elif isinstance(m, nn.Linear):
168 |                 nn.init.constant_(m.bias, 0)
169 | 
170 |     def forward(self, x):
171 |         features = self.features(x)
172 |         out = F.avg_pool2d(features, 7, stride=1).view(features.size(0), -1)
173 |         out = self.classifier(out)
174 |         return out
175 | 
176 | 
177 | def densenet121(pretrained=False, **kwargs):
178 |     """DenseNet121"""
179 |     model = DenseNet(num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16),
180 |         **kwargs)
181 | 
182 |     if pretrained:
183 |         # '.'s are no longer allowed in module names, but previous _DenseLayer
184 |         # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.
185 |         # They are also in the checkpoints in model_urls. This pattern is used
186 |         # to find such keys.
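        # e.g. 'features.denseblock1.denselayer1.norm.1.weight' is mapped to
        # 'features.denseblock1.denselayer1.norm1.weight' by the regex below.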
187 |         pattern = re.compile(
188 |             r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$')
189 |         state_dict = model_zoo.load_url(model_urls['densenet121'])
190 |         for key in list(state_dict.keys()):
191 |             res = pattern.match(key)
192 |             if res:
193 |                 new_key = res.group(1) + res.group(2)
194 |                 state_dict[new_key] = state_dict[key]
195 |                 del state_dict[key]
196 |         model.load_state_dict(state_dict)
197 |     return model
198 | 
199 | if __name__ == "__main__":
200 |     densenet = densenet121(pretrained=True)
201 |     densenet.eval()
202 | 
203 |     img = Image.open("./images/cat.jpg")
204 | 
205 |     trans_ops = transforms.Compose([
206 |         transforms.Resize(256),
207 |         transforms.CenterCrop(224),
208 |         transforms.ToTensor(),
209 |         transforms.Normalize(mean=[0.485, 0.456, 0.406],
210 |             std=[0.229, 0.224, 0.225])
211 |     ])
212 | 
213 |     images = trans_ops(img).view(-1, 3, 224, 224)
214 |     print(images)
215 |     outputs = densenet(images)
216 | 
217 |     _, predictions = outputs.topk(5, dim=1)
218 | 
219 |     labels = list(map(lambda s: s.strip(), open("./data/imagenet/synset_words.txt").readlines()))
220 |     for idx in predictions.numpy()[0]:
221 |         print("Predicted labels:", labels[idx])
222 | 
223 | 
224 | 
225 | 
226 | 
227 | 
228 | 
229 | 
--------------------------------------------------------------------------------
/CNNs/shufflenet_v2.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of ShuffleNetV2 in Keras
3 | """
4 | 
5 | import tensorflow as tf
6 | from tensorflow.keras.layers import Conv2D, DepthwiseConv2D
7 | from tensorflow.keras.layers import MaxPool2D, GlobalAveragePooling2D, Dense
8 | from tensorflow.keras.layers import BatchNormalization, Activation
9 | 
10 | 
11 | def channel_shuffle(inputs, group):
12 |     """Shuffle the channel
13 |     Args:
14 |         inputs: 4D Tensor
15 |         group: int, number of groups
16 |     Returns:
17 |         Shuffled 4D Tensor
18 |     """
19 |     in_shape = inputs.get_shape().as_list()
20 |     h, w, in_channel = in_shape[1:]
21 |     assert in_channel % group == 0
22 |     l = tf.reshape(inputs, [-1, h, w, in_channel // group, group])
23 |     l = tf.transpose(l, [0, 1, 2, 4, 3])
24 |     l = tf.reshape(l, [-1, h, w, in_channel])
25 | 
26 |     return l
27 | 
28 | class Conv2D_BN_ReLU(tf.keras.Model):
29 |     """Conv2D -> BN -> ReLU"""
30 |     def __init__(self, channel, kernel_size=1, stride=1):
31 |         super(Conv2D_BN_ReLU, self).__init__()
32 | 
33 |         self.conv = Conv2D(channel, kernel_size, strides=stride,
34 |             padding="SAME", use_bias=False)
35 |         self.bn = BatchNormalization(axis=-1, momentum=0.9, epsilon=1e-5)
36 |         self.relu = Activation("relu")
37 | 
38 |     def call(self, inputs, training=True):
39 |         x = self.conv(inputs)
40 |         x = self.bn(x, training=training)
41 |         x = self.relu(x)
42 |         return x
43 | 
44 | class DepthwiseConv2D_BN(tf.keras.Model):
45 |     """DepthwiseConv2D -> BN"""
46 |     def __init__(self, kernel_size=3, stride=1):
47 |         super(DepthwiseConv2D_BN, self).__init__()
48 | 
49 |         self.dconv = DepthwiseConv2D(kernel_size, strides=stride,
50 |             depth_multiplier=1,
51 |             padding="SAME", use_bias=False)
52 |         self.bn = BatchNormalization(axis=-1, momentum=0.9, epsilon=1e-5)
53 | 
54 |     def call(self, inputs, training=True):
55 |         x = self.dconv(inputs)
56 |         x = self.bn(x, training=training)
57 |         return x
58 | 
59 | 
60 | class ShufflenetUnit1(tf.keras.Model):
61 |     def __init__(self, out_channel):
62 |         """The unit of shufflenetv2 for stride=1
63 |         Args:
64 |             out_channel: int, number of channels
65 |         """
66 |         super(ShufflenetUnit1, self).__init__()
67 | 
68 |         assert out_channel % 2 == 0
69 |         self.out_channel = out_channel
70 | 
71 |         self.conv1_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1)
72 |         self.dconv_bn = DepthwiseConv2D_BN(3, 1)
73 |         self.conv2_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1)
74 | 
75 |     def call(self, inputs, training=False):
76 |         # split the channel
77 |         shortcut, x = tf.split(inputs, 2, axis=3)
78 | 
79 |         x = self.conv1_bn_relu(x, training=training)
80 |         x = self.dconv_bn(x, training=training)
81 |         x = self.conv2_bn_relu(x, training=training)
82 | 
83 |         x = tf.concat([shortcut, x], axis=3)
84 |         x = channel_shuffle(x, 2)
85 |         return x
86 | 
87 | class ShufflenetUnit2(tf.keras.Model):
88 |     """The unit of shufflenetv2 for stride=2"""
89 |     def __init__(self, in_channel, out_channel):
90 |         super(ShufflenetUnit2, self).__init__()
91 | 
92 |         assert out_channel % 2 == 0
93 |         self.in_channel = in_channel
94 |         self.out_channel = out_channel
95 | 
96 |         self.conv1_bn_relu = Conv2D_BN_ReLU(out_channel // 2, 1, 1)
97 |         self.dconv_bn = DepthwiseConv2D_BN(3, 2)
98 |         self.conv2_bn_relu = Conv2D_BN_ReLU(out_channel - in_channel, 1, 1)
99 | 
100 |         # for shortcut
101 |         self.shortcut_dconv_bn = DepthwiseConv2D_BN(3, 2)
102 |         self.shortcut_conv_bn_relu = Conv2D_BN_ReLU(in_channel, 1, 1)
103 | 
104 |     def call(self, inputs, training=False):
105 |         shortcut, x = inputs, inputs
106 | 
107 |         x = self.conv1_bn_relu(x, training=training)
108 |         x = self.dconv_bn(x, training=training)
109 |         x = self.conv2_bn_relu(x, training=training)
110 | 
111 |         shortcut = self.shortcut_dconv_bn(shortcut, training=training)
112 |         shortcut = self.shortcut_conv_bn_relu(shortcut, training=training)
113 | 
114 |         x = tf.concat([shortcut, x], axis=3)
115 |         x = channel_shuffle(x, 2)
116 |         return x
117 | 
118 | class ShufflenetStage(tf.keras.Model):
119 |     """The stage of shufflenet"""
120 |     def __init__(self, in_channel, out_channel, num_blocks):
121 |         super(ShufflenetStage, self).__init__()
122 | 
123 |         self.in_channel = in_channel
124 |         self.out_channel = out_channel
125 | 
126 |         self.ops = []
127 |         for i in range(num_blocks):
128 |             if i == 0:
129 |                 op = ShufflenetUnit2(in_channel, out_channel)
130 |             else:
131 |                 op = ShufflenetUnit1(out_channel)
132 |             self.ops.append(op)
133 | 
134 |     def call(self, inputs, training=False):
135 |         x = inputs
136 |         for op in self.ops:
137 |             x = op(x, training=training)
138 |         return x
139 | 
140 | 
141 | class ShuffleNetv2(tf.keras.Model):
142 |     """Shufflenetv2"""
143 |     def __init__(self, num_classes, first_channel=24, channels_per_stage=(116, 232, 464)):
144 |         super(ShuffleNetv2, self).__init__()
145 | 
146 |         self.num_classes = num_classes
147 | 
148 |         self.conv1_bn_relu = Conv2D_BN_ReLU(first_channel, 3, 2)
149 |         self.pool1 = MaxPool2D(3, strides=2, padding="SAME")
150 |         self.stage2 = ShufflenetStage(first_channel, channels_per_stage[0], 4)
151 |         self.stage3 = ShufflenetStage(channels_per_stage[0], channels_per_stage[1], 8)
152 |         self.stage4 = ShufflenetStage(channels_per_stage[1], channels_per_stage[2], 4)
153 |         self.conv5_bn_relu = Conv2D_BN_ReLU(1024, 1, 1)
154 |         self.gap = GlobalAveragePooling2D()
155 |         self.linear = Dense(num_classes)
156 | 
157 |     def call(self, inputs, training=False):
158 |         x = self.conv1_bn_relu(inputs, training=training)
159 |         x = self.pool1(x)
160 |         x = self.stage2(x, training=training)
161 |         x = self.stage3(x, training=training)
162 |         x = self.stage4(x, training=training)
163 |         x = self.conv5_bn_relu(x, training=training)
164 |         x = self.gap(x)
165 |         x = self.linear(x)
166 |         return x
167 | 
168 | 
169 | if __name__ == "__main__":
170 |     """
171 | inputs = tf.placeholder(tf.float32, [None, 224, 224, 3]) 172 | 173 | model = ShuffleNetv2(1000) 174 | outputs = model(inputs) 175 | 176 | print(model.summary()) 177 | 178 | with tf.Session() as sess: 179 | pass 180 | 181 | 182 | vars = [] 183 | for v in tf.global_variables(): 184 | 185 | vars.append((v.name, v)) 186 | print(v.name) 187 | print(len(vars)) 188 | 189 | 190 | import numpy as np 191 | 192 | path = "C:/models/ShuffleNetV2-1x.npz" 193 | weights = np.load(path) 194 | np_vars = [] 195 | for k in weights: 196 | k_ = k.replace("beta", "gbeta") 197 | k_ = k_.replace("/dconv", "/conv10_dconv") 198 | k_ = k_.replace("shortcut_dconv", "shortcut_a_dconv") 199 | k_ = k_.replace("conv5", "su_conv5") 200 | k_ = k_.replace("linear", "t_linear") 201 | np_vars.append((k_, weights[k])) 202 | np_vars.sort(key=lambda x: x[0]) 203 | 204 | for k, _ in np_vars: 205 | print(k) 206 | 207 | saver = tf.train.Saver(tf.global_variables()) 208 | with tf.Session() as sess: 209 | sess.run(tf.global_variables_initializer()) 210 | 211 | assign_ops = [] 212 | for id in range(len(vars)): 213 | print(vars[id][0], np_vars[id][0]) 214 | assign_ops.append(tf.assign(vars[id][1], np_vars[id][1])) 215 | 216 | sess.run(assign_ops) 217 | saver.save(sess, "./models/shufflene_v2_1.0.ckpt") 218 | 219 | model.save("./models/shufflenet_v2_1.0.hdf5") 220 | 221 | """ 222 | 223 | import numpy as np 224 | from tensorflow.keras.preprocessing import image 225 | from tensorflow.keras.applications.densenet import preprocess_input, decode_predictions 226 | 227 | img_path = './images/cat.jpg' 228 | img = image.load_img(img_path, target_size=(224, 224)) 229 | x = image.img_to_array(img) 230 | x = np.expand_dims(x, axis=0) 231 | x = preprocess_input(x) 232 | 233 | inputs = tf.placeholder(tf.float32, [None, 224, 224, 3]) 234 | model = ShuffleNetv2(1000) 235 | outputs = model(inputs, training=False) 236 | outputs = tf.nn.softmax(outputs) 237 | 238 | saver = tf.train.Saver() 239 | with tf.Session() as sess: 240 | saver.restore(sess, "./models/shufflene_v2_1.0.ckpt") 241 | preds = sess.run(outputs, feed_dict={inputs: x}) 242 | print(decode_predictions(preds, top=3)[0]) 243 | 244 | -------------------------------------------------------------------------------- /ObjectDetections/SSD/SSD_demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | SSD demo 3 | """ 4 | 5 | import cv2 6 | import numpy as np 7 | import tensorflow as tf 8 | import matplotlib.image as mpimg 9 | 10 | from ssd_300_vgg import SSD 11 | from utils import preprocess_image, process_bboxes 12 | from visualization import plt_bboxes 13 | 14 | 15 | ssd_net = SSD() 16 | classes, scores, bboxes = ssd_net.detections() 17 | images = ssd_net.images() 18 | 19 | sess = tf.Session() 20 | # Restore SSD model. 
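# Pipeline recap: the detection tensors above come from the SSD graph; below we
# restore pretrained weights, preprocess one image, run the session, then sort
# and NMS the raw boxes before plotting. (The checkpoint file is assumed to be
# obtained separately; it is not shipped with the repo.)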
21 | ckpt_filename = './ssd_checkpoints/ssd_vgg_300_weights.ckpt'
22 | sess.run(tf.global_variables_initializer())
23 | saver = tf.train.Saver()
24 | saver.restore(sess, ckpt_filename)
25 | 
26 | img = cv2.imread('./demo/dog.jpg')
27 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
28 | img_preprocessed = preprocess_image(img)
29 | rclasses, rscores, rbboxes = sess.run([classes, scores, bboxes],
30 |     feed_dict={images: img_preprocessed})
31 | rclasses, rscores, rbboxes = process_bboxes(rclasses, rscores, rbboxes)
32 | 
33 | plt_bboxes(img, rclasses, rscores, rbboxes)
34 | 
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/README.md:
--------------------------------------------------------------------------------
1 | 222
2 | 
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/car2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/car2.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/dog.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/dog.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/eagle.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/eagle.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/horses.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/horses.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/person.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/person.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/demo/street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/SSD/demo/street.jpg
--------------------------------------------------------------------------------
/ObjectDetections/SSD/ssd_anchors.py:
--------------------------------------------------------------------------------
1 | """
2 | SSD anchors
3 | """
4 | import math
5 | 
6 | import numpy as np
7 | 
8 | def ssd_size_bounds_to_values(size_bounds,
9 |     n_feat_layers,
10 |     img_shape=(300, 300)):
11 |     """Compute the reference sizes of the anchor boxes from relative bounds.
12 |     The absolute values are measured in pixels, based on the network
13 |     default size (300 pixels).
14 | 
15 |     This function follows the computation performed in the original
16 |     implementation of SSD in Caffe.
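    For example, size_bounds=(0.15, 0.90) with n_feat_layers=6 gives a step
    of 18, so the returned list starts with the reference sizes (22.5, 45)
    measured in pixels for a 300-pixel input image.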
17 | 
18 |     Return:
19 |     list of list containing the absolute sizes at each scale. For each scale,
20 |     the ratios only apply to the first value.
21 |     """
22 |     assert img_shape[0] == img_shape[1]
23 | 
24 |     img_size = img_shape[0]
25 |     min_ratio = int(size_bounds[0] * 100)
26 |     max_ratio = int(size_bounds[1] * 100)
27 |     step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
28 |     # Start with the following smallest sizes.
29 |     sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
30 |     for ratio in range(min_ratio, max_ratio + 1, step):
31 |         sizes.append((img_size * ratio / 100.,
32 |             img_size * (ratio + step) / 100.))
33 |     return sizes
34 | 
35 | def ssd_anchor_one_layer(img_shape,
36 |     feat_shape,
37 |     sizes,
38 |     ratios,
39 |     step,
40 |     offset=0.5,
41 |     dtype=np.float32):
42 |     """Compute SSD default anchor boxes for one feature layer.
43 | 
44 |     Determine the relative position grid of the centers, and the relative
45 |     width and height.
46 | 
47 |     Arguments:
48 |     feat_shape: Feature shape, used for computing relative position grids;
49 |     sizes: Absolute reference sizes;
50 |     ratios: Ratios to use on these features;
51 |     img_shape: Image shape, used for computing height, width relatively to the
52 |         former;
53 |     offset: Grid offset.
54 | 
55 |     Return:
56 |     y, x, h, w: Relative x and y grids, and height and width.
57 |     """
58 |     # Compute the position grid: simple way.
59 |     # y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
60 |     # y = (y.astype(dtype) + offset) / feat_shape[0]
61 |     # x = (x.astype(dtype) + offset) / feat_shape[1]
62 |     # Weird SSD-Caffe computation using steps values...
63 |     y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
64 |     y = (y.astype(dtype) + offset) * step / img_shape[0]
65 |     x = (x.astype(dtype) + offset) * step / img_shape[1]
66 | 
67 |     # Expand dims to support easy broadcasting.
68 |     y = np.expand_dims(y, axis=-1)  # [size, size, 1]
69 |     x = np.expand_dims(x, axis=-1)  # [size, size, 1]
70 | 
71 |     # Compute relative height and width.
72 |     # Tries to follow the original implementation of SSD for the order.
73 |     num_anchors = len(sizes) + len(ratios)
74 |     h = np.zeros((num_anchors, ), dtype=dtype)  # [n_anchors]
75 |     w = np.zeros((num_anchors, ), dtype=dtype)  # [n_anchors]
76 |     # Add first anchor boxes with ratio=1.
77 |     h[0] = sizes[0] / img_shape[0]
78 |     w[0] = sizes[0] / img_shape[1]
79 |     di = 1
80 |     if len(sizes) > 1:
81 |         h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
82 |         w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
83 |         di += 1
84 |     for i, r in enumerate(ratios):
85 |         h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
86 |         w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
87 |     return y, x, h, w
88 | 
89 | 
90 | def ssd_anchors_all_layers(img_shape,
91 |     layers_shape,
92 |     anchor_sizes,
93 |     anchor_ratios,
94 |     anchor_steps,
95 |     offset=0.5,
96 |     dtype=np.float32):
97 |     """Compute anchor boxes for all feature layers.
98 |     """
99 |     layers_anchors = []
100 |     for i, s in enumerate(layers_shape):
101 |         anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
102 |             anchor_sizes[i],
103 |             anchor_ratios[i],
104 |             anchor_steps[i],
105 |             offset=offset, dtype=dtype)
106 |         layers_anchors.append(anchor_bboxes)
107 |     return layers_anchors
--------------------------------------------------------------------------------
/ObjectDetections/SSD/ssd_layers.py:
--------------------------------------------------------------------------------
1 | """
2 | Layers for SSD
3 | """
4 | 
5 | import tensorflow as tf
6 | 
7 | # Conv2d: for stride = 1
8 | def conv2d(x, filters, kernel_size, stride=1, padding="same",
9 |     dilation_rate=1, activation=tf.nn.relu, scope="conv2d"):
10 |     kernel_sizes = [kernel_size] * 2
11 |     strides = [stride] * 2
12 |     dilation_rate = [dilation_rate] * 2
13 |     return tf.layers.conv2d(x, filters, kernel_sizes, strides=strides,
14 |         dilation_rate=dilation_rate, padding=padding,
15 |         name=scope, activation=activation)
16 | 
17 | # max pool2d: default pool_size = stride
18 | def max_pool2d(x, pool_size, stride=None, scope="max_pool2d"):
19 |     pool_sizes = [pool_size] * 2
20 |     strides = [pool_size] * 2 if stride is None else [stride] * 2
21 |     return tf.layers.max_pooling2d(x, pool_sizes, strides, name=scope, padding="same")
22 | 
23 | # pad2d: for conv2d with stride > 1
24 | def pad2d(x, pad):
25 |     return tf.pad(x, paddings=[[0, 0], [pad, pad], [pad, pad], [0, 0]])
26 | 
27 | # dropout
28 | def dropout(x, rate=0.5, is_training=True):
29 |     return tf.layers.dropout(x, rate=rate, training=is_training)
30 | 
31 | # l2norm (not batch norm, spatial normalization)
32 | def l2norm(x, scale, trainable=True, scope="L2Normalization"):
33 |     n_channels = x.get_shape().as_list()[-1]
34 |     l2_norm = tf.nn.l2_normalize(x, [3], epsilon=1e-12)
35 |     with tf.variable_scope(scope):
36 |         gamma = tf.get_variable("gamma", shape=[n_channels, ], dtype=tf.float32,
37 |             initializer=tf.constant_initializer(scale),
38 |             trainable=trainable)
39 |         return l2_norm * gamma
40 | 
41 | 
42 | # multibox layer: get class and location predictions from detection layer
43 | def ssd_multibox_layer(x, num_classes, sizes, ratios, normalization=-1, scope="multibox"):
44 |     pre_shape = x.get_shape().as_list()[1:-1]
45 |     pre_shape = [-1] + pre_shape
46 |     with tf.variable_scope(scope):
47 |         # l2 norm
48 |         if normalization > 0:
49 |             x = l2norm(x, normalization)
50 |             print(x)
51 |         # numbers of anchors
52 |         n_anchors = len(sizes) + len(ratios)
53 |         # location predictions
54 |         loc_pred = conv2d(x, n_anchors*4, 3, activation=None, scope="conv_loc")
55 |         loc_pred = tf.reshape(loc_pred, pre_shape + [n_anchors, 4])
56 |         # class prediction
57 |         cls_pred = conv2d(x, n_anchors*num_classes, 3, activation=None, scope="conv_cls")
58 |         cls_pred = tf.reshape(cls_pred, pre_shape + [n_anchors, num_classes])
59 |         return cls_pred, loc_pred
60 | 
61 | 
--------------------------------------------------------------------------------
/ObjectDetections/SSD/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Help functions for SSD
3 | """
4 | 
5 | import cv2
6 | import numpy as np
7 | 
8 | 
9 | ############## preprocess image ##################
10 | # whiten the image
11 | def whiten_image(image, means=(123., 117., 104.)):
12 |     """Subtracts the given means from each image channel"""
13 |     if image.ndim != 3:
14 |         raise ValueError('Input must be of size [height, width, C>0]')
15 |     num_channels = image.shape[-1]
16 |     if len(means) != num_channels:
17 |         raise ValueError('len(means) must match the number of channels')
18 | 
19 |     mean = np.array(means, dtype=image.dtype)
20 |     image = image - mean
21 |     return image
22 | 
23 | def resize_image(image, size=(300, 300)):
24 |     return cv2.resize(image, size)
25 | 
26 | def preprocess_image(image):
27 |     """Preprocess an image for inference"""
28 |     image_cp = np.copy(image).astype(np.float32)
29 |     # whiten the image
30 |     image_whitened = whiten_image(image_cp)
31 |     # resize the image
32 |     image_resized = resize_image(image_whitened)
33 |     # expand the batch_size dim
34 |     image_expanded = np.expand_dims(image_resized, axis=0)
35 |     return image_expanded
36 | 
37 | ############## process bboxes ##################
38 | def bboxes_clip(bbox_ref, bboxes):
39 |     """Clip bounding boxes with respect to reference bbox.
40 |     """
41 |     bboxes = np.copy(bboxes)
42 |     bboxes = np.transpose(bboxes)
43 |     bbox_ref = np.transpose(bbox_ref)
44 |     bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
45 |     bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
46 |     bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
47 |     bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
48 |     bboxes = np.transpose(bboxes)
49 |     return bboxes
50 | 
51 | def bboxes_sort(classes, scores, bboxes, top_k=400):
52 |     """Sort bounding boxes by decreasing score and keep only the top_k
53 |     """
54 |     # if priority_inside:
55 |     #     inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \
56 |     #         (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin)
57 |     #     idxes = np.argsort(-scores)
58 |     #     inside = inside[idxes]
59 |     #     idxes = np.concatenate([idxes[inside], idxes[~inside]])
60 |     idxes = np.argsort(-scores)
61 |     classes = classes[idxes][:top_k]
62 |     scores = scores[idxes][:top_k]
63 |     bboxes = bboxes[idxes][:top_k]
64 |     return classes, scores, bboxes
65 | 
66 | def bboxes_iou(bboxes1, bboxes2):
67 |     """Computing iou between bboxes1 and bboxes2.
68 |     Note: bboxes1 and bboxes2 can be multi-dimensional, but should be broadcastable.
69 |     """
70 |     bboxes1 = np.transpose(bboxes1)
71 |     bboxes2 = np.transpose(bboxes2)
72 |     # Intersection bbox and volume.
73 |     int_ymin = np.maximum(bboxes1[0], bboxes2[0])
74 |     int_xmin = np.maximum(bboxes1[1], bboxes2[1])
75 |     int_ymax = np.minimum(bboxes1[2], bboxes2[2])
76 |     int_xmax = np.minimum(bboxes1[3], bboxes2[3])
77 | 
78 |     int_h = np.maximum(int_ymax - int_ymin, 0.)
79 |     int_w = np.maximum(int_xmax - int_xmin, 0.)
80 |     int_vol = int_h * int_w
81 |     # Union volume.
82 |     vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
83 |     vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
84 |     iou = int_vol / (vol1 + vol2 - int_vol)
85 |     return iou
86 | 
87 | def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5):
88 |     """Apply non-maximum suppression to bounding boxes.
89 |     """
90 |     keep_bboxes = np.ones(scores.shape, dtype=bool)
91 |     for i in range(scores.size-1):
92 |         if keep_bboxes[i]:
93 |             # Compute overlap with bboxes which are following.
94 |             overlap = bboxes_iou(bboxes[i], bboxes[(i+1):])
95 |             # Overlap threshold for keeping + checking part of the same class
96 |             keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
97 |             keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)
98 | 
99 |     idxes = np.where(keep_bboxes)
100 |     return classes[idxes], scores[idxes], bboxes[idxes]
101 | 
102 | def bboxes_resize(bbox_ref, bboxes):
103 |     """Resize bounding boxes based on a reference bounding box,
104 |     assuming that the latter is [0, 0, 1, 1] after transform.
105 | """ 106 | bboxes = np.copy(bboxes) 107 | # Translate. 108 | bboxes[:, 0] -= bbox_ref[0] 109 | bboxes[:, 1] -= bbox_ref[1] 110 | bboxes[:, 2] -= bbox_ref[0] 111 | bboxes[:, 3] -= bbox_ref[1] 112 | # Resize. 113 | resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]] 114 | bboxes[:, 0] /= resize[0] 115 | bboxes[:, 1] /= resize[1] 116 | bboxes[:, 2] /= resize[0] 117 | bboxes[:, 3] /= resize[1] 118 | return bboxes 119 | 120 | def process_bboxes(rclasses, rscores, rbboxes, rbbox_img = (0.0, 0.0, 1.0, 1.0), 121 | top_k=400, nms_threshold=0.5): 122 | """Process the bboxes including sort and nms""" 123 | rbboxes = bboxes_clip(rbbox_img, rbboxes) 124 | rclasses, rscores, rbboxes = bboxes_sort(rclasses, rscores, rbboxes, top_k) 125 | rclasses, rscores, rbboxes = bboxes_nms(rclasses, rscores, rbboxes, nms_threshold) 126 | rbboxes = bboxes_resize(rbbox_img, rbboxes) 127 | return rclasses, rscores, rbboxes 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /ObjectDetections/SSD/var_name.txt: -------------------------------------------------------------------------------- 1 | ssd_300_vgg/conv1_1/kernel 2 | ssd_300_vgg/conv1_1/bias 3 | ssd_300_vgg/conv1_2/kernel 4 | ssd_300_vgg/conv1_2/bias 5 | ssd_300_vgg/conv2_1/kernel 6 | ssd_300_vgg/conv2_1/bias 7 | ssd_300_vgg/conv2_2/kernel 8 | ssd_300_vgg/conv2_2/bias 9 | ssd_300_vgg/conv3_1/kernel 10 | ssd_300_vgg/conv3_1/bias 11 | ssd_300_vgg/conv3_2/kernel 12 | ssd_300_vgg/conv3_2/bias 13 | ssd_300_vgg/conv3_3/kernel 14 | ssd_300_vgg/conv3_3/bias 15 | ssd_300_vgg/conv4_1/kernel 16 | ssd_300_vgg/conv4_1/bias 17 | ssd_300_vgg/conv4_2/kernel 18 | ssd_300_vgg/conv4_2/bias 19 | ssd_300_vgg/conv4_3/kernel 20 | ssd_300_vgg/conv4_3/bias 21 | ssd_300_vgg/conv5_1/kernel 22 | ssd_300_vgg/conv5_1/bias 23 | ssd_300_vgg/conv5_2/kernel 24 | ssd_300_vgg/conv5_2/bias 25 | ssd_300_vgg/conv5_3/kernel 26 | ssd_300_vgg/conv5_3/bias 27 | ssd_300_vgg/conv6/kernel 28 | ssd_300_vgg/conv6/bias 29 | ssd_300_vgg/conv7/kernel 30 | ssd_300_vgg/conv7/bias 31 | ssd_300_vgg/conv8_1x1/kernel 32 | ssd_300_vgg/conv8_1x1/bias 33 | ssd_300_vgg/conv8_3x3/kernel 34 | ssd_300_vgg/conv8_3x3/bias 35 | ssd_300_vgg/conv9_1x1/kernel 36 | ssd_300_vgg/conv9_1x1/bias 37 | ssd_300_vgg/conv9_3x3/kernel 38 | ssd_300_vgg/conv9_3x3/bias 39 | ssd_300_vgg/conv10_1x1/kernel 40 | ssd_300_vgg/conv10_1x1/bias 41 | ssd_300_vgg/conv10_3x3/kernel 42 | ssd_300_vgg/conv10_3x3/bias 43 | ssd_300_vgg/conv11_1x1/kernel 44 | ssd_300_vgg/conv11_1x1/bias 45 | ssd_300_vgg/conv11_3x3/kernel 46 | ssd_300_vgg/conv11_3x3/bias 47 | ssd_300_vgg/block4_box/L2Normalization/gamma 48 | ssd_300_vgg/block4_box/conv_loc/kernel 49 | ssd_300_vgg/block4_box/conv_loc/bias 50 | ssd_300_vgg/block4_box/conv_cls/kernel 51 | ssd_300_vgg/block4_box/conv_cls/bias 52 | ssd_300_vgg/block7_box/conv_loc/kernel 53 | ssd_300_vgg/block7_box/conv_loc/bias 54 | ssd_300_vgg/block7_box/conv_cls/kernel 55 | ssd_300_vgg/block7_box/conv_cls/bias 56 | ssd_300_vgg/block8_box/conv_loc/kernel 57 | ssd_300_vgg/block8_box/conv_loc/bias 58 | ssd_300_vgg/block8_box/conv_cls/kernel 59 | ssd_300_vgg/block8_box/conv_cls/bias 60 | ssd_300_vgg/block9_box/conv_loc/kernel 61 | ssd_300_vgg/block9_box/conv_loc/bias 62 | ssd_300_vgg/block9_box/conv_cls/kernel 63 | ssd_300_vgg/block9_box/conv_cls/bias 64 | ssd_300_vgg/block10_box/conv_loc/kernel 65 | ssd_300_vgg/block10_box/conv_loc/bias 66 | ssd_300_vgg/block10_box/conv_cls/kernel 67 | ssd_300_vgg/block10_box/conv_cls/bias 68 | ssd_300_vgg/block11_box/conv_loc/kernel 
69 | ssd_300_vgg/block11_box/conv_loc/bias 70 | ssd_300_vgg/block11_box/conv_cls/kernel 71 | ssd_300_vgg/block11_box/conv_cls/bias 72 | -------------------------------------------------------------------------------- /ObjectDetections/SSD/visualization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 Paul Balanca. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | import cv2 16 | import random 17 | 18 | import matplotlib.pyplot as plt 19 | import matplotlib.image as mpimg 20 | import matplotlib.cm as mpcm 21 | 22 | 23 | # class names 24 | CLASSES = ["aeroplane", "bicycle", "bird", "boat", "bottle", 25 | "bus", "car", "cat", "chair", "cow", "diningtable", 26 | "dog", "horse", "motorbike", "person", "pottedplant", 27 | "sheep", "sofa", "train","tvmonitor"] 28 | # =========================================================================== # 29 | # Some colormaps. 30 | # =========================================================================== # 31 | def colors_subselect(colors, num_classes=21): 32 | dt = len(colors) // num_classes 33 | sub_colors = [] 34 | for i in range(num_classes): 35 | color = colors[i*dt] 36 | if isinstance(color[0], float): 37 | sub_colors.append([int(c * 255) for c in color]) 38 | else: 39 | sub_colors.append([c for c in color]) 40 | return sub_colors 41 | 42 | colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21) 43 | colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), 44 | (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), 45 | (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), 46 | (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), 47 | (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] 48 | 49 | 50 | # =========================================================================== # 51 | # OpenCV drawing. 52 | # =========================================================================== # 53 | def draw_lines(img, lines, color=[255, 0, 0], thickness=2): 54 | """Draw a collection of lines on an image. 
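Each element of lines is expected to be an iterable of (x1, y1, x2, y2) quadruples in pixel coordinates (for example, the [[x1, y1, x2, y2]] rows returned by cv2.HoughLinesP); every segment is drawn with cv2.line.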
55 | """ 56 | for line in lines: 57 | for x1, y1, x2, y2 in line: 58 | cv2.line(img, (x1, y1), (x2, y2), color, thickness) 59 | 60 | 61 | def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2): 62 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 63 | 64 | 65 | def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2): 66 | p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1])) 67 | p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1])) 68 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 69 | p1 = (p1[0]+15, p1[1]) 70 | cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1) 71 | 72 | 73 | def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2): 74 | shape = img.shape 75 | for i in range(bboxes.shape[0]): 76 | bbox = bboxes[i] 77 | color = colors[classes[i]] 78 | # Draw bounding box... 79 | p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1])) 80 | p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1])) 81 | cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness) 82 | # Draw text... 83 | s = '%s/%.3f' % (classes[i], scores[i]) 84 | p1 = (p1[0]-5, p1[1]) 85 | cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1) 86 | 87 | 88 | # =========================================================================== # 89 | # Matplotlib show... 90 | # =========================================================================== # 91 | def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5, show_class_name=True): 92 | """Visualize bounding boxes. Largely inspired by SSD-MXNET! 93 | """ 94 | fig = plt.figure(figsize=figsize) 95 | plt.imshow(img) 96 | height = img.shape[0] 97 | width = img.shape[1] 98 | colors = dict() 99 | for i in range(classes.shape[0]): 100 | cls_id = int(classes[i]) 101 | if cls_id >= 0: 102 | score = scores[i] 103 | if cls_id not in colors: 104 | colors[cls_id] = (random.random(), random.random(), random.random()) 105 | ymin = int(bboxes[i, 0] * height) 106 | xmin = int(bboxes[i, 1] * width) 107 | ymax = int(bboxes[i, 2] * height) 108 | xmax = int(bboxes[i, 3] * width) 109 | rect = plt.Rectangle((xmin, ymin), xmax - xmin, 110 | ymax - ymin, fill=False, 111 | edgecolor=colors[cls_id], 112 | linewidth=linewidth) 113 | plt.gca().add_patch(rect) 114 | class_name = CLASSES[cls_id-1] if show_class_name else str(cls_id) 115 | plt.gca().text(xmin, ymin - 2, 116 | '{:s} | {:.3f}'.format(class_name, score), 117 | bbox=dict(facecolor=colors[cls_id], alpha=0.5), 118 | fontsize=12, color='white') 119 | plt.show() 120 | -------------------------------------------------------------------------------- /ObjectDetections/yolo/test_images/car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/yolo/test_images/car.jpg -------------------------------------------------------------------------------- /ObjectDetections/yolo/test_images/cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/yolo/test_images/cat.jpg -------------------------------------------------------------------------------- /ObjectDetections/yolo/test_images/person.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/yolo/test_images/person.jpg -------------------------------------------------------------------------------- /ObjectDetections/yolo2/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | YOLOv2 anchors and COCO classes 3 | """ 4 | 5 | """ 6 | anchors = [[0.738768, 0.874946], 7 | [2.42204, 2.65704], 8 | [4.30971, 7.04493], 9 | [10.246, 4.59428], 10 | [12.6868, 11.8741]] 11 | """ 12 | anchors = [[0.57273, 0.677385], 13 | [1.87446, 2.06253], 14 | [3.33843, 5.47434], 15 | [7.88282, 3.52778], 16 | [9.77052, 9.16828]] 17 | 18 | def read_coco_labels(): 19 | # use a context manager so the file handle is closed after reading 20 | with open("./data/coco_classes.txt") as f: 21 | class_names = [line.strip() for line in f.readlines()] 22 | return class_names 23 | 24 | class_names = read_coco_labels() -------------------------------------------------------------------------------- /ObjectDetections/yolo2/data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /ObjectDetections/yolo2/demo.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo for YOLOv2 3 | """ 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | import cv2 8 | from PIL import Image 9 | 10 | from model import darknet 11 | from detect_ops import decode 12 | from utils import preprocess_image, postprocess, draw_detection 13 | from config import anchors, class_names 14 | 15 | 16 | input_size = (416, 416) 17 | image_file = "./images/car.jpg" 18 | image = cv2.imread(image_file) 19 | image_shape = image.shape[:2] 20 | image_cp = preprocess_image(image, input_size) 21 | """ 22 | image = Image.open(image_file) 23 | image_cp = image.resize(input_size, Image.BICUBIC) 24 | image_cp = np.array(image_cp, dtype=np.float32)/255.0 25 | image_cp = np.expand_dims(image_cp, 0) 26 | #print(image_cp) 27 | """ 28 | 29 | 30 | images = tf.placeholder(tf.float32, [1, input_size[0], input_size[1], 3]) 31 | detection_feat = darknet(images) 32 | feat_sizes = input_size[0] // 32, input_size[1] // 32 33 | detection_results = decode(detection_feat, feat_sizes, len(class_names), anchors) 34 | 35 | checkpoint_path = "./checkpoint_dir/yolo2_coco.ckpt" 36 | saver = tf.train.Saver() 37 | with tf.Session() as sess: 38 | saver.restore(sess, checkpoint_path) 39 | bboxes,
obj_probs, class_probs = sess.run(detection_results, feed_dict={images: image_cp}) 40 | 41 | bboxes, scores, class_inds = postprocess(bboxes, obj_probs, class_probs, 42 | image_shape=image_shape) 43 | img_detection = draw_detection(image, bboxes, scores, class_inds, class_names) 44 | cv2.imwrite("detection.jpg", img_detection) 45 | cv2.imshow("detection results", img_detection) 46 | 47 | cv2.waitKey(0) 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /ObjectDetections/yolo2/detect_ops.py: -------------------------------------------------------------------------------- 1 | """ 2 | Detection ops for YOLOv2 3 | """ 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | 8 | 9 | def decode(detection_feat, feat_sizes=(13, 13), num_classes=80, 10 | anchors=None): 11 | """Decode the detection feature map into boxes, objectness and class probabilities""" 12 | H, W = feat_sizes 13 | num_anchors = len(anchors) 14 | detection_results = tf.reshape(detection_feat, [-1, H * W, num_anchors, 15 | num_classes + 5]) 16 | 17 | bbox_xy = tf.nn.sigmoid(detection_results[:, :, :, 0:2]) 18 | bbox_wh = tf.exp(detection_results[:, :, :, 2:4]) 19 | obj_probs = tf.nn.sigmoid(detection_results[:, :, :, 4]) 20 | class_probs = tf.nn.softmax(detection_results[:, :, :, 5:]) 21 | 22 | anchors = tf.constant(anchors, dtype=tf.float32) 23 | 24 | height_ind = tf.range(H, dtype=tf.float32) 25 | width_ind = tf.range(W, dtype=tf.float32) 26 | x_offset, y_offset = tf.meshgrid(height_ind, width_ind) 27 | x_offset = tf.reshape(x_offset, [1, -1, 1]) 28 | y_offset = tf.reshape(y_offset, [1, -1, 1]) 29 | 30 | # decode 31 | bbox_x = (bbox_xy[:, :, :, 0] + x_offset) / W 32 | bbox_y = (bbox_xy[:, :, :, 1] + y_offset) / H 33 | bbox_w = bbox_wh[:, :, :, 0] * anchors[:, 0] / W * 0.5 34 | bbox_h = bbox_wh[:, :, :, 1] * anchors[:, 1] / H * 0.5 35 | 36 | bboxes = tf.stack([bbox_x - bbox_w, bbox_y - bbox_h, 37 | bbox_x + bbox_w, bbox_y + bbox_h], axis=3) 38 | 39 | return bboxes, obj_probs, class_probs 40 | -------------------------------------------------------------------------------- /ObjectDetections/yolo2/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | Loss function for YOLOv2 3 | """ 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | def compute_loss(predictions, targets, anchors, scales, num_classes=20, feat_sizes=(13, 13)): 9 | """ 10 | Compute the loss of YOLOv2 for training 11 | """ 12 | H, W = feat_sizes 13 | C = num_classes 14 | B = len(anchors) 15 | anchors = tf.constant(anchors, dtype=tf.float32) 16 | anchors = tf.reshape(anchors, [1, 1, B, 2]) 17 | 18 | sprob, sconf, snoob, scoor = scales # the scales for different parts 19 | 20 | _coords = targets["coords"] # ground truth [-1, H*W, B, 4] 21 | _probs = targets["probs"] # class probability [-1, H*W, B, C] one hot 22 | _confs = targets["confs"] # 1 for object, 0 for background, [-1, H*W, B] 23 | 24 | # decode the net output 25 | predictions = tf.reshape(predictions, [-1, H, W, B, (5 + C)]) 26 | coords = predictions[:, :, :, :, 0:4] # t_x, t_y, t_w, t_h 27 | coords = tf.reshape(coords, [-1, H*W, B, 4]) 28 | coords_xy = tf.nn.sigmoid(coords[:, :, :, 0:2]) # (0, 1) relative cell top left 29 | coords_wh = tf.sqrt(tf.exp(coords[:, :, :, 2:4]) * anchors / 30 | np.reshape([W, H], [1, 1, 1, 2])) # sqrt of w, h (0, 1) 31 | coords = tf.concat([coords_xy, coords_wh], axis=3) # [batch_size, H*W, B, 4] 32 | 33 | confs = tf.nn.sigmoid(predictions[:, :, :, :, 4]) # object confidence 34 | confs = tf.reshape(confs, [-1, H*W, B,
1]) 35 | 36 | probs = tf.nn.softmax(predictions[:, :, :, :, 5:]) # class probability 37 | probs = tf.reshape(probs, [-1, H*W, B, C]) 38 | 39 | preds = tf.concat([coords, confs, probs], axis=3) # [-1, H*W, B, (4+1+C)] 40 | 41 | # match ground truths with anchors (predictions in fact) 42 | # assign ground truths to the predictions with the best IOU (select 1 among 5 anchors) 43 | wh = tf.pow(coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) 44 | areas = wh[:, :, :, 0] * wh[:, :, :, 1] 45 | centers = coords[:, :, :, 0:2] 46 | up_left, down_right = centers - (wh * 0.5), centers + (wh * 0.5) 47 | 48 | # the ground truth 49 | _wh = tf.pow(_coords[:, :, :, 2:4], 2) * np.reshape([W, H], [1, 1, 1, 2]) 50 | _areas = _wh[:, :, :, 0] * _wh[:, :, :, 1] 51 | _centers = _coords[:, :, :, 0:2] 52 | _up_left, _down_right = _centers - (_wh * 0.5), _centers + (_wh * 0.5) 53 | 54 | # compute IOU 55 | inter_upleft = tf.maximum(up_left, _up_left) 56 | inter_downright = tf.minimum(down_right, _down_right) 57 | inter_wh = tf.maximum(inter_downright - inter_upleft, 0.0) 58 | intersects = inter_wh[:, :, :, 0] * inter_wh[:, :, :, 1] 59 | ious = tf.truediv(intersects, areas + _areas - intersects) 60 | 61 | best_iou_mask = tf.equal(ious, tf.reduce_max(ious, axis=2, keep_dims=True)) 62 | best_iou_mask = tf.cast(best_iou_mask, tf.float32) 63 | mask = best_iou_mask * _confs # [-1, H*W, B] 64 | mask = tf.expand_dims(mask, -1) # [-1, H*W, B, 1] 65 | 66 | # compute weight terms 67 | confs_w = snoob * (1 - mask) + sconf * mask 68 | coords_w = scoor * mask 69 | probs_w = sprob * mask 70 | weights = tf.concat([coords_w, confs_w, probs_w], axis=3) 71 | 72 | truths = tf.concat([_coords, tf.expand_dims(_confs, -1), _probs], 3) 73 | 74 | loss = tf.pow(preds - truths, 2) * weights 75 | loss = tf.reduce_sum(loss, axis=[1, 2, 3]) 76 | loss = 0.5 * tf.reduce_mean(loss) 77 | return loss 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /ObjectDetections/yolo2/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/ObjectDetections/yolo2/model.png -------------------------------------------------------------------------------- /ObjectDetections/yolo2/model.py: -------------------------------------------------------------------------------- 1 | """ 2 | YOLOv2 implemented in TensorFlow, for inference only 3 | """ 4 | import os 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | 10 | 11 | ######## basic layers ####### 12 | 13 | def leaky_relu(x): 14 | return tf.nn.leaky_relu(x, alpha=0.1, name="leaky_relu") 15 | 16 | # Conv2d 17 | def conv2d(x, filters, size, pad=0, stride=1, batch_normalize=1, 18 | activation=leaky_relu, use_bias=False, name="conv2d"): 19 | if pad > 0: 20 | x = tf.pad(x, [[0, 0], [pad, pad], [pad, pad], [0, 0]]) 21 | out = tf.layers.conv2d(x, filters, size, strides=stride, padding="VALID", 22 | activation=None, use_bias=use_bias, name=name) 23 | if batch_normalize == 1: 24 | out = tf.layers.batch_normalization(out, axis=-1, momentum=0.9, 25 | training=False, name=name+"_bn") 26 | if activation: 27 | out = activation(out) 28 | return out 29 | 30 | # maxpool2d 31 | def maxpool(x, size=2, stride=2, name="maxpool"): 32 | return tf.layers.max_pooling2d(x, size, stride, name=name) 33 | 34 | # reorg layer 35 | def reorg(x, stride): 36 | return tf.extract_image_patches(x, [1, stride, stride, 1], 37
| [1, stride, stride, 1], [1,1,1,1], padding="VALID") 38 | 39 | 40 | def darknet(images, n_last_channels=425): 41 | """Darknet19 for YOLOv2""" 42 | net = conv2d(images, 32, 3, 1, name="conv1") 43 | net = maxpool(net, name="pool1") 44 | net = conv2d(net, 64, 3, 1, name="conv2") 45 | net = maxpool(net, name="pool2") 46 | net = conv2d(net, 128, 3, 1, name="conv3_1") 47 | net = conv2d(net, 64, 1, name="conv3_2") 48 | net = conv2d(net, 128, 3, 1, name="conv3_3") 49 | net = maxpool(net, name="pool3") 50 | net = conv2d(net, 256, 3, 1, name="conv4_1") 51 | net = conv2d(net, 128, 1, name="conv4_2") 52 | net = conv2d(net, 256, 3, 1, name="conv4_3") 53 | net = maxpool(net, name="pool4") 54 | net = conv2d(net, 512, 3, 1, name="conv5_1") 55 | net = conv2d(net, 256, 1, name="conv5_2") 56 | net = conv2d(net, 512, 3, 1, name="conv5_3") 57 | net = conv2d(net, 256, 1, name="conv5_4") 58 | net = conv2d(net, 512, 3, 1, name="conv5_5") 59 | shortcut = net 60 | net = maxpool(net, name="pool5") 61 | net = conv2d(net, 1024, 3, 1, name="conv6_1") 62 | net = conv2d(net, 512, 1, name="conv6_2") 63 | net = conv2d(net, 1024, 3, 1, name="conv6_3") 64 | net = conv2d(net, 512, 1, name="conv6_4") 65 | net = conv2d(net, 1024, 3, 1, name="conv6_5") 66 | # --------- 67 | net = conv2d(net, 1024, 3, 1, name="conv7_1") 68 | net = conv2d(net, 1024, 3, 1, name="conv7_2") 69 | # shortcut 70 | shortcut = conv2d(shortcut, 64, 1, name="conv_shortcut") 71 | shortcut = reorg(shortcut, 2) 72 | net = tf.concat([shortcut, net], axis=-1) 73 | net = conv2d(net, 1024, 3, 1, name="conv8") 74 | # detection layer 75 | net = conv2d(net, n_last_channels, 1, batch_normalize=0, 76 | activation=None, use_bias=True, name="conv_dec") 77 | return net 78 | 79 | 80 | 81 | if __name__ == "__main__": 82 | x = tf.random_normal([1, 416, 416, 3]) 83 | model = darknet(x) 84 | 85 | saver = tf.train.Saver() 86 | with tf.Session() as sess: 87 | saver.restore(sess, "./checkpoint_dir/yolo2_coco.ckpt") 88 | print(sess.run(model).shape) 89 | 90 | -------------------------------------------------------------------------------- /ObjectDetections/yolo2/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for YOLOv2 3 | """ 4 | import random 5 | import colorsys 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | 11 | 12 | ############## preprocess image ################## 13 | 14 | 15 | def preprocess_image(image, image_size=(416, 416)): 16 | """Preprocess an image for inference""" 17 | image_cp = np.copy(image).astype(np.float32) 18 | # convert BGR to RGB and resize the image 19 | image_rgb = cv2.cvtColor(image_cp, cv2.COLOR_BGR2RGB) 20 | image_resized = cv2.resize(image_rgb, image_size) 21 | # normalize 22 | image_normalized = image_resized.astype(np.float32) / 255.0 23 | # expand the batch_size dim 24 | image_expanded = np.expand_dims(image_normalized, axis=0) 25 | return image_expanded 26 | 27 | def postprocess(bboxes, obj_probs, class_probs, image_shape=(416, 416), 28 | threshold=0.5): 29 | """Post-process the detection results""" 30 | bboxes = np.reshape(bboxes, [-1, 4]) 31 | bboxes[:, 0::2] *= float(image_shape[1]) 32 | bboxes[:, 1::2] *= float(image_shape[0]) 33 | bboxes = bboxes.astype(np.int32) 34 | 35 | # clip the bboxes 36 | bbox_ref = [0, 0, image_shape[1] - 1, image_shape[0] - 1] 37 | bboxes = bboxes_clip(bbox_ref, bboxes) 38 | 39 | obj_probs = np.reshape(obj_probs, [-1]) 40 | class_probs = np.reshape(class_probs, [len(obj_probs), -1]) 41 | class_inds = np.argmax(class_probs, axis=1) 42 | class_probs =
class_probs[np.arange(len(obj_probs)), class_inds] 43 | scores = obj_probs * class_probs 44 | 45 | # filter bboxes with scores > threshold 46 | keep_inds = scores > threshold 47 | bboxes = bboxes[keep_inds] 48 | scores = scores[keep_inds] 49 | class_inds = class_inds[keep_inds] 50 | 51 | # sort top K 52 | class_inds, scores, bboxes = bboxes_sort(class_inds, scores, bboxes) 53 | # nms 54 | class_inds, scores, bboxes = bboxes_nms(class_inds, scores, bboxes) 55 | 56 | return bboxes, scores, class_inds 57 | 58 | def draw_detection(im, bboxes, scores, cls_inds, labels, thr=0.3): 59 | # for display 60 | ############################ 61 | # Generate colors for drawing bounding boxes. 62 | hsv_tuples = [(x / float(len(labels)), 1., 1.) 63 | for x in range(len(labels))] 64 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 65 | colors = list( 66 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 67 | colors)) 68 | random.seed(10101) # Fixed seed for consistent colors across runs. 69 | random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. 70 | random.seed(None) # Reset seed to default. 71 | # draw image 72 | imgcv = np.copy(im) 73 | h, w, _ = imgcv.shape 74 | for i, box in enumerate(bboxes): 75 | if scores[i] < thr: 76 | continue 77 | cls_indx = cls_inds[i] 78 | 79 | thick = int((h + w) / 300) 80 | cv2.rectangle(imgcv, 81 | (box[0], box[1]), (box[2], box[3]), 82 | colors[cls_indx], thick) 83 | mess = '%s: %.3f' % (labels[cls_indx], scores[i]) 84 | if box[1] < 20: 85 | text_loc = (box[0] + 2, box[1] + 15) 86 | else: 87 | text_loc = (box[0], box[1] - 10) 88 | cv2.putText(imgcv, mess, text_loc, 89 | cv2.FONT_HERSHEY_SIMPLEX, 1e-3 * h, colors[cls_indx], thick // 3) 90 | 91 | return imgcv 92 | 93 | 94 | ############## process bboxes ################## 95 | def bboxes_clip(bbox_ref, bboxes): 96 | """Clip bounding boxes with respect to the reference bbox. 97 | """ 98 | bboxes = np.copy(bboxes) 99 | bboxes = np.transpose(bboxes) 100 | bbox_ref = np.transpose(bbox_ref) 101 | bboxes[0] = np.maximum(bboxes[0], bbox_ref[0]) 102 | bboxes[1] = np.maximum(bboxes[1], bbox_ref[1]) 103 | bboxes[2] = np.minimum(bboxes[2], bbox_ref[2]) 104 | bboxes[3] = np.minimum(bboxes[3], bbox_ref[3]) 105 | bboxes = np.transpose(bboxes) 106 | return bboxes 107 | 108 | def bboxes_sort(classes, scores, bboxes, top_k=400): 109 | """Sort bounding boxes in decreasing score order and keep only the top_k. 110 | """ 111 | # if priority_inside: 112 | # inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \ 113 | # (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin) 114 | # idxes = np.argsort(-scores) 115 | # inside = inside[idxes] 116 | # idxes = np.concatenate([idxes[inside], idxes[~inside]]) 117 | idxes = np.argsort(-scores) 118 | classes = classes[idxes][:top_k] 119 | scores = scores[idxes][:top_k] 120 | bboxes = bboxes[idxes][:top_k] 121 | return classes, scores, bboxes 122 | 123 | def bboxes_iou(bboxes1, bboxes2): 124 | """Compute the IoU between bboxes1 and bboxes2. 125 | Note: bboxes1 and bboxes2 can be multi-dimensional, but they should be broadcastable. 126 | """ 127 | bboxes1 = np.transpose(bboxes1) 128 | bboxes2 = np.transpose(bboxes2) 129 | # Intersection bbox and volume. 130 | int_ymin = np.maximum(bboxes1[0], bboxes2[0]) 131 | int_xmin = np.maximum(bboxes1[1], bboxes2[1]) 132 | int_ymax = np.minimum(bboxes1[2], bboxes2[2]) 133 | int_xmax = np.minimum(bboxes1[3], bboxes2[3]) 134 | 135 | int_h = np.maximum(int_ymax - int_ymin, 0.) 136 | int_w = np.maximum(int_xmax - int_xmin, 0.)
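    # Clamping the height/width at 0 handles disjoint boxes: a negative extent
    # in either dimension means there is no intersection, so the intersection
    # area below becomes 0 and the IoU evaluates to 0 rather than a spurious
    # negative value.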
137 | int_vol = int_h * int_w 138 | # Union volume. 139 | vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1]) 140 | vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1]) 141 | iou = int_vol / (vol1 + vol2 - int_vol) 142 | return iou 143 | 144 | def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5): 145 | """Apply non-maximum suppression (NMS) to bounding boxes. 146 | """ 147 | keep_bboxes = np.ones(scores.shape, dtype=bool) 148 | for i in range(scores.size-1): 149 | if keep_bboxes[i]: 150 | # Compute overlap with the remaining bboxes. 151 | overlap = bboxes_iou(bboxes[i], bboxes[(i+1):]) 152 | # Keep a following bbox if its overlap is below the threshold or it belongs to a different class 153 | keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i]) 154 | keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap) 155 | 156 | idxes = np.where(keep_bboxes) 157 | return classes[idxes], scores[idxes], bboxes[idxes] 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Learning Tutorials with TensorFlow 2 | The deep learning algorithms are carefully implemented with [TensorFlow](https://www.tensorflow.org/). 3 | ### Environment 4 | - Python 3.5 5 | - tensorflow 1.4 6 | - pytorch 0.2.0 7 | 8 | ### The deep learning algorithms include (so far): 9 | - Logistic Regression [logisticRegression.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/logisticRegression.py) 10 | - Multi-Layer Perceptron (MLP) [mlp.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/mlp.py) 11 | - Convolutional Neural Network (CNN) [cnn.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/cnn.py) 12 | - Denoising Autoencoder (DA) [da.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/da.py) 13 | - Stacked Denoising Autoencoder (SDA) [sda.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/sda.py) 14 | - Restricted Boltzmann Machine (RBM) [[rbm.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/rbm.py) [gbrbm.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/gbrbm.py)] 15 | - Deep Belief Network (DBN) [dbn.py](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/models/dbn.py) 16 | 17 | Note: the project aims to imitate the well-implemented algorithms in [Deep Learning Tutorials](http://www.deeplearning.net/tutorial/) (coded in [Theano](http://deeplearning.net/software/theano/index.html)).
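A quick way to check that a local setup matches the environment above (a minimal sketch, not tied to any file in this repo):

```python
import sys
import tensorflow as tf

print(sys.version)      # expect Python 3.5.x
print(tf.__version__)   # expect 1.4.x for these tutorials
```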
18 | 19 | ### CNN Models 20 | - MobileNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/MobileNet.py) [paper](https://arxiv.org/abs/1704.04861) [ref](https://github.com/Zehaos/MobileNet/blob/master/nets/mobilenet.py)] 21 | - MobileNetv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/mobilenet_v2.py) [paper](https://arxiv.org/pdf/1801.04381.pdf) [ref](https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet)] 22 | - SqueezeNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/SqueezeNet.py) [paper](https://arxiv.org/abs/1602.07360)] 23 | - ResNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ResNet50.py) [caffe ref](https://github.com/KaimingHe/deep-residual-networks) [paper1](https://arxiv.org/abs/1512.03385) [paper2](https://arxiv.org/abs/1603.05027)] 24 | - ShuffleNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/ShuffleNet.py) in PyTorch [paper](http://cn.arxiv.org/pdf/1707.01083v2)] 25 | - ShuffleNetv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/shufflenet_v2.py) [ref](https://github.com/tensorpack/tensorpack/blob/master/examples/ImageNetModels/shufflenet.py) [paper](https://arxiv.org/abs/1807.11164)] 26 | - DenseNet [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/CNNs/densenet.py) [pytorch_ref](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py) [paper](https://arxiv.org/abs/1608.06993)] 27 | 28 | ### Object detection 29 | - YOLOv1 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/ObjectDetections/yolo/yolo_tf.py) [paper](https://arxiv.org/abs/1506.02640) [ref](https://github.com/gliese581gg/YOLO_tensorflow)] 30 | - SSD [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/ObjectDetections/SSD/SSD_demo.py) [paper](https://arxiv.org/abs/1512.02325) [slides](http://www.cs.unc.edu/~wliu/papers/ssd_eccv2016_slide.pdf) [caffe](https://github.com/weiliu89/caffe/tree/ssd) [TF](https://arxiv.org/pdf/1611.10012.pdf) [pytorch](https://github.com/amdegroot/ssd.pytorch)] 31 | - YOLOv2 [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/ObjectDetections/yolo2) [paper](https://arxiv.org/abs/1612.08242) [ref](https://github.com/yhcc/yolo2)] 32 | 33 | ### Practical examples 34 | You can find more practical examples with TensorFlow here: 35 | - CNN for sentence classification [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/cnn_setence_classification)] [[blog](http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/)] [[paper](https://arxiv.org/pdf/1408.5882v2.pdf)] 36 | - RNN for language model [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/rnn_language_model)] [[blog](http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano/)] [[blog_cn](http://blog.csdn.net/xiaohu2022/article/details/54578013)] 37 | - LSTM for language model (PTB data) [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/lstm_model_ptb)] [[tutorial](https://www.tensorflow.org/versions/r0.12/tutorials/recurrent/index.html#recurrent-neural-networks)] [[paper](https://arxiv.org/pdf/1409.2329.pdf)] 38 | - VGG model for image classification (object recognition)
[[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/VGG)] [[source](https://github.com/machrisaa/tensorflow-vgg)] 39 | - Residual network for the CIFAR-10 dataset [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/Resnet)] [[source](https://github.com/wenxinxu/resnet-in-tensorflow)] [[paper](https://arxiv.org/pdf/1603.05027v3.pdf)] 40 | - LSTM for time series prediction [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/lstm_time_series_regression)] [[source](https://github.com/MorvanZhou/tutorials/blob/master/tensorflowTUT/tf20_RNN2.2/full_code.py)] 41 | - Generative adversarial network (GAN) [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/gan)] 42 | - Variational autoencoder (VAE) [[self](https://github.com/xiaohu2015/DeepLearning_tutorials/tree/master/examples/VAE)] 43 | 44 | ### Results 45 | ![1](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/filters_corruption_30.png) 46 | ![2](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/new_filters_at_epoch_14.png) 47 | ![3](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/new_original_and_10samples.png) 48 | ![4](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/DBN_results.png) 49 | ![5](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/lstm_time_series_regression/lstm_regression_results.png) 50 | 51 | ### Fun Blogs 52 | - [Chatbots with Seq2Seq](http://suriyadeepan.github.io/2016-06-28-easy-seq2seq/) 53 | 54 | ### Personal Notes 55 | - TensorFlow for RNNs [[tf_rnn.ipynb](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/notes/tf_rnn.ipynb)] 56 | - TensorFlow for Autoencoders [[tf_autoencoder.ipynb](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/notes/tf_autoencoder.ipynb)] 57 | 58 | ### Other Tutorials 59 | - [ageron/handson-ml](https://github.com/ageron/handson-ml/) 61 | - [Hvass-Labs/TensorFlow-Tutorials](https://github.com/Hvass-Labs/TensorFlow-Tutorials) 63 | - [BinRoot/TensorFlow-Book](https://github.com/BinRoot/TensorFlow-Book) 65 | - [sjchoi86/dl_tutorials_10weeks](https://github.com/sjchoi86/dl_tutorials_10weeks) 67 | 68 | #### Don't hesitate to star this project if it is helpful! 69 | ### If you benefit from these tutorials, please make a small donation by scanning the WeChat QR code below.
70 | ![weichat](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/weichat.jpg) 71 | ## WeChat ID: xiaoxiaohu1994 72 | ## Welcome to follow the WeChat official account: 机器学习算法全栈工程师 (Jeemy110) 73 | ![official account](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/results/654362565405877642.jpg) 74 | -------------------------------------------------------------------------------- /data/text.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/VAE/README.md: -------------------------------------------------------------------------------- 1 | # Variational Autoencoders (VAEs) 2 | - [Variational Autoencoder in TensorFlow](https://jmetzen.github.io/2015-11-27/vae.html) 3 | - [Understanding Variational Autoencoders (VAEs) from two perspectives: deep learning and graphical models.](https://jaan.io/what-is-variational-autoencoder-vae-tutorial/) 4 | - [Introduction to variational autoencoders](https://home.zhaw.ch/~dueo/bbs/files/vae.pdf) 5 | 6 | ## Results 7 | Reconstruction after 20 epochs with a 20-D latent space: 8 | ![img_epoch20](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/VAE/img_epoch20.jpg) 9 | Random sampling after 80 epochs with a 2-D latent space: 10 | ![rand_sampling80](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/VAE/rand_img_epoch_80.jpg) 11 | -------------------------------------------------------------------------------- /examples/VAE/img_epoch20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/VAE/img_epoch20.jpg -------------------------------------------------------------------------------- /examples/VAE/rand_img_epoch_80.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/VAE/rand_img_epoch_80.jpg -------------------------------------------------------------------------------- /examples/VAE/vae_mnist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Variational Autoencoder for MNIST data 3 | reference: https://jmetzen.github.io/2015-11-27/vae.html 4 | 2017/01/17 5 | """ 6 | import sys 7 | import numpy as np 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | 11 | from input_data import read_data_sets 12 | 13 | # Random seeds for reproducibility 14 | np.random.seed(2017) 15 | tf.set_random_seed(2017) 16 | 17 | class VAE(object): 18 | """A simple variational autoencoder class""" 19 | def __init__(self, input_dim=784, z_dim=50, batch_size=100, encoder_hidden_size=[500, 500], 20 | decoder_hidden_size=[500, 500], act_fn=tf.nn.softplus): 21 | """ 22 | :param input_dim: int, the dimension of input 23 | :param z_dim: int, the dimension of latent space 24 | :param batch_size: int, batch size 25 | :param encoder_hidden_size: list or tuple, the number of hidden units in encoder 26 | :param decoder_hidden_size: list or tuple, the number of hidden units in decoder 27 | :param act_fn: the activation function 28 | """ 29 | self.input_dim = input_dim 30 | self.z_dim = z_dim 31 | self.batch_size = batch_size 32 | self.encoder_hidden_size = encoder_hidden_size 33 | self.decoder_hidden_size = decoder_hidden_size 34 | self.act_fn = act_fn 35 | 36 | self._build_model() 37 |
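    # Overview of the graph built below: the encoder outputs the Gaussian
    # parameters (mu, log sigma^2) of q(z|x), a latent code is drawn with the
    # reparameterization trick z = mu + sigma * eps with eps ~ N(0, I), and the
    # decoder maps z to Bernoulli means over the pixels. The cost is the
    # negative ELBO: reconstruction cross-entropy plus the KL divergence from
    # q(z|x) to the N(0, I) prior.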
38 | def _build_model(self): 39 | """The inner function to build the model""" 40 | # Input placeholder 41 | self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.input_dim]) 42 | # The encoder: determine the mean and (log) variance of Gaussian distribution 43 | self.z_mean, self.z_log_sigma_sq = self._encoder(self.x) 44 | # Sampling from Gaussian distribution 45 | eps = tf.random_normal([self.batch_size, self.z_dim], mean=0.0, stddev=1.0) 46 | # z = mean + sigma*epsilon 47 | self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps)) 48 | 49 | # Decoder: determine the mean of Bernoulli distribution of reconstructed input 50 | self.x_reconstr_mean = self._decoder(self.z) 51 | 52 | # Compute the loss 53 | with tf.name_scope("loss"): 54 | # The reconstruction loss: cross entropy 55 | reconstr_loss = -tf.reduce_sum(self.x * tf.log(1e-10 + self.x_reconstr_mean) + \ 56 | (1.0 - self.x) * tf.log(1e-10 + 1.0 - self.x_reconstr_mean), axis=1) 57 | # The latent loss: KL divergence 58 | latent_loss = -0.5 * tf.reduce_sum(1.0 + self.z_log_sigma_sq - tf.square(self.z_mean) - \ 59 | tf.exp(self.z_log_sigma_sq), axis=1) 60 | # Average over the batch 61 | self.cost = tf.reduce_mean(reconstr_loss + latent_loss) 62 | 63 | # The optimizer 64 | self.lr = tf.Variable(0.001, trainable=False) 65 | vars = tf.trainable_variables() 66 | self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.cost, var_list=vars) 67 | 68 | def _encoder(self, x, name="encoder"): 69 | """Encoder""" 70 | with tf.variable_scope(name): 71 | n_in = self.input_dim 72 | for i, s in enumerate(self.encoder_hidden_size): 73 | w, b = self._get_vars(n_in, s, name="h{0}".format(i)) 74 | if i == 0: 75 | h = self.act_fn(tf.nn.xw_plus_b(x, w, b)) 76 | else: 77 | h = self.act_fn(tf.nn.xw_plus_b(h, w, b)) 78 | n_in = s 79 | w, b = self._get_vars(n_in, self.z_dim, name="out_mean") 80 | z_mean = tf.nn.xw_plus_b(h, w, b) 81 | w, b = self._get_vars(n_in, self.z_dim, name="out_log_sigma") 82 | z_log_sigma_sq = tf.nn.xw_plus_b(h, w, b) 83 | return z_mean, z_log_sigma_sq 84 | 85 | def _decoder(self, z, name="decoder"): 86 | """Decoder""" 87 | with tf.variable_scope(name): 88 | n_in = self.z_dim 89 | for i, s in enumerate(self.decoder_hidden_size): 90 | w, b = self._get_vars(n_in, s, name="h{0}".format(i)) 91 | if i == 0: 92 | h = self.act_fn(tf.nn.xw_plus_b(z, w, b)) 93 | else: 94 | h = self.act_fn(tf.nn.xw_plus_b(h, w, b)) 95 | n_in = s 96 | # Use sigmoid for Bernoulli distribution 97 | w, b = self._get_vars(n_in, self.input_dim, name="out_mean") 98 | x_reconstr_mean = tf.nn.sigmoid(tf.nn.xw_plus_b(h, w, b)) 99 | return x_reconstr_mean 100 | 101 | def _get_vars(self, n_in, n_out, name=""): 102 | """ 103 | Create weight and bias variables 104 | """ 105 | with tf.variable_scope(name): 106 | w = tf.get_variable("w", [n_in, n_out], initializer=tf.contrib.layers.xavier_initializer()) 107 | b = tf.get_variable("b", [n_out,], initializer=tf.constant_initializer(0.1)) 108 | return w, b 109 | 110 | if __name__ == "__main__": 111 | n_epochs = 30 112 | lr = 0.001 113 | batch_size = 100 114 | display_every = 1 115 | 116 | path = sys.path[0] 117 | mnist = read_data_sets("MNIST_data/", one_hot=True) 118 | with tf.Session() as sess: 119 | vae = VAE(input_dim=784, z_dim=2, batch_size=batch_size, encoder_hidden_size=[500, 500], 120 | decoder_hidden_size=[500, 500], act_fn=tf.nn.softplus) 121 | sess.run(tf.global_variables_initializer()) 122 | saver = tf.train.Saver() 123 | #saver.restore(sess,
save_path=path+"/model/model.ckpt") 124 | # Start training 125 | print("Start training...") 126 | total_batch = int(mnist.train.num_examples/batch_size) 127 | for epoch in range(n_epochs): 128 | avg_cost = 0.0 129 | # For each batch 130 | for i in range(total_batch): 131 | batch_xs, _ = mnist.train.next_batch(batch_size) 132 | c, _ = sess.run([vae.cost, vae.train_op], feed_dict={vae.x: batch_xs}) 133 | avg_cost += c/total_batch 134 | if epoch % display_every == 0: 135 | save_path = saver.save(sess, path+"/model/model.ckpt") 136 | #print("\tModel saved in file: {0}".format(save_path)) 137 | print("\tEpoch {0}, cost {1}".format(epoch, avg_cost)) 138 | 139 | # Sampling 140 | x_sample, _ = mnist.test.next_batch(batch_size) 141 | x_reconstr = sess.run(vae.x_reconstr_mean, feed_dict={vae.x: x_sample}) 142 | plt.figure(figsize=(8, 12)) 143 | for i in range(5): 144 | plt.subplot(5, 2, 2*i + 1) 145 | plt.imshow(np.reshape(x_sample[i],(28, 28)), vmin=0, vmax=1, cmap="gray") 146 | plt.title("Test input") 147 | plt.colorbar() 148 | plt.subplot(5, 2, 2*i + 2) 149 | plt.imshow(np.reshape(x_reconstr[i], [28, 28]), vmin=0, vmax=1, cmap="gray") 150 | plt.title("Reconstruction") 151 | plt.colorbar() 152 | plt.tight_layout() 153 | plt.savefig(path+"/results/img_epoch{0}.jpg".format(n_epochs)) 154 | plt.show() 155 | 156 | # Random sampling 157 | nx, ny = 20, 20 158 | xs = np.linspace(-3, 3, nx) 159 | ys = np.linspace(-3, 3, ny) 160 | xs, ys = np.meshgrid(xs, ys) 161 | xs = np.reshape(xs, [-1, 1]) 162 | ys = np.reshape(ys, [-1, 1]) 163 | zs = np.concatenate((xs, ys), axis=1) 164 | 165 | canvas = np.zeros((28*ny, 28*nx)) 166 | xs_recon = np.zeros((batch_size*4, 28*28)) 167 | for i in range(4): 168 | z_mu = zs[batch_size*i:batch_size*(i+1), :] 169 | x_mean = sess.run(vae.x_reconstr_mean, feed_dict={vae.z: z_mu}) 170 | xs_recon[i*batch_size:(i+1)*batch_size] = x_mean 171 | 172 | n = 0 173 | for i in range(nx): 174 | for j in range(ny): 175 | canvas[(ny-i-1)*28:(ny-i)*28, j*28:(j+1)*28] = xs_recon[n].reshape(28, 28) 176 | n = n + 1 177 | 178 | plt.figure(figsize=(8, 10)) 179 | plt.imshow(canvas, origin="upper", vmin=0, vmax=1, interpolation='none', cmap='gray') 180 | plt.tight_layout() 181 | plt.savefig(path+"/results/rand_img_epoch{0}.jpg".format(n_epochs)) 182 | plt.show() 183 | -------------------------------------------------------------------------------- /examples/VGG/puzzle.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/VGG/puzzle.jpeg -------------------------------------------------------------------------------- /examples/VGG/tiger.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/VGG/tiger.jpeg -------------------------------------------------------------------------------- /examples/cnn_setence_classification/data/test.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /examples/cnn_setence_classification/data_helpers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import re 3 | import itertools 4 | from collections import Counter 5 | 6 | 7 | def clean_str(string): 8 | """ 9 | Tokenization/string cleaning for 
all datasets except for SST. 10 | Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py 11 | """ 12 | string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string) 13 | string = re.sub(r"\'s", " \'s", string) 14 | string = re.sub(r"\'ve", " \'ve", string) 15 | string = re.sub(r"n\'t", " n\'t", string) 16 | string = re.sub(r"\'re", " \'re", string) 17 | string = re.sub(r"\'d", " \'d", string) 18 | string = re.sub(r"\'ll", " \'ll", string) 19 | string = re.sub(r",", " , ", string) 20 | string = re.sub(r"!", " ! ", string) 21 | string = re.sub(r"\(", " \( ", string) 22 | string = re.sub(r"\)", " \) ", string) 23 | string = re.sub(r"\?", " \? ", string) 24 | string = re.sub(r"\s{2,}", " ", string) 25 | return string.strip().lower() 26 | 27 | 28 | def load_data_and_labels(positive_data_file, negative_data_file): 29 | """ 30 | Loads MR polarity data from files, splits the data into words and generates labels. 31 | Returns split sentences and labels. 32 | """ 33 | # Load data from files 34 | positive_examples = list(open(positive_data_file, "r", encoding="utf-8").readlines()) 35 | positive_examples = [s.strip() for s in positive_examples] 36 | negative_examples = list(open(negative_data_file, "r", encoding="utf-8").readlines()) 37 | negative_examples = [s.strip() for s in negative_examples] 38 | # Split by words 39 | x_text = positive_examples + negative_examples 40 | x_text = [clean_str(sent) for sent in x_text] 41 | # Generate labels 42 | positive_labels = [[0, 1] for _ in positive_examples] 43 | negative_labels = [[1, 0] for _ in negative_examples] 44 | y = np.concatenate([positive_labels, negative_labels], 0) 45 | return [x_text, y] 46 | 47 | 48 | def batch_iter(data, batch_size, num_epochs, shuffle=True): 49 | """ 50 | Generates a batch iterator for a dataset. 51 | """ 52 | data = np.array(data) 53 | data_size = len(data) 54 | num_batches_per_epoch = int((len(data)-1)/batch_size) + 1 55 | for epoch in range(num_epochs): 56 | # Shuffle the data at each epoch 57 | if shuffle: 58 | shuffle_indices = np.random.permutation(np.arange(data_size)) 59 | shuffled_data = data[shuffle_indices] 60 | else: 61 | shuffled_data = data 62 | for batch_num in range(num_batches_per_epoch): 63 | start_index = batch_num * batch_size 64 | end_index = min((batch_num + 1) * batch_size, data_size) 65 | yield shuffled_data[start_index:end_index] 66 | -------------------------------------------------------------------------------- /examples/cnn_setence_classification/text_cnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | A CNN model for sentence classification 3 | source: 'https://github.com/dennybritz/cnn-text-classification-tf/blob/master/text_cnn.py' 4 | 2016/12/21 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | class TextCNN(object): 10 | """ 11 | A CNN class for sentence classification 12 | The model includes an embedding layer, a convolutional layer, a max-pooling layer and 13 | a softmax layer as the output. 14 | """ 15 | def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filters, 16 | num_classes=2, l2_reg_lambda=0.0): 17 | """ 18 | :param seq_len: int, the sequence length (i.e. the length of the sentences, 19 | keep all length same by zero-padding) 20 | :param vocab_size: int, the size of vocabulary to define the embedding layer 21 | :param embedding_size: int, the dimensionality of the embeddings (word vector). 
22 | :param filter_sizes: list or tuple, the number of words we want our convolutional filters to cover. 23 | For example, [3, 4, 5] means that we will have filters that slide over 3, 4 24 | and 5 words respectively 25 | :param num_filters: int, the number of filters per filter_size; hence, we have a total of 26 | len(filter_sizes) * num_filters filters 27 | :param num_classes: the number of classes we want to predict in the output layer, default 2 28 | :param l2_reg_lambda: float, the ratio of L2 loss 29 | """ 30 | # keep track of all parameters 31 | self.seq_len = seq_len 32 | self.vocab_size = vocab_size 33 | self.embedding_size = embedding_size 34 | self.filter_sizes = filter_sizes 35 | self.num_filters = num_filters 36 | self.num_classes = num_classes 37 | self.l2_reg_lambda = l2_reg_lambda 38 | # Define the input and output 39 | self.x = tf.placeholder(tf.int32, shape=[None, seq_len], name="x") 40 | self.y = tf.placeholder(tf.float32, shape=[None, num_classes], name="y") 41 | # The dropout probability 42 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") 43 | # Compute the L2 regularization loss 44 | L2_loss = tf.constant(0.0) # initial value 0.0 45 | 46 | # The Embedding layer 47 | with tf.device("/cpu:0"): # the embedding lookup op does not support GPU 48 | with tf.name_scope("embedding"): 49 | # The embedding matrix 50 | self.W_embedding = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), 51 | dtype=tf.float32, name="W_embedding") 52 | # The embedding results 53 | self.embedded_chars = tf.nn.embedding_lookup(self.W_embedding, self.x) #[None, seq_len, embedding_size] 54 | # Expand it to use conv2D operation 55 | self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, axis=-1) # [None, seq_len, embedding_size, 1] 56 | 57 | # The convolution and maxpool layer 58 | pooled_outputs = [] 59 | self.Ws_conv = [] 60 | self.bs_conv = [] 61 | # For each filter 62 | for i, filter_size in enumerate(filter_sizes): 63 | with tf.name_scope("conv_maxpool_{0}".format(filter_size)): 64 | # Convolution layer 65 | filter_shape = [filter_size, embedding_size, 1, num_filters] 66 | # Conv params 67 | W_conv = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), 68 | dtype=tf.float32, name="W_conv") 69 | self.Ws_conv.append(W_conv) 70 | b_conv = tf.Variable(tf.constant(0.1, shape=[num_filters,]), dtype=tf.float32, 71 | name="b_conv") 72 | self.bs_conv.append(b_conv) 73 | # conv result 74 | conv_output = tf.nn.conv2d(self.embedded_chars_expanded, W_conv, strides=[1, 1, 1, 1], 75 | padding="VALID", name="conv") # [None, seq_len-filter_size+1, 1, num_filters] 76 | # use relu as activation 77 | conv_h = tf.nn.relu(tf.nn.bias_add(conv_output, b_conv), name="relu") 78 | # Use max-pooling 79 | pool_output = tf.nn.max_pool(conv_h, ksize=[1, seq_len-filter_size+1, 1, 1], 80 | strides=[1, 1, 1, 1], padding="VALID", name="max_pooling") 81 | pooled_outputs.append(pool_output) # [None, 1, 1, num_filters] 82 | # Combine all pooled features 83 | num_filters_total = num_filters * len(filter_sizes) 84 | self.h_pool = tf.concat(pooled_outputs, 3) # [None, 1, 1, num_filters_total] 85 | self.h_pool_flat = tf.reshape(self.h_pool, shape=[-1, num_filters_total]) # [None, num_filters_total] 86 | 87 | # The dropout layer 88 | with tf.name_scope("dropout"): 89 | self.h_dropout = tf.nn.dropout(self.h_pool_flat, keep_prob=self.dropout_keep_prob, name="dropout") 90 | 91 | # The output layer (softmax) 92 | with tf.name_scope("output"): 93 | self.W_fullyconn
= tf.get_variable("W_fullyconn", shape=[num_filters_total, num_classes], 94 | initializer=tf.contrib.layers.xavier_initializer()) 95 | self.b_fullyconn = tf.Variable(tf.constant(0.1, shape=[num_classes,]), dtype=tf.float32, name="b_fullyconn") 96 | # L2_loss 97 | L2_loss += tf.nn.l2_loss(self.W_fullyconn) 98 | self.scores = tf.nn.xw_plus_b(self.h_dropout, self.W_fullyconn, self.b_fullyconn, name="scores") 99 | self.preds = tf.argmax(self.scores, axis=1, name="preds") 100 | 101 | # The loss 102 | with tf.name_scope("loss"): 103 | losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y) 104 | self.loss = tf.reduce_mean(losses) + L2_loss * l2_reg_lambda 105 | 106 | # Accuracy 107 | with tf.name_scope("accuracy"): 108 | correct_preds = tf.equal(self.preds, tf.argmax(self.y, axis=1)) 109 | self.accuracy = tf.reduce_mean(tf.cast(correct_preds, tf.float32)) 110 | 111 | def save_weights(self, sess, filename, name="TextCNN"): 112 | """Save the model weights to filename.""" 113 | save_dicts = {name+"_W_embedding": self.W_embedding} 114 | for i in range(len(self.Ws_conv)): 115 | save_dicts.update({name+"_W_conv_"+str(i): self.Ws_conv[i], 116 | name+"_b_conv_"+str(i): self.bs_conv[i]}) 117 | save_dicts.update({name+"_W_fullyconn": self.W_fullyconn, 118 | name+"_b_fullyconn": self.b_fullyconn}) 119 | saver = tf.train.Saver(save_dicts) 120 | return saver.save(sess, filename) 121 | 122 | def load_weights(self, sess, filename, name="TextCNN"): 123 | """Restore the model weights from filename.""" 124 | save_dicts = {name+"_W_embedding": self.W_embedding} 125 | for i in range(len(self.Ws_conv)): 126 | save_dicts.update({name+"_W_conv_"+str(i): self.Ws_conv[i], 127 | name+"_b_conv_"+str(i): self.bs_conv[i]}) 128 | save_dicts.update({name+"_W_fullyconn": self.W_fullyconn, 129 | name+"_b_fullyconn": self.b_fullyconn}) 130 | saver = tf.train.Saver(save_dicts) 131 | saver.restore(sess, filename) -------------------------------------------------------------------------------- /examples/cnn_setence_classification/train_cnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Train the TextCNN model 3 | 2016/12/22 4 | """ 5 | import os 6 | import sys 7 | import numpy as np 8 | import tensorflow as tf 9 | from sklearn.model_selection import train_test_split 10 | from tensorflow.contrib import learn 11 | 12 | from data_helpers import load_data_and_labels, batch_iter 13 | from text_cnn import TextCNN 14 | 15 | 16 | # Load original data 17 | path = sys.path[0] 18 | pos_filename = path + "/data/rt-polarity.pos" 19 | neg_filename = path + "/data/rt-polarity.neg" 20 | 21 | X_data, y_data = load_data_and_labels(pos_filename, neg_filename) 22 | max_document_length = max([len(sen.split(" ")) for sen in X_data]) 23 | print("Max_document_length:", max_document_length) 24 | # Create the vocabulary 25 | vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length) 26 | # The idx data 27 | x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32) 28 | y = np.array(y_data, dtype=np.int32) 29 | vocabulary_size = len(vocab_processor.vocabulary_) 30 | print("The size of vocabulary:", vocabulary_size) 31 | # Split the data 32 | X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111) 33 | print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape)) 34 | print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape)) 35 | 36 | # The parameters of the CNN model 37 | seq_len = X_train.shape[1] 38 | vocab_size = vocabulary_size 39 |
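# For reference, Kim (2014) uses filter windows of 3, 4 and 5 with 100 feature
# maps each; the slightly different values below are simply the defaults picked
# for this demo.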
embedding_size = 128 40 | filter_sizes = [2, 3, 4] 41 | num_filters = 128 42 | num_classes = y_train.shape[1] 43 | l2_reg_lambda = 0.0 44 | 45 | # Construct the CNN model 46 | text_cnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size, filter_sizes= 47 | filter_sizes, num_filters=num_filters, num_classes=num_classes) 48 | loss = text_cnn_model.loss 49 | train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss) 50 | accuracy = text_cnn_model.accuracy 51 | # The parameters for training 52 | batch_size = 64 53 | training_epochs = 10 54 | display_every = 1 55 | dropout_keep_prob = 0.5 56 | 57 | batch_num = int(X_train.shape[0]/batch_size) 58 | 59 | sess = tf.Session() 60 | sess.run(tf.global_variables_initializer()) 61 | print("Starting training...") 62 | for epoch in range(training_epochs): 63 | avg_cost = 0 64 | for batch in range(batch_num): 65 | _, cost = sess.run([train_op, loss], feed_dict={text_cnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size], 66 | text_cnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size], 67 | text_cnn_model.dropout_keep_prob:dropout_keep_prob}) 68 | avg_cost += cost 69 | if epoch % display_every == 0: 70 | cost, acc = sess.run([loss, accuracy], feed_dict={text_cnn_model.x: X_test, 71 | text_cnn_model.y: y_test, 72 | text_cnn_model.dropout_keep_prob: 1.0}) 73 | print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc)) 74 | 75 | -------------------------------------------------------------------------------- /examples/gan/DCGAN.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2017/01/09 3 | """ 4 | import sys 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras.datasets import mnist 8 | from PIL import Image 9 | 10 | # Batch normalization 11 | def batch_norm(inpt, epsilon=1e-05, decay=0.9, is_training=True, name="batch_norm"): 12 | """ 13 | Implements batch normalization 14 | The input is a 4-D tensor 15 | """ 16 | bn = tf.contrib.layers.batch_norm(inpt, decay=decay, updates_collections=None, 17 | epsilon=epsilon, scale=True, is_training=is_training, scope=name) 18 | return bn 19 | 20 | # Convolution 2-D 21 | def conv2d(inpt, nb_filter, filter_size=5, strides=2, bias=True, stddev=0.02, padding="SAME", 22 | name="conv2d"): 23 | in_channels = inpt.get_shape().as_list()[-1] 24 | with tf.variable_scope(name): 25 | w = tf.get_variable("w", shape=[filter_size, filter_size, in_channels, nb_filter], 26 | initializer=tf.truncated_normal_initializer(mean=0.0, stddev=stddev)) 27 | conv = tf.nn.conv2d(inpt, w, strides=[1, strides, strides, 1], padding=padding) 28 | if bias: 29 | b = tf.get_variable("b", shape=[nb_filter,], initializer=tf.constant_initializer(0.0)) 30 | conv = tf.nn.bias_add(conv, b) 31 | return conv 32 | 33 | # Convolution 2D Transpose 34 | def deconv2d(inpt, output_shape, filter_size=5, strides=2, bias=True, stddev=0.02, 35 | padding="SAME", name="deconv2d"): 36 | in_channels = inpt.get_shape().as_list()[-1] 37 | with tf.variable_scope(name): 38 | # Note: filter with shape [height, width, output_channels, in_channels] 39 | w = tf.get_variable("w", shape=[filter_size, filter_size, output_shape[-1], in_channels], 40 | initializer=tf.truncated_normal_initializer(mean=0.0, stddev=stddev)) 41 | deconv = tf.nn.conv2d_transpose(inpt, w, output_shape=output_shape, strides=[1, strides, strides, 1], 42 | padding=padding) 43 | if bias: 44 | b = tf.get_variable("b", shape=[output_shape[-1]], initializer=tf.constant_initializer(0.0)) 45 |
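            # tf.nn.bias_add broadcasts the per-channel bias over the batch and
            # spatial dimensions of the transposed-convolution output.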
deconv = tf.nn.bias_add(deconv, b) 46 | return deconv 47 | 48 | # Leaky ReLU 49 | def lrelu(x, leak=0.2, name="lrelu"): 50 | return tf.maximum(x, x*leak) 51 | 52 | # Linear 53 | def linear(x, output_dim, stddev=0.02, name="linear"): 54 | input_dim = x.get_shape().as_list()[-1] 55 | with tf.variable_scope(name): 56 | w = tf.get_variable("w", shape=[input_dim, output_dim], initializer=\ 57 | tf.random_normal_initializer(stddev=stddev)) 58 | b = tf.get_variable("b", shape=[output_dim,], initializer=tf.constant_initializer(0.0)) 59 | return tf.nn.xw_plus_b(x, w, b) 60 | 61 | class DCGAN(object): 62 | """A class of DCGAN model""" 63 | def __init__(self, z_dim=100, output_dim=28, batch_size=100, c_dim=1, df_dim=64, gf_dim=64, dfc_dim=1024, 64 | n_conv=3, n_deconv=2): 65 | """ 66 | :param z_dim: int, the dimension of z (the noise input of generator) 67 | :param output_dim: int, the resolution in pixels of the images (height, width) 68 | :param batch_size: int, the size of the mini-batch 69 | :param c_dim: int, the dimension of image color, for minist, it is 1 (grayscale) 70 | :param df_dim: int, the number of filters in the first convolution layer of discriminator 71 | :param gf_dim: int, the number of filters in the penultimate deconvolution layer of generator (last is 1) 72 | :param dfc_dim: int, the number of units in the penultimate fully-connected layer of discriminator (last is 1) 73 | :param n_conv: int, number of convolution layer in discriminator (the number of filters is double increased) 74 | :param n_deconv: int, number of deconvolution layer in generator (the number of filters is double reduced) 75 | """ 76 | self.z_dim = z_dim 77 | self.output_dim = output_dim 78 | self.c_dim = c_dim 79 | self.df_dim = df_dim 80 | self.gf_dim = gf_dim 81 | self.dfc_dim = dfc_dim 82 | self.n_conv = n_conv 83 | self.n_deconv = n_deconv 84 | self.batch_size = batch_size 85 | 86 | self._build_model() 87 | 88 | def _build_model(self): 89 | # input 90 | self.z = tf.placeholder(tf.float32, shape=[self.batch_size, self.z_dim]) 91 | self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.output_dim, 92 | self.output_dim, self.c_dim]) 93 | 94 | # G 95 | self.G = self._generator(self.z) 96 | # D 97 | self.D1, d1_logits = self._discriminator(self.x, reuse=False) 98 | self.D2, d2_logits = self._discriminator(self.G, reuse=True) 99 | 100 | self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(d2_logits, tf.ones_like(self.D2))) 101 | real_loss = tf.nn.sigmoid_cross_entropy_with_logits(d1_logits, tf.ones_like(self.D1)) 102 | fake_loss = tf.nn.sigmoid_cross_entropy_with_logits(d2_logits, tf.zeros_like(self.D2)) 103 | self.d_loss = tf.reduce_mean(real_loss + fake_loss) 104 | 105 | vars = tf.trainable_variables() 106 | self.d_vars = [v for v in vars if "D" in v.name] 107 | self.g_vars = [v for v in vars if "G" in v.name] 108 | 109 | def _discriminator(self, input, reuse=False): 110 | with tf.variable_scope("D", reuse=reuse): 111 | h = lrelu(conv2d(input, nb_filter=self.df_dim, name="d_conv0")) 112 | for i in range(1, self.n_conv): 113 | conv = conv2d(h, nb_filter=self.df_dim*(2**i), name="d_conv{0}".format(i)) 114 | h = lrelu(batch_norm(conv, name="d_bn{0}".format(i))) 115 | h = linear(tf.reshape(h, shape=[self.batch_size, -1]), self.dfc_dim, name="d_lin0") 116 | h = linear(tf.nn.tanh(h), 1, name="d_lin1") 117 | return tf.nn.sigmoid(h), h 118 | 119 | def _generator(self, input): 120 | with tf.variable_scope("G"): 121 | nb_fliters = [self.gf_dim] 122 | f_size = [self.output_dim//2] 123 | for i 
in range(1, self.n_deconv): 124 | nb_fliters.append(nb_fliters[-1]*2) 125 | f_size.append(f_size[-1]//2) 126 | 127 | h = linear(input, nb_fliters[-1]*f_size[-1]*f_size[-1], name="g_lin0") 128 | h = tf.nn.relu(batch_norm(tf.reshape(h, shape=[-1, f_size[-1], f_size[-1], nb_fliters[-1]]), 129 | name="g_bn0")) 130 | for i in range(1, self.n_deconv): 131 | h = deconv2d(h, [self.batch_size, f_size[-i-1], f_size[-i-1], nb_fliters[-i-1]], 132 | name="g_deconv{0}".format(i-1)) 133 | h = tf.nn.relu(batch_norm(h, name="g_bn{0}".format(i))) 134 | 135 | h = deconv2d(h, [self.batch_size, self.output_dim, self.output_dim, self.c_dim], 136 | name="g_deconv{0}".format(self.n_deconv-1)) 137 | return tf.nn.tanh(h) 138 | 139 | def combine_images(images): 140 | """Combine the bacth images""" 141 | num = images.shape[0] 142 | width = int(np.sqrt(num)) 143 | height = int(np.ceil(num/width)) 144 | h, w = images.shape[1:-1] 145 | img = np.zeros((height*h, width*w), dtype=images.dtype) 146 | for index, m in enumerate(images): 147 | i = int(index/width) 148 | j = index % width 149 | img[i*h:(i+1)*h, j*w:(j+1)*w] = m[:, :, 0] 150 | return img 151 | 152 | if __name__ == "__main__": 153 | # Load minist data 154 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 155 | X_train = (np.asarray(X_train, dtype=np.float32) - 127.5)/127.5 156 | X_train = np.reshape(X_train, [-1, 28, 28, 1]) 157 | 158 | z_dim = 100 159 | batch_size = 128 160 | lr = 0.0002 161 | n_epochs = 10 162 | 163 | sess = tf.Session() 164 | dcgan = DCGAN(z_dim=z_dim, output_dim=28, batch_size=128, c_dim=1) 165 | # The optimizers 166 | d_train_op = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(dcgan.d_loss, 167 | var_list=dcgan.d_vars) 168 | g_train_op = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(dcgan.g_loss, 169 | var_list=dcgan.g_vars) 170 | sess.run(tf.global_variables_initializer()) 171 | 172 | num_batches = int(len(X_train)/batch_size) 173 | for epoch in range(n_epochs): 174 | print("Epoch", epoch) 175 | d_losses = 0 176 | g_losses = 0 177 | for idx in range(num_batches): 178 | # Train D 179 | z = np.random.uniform(-1, 1, size=[batch_size, z_dim]) 180 | x = X_train[idx*batch_size:(idx+1)*batch_size] 181 | _, d_loss = sess.run([d_train_op, dcgan.d_loss], feed_dict={dcgan.z: z, 182 | dcgan.x: x}) 183 | d_losses += d_loss/num_batches 184 | # Train G 185 | z = np.random.uniform(-1, 1, size=[batch_size, z_dim]) 186 | _, g_loss = sess.run([g_train_op, dcgan.g_loss], feed_dict={dcgan.z: z}) 187 | g_losses += g_loss/num_batches 188 | 189 | print("\td_loss {0}, g_loss {1}".format(d_losses, g_losses)) 190 | # Generate images 191 | z = np.random.uniform(-1, 1, size=[batch_size, z_dim]) 192 | images = sess.run(dcgan.G, feed_dict={dcgan.z: z}) 193 | img = combine_images(images) 194 | img = img*127.5 + 127.5 195 | Image.fromarray(img.astype(np.uint8)).save("epoch{0}_g_images.png".format(epoch)) 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /examples/gan/README.md: -------------------------------------------------------------------------------- 1 | - [Generative Adversarial Nets in TensorFlow](http://blog.evjang.com/2016/06/generative-adversarial-nets-in.html) [[myBlog](http://blog.csdn.net/xiaohu2022/article/details/54234263)] 2 | - [An introduction to Generative Adversarial Networks (with code in TensorFlow)](http://blog.aylien.com/introduction-generative-adversarial-networks-code-tensorflow/) 3 | - [Keras DCGAN](https://github.com/jacobgil/keras-dcgan) 4 | - [Tensorflow 
DCGAN](https://github.com/carpedm20/DCGAN-tensorflow) 5 | - [InforGAN](https://github.com/openai/InfoGAN) 6 | - [Image Completion with Deep Learning in TensorFlow](http://bamos.github.io/2016/08/09/deep-completion/) 7 | - [Generative models](https://openai.com/blog/generative-models/) 8 | 9 | ## Results 10 | ![minist](https://github.com/xiaohu2015/DeepLearning_tutorials/blob/master/examples/gan/epoch3_g_images.png) 11 | -------------------------------------------------------------------------------- /examples/gan/epoch3_g_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/gan/epoch3_g_images.png -------------------------------------------------------------------------------- /examples/lstm_model_ptb/reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | 17 | """Utilities for parsing PTB text files.""" 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | 22 | import collections 23 | import os 24 | 25 | import numpy as np 26 | import tensorflow as tf 27 | 28 | 29 | def _read_words(filename): 30 | with tf.gfile.GFile(filename, "r") as f: 31 | return f.read().decode("utf-8").replace("\n", "").split() 32 | 33 | 34 | def _build_vocab(filename): 35 | data = _read_words(filename) 36 | counter = collections.Counter(data) 37 | count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) 38 | 39 | words, _ = list(zip(*count_pairs)) 40 | word_to_id = dict(zip(words, range(len(words)))) 41 | 42 | return word_to_id 43 | 44 | 45 | def _file_to_word_ids(filename, word_to_id): 46 | data = _read_words(filename) 47 | return [word_to_id[word] for word in data] 48 | 49 | 50 | def ptb_raw_data(data_path=None): 51 | """Load PTB raw data from data directory "data_path". 52 | Reads PTB text files, converts strings to integer ids, 53 | and performs mini-batching of the inputs. 54 | The PTB dataset comes from Tomas Mikolov's webpage: 55 | http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz 56 | Args: 57 | data_path: string path to the directory where simple-examples.tgz has 58 | been extracted. 59 | Returns: 60 | tuple (train_data, valid_data, test_data, vocabulary) 61 | where each of the data objects can be passed to PTBIterator. 
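    Example (an illustrative call, assuming simple-examples.tgz was extracted
    under ./data):
        train_data, valid_data, test_data, vocabulary = ptb_raw_data("./data")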
62 | """ 63 | 64 | train_path = os.path.join(data_path, "ptb.train.txt") 65 | valid_path = os.path.join(data_path, "ptb.valid.txt") 66 | test_path = os.path.join(data_path, "ptb.test.txt") 67 | 68 | word_to_id = _build_vocab(train_path) 69 | train_data = _file_to_word_ids(train_path, word_to_id) 70 | valid_data = _file_to_word_ids(valid_path, word_to_id) 71 | test_data = _file_to_word_ids(test_path, word_to_id) 72 | vocabulary = len(word_to_id) 73 | return train_data, valid_data, test_data, vocabulary 74 | 75 | 76 | def ptb_iterator(raw_data, batch_size, num_steps): 77 | """Iterate on the raw PTB data. 78 | This generates batch_size pointers into the raw PTB data, and allows 79 | minibatch iteration along these pointers. 80 | Args: 81 | raw_data: one of the raw data outputs from ptb_raw_data. 82 | batch_size: int, the batch size. 83 | num_steps: int, the number of unrolls. 84 | Yields: 85 | Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. 86 | The second element of the tuple is the same data time-shifted to the 87 | right by one. 88 | Raises: 89 | ValueError: if batch_size or num_steps are too high. 90 | """ 91 | raw_data = np.array(raw_data, dtype=np.int32) 92 | 93 | data_len = len(raw_data) 94 | batch_len = data_len // batch_size 95 | data = np.zeros([batch_size, batch_len], dtype=np.int32) 96 | for i in range(batch_size): 97 | data[i] = raw_data[batch_len * i:batch_len * (i + 1)] 98 | 99 | epoch_size = (batch_len - 1) // num_steps 100 | 101 | if epoch_size == 0: 102 | raise ValueError("epoch_size == 0, decrease batch_size or num_steps") 103 | 104 | for i in range(epoch_size): 105 | x = data[:, i*num_steps:(i+1)*num_steps] 106 | y = data[:, i*num_steps+1:(i+1)*num_steps+1] 107 | yield (x, y) -------------------------------------------------------------------------------- /examples/lstm_time_series_regression/lstm_regression.py: -------------------------------------------------------------------------------- 1 | """ 2 | LSTM Model for Time Series Prediction/Regression 3 | source: 'https://github.com/MorvanZhou/tutorials/blob/master/tensorflowTUT/tf20_RNN2.2/full_code.py' 4 | 2017/01/03 5 | """ 6 | import sys 7 | import numpy as np 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | 11 | def batch_iterate(num_batchs, batch_size, num_steps): 12 | """ 13 | Generate the mini batch about sin and cos function 14 | """ 15 | start = 0 16 | for i in range(num_batchs): 17 | xo = np.arange(start, start+batch_size*num_steps).reshape( 18 | [batch_size, num_steps])/(10.0*np.pi) 19 | x = np.sin(xo) 20 | y = np.cos(xo) 21 | start += num_steps 22 | yield (x[:, :, np.newaxis], y[:, :, np.newaxis], xo) 23 | 24 | class LstmRegression(object): 25 | """ 26 | A lstm class for time series prediction 27 | """ 28 | def __init__(self, in_size, out_size, num_steps=20, cell_size=20, batch_size=50, 29 | num_lstm_layers=2, keep_prob=0.5, is_training=True): 30 | """ 31 | :param in_size: int, the dimension of input 32 | :param out_size: int, the dimension of output 33 | :param num_steps: int, the number of time steps 34 | :param cell_size: int, the size of lstm cell 35 | :param batch_size: int, the size of mini bacth 36 | :param num_lstm_layers: int, the number of lstm cells 37 | :param keep_prob: float, the keep probability of dropout layer 38 | :param is_training: bool, set True for training model, but False for test model 39 | """ 40 | self.in_size = in_size 41 | self.out_size = out_size 42 | self.num_steps = num_steps 43 | self.cell_size = cell_size 44 | self.batch_size = 
batch_size 45 | self.num_lstm_layers = num_lstm_layers 46 | self.keep_prob = keep_prob 47 | self.is_training = is_training 48 | self.__build_model__() 49 | 50 | def __build_model__(self): 51 | """ 52 | The inner method to construct the lstm model. 53 | """ 54 | # Input and output placeholders 55 | self.x = tf.placeholder(tf.float32, shape=[None, self.num_steps, self.in_size]) 56 | self.y = tf.placeholder(tf.float32, shape=[None, self.num_steps, self.out_size]) 57 | 58 | # Add the first input layer 59 | with tf.variable_scope("input"): 60 | # Reshape x to 2-D tensor 61 | inputs = tf.reshape(self.x, shape=[-1, self.in_size]) #[batch_size*num_steps, in_size] 62 | W, b = self._get_weight_bias(self.in_size, self.cell_size) 63 | inputs = tf.nn.xw_plus_b(inputs, W, b, name="input_xW_plus_b") 64 | # Reshep to 3-D tensor 65 | inputs = tf.reshape(inputs, shape=[-1, self.num_steps, self.cell_size]) #[batch_size, num_steps, in_size] 66 | 67 | # Dropout the inputs 68 | if self.is_training and self.keep_prob < 1.0: 69 | inputs = tf.nn.dropout(inputs, keep_prob=self.keep_prob) 70 | 71 | # Construct lstm cells 72 | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.cell_size, forget_bias=1.0, state_is_tuple=True) 73 | if self.is_training and self.keep_prob < 1.0: 74 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=self.keep_prob) 75 | cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]*self.num_lstm_layers) 76 | # The initial state 77 | self.init_state = cell.zero_state(self.batch_size, dtype=tf.float32) 78 | 79 | # Add the lstm layer 80 | with tf.variable_scope("LSTM"): 81 | outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=self.init_state) 82 | self.final_state = final_state 83 | 84 | # Add the output layer 85 | with tf.variable_scope("output"): 86 | output = tf.reshape(outputs, shape=[-1, self.cell_size]) 87 | W, b = self._get_weight_bias(self.cell_size, self.out_size) 88 | output = tf.nn.xw_plus_b(output, W, b, name="output") 89 | 90 | self.pred = output 91 | losses = tf.nn.seq2seq.sequence_loss_by_example([tf.reshape(self.pred, [-1,])], [tf.reshape(self.y, [-1,])], 92 | [tf.ones([self.batch_size*self.num_steps])], average_across_timesteps=True, 93 | softmax_loss_function=self._ms_cost) 94 | self.cost = tf.reduce_sum(losses)/tf.to_float(self.batch_size) 95 | 96 | def _ms_cost(self, y_pred, y_target): 97 | """The quadratic cost function""" 98 | return 0.5*tf.square(y_pred - y_target) 99 | 100 | def _get_weight_bias(self, in_size, out_size): 101 | """ 102 | Create weight and bias variables 103 | """ 104 | weights = tf.get_variable("weight", shape=[in_size, out_size], 105 | initializer=tf.random_normal_initializer(mean=0.0, stddev=1.0)) 106 | biases = tf.get_variable("bias", shape=[out_size,], initializer=tf.constant_initializer(0.1)) 107 | return weights, biases 108 | 109 | if __name__ == "__main__": 110 | batch_size = 50 111 | in_size = 1 112 | out_size = 1 113 | cell_size = 10 114 | num_steps = 20 115 | lr = 0.002 116 | num_batchs = 200 117 | n_epochs = 10 118 | 119 | with tf.Session() as sess: 120 | with tf.variable_scope("model", reuse=None): 121 | model = LstmRegression(in_size, out_size, num_steps=num_steps, cell_size=cell_size, 122 | batch_size=batch_size, num_lstm_layers=2, keep_prob=0.5, is_training=True) 123 | with tf.variable_scope("model", reuse=True): 124 | pred_model = LstmRegression(in_size, out_size, num_steps=num_steps, cell_size=cell_size, 125 | batch_size=batch_size, num_lstm_layers=2, keep_prob=1.0, is_training=False) 126 | 127 | train_op = 
tf.train.AdamOptimizer(lr).minimize(model.cost) 128 | tf.summary.scalar("cost", model.cost) 129 | merged = tf.merge_all_summaries() 130 | writer = tf.train.SummaryWriter("logs", sess.graph) 131 | sess.run(tf.global_variables_initializer()) 132 | 133 | global_steps = 0 134 | state = sess.run(model.init_state) 135 | for epoch in range(n_epochs): 136 | losses = 0 137 | for x, y, xo in batch_iterate(num_batchs, batch_size, num_steps): 138 | _, cost, state = sess.run([train_op, model.cost, model.final_state], feed_dict={model.x: x, 139 | model.y: y, model.init_state: state}) 140 | losses += cost/num_batchs 141 | print("Epoch {0}, cost {1}".format(epoch, losses)) 142 | 143 | # The prediction 144 | plt.ion() 145 | plt.show() 146 | state = sess.run(pred_model.init_state) 147 | for x, y, xo in batch_iterate(num_batchs, batch_size, num_steps): 148 | pred, state = sess.run([pred_model.pred, pred_model.final_state], feed_dict={pred_model.x: x, 149 | pred_model.y: y, pred_model.init_state: state }) 150 | 151 | # plotting 152 | plt.plot(xo[0, :], y[0].flatten(), 'r', xo[0, :], pred.flatten()[:num_steps], 'b--') 153 | plt.ylim((-1.2, 1.2)) 154 | plt.draw() 155 | plt.pause(0.3) 156 | 157 | -------------------------------------------------------------------------------- /examples/lstm_time_series_regression/lstm_regression_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/examples/lstm_time_series_regression/lstm_regression_results.png -------------------------------------------------------------------------------- /examples/rnn_language_model/input_data_rnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | The data used in RNN language model. 3 | """ 4 | import csv 5 | import itertools 6 | import operator 7 | import numpy as np 8 | import nltk 9 | import sys 10 | from datetime import datetime 11 | 12 | 13 | def get_data(fileName='/data/reddit-comments-2015-08.csv', vocabulary_size = 8000, unknown_token = "UNKNOWN_TOKEN", 14 | sentence_start_token="SENTENCE_START", sentence_end_token = "SENTENCE_END"): 15 | # Read the data and append SENTENCE_START and SENTENCE_END tokens 16 | print("Reading CSV file...") 17 | with open(sys.path[0]+fileName, 'r', encoding='utf-8') as f: 18 | reader = csv.reader(f, skipinitialspace=True) 19 | reader.__next__() 20 | # Split full comments into sentences 21 | sentences = itertools.chain(*[nltk.sent_tokenize(x[0].lower()) for x in reader]) 22 | # Append SENTENCE_START and SENTENCE_END 23 | sentences = ["%s %s %s" % (sentence_start_token, x, sentence_end_token) for x in sentences] 24 | print("Parsed %d sentences." % (len(sentences))) 25 | 26 | # Tokenize the sentences into words 27 | tokenized_sentences = [nltk.word_tokenize(sent) for sent in sentences] 28 | 29 | # Count the word frequencies 30 | word_freq = nltk.FreqDist(itertools.chain(*tokenized_sentences)) 31 | print("Found %d unique words tokens." % len(word_freq.items())) 32 | 33 | # Get the most common words and build index_to_word and word_to_index vectors 34 | vocab = word_freq.most_common(vocabulary_size - 1) 35 | index_to_word = [x[0] for x in vocab] 36 | index_to_word.append(unknown_token) 37 | word_to_index = dict([(w, i) for i, w in enumerate(index_to_word)]) 38 | 39 | print("Using vocabulary size %d." % vocabulary_size) 40 | print("The least frequent word in our vocabulary is '%s' and appeared %d times." 
% (vocab[-1][0], vocab[-1][1])) 41 | 42 | # Replace all words not in our vocabulary with the unknown token 43 | for i, sent in enumerate(tokenized_sentences): 44 | tokenized_sentences[i] = [w if w in word_to_index else unknown_token for w in sent] 45 | print("\nExample sentence: '%s'" % sentences[0]) 46 | print("\nExample sentence after Pre-processing: '%s'" % tokenized_sentences[0]) 47 | # get the training set 48 | X_train = [] 49 | y_train = [] 50 | for sen in tokenized_sentences: 51 | X_train.append(list([word_to_index[w] for w in sen[:-1]])) 52 | y_train.append(list([word_to_index[w] for w in sen[1:]])) 53 | 54 | X_train = np.asarray([[word_to_index[w] for w in sent[:-1]] for sent in tokenized_sentences]) 55 | y_train = np.asarray([[word_to_index[w] for w in sent[1:]] for sent in tokenized_sentences]) 56 | 57 | x_example, y_example = X_train[17], y_train[17] 58 | print("x:\n%s\n%s" % (" ".join([index_to_word[x] for x in x_example]), x_example)) 59 | print("\ny:\n%s\n%s" % (" ".join([index_to_word[x] for x in y_example]), y_example)) 60 | return (X_train, y_train) 61 | 62 | if __name__ == "__main__": 63 | X_train, y_train = get_data() 64 | print(type(X_train[0])) -------------------------------------------------------------------------------- /examples/rnn_language_model/rnn_numpy.py: -------------------------------------------------------------------------------- 1 | """ 2 | RNN model implemented by Numpy library 3 | author: Ye Hu 4 | 2016/12/17 5 | from " https://github.com/dennybritz/rnn-tutorial-rnnlm/blob/master/RNNLM.ipynb " 6 | """ 7 | import sys 8 | import operator 9 | from datetime import datetime 10 | import timeit 11 | import numpy as np 12 | 13 | from input_data_rnn import get_data 14 | 15 | def softmax(x): 16 | xt = np.exp(x - np.max(x)) 17 | return xt / np.sum(xt) 18 | 19 | class RNN_np(object): 20 | """A simple rnn class with numpy""" 21 | def __init__(self, word_dim, hidden_dim=100, bptt_truncate=-1): 22 | """ 23 | """ 24 | # keep 25 | self.word_dim = word_dim 26 | self.hidden_dim = hidden_dim 27 | self.bptt_truncate = bptt_truncate 28 | # Randomly initialize the params 29 | bound = np.sqrt(1.0 / self.word_dim) 30 | self.U = np.random.uniform(-bound, bound, size=[self.word_dim, self.hidden_dim]) # input 31 | bound = np.sqrt(1.0 / self.hidden_dim) 32 | self.V = np.random.uniform(-bound, bound, size=[self.hidden_dim, self.word_dim]) # output 33 | self.W = np.random.uniform(-bound, bound, size=[self.hidden_dim, self.hidden_dim]) # old memeory 34 | 35 | def forward_propagation(self, x): 36 | """ 37 | Forward propagation 38 | """ 39 | sequence_dim = len(x) # time steps, also sequence dim 40 | # keep the hidden states 41 | s = np.zeros((sequence_dim+1, self.hidden_dim)) 42 | # the initial hidden of time step 0 (last) 43 | s[-1] = np.zeros((self.hidden_dim)) 44 | # the output of each time step 45 | o = np.zeros((sequence_dim, self.word_dim)) 46 | # for each time step 47 | for t in range(sequence_dim): 48 | # indeing with one-hot vector 49 | s[t] = np.tanh(self.U[x[t], :] + np.dot(s[t-1], self.W)) 50 | o[t] = softmax(np.dot(s[t], self.V)) 51 | 52 | return (o, s) 53 | 54 | def predict(self, x): 55 | """Give word with the highest probability """ 56 | o, s = self.forward_propagation(x) 57 | return np.argmax(o, axis=1) # for each time step 58 | 59 | def calculate_total_loss(self, xs, ys): 60 | """Cross entropy loss""" 61 | loss = 0 62 | # for each sequence 63 | for i in range(len(ys)): 64 | o, _ = self.forward_propagation(xs[i]) 65 | correct_predictions = o[np.arange(len(ys[i])), 
ys[i]] 66 | loss += -1.0*np.sum(np.log(correct_predictions)) 67 | return loss 68 | 69 | def calculate_loss(self, xs, ys): 70 | """""" 71 | # the training examples 72 | N = np.sum((len(e) for e in ys)) 73 | return self.calculate_total_loss(xs, ys)/float(N) 74 | 75 | def bptt(self, x, y): 76 | """Compute the gradients by BPTT""" 77 | N = len(x) # time steps, also sequence dim 78 | # Perform forward propagation 79 | o, s = self.forward_propagation(x) 80 | # the initial gradients 81 | dLdU = np.zeros(self.U.shape) 82 | dLdW = np.zeros(self.W.shape) 83 | dLdV = np.zeros(self.V.shape) 84 | # dL/do 85 | delta_o = o 86 | delta_o[np.arange(N), y] += -1.0 87 | # for each time step (also each output) 88 | for t in np.arange(N)[::-1]: 89 | # dL/dV 90 | dLdV += np.outer(s[t], delta_o[t]) 91 | # dL/ds 92 | delta_t = np.dot(self.V, delta_o[t])*(1 - (s[t]**2)) 93 | # Backpropagation through time (for at most self.bptt_truncate steps) 94 | for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]: 95 | #print("Backpropagation step t=%d bptt step=%d " % (t, bptt_step)) 96 | dLdW += np.outer(s[bptt_step-1], delta_t) 97 | dLdU[x[bptt_step], :] += delta_t 98 | # Update delta for next time step 99 | delta_t = np.dot(self.W, delta_t)*(1 - (s[bptt_step-1]**2)) 100 | return (dLdU, dLdV, dLdW) 101 | 102 | def gradient_check(self, x, y, h=0.001, error_threshold=0.01): 103 | # Calculate the gradients using backpropagation. We want to checker if these are correct. 104 | bptt_gradients = model.bptt(x, y) 105 | # List of all parameters we want to check. 106 | model_parameters = ['U', 'V', 'W'] 107 | # Gradient check for each parameter 108 | for pidx, pname in enumerate(model_parameters): 109 | # Get the actual parameter value from the mode, e.g. model.W 110 | parameter = operator.attrgetter(pname)(self) 111 | print("Performing gradient check for parameter %s with shape %s." % (pname, str(parameter.shape))) 112 | # Iterate over each element of the parameter matrix, e.g. (0,0), (0,1), ... 113 | it = np.nditer(parameter, flags=['multi_index'], op_flags=['readwrite']) 114 | while not it.finished: 115 | ix = it.multi_index 116 | # Save the original value so we can reset it later 117 | original_value = parameter[ix] 118 | # Estimate the gradient using (f(x+h) - f(x-h))/(2*h) 119 | parameter[ix] = original_value + h 120 | gradplus = model.calculate_total_loss([x], [y]) 121 | parameter[ix] = original_value - h 122 | gradminus = model.calculate_total_loss([x], [y]) 123 | estimated_gradient = (gradplus - gradminus) / (2 * h) 124 | # Reset parameter to original value 125 | parameter[ix] = original_value 126 | # The gradient for this parameter calculated using backpropagation 127 | backprop_gradient = bptt_gradients[pidx][ix] 128 | # calculate The relative error: (|x - y|/(|x| + |y|)) 129 | relative_error = np.abs(backprop_gradient - estimated_gradient) / ( 130 | np.abs(backprop_gradient) + np.abs(estimated_gradient)) 131 | # If the error is to large fail the gradient check 132 | if relative_error > error_threshold: 133 | print("Gradient Check ERROR: parameter=%s ix=%s" % (pname, ix)) 134 | print("+h Loss: %f" % gradplus) 135 | print("-h Loss: %f" % gradminus) 136 | print("Estimated_gradient: %f" % estimated_gradient) 137 | print("Backpropagation gradient: %f" % backprop_gradient) 138 | print("Relative Error: %f" % relative_error) 139 | return 140 | it.iternext() 141 | print("Gradient check for parameter %s passed." 
% (pname)) 142 | 143 | def sgd(self, x, y, learning_rate): 144 | """Train the model with SGD""" 145 | # Compute the gradients 146 | dLdU, dLdV, dLdW = self.bptt(x, y) 147 | # Update the parameters 148 | self.U += -learning_rate * dLdU 149 | self.W += -learning_rate * dLdW 150 | self.V += -learning_rate * dLdV 151 | 152 | 153 | def train_rnn_with_sgd(model, X_train, y_train, learning_rate=0.005, n_epochs=100, 154 | evaluate_loss_after=5): 155 | """""" 156 | N = len(X_train) # number of training examples 157 | losses = [] 158 | num_examples_seen = 0 159 | for epoch in range(n_epochs): 160 | # if evaluate the loss 161 | if epoch % evaluate_loss_after == 0: 162 | loss = model.calculate_loss(X_train, y_train) 163 | losses.append((num_examples_seen, loss)) 164 | time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 165 | print("%s: Loss after num_examples_seen=%d epoch=%d: %f" % 166 | (time, num_examples_seen, epoch, loss)) 167 | # Adjust the learning rate if loss increases 168 | if (len(losses) > 1 and losses[-1][1] > losses[-2][1]): 169 | learning_rate *= 0.5 170 | print("Setting learning rate to %f" % learning_rate) 171 | sys.stdout.flush() 172 | # Training 173 | for i in range(N): 174 | model.sgd(X_train[i], y_train[i], learning_rate=learning_rate) 175 | num_examples_seen += 1 176 | 177 | 178 | if __name__ == "__main__": 179 | 180 | np.random.seed(10) 181 | vocabulary_size = 8000 182 | X_train, y_train = get_data(vocabulary_size=vocabulary_size) 183 | 184 | model = RNN_np(word_dim=8000, bptt_truncate=-1) 185 | start_time = timeit.default_timer() 186 | train_rnn_with_sgd(model, X_train[:1000], y_train[:1000], n_epochs=10, evaluate_loss_after=1) 187 | end_time = timeit.default_timer() 188 | print("Time elapsed {0} seconds".format((end_time-start_time))) 189 | 190 | 191 | 192 | -------------------------------------------------------------------------------- /examples/rnn_language_model/rnn_tensorflow.py: -------------------------------------------------------------------------------- 1 | """ 2 | A simple RNN model implemented by Tensorflow 3 | author: Ye Hu 4 | 2016/12/24 5 | """ 6 | import timeit 7 | from datetime import datetime 8 | import numpy as np 9 | import tensorflow as tf 10 | 11 | from input_data_rnn import get_data 12 | 13 | class RNN_tf(object): 14 | """ 15 | A RNN class for the language model 16 | """ 17 | def __init__(self, inpt=None, word_dim=8000, hidden_dim=100, bptt_truncate=4): 18 | """ 19 | :param inpt: tf.Tensor, the input tensor 20 | :param word_dim: int, the number of word in the input sentence 21 | :param hidden_dim: int, the size of hidden units 22 | :param bptt_truncate: int, (TO DO:) 23 | """ 24 | self.word_dim = word_dim 25 | self.hidden_dim = hidden_dim 26 | self.bptt_truncate = bptt_truncate 27 | if inpt is None: 28 | inpt = tf.placeholder(tf.int32, shape=[None, ]) 29 | self.x = inpt 30 | self.y = tf.placeholder(tf.int32, shape=[None, ]) 31 | 32 | # Initialize the network parameters 33 | bounds = np.sqrt(1.0/self.word_dim) 34 | # Input weight matrix 35 | self.U = tf.Variable(tf.random_uniform([self.word_dim, self.hidden_dim], minval=-bounds, maxval=bounds), 36 | name="U") 37 | bounds = np.sqrt(1.0/self.hidden_dim) 38 | self.W = tf.Variable(tf.random_uniform([self.hidden_dim, self.hidden_dim], minval=-bounds, maxval=bounds), 39 | name="W") # old state weight matrix 40 | self.V = tf.Variable(tf.random_uniform([self.hidden_dim, self.word_dim], minval=-bounds, maxval=bounds), 41 | name="V") # the output weight matrix 42 | # Keep track of all parameters for training 43 | 
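        # Shape summary (added note, restating the code above): U is
        # [word_dim, hidden_dim], W is [hidden_dim, hidden_dim] and V is
        # [hidden_dim, word_dim], so __model_build__ below implements the
        # recurrence s_t = tanh(U[x_t] + s_{t-1} @ W) followed by
        # o_t = softmax(s_t @ V), one softmax over the vocabulary per time step.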
self.params = [self.U, self.W, self.V] 44 | # Build the model 45 | self.__model_build__() 46 | 47 | def __model_build__(self): 48 | """ 49 | A private method to build the RNN model 50 | """ 51 | # The inner function for forward propagation 52 | def forward_propagation(s_t_prv, x_t): 53 | s_t = tf.nn.tanh(tf.slice(self.U, [x_t, 0], [1, -1]) + tf.matmul(s_t_prv, self.W)) 54 | return s_t 55 | # Use scan function to get the hidden state of all times 56 | s = tf.scan(forward_propagation, self.x, initializer=tf.zeros([1, self.hidden_dim])) # [seq_len, 1, hidden_dim] 57 | s = tf.squeeze(s) # [seq_len, hidden_dim] 58 | # The output 59 | o_wx = tf.matmul(s, self.V) 60 | o = tf.nn.softmax(o_wx) 61 | # The right prediction 62 | self.prediction = tf.argmax(o, axis=1) 63 | # The cost for training 64 | self.cost = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(o_wx, self.y)) 65 | self.loss = self.cost / tf.cast(tf.size(self.x), tf.float32) 66 | 67 | 68 | 69 | def train_rnn_with_sgd(sess, model, X_train, y_train, learning_rate=0.005, n_epochs=100, 70 | evaluate_loss_after=5): 71 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(model.cost, var_list=model.params) 72 | N = len(X_train) # number of training examples 73 | print("Start training...") 74 | start_time = timeit.default_timer() 75 | for epoch in range(n_epochs): 76 | # If output the loss for all training examples 77 | if epoch % evaluate_loss_after == 0: 78 | losses = 0 79 | for i in range(N): 80 | losses += sess.run(model.loss, feed_dict={model.x: X_train[i], model.y: y_train[i]}) 81 | time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 82 | print("\t{0}:Loss after Epoch {1} is {2}".format(time, epoch, losses/N)) 83 | # Traing each by each 84 | for i in range(N): 85 | sess.run(train_op, feed_dict={model.x: X_train[i], model.y: y_train[i]}) 86 | end_time = timeit.default_timer() 87 | print("Finished!") 88 | print("Time elapsed {0} minutes.".format((end_time-start_time)/60.0)) 89 | 90 | if __name__ == "__main__": 91 | np.random.seed(10) 92 | tf.set_random_seed(1111) 93 | vocabulary_size = 8000 94 | X_train, y_train = get_data(vocabulary_size=vocabulary_size) 95 | 96 | with tf.Session() as sess: 97 | model = RNN_tf(inpt=None, word_dim=8000, hidden_dim=100) 98 | sess.run(tf.global_variables_initializer()) 99 | train_rnn_with_sgd(sess, model, X_train[:1000], y_train[:1000], n_epochs=10, evaluate_loss_after=1) -------------------------------------------------------------------------------- /examples/test.py: -------------------------------------------------------------------------------- 1 | # test 2 | -------------------------------------------------------------------------------- /models/cnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convolution neural network 3 | author: Ye Hu 4 | 2016/12/15 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | import input_data 9 | from logisticRegression import LogisticRegression 10 | from mlp import HiddenLayer 11 | 12 | class ConvLayer(object): 13 | """ 14 | A convolution layer 15 | """ 16 | def __init__(self, inpt, filter_shape, strides=(1, 1, 1, 1), 17 | padding="SAME", activation=tf.nn.relu, bias_setting=True): 18 | """ 19 | inpt: tf.Tensor, shape [n_examples, witdth, height, channels] 20 | filter_shape: list or tuple, [witdth, height. 
channels, filter_nums] 21 | strides: list or tuple, the step of filter 22 | padding: 23 | activation: 24 | bias_setting: 25 | """ 26 | self.input = inpt 27 | # initializes the filter 28 | self.W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), dtype=tf.float32) 29 | if bias_setting: 30 | self.b = tf.Variable(tf.truncated_normal(filter_shape[-1:], stddev=0.1), 31 | dtype=tf.float32) 32 | else: 33 | self.b = None 34 | conv_output = tf.nn.conv2d(self.input, filter=self.W, strides=strides, 35 | padding=padding) 36 | conv_output = conv_output + self.b if self.b is not None else conv_output 37 | # the output 38 | self.output = conv_output if activation is None else activation(conv_output) 39 | # the params 40 | self.params = [self.W, self.b] if self.b is not None else [self.W, ] 41 | 42 | 43 | class MaxPoolLayer(object): 44 | """pool layer""" 45 | def __init__(self, inpt, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding="SAME"): 46 | """ 47 | """ 48 | self.input = inpt 49 | # the output 50 | self.output = tf.nn.max_pool(self.input, ksize=ksize, strides=strides, padding=padding) 51 | self.params = [] 52 | 53 | 54 | class FlattenLayer(object): 55 | """Flatten layer""" 56 | def __init__(self, inpt, shape): 57 | self.input = inpt 58 | self.output = tf.reshape(self.input, shape=shape) 59 | self.params = [] 60 | 61 | class DropoutLayer(object): 62 | """Dropout layer""" 63 | def __init__(self, inpt, keep_prob): 64 | """ 65 | keep_prob: float (0, 1] 66 | """ 67 | self.keep_prob = tf.placeholder(tf.float32) 68 | self.input = inpt 69 | self.output = tf.nn.dropout(self.input, keep_prob=self.keep_prob) 70 | self.train_dicts = {self.keep_prob: keep_prob} 71 | self.pred_dicts = {self.keep_prob: 1.0} 72 | 73 | if __name__ == "__main__": 74 | # mnist examples 75 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 76 | # define input and output placehoders 77 | x = tf.placeholder(tf.float32, shape=[None, 784]) 78 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 79 | # reshape 80 | inpt = tf.reshape(x, shape=[-1, 28, 28, 1]) 81 | 82 | # create network 83 | # params for training 84 | # conv and pool layer0 85 | layer0_conv = ConvLayer(inpt, filter_shape=[5, 5, 1, 32], strides=[1, 1, 1, 1], activation=tf.nn.relu, 86 | padding="SAME") # [?, 28, 28, 32] 87 | layer0_pool = MaxPoolLayer(layer0_conv.output, ksize=[1, 2, 2, 1], 88 | strides=[1, 2, 2, 1]) # [?, 14, 14, 32] 89 | # conv and pool layer1 90 | layer1_conv = ConvLayer(layer0_pool.output, filter_shape=[5, 5, 32, 64], strides=[1, 1, 1, 1], 91 | activation=tf.nn.relu, padding="SAME") # [?, 14, 14, 64] 92 | layer1_pool = MaxPoolLayer(layer1_conv.output, ksize=[1, 2, 2, 1], 93 | strides=[1, 2, 2, 1]) # [?, 7, 7, 64] 94 | # flatten layer 95 | layer2_flatten = FlattenLayer(layer1_pool.output, shape=[-1, 7*7*64]) 96 | # fully-connected layer 97 | layer3_fullyconn = HiddenLayer(layer2_flatten.output, n_in=7*7*64, n_out=256, activation=tf.nn.relu) 98 | # dropout layer 99 | layer3_dropout = DropoutLayer(layer3_fullyconn.output, keep_prob=0.5) 100 | # the output layer 101 | layer4_output = LogisticRegression(layer3_dropout.output, n_in=256, n_out=10) 102 | 103 | # params for training 104 | params = layer0_conv.params + layer1_conv.params + layer3_fullyconn.params + layer4_output.params 105 | # train dicts for dropout 106 | train_dicts = layer3_dropout.train_dicts 107 | # prediction dicts for dropout 108 | pred_dicts = layer3_dropout.pred_dicts 109 | 110 | # get cost 111 | cost = layer4_output.cost(y_) 112 | # accuracy 113 | accuracy = 
layer4_output.accuarcy(y_)
114 |     predictor = layer4_output.y_pred
115 |     # Define the training op
116 |     train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(
117 |         cost, var_list=params)
118 |
119 |     # Initialize all variables
120 |     init = tf.global_variables_initializer()
121 |
122 |     # The training parameters
123 |     training_epochs = 10
124 |     batch_size = 100
125 |     display_step = 1
126 |
127 |     # Start training
128 |     print("Start to train...")
129 |     with tf.Session() as sess:
130 |         sess.run(init)
131 |         for epoch in range(training_epochs):
132 |             avg_cost = 0.0
133 |             batch_num = int(mnist.train.num_examples / batch_size)
134 |             for i in range(batch_num):
135 |                 x_batch, y_batch = mnist.train.next_batch(batch_size)
136 |                 # Run one training step
137 |                 train_dicts.update({x: x_batch, y_: y_batch})
138 |
139 |                 sess.run(train_op, feed_dict=train_dicts)
140 |                 # Accumulate the cost
141 |                 pred_dicts.update({x: x_batch, y_: y_batch})
142 |                 avg_cost += sess.run(cost, feed_dict=pred_dicts) / batch_num
143 |             # Report progress
144 |             if epoch % display_step == 0:
145 |                 pred_dicts.update({x: mnist.validation.images,
146 |                                    y_: mnist.validation.labels})
147 |                 val_acc = sess.run(accuracy, feed_dict=pred_dicts)
148 |                 print("Epoch {0} cost: {1}, validation accuracy: {2}".format(epoch,
149 |                     avg_cost, val_acc))
150 |
151 |         print("Finished!")
152 |         test_x = mnist.test.images[:10]
153 |         test_y = mnist.test.labels[:10]
154 |         print("True labels:")
155 |         print(" ", np.argmax(test_y, 1))
156 |         print("Prediction:")
157 |         pred_dicts.update({x: test_x})
158 |         print(" ", sess.run(predictor, feed_dict=pred_dicts))
159 |
160 |
161 |
162 |
163 |
164 |
165 |
-------------------------------------------------------------------------------- /models/da.py: --------------------------------------------------------------------------------
1 | """
2 | Denoising Autoencoder (DA)
3 | author: Ye Hu
4 | 2016/12/16
5 | """
6 | import os
7 | import timeit
8 |
9 | import numpy as np
10 | import tensorflow as tf
11 | from PIL import Image
12 |
13 | import input_data
14 | from utils import tile_raster_images
15 |
16 |
17 |
18 | class DA(object):
19 |     """A denoising autoencoder class (using tied weights)"""
20 |     def __init__(self, inpt, n_visiable=784, n_hidden=500, W=None, bhid=None,
21 |                  bvis=None, activation=tf.nn.sigmoid):
22 |         """
23 |         inpt: tf.Tensor, the input
24 |         :param n_visiable: int, number of visible units
25 |         :param n_hidden: int, number of hidden units
26 |         :param W, bhid, bvis: tf.Tensor, the weight, bias tensor
27 |         """
28 |         self.n_visiable = n_visiable
29 |         self.n_hidden = n_hidden
30 |         # initialize the weight and bias if not given
31 |         if W is None:
32 |             bound = 4*np.sqrt(6.0 / (self.n_hidden + self.n_visiable))
33 |             W = tf.Variable(tf.random_uniform([self.n_visiable, self.n_hidden], minval=-bound,
34 |                             maxval=bound), dtype=tf.float32)
35 |         if bhid is None:
36 |             bhid = tf.Variable(tf.zeros([n_hidden,]), dtype=tf.float32)
37 |         if bvis is None:
38 |             bvis = tf.Variable(tf.zeros([n_visiable,]), dtype=tf.float32)
39 |         self.W = W
40 |         self.b = bhid
41 |         # reconstruct params
42 |         self.b_prime = bvis
43 |         self.W_prime = tf.transpose(self.W)
44 |         # keep track of input and params
45 |         self.input = inpt
46 |         self.params = [self.W, self.b, self.b_prime]
47 |         # activation
48 |         self.activation = activation
49 |
50 |     def get_encode_values(self, inpt):
51 |         """Compute the encode values"""
52 |         return self.activation(tf.matmul(inpt, self.W) + self.b)
53 |
54 |     def get_decode_values(self, encode_input):
55 |         """Get the reconstructed values"""
56 |         return self.activation(tf.matmul(encode_input, self.W_prime) + self.b_prime)
57 |
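    # Convenience sketch (an illustrative helper, not in the original file):
    # chaining the two methods above gives the full reconstruction pass; with
    # the tied weights set in __init__ (W_prime = transpose(W)), the decoder
    # adds no extra weight matrix to train.
    def reconstruct(self, inpt):
        """Encode then decode the (uncorrupted) input"""
        return self.get_decode_values(self.get_encode_values(inpt))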
58 | def get_corrupted_input(self, inpt, corruption_level): 59 | """ 60 | Randomly zero the element of input 61 | corruption_level: float, (0,1] 62 | """ 63 | # the shape of input 64 | input_shape = tf.shape(inpt) 65 | # the probablity for corruption 66 | probs = tf.tile(tf.log([[corruption_level, 1-corruption_level]]), 67 | multiples=[input_shape[0], 1]) 68 | return tf.mul(tf.cast(tf.multinomial(probs, num_samples=input_shape[1]), 69 | dtype=tf.float32), inpt) 70 | 71 | def get_cost(self, corruption_level=0.3): 72 | """Get the cost for training""" 73 | corrupted_input = self.get_corrupted_input(self.input, corruption_level) 74 | encode_output = self.get_encode_values(corrupted_input) 75 | decode_output = self.get_decode_values(encode_output) 76 | # use cross_entropy 77 | cross = tf.mul(self.input, tf.log(decode_output)) + \ 78 | tf.mul(1.0-self.input, tf.log(1.0-decode_output)) 79 | cost = -tf.reduce_mean(tf.reduce_sum(cross, axis=1)) 80 | return cost 81 | 82 | if __name__ == "__main__": 83 | # mnist examples 84 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 85 | # define input 86 | x = tf.placeholder(tf.float32, shape=[None, 784]) 87 | # set random_seed 88 | tf.set_random_seed(seed=99999) 89 | # the DA model 90 | da = DA(x, n_visiable=784, n_hidden=500) 91 | # corruption level 92 | corruption_level = 0.0 93 | learning_rate = 0.1 94 | cost = da.get_cost(corruption_level) 95 | params = da.params 96 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, var_list=params) 97 | init = tf.global_variables_initializer() 98 | 99 | output_folder = "dA_plots" 100 | if not os.path.isdir(output_folder): 101 | os.makedirs(output_folder) 102 | os.chdir(output_folder) 103 | 104 | training_epochs = 10 105 | batch_size = 100 106 | display_step = 1 107 | print("Start training...") 108 | start_time = timeit.default_timer() 109 | with tf.Session() as sess: 110 | sess.run(init) 111 | for epoch in range(training_epochs): 112 | avg_cost = 0.0 113 | batch_num = int(mnist.train.num_examples / batch_size) 114 | for i in range(batch_num): 115 | x_batch, _ = mnist.train.next_batch(batch_size) 116 | # 训练 117 | sess.run(train_op, feed_dict={x: x_batch}) 118 | # 计算cost 119 | avg_cost += sess.run(cost, feed_dict={x: x_batch,}) / batch_num 120 | # 输出 121 | if epoch % display_step == 0: 122 | print("Epoch {0} cost: {1}".format(epoch, avg_cost)) 123 | 124 | end_time = timeit.default_timer() 125 | training_time = end_time - start_time 126 | print("Finished!") 127 | print(" The {0}%% corruption code ran for {1}.".format(corruption_level*100, training_time/60,)) 128 | W_value = sess.run(da.W_prime) 129 | image = Image.fromarray(tile_raster_images( 130 | X=W_value, 131 | img_shape=(28, 28), tile_shape=(10, 10), 132 | tile_spacing=(1, 1))) 133 | image.save('filters_corruption_{0}.png'.format(int(corruption_level*100))) 134 | 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /models/dbn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deep Belief Network 3 | author: Ye Hu 4 | 2016/12/20 5 | """ 6 | import timeit 7 | import numpy as np 8 | import tensorflow as tf 9 | import input_data 10 | from logisticRegression import LogisticRegression 11 | from mlp import HiddenLayer 12 | from rbm import RBM 13 | 14 | class DBN(object): 15 | """ 16 | An implement of deep belief network 17 | The hidden layers are firstly pretrained by RBM, then DBN is treated as a normal 18 | MLP by adding a output 
layer. 19 | """ 20 | def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=[500, 500]): 21 | """ 22 | :param n_in: int, the dimension of input 23 | :param n_out: int, the dimension of output 24 | :param hidden_layers_sizes: list or tuple, the hidden layer sizes 25 | """ 26 | # Number of layers 27 | assert len(hidden_layers_sizes) > 0 28 | self.n_layers = len(hidden_layers_sizes) 29 | self.layers = [] # normal sigmoid layer 30 | self.rbm_layers = [] # RBM layer 31 | self.params = [] # keep track of params for training 32 | 33 | # Define the input and output 34 | self.x = tf.placeholder(tf.float32, shape=[None, n_in]) 35 | self.y = tf.placeholder(tf.float32, shape=[None, n_out]) 36 | 37 | # Contruct the layers of DBN 38 | for i in range(self.n_layers): 39 | if i == 0: 40 | layer_input = self.x 41 | input_size = n_in 42 | else: 43 | layer_input = self.layers[i-1].output 44 | input_size = hidden_layers_sizes[i-1] 45 | # Sigmoid layer 46 | sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size, n_out=hidden_layers_sizes[i], 47 | activation=tf.nn.sigmoid) 48 | self.layers.append(sigmoid_layer) 49 | # Add the parameters for finetuning 50 | self.params.extend(sigmoid_layer.params) 51 | # Create the RBM layer 52 | self.rbm_layers.append(RBM(inpt=layer_input, n_visiable=input_size, n_hidden=hidden_layers_sizes[i], 53 | W=sigmoid_layer.W, hbias=sigmoid_layer.b)) 54 | # We use the LogisticRegression layer as the output layer 55 | self.output_layer = LogisticRegression(inpt=self.layers[-1].output, n_in=hidden_layers_sizes[-1], 56 | n_out=n_out) 57 | self.params.extend(self.output_layer.params) 58 | # The finetuning cost 59 | self.cost = self.output_layer.cost(self.y) 60 | # The accuracy 61 | self.accuracy = self.output_layer.accuarcy(self.y) 62 | 63 | def pretrain(self, sess, X_train, batch_size=50, pretraining_epochs=10, lr=0.1, k=1, 64 | display_step=1): 65 | """ 66 | Pretrain the layers (just train the RBM layers) 67 | :param sess: tf.Session 68 | :param X_train: the input of the train set (You might modidy this function if you do not use the desgined mnist) 69 | :param batch_size: int 70 | :param lr: float 71 | :param k: int, use CD-k 72 | :param pretraining_epoch: int 73 | :param display_step: int 74 | """ 75 | print('Starting pretraining...\n') 76 | start_time = timeit.default_timer() 77 | batch_num = int(X_train.train.num_examples / batch_size) 78 | # Pretrain layer by layer 79 | for i in range(self.n_layers): 80 | cost = self.rbm_layers[i].get_reconstruction_cost() 81 | train_ops = self.rbm_layers[i].get_train_ops(learning_rate=lr, k=k, persistent=None) 82 | for epoch in range(pretraining_epochs): 83 | avg_cost = 0.0 84 | for j in range(batch_num): 85 | x_batch, _ = X_train.train.next_batch(batch_size) 86 | # 训练 87 | sess.run(train_ops, feed_dict={self.x: x_batch}) 88 | # 计算cost 89 | avg_cost += sess.run(cost, feed_dict={self.x: x_batch,}) / batch_num 90 | # 输出 91 | if epoch % display_step == 0: 92 | print("\tPretraing layer {0} Epoch {1} cost: {2}".format(i, epoch, avg_cost)) 93 | 94 | end_time = timeit.default_timer() 95 | print("\nThe pretraining process ran for {0} minutes".format((end_time - start_time) / 60)) 96 | 97 | def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100, lr=0.1, 98 | display_step=1): 99 | """ 100 | Finetuing the network 101 | """ 102 | print("\nStart finetuning...\n") 103 | start_time = timeit.default_timer() 104 | train_op = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize( 105 | self.cost, var_list=self.params) 106 | for epoch 
in range(training_epochs): 107 | avg_cost = 0.0 108 | batch_num = int(trainSet.train.num_examples / batch_size) 109 | for i in range(batch_num): 110 | x_batch, y_batch = trainSet.train.next_batch(batch_size) 111 | # 训练 112 | sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch}) 113 | # 计算cost 114 | avg_cost += sess.run(self.cost, feed_dict= 115 | {self.x: x_batch, self.y: y_batch}) / batch_num 116 | # 输出 117 | if epoch % display_step == 0: 118 | val_acc = sess.run(self.accuracy, feed_dict={self.x: trainSet.validation.images, 119 | self.y: trainSet.validation.labels}) 120 | print("\tEpoch {0} cost: {1}, validation accuacy: {2}".format(epoch, avg_cost, val_acc)) 121 | 122 | end_time = timeit.default_timer() 123 | print("\nThe finetuning process ran for {0} minutes".format((end_time - start_time) / 60)) 124 | 125 | if __name__ == "__main__": 126 | # mnist examples 127 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 128 | dbn = DBN(n_in=784, n_out=10, hidden_layers_sizes=[500, 500, 500]) 129 | sess = tf.Session() 130 | init = tf.global_variables_initializer() 131 | sess.run(init) 132 | # set random_seed 133 | tf.set_random_seed(seed=1111) 134 | dbn.pretrain(sess, X_train=mnist) 135 | dbn.finetuning(sess, trainSet=mnist) -------------------------------------------------------------------------------- /models/gbrbm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Restricted Boltzmann Machines (RBM) 3 | author: Ye Hu 4 | 2016/12/18 5 | """ 6 | import os 7 | import timeit 8 | import numpy as np 9 | import tensorflow as tf 10 | from PIL import Image 11 | from utils import tile_raster_images 12 | import input_data 13 | from rbm import RBM 14 | 15 | 16 | class GBRBM(RBM): 17 | """ 18 | Gaussian-binary Restricted Boltzmann Machines 19 | Note we assume that the standard deviation is a constant (not training parameter) 20 | You better normalize you data with range of [0, 1.0]. 21 | """ 22 | def __init__(self, inpt=None, n_visiable=784, n_hidden=500, sigma=1.0, W=None, 23 | hbias=None, vbias=None, sample_visible=True): 24 | """ 25 | :param inpt: Tensor, the input tensor [None, n_visiable] 26 | :param n_visiable: int, number of visiable units 27 | :param n_hidden: int, number of hidden units 28 | :param sigma: float, the standard deviation (note we use the same σ for all visible units) 29 | :param W, hbias, vbias: Tensor, the parameters of RBM (tf.Variable) 30 | :param sample_visble: bool, if True, do gaussian sampling. 
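        Added note for clarity: with a fixed sigma, the model below uses the energy
        E(v, h) = ||v - vbias||^2 / (2*sigma^2) - hbias'h - v'Wh / sigma^2,
        so p(h_j = 1 | v) = sigmoid(hbias_j + (vW)_j / sigma^2) as computed in
        propup(), and the quadratic visible term reappears in free_energy().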
31 | """ 32 | super(GBRBM, self).__init__(inpt, n_visiable, n_hidden, W, hbias, vbias) 33 | self.sigma = sigma 34 | self.sample_visible = sample_visible 35 | 36 | @staticmethod 37 | def sample_gaussian(x, sigma): 38 | return x + tf.random_normal(tf.shape(x), mean=0.0, stddev=sigma) 39 | 40 | def propdown(self, h): 41 | """Compute the mean for visible units given hidden units""" 42 | return tf.matmul(h, tf.transpose(self.W)) + self.vbias 43 | 44 | def sample_v_given_h(self, h0_sample): 45 | """Sampling the visiable units given hidden sample""" 46 | v1_mean = self.propdown(h0_sample) 47 | v1_sample = v1_mean 48 | if self.sample_visible: 49 | v1_sample = GBRBM.sample_gaussian(v1_mean, self.sigma) 50 | return (v1_mean, v1_sample) 51 | 52 | def propup(self, v): 53 | """Compute the sigmoid activation for hidden units given visible units""" 54 | return tf.nn.sigmoid(tf.matmul(v, self.W) / self.sigma**2 + self.hbias) 55 | 56 | def free_energy(self, v_sample): 57 | """Compute the free energy""" 58 | wx_b = tf.matmul(v_sample, self.W) / self.sigma**2 + self.hbias 59 | vbias_term = tf.reduce_sum(0.5 * tf.square(v_sample - self.vbias) / self.sigma**2, axis=1) 60 | hidden_term = tf.reduce_sum(tf.log(1.0 + tf.exp(wx_b)), axis=1) 61 | return -hidden_term + vbias_term 62 | 63 | def get_reconstruction_cost(self): 64 | """Compute the mse of the original input and the reconstruction""" 65 | activation_h = self.propup(self.input) 66 | activation_v = self.propdown(activation_h) 67 | mse = tf.reduce_mean(tf.reduce_sum(tf.square(self.input - activation_v), axis=1)) 68 | return mse 69 | 70 | 71 | 72 | if __name__ == "__main__": 73 | # mnist examples 74 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 75 | # define input 76 | x = tf.placeholder(tf.float32, shape=[None, 784]) 77 | # set random_seed 78 | tf.set_random_seed(seed=99999) 79 | np.random.seed(123) 80 | # the rbm model 81 | n_visiable, n_hidden = 784, 500 82 | rbm = GBRBM(x, n_visiable=n_visiable, n_hidden=n_hidden) 83 | 84 | learning_rate = 0.01 85 | batch_size = 50 86 | cost = rbm.get_reconstruction_cost() 87 | # Create the persistent variable 88 | #persistent_chain = tf.Variable(tf.zeros([batch_size, n_hidden]), dtype=tf.float32) 89 | persistent_chain = None 90 | train_ops = rbm.get_train_ops(learning_rate=learning_rate, k=1, persistent=persistent_chain) 91 | init = tf.global_variables_initializer() 92 | 93 | output_folder = "rbm_plots" 94 | if not os.path.isdir(output_folder): 95 | os.makedirs(output_folder) 96 | os.chdir(output_folder) 97 | 98 | training_epochs = 15 99 | display_step = 1 100 | print("Start training...") 101 | 102 | with tf.Session() as sess: 103 | start_time = timeit.default_timer() 104 | sess.run(init) 105 | for epoch in range(training_epochs): 106 | avg_cost = 0.0 107 | batch_num = int(mnist.train.num_examples / batch_size) 108 | for i in range(batch_num): 109 | x_batch, _ = mnist.train.next_batch(batch_size) 110 | # 训练 111 | sess.run(train_ops, feed_dict={x: x_batch}) 112 | # 计算cost 113 | avg_cost += sess.run(cost, feed_dict={x: x_batch,}) / batch_num 114 | # 输出 115 | if epoch % display_step == 0: 116 | print("Epoch {0} cost: {1}".format(epoch, avg_cost)) 117 | # Construct image from the weight matrix 118 | image = Image.fromarray( 119 | tile_raster_images( 120 | X=sess.run(tf.transpose(rbm.W)), 121 | img_shape=(28, 28), 122 | tile_shape=(10, 10), 123 | tile_spacing=(1, 1))) 124 | image.save("test_filters_at_epoch_{0}.png".format(epoch)) 125 | 126 | end_time = timeit.default_timer() 127 | training_time = end_time - 
start_time 128 | print("Finished!") 129 | print(" The training ran for {0} minutes.".format(training_time/60,)) 130 | 131 | # Randomly select the 'n_chains' examples 132 | n_chains = 20 133 | n_batch = 10 134 | n_samples = n_batch*2 135 | number_test_examples = mnist.test.num_examples 136 | test_indexs = np.random.randint(number_test_examples - n_chains*n_batch) 137 | test_samples = mnist.test.images[test_indexs:test_indexs+n_chains*n_batch] 138 | image_data = np.zeros((29*(n_samples+1)+1, 29*(n_chains)-1), 139 | dtype="uint8") 140 | # Add the original images 141 | for i in range(n_batch): 142 | image_data[2*i*29:2*i*29+28,:] = tile_raster_images(X=test_samples[i*n_batch:(i+1)*n_chains], 143 | img_shape=(28, 28), 144 | tile_shape=(1, n_chains), 145 | tile_spacing=(1, 1)) 146 | samples = sess.run(rbm.reconstruct(x), feed_dict={x:test_samples[i*n_batch:(i+1)*n_chains]}) 147 | image_data[(2*i+1)*29:(2*i+1)*29+28,:] = tile_raster_images(X=samples, 148 | img_shape=(28, 28), 149 | tile_shape=(1, n_chains), 150 | tile_spacing=(1, 1)) 151 | 152 | image = Image.fromarray(image_data) 153 | image.save("original_and_reconstruct.png") 154 | 155 | 156 | -------------------------------------------------------------------------------- /models/input_data.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # Fichero descargado de https://tensorflow.googlesource.com/tensorflow/+/master/tensorflow/g3doc/tutorials/mnist/input_data.py 3 | # --- 4 | # Copyright 2015 Google Inc. All Rights Reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | # ============================================================================== 18 | """Functions for downloading and reading MNIST data.""" 19 | from __future__ import absolute_import 20 | from __future__ import division 21 | from __future__ import print_function 22 | import gzip 23 | import os 24 | import numpy 25 | from six.moves import urllib 26 | from six.moves import xrange # pylint: disable=redefined-builtin 27 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 28 | def maybe_download(filename, work_directory): 29 | """Download the data from Yann's website, unless it's already here.""" 30 | if not os.path.exists(work_directory): 31 | os.mkdir(work_directory) 32 | filepath = os.path.join(work_directory, filename) 33 | if not os.path.exists(filepath): 34 | filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath) 35 | statinfo = os.stat(filepath) 36 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 37 | return filepath 38 | def _read32(bytestream): 39 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 40 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 41 | def extract_images(filename): 42 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 43 | print('Extracting', filename) 44 | with gzip.open(filename) as bytestream: 45 | magic = _read32(bytestream) 46 | if magic != 2051: 47 | raise ValueError( 48 | 'Invalid magic number %d in MNIST image file: %s' % 49 | (magic, filename)) 50 | num_images = _read32(bytestream) 51 | rows = _read32(bytestream) 52 | cols = _read32(bytestream) 53 | buf = bytestream.read(rows * cols * num_images) 54 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 55 | data = data.reshape(num_images, rows, cols, 1) 56 | return data 57 | def dense_to_one_hot(labels_dense, num_classes=10): 58 | """Convert class labels from scalars to one-hot vectors.""" 59 | num_labels = labels_dense.shape[0] 60 | index_offset = numpy.arange(num_labels) * num_classes 61 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 62 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 63 | return labels_one_hot 64 | def extract_labels(filename, one_hot=False): 65 | """Extract the labels into a 1D uint8 numpy array [index].""" 66 | print('Extracting', filename) 67 | with gzip.open(filename) as bytestream: 68 | magic = _read32(bytestream) 69 | if magic != 2049: 70 | raise ValueError( 71 | 'Invalid magic number %d in MNIST label file: %s' % 72 | (magic, filename)) 73 | num_items = _read32(bytestream) 74 | buf = bytestream.read(num_items) 75 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 76 | if one_hot: 77 | return dense_to_one_hot(labels) 78 | return labels 79 | class DataSet(object): 80 | def __init__(self, images, labels, fake_data=False): 81 | if fake_data: 82 | self._num_examples = 10000 83 | else: 84 | assert images.shape[0] == labels.shape[0], ( 85 | "images.shape: %s labels.shape: %s" % (images.shape, 86 | labels.shape)) 87 | self._num_examples = images.shape[0] 88 | # Convert shape from [num examples, rows, columns, depth] 89 | # to [num examples, rows*columns] (assuming depth == 1) 90 | assert images.shape[3] == 1 91 | images = images.reshape(images.shape[0], 92 | images.shape[1] * images.shape[2]) 93 | # Convert from [0, 255] -> [0.0, 1.0]. 
94 | images = images.astype(numpy.float32) 95 | images = numpy.multiply(images, 1.0 / 255.0) 96 | self._images = images 97 | self._labels = labels 98 | self._epochs_completed = 0 99 | self._index_in_epoch = 0 100 | @property 101 | def images(self): 102 | return self._images 103 | @property 104 | def labels(self): 105 | return self._labels 106 | @property 107 | def num_examples(self): 108 | return self._num_examples 109 | @property 110 | def epochs_completed(self): 111 | return self._epochs_completed 112 | def next_batch(self, batch_size, fake_data=False): 113 | """Return the next `batch_size` examples from this data set.""" 114 | if fake_data: 115 | fake_image = [1.0 for _ in xrange(784)] 116 | fake_label = 0 117 | return [fake_image for _ in xrange(batch_size)], [ 118 | fake_label for _ in xrange(batch_size)] 119 | start = self._index_in_epoch 120 | self._index_in_epoch += batch_size 121 | if self._index_in_epoch > self._num_examples: 122 | # Finished epoch 123 | self._epochs_completed += 1 124 | # Shuffle the data 125 | perm = numpy.arange(self._num_examples) 126 | numpy.random.shuffle(perm) 127 | self._images = self._images[perm] 128 | self._labels = self._labels[perm] 129 | # Start next epoch 130 | start = 0 131 | self._index_in_epoch = batch_size 132 | assert batch_size <= self._num_examples 133 | end = self._index_in_epoch 134 | return self._images[start:end], self._labels[start:end] 135 | def read_data_sets(train_dir, fake_data=False, one_hot=False): 136 | class DataSets(object): 137 | pass 138 | data_sets = DataSets() 139 | if fake_data: 140 | data_sets.train = DataSet([], [], fake_data=True) 141 | data_sets.validation = DataSet([], [], fake_data=True) 142 | data_sets.test = DataSet([], [], fake_data=True) 143 | return data_sets 144 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 145 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 146 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 147 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 148 | VALIDATION_SIZE = 5000 149 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 150 | train_images = extract_images(local_file) 151 | local_file = maybe_download(TRAIN_LABELS, train_dir) 152 | train_labels = extract_labels(local_file, one_hot=one_hot) 153 | local_file = maybe_download(TEST_IMAGES, train_dir) 154 | test_images = extract_images(local_file) 155 | local_file = maybe_download(TEST_LABELS, train_dir) 156 | test_labels = extract_labels(local_file, one_hot=one_hot) 157 | validation_images = train_images[:VALIDATION_SIZE] 158 | validation_labels = train_labels[:VALIDATION_SIZE] 159 | train_images = train_images[VALIDATION_SIZE:] 160 | train_labels = train_labels[VALIDATION_SIZE:] 161 | data_sets.train = DataSet(train_images, train_labels) 162 | data_sets.validation = DataSet(validation_images, validation_labels) 163 | data_sets.test = DataSet(test_images, test_labels) 164 | return data_sets 165 |
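166 | # --- Editor's note: a short usage sketch, not part of the original file. --- 167 | # read_data_sets() returns an object with train/validation/test DataSet members, and 168 | # next_batch() yields (images, labels) minibatches, as the models in this repo assume: 169 | # mnist = read_data_sets("MNIST_data/", one_hot=True) 170 | # batch_xs, batch_ys = mnist.train.next_batch(100)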
-------------------------------------------------------------------------------- /models/logisticRegression.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logistic Regression 3 | author: Ye Hu 4 | 2016/12/14 update 2017/02/16 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | import input_data 9 | 10 | class LogisticRegression(object): 11 | """Multi-class logistic regression class""" 12 | def __init__(self, inpt, n_in, n_out): 13 | """ 14 | inpt: tf.Tensor, (one minibatch) [None, n_in] 15 | n_in: int, number of input units 16 | n_out: int, number of output units 17 | """ 18 | # weight 19 | self.W = tf.Variable(tf.zeros([n_in, n_out], dtype=tf.float32)) 20 | # bias 21 | self.b = tf.Variable(tf.zeros([n_out,]), dtype=tf.float32) 22 | # activation output 23 | self.output = tf.nn.softmax(tf.matmul(inpt, self.W) + self.b) 24 | # prediction 25 | self.y_pred = tf.argmax(self.output, axis=1) 26 | # keep track of variables 27 | self.params = [self.W, self.b] 28 | 29 | def cost(self, y): 30 | """ 31 | y: tf.Tensor, the target of the input 32 | """ 33 | # cross_entropy 34 | return -tf.reduce_mean(tf.reduce_sum(y * tf.log(self.output), axis=1)) 35 | 36 | def accuarcy(self, y): 37 | """Compute the prediction accuracy (the misspelled method name is kept, since other models in this repo call it)""" 38 | correct_pred = tf.equal(self.y_pred, tf.argmax(y, axis=1)) 39 | return tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 40 | 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | # Load mnist dataset 46 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 47 | # Define placeholder for input and target 48 | x = tf.placeholder(tf.float32, shape=[None, 784]) 49 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 50 | 51 | # Construct model 52 | classifier = LogisticRegression(x, n_in=784, n_out=10) 53 | cost = classifier.cost(y_) 54 | accuracy = classifier.accuarcy(y_) 55 | predictor = classifier.y_pred 56 | # Define the train operation 57 | train_op = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize( 58 | cost, var_list=classifier.params) 59 | 60 | # Initialize all variables 61 | init = tf.global_variables_initializer() 62 | 63 | # Training settings 64 | training_epochs = 50 65 | batch_size = 100 66 | display_step = 5 67 | 68 | # Train loop 69 | print("Start training...") 70 | with tf.Session() as sess: 71 | sess.run(init) 72 | for epoch in range(training_epochs): 73 | avg_cost = 0.0 74 | batch_num = int(mnist.train.num_examples/batch_size) 75 | for i in range(batch_num): 76 | x_batch, y_batch = mnist.train.next_batch(batch_size) 77 | # Run train op 78 | c, _ = sess.run([cost, train_op], feed_dict={x: x_batch, y_: y_batch}) 79 | # Sum up cost 80 | avg_cost += c/batch_num 81 | 82 | if epoch % display_step == 0: 83 | val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images, 84 | y_: mnist.validation.labels}) 85 | print("Epoch {0} cost: {1}, validation accuracy: {2}".format(epoch, 86 | avg_cost, val_acc)) 87 | 88 | print("Finished!") 89 | test_x = mnist.test.images[:10] 90 | test_y = mnist.test.labels[:10] 91 | print("True labels:") 92 | print(" ", np.argmax(test_y, 1)) 93 | print("Prediction:") 94 | print(" ", sess.run(predictor, feed_dict={x: test_x})) 95 | 96 |
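97 | # --- Editor's note: an optional variant, not part of the original file. --- 98 | # tf.log(self.output) in cost() can produce NaN once a softmax probability underflows to 0; 99 | # a numerically safer cost works on the pre-softmax logits instead. A minimal sketch, 100 | # assuming the logits tf.matmul(inpt, W) + b were kept as an attribute: 101 | # cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))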
-------------------------------------------------------------------------------- /models/mlp.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multi-Layer Perceptron Class 3 | author: Ye Hu 4 | 2016/12/15 5 | """ 6 | import numpy as np 7 | import tensorflow as tf 8 | import input_data 9 | from logisticRegression import LogisticRegression 10 | 11 | class HiddenLayer(object): 12 | """Typical hidden layer of MLP""" 13 | def __init__(self, inpt, n_in, n_out, W=None, b=None, 14 | activation=tf.nn.sigmoid): 15 | """ 16 | inpt: tf.Tensor, shape [n_examples, n_in] 17 | n_in: int, the dimensionality of input 18 | n_out: int, number of hidden units 19 | W, b: tf.Tensor, weight and bias 20 | activation: tf.op, activation function 21 | """ 22 | if W is None: 23 | bound_val = 4.0*np.sqrt(6.0/(n_in + n_out)) 24 | W = tf.Variable(tf.random_uniform([n_in, n_out], minval=-bound_val, maxval=bound_val), 25 | dtype=tf.float32, name="W") 26 | if b is None: 27 | b = tf.Variable(tf.zeros([n_out,]), dtype=tf.float32, name="b") 28 | 29 | self.W = W 30 | self.b = b 31 | # the output 32 | sum_W = tf.matmul(inpt, self.W) + self.b 33 | self.output = activation(sum_W) if activation is not None else sum_W 34 | # params 35 | self.params = [self.W, self.b] 36 | 37 | 38 | class MLP(object): 39 | """Multi-layer perceptron class""" 40 | def __init__(self, inpt, n_in, n_hidden, n_out): 41 | """ 42 | inpt: tf.Tensor, shape [n_examples, n_in] 43 | n_in: int, the dimensionality of input 44 | n_hidden: int, number of hidden units 45 | n_out: int, number of output units 46 | """ 47 | # hidden layer 48 | self.hiddenLayer = HiddenLayer(inpt, n_in=n_in, n_out=n_hidden) 49 | # output layer (logistic layer) 50 | self.outputLayer = LogisticRegression(self.hiddenLayer.output, n_in=n_hidden, 51 | n_out=n_out) 52 | # L1 norm 53 | self.L1 = tf.reduce_sum(tf.abs(self.hiddenLayer.W)) + \ 54 | tf.reduce_sum(tf.abs(self.outputLayer.W)) 55 | # L2 norm 56 | self.L2 = tf.reduce_sum(tf.square(self.hiddenLayer.W)) + \ 57 | tf.reduce_sum(tf.square(self.outputLayer.W)) 58 | # cross_entropy cost function 59 | self.cost = self.outputLayer.cost 60 | # accuracy function 61 | self.accuracy = self.outputLayer.accuarcy 62 | 63 | # params 64 | self.params = self.hiddenLayer.params + self.outputLayer.params 65 | # keep track of input 66 | self.input = inpt 67 | 68 | 69 | if __name__ == "__main__": 70 | # mnist examples 71 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 72 | # define input and output placeholders 73 | x = tf.placeholder(tf.float32, shape=[None, 784]) 74 | y_ = tf.placeholder(tf.float32, shape=[None, 10]) 75 | # create mlp model 76 | mlp_classifier = MLP(inpt=x, n_in=784, n_hidden=500, n_out=10) 77 | # get cost 78 | l2_reg = 0.0001 79 | cost = mlp_classifier.cost(y_) + l2_reg*mlp_classifier.L2 80 | # accuracy 81 | accuracy = mlp_classifier.accuracy(y_) 82 | predictor = mlp_classifier.outputLayer.y_pred 83 | # define the train op 84 | train_op = tf.train.AdamOptimizer(learning_rate=0.01).minimize( 85 | cost, var_list=mlp_classifier.params) 86 | 87 | # initialize all variables 88 | init = tf.global_variables_initializer() 89 | 90 | # training settings 91 | training_epochs = 10 92 | batch_size = 100 93 | display_step = 1 94 | 95 | # start training 96 | print("Start training...") 97 | with tf.Session() as sess: 98 | sess.run(init) 99 | for epoch in range(training_epochs): 100 | avg_cost = 0.0 101 | batch_num = int(mnist.train.num_examples / batch_size) 102 | for i in range(batch_num): 103 | x_batch, y_batch = mnist.train.next_batch(batch_size) 104 | # train on the batch 105 | sess.run(train_op, feed_dict={x: x_batch, y_: y_batch}) 106 | # accumulate the cost 107 | avg_cost += sess.run(cost, feed_dict={x: x_batch, y_: y_batch}) / batch_num 108 | # display progress 109 | if epoch % display_step == 0: 110 | val_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images, 111 | y_: mnist.validation.labels}) 112 | print("Epoch {0} cost: {1}, validation accuracy: {2}".format(epoch, 113 | avg_cost, val_acc)) 114 | 115 | print("Finished!") 116 | test_x = mnist.test.images[:10] 117 | test_y = mnist.test.labels[:10] 118 | print("True labels:") 119 | print(" ", np.argmax(test_y, 1)) 120 | print("Prediction:") 121 | print(" ", sess.run(predictor, feed_dict={x: test_x})) 122 | 123 | 124 | 125 | 126 |
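127 | # --- Editor's note: an addition, not part of the original file. --- 128 | # The MLP class also exposes an L1 penalty (self.L1 above) that the demo leaves unused; 129 | # sparser weights can be encouraged by mixing both penalties into the cost, e.g.: 130 | # l1_reg, l2_reg = 0.001, 0.0001 131 | # cost = mlp_classifier.cost(y_) + l1_reg*mlp_classifier.L1 + l2_reg*mlp_classifier.L2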
-------------------------------------------------------------------------------- /models/sda.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stacked Denoising Autoencoders (SDA) 3 | author: Ye Hu 4 | 2016/12/16 5 | """ 6 | import timeit 7 | import numpy as np 8 | import tensorflow as tf 9 | import input_data 10 | 11 | from logisticRegression import LogisticRegression 12 | from mlp import HiddenLayer 13 | from da import DA 14 | 15 | class SdA(object): 16 | """ 17 | Stacked denoising autoencoder class 18 | the model is constructed by stacking several dAs 19 | the dA layers are used to initialize the network; after pre-training, 20 | the SdA behaves like a normal MLP 21 | """ 22 | def __init__(self, n_in=784, n_out=10, hidden_layers_sizes=(500, 500), 23 | corruption_levels=(0.1, 0.1)): 24 | """ 25 | :param n_in: int, the dimension of input 26 | :param n_out: int, the dimension of output 27 | :param hidden_layers_sizes: list or tuple, the hidden layer sizes 28 | :param corruption_levels: list or tuple, the corruption level for each layer 29 | """ 30 | assert len(hidden_layers_sizes) >= 1 31 | assert len(hidden_layers_sizes) == len(corruption_levels) 32 | self.corruption_levels = corruption_levels 33 | self.n_layers = len(hidden_layers_sizes) 34 | # define the layers 35 | self.layers = [] # the normal layers 36 | self.dA_layers = [] # the dA layers 37 | self.params = [] # params 38 | # define the input and output 39 | self.x = tf.placeholder(tf.float32, shape=[None, n_in]) 40 | self.y = tf.placeholder(tf.float32, shape=[None, n_out]) 41 | # construct the layers 42 | for i in range(self.n_layers): 43 | if i == 0: # the input layer 44 | input_size = n_in 45 | layer_input = self.x 46 | else: 47 | input_size = hidden_layers_sizes[i-1] 48 | layer_input = self.layers[i-1].output 49 | # create the sigmoid layer 50 | sigmoid_layer = HiddenLayer(inpt=layer_input, n_in=input_size, 51 | n_out=hidden_layers_sizes[i], activation=tf.nn.sigmoid) 52 | self.layers.append(sigmoid_layer) 53 | # create the dA layer (sharing W and bhid with the sigmoid layer) 54 | dA_layer = DA(inpt=layer_input, n_hidden=hidden_layers_sizes[i], n_visiable=input_size, 55 | W=sigmoid_layer.W, bhid=sigmoid_layer.b) 56 | self.dA_layers.append(dA_layer) 57 | 58 | # collect the params 59 | self.params.extend(sigmoid_layer.params) 60 | 61 | # add the output layer 62 | self.output_layer = LogisticRegression(inpt=self.layers[-1].output, n_in=hidden_layers_sizes[-1], 63 | n_out=n_out) 64 | self.params.extend(self.output_layer.params) 65 | 66 | # the finetuning cost 67 | self.finetune_cost = self.output_layer.cost(self.y) 68 | # the accuracy 69 | self.accuracy = self.output_layer.accuarcy(self.y) 70 | 71 | def pretrain(self, sess, X_train, pretraining_epochs=10, batch_size=100, learning_rate=0.001, 72 | display_step=1): 73 | """ 74 | Pretrain the layers 75 | :param sess: tf.Session 76 | :param X_train: the input of the train set 77 | :param batch_size: int 78 | :param learning_rate: float 79 | """ 80 | print('Starting pretraining...') 81 | start_time = timeit.default_timer() 82 | batch_num = int(X_train.train.num_examples / batch_size) 83 | for i in range(self.n_layers): 84 | # pretraining layer by layer 85 | cost = self.dA_layers[i].get_cost(corruption_level=self.corruption_levels[i]) 86 | params = self.dA_layers[i].params 87 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, var_list=params) 88 | for epoch in range(pretraining_epochs): 89 | avg_cost = 0.0 90 | for j in range(batch_num): 91 | x_batch, _ = X_train.train.next_batch(batch_size) 92 | # train on the batch 93 | sess.run(train_op, feed_dict={self.x: x_batch}) 94 | # accumulate the cost 95 | avg_cost += sess.run(cost, feed_dict={self.x: x_batch,}) / batch_num 96 | # display progress 97 | if epoch % display_step == 0: 98 | print("Pretraining layer {0} Epoch {1} cost: {2}".format(i, epoch, avg_cost))
{2}".format(i, epoch, avg_cost)) 99 | 100 | end_time = timeit.default_timer() 101 | print("The pretraining process ran for {0}m".format((end_time - start_time) / 60)) 102 | 103 | def finetuning(self, sess, trainSet, training_epochs=10, batch_size=100, learning_rate=0.1, 104 | display_step=1): 105 | """Finetuing the network""" 106 | print("Start finetuning...") 107 | start_time = timeit.default_timer() 108 | train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize( 109 | self.finetune_cost, var_list=self.params) 110 | for epoch in range(training_epochs): 111 | avg_cost = 0.0 112 | batch_num = int(trainSet.train.num_examples / batch_size) 113 | for i in range(batch_num): 114 | x_batch, y_batch = trainSet.train.next_batch(batch_size) 115 | # 训练 116 | sess.run(train_op, feed_dict={self.x: x_batch, self.y: y_batch}) 117 | # 计算cost 118 | avg_cost += sess.run(self.finetune_cost, feed_dict= 119 | {self.x: x_batch, self.y: y_batch}) / batch_num 120 | # 输出 121 | if epoch % display_step == 0: 122 | val_acc = sess.run(self.accuracy, feed_dict={self.x: trainSet.validation.images, 123 | self.y: trainSet.validation.labels}) 124 | print(" Epoch {0} cost: {1}, validation accuacy: {2}".format(epoch, avg_cost, val_acc)) 125 | 126 | end_time = timeit.default_timer() 127 | print("The finetuning process ran for {0}m".format((end_time - start_time) / 60)) 128 | 129 | 130 | if __name__ == "__main__": 131 | # mnist examples 132 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 133 | sda = SdA(n_in=784, n_out=10, hidden_layers_sizes=[500, 500, 500], corruption_levels=[0.1, 0.2, 0.2]) 134 | sess = tf.Session() 135 | init = tf.global_variables_initializer() 136 | sess.run(init) 137 | # set random_seed 138 | tf.set_random_seed(seed=1111) 139 | sda.pretrain(sess, X_train=mnist) 140 | sda.finetuning(sess, trainSet=mnist) 141 | 142 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in anyway to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating a easy to grasp 6 | image from a set of samples or weights. 7 | """ 8 | 9 | import numpy 10 | 11 | 12 | def scale_to_unit_interval(ndar, eps=1e-8): 13 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 14 | ndar = ndar.copy() 15 | ndar -= ndar.min() 16 | ndar *= 1.0 / (ndar.max() + eps) 17 | return ndar 18 | 19 | 20 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 21 | scale_rows_to_unit_interval=True, 22 | output_pixel_vals=True): 23 | """ 24 | Transform an array with one flattened image per row, into an array in 25 | which images are reshaped and layed out like tiles on a floor. 26 | 27 | This function is useful for visualizing datasets whose rows are images, 28 | and also columns of matrices for transforming those rows 29 | (such as the first layer of a neural net). 30 | 31 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 32 | be 2-D ndarrays or None; 33 | :param X: a 2-D array in which every row is a flattened image. 
34 | 35 | :type img_shape: tuple; (height, width) 36 | :param img_shape: the original shape of each image 37 | 38 | :type tile_shape: tuple; (rows, cols) 39 | :param tile_shape: the number of images to tile (rows, cols) 40 | 41 | :param output_pixel_vals: if output should be pixel values (i.e. int8 42 | values) or floats 43 | 44 | :param scale_rows_to_unit_interval: if the values need to be scaled before 45 | being plotted to [0,1] or not 46 | 47 | 48 | :returns: array suitable for viewing as an image. 49 | (See:`Image.fromarray`.) 50 | :rtype: a 2-d array with same dtype as X. 51 | 52 | """ 53 | 54 | assert len(img_shape) == 2 55 | assert len(tile_shape) == 2 56 | assert len(tile_spacing) == 2 57 | 58 | # The expression below can be re-written in a more C style as 59 | # follows : 60 | # 61 | # out_shape = [0,0] 62 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 63 | # tile_spacing[0] 64 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 65 | # tile_spacing[1] 66 | out_shape = [ 67 | (ishp + tsp) * tshp - tsp 68 | for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing) 69 | ] 70 | 71 | if isinstance(X, tuple): 72 | assert len(X) == 4 73 | # Create an output numpy ndarray to store the image 74 | if output_pixel_vals: 75 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 76 | dtype='uint8') 77 | else: 78 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 79 | dtype=X.dtype) 80 | 81 | #colors default to 0, alpha defaults to 1 (opaque) 82 | if output_pixel_vals: 83 | channel_defaults = [0, 0, 0, 255] 84 | else: 85 | channel_defaults = [0., 0., 0., 1.] 86 | 87 | for i in range(4): 88 | if X[i] is None: 89 | # if channel is None, fill it with zeros of the correct 90 | # dtype 91 | dt = out_array.dtype 92 | if output_pixel_vals: 93 | dt = 'uint8' 94 | out_array[:, :, i] = numpy.zeros( 95 | out_shape, 96 | dtype=dt 97 | ) + channel_defaults[i] 98 | else: 99 | # use a recurrent call to compute the channel and store it 100 | # in the output 101 | out_array[:, :, i] = tile_raster_images( 102 | X[i], img_shape, tile_shape, tile_spacing, 103 | scale_rows_to_unit_interval, output_pixel_vals) 104 | return out_array 105 | 106 | else: 107 | # if we are dealing with only one channel 108 | H, W = img_shape 109 | Hs, Ws = tile_spacing 110 | 111 | # generate a matrix to store the output 112 | dt = X.dtype 113 | if output_pixel_vals: 114 | dt = 'uint8' 115 | out_array = numpy.zeros(out_shape, dtype=dt) 116 | 117 | for tile_row in range(tile_shape[0]): 118 | for tile_col in range(tile_shape[1]): 119 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 120 | this_x = X[tile_row * tile_shape[1] + tile_col] 121 | if scale_rows_to_unit_interval: 122 | # if we should scale values to be between 0 and 1 123 | # do this by calling the `scale_to_unit_interval` 124 | # function 125 | this_img = scale_to_unit_interval( 126 | this_x.reshape(img_shape)) 127 | else: 128 | this_img = this_x.reshape(img_shape) 129 | # add the slice to the corresponding position in the 130 | # output array 131 | c = 1 132 | if output_pixel_vals: 133 | c = 255 134 | out_array[ 135 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 136 | tile_col * (W + Ws): tile_col * (W + Ws) + W 137 | ] = this_img * c 138 | return out_array -------------------------------------------------------------------------------- /results/10filters_at_epoch_0.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_0.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_1.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_10.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_11.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_12.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_13.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_14.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_2.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_3.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_4.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_5.png 
-------------------------------------------------------------------------------- /results/10filters_at_epoch_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_6.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_7.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_8.png -------------------------------------------------------------------------------- /results/10filters_at_epoch_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10filters_at_epoch_9.png -------------------------------------------------------------------------------- /results/10original_and_10samples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/10original_and_10samples.png -------------------------------------------------------------------------------- /results/654362565405877642.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/654362565405877642.jpg -------------------------------------------------------------------------------- /results/DBN_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/DBN_results.png -------------------------------------------------------------------------------- /results/filters_corruption_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/filters_corruption_0.png -------------------------------------------------------------------------------- /results/filters_corruption_30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/filters_corruption_30.png -------------------------------------------------------------------------------- /results/new_filters_at_epoch_14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/new_filters_at_epoch_14.png -------------------------------------------------------------------------------- /results/new_original_and_10samples.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/new_original_and_10samples.png -------------------------------------------------------------------------------- /results/rnn_language_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/rnn_language_model.png -------------------------------------------------------------------------------- /results/weichat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiaohu2015/DeepLearning_tutorials/9804687d65db6ea1dd31ef716af0b8af824e29bd/results/weichat.jpg --------------------------------------------------------------------------------