├── CapsuleNet.py
├── README.md
└── randmnist.mat

/CapsuleNet.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow.contrib as contrib
import numpy as np
import scipy.io as sci

class CapsuleNet_DynamicRouting():

    def __init__(self, batchsize, nums_outputs, vec_len, iter, data):
        self.batchsize = batchsize        # batch size; this code uses 50
        self.nums_outputs = nums_outputs  # number of output (digit) capsules
        self.vec_len = vec_len            # length of one vector in PrimaryCaps
        self.r = iter                     # number of dynamic-routing iterations
        # data is a '.mat' dict with traindata/trainlabel/testdata/testlabel
        self.traindata = data["traindata"] / 255.0
        self.trainlabel = data["trainlabel"]
        self.testdata = data["testdata"] / 255.0
        self.testlabel = data["testlabel"]
        self.X = tf.placeholder(dtype=tf.float32, shape=[batchsize, 784], name="X")
        self.Y = tf.placeholder(dtype=tf.float32, shape=[batchsize, 10], name="Label")
        self.sess = tf.InteractiveSession()

    def squash(self, s_j):
        # Squashing nonlinearity: short vectors shrink toward zero,
        # long vectors approach unit length.
        squared_norm = tf.reduce_sum(tf.square(s_j), axis=2, keep_dims=True)
        scale = squared_norm / (1 + squared_norm)
        unit_s_j = s_j / (tf.sqrt(squared_norm) + 1e-10)
        return scale * unit_s_j

    def ROUTING(self, u_hat):
        # Dynamic routing between capsules (Procedure 1 in the paper).
        u_hat = tf.squeeze(u_hat, axis=-2)             # [batch, 1152, 10, 16]
        # Routing updates use a gradient-stopped copy of u_hat, so gradients
        # reach the transformation weights only through the final iteration.
        u_hat_stopped = tf.stop_gradient(u_hat)
        b_ij = tf.zeros([self.batchsize, 1152, self.nums_outputs])  # per-example routing logits
        for r in range(self.r):
            c_ij = tf.nn.softmax(b_ij, dim=-1)         # coupling coefficients: [batch, 1152, 10]
            c_ij = tf.expand_dims(c_ij, axis=-1)       # [batch, 1152, 10, 1]
            # Last iteration uses the live u_hat so the loss can backprop to W.
            u = u_hat if r == self.r - 1 else u_hat_stopped
            s_j = tf.reduce_sum(c_ij * u, axis=1)      # s_j: [batch, 10, 16]
            s_j = tf.reshape(s_j, [self.batchsize, self.nums_outputs, 16, 1])
            v_j = self.squash(s_j)                     # v_j: [batch, 10, 16, 1]
            if r < self.r - 1:
                # Agreement (dot product of u_hat and v_j) raises the logit of a good match.
                v_tile = tf.tile(tf.reshape(v_j, [self.batchsize, 1, self.nums_outputs, 16, 1]),
                                 [1, 1152, 1, 1, 1])
                agreement = tf.matmul(tf.transpose(v_tile, [0, 1, 2, 4, 3]),
                                      tf.reshape(u_hat_stopped, [self.batchsize, 1152, self.nums_outputs, 16, 1]))
                b_ij = b_ij + tf.squeeze(agreement, axis=[-2, -1])
        return tf.squeeze(v_j)                         # [batch, 10, 16]

    def CapsuleLayer(self, u_i):
        # Predict each 16-D digit capsule from every 8-D primary capsule: u_hat = u_i * W.
        nums_inputs = u_i.get_shape().as_list()[1]     # 1152 primary capsules
        W = tf.get_variable(name="Capsule_Weight",
                            shape=[1, nums_inputs, self.nums_outputs, 8, 16],
                            dtype=tf.float32,
                            initializer=contrib.layers.xavier_initializer())
        W_tile = tf.tile(W, [self.batchsize, 1, 1, 1, 1])
        u_i = tf.reshape(u_i, [self.batchsize, 1152, 1, 8, 1])
        u_i = tf.tile(u_i, [1, 1, self.nums_outputs, 1, 1])  # match W's output-capsule dim
        u_i = tf.transpose(u_i, [0, 1, 2, 4, 3])             # [batch, 1152, 10, 1, 8]
        u_hat = tf.matmul(u_i, W_tile)                       # [batch, 1152, 10, 1, 16]
        v_j = self.ROUTING(u_hat)
        return v_j

    def Loss(self, v_k, m_plus=0.9, m_min=0.1, lambd=0.5, scale_rec=0.0005):
        # Margin loss on the digit-capsule lengths plus a scaled reconstruction loss.
        abs_vk = tf.sqrt(tf.reduce_sum(tf.square(v_k), axis=-1))
        L_k = self.Y * tf.square(tf.maximum(0., m_plus - abs_vk)) \
              + lambd * (1 - self.Y) * tf.square(tf.maximum(0., abs_vk - m_min))
        loss1 = tf.reduce_sum(L_k)
        loss2 = self.Reconstruct(v_k)
        return loss1 + loss2 * scale_rec

    def CapsuleNet(self):
        images = tf.reshape(self.X, shape=[self.batchsize, 28, 28, 1])
        with tf.variable_scope("Conv1"):
            # l2_regularizer must be called with a scale to yield a regularizer
            # function (the scale here is an arbitrary choice); the collected
            # penalty is not added to the training objective below.
            conv1 = contrib.layers.conv2d(inputs=images, num_outputs=256, kernel_size=[9, 9],
                                          stride=1,
                                          weights_initializer=contrib.layers.xavier_initializer_conv2d(),
                                          weights_regularizer=contrib.layers.l2_regularizer(1e-4),
                                          padding="VALID", activation_fn=tf.nn.relu)
        with tf.variable_scope("Conv2"):
            conv2 = contrib.layers.conv2d(inputs=conv1, num_outputs=256, kernel_size=[9, 9],
                                          stride=2,
                                          weights_initializer=contrib.layers.xavier_initializer_conv2d(),
                                          weights_regularizer=contrib.layers.l2_regularizer(1e-4),
                                          padding="VALID", activation_fn=tf.nn.relu)
        with tf.variable_scope("PrimaryCaps"):
            # 6x6x256 conv outputs regrouped into 1152 capsules of 8 dimensions.
            self.primarycaps = tf.reshape(conv2, shape=[self.batchsize, 1152, 8, 1])
            u_i = self.squash(self.primarycaps)
        with tf.variable_scope("CapsuleLayer_1"):
            self.DigitCaps = self.CapsuleLayer(u_i)
        self.loss = self.Loss(self.DigitCaps)

    def Reconstruct(self, DigitCaps):
        # Mask out every capsule except the true digit's, then decode to 784 pixels.
        target = tf.matmul(tf.reshape(self.Y, [self.batchsize, 1, 10]), DigitCaps)  # [50, 1, 16]
        target = tf.squeeze(target, axis=1)  # [50, 16]
        fc1 = contrib.layers.fully_connected(inputs=target, num_outputs=512, activation_fn=tf.nn.relu)
        fc2 = contrib.layers.fully_connected(inputs=fc1, num_outputs=1024, activation_fn=tf.nn.relu)
        self.fc_sigmoid = contrib.layers.fully_connected(inputs=fc2, num_outputs=784, activation_fn=tf.nn.sigmoid)
        return tf.reduce_mean(tf.reduce_sum(tf.square(self.X - self.fc_sigmoid), axis=-1))

    def get_acc(self):
        # The predicted class is the digit capsule with the greatest length.
        prediction = tf.sqrt(tf.reduce_sum(tf.square(self.DigitCaps), axis=-1))
        correct_prediction = tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(self.Y, axis=-1))
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return acc

    def train(self):
        train_step = tf.train.AdamOptimizer(0.0001).minimize(self.loss)
        acc = self.get_acc()  # build the accuracy op once, outside the loop
        self.sess.run(tf.global_variables_initializer())
        k = 0
        for i in range(10000):
            batch0 = self.traindata[k:k + self.batchsize, :]
            batch1 = self.trainlabel[k:k + self.batchsize, :]
            k = k + self.batchsize
            if k >= np.size(self.traindata, 0):
                # Reshuffle the training set after each full pass.
                perm = np.arange(np.size(self.traindata, 0))
                np.random.shuffle(perm)
                self.traindata = self.traindata[perm]
                self.trainlabel = self.trainlabel[perm]
                k = 0
            self.sess.run(train_step, feed_dict={self.X: batch0, self.Y: batch1})
            Trainacc = self.sess.run(acc, feed_dict={self.X: batch0, self.Y: batch1})
            print("Step %g, Train Accuracy: %g" % (i, Trainacc))
            Testacc = self.sess.run(acc, feed_dict={self.X: self.testdata[0:50, :], self.Y: self.testlabel[0:50, :]})
            print("Test Accuracy: %g" % Testacc)

if __name__ == "__main__":
    data = sci.loadmat("C://Users//gmt//Desktop//randmnist.mat")
    capsule = CapsuleNet_DynamicRouting(batchsize=50, nums_outputs=10, vec_len=8, iter=3, data=data)
    capsule.CapsuleNet()
    capsule.train()
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# CapsuleNet_Tensorflow
A TensorFlow implementation of CapsuleNet with dynamic routing between capsules.

A capsule is a new kind of unit for artificial neural networks. A conventional neuron, such as one in a CNN, is scalar-in, scalar-out; a capsule is vector-in, vector-out. I think this extension is interesting, and it performs well on MNIST and smallNORB. For details, see the paper: https://arxiv.org/abs/1710.09829
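
For reference, `squash` in `CapsuleNet.py` implements the nonlinearity from the paper, which shrinks short vectors toward zero and scales long ones toward unit length:

$$v_j = \frac{\lVert s_j \rVert^2}{1 + \lVert s_j \rVert^2} \, \frac{s_j}{\lVert s_j \rVert}$$

and `Loss` implements the paper's margin loss over the digit-capsule lengths (with $m^+ = 0.9$, $m^- = 0.1$, $\lambda = 0.5$, and $T_k = 1$ iff digit $k$ is present):

$$L_k = T_k \, \max(0,\, m^+ - \lVert v_k \rVert)^2 + \lambda \, (1 - T_k) \, \max(0,\, \lVert v_k \rVert - m^-)^2$$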
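
A minimal usage sketch, assuming `randmnist.mat` contains `traindata`, `trainlabel`, `testdata`, and `testlabel` arrays (images flattened to 784, labels one-hot) and sits in the working directory:

```python
import scipy.io as sci
from CapsuleNet import CapsuleNet_DynamicRouting

data = sci.loadmat("randmnist.mat")  # adjust the path to wherever the file lives
capsule = CapsuleNet_DynamicRouting(batchsize=50, nums_outputs=10, vec_len=8, iter=3, data=data)
capsule.CapsuleNet()  # build the graph
capsule.train()       # train and print train/test accuracy
```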
--------------------------------------------------------------------------------

/randmnist.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MingtaoGuo/CapsuleNet_Tensorflow/ed79758c4cf2459d75f3340d2454e25247b058ed/randmnist.mat
--------------------------------------------------------------------------------