├── CapsuleNet.py
├── README.md
└── randmnist.mat

/CapsuleNet.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import tensorflow.contrib as contrib
import numpy as np
import scipy.io as sci

class CapsuleNet_DynamicRouting():

    def __init__(self, batchsize, nums_outputs, vec_len, iter, data):
        self.batchsize = batchsize        # batch size; this code uses 50
        self.nums_outputs = nums_outputs  # number of output (digit) capsules
        self.vec_len = vec_len            # length of one vector in PrimaryCaps
        self.r = iter                     # number of dynamic-routing iterations
        # data is a '.mat' dict with traindata/trainlabel/testdata/testlabel
        self.traindata = data["traindata"] / 255.0
        self.trainlabel = data["trainlabel"]
        self.testdata = data["testdata"] / 255.0
        self.testlabel = data["testlabel"]
        self.X = tf.placeholder(dtype=tf.float32, shape=[batchsize, 784], name="X")
        self.Y = tf.placeholder(dtype=tf.float32, shape=[batchsize, 10], name="Label")
        self.sess = tf.InteractiveSession()

    def squash(self, s_j):
        # Squashing nonlinearity: short vectors shrink toward zero,
        # long vectors approach unit length.
        squared_norm = tf.reduce_sum(tf.square(s_j), axis=2, keep_dims=True)
        scale = squared_norm / (1 + squared_norm)
        unit_s_j = s_j / (tf.sqrt(squared_norm) + 1e-10)
        return scale * unit_s_j

    def ROUTING(self, u_hat):
        # Dynamic routing between capsules (Procedure 1 in the paper).
        u_hat = tf.squeeze(u_hat, axis=-2)             # [batch, 1152, 10, 16]
        # Routing updates use a gradient-stopped copy of u_hat, so gradients
        # reach the transformation weights only through the final iteration.
        u_hat_stopped = tf.stop_gradient(u_hat)
        b_ij = tf.zeros([self.batchsize, 1152, self.nums_outputs])  # per-example routing logits
        for r in range(self.r):
            c_ij = tf.nn.softmax(b_ij, dim=-1)         # coupling coefficients: [batch, 1152, 10]
            c_ij = tf.expand_dims(c_ij, axis=-1)       # [batch, 1152, 10, 1]
            # Last iteration uses the live u_hat so the loss can backprop to W.
            u = u_hat if r == self.r - 1 else u_hat_stopped
            s_j = tf.reduce_sum(c_ij * u, axis=1)      # s_j: [batch, 10, 16]
            s_j = tf.reshape(s_j, [self.batchsize, self.nums_outputs, 16, 1])
            v_j = self.squash(s_j)                     # v_j: [batch, 10, 16, 1]
            if r < self.r - 1:
                # Agreement (dot product of u_hat and v_j) raises the logit of a good match.
                v_tile = tf.tile(tf.reshape(v_j, [self.batchsize, 1, self.nums_outputs, 16, 1]),
                                 [1, 1152, 1, 1, 1])
                agreement = tf.matmul(tf.transpose(v_tile, [0, 1, 2, 4, 3]),
                                      tf.reshape(u_hat_stopped, [self.batchsize, 1152, self.nums_outputs, 16, 1]))
                b_ij = b_ij + tf.squeeze(agreement, axis=[-2, -1])
        return tf.squeeze(v_j)                         # [batch, 10, 16]

    def CapsuleLayer(self, u_i):
        # Predict each 16-D digit capsule from every 8-D primary capsule: u_hat = u_i * W.
        nums_inputs = u_i.get_shape().as_list()[1]     # 1152 primary capsules
        W = tf.get_variable(name="Capsule_Weight",
                            shape=[1, nums_inputs, self.nums_outputs, 8, 16],
                            dtype=tf.float32,
                            initializer=contrib.layers.xavier_initializer())
        W_tile = tf.tile(W, [self.batchsize, 1, 1, 1, 1])
        u_i = tf.reshape(u_i, [self.batchsize, 1152, 1, 8, 1])
        u_i = tf.tile(u_i, [1, 1, self.nums_outputs, 1, 1])  # match W's output-capsule dim
        u_i = tf.transpose(u_i, [0, 1, 2, 4, 3])             # [batch, 1152, 10, 1, 8]
        u_hat = tf.matmul(u_i, W_tile)                       # [batch, 1152, 10, 1, 16]
        v_j = self.ROUTING(u_hat)
        return v_j

    def Loss(self, v_k, m_plus=0.9, m_min=0.1, lambd=0.5, scale_rec=0.0005):
        # Margin loss on the digit-capsule lengths plus a scaled reconstruction loss.
        abs_vk = tf.sqrt(tf.reduce_sum(tf.square(v_k), axis=-1))
        L_k = self.Y * tf.square(tf.maximum(0., m_plus - abs_vk)) \
              + lambd * (1 - self.Y) * tf.square(tf.maximum(0., abs_vk - m_min))
        loss1 = tf.reduce_sum(L_k)
        loss2 = self.Reconstruct(v_k)
        return loss1 + loss2 * scale_rec

    def CapsuleNet(self):
        images = tf.reshape(self.X, shape=[self.batchsize, 28, 28, 1])
        with tf.variable_scope("Conv1"):
            # l2_regularizer must be called with a scale to yield a regularizer
            # function (the scale here is an arbitrary choice); the collected
            # penalty is not added to the training objective below.
            conv1 = contrib.layers.conv2d(inputs=images, num_outputs=256, kernel_size=[9, 9],
                                          stride=1,
                                          weights_initializer=contrib.layers.xavier_initializer_conv2d(),
                                          weights_regularizer=contrib.layers.l2_regularizer(1e-4),
                                          padding="VALID", activation_fn=tf.nn.relu)
        with tf.variable_scope("Conv2"):
            conv2 = contrib.layers.conv2d(inputs=conv1, num_outputs=256, kernel_size=[9, 9],
                                          stride=2,
                                          weights_initializer=contrib.layers.xavier_initializer_conv2d(),
                                          weights_regularizer=contrib.layers.l2_regularizer(1e-4),
                                          padding="VALID", activation_fn=tf.nn.relu)
        with tf.variable_scope("PrimaryCaps"):
            # 6x6x256 conv outputs regrouped into 1152 capsules of 8 dimensions.
            self.primarycaps = tf.reshape(conv2, shape=[self.batchsize, 1152, 8, 1])
            u_i = self.squash(self.primarycaps)
        with tf.variable_scope("CapsuleLayer_1"):
            self.DigitCaps = self.CapsuleLayer(u_i)
        self.loss = self.Loss(self.DigitCaps)

    def Reconstruct(self, DigitCaps):
        # Mask out every capsule except the true digit's, then decode to 784 pixels.
        target = tf.matmul(tf.reshape(self.Y, [self.batchsize, 1, 10]), DigitCaps)  # [50, 1, 16]
        target = tf.squeeze(target, axis=1)  # [50, 16]
        fc1 = contrib.layers.fully_connected(inputs=target, num_outputs=512, activation_fn=tf.nn.relu)
        fc2 = contrib.layers.fully_connected(inputs=fc1, num_outputs=1024, activation_fn=tf.nn.relu)
        self.fc_sigmoid = contrib.layers.fully_connected(inputs=fc2, num_outputs=784, activation_fn=tf.nn.sigmoid)
        return tf.reduce_mean(tf.reduce_sum(tf.square(self.X - self.fc_sigmoid), axis=-1))

    def get_acc(self):
        # The predicted class is the digit capsule with the greatest length.
        prediction = tf.sqrt(tf.reduce_sum(tf.square(self.DigitCaps), axis=-1))
        correct_prediction = tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(self.Y, axis=-1))
        acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        return acc

    def train(self):
        train_step = tf.train.AdamOptimizer(0.0001).minimize(self.loss)
        acc = self.get_acc()  # build the accuracy op once, outside the loop
        self.sess.run(tf.global_variables_initializer())
        k = 0
        for i in range(10000):
            batch0 = self.traindata[k:k + self.batchsize, :]
            batch1 = self.trainlabel[k:k + self.batchsize, :]
            k = k + self.batchsize
            if k >= np.size(self.traindata, 0):
                # Reshuffle the training set after each full pass.
                perm = np.arange(np.size(self.traindata, 0))
                np.random.shuffle(perm)
                self.traindata = self.traindata[perm]
                self.trainlabel = self.trainlabel[perm]
                k = 0
            self.sess.run(train_step, feed_dict={self.X: batch0, self.Y: batch1})
            Trainacc = self.sess.run(acc, feed_dict={self.X: batch0, self.Y: batch1})
            print("Step %g, Train Accuracy: %g" % (i, Trainacc))
            Testacc = self.sess.run(acc, feed_dict={self.X: self.testdata[0:50, :], self.Y: self.testlabel[0:50, :]})
            print("Test Accuracy: %g" % Testacc)

if __name__ == "__main__":
    data = sci.loadmat("C://Users//gmt//Desktop//randmnist.mat")
    capsule = CapsuleNet_DynamicRouting(batchsize=50, nums_outputs=10, vec_len=8, iter=3, data=data)
    capsule.CapsuleNet()
    capsule.train()
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# CapsuleNet_Tensorflow
A TensorFlow implementation of CapsuleNet with dynamic routing between capsules.

A capsule is a new kind of unit for artificial neural networks. A conventional neuron, such as one in a CNN, is scalar-in, scalar-out; a capsule is vector-in, vector-out. I think this extension is interesting, and it performs well on MNIST and smallNORB. For details, see the paper: https://arxiv.org/abs/1710.09829
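
For reference, `squash` in `CapsuleNet.py` implements the nonlinearity from the paper, which shrinks short vectors toward zero and scales long ones toward unit length:

$$v_j = \frac{\lVert s_j \rVert^2}{1 + \lVert s_j \rVert^2} \, \frac{s_j}{\lVert s_j \rVert}$$

and `Loss` implements the paper's margin loss over the digit-capsule lengths (with $m^+ = 0.9$, $m^- = 0.1$, $\lambda = 0.5$, and $T_k = 1$ iff digit $k$ is present):

$$L_k = T_k \, \max(0,\, m^+ - \lVert v_k \rVert)^2 + \lambda \, (1 - T_k) \, \max(0,\, \lVert v_k \rVert - m^-)^2$$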
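
A minimal usage sketch, assuming `randmnist.mat` contains `traindata`, `trainlabel`, `testdata`, and `testlabel` arrays (images flattened to 784, labels one-hot) and sits in the working directory:

```python
import scipy.io as sci
from CapsuleNet import CapsuleNet_DynamicRouting

data = sci.loadmat("randmnist.mat")  # adjust the path to wherever the file lives
capsule = CapsuleNet_DynamicRouting(batchsize=50, nums_outputs=10, vec_len=8, iter=3, data=data)
capsule.CapsuleNet()  # build the graph
capsule.train()       # train and print train/test accuracy
```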
--------------------------------------------------------------------------------

/randmnist.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MingtaoGuo/CapsuleNet_Tensorflow/ed79758c4cf2459d75f3340d2454e25247b058ed/randmnist.mat
--------------------------------------------------------------------------------