├── .gitignore ├── README.md ├── dbn_tf.py ├── input_data.py ├── nn_tf.py ├── opts.py ├── rbm.py ├── rbm_MNIST_test.py ├── rbm_tf.py ├── test.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | MNIST_data/ 3 | *.pyc 4 | .ropeproject/ 5 | *.png 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | See the [example file](./test.py). 3 | # Tags 4 | See the release page. 5 | # Further 6 | See this Stack Overflow [question](http://stackoverflow.com/questions/35622434/custom-operation-implementation-for-rbm-dbn-with-tensorflow). 7 | # Contact Info 8 | mailto: my id at gmail.com 9 | -------------------------------------------------------------------------------- /dbn_tf.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 Peng Liu 6 | # 7 | # Distributed under terms of the GNU GPL3 license. 8 | 9 | """ 10 | This file implements the DBN class: a stack of RBMs trained greedily, layer by layer. 11 | """ 12 | 13 | from rbm_tf import RBM 14 | 15 | 16 | class DBN(object): 17 | 18 | """Deep belief network built as a stack of RBM layers. """ 19 | 20 | def __init__(self, sizes, opts, X): 21 | """Build one RBM per entry in sizes. 22 | 23 | :sizes: list of hidden-layer sizes, one RBM per entry 24 | :opts: DLOption instance holding the training hyper-parameters 25 | :X: 2-D numpy array of training data, one example per row 26 | """ 27 | self._sizes = sizes 28 | self._opts = opts 29 | self._X = X 30 | self.rbm_list = [] 31 | input_size = X.shape[1] 32 | for i, size in enumerate(self._sizes): 33 | self.rbm_list.append(RBM("rbm%d" % i, input_size, size, self._opts)) 34 | input_size = size 35 | 36 | def train(self): 37 | """Greedily train each RBM in turn, feeding the previous 38 | layer's hidden activations to the next RBM. 39 | 40 | """ 41 | X = self._X 42 | for rbm in self.rbm_list: 43 | rbm.train(X) 44 | X = rbm.rbmup(X) 45 | -------------------------------------------------------------------------------- /input_data.py: -------------------------------------------------------------------------------- 1 | """Functions for downloading and reading MNIST data.""" 2 | from __future__ import print_function 3 | import gzip 4 | import os 5 | import urllib 6 | import numpy 7 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 8 | 9 | 10 | def maybe_download(filename, work_directory): 11 | """Download the data from Yann's website, unless it's already here.""" 12 | if not os.path.exists(work_directory): 13 | os.mkdir(work_directory) 14 | filepath = os.path.join(work_directory, filename) 15 | if not os.path.exists(filepath): 16 | filepath, _ = urllib.urlretrieve(SOURCE_URL + filename, filepath) 17 | statinfo = os.stat(filepath) 18 | print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') 19 | return filepath 20 | 21 | 22 | def _read32(bytestream): 23 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 24 | return numpy.frombuffer(bytestream.read(4), dtype=dt) 25 | 26 | 27 | def extract_images(filename): 28 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 29 | print('Extracting', filename) 30 | with gzip.open(filename) as bytestream: 31 | magic = _read32(bytestream) 32 | if magic != 2051: 33 | raise ValueError( 34 | 'Invalid magic number %d in MNIST image file: %s' % 35 | (magic, filename)) 36 | num_images = _read32(bytestream) 37 | rows = _read32(bytestream) 38 | cols = _read32(bytestream) 39 | buf = bytestream.read(rows * cols * num_images) 40 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 41 | data = data.reshape(num_images, rows, cols, 1) 42 | return data 43 |
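# A quick sanity check for the readers above (a minimal sketch; it assumes the
# MNIST gzip files have already been fetched into MNIST_data/ by maybe_download):
#
#   imgs = extract_images('MNIST_data/train-images-idx3-ubyte.gz')
#   assert imgs.shape == (60000, 28, 28, 1) and imgs.dtype == numpy.uint8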
44 | 45 | def dense_to_one_hot(labels_dense, num_classes=10): 46 | """Convert class labels from scalars to one-hot vectors.""" 47 | num_labels = labels_dense.shape[0] 48 | index_offset = numpy.arange(num_labels) * num_classes 49 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 50 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 51 | return labels_one_hot 52 | 53 | 54 | def extract_labels(filename, one_hot=False): 55 | """Extract the labels into a 1D uint8 numpy array [index].""" 56 | print('Extracting', filename) 57 | with gzip.open(filename) as bytestream: 58 | magic = _read32(bytestream) 59 | if magic != 2049: 60 | raise ValueError( 61 | 'Invalid magic number %d in MNIST label file: %s' % 62 | (magic, filename)) 63 | num_items = _read32(bytestream) 64 | buf = bytestream.read(num_items) 65 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 66 | if one_hot: 67 | return dense_to_one_hot(labels) 68 | return labels 69 | 70 | 71 | class DataSet(object): 72 | 73 | def __init__(self, images, labels, fake_data=False): 74 | if fake_data: 75 | self._num_examples = 10000 76 | else: 77 | assert images.shape[0] == labels.shape[0], ( 78 | "images.shape: %s labels.shape: %s" % (images.shape, 79 | labels.shape)) 80 | self._num_examples = images.shape[0] 81 | # Convert shape from [num examples, rows, columns, depth] 82 | # to [num examples, rows*columns] (assuming depth == 1) 83 | assert images.shape[3] == 1 84 | images = images.reshape(images.shape[0], 85 | images.shape[1] * images.shape[2]) 86 | # Convert from [0, 255] -> [0.0, 1.0]. 87 | images = images.astype(numpy.float32) 88 | images = numpy.multiply(images, 1.0 / 255.0) 89 | self._images = images 90 | self._labels = labels 91 | self._epochs_completed = 0 92 | self._index_in_epoch = 0 93 | 94 | @property 95 | def images(self): 96 | return self._images 97 | 98 | @property 99 | def labels(self): 100 | return self._labels 101 | 102 | @property 103 | def num_examples(self): 104 | return self._num_examples 105 | 106 | @property 107 | def epochs_completed(self): 108 | return self._epochs_completed 109 | 110 | def next_batch(self, batch_size, fake_data=False): 111 | """Return the next `batch_size` examples from this data set.""" 112 | if fake_data: 113 | fake_image = [1.0 for _ in xrange(784)] 114 | fake_label = 0 115 | return [fake_image for _ in xrange(batch_size)], [ 116 | fake_label for _ in xrange(batch_size)] 117 | start = self._index_in_epoch 118 | self._index_in_epoch += batch_size 119 | if self._index_in_epoch > self._num_examples: 120 | # Finished epoch 121 | self._epochs_completed += 1 122 | # Shuffle the data 123 | perm = numpy.arange(self._num_examples) 124 | numpy.random.shuffle(perm) 125 | self._images = self._images[perm] 126 | self._labels = self._labels[perm] 127 | # Start next epoch 128 | start = 0 129 | self._index_in_epoch = batch_size 130 | assert batch_size <= self._num_examples 131 | end = self._index_in_epoch 132 | return self._images[start:end], self._labels[start:end] 133 | 134 | 135 | def read_data_sets(train_dir, fake_data=False, one_hot=False): 136 | class DataSets(object): 137 | pass 138 | data_sets = DataSets() 139 | if fake_data: 140 | data_sets.train = DataSet([], [], fake_data=True) 141 | data_sets.validation = DataSet([], [], fake_data=True) 142 | data_sets.test = DataSet([], [], fake_data=True) 143 | return data_sets 144 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 145 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 146 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 147 | TEST_LABELS = 
't10k-labels-idx1-ubyte.gz' 148 | VALIDATION_SIZE = 5000 149 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 150 | train_images = extract_images(local_file) 151 | local_file = maybe_download(TRAIN_LABELS, train_dir) 152 | train_labels = extract_labels(local_file, one_hot=one_hot) 153 | local_file = maybe_download(TEST_IMAGES, train_dir) 154 | test_images = extract_images(local_file) 155 | local_file = maybe_download(TEST_LABELS, train_dir) 156 | test_labels = extract_labels(local_file, one_hot=one_hot) 157 | validation_images = train_images[:VALIDATION_SIZE] 158 | validation_labels = train_labels[:VALIDATION_SIZE] 159 | train_images = train_images[VALIDATION_SIZE:] 160 | train_labels = train_labels[VALIDATION_SIZE:] 161 | data_sets.train = DataSet(train_images, train_labels) 162 | data_sets.validation = DataSet(validation_images, validation_labels) 163 | data_sets.test = DataSet(test_images, test_labels) 164 | return data_sets 165 | -------------------------------------------------------------------------------- /nn_tf.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 Peng Liu 6 | # 7 | # Distributed under terms of the GNU GPL3 license. 8 | 9 | """ 10 | This file implements the NN class, a feed-forward neural network fine-tuned with backpropagation. 11 | """ 12 | import numpy as np 13 | import math 14 | import tensorflow as tf 15 | 16 | 17 | class NN(object): 18 | 19 | """Feed-forward network trained with backpropagation; its weights can be initialized from a pre-trained DBN. """ 20 | 21 | def __init__(self, sizes, opts, X, Y): 22 | """Randomly initialize the weights and biases of every layer. 23 | 24 | :sizes: list of hidden-layer sizes 25 | :opts: DLOption instance holding the training hyper-parameters 26 | :X: 2-D numpy array of training inputs 27 | :Y: 2-D numpy array of one-hot training labels 28 | """ 29 | self._sizes = sizes 30 | self._opts = opts 31 | self._X = X 32 | self._Y = Y 33 | self.w_list = [] 34 | self.b_list = [] 35 | input_size = X.shape[1] 36 | for size in self._sizes + [Y.shape[1]]: 37 | max_range = 4 * math.sqrt(6. / (input_size + size)) 38 | self.w_list.append( 39 | np.random.uniform( 40 | -max_range, max_range, [input_size, size] 41 | ).astype(np.float32)) 42 | self.b_list.append(np.zeros([size], np.float32)) 43 | input_size = size 44 | 45 | def load_from_dbn(self, dbn): 46 | """Copy the pre-trained weights and hidden biases from a DBN 47 | whose layer sizes match this network's hidden sizes. 48 | 49 | :dbn: a trained DBN instance 50 | 51 | """ 52 | assert len(dbn._sizes) == len(self._sizes) 53 | for i in range(len(self._sizes)): 54 | assert dbn._sizes[i] == self._sizes[i] 55 | for i in range(len(self._sizes)): 56 | self.w_list[i] = dbn.rbm_list[i].w 57 | self.b_list[i] = dbn.rbm_list[i].hb 58 |
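# How DBN pre-training and NN fine-tuning fit together (a minimal sketch mirroring
# test.py; it assumes trX/trY were loaded via input_data and opts is a DLOption):
#
#   dbn = DBN([400, 100], opts, trX)     # greedy layer-wise RBM pre-training
#   dbn.train()
#   nn = NN([400, 100], opts, trX, trY)
#   nn.load_from_dbn(dbn)                # hidden sizes must match the DBN's
#   nn.train()                           # backprop fine-tuning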
59 | def train(self): 60 | """Fine-tune the whole network with momentum SGD, minimizing the 61 | mean squared error between the sigmoid outputs and the labels. 62 | 63 | """ 64 | _a = [None] * (len(self._sizes) + 2) 65 | _w = [None] * (len(self._sizes) + 1) 66 | _b = [None] * (len(self._sizes) + 1) 67 | _a[0] = tf.placeholder("float", [None, self._X.shape[1]]) 68 | y = tf.placeholder("float", [None, self._Y.shape[1]]) 69 | for i in range(len(self._sizes) + 1): 70 | _w[i] = tf.Variable(self.w_list[i]) 71 | _b[i] = tf.Variable(self.b_list[i]) 72 | for i in range(1, len(self._sizes) + 2): 73 | _a[i] = tf.nn.sigmoid(tf.matmul(_a[i - 1], _w[i - 1]) + _b[i - 1]) 74 | cost = tf.reduce_mean(tf.square(_a[-1] - y)) 75 | train_op = tf.train.MomentumOptimizer( 76 | self._opts._learning_rate, self._opts._momentum).minimize(cost) 77 | predict_op = tf.argmax(_a[-1], 1) 78 | with tf.Session() as sess: 79 | sess.run(tf.initialize_all_variables()) 80 | for i in range(self._opts._epoches): 81 | for start, end in zip( 82 | range( 83 | 0, len(self._X), 84 | self._opts._batchsize), 85 | range( 86 | self._opts._batchsize, len( 87 | self._X), 88 | self._opts._batchsize)): 89 | sess.run(train_op, feed_dict={ 90 | _a[0]: self._X[start:end], y: self._Y[start:end]}) 91 | for i in range(len(self._sizes) + 1): 92 | self.w_list[i] = sess.run(_w[i]) 93 | self.b_list[i] = sess.run(_b[i]) 94 | print np.mean(np.argmax(self._Y, axis=1) == 95 | sess.run(predict_op, feed_dict={ 96 | _a[0]: self._X, y: self._Y})) 97 | 98 | def predict(self, X): 99 | """Run a forward pass with the current weights and biases. 100 | 101 | :X: 2-D numpy array of inputs 102 | :returns: predicted class index for each row of X 103 | 104 | """ 105 | _a = [None] * (len(self._sizes) + 2) 106 | _w = [None] * len(self.w_list) 107 | _b = [None] * len(self.b_list) 108 | _a[0] = tf.placeholder("float", [None, self._X.shape[1]]) 109 | for i in range(len(self.w_list)): 110 | _w[i] = tf.constant(self.w_list[i]) 111 | _b[i] = tf.constant(self.b_list[i]) 112 | for i in range(1, len(self._sizes) + 2): 113 | _a[i] = tf.nn.sigmoid(tf.matmul(_a[i - 1], _w[i - 1]) + _b[i - 1]) 114 | predict_op = tf.argmax(_a[-1], 1) 115 | with tf.Session() as sess: 116 | sess.run(tf.initialize_all_variables()) 117 | return sess.run(predict_op, feed_dict={_a[0]: X}) 118 | -------------------------------------------------------------------------------- /opts.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 Peng Liu 6 | # 7 | # Distributed under terms of the GNU GPL3 license. 8 | 9 | """ 10 | This file implements the DLOption class for storing the deep-learning hyper-parameters. 11 | """ 12 | 13 | 14 | class DLOption(object): 15 | 16 | """Container for the hyper-parameters shared by the RBM, DBN and NN classes. """ 17 | 18 | def __init__(self, epoches, learning_rate, batchsize, momentum, penaltyL2, 19 | dropoutProb): 20 | """Store the given hyper-parameters.
21 | 22 | :epoches: TODO 23 | :learning_rate: TODO 24 | :batchsize: TODO 25 | :momentum: TODO 26 | :penaltyL2: TODO 27 | :dropout: TODO 28 | :dropoutProb: TODO 29 | 30 | """ 31 | self._epoches = epoches 32 | self._learning_rate = learning_rate 33 | self._batchsize = batchsize 34 | self._momentum = momentum 35 | self._penaltyL2 = penaltyL2 36 | self._dropoutProb = dropoutProb 37 | 38 | -------------------------------------------------------------------------------- /rbm.py: -------------------------------------------------------------------------------- 1 | """ An rbm implementation for TensorFlow, based closely on the one in Theano """ 2 | import tensorflow as tf 3 | import math 4 | 5 | 6 | def sample_prob(probs): 7 | return tf.nn.relu( 8 | tf.sign( 9 | probs - tf.random_uniform(tf.shape(probs)))) 10 | 11 | 12 | class RBM(object): 13 | 14 | def __init__(self, name, input_size, output_size): 15 | with tf.name_scope("rbm_" + name): 16 | self.weights = tf.Variable( 17 | tf.truncated_normal( 18 | [input_size, output_size], 19 | stddev=1.0 / math.sqrt(float(input_size))), name="weights") 20 | self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias") 21 | self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias") 22 | 23 | def propup(self, visible): 24 | return tf.nn.sigmoid(tf.matmul(visible, self.weights) + self.h_bias) 25 | 26 | def propdown(self, hidden): 27 | return tf.nn.sigmoid( 28 | tf.matmul(hidden, tf.transpose(self.weights)) + self.v_bias) 29 | 30 | def sample_h_given_v(self, v_sample): 31 | return sample_prob(self.propup(v_sample)) 32 | 33 | def sample_v_given_h(self, h_sample): 34 | return sample_prob(self.propdown(h_sample)) 35 | 36 | def gibbs_hvh(self, h0_sample): 37 | v_sample = self.sample_v_given_h(h0_sample) 38 | h_sample = self.sample_h_given_v(v_sample) 39 | return [v_sample, h_sample] 40 | 41 | def gibbs_vhv(self, v0_sample): 42 | h_sample = self.sample_h_given_v(v0_sample) 43 | v_sample = self.sample_v_given_h(h_sample) 44 | return [h_sample, v_sample] 45 | 46 | def cd1(self, visibles, learning_rate=0.1): 47 | h_start = self.propup(visibles) 48 | v_end = self.propdown(h_start) 49 | h_end = self.propup(v_end) 50 | w_positive_grad = tf.matmul(tf.transpose(visibles), h_start) 51 | w_negative_grad = tf.matmul(tf.transpose(v_end), h_end) 52 | update_w = self.weights.assign_add( 53 | learning_rate * (w_positive_grad - w_negative_grad)) 54 | update_vb = self.v_bias.assign_add( 55 | learning_rate * tf.reduce_mean(visibles - v_end, 0)) 56 | update_hb = self.h_bias.assign_add( 57 | learning_rate * tf.reduce_mean(h_start - h_end, 0)) 58 | return [update_w, update_vb, update_hb] 59 | 60 | def reconstruction_error(self, dataset): 61 | err = tf.stop_gradient(dataset - self.gibbs_vhv(dataset)[1]) 62 | return tf.reduce_sum(err * err) 63 | -------------------------------------------------------------------------------- /rbm_MNIST_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import input_data 4 | import Image 5 | from util import tile_raster_images 6 | 7 | 8 | def sample_prob(probs): 9 | return tf.nn.relu( 10 | tf.sign( 11 | probs - tf.random_uniform(tf.shape(probs)))) 12 | 13 | alpha = 1.0 14 | batchsize = 100 15 | 16 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 17 | trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\ 18 | mnist.test.labels 19 | 20 | X = tf.placeholder("float", [None, 784]) 21 | Y = tf.placeholder("float", 
[None, 10]) 22 | 23 | rbm_w = tf.placeholder("float", [784, 500]) 24 | rbm_vb = tf.placeholder("float", [784]) 25 | rbm_hb = tf.placeholder("float", [500]) 26 | h0 = sample_prob(tf.nn.sigmoid(tf.matmul(X, rbm_w) + rbm_hb)) 27 | v1 = sample_prob(tf.nn.sigmoid( 28 | tf.matmul(h0, tf.transpose(rbm_w)) + rbm_vb)) 29 | h1 = tf.nn.sigmoid(tf.matmul(v1, rbm_w) + rbm_hb) 30 | w_positive_grad = tf.matmul(tf.transpose(X), h0) 31 | w_negative_grad = tf.matmul(tf.transpose(v1), h1) 32 | update_w = rbm_w + alpha * \ 33 | (w_positive_grad - w_negative_grad) / tf.to_float(tf.shape(X)[0]) 34 | update_vb = rbm_vb + alpha * tf.reduce_mean(X - v1, 0) 35 | update_hb = rbm_hb + alpha * tf.reduce_mean(h0 - h1, 0) 36 | 37 | h_sample = sample_prob(tf.nn.sigmoid(tf.matmul(X, rbm_w) + rbm_hb)) 38 | v_sample = sample_prob(tf.nn.sigmoid( 39 | tf.matmul(h_sample, tf.transpose(rbm_w)) + rbm_vb)) 40 | err = X - v_sample 41 | err_sum = tf.reduce_mean(err * err) 42 | 43 | sess = tf.Session() 44 | init = tf.initialize_all_variables() 45 | sess.run(init) 46 | 47 | n_w = np.zeros([784, 500], np.float32) 48 | n_vb = np.zeros([784], np.float32) 49 | n_hb = np.zeros([500], np.float32) 50 | o_w = np.zeros([784, 500], np.float32) 51 | o_vb = np.zeros([784], np.float32) 52 | o_hb = np.zeros([500], np.float32) 53 | print sess.run( 54 | err_sum, feed_dict={X: trX, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}) 55 | 56 | for start, end in zip( 57 | range(0, len(trX), batchsize), range(batchsize, len(trX), batchsize)): 58 | batch = trX[start:end] 59 | n_w = sess.run(update_w, feed_dict={ 60 | X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}) 61 | n_vb = sess.run(update_vb, feed_dict={ 62 | X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}) 63 | n_hb = sess.run(update_hb, feed_dict={ 64 | X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb}) 65 | o_w = n_w 66 | o_vb = n_vb 67 | o_hb = n_hb 68 | if start % 10000 == 0: 69 | print sess.run( 70 | err_sum, feed_dict={X: trX, rbm_w: n_w, rbm_vb: n_vb, rbm_hb: n_hb}) 71 | image = Image.fromarray( 72 | tile_raster_images( 73 | X=n_w.T, 74 | img_shape=(28, 28), 75 | tile_shape=(25, 20), 76 | tile_spacing=(1, 1) 77 | ) 78 | ) 79 | image.save("rbm_%d.png" % (start / 10000)) 80 | -------------------------------------------------------------------------------- /rbm_tf.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 Peng Liu 6 | # 7 | # Distributed under terms of the GNU GPL3 license. 8 | 9 | """ 10 | This file implements the RBM class for the TensorFlow library. 11 | """ 12 | import math 13 | import tensorflow as tf 14 | import numpy as np 15 | import Image 16 | from util import tile_raster_images 17 | 18 | 19 | class RBM(object): 20 | 21 | """RBM class for TensorFlow""" 22 | 23 | def __init__(self, name, input_size, output_size, opts): 24 | """Initialize an RBM object.
25 | 26 | :name: TODO 27 | :input_size: TODO 28 | :output_size: TODO 29 | 30 | """ 31 | self._name = name 32 | self._input_size = input_size 33 | self._output_size = output_size 34 | self._opts = opts 35 | self.init_w = np.zeros([input_size, output_size], np.float32) 36 | self.init_hb = np.zeros([output_size], np.float32) 37 | self.init_vb = np.zeros([input_size], np.float32) 38 | self.w = np.zeros([input_size, output_size], np.float32) 39 | self.hb = np.zeros([output_size], np.float32) 40 | self.vb = np.zeros([input_size], np.float32) 41 | 42 | def reset_init_parameter(self, init_weights, init_hbias, init_vbias): 43 | """TODO: Docstring for reset_para. 44 | 45 | :init_weights: TODO 46 | :init_hbias: TODO 47 | :init_vbias: TODO 48 | :returns: TODO 49 | 50 | """ 51 | self.init_w = init_weights 52 | self.init_hb = init_hbias 53 | self.init_vb = init_vbias 54 | 55 | def propup(self, visible, w, hb): 56 | """TODO: Docstring for propup. 57 | 58 | :visible: TODO 59 | :returns: TODO 60 | 61 | """ 62 | return tf.nn.sigmoid(tf.matmul(visible, w) + hb) 63 | 64 | def propdown(self, hidden, w, vb): 65 | """TODO: Docstring for propdown. 66 | 67 | :hidden: TODO 68 | :returns: TODO 69 | 70 | """ 71 | return tf.nn.sigmoid( 72 | tf.matmul(hidden, tf.transpose(w)) + vb) 73 | 74 | def sample_prob(self, probs): 75 | """TODO: Docstring for sample_prob. 76 | 77 | :probs: TODO 78 | :returns: TODO 79 | 80 | """ 81 | return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs)))) 82 | 83 | def train(self, X): 84 | """TODO: Docstring for train. 85 | 86 | :X: TODO 87 | :returns: TODO 88 | 89 | """ 90 | _w = tf.placeholder("float", [self._input_size, self._output_size]) 91 | _hb = tf.placeholder("float", [self._output_size]) 92 | _vb = tf.placeholder("float", [self._input_size]) 93 | _vw = tf.placeholder("float", [self._input_size, self._output_size]) 94 | _vhb = tf.placeholder("float", [self._output_size]) 95 | _vvb = tf.placeholder("float", [self._input_size]) 96 | _current_vw = np.zeros( 97 | [self._input_size, self._output_size], np.float32) 98 | _current_vhb = np.zeros([self._output_size], np.float32) 99 | _current_vvb = np.zeros([self._input_size], np.float32) 100 | v0 = tf.placeholder("float", [None, self._input_size]) 101 | h0 = self.sample_prob(self.propup(v0, _w, _hb)) 102 | v1 = self.sample_prob(self.propdown(h0, _w, _vb)) 103 | h1 = self.propup(v1, _w, _hb) 104 | positive_grad = tf.matmul(tf.transpose(v0), h0) 105 | negative_grad = tf.matmul(tf.transpose(v1), h1) 106 | update_vw = _vw * self._opts._momentum + self._opts._learning_rate *\ 107 | (positive_grad - negative_grad) / tf.to_float(tf.shape(v0)[0]) 108 | update_vvb = _vvb * self._opts._momentum + \ 109 | self._opts._learning_rate * tf.reduce_mean(v0 - v1, 0) 110 | update_vhb = _vhb * self._opts._momentum + \ 111 | self._opts._learning_rate * tf.reduce_mean(h0 - h1, 0) 112 | update_w = _w + _vw 113 | update_vb = _vb + _vvb 114 | update_hb = _hb + _vhb 115 | with tf.Session() as sess: 116 | sess.run(tf.initialize_all_variables()) 117 | old_w = self.init_w 118 | old_hb = self.init_hb 119 | old_vb = self.init_vb 120 | for i in range(self._opts._epoches): 121 | for start, end in zip(range(0, len(X), self._opts._batchsize), 122 | range(self._opts._batchsize, 123 | len(X), self._opts._batchsize)): 124 | batch = X[start:end] 125 | _current_vw = sess.run(update_vw, feed_dict={ 126 | v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, 127 | _vw: _current_vw}) 128 | _current_vhb = sess.run(update_vhb, feed_dict={ 129 | v0: batch, _w: old_w, _hb: old_hb, 
_vb: old_vb, 130 | _vhb: _current_vhb}) 131 | _current_vvb = sess.run(update_vvb, feed_dict={ 132 | v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, 133 | _vvb: _current_vvb}) 134 | old_w = sess.run(update_w, feed_dict={ 135 | _w: old_w, _vw: _current_vw}) 136 | old_hb = sess.run(update_hb, feed_dict={ 137 | _hb: old_hb, _vhb: _current_vhb}) 138 | old_vb = sess.run(update_vb, feed_dict={ 139 | _vb: old_vb, _vvb: _current_vvb}) 140 | image = Image.fromarray( 141 | tile_raster_images( 142 | X=old_w.T, 143 | img_shape=(int(math.sqrt(self._input_size)), 144 | int(math.sqrt(self._input_size))), 145 | tile_shape=(int(math.sqrt(self._output_size)), 146 | int(math.sqrt(self._output_size))), 147 | tile_spacing=(1, 1) 148 | ) 149 | ) 150 | image.save("%s_%d.png" % (self._name, i)) 151 | self.w = old_w 152 | self.hb = old_hb 153 | self.vb = old_vb 154 | 155 | def rbmup(self, X): 156 | """Propagate X through this RBM and return the hidden-layer 157 | activations (used to feed the next RBM in a DBN). 158 | 159 | :X: 2-D numpy array of visible-layer inputs 160 | :returns: 2-D numpy array of hidden-layer probabilities 161 | """ 162 | input_X = tf.constant(X) 163 | _w = tf.constant(self.w) 164 | _hb = tf.constant(self.hb) 165 | out = tf.nn.sigmoid(tf.matmul(input_X, _w) + _hb) 166 | with tf.Session() as sess: 167 | sess.run(tf.initialize_all_variables()) 168 | return sess.run(out) 169 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 Peng Liu 6 | # 7 | # Distributed under terms of the GNU GPL3 license. 8 | 9 | """ 10 | Train a DBN on MNIST, fine-tune it with the NN class, and print the test accuracy. 11 | """ 12 | 13 | import input_data 14 | from opts import DLOption 15 | from dbn_tf import DBN 16 | from nn_tf import NN 17 | import numpy as np 18 | 19 | 20 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 21 | trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images,\ 22 | mnist.test.labels 23 | 24 | opts = DLOption(10, 1., 100, 0.0, 0., 0.) 25 | dbn = DBN([400, 100], opts, trX) 26 | dbn.train() 27 | # the NN's hidden sizes must match the DBN's ([400, 100]) so load_from_dbn can copy weights 28 | nn = NN([400, 100], opts, trX, trY) 29 | nn.load_from_dbn(dbn) 30 | nn.train() 31 | print np.mean(np.argmax(teY, axis=1) == nn.predict(teX)) 32 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ This file contains different utility functions that are not connected 2 | in any way to the networks presented in the tutorials, but rather help in 3 | processing the outputs into a more understandable way. 4 | 5 | For example ``tile_raster_images`` helps in generating an easy-to-grasp 6 | image from a set of samples or weights. 7 | """ 8 | 9 | import numpy 10 | 11 | 12 | def scale_to_unit_interval(ndar, eps=1e-8): 13 | """ Scales all values in the ndarray ndar to be between 0 and 1 """ 14 | ndar = ndar.copy() 15 | ndar -= ndar.min() 16 | ndar *= 1.0 / (ndar.max() + eps) 17 | return ndar 18 | 19 | 20 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), 21 | scale_rows_to_unit_interval=True, 22 | output_pixel_vals=True): 23 | """ 24 | Transform an array with one flattened image per row, into an array in 25 | which images are reshaped and laid out like tiles on a floor. 26 | 27 | This function is useful for visualizing datasets whose rows are images, 28 | and also columns of matrices for transforming those rows 29 | (such as the first layer of a neural net).
30 | 31 | :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can 32 | be 2-D ndarrays or None; 33 | :param X: a 2-D array in which every row is a flattened image. 34 | 35 | :type img_shape: tuple; (height, width) 36 | :param img_shape: the original shape of each image 37 | 38 | :type tile_shape: tuple; (rows, cols) 39 | :param tile_shape: the number of images to tile (rows, cols) 40 | 41 | :param output_pixel_vals: if output should be pixel values (i.e. int8 42 | values) or floats 43 | 44 | :param scale_rows_to_unit_interval: if the values need to be scaled before 45 | being plotted to [0,1] or not 46 | 47 | 48 | :returns: array suitable for viewing as an image. 49 | (See:`Image.fromarray`.) 50 | :rtype: a 2-d array with same dtype as X. 51 | 52 | """ 53 | 54 | assert len(img_shape) == 2 55 | assert len(tile_shape) == 2 56 | assert len(tile_spacing) == 2 57 | 58 | # The expression below can be re-written in a more C style as 59 | # follows : 60 | # 61 | # out_shape = [0,0] 62 | # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - 63 | # tile_spacing[0] 64 | # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - 65 | # tile_spacing[1] 66 | out_shape = [ 67 | (ishp + tsp) * tshp - tsp 68 | for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing) 69 | ] 70 | 71 | if isinstance(X, tuple): 72 | assert len(X) == 4 73 | # Create an output numpy ndarray to store the image 74 | if output_pixel_vals: 75 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 76 | dtype='uint8') 77 | else: 78 | out_array = numpy.zeros((out_shape[0], out_shape[1], 4), 79 | dtype=X.dtype) 80 | 81 | #colors default to 0, alpha defaults to 1 (opaque) 82 | if output_pixel_vals: 83 | channel_defaults = [0, 0, 0, 255] 84 | else: 85 | channel_defaults = [0., 0., 0., 1.] 86 | 87 | for i in range(4): 88 | if X[i] is None: 89 | # if channel is None, fill it with zeros of the correct 90 | # dtype 91 | dt = out_array.dtype 92 | if output_pixel_vals: 93 | dt = 'uint8' 94 | out_array[:, :, i] = numpy.zeros( 95 | out_shape, 96 | dtype=dt 97 | ) + channel_defaults[i] 98 | else: 99 | # use a recurrent call to compute the channel and store it 100 | # in the output 101 | out_array[:, :, i] = tile_raster_images( 102 | X[i], img_shape, tile_shape, tile_spacing, 103 | scale_rows_to_unit_interval, output_pixel_vals) 104 | return out_array 105 | 106 | else: 107 | # if we are dealing with only one channel 108 | H, W = img_shape 109 | Hs, Ws = tile_spacing 110 | 111 | # generate a matrix to store the output 112 | dt = X.dtype 113 | if output_pixel_vals: 114 | dt = 'uint8' 115 | out_array = numpy.zeros(out_shape, dtype=dt) 116 | 117 | for tile_row in range(tile_shape[0]): 118 | for tile_col in range(tile_shape[1]): 119 | if tile_row * tile_shape[1] + tile_col < X.shape[0]: 120 | this_x = X[tile_row * tile_shape[1] + tile_col] 121 | if scale_rows_to_unit_interval: 122 | # if we should scale values to be between 0 and 1 123 | # do this by calling the `scale_to_unit_interval` 124 | # function 125 | this_img = scale_to_unit_interval( 126 | this_x.reshape(img_shape)) 127 | else: 128 | this_img = this_x.reshape(img_shape) 129 | # add the slice to the corresponding position in the 130 | # output array 131 | c = 1 132 | if output_pixel_vals: 133 | c = 255 134 | out_array[ 135 | tile_row * (H + Hs): tile_row * (H + Hs) + H, 136 | tile_col * (W + Ws): tile_col * (W + Ws) + W 137 | ] = this_img * c 138 | return out_array 139 | --------------------------------------------------------------------------------
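Usage note: for single-channel input with output_pixel_vals=True, tile_raster_images returns a plain 2-D uint8 array that Pillow can save directly; rbm_tf.py and rbm_MNIST_test.py use it this way to dump learned filters as PNGs. A minimal sketch of that pattern, assuming Pillow is installed (the scripts above use the older `import Image` form) and using a random stand-in for a trained 784x500 weight matrix:

    import numpy as np
    from PIL import Image
    from util import tile_raster_images

    w = np.random.randn(784, 500).astype(np.float32)  # stand-in for a trained RBM weight matrix
    # each of the 500 columns of w is one 784-dimensional filter; transpose so rows are filters
    tiles = tile_raster_images(X=w.T, img_shape=(28, 28), tile_shape=(25, 20), tile_spacing=(1, 1))
    Image.fromarray(tiles).save('filters.png')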