├── .gitignore
├── LICENSE
├── README.md
├── param_collection.py
└── prog_nn.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 SynPon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# prog_nn

```
.▄▄ · ▄· ▄▌ ▐ ▄ ▄▄▄· ▐ ▄
▐█ ▀. ▐█▪██▌•█▌▐█▐█ ▄█▪ •█▌▐█
▄▀▀▀█▄▐█▌▐█▪▐█▐▐▌ ██▀· ▄█▀▄ ▐█▐▐▌
▐█▄▪▐█ ▐█▀·.██▐█▌▐█▪·•▐█▌.▐▌██▐█▌
▀▀▀▀ ▀ • ▀▀ █▪.▀ ▀█▄▀▪▀▀ █▪

██████╗ ███████╗███████╗███████╗ █████╗ ██████╗  ██████╗██╗  ██╗
██╔══██╗██╔════╝██╔════╝██╔════╝██╔══██╗██╔══██╗██╔════╝██║  ██║
██████╔╝█████╗  ███████╗█████╗  ███████║██████╔╝██║     ███████║
██╔══██╗██╔══╝  ╚════██║██╔══╝  ██╔══██║██╔══██╗██║     ██╔══██║
██║  ██║███████╗███████║███████╗██║  ██║██║  ██║╚██████╗██║  ██║
╚═╝  ╚═╝╚══════╝╚══════╝╚══════╝╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝╚═╝  ╚═╝

```

A Quick and Dirty [Progressive Neural Network](https://arxiv.org/abs/1606.04671v3) written in TensorFlow.
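
## Usage

A minimal sketch of how the columns fit together, assuming the TensorFlow 1.x session API this repo targets. The topologies and batch below are illustrative values, not anything prescribed by the code; see `prog_nn.py` for the full test.

```python
import numpy as np
import tensorflow as tf

from prog_nn import InitialColumnProgNN, ExtensibleColumnProgNN

session = tf.Session()  # TF 1.x graph-mode API
activations = [tf.nn.relu, tf.nn.relu, tf.nn.softmax]

# Build a column for task 1, then a second column for task 2 with
# lateral connections into the first column's hidden layers.
col_0 = InitialColumnProgNN([128, 64, 32, 10], activations, session)
col_1 = ExtensibleColumnProgNN([128, 48, 24, 10], activations, session, [col_0])

# Every column's input placeholder must be fed the same observations.
x = np.random.rand(16, 128)
h = session.run(col_1.h, feed_dict={col_1.o_n: x, col_0.o_n: x})
```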
--------------------------------------------------------------------------------
/param_collection.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

class ParamCollection(object):

    def __init__(self, sess, params):
        """
        `params` should be a list of TensorFlow variables.
        """
        self._params = params
        # Keep a reference to the session so we can read and write the
        # parameter values, and initialize the variables we were handed.
        self.sess = sess
        self.sess.run(tf.variables_initializer(params))

    @property
    def params(self):
        return self._params

    def get_values(self):
        """
        Return a list of the current values of the parameter arrays.
        """
        return [self.sess.run(param) for param in self._params]

    def get_shapes(self):
        """
        Return the shapes of the parameter arrays.
        """
        return [param.get_shape().as_list() for param in self._params]

    def get_total_size(self):
        """
        Total number of parameters.
        """
        return sum(int(np.prod(shape)) for shape in self.get_shapes())

    def num_vars(self):
        """
        Number of parameter arrays.
        """
        return len(self._params)

    def set_values(self, parvals):
        """
        Set the values of the parameter arrays from the list of arrays `parvals`.
        """
        assert len(parvals) == len(self._params)
        for (param, newval) in zip(self._params, parvals):
            # Check the shape before assigning, not after.
            assert tuple(param.get_shape().as_list()) == newval.shape
            # Note: tf.assign adds a new op to the graph on every call, which
            # is fine for occasional use but slow inside a training loop.
            self.sess.run(tf.assign(param, newval))

    def set_values_flat(self, theta):
        """
        Set the parameters from a single vector `theta` that holds all of the
        parameters flattened and concatenated.
        """
        arrs = []
        n = 0
        for shape in self.get_shapes():
            size = int(np.prod(shape))
            arrs.append(theta[n:n+size].reshape(shape))
            n += size
        assert theta.size == n
        self.set_values(arrs)

    def get_values_flat(self):
        """
        Flatten all parameter arrays into one vector and return it as a numpy array.
        """
        theta = np.empty(self.get_total_size())
        n = 0
        for param in self._params:
            s = int(np.prod(param.get_shape().as_list()))
            theta[n:n+s] = self.sess.run(param).flatten()
            n += s
        assert theta.size == n
        return theta

    def _params_names(self):
        return [(param, param.name) for param in self._params]

    def to_h5(self, grp):
        """
        Save the parameter arrays to the hdf5 group `grp`.
        """
        for (param, name) in self._params_names():
            arr = self.sess.run(param)
            grp[name] = arr

    def from_h5(self, grp):
        """
        Load the parameter arrays from the hdf5 group `grp`.
        """
        # `[...]` reads each h5py dataset into a numpy array.
        parvals = [grp[name][...] for (_, name) in self._params_names()]
        self.set_values(parvals)
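

# A minimal smoke test of the flat get/set round trip. This is a sketch under
# the same TF 1.x session API used above; the variable shapes are arbitrary
# illustrative values.
if __name__ == "__main__":
    sess = tf.Session()
    pc = ParamCollection(sess, [tf.Variable(tf.zeros([3, 2])),
                                tf.Variable(tf.zeros([2]))])
    # Write all parameters from one flat vector, then read them back.
    theta = np.arange(pc.get_total_size(), dtype=np.float32)
    pc.set_values_flat(theta)
    assert np.allclose(pc.get_values_flat(), theta)
    print("ParamCollection flat round trip OK")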
--------------------------------------------------------------------------------
/prog_nn.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import tensorflow as tf
import numpy as np
from param_collection import ParamCollection

# Helper functions.
def weight_variable(shape, stddev=0.1, initial=None):
    if initial is None:
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float64)
    return tf.Variable(initial)

def bias_variable(shape, init_bias=0.1, initial=None):
    if initial is None:
        initial = tf.constant(init_bias, shape=shape, dtype=tf.float64)
    return tf.Variable(initial)

class InitialColumnProgNN(object):
    """
    Descr: The initial column to train, for later use in transfer learning
        with a Progressive Neural Network.
    Args:
        topology - A list of the number of units in each layer.
            The first entry is the input dimension.
        activations - A list of activation functions, one per layer transform.
        session - A TensorFlow session.
    Returns:
        None. The layers and parameters are attached to the instance for use
        with InitialColumnProgNN.session.run().
    """

    def __init__(self, topology, activations, session, dtype=tf.float64):
        n_input = topology[0]
        # Number of layers in the network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k+2]
            self.W.append(weight_variable(shape))
            self.b.append(bias_variable([shape[1]]))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)


class ExtensibleColumnProgNN(object):
    """
    Descr: An extensible network column for use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of the number of units in each layer.
            The first entry is the input dimension.
        activations - A list of activation functions, one per layer transform.
        session - A TensorFlow session.
        prev_columns - Previously trained columns, either Initial or Extensible,
            that the current column makes lateral connections to.
    Returns:
        None. The layers and parameters are attached to the instance for use
        with ExtensibleColumnProgNN.session.run().
    """

    def __init__(self, topology, activations, session, prev_columns, dtype=tf.float64):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Number of layers in the network. The first entry of `topology` is
        # n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns

        # All columns must have the same height (number of layers).
        assert all([self.L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = [None] * L
        self.b = [None] * L
        # Lateral connection weights. There are no lateral connections into
        # the first layer, so len(self.U) == L - 1.
        self.U = []
        for k in range(L-1):
            self.U.append([None] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k+2]
            self.W[k] = weight_variable(W_shape)
            self.b[k] = bias_variable([W_shape[1]])
            if k == 0:
                self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k+1]]
                # Remember len(self.U) == L - 1!
                self.U[k-1][kk] = weight_variable(U_shape)
                # Add the lateral contribution from column kk's layer-k activations.
                preactivation += tf.matmul(prev_columns[kk].h[k], self.U[k-1][kk])
            self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k-1][kk])

        self.pc = ParamCollection(self.session, params)
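

# A sketch of how one might actually train a column while keeping the earlier
# columns frozen: optimize only the current column's variables by passing
# `column.pc.params` as `var_list`, so the lateral connections read from the
# previous columns without updating them. The squared-error loss, `targets`
# placeholder, and learning rate below are illustrative assumptions, not part
# of this repo's API.
def make_train_op(column, learning_rate=1e-3):
    # Supervised targets matching the column's output layer width.
    targets = tf.placeholder(tf.float64, shape=[None, column.topology[-1]])
    loss = tf.reduce_mean(tf.square(column.h[-1] - targets))
    # GradientDescentOptimizer creates no slot variables, so nothing else
    # needs initializing. Restricting var_list freezes all previous columns.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss, var_list=column.pc.params)
    return targets, loss, train_op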

def test_ProgNN():
    # Make some fake observations.
    fake1 = np.float64(np.random.rand(4000, 128))
    fake2 = np.float64(np.random.rand(4000, 128))
    fake3 = np.float64(np.random.rand(4000, 128))
    fake4 = np.float64(np.random.rand(4000, 128))
    fake5 = np.float64(np.random.rand(4000, 128))
    n_input = 128
    topology1 = [n_input, 100, 64, 25, 9]
    topology2 = [n_input, 68, 44, 19, 7]
    topology3 = [n_input, 79, 58, 33, 12]
    topology4 = [n_input, 40, 30, 20, 10]
    topology5 = [n_input, 101, 73, 51, 8]
    activations = [tf.nn.relu, tf.nn.relu, tf.nn.relu, tf.nn.softmax]

    # Each ParamCollection initializes its own variables, so no global
    # initializer is needed here.
    session = tf.Session()

    col_0 = InitialColumnProgNN(topology1, activations, session)
    th0 = col_0.pc.get_values_flat()
    col_1 = ExtensibleColumnProgNN(topology2, activations, session, [col_0])
    th1 = col_1.pc.get_values_flat()
    col_2 = ExtensibleColumnProgNN(topology3, activations, session, [col_0, col_1])
    th2 = col_2.pc.get_values_flat()
    col_3 = ExtensibleColumnProgNN(topology4, activations, session, [col_0, col_1, col_2])
    th3 = col_3.pc.get_values_flat()
    col_4 = ExtensibleColumnProgNN(topology5, activations, session, [col_0, col_1, col_2, col_3])
    th4 = col_4.pc.get_values_flat()

    # This pattern for evaluating the Progressive NN extends to an
    # arbitrarily large number of columns / models.

    # Fake train the first network. h_0[-1] holds the information a loss
    # function would need.
    h_0 = col_0.session.run([col_0.h],
        feed_dict={col_0.o_n: fake1})

    # Fake train the second network, but this time with lateral connections to
    # the fake pre-trained, constant weights from the first column of the
    # Progressive NN.
    h_1 = col_1.session.run([col_1.h],
        feed_dict={col_1.o_n: fake2, col_1.prev_columns[0].o_n: fake2})

    # Now fake train a third column that has lateral connections to both
    # previously "trained" columns.
    h_2 = col_2.session.run([col_2.h],
        feed_dict={col_2.o_n: fake3,
                   col_2.prev_columns[0].o_n: fake3,
                   col_2.prev_columns[1].o_n: fake3})

    # Fourth column / fake instance of training.
    h_3 = col_3.session.run([col_3.h],
        feed_dict={col_3.o_n: fake4,
                   col_3.prev_columns[0].o_n: fake4,
                   col_3.prev_columns[1].o_n: fake4,
                   col_3.prev_columns[2].o_n: fake4})

    # Fifth column. Notice that we have to feed the same observations to n
    # placeholders for a Progressive NN with n columns.
    h_4 = col_4.session.run([col_4.h],
        feed_dict={col_4.o_n: fake5,
                   col_4.prev_columns[0].o_n: fake5,
                   col_4.prev_columns[1].o_n: fake5,
                   col_4.prev_columns[2].o_n: fake5,
                   col_4.prev_columns[3].o_n: fake5})

    # Anyway, you get the drift. Hope this helps someone understand
    # Progressive Neural Networks!

    # Make sure the earlier columns' parameters didn't change while later
    # columns used them: the flattened parameters should still match the
    # snapshots taken right after construction.
    assert np.allclose(col_4.prev_columns[0].pc.get_values_flat(), th0)
    assert np.allclose(col_4.prev_columns[1].pc.get_values_flat(), th1)
    assert np.allclose(col_4.prev_columns[2].pc.get_values_flat(), th2)
    assert np.allclose(col_4.prev_columns[3].pc.get_values_flat(), th3)

if __name__ == "__main__":
    test_ProgNN()
--------------------------------------------------------------------------------