├── .gitignore
├── LICENSE
├── README.md
├── param_collection.py
└── prog_nn.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2016 SynPon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# prog_nn

```
.▄▄ · ▄· ▄▌ ▐ ▄ ▄▄▄· ▐ ▄
▐█ ▀. ▐█▪██▌•█▌▐█▐█ ▄█▪ •█▌▐█
▄▀▀▀█▄▐█▌▐█▪▐█▐▐▌ ██▀· ▄█▀▄ ▐█▐▐▌
▐█▄▪▐█ ▐█▀·.██▐█▌▐█▪·•▐█▌.▐▌██▐█▌
▀▀▀▀ ▀ • ▀▀ █▪.▀ ▀█▄▀▪▀▀ █▪

██████╗ ███████╗███████╗███████╗ █████╗ ██████╗  ██████╗██╗  ██╗
██╔══██╗██╔════╝██╔════╝██╔════╝██╔══██╗██╔══██╗██╔════╝██║  ██║
██████╔╝█████╗  ███████╗█████╗  ███████║██████╔╝██║     ███████║
██╔══██╗██╔══╝  ╚════██║██╔══╝  ██╔══██║██╔══██╗██║     ██╔══██║
██║  ██║███████╗███████║███████╗██║  ██║██║  ██║╚██████╗██║  ██║
╚═╝  ╚═╝╚══════╝╚══════╝╚══════╝╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝╚═╝  ╚═╝

```

A Quick and Dirty [Progressive Neural Network](https://arxiv.org/abs/1606.04671v3) written in TensorFlow.
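
## Usage

A minimal sketch of how the columns fit together, assuming the TensorFlow 1.x session API this repo targets. The topologies and batch below are illustrative values, not anything prescribed by the code; see `prog_nn.py` for the full test.

```python
import numpy as np
import tensorflow as tf

from prog_nn import InitialColumnProgNN, ExtensibleColumnProgNN

session = tf.Session()  # TF 1.x graph-mode API
activations = [tf.nn.relu, tf.nn.relu, tf.nn.softmax]

# Build a column for task 1, then a second column for task 2 with
# lateral connections into the first column's hidden layers.
col_0 = InitialColumnProgNN([128, 64, 32, 10], activations, session)
col_1 = ExtensibleColumnProgNN([128, 48, 24, 10], activations, session, [col_0])

# Every column's input placeholder must be fed the same observations.
x = np.random.rand(16, 128)
h = session.run(col_1.h, feed_dict={col_1.o_n: x, col_0.o_n: x})
```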
--------------------------------------------------------------------------------
/param_collection.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np

class ParamCollection(object):

    def __init__(self, sess, params):
        """
        `params` should be a list of TensorFlow variables.
        """
        self._params = params
        # Keep a reference to the session so we can read and write the
        # parameter values, and initialize the variables we were handed.
        self.sess = sess
        self.sess.run(tf.variables_initializer(params))

    @property
    def params(self):
        return self._params

    def get_values(self):
        """
        Return a list of the current values of the parameter arrays.
        """
        return [self.sess.run(param) for param in self._params]

    def get_shapes(self):
        """
        Return the shapes of the parameter arrays.
        """
        return [param.get_shape().as_list() for param in self._params]

    def get_total_size(self):
        """
        Total number of parameters.
        """
        return sum(int(np.prod(shape)) for shape in self.get_shapes())

    def num_vars(self):
        """
        Number of parameter arrays.
        """
        return len(self._params)

    def set_values(self, parvals):
        """
        Set the values of the parameter arrays from the list of arrays `parvals`.
        """
        assert len(parvals) == len(self._params)
        for (param, newval) in zip(self._params, parvals):
            # Check the shape before assigning, not after.
            assert tuple(param.get_shape().as_list()) == newval.shape
            # Note: tf.assign adds a new op to the graph on every call, which
            # is fine for occasional use but slow inside a training loop.
            self.sess.run(tf.assign(param, newval))

    def set_values_flat(self, theta):
        """
        Set the parameters from a single vector `theta` that holds all of the
        parameters flattened and concatenated.
        """
        arrs = []
        n = 0
        for shape in self.get_shapes():
            size = int(np.prod(shape))
            arrs.append(theta[n:n+size].reshape(shape))
            n += size
        assert theta.size == n
        self.set_values(arrs)

    def get_values_flat(self):
        """
        Flatten all parameter arrays into one vector and return it as a numpy array.
        """
        theta = np.empty(self.get_total_size())
        n = 0
        for param in self._params:
            s = int(np.prod(param.get_shape().as_list()))
            theta[n:n+s] = self.sess.run(param).flatten()
            n += s
        assert theta.size == n
        return theta

    def _params_names(self):
        return [(param, param.name) for param in self._params]

    def to_h5(self, grp):
        """
        Save the parameter arrays to the hdf5 group `grp`.
        """
        for (param, name) in self._params_names():
            arr = self.sess.run(param)
            grp[name] = arr

    def from_h5(self, grp):
        """
        Load the parameter arrays from the hdf5 group `grp`.
        """
        # `[...]` reads each h5py dataset into a numpy array.
        parvals = [grp[name][...] for (_, name) in self._params_names()]
        self.set_values(parvals)
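

# A minimal smoke test of the flat get/set round trip. This is a sketch under
# the same TF 1.x session API used above; the variable shapes are arbitrary
# illustrative values.
if __name__ == "__main__":
    sess = tf.Session()
    pc = ParamCollection(sess, [tf.Variable(tf.zeros([3, 2])),
                                tf.Variable(tf.zeros([2]))])
    # Write all parameters from one flat vector, then read them back.
    theta = np.arange(pc.get_total_size(), dtype=np.float32)
    pc.set_values_flat(theta)
    assert np.allclose(pc.get_values_flat(), theta)
    print("ParamCollection flat round trip OK")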
--------------------------------------------------------------------------------
/prog_nn.py:
--------------------------------------------------------------------------------
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
import tensorflow as tf
import numpy as np
from param_collection import ParamCollection

# Helper functions.
def weight_variable(shape, stddev=0.1, initial=None):
    if initial is None:
        initial = tf.truncated_normal(shape, stddev=stddev, dtype=tf.float64)
    return tf.Variable(initial)

def bias_variable(shape, init_bias=0.1, initial=None):
    if initial is None:
        initial = tf.constant(init_bias, shape=shape, dtype=tf.float64)
    return tf.Variable(initial)

class InitialColumnProgNN(object):
    """
    Descr: The initial column to train, for later use in transfer learning
        with a Progressive Neural Network.
    Args:
        topology - A list of the number of units in each layer.
            The first entry is the input dimension.
        activations - A list of activation functions, one per layer transform.
        session - A TensorFlow session.
    Returns:
        None. The layers and parameters are attached to the instance for use
        with InitialColumnProgNN.session.run().
    """

    def __init__(self, topology, activations, session, dtype=tf.float64):
        n_input = topology[0]
        # Number of layers in the network.
        L = len(topology) - 1
        self.session = session
        self.L = L
        self.topology = topology
        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = []
        self.b = []
        self.h = [self.o_n]
        params = []
        for k in range(L):
            shape = topology[k:k+2]
            self.W.append(weight_variable(shape))
            self.b.append(bias_variable([shape[1]]))
            self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
            params.append(self.W[-1])
            params.append(self.b[-1])
        self.pc = ParamCollection(self.session, params)


class ExtensibleColumnProgNN(object):
    """
    Descr: An extensible network column for use in transfer learning with a
        Progressive Neural Network.
    Args:
        topology - A list of the number of units in each layer.
            The first entry is the input dimension.
        activations - A list of activation functions, one per layer transform.
        session - A TensorFlow session.
        prev_columns - Previously trained columns, either Initial or Extensible,
            that the current column makes lateral connections to.
    Returns:
        None. The layers and parameters are attached to the instance for use
        with ExtensibleColumnProgNN.session.run().
    """

    def __init__(self, topology, activations, session, prev_columns, dtype=tf.float64):
        n_input = topology[0]
        self.topology = topology
        self.session = session
        width = len(prev_columns)
        # Number of layers in the network. The first entry of `topology` is
        # n_input, so it doesn't count.
        L = len(topology) - 1
        self.L = L
        self.prev_columns = prev_columns

        # All columns must have the same height (number of layers).
        assert all([self.L == x.L for x in prev_columns])

        self.o_n = tf.placeholder(dtype, shape=[None, n_input])

        self.W = [None] * L
        self.b = [None] * L
        # Lateral connection weights. There are no lateral connections into
        # the first layer, so len(self.U) == L - 1.
        self.U = []
        for k in range(L-1):
            self.U.append([None] * width)
        self.h = [self.o_n]
        # Collect parameters to hand off to ParamCollection.
        params = []
        for k in range(L):
            W_shape = topology[k:k+2]
            self.W[k] = weight_variable(W_shape)
            self.b[k] = bias_variable([W_shape[1]])
            if k == 0:
                self.h.append(activations[k](tf.matmul(self.h[-1], self.W[k]) + self.b[k]))
                params.append(self.W[k])
                params.append(self.b[k])
                continue
            preactivation = tf.matmul(self.h[-1], self.W[k]) + self.b[k]
            for kk in range(width):
                U_shape = [prev_columns[kk].topology[k], topology[k+1]]
                # Remember len(self.U) == L - 1!
                self.U[k-1][kk] = weight_variable(U_shape)
                # Add the lateral contribution from column kk's layer-k activations.
                preactivation += tf.matmul(prev_columns[kk].h[k], self.U[k-1][kk])
            self.h.append(activations[k](preactivation))
            params.append(self.W[k])
            params.append(self.b[k])
            for kk in range(width):
                params.append(self.U[k-1][kk])

        self.pc = ParamCollection(self.session, params)
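

# A sketch of how one might actually train a column while keeping the earlier
# columns frozen: optimize only the current column's variables by passing
# `column.pc.params` as `var_list`, so the lateral connections read from the
# previous columns without updating them. The squared-error loss, `targets`
# placeholder, and learning rate below are illustrative assumptions, not part
# of this repo's API.
def make_train_op(column, learning_rate=1e-3):
    # Supervised targets matching the column's output layer width.
    targets = tf.placeholder(tf.float64, shape=[None, column.topology[-1]])
    loss = tf.reduce_mean(tf.square(column.h[-1] - targets))
    # GradientDescentOptimizer creates no slot variables, so nothing else
    # needs initializing. Restricting var_list freezes all previous columns.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss, var_list=column.pc.params)
    return targets, loss, train_op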

def test_ProgNN():
    # Make some fake observations.
    fake1 = np.float64(np.random.rand(4000, 128))
    fake2 = np.float64(np.random.rand(4000, 128))
    fake3 = np.float64(np.random.rand(4000, 128))
    fake4 = np.float64(np.random.rand(4000, 128))
    fake5 = np.float64(np.random.rand(4000, 128))
    n_input = 128
    topology1 = [n_input, 100, 64, 25, 9]
    topology2 = [n_input, 68, 44, 19, 7]
    topology3 = [n_input, 79, 58, 33, 12]
    topology4 = [n_input, 40, 30, 20, 10]
    topology5 = [n_input, 101, 73, 51, 8]
    activations = [tf.nn.relu, tf.nn.relu, tf.nn.relu, tf.nn.softmax]

    # Each ParamCollection initializes its own variables, so no global
    # initializer is needed here.
    session = tf.Session()

    col_0 = InitialColumnProgNN(topology1, activations, session)
    th0 = col_0.pc.get_values_flat()
    col_1 = ExtensibleColumnProgNN(topology2, activations, session, [col_0])
    th1 = col_1.pc.get_values_flat()
    col_2 = ExtensibleColumnProgNN(topology3, activations, session, [col_0, col_1])
    th2 = col_2.pc.get_values_flat()
    col_3 = ExtensibleColumnProgNN(topology4, activations, session, [col_0, col_1, col_2])
    th3 = col_3.pc.get_values_flat()
    col_4 = ExtensibleColumnProgNN(topology5, activations, session, [col_0, col_1, col_2, col_3])
    th4 = col_4.pc.get_values_flat()

    # This pattern for evaluating the Progressive NN extends to an
    # arbitrarily large number of columns / models.

    # Fake train the first network. h_0[-1] holds the information a loss
    # function would need.
    h_0 = col_0.session.run([col_0.h],
        feed_dict={col_0.o_n: fake1})

    # Fake train the second network, but this time with lateral connections to
    # the fake pre-trained, constant weights from the first column of the
    # Progressive NN.
    h_1 = col_1.session.run([col_1.h],
        feed_dict={col_1.o_n: fake2, col_1.prev_columns[0].o_n: fake2})

    # Now fake train a third column that has lateral connections to both
    # previously "trained" columns.
    h_2 = col_2.session.run([col_2.h],
        feed_dict={col_2.o_n: fake3,
                   col_2.prev_columns[0].o_n: fake3,
                   col_2.prev_columns[1].o_n: fake3})

    # Fourth column / fake instance of training.
    h_3 = col_3.session.run([col_3.h],
        feed_dict={col_3.o_n: fake4,
                   col_3.prev_columns[0].o_n: fake4,
                   col_3.prev_columns[1].o_n: fake4,
                   col_3.prev_columns[2].o_n: fake4})

    # Fifth column. Notice that we have to feed the same observations to n
    # placeholders for a Progressive NN with n columns.
    h_4 = col_4.session.run([col_4.h],
        feed_dict={col_4.o_n: fake5,
                   col_4.prev_columns[0].o_n: fake5,
                   col_4.prev_columns[1].o_n: fake5,
                   col_4.prev_columns[2].o_n: fake5,
                   col_4.prev_columns[3].o_n: fake5})

    # Anyway, you get the drift. Hope this helps someone understand
    # Progressive Neural Networks!

    # Make sure the earlier columns' parameters didn't change while later
    # columns used them: the flattened parameters should still match the
    # snapshots taken right after construction.
    assert np.allclose(col_4.prev_columns[0].pc.get_values_flat(), th0)
    assert np.allclose(col_4.prev_columns[1].pc.get_values_flat(), th1)
    assert np.allclose(col_4.prev_columns[2].pc.get_values_flat(), th2)
    assert np.allclose(col_4.prev_columns[3].pc.get_values_flat(), th3)

if __name__ == "__main__":
    test_ProgNN()
--------------------------------------------------------------------------------