├── .gitignore
├── README.md
├── config.py
├── models
│   ├── __init__.py
│   └── clockwork_rnn.py
├── train.py
└── utils
    ├── __init__.py
    └── data_generator.py

/.gitignore:
--------------------------------------------------------------------------------
# Custom ignores
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Clockwork RNN

This is a **TensorFlow** implementation of the **Clockwork RNN** proposed by Koutnik et al. ([ICML 2014](https://arxiv.org/abs/1402.3511)). In addition to the model itself, there is a small data generator for testing the prediction capability of the CW-RNN.

## Dependencies

* NumPy
* TensorFlow (tested with v1.0)
* Matplotlib and Pandas (used by the data generator and the training visualization)
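## Usage

Training on the generated toy dataset (built automatically by `utils/data_generator.py`) should only require:

```bash
python train.py
```

Hyperparameters such as the clock periods, hidden layer size, optimizer, and learning rate schedule are set in `config.py`; `train.py` overrides the input/output dimensions to match the generated data.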
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------

class Config(object):

    output_dir = "./output/"

    # Clockwork RNN parameters
    periods = [1, 2, 4, 8, 16, 32, 64]  # optionally extend with 128, 256
    num_steps = 100
    num_input = 2
    num_hidden = 294
    num_output = 2

    # Optimization parameters
    num_epochs = 100
    batch_size = 256
    optimizer = "rmsprop"
    max_norm_gradient = 10.0

    # Learning rate decay schedule
    learning_rate = 1e-3
    learning_rate_decay = 0.975
    learning_rate_step = 1000
    learning_rate_min = 1e-5

--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomrunia/ClockworkRNN/7e2a3ede8d88ad268aa5bf13645cc5c62e2c42b0/models/__init__.py
--------------------------------------------------------------------------------
/models/clockwork_rnn.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


class ClockworkRNN(object):

    '''
    A Clockwork RNN - Koutnik et al., 2014 [arXiv: https://arxiv.org/abs/1402.3511]

    In the Clockwork RNN (CW-RNN) the hidden layer is partitioned into separate
    modules, each processing inputs at its own temporal granularity and making
    computations only at its prescribed clock rate. Rather than making the
    standard RNN model more complex, the CW-RNN reduces the number of RNN
    parameters, improves the performance significantly on the tasks tested,
    and speeds up the network evaluation.
    '''
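    # Illustration (not part of the model): with the default periods
    # [1, 2, 4, 8, 16, 32, 64], module i is active at timestep t iff
    # t % periods[i] == 0. For example, at t = 8 the first four modules fire:
    #
    #   >>> periods = [1, 2, 4, 8, 16, 32, 64]
    #   >>> [i for i, T in enumerate(periods) if 8 % T == 0]
    #   [0, 1, 2, 3]
    #
    # Because the periods are powers of two, the active modules always form a
    # prefix of the module list, which is what the slicing below relies on.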

    def __init__(self, config):

        self.config = config

        # Check that the number of groups (periods) in the hidden layer
        # divides the total number of units in the layer. Note that this is
        # not a requirement in the paper; there the extra neurons are divided
        # over the higher-frequency groups.
        assert self.config.num_hidden % len(self.config.periods) == 0
        self.group_size = self.config.num_hidden // len(self.config.periods)

        # Global training step
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Initialize placeholders
        self.inputs = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.config.num_steps, self.config.num_input],
            name="inputs")

        self.targets = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.config.num_output],
            name="targets")

        # Build the complete model
        self._build_model()

        # Initialize the optimizer with gradient clipping
        self._init_optimizer()

        # Operations for creating summaries
        self._build_summary_ops()


    def _build_model(self):

        # Weight and bias initializers
        initializer_weights = tf.contrib.layers.variance_scaling_initializer()
        initializer_bias = tf.constant_initializer(0.0)

        # Activation functions of the hidden and output state
        activation_hidden = tf.tanh
        activation_output = tf.nn.relu

        # Split into a list of tensors, one for each timestep
        x_list = [tf.squeeze(x, axis=[1])
                  for x in tf.split(
                      axis=1, num_or_size_splits=self.config.num_steps,
                      value=self.inputs, name="inputs_list")]

        # Periods of each group, e.g. 1, 2, 4, ..., 256 when there are 9 groups
        self.clockwork_periods = self.config.periods

        # Mask to keep the recurrent matrix W_H upper triangular. (The paper uses
        # a block-upper-triangular mask; the element-wise mask used here is a
        # stricter special case of it.)
        self.clockwork_mask = tf.constant(np.triu(np.ones([self.config.num_hidden, self.config.num_hidden])), dtype=tf.float32, name="mask")

        with tf.variable_scope("input"):
            self.input_W = tf.get_variable("W", shape=[self.config.num_input, self.config.num_hidden], initializer=initializer_weights)  # W_I
            self.input_b = tf.get_variable("b", shape=[self.config.num_hidden], initializer=initializer_bias)  # b_I

        with tf.variable_scope("hidden"):
            self.hidden_W = tf.get_variable("W", shape=[self.config.num_hidden, self.config.num_hidden], initializer=initializer_weights)  # W_H
            self.hidden_W = tf.multiply(self.hidden_W, self.clockwork_mask)  # => upper triangular W_H
            self.hidden_b = tf.get_variable("b", shape=[self.config.num_hidden], initializer=initializer_bias)  # b_H

        with tf.variable_scope("output"):
            self.output_W = tf.get_variable("W", shape=[self.config.num_hidden, self.config.num_output], initializer=initializer_weights)  # W_O
            self.output_b = tf.get_variable("b", shape=[self.config.num_output], initializer=initializer_bias)  # b_O

        with tf.variable_scope("clockwork_cell") as scope:

            # Initialize the hidden state of the cell to zero (this is y_0)
            self.state = tf.get_variable("state", shape=[self.config.batch_size, self.config.num_hidden], initializer=tf.zeros_initializer(), trainable=False)

            for time_step in range(self.config.num_steps):

                # Only initialize variables in the first timestep
                if time_step > 0: scope.reuse_variables()

                # Find the groups of the hidden layer that are active at this
                # timestep, i.e. those with (t MOD T_i == 0). The active groups
                # always form a prefix of the group list.
                group_index = 0
                for i in range(len(self.clockwork_periods)):
                    if time_step % self.clockwork_periods[i] == 0:
                        group_index = i+1  # note the +1

                # Number of hidden units updated at this timestep
                active_units = group_index * self.group_size

                # Compute (W_I*x_t + b_I) for the active units only
                WI_x = tf.matmul(x_list[time_step], tf.slice(self.input_W, [0, 0], [-1, active_units]))
                WI_x = tf.nn.bias_add(WI_x, tf.slice(self.input_b, [0], [active_units]), name="WI_x")

                # Compute (W_H*y_{t-1} + b_H); W_H was already masked to be
                # upper triangular when it was created above
                WH_y = tf.matmul(self.state, tf.slice(self.hidden_W, [0, 0], [-1, active_units]))
                WH_y = tf.nn.bias_add(WH_y, tf.slice(self.hidden_b, [0], [active_units]), name="WH_y")

                # Compute y_t = tanh(W_H*y_{t-1} + W_I*x_t) for the active units
                y_update = tf.add(WH_y, WI_x, name="state_update")
                y_update = activation_hidden(y_update)

                # Copy the update into the cell state; the inactive units keep
                # their previous values
                self.state = tf.concat(
                    axis=1, values=[y_update, tf.slice(self.state, [0, active_units], [-1, -1])])

            # Save the final hidden state
            self.final_state = self.state

            # Compute the output, y = W_O*y_T + b_O; no output non-linearity
            # is applied for this regression task
            self.predictions = tf.matmul(self.final_state, self.output_W)
            self.predictions = tf.nn.bias_add(self.predictions, self.output_b)
            #self.predictions = activation_output(self.predictions, name="output")

        # Compute the loss: squared error summed over the output dimensions,
        # averaged over the batch
        self.error = tf.reduce_sum(tf.square(self.targets - self.predictions), axis=1)
        self.loss = tf.reduce_mean(self.error, name="loss")


    def _init_optimizer(self):

        # Learning rate decay; note that if self.config.learning_rate_decay == 1.0,
        # the decay schedule is disabled, i.e. the learning rate stays constant.
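        # Worked example with the defaults from config.py (staircase decay):
        #   lr(step) = max(1e-3 * 0.975**(step // 1000), 1e-5)
        # so after 10,000 steps the learning rate is 1e-3 * 0.975**10 ≈ 7.76e-4.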
        self.learning_rate = tf.train.exponential_decay(
            self.config.learning_rate,
            self.global_step,
            self.config.learning_rate_step,
            self.config.learning_rate_decay,
            staircase=True
        )
        self.learning_rate = tf.maximum(self.learning_rate, self.config.learning_rate_min)
        tf.summary.scalar("learning_rate", self.learning_rate)

        # Definition of the optimizer and the gradient computation
        if self.config.optimizer == 'adam':
            # Adam optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        elif self.config.optimizer == 'rmsprop':
            # RMSProp optimizer
            self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)
        elif self.config.optimizer == 'adagrad':
            # AdaGrad optimizer
            self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate)
        else:
            raise ValueError("Unknown optimizer specified")

        # Compute the gradients for each variable
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)

        # Optionally perform gradient clipping by max-norm
        if self.config.max_norm_gradient > 0:
            # Perform gradient clipping by the global norm
            grads, variables = zip(*self.grads_and_vars)
            grads_clipped, _ = tf.clip_by_global_norm(
                grads, clip_norm=self.config.max_norm_gradient)

            # Apply the gradients after clipping them
            self.train_op = self.optimizer.apply_gradients(
                zip(grads_clipped, variables),
                global_step=self.global_step
            )

        else:
            # Unclipped gradients
            self.train_op = self.optimizer.apply_gradients(
                self.grads_and_vars,
                global_step=self.global_step
            )
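        # For reference: tf.clip_by_global_norm rescales every gradient by the
        # factor clip_norm / global_norm whenever global_norm > clip_norm, with
        # global_norm = sqrt(sum_i ||g_i||^2); otherwise it leaves them unchanged.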
        # Keep track of gradient values and their sparsity
        # (':' in a variable name is not a valid summary name character)
        grad_summaries = []
        for g, v in self.grads_and_vars:
            if g is not None:
                var_name = v.name.replace(":", "_")
                grad_hist_summary = tf.summary.histogram("gradients/{}/hist".format(var_name), g)
                sparsity_summary = tf.summary.scalar("gradients/{}/sparsity".format(var_name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.gradient_summaries_merged = tf.summary.merge(grad_summaries)


    def _build_summary_ops(self):

        # Training summaries
        training_summaries = [
            tf.summary.scalar("train/loss", self.loss),
            tf.summary.scalar("train/learning_rate", self.learning_rate),
        ]

        # Combine the training summaries with the gradient summaries
        # (tf.summary.merge expects a flat list of summary tensors)
        self.train_summary_op = tf.summary.merge(
            training_summaries + [self.gradient_summaries_merged])
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
from datetime import datetime
import os
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.python.framework import ops

from models.clockwork_rnn import ClockworkRNN
from config import Config
from utils.data_generator import generate_data


def train(config):

    plt.ion()

    # Load the training data
    (X_train, y_train), (X_validation, y_validation) = generate_data(1000)
    num_train = X_train.shape[0]
    num_validation = X_validation.shape[0]

    # Infer the model dimensions from the generated data
    config.num_steps = X_train.shape[1]
    config.num_input = X_train.shape[2]
    config.num_output = y_train.shape[1]

    # Build the TensorFlow graph of the Clockwork RNN (sequence regression)
    print("[x] Building TensorFlow Graph...")
    model = ClockworkRNN(config)

    # Compute the number of training steps
    step_in_epoch, steps_per_epoch = 0, num_train//config.batch_size
    num_steps = steps_per_epoch*config.num_epochs
    train_step = 0

    # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(config.output_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Initialize the TensorFlow session
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
    sess = tf.Session(config=tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False
    ))

    # Create a saver for all variables
    tf_vars_to_save = tf.trainable_variables() + [model.global_step]
    saver = tf.train.Saver(tf_vars_to_save, max_to_keep=5)

    # Initialize the summary writer
    summary_out_dir = os.path.join(config.output_dir, "summaries")
    summary_writer = tf.summary.FileWriter(summary_out_dir, sess.graph)

    # Initialize the session
    init = tf.global_variables_initializer()
    sess.run(init)

    for _ in range(num_steps):

        ################################################################
        ########################## TRAINING ############################
        ################################################################

        index_start = step_in_epoch*config.batch_size
        index_end = index_start+config.batch_size

        # Actual training of the network
        _, train_step, train_loss, learning_rate, train_summary = sess.run(
            [model.train_op,
             model.global_step,
             model.loss,
             model.learning_rate,
             model.train_summary_op],
            feed_dict={
                model.inputs: X_train[index_start:index_end],
                model.targets: y_train[index_start:index_end],
            }
        )

        if train_step % 10 == 0:
            print("[%s] Step %05i/%05i, LR = %.2e, Loss = %.5f" %
                  (datetime.now().strftime("%Y-%m-%d %H:%M"), train_step, num_steps, learning_rate, train_loss))

        # Save summaries to disk
        summary_writer.add_summary(train_summary, train_step)

        if train_step % 1000 == 0 and train_step > 0:
            path = saver.save(sess, checkpoint_prefix, global_step=train_step)
            print("[%s] Saved TensorFlow model checkpoint to %s." % (datetime.now().strftime("%Y-%m-%d %H:%M"), path))
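            # To resume training later, one could restore the variables with, e.g.:
            #   saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))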

        step_in_epoch += 1

        ################################################################
        ############### MODEL TESTING ON VALIDATION DATA ###############
        ################################################################

        if step_in_epoch == steps_per_epoch:

            # End of epoch, check some validation examples
            print("#" * 100)
            print("MODEL TESTING ON VALIDATION DATA (%i examples):" % num_validation)

            for validation_step in range(num_validation//config.batch_size):

                index_start = validation_step*config.batch_size
                index_end = index_start+config.batch_size

                validation_loss, predictions = sess.run([model.loss, model.predictions],
                    feed_dict={
                        model.inputs: X_validation[index_start:index_end],
                        model.targets: y_validation[index_start:index_end],
                    }
                )

                # Show a plot of the ground truth and the predictions of the signal
                if validation_step == 0:
                    plt.clf()
                    plt.title("Ground Truth and Predictions")
                    plt.plot(y_validation[index_start:index_start+50,0], label="signal 0 (ground truth)")
                    plt.plot(predictions[0:50,0], ls='--', label="signal 0 (prediction)")
                    plt.plot(y_validation[index_start:index_start+50,1], label="signal 1 (ground truth)")
                    plt.plot(predictions[0:50,1], ls='--', label="signal 1 (prediction)")
                    legend = plt.legend(frameon=True)
                    legend.get_frame().set_facecolor('white')
                    plt.draw()
                    plt.pause(0.001)

                print("[%s] Validation Step %03i. Loss = %.5f" % (datetime.now().strftime("%Y-%m-%d %H:%M"), validation_step, validation_loss))

            # Reset for the next epoch
            step_in_epoch = 0

            # Shuffle the training data
            perm = np.arange(num_train)
            np.random.shuffle(perm)
            X_train = X_train[perm]
            y_train = y_train[perm]

            print("#" * 100)

    # Close the session and destroy the graph
    sess.close()
    ops.reset_default_graph()


if __name__ == "__main__":
    train(Config())
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomrunia/ClockworkRNN/7e2a3ede8d88ad268aa5bf13645cc5c62e2c42b0/utils/__init__.py
--------------------------------------------------------------------------------
/utils/data_generator.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from random import random

# Just a random example of sequences to train the Clockwork RNN on.
# Adapted from: http://danielhnyk.cz/predicting-sequences-vectors-keras-using-rnn-lstm/
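# The generated series: channel "a" is a triangle wave running over 1..10 and
# channel "b" is the same wave delayed by 9 steps; the whole frame is scaled by
# a single random factor. From a window of the previous 100 (a, b) pairs the
# model has to predict the next pair.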

def _load_data(data, n_prev=100):
    # data should be a pd.DataFrame(); .values is used instead of the
    # deprecated .as_matrix() for compatibility with newer pandas
    docX, docY = [], []
    for i in range(len(data)-n_prev):
        docX.append(data.iloc[i:i+n_prev].values)
        docY.append(data.iloc[i+n_prev].values)
    alsX = np.array(docX)
    alsY = np.array(docY)
    return alsX, alsY


def train_test_split(df, test_size=0.1):
    ntrn = int(round(len(df) * (1 - test_size)))
    X_train, y_train = _load_data(df.iloc[0:ntrn])
    X_test, y_test = _load_data(df.iloc[ntrn:])
    return (X_train, y_train), (X_test, y_test)


def generate_data(num_examples):
    print("[x] Generating training examples...")
    flow = (list(range(1, 10, 1)) + list(range(10, 1, -1))) * num_examples
    pdata = pd.DataFrame({"a": flow, "b": flow})
    pdata.b = pdata.b.shift(9)
    data = pdata.iloc[10:] * random()  # single random scaling factor for the whole series
    return train_test_split(data)
--------------------------------------------------------------------------------