├── .gitignore
├── README.md
├── config.py
├── models
│   ├── __init__.py
│   └── clockwork_rnn.py
├── train.py
└── utils
    ├── __init__.py
    └── data_generator.py

/.gitignore:
--------------------------------------------------------------------------------
# Custom ignores
.idea/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Clockwork RNN

This is a **TensorFlow** implementation of the **Clockwork RNN** proposed by Koutnik et al. ([ICML 2014](https://arxiv.org/abs/1402.3511)). In addition to the model itself, there is a small data generator for testing the prediction capability of the CW-RNN.

## Dependencies

* NumPy
* TensorFlow (tested with v1.0)
* Matplotlib and Pandas (used by the data generator and the training visualization)
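## Usage

Training on the generated toy dataset (built automatically by `utils/data_generator.py`) should only require:

```bash
python train.py
```

Hyperparameters such as the clock periods, hidden layer size, optimizer, and learning rate schedule are set in `config.py`; `train.py` overrides the input/output dimensions to match the generated data.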
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------

class Config(object):

    output_dir = "./output/"

    # Clockwork RNN parameters
    periods = [1, 2, 4, 8, 16, 32, 64]  # optionally extend with 128, 256
    num_steps = 100
    num_input = 2
    num_hidden = 294
    num_output = 2

    # Optimization parameters
    num_epochs = 100
    batch_size = 256
    optimizer = "rmsprop"
    max_norm_gradient = 10.0

    # Learning rate decay schedule
    learning_rate = 1e-3
    learning_rate_decay = 0.975
    learning_rate_step = 1000
    learning_rate_min = 1e-5

--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomrunia/ClockworkRNN/7e2a3ede8d88ad268aa5bf13645cc5c62e2c42b0/models/__init__.py
--------------------------------------------------------------------------------
/models/clockwork_rnn.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf


class ClockworkRNN(object):

    '''
    A Clockwork RNN - Koutnik et al., 2014 [arXiv: https://arxiv.org/abs/1402.3511]

    In the Clockwork RNN (CW-RNN) the hidden layer is partitioned into separate
    modules, each processing inputs at its own temporal granularity and making
    computations only at its prescribed clock rate. Rather than making the
    standard RNN model more complex, the CW-RNN reduces the number of RNN
    parameters, improves the performance significantly on the tasks tested,
    and speeds up the network evaluation.
    '''
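    # Illustration (not part of the model): with the default periods
    # [1, 2, 4, 8, 16, 32, 64], module i is active at timestep t iff
    # t % periods[i] == 0. For example, at t = 8 the first four modules fire:
    #
    #   >>> periods = [1, 2, 4, 8, 16, 32, 64]
    #   >>> [i for i, T in enumerate(periods) if 8 % T == 0]
    #   [0, 1, 2, 3]
    #
    # Because the periods are powers of two, the active modules always form a
    # prefix of the module list, which is what the slicing below relies on.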

    def __init__(self, config):

        self.config = config

        # Check that the number of groups (periods) in the hidden layer
        # divides the total number of units in the layer. Note that this is
        # not a requirement in the paper; there the extra neurons are divided
        # over the higher-frequency groups.
        assert self.config.num_hidden % len(self.config.periods) == 0
        self.group_size = self.config.num_hidden // len(self.config.periods)

        # Global training step
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        # Initialize placeholders
        self.inputs = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.config.num_steps, self.config.num_input],
            name="inputs")

        self.targets = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.config.num_output],
            name="targets")

        # Build the complete model
        self._build_model()

        # Initialize the optimizer with gradient clipping
        self._init_optimizer()

        # Operations for creating summaries
        self._build_summary_ops()


    def _build_model(self):

        # Weight and bias initializers
        initializer_weights = tf.contrib.layers.variance_scaling_initializer()
        initializer_bias = tf.constant_initializer(0.0)

        # Activation functions of the hidden and output state
        activation_hidden = tf.tanh
        activation_output = tf.nn.relu

        # Split into a list of tensors, one for each timestep
        x_list = [tf.squeeze(x, axis=[1])
                  for x in tf.split(
                      axis=1, num_or_size_splits=self.config.num_steps,
                      value=self.inputs, name="inputs_list")]

        # Periods of each group, e.g. 1, 2, 4, ..., 256 when there are 9 groups
        self.clockwork_periods = self.config.periods

        # Mask to keep the recurrent matrix W_H upper triangular. (The paper uses
        # a block-upper-triangular mask; the element-wise mask used here is a
        # stricter special case of it.)
        self.clockwork_mask = tf.constant(np.triu(np.ones([self.config.num_hidden, self.config.num_hidden])), dtype=tf.float32, name="mask")

        with tf.variable_scope("input"):
            self.input_W = tf.get_variable("W", shape=[self.config.num_input, self.config.num_hidden], initializer=initializer_weights)  # W_I
            self.input_b = tf.get_variable("b", shape=[self.config.num_hidden], initializer=initializer_bias)  # b_I

        with tf.variable_scope("hidden"):
            self.hidden_W = tf.get_variable("W", shape=[self.config.num_hidden, self.config.num_hidden], initializer=initializer_weights)  # W_H
            self.hidden_W = tf.multiply(self.hidden_W, self.clockwork_mask)  # => upper triangular W_H
            self.hidden_b = tf.get_variable("b", shape=[self.config.num_hidden], initializer=initializer_bias)  # b_H

        with tf.variable_scope("output"):
            self.output_W = tf.get_variable("W", shape=[self.config.num_hidden, self.config.num_output], initializer=initializer_weights)  # W_O
            self.output_b = tf.get_variable("b", shape=[self.config.num_output], initializer=initializer_bias)  # b_O

        with tf.variable_scope("clockwork_cell") as scope:

            # Initialize the hidden state of the cell to zero (this is y_0)
            self.state = tf.get_variable("state", shape=[self.config.batch_size, self.config.num_hidden], initializer=tf.zeros_initializer(), trainable=False)

            for time_step in range(self.config.num_steps):

                # Only initialize variables in the first timestep
                if time_step > 0: scope.reuse_variables()

                # Find the groups of the hidden layer that are active at this
                # timestep, i.e. those with (t MOD T_i == 0). The active groups
                # always form a prefix of the group list.
                group_index = 0
                for i in range(len(self.clockwork_periods)):
                    if time_step % self.clockwork_periods[i] == 0:
                        group_index = i+1  # note the +1

                # Number of hidden units updated at this timestep
                active_units = group_index * self.group_size

                # Compute (W_I*x_t + b_I) for the active units only
                WI_x = tf.matmul(x_list[time_step], tf.slice(self.input_W, [0, 0], [-1, active_units]))
                WI_x = tf.nn.bias_add(WI_x, tf.slice(self.input_b, [0], [active_units]), name="WI_x")

                # Compute (W_H*y_{t-1} + b_H); W_H was already masked to be
                # upper triangular when it was created above
                WH_y = tf.matmul(self.state, tf.slice(self.hidden_W, [0, 0], [-1, active_units]))
                WH_y = tf.nn.bias_add(WH_y, tf.slice(self.hidden_b, [0], [active_units]), name="WH_y")

                # Compute y_t = tanh(W_H*y_{t-1} + W_I*x_t) for the active units
                y_update = tf.add(WH_y, WI_x, name="state_update")
                y_update = activation_hidden(y_update)

                # Copy the update into the cell state; the inactive units keep
                # their previous values
                self.state = tf.concat(
                    axis=1, values=[y_update, tf.slice(self.state, [0, active_units], [-1, -1])])

            # Save the final hidden state
            self.final_state = self.state

            # Compute the output, y = W_O*y_T + b_O; no output non-linearity
            # is applied for this regression task
            self.predictions = tf.matmul(self.final_state, self.output_W)
            self.predictions = tf.nn.bias_add(self.predictions, self.output_b)
            #self.predictions = activation_output(self.predictions, name="output")

        # Compute the loss: squared error summed over the output dimensions,
        # averaged over the batch
        self.error = tf.reduce_sum(tf.square(self.targets - self.predictions), axis=1)
        self.loss = tf.reduce_mean(self.error, name="loss")


    def _init_optimizer(self):

        # Learning rate decay; note that if self.config.learning_rate_decay == 1.0,
        # the decay schedule is disabled, i.e. the learning rate stays constant.
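        # Worked example with the defaults from config.py (staircase decay):
        #   lr(step) = max(1e-3 * 0.975**(step // 1000), 1e-5)
        # so after 10,000 steps the learning rate is 1e-3 * 0.975**10 ≈ 7.76e-4.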
        self.learning_rate = tf.train.exponential_decay(
            self.config.learning_rate,
            self.global_step,
            self.config.learning_rate_step,
            self.config.learning_rate_decay,
            staircase=True
        )
        self.learning_rate = tf.maximum(self.learning_rate, self.config.learning_rate_min)
        tf.summary.scalar("learning_rate", self.learning_rate)

        # Definition of the optimizer and the gradient computation
        if self.config.optimizer == 'adam':
            # Adam optimizer
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        elif self.config.optimizer == 'rmsprop':
            # RMSProp optimizer
            self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)
        elif self.config.optimizer == 'adagrad':
            # AdaGrad optimizer
            self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate)
        else:
            raise ValueError("Unknown optimizer specified")

        # Compute the gradients for each variable
        self.grads_and_vars = self.optimizer.compute_gradients(self.loss)

        # Optionally perform gradient clipping by max-norm
        if self.config.max_norm_gradient > 0:
            # Perform gradient clipping by the global norm
            grads, variables = zip(*self.grads_and_vars)
            grads_clipped, _ = tf.clip_by_global_norm(
                grads, clip_norm=self.config.max_norm_gradient)

            # Apply the gradients after clipping them
            self.train_op = self.optimizer.apply_gradients(
                zip(grads_clipped, variables),
                global_step=self.global_step
            )

        else:
            # Unclipped gradients
            self.train_op = self.optimizer.apply_gradients(
                self.grads_and_vars,
                global_step=self.global_step
            )
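        # For reference: tf.clip_by_global_norm rescales every gradient by the
        # factor clip_norm / global_norm whenever global_norm > clip_norm, with
        # global_norm = sqrt(sum_i ||g_i||^2); otherwise it leaves them unchanged.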
        # Keep track of gradient values and their sparsity
        # (':' in a variable name is not a valid summary name character)
        grad_summaries = []
        for g, v in self.grads_and_vars:
            if g is not None:
                var_name = v.name.replace(":", "_")
                grad_hist_summary = tf.summary.histogram("gradients/{}/hist".format(var_name), g)
                sparsity_summary = tf.summary.scalar("gradients/{}/sparsity".format(var_name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.gradient_summaries_merged = tf.summary.merge(grad_summaries)


    def _build_summary_ops(self):

        # Training summaries
        training_summaries = [
            tf.summary.scalar("train/loss", self.loss),
            tf.summary.scalar("train/learning_rate", self.learning_rate),
        ]

        # Combine the training summaries with the gradient summaries
        # (tf.summary.merge expects a flat list of summary tensors)
        self.train_summary_op = tf.summary.merge(
            training_summaries + [self.gradient_summaries_merged])
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
from datetime import datetime
import os
import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.python.framework import ops

from models.clockwork_rnn import ClockworkRNN
from config import Config
from utils.data_generator import generate_data


def train(config):

    plt.ion()

    # Load the training data
    (X_train, y_train), (X_validation, y_validation) = generate_data(1000)
    num_train = X_train.shape[0]
    num_validation = X_validation.shape[0]

    # Infer the model dimensions from the generated data
    config.num_steps = X_train.shape[1]
    config.num_input = X_train.shape[2]
    config.num_output = y_train.shape[1]

    # Build the TensorFlow graph of the Clockwork RNN (sequence regression)
    print("[x] Building TensorFlow Graph...")
    model = ClockworkRNN(config)

    # Compute the number of training steps
    step_in_epoch, steps_per_epoch = 0, num_train//config.batch_size
    num_steps = steps_per_epoch*config.num_epochs
    train_step = 0

    # Checkpoint directory. TensorFlow assumes this directory already exists, so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(config.output_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Initialize the TensorFlow session
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
    sess = tf.Session(config=tf.ConfigProto(
        gpu_options=gpu_options,
        log_device_placement=False
    ))

    # Create a saver for all variables
    tf_vars_to_save = tf.trainable_variables() + [model.global_step]
    saver = tf.train.Saver(tf_vars_to_save, max_to_keep=5)

    # Initialize the summary writer
    summary_out_dir = os.path.join(config.output_dir, "summaries")
    summary_writer = tf.summary.FileWriter(summary_out_dir, sess.graph)

    # Initialize the session
    init = tf.global_variables_initializer()
    sess.run(init)

    for _ in range(num_steps):

        ################################################################
        ########################## TRAINING ############################
        ################################################################

        index_start = step_in_epoch*config.batch_size
        index_end = index_start+config.batch_size

        # Actual training of the network
        _, train_step, train_loss, learning_rate, train_summary = sess.run(
            [model.train_op,
             model.global_step,
             model.loss,
             model.learning_rate,
             model.train_summary_op],
            feed_dict={
                model.inputs: X_train[index_start:index_end],
                model.targets: y_train[index_start:index_end],
            }
        )

        if train_step % 10 == 0:
            print("[%s] Step %05i/%05i, LR = %.2e, Loss = %.5f" %
                  (datetime.now().strftime("%Y-%m-%d %H:%M"), train_step, num_steps, learning_rate, train_loss))

        # Save summaries to disk
        summary_writer.add_summary(train_summary, train_step)

        if train_step % 1000 == 0 and train_step > 0:
            path = saver.save(sess, checkpoint_prefix, global_step=train_step)
            print("[%s] Saved TensorFlow model checkpoint to %s." % (datetime.now().strftime("%Y-%m-%d %H:%M"), path))
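            # To resume training later, one could restore the variables with, e.g.:
            #   saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))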

        step_in_epoch += 1

        ################################################################
        ############### MODEL TESTING ON VALIDATION DATA ###############
        ################################################################

        if step_in_epoch == steps_per_epoch:

            # End of epoch, check some validation examples
            print("#" * 100)
            print("MODEL TESTING ON VALIDATION DATA (%i examples):" % num_validation)

            for validation_step in range(num_validation//config.batch_size):

                index_start = validation_step*config.batch_size
                index_end = index_start+config.batch_size

                validation_loss, predictions = sess.run([model.loss, model.predictions],
                    feed_dict={
                        model.inputs: X_validation[index_start:index_end],
                        model.targets: y_validation[index_start:index_end],
                    }
                )

                # Show a plot of the ground truth and the predictions of the signal
                if validation_step == 0:
                    plt.clf()
                    plt.title("Ground Truth and Predictions")
                    plt.plot(y_validation[index_start:index_start+50,0], label="signal 0 (ground truth)")
                    plt.plot(predictions[0:50,0], ls='--', label="signal 0 (prediction)")
                    plt.plot(y_validation[index_start:index_start+50,1], label="signal 1 (ground truth)")
                    plt.plot(predictions[0:50,1], ls='--', label="signal 1 (prediction)")
                    legend = plt.legend(frameon=True)
                    legend.get_frame().set_facecolor('white')
                    plt.draw()
                    plt.pause(0.001)

                print("[%s] Validation Step %03i. Loss = %.5f" % (datetime.now().strftime("%Y-%m-%d %H:%M"), validation_step, validation_loss))

            # Reset for the next epoch
            step_in_epoch = 0

            # Shuffle the training data
            perm = np.arange(num_train)
            np.random.shuffle(perm)
            X_train = X_train[perm]
            y_train = y_train[perm]

            print("#" * 100)

    # Close the session and destroy the graph
    sess.close()
    ops.reset_default_graph()


if __name__ == "__main__":
    train(Config())
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tomrunia/ClockworkRNN/7e2a3ede8d88ad268aa5bf13645cc5c62e2c42b0/utils/__init__.py
--------------------------------------------------------------------------------
/utils/data_generator.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from random import random

# Just a random example of sequences to train the Clockwork RNN on.
# Adapted from: http://danielhnyk.cz/predicting-sequences-vectors-keras-using-rnn-lstm/
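# The generated series: channel "a" is a triangle wave running over 1..10 and
# channel "b" is the same wave delayed by 9 steps; the whole frame is scaled by
# a single random factor. From a window of the previous 100 (a, b) pairs the
# model has to predict the next pair.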

def _load_data(data, n_prev=100):
    # data should be a pd.DataFrame(); .values is used instead of the
    # deprecated .as_matrix() for compatibility with newer pandas
    docX, docY = [], []
    for i in range(len(data)-n_prev):
        docX.append(data.iloc[i:i+n_prev].values)
        docY.append(data.iloc[i+n_prev].values)
    alsX = np.array(docX)
    alsY = np.array(docY)
    return alsX, alsY


def train_test_split(df, test_size=0.1):
    ntrn = int(round(len(df) * (1 - test_size)))
    X_train, y_train = _load_data(df.iloc[0:ntrn])
    X_test, y_test = _load_data(df.iloc[ntrn:])
    return (X_train, y_train), (X_test, y_test)


def generate_data(num_examples):
    print("[x] Generating training examples...")
    flow = (list(range(1, 10, 1)) + list(range(10, 1, -1))) * num_examples
    pdata = pd.DataFrame({"a": flow, "b": flow})
    pdata.b = pdata.b.shift(9)
    data = pdata.iloc[10:] * random()  # single random scaling factor for the whole series
    return train_test_split(data)
--------------------------------------------------------------------------------