├── README.md
├── rawtxt
│   └── linux_kernel.txt
├── recurrent.ipynb
└── train.py

/README.md:
--------------------------------------------------------------------------------
# pycaffe-recurrent
IPython notebook for training multilayer LSTM and RNN networks with pycaffe


Example of generated code after training on the Linux kernel for a few hours (average test loss ~1):
```cpp
static int __init bit_next_worker_lock_update(void *arg)
{
	if (cpumask_set_cpu(cpu) + 1) {
		struct dentry *dst_cset = cgroup_mutex;

		current->trace_buffers[cpu] = AUDIT_TIMER_SPINLOCK_SIZE << PAGE_SIZE)
			return;

		/* initialize we be possible */
		for (kdb_size != STA_SYS_READ)
			return;

		for_each_update_read(se);

		rcu_read_lock();
	}

	return 0;
}

static inline void cmd_state_nr_callbacks, int reset_update_print_scan_mintatup(struct seq_file *m, void *v)
{
	struct trace_array *tr;
	struct irq_data *start;
	struct rcu_node *rnp = trace_rcu_cleanup(size_t, kp);
}

static void ftrace_print_ptr(const struct ftrace_hash *timer, struct compat_trigger *data)
{
	if (should_hash->handler_len) {
		struct trace_buffer *buffer;
		if (!strtn | (trace_notifier_buffer_lock))
			create_lock_reserve(&rt_rq->rt_rq);
		continue;
		break;

	case ENTRIESC_RESTART
		kdb_printf("\n");
		return 0;
	}

	return true;
}

/*
 * Precent.
 *
 * We can get is to the ring buffer.
 */
static inline void tick_deferred(void *iter)
{
	if (lock_count_start, commmtable_total->signal_cpus,
	    new_aux.dinable_regs)
		if (!sechdrs[cpu].expires & ALLOUS_PER_BOOTH, 0);
		return ret;
	}

	if (iter->sequence;
} while (trace_option_read_cpu(tsk));

	return ret;
}
```

--------------------------------------------------------------------------------
/rawtxt/linux_kernel.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuprel/pycaffe-recurrent/e28027288a5bec97a39fe7156505bb4f7923b3fb/rawtxt/linux_kernel.txt
--------------------------------------------------------------------------------
/recurrent.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recurrent Network for Character Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "from caffe import *\n",
    "from caffe.proto.caffe_pb2 import *\n",
    "Lr, Pr = layers, params\n",
    "import string, os, h5py, json\n",
    "from glob import glob\n",
    "sf = lambda *x: string.join([str(i) for i in x], '_')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify Hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "hypes = {\n",
    "    'sequence_length': 100,\n",
    "    'layers_num': 2,\n",
    "    'state_dim': 256,\n",
    "    'batch_size': 128,\n",
    "    'recurrent_unit': 'lstm', # rnn or lstm\n",
    "    'solver': {\n",
    "        'base_lr': 5e-3,\n",
    "        'weight_decay': 1e-4,\n",
    "        'lr_policy': 'exp',\n",
    "        'gamma': 0.9999,\n",
    "        'clip_gradients': 100,\n",
    "        'solver_type': SolverParameter.RMSPROP,\n",
    "        'rms_decay': 0.8,\n",
    "        'solver_mode': SolverParameter.GPU\n",
    "    }\n",
    "}\n",
    "\n",
    "txt_file = 'rawtxt/linux_kernel.txt'\n",
    "use_gpu = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "T = hypes['sequence_length']\n",
    "L = hypes['layers_num']\n",
    "d = hypes['state_dim']\n",
    "b = hypes['batch_size']\n",
    "\n",
    "json.dump(hypes, open('hypes.json', 'w'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "txt = open(txt_file, 'r').read()\n",
    "C = sorted(set(txt))\n",
    "k = len(C)\n",
    "if k <= 2**8: uintn = uint8\n",
    "else: uintn = uint16\n",
    "X = array([C.index(c) for c in txt], dtype=uintn)\n",
    "Y = X[1:].copy()\n",
    "X = X[:-1]\n",
    "\n",
    "def chop(x, n=None, m=None):\n",
    "    if n: m = len(x)//n\n",
    "    if m: n = len(x)//m\n",
    "    X = split(array(x[:m*n]), n)\n",
    "    return array(X)\n",
    "\n",
    "rshape = lambda A: chop(rollaxis(chop(A,n=b),1,0),m=T)\n",
    "X, Y = map(rshape, [X, Y])\n",
    "\n",
    "data = h5py.File('data.h5', 'w')\n",
    "data.create_group('train')\n",
    "data.create_group('test')\n",
    "a = 9*len(X)/10\n",
    "data['train']['X'] = X[:a]\n",
    "data['train']['Y'] = Y[:a]\n",
    "data['test']['X'] = X[a:]\n",
    "data['test']['Y'] = Y[a:]\n",
    "data.close()"
   ]
  },
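  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note on the reshaping above: `rshape` turns the flat character stream into an array of shape `(len(X)//b//T, T, b)`. Each chunk holds `T` timesteps of `b` parallel character streams, and chunk `i+1` continues every stream exactly where chunk `i` left off. This layout is what lets `train.py` carry the hidden state from one chunk to the next instead of resetting it every batch."
   ]
  },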
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def rnn_step(h, h_below, kwargs_fc):\n",
    "    \n",
    "    kwargs_fc['num_output'] = d\n",
    "    \n",
    "    h = Lr.Concat(h_below, h)\n",
    "    h = Lr.InnerProduct(h, **kwargs_fc)\n",
    "    h = Lr.TanH(h)\n",
    "    \n",
    "    return h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def lstm_step(h, h_below, kwargs_fc):\n",
    "    \n",
    "    kwargs_fc['num_output'] = 2*d\n",
    "    \n",
    "    PROD = EltwiseParameter.PROD\n",
    "    c, h = Lr.Slice(h, slice_point=d/2, ntop=2)\n",
    "    h = Lr.Concat(h_below, h)\n",
    "    h = Lr.InnerProduct(h, **kwargs_fc)\n",
    "    i, f, o, g = Lr.Slice(h, slice_point=[d/2,d,3*d/2], ntop=4)\n",
    "    i, f, o = map(Lr.Sigmoid, [i, f, o])\n",
    "    g = Lr.TanH(g)\n",
    "    c = Lr.Eltwise(Lr.Eltwise(f, c, operation=PROD), Lr.Eltwise(i, g, operation=PROD))\n",
    "    h = Lr.Eltwise(o, Lr.TanH(c), operation=PROD)\n",
    "    h = Lr.Concat(c, h)\n",
    "    \n",
    "    return h"
   ]
  },
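  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, `lstm_step` above is the standard LSTM update, with the cell state $c$ and hidden state $h$ packed into one blob of width $d$ ($d/2$ each), so a single `InnerProduct` produces all four gate pre-activations at once (hence `num_output = 2*d`), and `Slice` splits them apart. Writing $x$ for the input from the layer below, each gate reading its own slice of the shared weight matrix, and omitting biases:\n",
    "\n",
    "$$i, f, o = \\sigma(W\\,[x;\\,h]), \\qquad g = \\tanh(W\\,[x;\\,h])$$\n",
    "\n",
    "$$c \\leftarrow f \\odot c + i \\odot g, \\qquad h \\leftarrow o \\odot \\tanh(c)$$"
   ]
  },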
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_net_param(T, b, drop=True):\n",
    "    \n",
    "    net_spec = NetSpec()\n",
    "\n",
    "    bsX, bsY, bsH = [BlobShape() for i in range(3)]\n",
    "    bsH.dim.extend([b,d])\n",
    "    bsX.dim.extend([b,k])\n",
    "    bsY.dim.extend([b])\n",
    "\n",
    "    if hypes['recurrent_unit'] == 'rnn': step = rnn_step\n",
    "    if hypes['recurrent_unit'] == 'lstm': step = lstm_step\n",
    "    \n",
    "    get_kwargs_fc = lambda t, l: {\n",
    "        'param': [{'lr_mult': 1, 'decay_mult': 1, 'name': sf('W', l)},\n",
    "                  {'lr_mult': 2, 'decay_mult': 0, 'name': sf('b', l)}],\n",
    "        'weight_filler': {'type': 'uniform', 'min': -0.01, 'max': 0.01},\n",
    "        'name': sf('fc', t, l)\n",
    "    }\n",
    "    \n",
    "    h = []\n",
    "    for l in range(L):\n",
    "        h.append(Lr.DummyData(shape=bsH))\n",
    "        setattr(net_spec, sf('h',0,l), h[l])\n",
    "    \n",
    "    losses = []\n",
    "    \n",
    "    for t in range(T):\n",
    "\n",
    "        x = Lr.DummyData(shape=bsX)\n",
    "        y = Lr.DummyData(shape=bsY)\n",
    "\n",
    "        h[0] = step(h[0], x, get_kwargs_fc(t, 0))\n",
    "        for l in range(1, L):\n",
    "            h[l] = step(h[l], h[l-1], get_kwargs_fc(t, l))\n",
    "            if drop: h[l] = Lr.Dropout(h[l])\n",
    "\n",
    "        kwargs_fc = get_kwargs_fc(t, L)\n",
    "        kwargs_fc['num_output'] = k\n",
    "        \n",
    "        z = Lr.InnerProduct(h[-1], **kwargs_fc)\n",
    "        loss = Lr.SoftmaxWithLoss(z, y)\n",
    "        \n",
    "        setattr(net_spec, sf('x', t), x)\n",
    "        setattr(net_spec, sf('y', t), y)\n",
    "        setattr(net_spec, sf('z', t), z)\n",
    "        for l in range(L): \n",
    "            setattr(net_spec, sf('h', t+1, l), h[l])\n",
    "        setattr(net_spec, sf('loss', t), loss)\n",
    "    \n",
    "    return net_spec.to_proto()"
   ]
  },
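  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_net_param` unrolls the recurrence into a flat Caffe graph: one copy of the step layers per timestep, with every timestep's `InnerProduct` tied to the same weights through the shared param names `W_l` / `b_l`. The `DummyData` blobs (`x_t`, `y_t`, and the initial states `h_0_l`) are placeholders that `train.py` fills in manually, and backprop through the `T`-step unroll amounts to truncated BPTT over `T` characters."
   ]
  },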
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "solver_param = SolverParameter()\n",
    "solver_param.net_param.CopyFrom(get_net_param(T, b))\n",
    "solver_param.test_net_param.add()\n",
    "solver_param.test_net_param[0].CopyFrom(get_net_param(T, b, drop=False))\n",
    "solver_param.test_iter.extend([1])\n",
    "solver_param.test_interval = 10**9\n",
    "for pr, val in hypes['solver'].iteritems():\n",
    "    setattr(solver_param, pr, val)\n",
    "\n",
    "with open('solver.prototxt', 'w') as f: f.write(str(solver_param))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Better to run this from the command line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "!python train.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deploy (run this while training)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "seed_chars = '#inc'\n",
    "temperature = 0.7\n",
    "num_chars = 2000\n",
    "\n",
    "open('deploy.prototxt', 'w').write(str(get_net_param(1, 1, drop=False)))\n",
    "net = Net('deploy.prototxt', 1)\n",
    "\n",
    "def load_params(net, params_file):\n",
    "    params = h5py.File(params_file, 'r')\n",
    "    for l in range(L+1):\n",
    "        pr = net.params[sf('fc',0,l)]\n",
    "        pr[0].data[...] = params[sf('fc',l)]['W'].value\n",
    "        pr[1].data[...] = params[sf('fc',l)]['b'].value\n",
    "    params.close()\n",
    "\n",
    "# second-newest snapshot: the newest may still be mid-write by train.py\n",
    "params_file = sorted(glob('params/iter*.h5'))[-2]\n",
    "load_params(net, params_file)\n",
    "\n",
    "for c in seed_chars:\n",
    "    x = C.index(c)\n",
    "    net.blobs[sf('x',0)].data[...] = 0\n",
    "    net.blobs[sf('x',0)].data[0, x] = 1\n",
    "    for l in range(L):\n",
    "        state_i = net.blobs[sf('h',0,l)].data\n",
    "        state_f = net.blobs[sf('h',1,l)].data\n",
    "        state_i[...] = state_f\n",
    "    net.forward()\n",
    "\n",
    "gen_chars = []\n",
    "for t in range(num_chars):\n",
    "    z = net.blobs[sf('z',0)].data[0].copy().astype(float)\n",
    "    p = (lambda x: x/sum(x))(exp(z/temperature))\n",
    "    x = random.choice(range(k), p=p)\n",
    "    gen_chars.append(C[x])\n",
    "    net.blobs[sf('x',0)].data[...] = 0\n",
    "    net.blobs[sf('x',0)].data[0, x] = 1\n",
    "    for l in range(L):\n",
    "        state_i = net.blobs[sf('h',0,l)].data\n",
    "        state_f = net.blobs[sf('h',1,l)].data\n",
    "        state_i[...] = state_f\n",
    "    net.forward()\n",
    "    \n",
    "print seed_chars + string.join(gen_chars, '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import caffe, numpy, string, os, shutil, h5py, json
sf = lambda *x: string.join([str(i) for i in x], '_')

# Load hyperparameters
hypes = json.load(open('hypes.json'))

T = hypes['sequence_length']
L = hypes['layers_num']
d = hypes['state_dim']
b = hypes['batch_size']

# Copy data to memory from disk
data_disk = h5py.File('data.h5', 'r')
data = {tt: {xy: data_disk[tt][xy].value for xy in ['X', 'Y']}
        for tt in ['train', 'test']}
data_disk.close()

# Initialize solver
solver = caffe.get_solver('solver.prototxt')
nets = {
    'train': solver.net,
    'test': solver.test_nets[0]
}

# Create params directory
if os.path.isdir('params'): shutil.rmtree('params')
os.makedirs('params')

def copy_state(net):
    """
    Copies previous final state to current initial state
    """
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',T,l)].data
        state_i[...] = state_f

def insert_data(net, X, Y):
    for t in range(T):
        net.blobs[sf('x',t)].data[...] = 0
        net.blobs[sf('x',t)].data[range(b), X[t]] = 1
        net.blobs[sf('y',t)].data[...] = Y[t]

def save_params(net, params_file):
    params = h5py.File(params_file, 'w')
    for l in range(L+1):
        pr = net.params[sf('fc',0,l)]
        params.create_group(sf('fc',l))
        params[sf('fc',l)]['W'] = pr[0].data
        params[sf('fc',l)]['b'] = pr[1].data
    params.close()

def compute_loss(net):
    loss = lambda t: net.blobs[sf('loss',t)].data
    loss = numpy.mean([loss(t) for t in range(T)])
    return loss

def update_iter(itr, epoch, tt):
    """
    Increments iter, checks for new epoch,
    resets state to zero if new epoch
    """
    itr += 1
    new_epoch = False
    if itr == len(data[tt]['X']):
        new_epoch = True
        epoch += 1
        itr = 0
        for l in range(L):
            nets[tt].blobs[sf('h',0,l)].data[...] = 0
    return itr, epoch, new_epoch

step_num = 5
test_interval = 5
epoch_train, epoch_test = 1, 1

# Test and train iters
i, j = 0, 0
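
# Main training loop: each pass trains on one (T, b) chunk of data.
# copy_state() carries the final hidden state of the previous chunk into
# the initial state of the next, so training is stateful across chunks;
# update_iter() zeros the state only at epoch boundaries.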
while True:

    copy_state(nets['train'])
    X = data['train']['X'][i]
    Y = data['train']['Y'][i]
    insert_data(nets['train'], X, Y)
    solver.step(step_num)
    i, epoch_train, new_epoch = update_iter(i, epoch_train, 'train')
    if new_epoch:
        step_num = max(1, step_num/2)
        print 'Epoch {}'.format(epoch_train)

    if solver.iter%test_interval == 0:

        copy_state(nets['test'])
        X = data['test']['X'][j]
        Y = data['test']['Y'][j]
        insert_data(nets['test'], X, Y)
        nets['test'].forward()

        loss = compute_loss(nets['test'])
        print 'test loss: {}, iter {}'.format(loss, solver.iter)

        params_file = 'params/iter%08d.h5'%solver.iter
        save_params(nets['test'], params_file)

        j, epoch_test, new_epoch = update_iter(j, epoch_test, 'test')
--------------------------------------------------------------------------------