├── README.md
├── rawtxt
│   └── linux_kernel.txt
├── recurrent.ipynb
└── train.py

/README.md:
--------------------------------------------------------------------------------
# pycaffe-recurrent
IPython notebook for training multilayer LSTM and RNN networks with pycaffe


Example of generated code after training on the Linux kernel for a few hours (average test loss ~1):
```cpp
static int __init bit_next_worker_lock_update(void *arg)
{
	if (cpumask_set_cpu(cpu) + 1) {
		struct dentry *dst_cset = cgroup_mutex;

		current->trace_buffers[cpu] = AUDIT_TIMER_SPINLOCK_SIZE << PAGE_SIZE)
			return;

		/* initialize we be possible */
		for (kdb_size != STA_SYS_READ)
			return;

		for_each_update_read(se);

		rcu_read_lock();
	}

	return 0;
}

static inline void cmd_state_nr_callbacks, int reset_update_print_scan_mintatup(struct seq_file *m, void *v)
{
	struct trace_array *tr;
	struct irq_data *start;
	struct rcu_node *rnp = trace_rcu_cleanup(size_t, kp);
}

static void ftrace_print_ptr(const struct ftrace_hash *timer, struct compat_trigger *data)
{
	if (should_hash->handler_len) {
		struct trace_buffer *buffer;
		if (!strtn | (trace_notifier_buffer_lock))
			create_lock_reserve(&rt_rq->rt_rq);
		continue;
		break;

	case ENTRIESC_RESTART
		kdb_printf("\n");
		return 0;
	}

	return true;
}

/*
 * Precent.
 *
 * We can get is to the ring buffer.
 */
static inline void tick_deferred(void *iter)
{
	if (lock_count_start, commmtable_total->signal_cpus,
	    new_aux.dinable_regs)
		if (!sechdrs[cpu].expires & ALLOUS_PER_BOOTH, 0);
		return ret;
	}

	if (iter->sequence;
} while (trace_option_read_cpu(tsk));

	return ret;
}
```

--------------------------------------------------------------------------------
/rawtxt/linux_kernel.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuprel/pycaffe-recurrent/e28027288a5bec97a39fe7156505bb4f7923b3fb/rawtxt/linux_kernel.txt
--------------------------------------------------------------------------------
/recurrent.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Recurrent Network for Character Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "from caffe import *\n",
    "from caffe.proto.caffe_pb2 import *\n",
    "Lr, Pr = layers, params\n",
    "import string, os, h5py, json\n",
    "from glob import glob\n",
    "sf = lambda *x: string.join([str(i) for i in x], '_')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Specify Hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "hypes = {\n",
    "    'sequence_length': 100,\n",
    "    'layers_num': 2,\n",
    "    'state_dim': 256,\n",
    "    'batch_size': 128,\n",
    "    'recurrent_unit': 'lstm', # rnn or lstm\n",
    "    'solver': {\n",
    "        'base_lr': 5e-3,\n",
    "        'weight_decay': 1e-4,\n",
    "        'lr_policy': 'exp',\n",
    "        'gamma': 0.9999,\n",
    "        'clip_gradients': 100,\n",
    "        'solver_type': SolverParameter.RMSPROP,\n",
    "        'rms_decay': 0.8,\n",
    "        'solver_mode': SolverParameter.GPU\n",
    "    }\n",
    "}\n",
    "\n",
    "txt_file = 'rawtxt/linux_kernel.txt'\n",
    "use_gpu = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "T = hypes['sequence_length']\n",
    "L = hypes['layers_num']\n",
    "d = hypes['state_dim']\n",
    "b = hypes['batch_size']\n",
    "\n",
    "json.dump(hypes, open('hypes.json', 'w'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "txt = open(txt_file, 'r').read()\n",
    "C = sorted(set(txt))\n",
    "k = len(C)\n",
    "if k <= 2**8: uintn = uint8\n",
    "else: uintn = uint16\n",
    "X = array([C.index(c) for c in txt], dtype=uintn)\n",
    "Y = X[1:].copy()\n",
    "X = X[:-1]\n",
    "\n",
    "def chop(x, n=None, m=None):\n",
    "    if n: m = len(x)//n\n",
    "    if m: n = len(x)//m\n",
    "    X = split(array(x[:m*n]), n)\n",
    "    return array(X)\n",
    "\n",
    "rshape = lambda A: chop(rollaxis(chop(A,n=b),1,0),m=T)\n",
    "X, Y = map(rshape, [X, Y])\n",
    "\n",
    "data = h5py.File('data.h5', 'w')\n",
    "data.create_group('train')\n",
    "data.create_group('test')\n",
    "a = 9*len(X)/10\n",
    "data['train']['X'] = X[:a]\n",
    "data['train']['Y'] = Y[:a]\n",
    "data['test']['X'] = X[a:]\n",
    "data['test']['Y'] = Y[a:]\n",
    "data.close()"
   ]
  },
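  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note on the reshaping above: `rshape` turns the flat character stream into an array of shape `(len(X)//b//T, T, b)`. Each chunk holds `T` timesteps of `b` parallel character streams, and chunk `i+1` continues every stream exactly where chunk `i` left off. This layout is what lets `train.py` carry the hidden state from one chunk to the next instead of resetting it every batch."
   ]
  },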
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def rnn_step(h, h_below, kwargs_fc):\n",
    "    \n",
    "    kwargs_fc['num_output'] = d\n",
    "    \n",
    "    h = Lr.Concat(h_below, h)\n",
    "    h = Lr.InnerProduct(h, **kwargs_fc)\n",
    "    h = Lr.TanH(h)\n",
    "    \n",
    "    return h"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def lstm_step(h, h_below, kwargs_fc):\n",
    "    \n",
    "    kwargs_fc['num_output'] = 2*d\n",
    "    \n",
    "    PROD = EltwiseParameter.PROD\n",
    "    c, h = Lr.Slice(h, slice_point=d/2, ntop=2)\n",
    "    h = Lr.Concat(h_below, h)\n",
    "    h = Lr.InnerProduct(h, **kwargs_fc)\n",
    "    i, f, o, g = Lr.Slice(h, slice_point=[d/2,d,3*d/2], ntop=4)\n",
    "    i, f, o = map(Lr.Sigmoid, [i, f, o])\n",
    "    g = Lr.TanH(g)\n",
    "    c = Lr.Eltwise(Lr.Eltwise(f, c, operation=PROD), Lr.Eltwise(i, g, operation=PROD))\n",
    "    h = Lr.Eltwise(o, Lr.TanH(c), operation=PROD)\n",
    "    h = Lr.Concat(c, h)\n",
    "    \n",
    "    return h"
   ]
  },
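  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For reference, `lstm_step` above is the standard LSTM update, with the cell state $c$ and hidden state $h$ packed into one blob of width $d$ ($d/2$ each), so a single `InnerProduct` produces all four gate pre-activations at once (hence `num_output = 2*d`), and `Slice` splits them apart. Writing $x$ for the input from the layer below, each gate reading its own slice of the shared weight matrix, and omitting biases:\n",
    "\n",
    "$$i, f, o = \\sigma(W\\,[x;\\,h]), \\qquad g = \\tanh(W\\,[x;\\,h])$$\n",
    "\n",
    "$$c \\leftarrow f \\odot c + i \\odot g, \\qquad h \\leftarrow o \\odot \\tanh(c)$$"
   ]
  },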
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_net_param(T, b, drop=True):\n",
    "    \n",
    "    net_spec = NetSpec()\n",
    "\n",
    "    bsX, bsY, bsH = [BlobShape() for i in range(3)]\n",
    "    bsH.dim.extend([b,d])\n",
    "    bsX.dim.extend([b,k])\n",
    "    bsY.dim.extend([b])\n",
    "\n",
    "    if hypes['recurrent_unit'] == 'rnn': step = rnn_step\n",
    "    if hypes['recurrent_unit'] == 'lstm': step = lstm_step\n",
    "    \n",
    "    get_kwargs_fc = lambda t, l: {\n",
    "        'param': [{'lr_mult': 1, 'decay_mult': 1, 'name': sf('W', l)},\n",
    "                  {'lr_mult': 2, 'decay_mult': 0, 'name': sf('b', l)}],\n",
    "        'weight_filler': {'type': 'uniform', 'min': -0.01, 'max': 0.01},\n",
    "        'name': sf('fc', t, l)\n",
    "    }\n",
    "    \n",
    "    h = []\n",
    "    for l in range(L):\n",
    "        h.append(Lr.DummyData(shape=bsH))\n",
    "        setattr(net_spec, sf('h',0,l), h[l])\n",
    "    \n",
    "    losses = []\n",
    "    \n",
    "    for t in range(T):\n",
    "\n",
    "        x = Lr.DummyData(shape=bsX)\n",
    "        y = Lr.DummyData(shape=bsY)\n",
    "\n",
    "        h[0] = step(h[0], x, get_kwargs_fc(t, 0))\n",
    "        for l in range(1, L):\n",
    "            h[l] = step(h[l], h[l-1], get_kwargs_fc(t, l))\n",
    "            if drop: h[l] = Lr.Dropout(h[l])\n",
    "\n",
    "        kwargs_fc = get_kwargs_fc(t, L)\n",
    "        kwargs_fc['num_output'] = k\n",
    "        \n",
    "        z = Lr.InnerProduct(h[-1], **kwargs_fc)\n",
    "        loss = Lr.SoftmaxWithLoss(z, y)\n",
    "        \n",
    "        setattr(net_spec, sf('x', t), x)\n",
    "        setattr(net_spec, sf('y', t), y)\n",
    "        setattr(net_spec, sf('z', t), z)\n",
    "        for l in range(L): \n",
    "            setattr(net_spec, sf('h', t+1, l), h[l])\n",
    "        setattr(net_spec, sf('loss', t), loss)\n",
    "    \n",
    "    return net_spec.to_proto()"
   ]
  },
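  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`get_net_param` unrolls the recurrence into a flat Caffe graph: one copy of the step layers per timestep, with every timestep's `InnerProduct` tied to the same weights through the shared param names `W_l` / `b_l`. The `DummyData` blobs (`x_t`, `y_t`, and the initial states `h_0_l`) are placeholders that `train.py` fills in manually, and backprop through the `T`-step unroll amounts to truncated BPTT over `T` characters."
   ]
  },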
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "solver_param = SolverParameter()\n",
    "solver_param.net_param.CopyFrom(get_net_param(T, b))\n",
    "solver_param.test_net_param.add()\n",
    "solver_param.test_net_param[0].CopyFrom(get_net_param(T, b, drop=False))\n",
    "solver_param.test_iter.extend([1])\n",
    "solver_param.test_interval = 10**9\n",
    "for pr, val in hypes['solver'].iteritems():\n",
    "    setattr(solver_param, pr, val)\n",
    "\n",
    "with open('solver.prototxt', 'w') as f: f.write(str(solver_param))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Better to run this from the command line"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "!python train.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deploy (run this while training)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "seed_chars = '#inc'\n",
    "temperature = 0.7\n",
    "num_chars = 2000\n",
    "\n",
    "open('deploy.prototxt', 'w').write(str(get_net_param(1, 1, drop=False)))\n",
    "net = Net('deploy.prototxt', 1)\n",
    "\n",
    "def load_params(net, params_file):\n",
    "    params = h5py.File(params_file, 'r')\n",
    "    for l in range(L+1):\n",
    "        pr = net.params[sf('fc',0,l)]\n",
    "        pr[0].data[...] = params[sf('fc',l)]['W'].value\n",
    "        pr[1].data[...] = params[sf('fc',l)]['b'].value\n",
    "    params.close()\n",
    "\n",
    "# second-newest snapshot: the newest may still be mid-write by train.py\n",
    "params_file = sorted(glob('params/iter*.h5'))[-2]\n",
    "load_params(net, params_file)\n",
    "\n",
    "for c in seed_chars:\n",
    "    x = C.index(c)\n",
    "    net.blobs[sf('x',0)].data[...] = 0\n",
    "    net.blobs[sf('x',0)].data[0, x] = 1\n",
    "    for l in range(L):\n",
    "        state_i = net.blobs[sf('h',0,l)].data\n",
    "        state_f = net.blobs[sf('h',1,l)].data\n",
    "        state_i[...] = state_f\n",
    "    net.forward()\n",
    "\n",
    "gen_chars = []\n",
    "for t in range(num_chars):\n",
    "    z = net.blobs[sf('z',0)].data[0].copy().astype(float)\n",
    "    p = (lambda x: x/sum(x))(exp(z/temperature))\n",
    "    x = random.choice(range(k), p=p)\n",
    "    gen_chars.append(C[x])\n",
    "    net.blobs[sf('x',0)].data[...] = 0\n",
    "    net.blobs[sf('x',0)].data[0, x] = 1\n",
    "    for l in range(L):\n",
    "        state_i = net.blobs[sf('h',0,l)].data\n",
    "        state_f = net.blobs[sf('h',1,l)].data\n",
    "        state_i[...] = state_f\n",
    "    net.forward()\n",
    "    \n",
    "print seed_chars + string.join(gen_chars, '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import caffe, numpy, string, os, shutil, h5py, json
sf = lambda *x: string.join([str(i) for i in x], '_')

# Load hyperparameters
hypes = json.load(open('hypes.json'))

T = hypes['sequence_length']
L = hypes['layers_num']
d = hypes['state_dim']
b = hypes['batch_size']

# Copy data to memory from disk
data_disk = h5py.File('data.h5', 'r')
data = {tt: {xy: data_disk[tt][xy].value for xy in ['X', 'Y']}
        for tt in ['train', 'test']}
data_disk.close()

# Initialize solver
solver = caffe.get_solver('solver.prototxt')
nets = {
    'train': solver.net,
    'test': solver.test_nets[0]
}

# Create params directory
if os.path.isdir('params'): shutil.rmtree('params')
os.makedirs('params')

def copy_state(net):
    """
    Copies previous final state to current initial state
    """
    for l in range(L):
        state_i = net.blobs[sf('h',0,l)].data
        state_f = net.blobs[sf('h',T,l)].data
        state_i[...] = state_f

def insert_data(net, X, Y):
    for t in range(T):
        net.blobs[sf('x',t)].data[...] = 0
        net.blobs[sf('x',t)].data[range(b), X[t]] = 1
        net.blobs[sf('y',t)].data[...] = Y[t]

def save_params(net, params_file):
    params = h5py.File(params_file, 'w')
    for l in range(L+1):
        pr = net.params[sf('fc',0,l)]
        params.create_group(sf('fc',l))
        params[sf('fc',l)]['W'] = pr[0].data
        params[sf('fc',l)]['b'] = pr[1].data
    params.close()

def compute_loss(net):
    loss = lambda t: net.blobs[sf('loss',t)].data
    loss = numpy.mean([loss(t) for t in range(T)])
    return loss

def update_iter(itr, epoch, tt):
    """
    Increments iter, checks for new epoch,
    resets state to zero if new epoch
    """
    itr += 1
    new_epoch = False
    if itr == len(data[tt]['X']):
        new_epoch = True
        epoch += 1
        itr = 0
        for l in range(L):
            nets[tt].blobs[sf('h',0,l)].data[...] = 0
    return itr, epoch, new_epoch

step_num = 5
test_interval = 5
epoch_train, epoch_test = 1, 1

# Test and train iters
i, j = 0, 0
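
# Main training loop: each pass trains on one (T, b) chunk of data.
# copy_state() carries the final hidden state of the previous chunk into
# the initial state of the next, so training is stateful across chunks;
# update_iter() zeros the state only at epoch boundaries.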
while True:

    copy_state(nets['train'])
    X = data['train']['X'][i]
    Y = data['train']['Y'][i]
    insert_data(nets['train'], X, Y)
    solver.step(step_num)
    i, epoch_train, new_epoch = update_iter(i, epoch_train, 'train')
    if new_epoch:
        step_num = max(1, step_num/2)
        print 'Epoch {}'.format(epoch_train)

    if solver.iter%test_interval == 0:

        copy_state(nets['test'])
        X = data['test']['X'][j]
        Y = data['test']['Y'][j]
        insert_data(nets['test'], X, Y)
        nets['test'].forward()

        loss = compute_loss(nets['test'])
        print 'test loss: {}, iter {}'.format(loss, solver.iter)

        params_file = 'params/iter%08d.h5'%solver.iter
        save_params(nets['test'], params_file)

        j, epoch_test, new_epoch = update_iter(j, epoch_test, 'test')
--------------------------------------------------------------------------------