├── simple.txt
├── convergence_experiment.png
├── triple_compare_embedded.png
├── triple_compare_simple.png
├── README.md
├── embedded_srnn_brnn_compare.png
├── triple_compare_PER_LAYER_AVG.png
├── different_eta_values_and_ebptt.png
├── triple_compare_ALL_MULTI_simple.png
├── triple_compare_ALL_MULTI_simple_2.png
├── embedded_srnn_brnn_compare_version2.png
├── triple_compare_averaged_resampling_simple_reber.png
├── dispatch.sh
├── comparison_experiment.py
├── convergence_experiment.py
├── rj_intro.txt
├── pyscript_to_run.py
├── genfigs.py
├── .gitignore
├── common.py
├── reber.py
├── rnn.py
├── example.py
├── brnn.py
├── LICENSE
├── histogram.svg
├── histogram_FAIL.svg
├── error_curve_FAIL.svg
└── error_curve.svg
/simple.txt:
--------------------------------------------------------------------------------
1 | abababababababababababababababababababababababababab
2 |
--------------------------------------------------------------------------------
/convergence_experiment.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/convergence_experiment.png
--------------------------------------------------------------------------------
/triple_compare_embedded.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_embedded.png
--------------------------------------------------------------------------------
/triple_compare_simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_simple.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # rikura
2 | Implementation of a recurrent neural network and a binary recurrent neural network.
3 |
--------------------------------------------------------------------------------
/embedded_srnn_brnn_compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/embedded_srnn_brnn_compare.png
--------------------------------------------------------------------------------
/triple_compare_PER_LAYER_AVG.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_PER_LAYER_AVG.png
--------------------------------------------------------------------------------
/different_eta_values_and_ebptt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/different_eta_values_and_ebptt.png
--------------------------------------------------------------------------------
/triple_compare_ALL_MULTI_simple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_ALL_MULTI_simple.png
--------------------------------------------------------------------------------
/triple_compare_ALL_MULTI_simple_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_ALL_MULTI_simple_2.png
--------------------------------------------------------------------------------
/embedded_srnn_brnn_compare_version2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/embedded_srnn_brnn_compare_version2.png
--------------------------------------------------------------------------------
/triple_compare_averaged_resampling_simple_reber.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cafe/rikura/master/triple_compare_averaged_resampling_simple_reber.png
--------------------------------------------------------------------------------
/dispatch.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -l
2 | #SBATCH -p cortex
3 | ##SBATCH --job-name=test
4 | ##SBATCH --exclusive
5 | #SBATCH -x n0000.cortex0,n0001.cortex0,n0012.cortex0,n0013.cortex0
6 | module load python/anaconda2
7 |
8 | module unload intel
9 | python pyscript_to_run.py
10 |
--------------------------------------------------------------------------------
/comparison_experiment.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | import example
3 |
4 |
# Per-epoch training errors of the binary RNN (eta fixed at 1) on the
# embedded Reber task.
binary = example.example(
    hidden=100, examples=500, epochs=50, eta=1, rnn=None,
    binary=True, progress=True, embedded=True)[1]

# 25 learning rates: 10**0, 10**-0.2, ..., 10**-4.8.
eta_vals = 10.0**(-arange(25.0)/5.0)

# Per-epoch training errors of the standard RNN, keyed by learning rate.
standard = {}
for eta in eta_vals:
    curve = example.example(
        hidden=100, examples=500, epochs=50, eta=eta, rnn=None,
        binary=False, progress=True, embedded=True)[1]
    standard[eta] = curve
12 |
13 |
--------------------------------------------------------------------------------
/convergence_experiment.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | from example import example
3 |
def sample_curve_standard():
    """Train a fresh standard RNN on embedded Reber data; return its per-epoch errors."""
    return example(hidden=100, examples=500, epochs=50, eta=0.04, binary=False, embedded=True)[1]


def sample_curve_binary():
    """Train a fresh binary RNN on embedded Reber data; return its per-epoch errors."""
    return example(hidden=100, examples=500, epochs=50, eta=1, binary=True, embedded=True)[1]


# Number of independent training runs collected for each network type.
N = 10

s_collection, b_collection = [], []

# Runs are interleaved (standard, binary, standard, ...) exactly as before.
for i in range(N):
    s_collection += [sample_curve_standard()]
    b_collection += [sample_curve_binary()]
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/rj_intro.txt:
--------------------------------------------------------------------------------
1 | Two households, both alike in dignity,
2 | In fair Verona, where we lay our scene,
3 | From ancient grudge break to new mutiny,
4 | Where civil blood makes civil hands unclean.
5 | From forth the fatal loins of these two foes
6 | A pair of star-cross'd lovers take their life;
7 | Whose misadventured piteous overthrows
8 | Do with their death bury their parents' strife.
9 | The fearful passage of their death-mark'd love,
10 | And the continuance of their parents' rage,
11 | Which, but their children's end, nought could remove,
12 | Is now the two hours' traffic of our stage;
13 | The which if you with patient ears attend,
14 | What here shall miss, our toil shall strive to mend.
15 |
--------------------------------------------------------------------------------
/pyscript_to_run.py:
--------------------------------------------------------------------------------
1 | import example
2 | import h5py
3 | import time
4 | from datetime import datetime
5 |
6 |
import os

BINARY = True   # True: train the binary RNN; False: train the standard RNN
epochs = 500

# Create the output directory *before* the long training run, so that a
# missing 'runs/' folder cannot throw the results away at the very end.
if not os.path.isdir('runs'):
    os.makedirs('runs')

# Taken before training so that the file name really is the start of the run.
start_date = datetime.fromtimestamp(time.time())

if BINARY:
    errs = example.text(fname='rj_intro.txt', hidden=500, seq_length=10, epochs=epochs, eta=1, binary=True, progress=False)[1]
    label = 'Train stats: binary'
else:
    errs = example.text(fname='rj_intro.txt', hidden=500, seq_length=10, epochs=epochs, eta=1e-3, binary=False, progress=False)[1]
    label = 'Train stats: standard'

# The context manager closes the HDF5 file even if writing the dataset fails.
with h5py.File('runs/' + start_date.isoformat() + '.hdf5', 'w') as f:
    run_stats = f.create_group(label)
    run_stats.create_dataset('errors', data=errs, compression='gzip')
22 |
--------------------------------------------------------------------------------
/genfigs.py:
--------------------------------------------------------------------------------
1 | from example import *
2 |
# Figure set 1 (disabled): error curve and weight histogram of one binary RNN.
if False:
    figure()
    h = 70
    # example() returns (rnn, costs, validation_costs); the validation curve
    # is not plotted here.
    rnn, costs, _ = example(hidden=h, examples=1000, epochs=100, eta=1, rnn=None, binary=True, progress=True)
    plot(costs)
    title('Binary RNN with %d Hidden Units' % h)
    xlabel('Epoch #')
    ylabel('Discrete Error')
    savefig('error_curve.svg')

    figure()
    hist(ravel(rnn.aux['h']))
    title('Histogram of Recurrent Weight Average Values')
    savefig('histogram.svg')

# Figure set 2 (enabled): final-epoch error as a function of hidden layer size.
if True:
    figure()
    hids, resids = experiment()
    # resids holds one error curve per hidden size; the last column is the
    # error after the final epoch.
    plot(hids, resids[:,-1])
    title('Residual Error for BRNN vs. Hidden Layer Size')
    xlabel('Hidden layer size')
    ylabel('Residual error')
    savefig('residual_error.svg')
26 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | slurm-*
2 | # Byte-compiled / optimized / DLL files
3 | __pycache__/
4 | *.py[cod]
5 | *$py.class
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | env/
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *,cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 |
56 | # Sphinx documentation
57 | docs/_build/
58 |
59 | # PyBuilder
60 | target/
61 |
62 | #Ipython Notebook
63 | .ipynb_checkpoints
64 |
--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | from scipy.stats import norm as norm_dist
3 |
4 |
# Back-propagation building blocks shared by the RNN and the BRNN; the
# backprop methods of both networks are assembled from these three functions.
def layer_updates(delta, mat_input, eta, binary=False):
    """Return the (weight update, bias update) pair for one layer.

    delta     -- back-propagated signal at the layer's output
    mat_input -- input vector that was fed to the layer
    eta       -- learning rate
    binary    -- if True scale by +1/sqrt(len(mat_input)+1), otherwise by -1
    """
    if binary:
        scale = 1/sqrt(len(mat_input)+1)
    else:
        scale = -1
    bias_update = scale * eta * delta
    weight_update = outer(bias_update, mat_input)
    return weight_update, bias_update
def top_backprop_signal(cost_grad, q_top):
    """Return the error signal at the top layer: cost gradient times q_top."""
    signal = cost_grad * q_top
    return signal
def backprop_step(delta_upper, w_upper, q_lower, binary=False):
    """Propagate delta_upper one layer down through w_upper.

    The result is w_upper.T.dot(delta_upper) * q_lower, additionally scaled by
    2/sqrt(len(q_lower)+1) when binary is True.
    """
    if binary:
        factor = 2/sqrt(len(q_lower)+1)
    else:
        factor = 1
    pulled_back = w_upper.T.dot(delta_upper)
    return factor * pulled_back * q_lower
14 |
15 |
# Frequently used Gaussian helpers.
def N0(mu, sigma):
    """Density at 0 of a normal distribution with mean mu and std sigma."""
    return norm_dist.pdf(0, mu, sigma)

def phi(x):
    """Cumulative distribution function of the standard normal at x."""
    return norm_dist.cdf(x, 0, 1)
19 |
def mse_deriv(x, a):
    """Return the difference x - a."""
    return x-a

def mse_cost(x, a):
    """Return the squared norm of x - a."""
    residual = x-a
    return norm(residual)**2
22 |
def _softmax_of_negated(x):
    """Return exp(-x)/sum(exp(-x)), shifted by the maximum so exp cannot overflow."""
    z = -asarray(x, dtype=float)
    e = exp(z - z.max())
    return e / sum(e)

def ce_logit_deriv(x, a):
    """Gradient of ce_logit_cost with respect to x.

    x -- output vector (the predicted distribution is softmax(-x))
    a -- non-negative target weights; they are normalised to sum to 1
    """
    return a/sum(a) - _softmax_of_negated(x)

def ce_logit_cost(x, a):
    """Cross-entropy of softmax(-x) against normalised a, minus the entropy of a.

    The log-sum-exp term is evaluated with a max-shift, so large-magnitude
    outputs yield a finite cost instead of inf/nan.
    """
    a = a/sum(a)                      # new array: the caller's `a` is not modified
    z = -asarray(x, dtype=float)
    shift = z.max()
    ce = a.dot(x) + shift + log(sum(exp(z - shift)))
    a[a == 0] = 1                     # log(1) == 0, so zero targets contribute nothing
    return ce + a.dot(log(a))
29 |
# Computational context of one forward pass of an RNN or BRNN.
class Context:
    """Holds four per-pass dictionaries: a, a_, out and out_, all initially empty."""
    def __init__(self):
        self.a = {}
        self.a_ = {}
        self.out = {}
        self.out_ = {}
35 |
36 |
37 |
class RNN:
    """Training loop shared by the standard and the binary recurrent network.

    The methods below call ``self.fprop(ins)``, ``self.backprop(ins, outs, eta)``
    and ``self.cost(x, a)`` and read ``self.context.out``; subclasses must
    provide them.
    """

    #common method of calculating error for RNN and BRNN
    def calculate_cost(rnn, outs, ti=0, tf=None):
        """Return the mean of rnn.cost over time steps ti..tf (inclusive).

        Uses the outputs stored in rnn.context.out by the last forward pass.
        tf defaults to the last index of outs.
        """
        c = rnn.context
        if tf is None:
            tf = len(outs)-1
        cost = 0
        for t in range(ti, tf+1):
            cost += rnn.cost(c.out[t], outs[t])
        return cost*1.0 / (tf-ti+1)

    def train(self, ins, outs, eta):
        """Run one forward/backward pass on a single example and return its cost.

        The cost is computed from the forward pass made before the update.
        """
        self.fprop(ins)
        self.backprop(ins, outs, eta)
        return self.calculate_cost(outs)

    def train_session(self, data, eta, epoch_iterable, progress=None):
        """Train on every (ins, outs) pair of data once per epoch.

        data           -- iterable of (ins, outs) pairs
        eta            -- learning rate handed to train()
        epoch_iterable -- one item per epoch (only its length matters)
        progress       -- optional iterator advanced once per training example

        Returns the list of per-epoch mean costs.
        Raises ValueError if an epoch is started with no training examples.
        """
        # Materialise once: a one-shot iterable (e.g. Python 3 `map`) would
        # otherwise be empty from the second epoch on.
        data = list(data)
        res = []
        for _ in epoch_iterable:
            if not data:
                raise ValueError('train_session needs at least one (ins, outs) example')
            accum = 0
            for ins, outs in data:
                accum += self.train(ins, outs, eta)
                if progress is not None:
                    next(progress)
            res.append(accum*1.0/len(data))
        return res
61 |
62 | #res = [sum(self.train(ins,outs,eta) for ins, outs in data) for _ in epoch_iterable]
63 |
64 |
65 |
--------------------------------------------------------------------------------
/reber.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | import numpy as np
4 |
# Alphabet of the Reber grammar; a symbol's position is its one-hot index.
chars = 'BTSXPVE'

# Transition table: graph[node] = [successor nodes, symbols emitted on the way].
# Node 6 is the terminal node and has no entry.
graph = [
    [(1, 5), ('T', 'P')],
    [(1, 2), ('S', 'X')],
    [(3, 5), ('S', 'X')],
    [(6,), ('E',)],
    [(3, 2), ('V', 'P')],
    [(4, 5), ('V', 'T')],
]


def in_grammar(word):
    """Return True if word follows the transitions of the Reber grammar.

    The word must start with 'B'. Every following symbol must be a legal
    transition from the current node. A legal *prefix* of a Reber string is
    accepted as well; reaching the final 'E' is not required. An empty word,
    or any symbol after the terminal 'E', yields False.
    """
    if not word or word[0] != 'B':
        return False
    node = 0
    for c in word[1:]:
        if node >= len(graph):
            # Terminal node already reached: nothing may follow 'E'.
            return False
        targets, symbols = graph[node]
        try:
            node = targets[symbols.index(c)]
        except ValueError:  # c is not a legal symbol at this node
            return False
    return True
23 |
def sequenceToWord(sequence):
    """
    Convert a sequence of one-hot vectors into the corresponding Reber string.

    For every vector the first position equal to 1 selects the character.
    """
    letters = []
    for one_hot in sequence:
        hot_positions = np.where(one_hot==1.)[0]
        letters.append(chars[hot_positions[0]])
    return ''.join(letters)
33 |
def generateSequences(minLength):
    """Random-walk the grammar until a string longer than minLength is produced.

    Returns (inchars, outchars): inchars is the generated string as a list of
    symbols starting with 'B'; outchars[k] holds the symbols that were legal
    successors of inchars[k].
    """
    while True:
        node, inchars, outchars = 0, ['B'], []
        while node != 6:
            targets, symbols = graph[node]
            pick = np.random.randint(0, len(targets))
            inchars.append(symbols[pick])
            outchars.append(symbols)
            node = targets[pick]
        if len(inchars) > minLength:
            return inchars, outchars
47 |
48 |
def get_one_example(minLength):
    """Generate one Reber string and encode it as (inputs, targets).

    inputs[k] is the one-hot vector of the k-th symbol; targets[k] marks with
    1 every symbol that may legally follow it. zip() stops at the shorter
    list, so the final symbol (which has no successor set) is not an input.
    """
    inchars, outchars = generateSequences(minLength)
    inseq, outseq = [], []
    for symbol, successors in zip(inchars, outchars):
        in_vec = np.zeros(7)
        in_vec[chars.find(symbol)] = 1.
        out_vec = np.zeros(7)
        for nxt in successors:
            out_vec[chars.find(nxt)] = 1.
        inseq.append(in_vec)
        outseq.append(out_vec)
    return inseq, outseq
62 |
63 |
def get_char_one_hot(char):
    """Return a one-element list holding a 7-vector with a 1 for every symbol in char.

    char may be any iterable of symbols (a string or a tuple of characters).
    """
    encoded = np.zeros(7)
    for symbol in char:
        encoded[chars.find(symbol)] = 1.
    return [encoded]
69 |
def get_n_examples(n, minLength=10):
    """Return a list of n independent examples from get_one_example(minLength)."""
    # `range` instead of the Python-2-only `xrange` keeps this usable on Python 3.
    return [get_one_example(minLength) for _ in range(n)]
75 |
# Symbols that can wrap a plain Reber string in the embedded grammar.
emb_chars = "TP"


def get_one_embedded_example(minLength=10):
    """Generate one embedded Reber example as (inputs, targets).

    Input symbols:  B, e, <plain Reber example>, E, e
    Target vectors: {T,P}, B, <plain Reber targets>, e, E
    where e is drawn at random from emb_chars, after the plain example has
    been generated.
    """
    inner_in, inner_out = get_one_example(minLength)
    emb_char = emb_chars[np.random.randint(0, len(emb_chars))]

    head_in = get_char_one_hot(('B',)) + get_char_one_hot((emb_char,))
    head_out = get_char_one_hot(emb_chars) + get_char_one_hot('B',)

    tail_in = get_char_one_hot(('E',)) + get_char_one_hot((emb_char,))
    tail_out = get_char_one_hot((emb_char, )) + get_char_one_hot(('E',))

    new_in = head_in + inner_in + tail_in
    new_out = head_out + inner_out + tail_out
    return new_in, new_out
93 |
def get_n_embedded_examples(n, minLength=10):
    """Return a list of n independent examples from get_one_embedded_example(minLength)."""
    # `range` instead of the Python-2-only `xrange` keeps this usable on Python 3.
    return [get_one_embedded_example(minLength) for _ in range(n)]
99 |
--------------------------------------------------------------------------------
/rnn.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | from common import *
3 |
# Convenience functions for the forward propagation of the standard RNN.
def linear_step(w, inp, b):
    """Return the affine map w.dot(inp) + b."""
    projected = w.dot(inp)
    return projected + b
def forward_prop(sigma, sigma_, w, inp, b):
    """Apply sigma and sigma_ to the pre-activation w.dot(inp) + b.

    Returns the pair (sigma(z), sigma_(z)).
    """
    z = w.dot(inp) + b
    activation = sigma(z)
    slope = sigma_(z)
    return (activation, slope)
def double_forward_prop(sigma, sigma_, w1, inp1, b1, w2, inp2, b2):
    """Apply sigma and sigma_ to the sum of two affine maps.

    The pre-activation is (w1.dot(inp1) + b1) + (w2.dot(inp2) + b2);
    returns the pair (sigma(z), sigma_(z)).
    """
    first = w1.dot(inp1) + b1
    second = w2.dot(inp2) + b2
    z = first + second
    return (sigma(z), sigma_(z))
13 |
class SRNN(RNN):
    """Standard real-valued recurrent network: tanh hidden layer, linear output.

    Parameters live in self.params: 'wi' (input -> hidden), 'w' (hidden ->
    hidden), 'bi' (hidden bias), 'wo' (hidden -> output), 'bo' (output bias).
    """

    def sigma(self, x):
        """Hidden activation function."""
        return tanh(x)

    def prime(self, x):
        """Derivative of tanh expressed through its output value x."""
        return 1-x**2

    def sigma_(self, x):
        """Derivative of tanh at pre-activation x."""
        return 1-tanh(x)**2

    def cost_deriv(self, x, a):
        """Gradient of the cross-entropy cost with respect to the output x."""
        return ce_logit_deriv(x,a)

    def cost(self, x, a):
        """Discrete error: True if the rounded prediction differs from a anywhere."""
        #return ce_logit_cost(x,a)
        return any(around(exp(-x)/sum(exp(-x)) * sum(a)) != a) #alternative error

    def __init__(self, input_size, recurrent_size, output_size):
        """Create a network with small Gaussian weights and zero biases."""
        self.input_size = input_size
        self.recurrent_size = recurrent_size
        self.output_size = output_size
        self.params = {}
        self.params['w'] = randn(recurrent_size, recurrent_size) * 0.1 / sqrt(recurrent_size)
        self.params['wi'] = randn(recurrent_size, input_size) * 0.1 / sqrt(input_size)
        self.params['bi'] = zeros((recurrent_size,))
        self.params['wo'] = randn(output_size, recurrent_size) * 0.1 / sqrt(recurrent_size)
        self.params['bo'] = zeros((output_size,))

    def fprop(n, ins, init=None, ti=0, tf=None):
        """Forward-propagate ins[ti..tf] starting from hidden state init.

        init defaults to a zero vector and tf to the last index of ins.
        Replaces n.context with a fresh Context holding, per time step, the
        hidden activation (a), its derivative (a_), the output (out) and the
        output derivative (out_, always 1 for the linear output). Returns n.
        """
        if init is None:
            init = zeros((n.recurrent_size,))
        if tf is None:
            tf = len(ins)-1
        c = n.context = Context()
        a = init
        for t in range(ti, tf+1):
            a, a_ = double_forward_prop(n.sigma, n.sigma_, n.params['wi'], ins[t], n.params['bi'], n.params['w'], a, 0)
            c.a[t], c.a_[t] = a, a_
            c.out[t], c.out_[t] = linear_step(n.params['wo'], a, n.params['bo']), 1
        return n

    def backprop(n, ins, outs, eta, ti=0, tf=None):
        """Back-propagate through time once and update n.params in place.

        Assumes fprop was already run on the same ins (uses n.context).
        Updates from all time steps are summed and applied together. The
        recurrent weight 'w' receives no update from step ti, because the
        activation that preceded it is not stored in the context. Returns n.
        """
        c, tf = n.context, len(ins)-1 if tf is None else tf
        delta_from_right = zeros(n.params['bi'].shape)
        delta = {}
        sum_delta = dict(wo=0,bo=0,w=0,wi=0,bi=0)
        for t in range(tf, ti-1, -1):
            delta_from_top = top_backprop_signal(n.cost_deriv(c.out[t], outs[t]), c.out_[t])
            delta['wo'], delta['bo'] = layer_updates(delta_from_top, c.a[t], eta)
            delta_to_left_or_down = backprop_step(delta_from_top, n.params['wo'], c.a_[t]) + backprop_step(delta_from_right, n.params['w'] , c.a_[t])
            delta['w'], _ = (0,0) if t == ti else layer_updates(delta_to_left_or_down, c.a[t-1], eta)
            delta['wi'], delta['bi'] = layer_updates(delta_to_left_or_down, ins[t], eta)
            sum_delta = { k : (sum_delta[k] + delta[k]) for k in delta.keys()}
            # Carry the hidden-layer signal to the previous time step; without
            # this the recurrent term above is always zero and no gradient
            # flows back through time.
            delta_from_right = delta_to_left_or_down
        n.params = { k : (n.params[k] + sum_delta[k]) for k in sum_delta.keys()}
        return n
59 |
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | from rnn import *
3 | from brnn import *
4 | from common import *
5 |
def example(hidden=10, examples=1000, epochs=100, eta=0.001, rnn=None, binary=False, progress=True, embedded=False):
    """Train a network on Reber-grammar data, then measure it on fresh data.

    hidden   -- hidden layer size (used only when rnn is None)
    examples -- number of generated training (and validation) sequences
    epochs   -- number of passes over the data
    eta      -- learning rate
    rnn      -- existing network to continue training; a new BRNN/SRNN
                with 7 inputs and 7 outputs is created when None
    binary   -- use a BRNN and map the data from {0,1} to {-1,+1}
    progress -- show a progress bar
    embedded -- use the embedded Reber grammar instead of the plain one

    Returns (rnn, costs, validation_costs); both cost lists have one entry
    per epoch. Validation runs train_session with eta=0 on new data.
    """
    import reber
    data_source = reber.get_n_embedded_examples if embedded else reber.get_n_examples

    def make_data():
        # A real list (not a lazy `map`) so that it can be iterated every epoch.
        arrays = [np.array(pair) for pair in data_source(examples)]
        if binary:
            arrays = [2*x-1 for x in arrays]
        return arrays

    DATA = make_data()
    if rnn is None:
        rnn = BRNN(7, hidden, 7) if binary else SRNN(7, hidden, 7)
    pbar = gen_pbar() if progress else (lambda x: x)
    costs = rnn.train_session(DATA, eta, iter(pbar(range(epochs))))

    #validate:
    eta=0
    DATA = make_data()

    pbar = gen_pbar() if progress else (lambda x: x)
    validation_costs = rnn.train_session(DATA, eta, iter(pbar(range(epochs))))

    return rnn, costs, validation_costs
23 |
def compare_embedded(hidden=100, embedded=True, examples=1000, epochs=100):
    """Train a standard RNN (eta=0.001) and a binary RNN (eta=1) with equal settings.

    Returns ((srnn training costs, brnn training costs),
             (srnn validation costs, brnn validation costs)).
    """
    eta_srnn = 0.001

    srnn_run = example(hidden, examples, epochs, eta_srnn, binary=False, embedded=embedded)
    brnn_run = example(hidden, examples, epochs, 1, binary=True, embedded=embedded)

    # example() returns (network, training costs, validation costs).
    training = (srnn_run[1], brnn_run[1])
    validation = (srnn_run[2], brnn_run[2])
    return training, validation
30 |
def triple_comparison():
    """Train a BRNN on plain Reber data and compare three ways of evaluating it.

    Returns (rnn, costs, error_bins). costs are the per-epoch training errors.
    error_bins maps each evaluation mode to the list of per-example errors on
    fresh data:
      'prob'                   -- rnn.fprop
      'det'                    -- rnn.fprop_multi_single_sample
      'resample_per_layer_avg' -- rnn.fprop_per_layer_avg
    """
    import reber

    def data_source(ex):
        # Map {0,1} to {-1,+1}. A real list is needed because the validation
        # data is iterated once per evaluation mode.
        #return [2*np.array(pair)-1 for pair in reber.get_n_embedded_examples(ex)]
        return [2*np.array(pair)-1 for pair in reber.get_n_examples(ex)]

    word_len = 7
    examples = 200
    epochs = 100
    hidden = 100
    data = data_source(examples)
    rnn = BRNN(word_len, hidden, word_len)
    pbar = gen_pbar()
    #train
    costs = rnn.train_session(data, 1, iter(pbar(range(epochs))))

    #validate / measure performance on new data
    data = data_source(examples)

    funcs = dict(prob=rnn.fprop, det=rnn.fprop_multi_single_sample, resample_per_layer_avg=rnn.fprop_per_layer_avg)
    error_bins = dict(prob=[], det=[], resample_per_layer_avg=[])
    for k in funcs:
        for ins, outs in data:
            funcs[k](ins)
            error_bins[k].append(rnn.calculate_cost(outs))

    return rnn, costs, error_bins
59 |
60 |
61 |
62 |
def experiment():
    """Train one binary RNN for each hidden layer size from 1 to 100.

    Returns (hiddens, residuals): residuals[k] is the per-epoch training
    error curve of the network with hiddens[k] hidden units.
    """
    hiddens = arange(1,101)
    pbar = gen_pbar()
    residuals = array([
        example(hidden=h, examples=500, epochs=50, eta=1, rnn=None, binary=True, progress=False)[1]
        for h in pbar(hiddens)
    ])
    return hiddens,residuals
69 |
def text(fname='aiw.txt', hidden=10, seq_length=10, epochs=10, eta=1, rnn=None, binary=False, progress=True):
    """Train a character-level next-character predictor on a text file.

    fname      -- path of the text file
    hidden     -- hidden layer size (used only when rnn is None)
    seq_length -- number of characters per training sequence
    epochs     -- number of passes over the data
    eta        -- learning rate
    rnn        -- existing network to continue training, or None for a new one
    binary     -- use a BRNN with {-1,+1} encoding instead of an SRNN with {0,1}
    progress   -- show a progress bar

    Returns (rnn, costs, dataset): costs has one entry per epoch and dataset
    is the list of (inputs, targets) one-hot arrays that was trained on.
    """
    # Data I/O
    with open(fname, 'r') as source:
        data = source.read()
    if data.endswith('\n'):
        data = data[:-1]  # remove the trailing newline only if there is one
    chars = sorted(set(data))
    data_size, vocab_size = len(data), len(chars)
    print('Data has %d characters, %d unique.' % (data_size, vocab_size))
    char_to_ix = dict((ch, i) for i, ch in enumerate(chars))

    identity = np.eye(vocab_size)
    def text_to_repr(snippet):
        encoded = identity[[char_to_ix[ch] for ch in snippet]]
        return -1 + 2*encoded if binary else encoded

    if rnn is None:
        if binary:
            rnn = BRNN(vocab_size, hidden, vocab_size)
        else:
            rnn = SRNN(vocab_size, hidden, vocab_size)

    # The target of each input character is the character that follows it in
    # the text; the successor of the very last character wraps to the start.
    dataset = []
    for start in range(0, data_size, seq_length):
        stop = min(start + seq_length, data_size)
        inputs = data[start:stop]
        targets = data[start+1:stop] + data[stop % data_size]
        dataset.append((text_to_repr(inputs), text_to_repr(targets)))

    pbar = gen_pbar() if progress else (lambda x: x)
    costs = rnn.train_session(dataset, eta, range(epochs), iter(pbar(range(epochs*len(dataset)))))
    return rnn, costs, dataset
99 |
100 |
101 |
def gen_pbar():
    """Build a progress bar showing percentage, a rotating-marker bar and the ETA."""
    import progressbar as pb
    widgets = [
        pb.Percentage(),
        pb.Bar(marker=pb.RotatingMarker()),
        ' ',
        pb.ETA(),
        ' time to learn',
    ]
    return pb.ProgressBar(widgets=widgets)
105 |
--------------------------------------------------------------------------------
/brnn.py:
--------------------------------------------------------------------------------
1 | from pylab import *
2 | from common import *
3 |
# Convenience functions for the forward propagation of the binary RNN
# (refer to the formula sheet).
def binary_linear_step(w, w_, inp, b, is_first=False):
    """Return (zmu, zsigma) of the pre-activation of one binary layer.

    w, w_    -- weight matrix and its companion matrix used in the variance term
    inp      -- input vector
    b        -- bias
    is_first -- if True the sum(1 - inp**2) term is left out of the variance
    """
    K = 1.0*(len(inp) + 1)
    pre_activation = w.dot(inp) + b
    zmu = 1/sqrt(K) * pre_activation
    input_term = (not is_first) * sum(1 - inp**2)
    weight_term = w_.dot(inp**2)
    zsigma = sqrt(1/K * (1 + input_term + weight_term))
    return (zmu, zsigma)
def binary_nonlinear_step(zmu, zsigma):
    """Return (out, out_) for a pre-activation with mean zmu and std zsigma.

    out is 2*phi(zmu/zsigma) - 1 and out_ is N0(zmu, zsigma).
    """
    standardized = zmu / zsigma
    out = 2 * phi(standardized) - 1
    return (out, N0(zmu, zsigma))
def binary_forward_prop(w, w_, inp, b, is_first=False):
    """Run the linear step followed by the nonlinear step of one binary layer."""
    moments = binary_linear_step(w, w_, inp, b, is_first)
    return binary_nonlinear_step(*moments)
def binary_add_layers(lin1, lin2):
    """Combine two (mean, std) pairs: means add, stds add in quadrature."""
    mean = lin1[0] + lin2[0]
    spread = sqrt(lin1[1]**2 + lin2[1]**2)
    return (mean, spread)
def binary_double_forward_prop(w1, w1_, inp1, b1, w2, w2_, inp2, b2, is_first1=False, is_first2=False):
    """Forward-propagate a binary layer that is fed by two input streams.

    Each stream goes through binary_linear_step; the two (mean, std) results
    are merged with binary_add_layers and passed to binary_nonlinear_step.
    """
    combined = binary_add_layers(
        binary_linear_step(w1, w1_, inp1, b1, is_first1),
        binary_linear_step(w2, w2_, inp2, b2, is_first2))
    zmu, zsigma = combined
    return binary_nonlinear_step(zmu, zsigma)
def sample_array(arr):
    """Return the element-wise sign of arr minus a random threshold.

    The thresholds are 2*sample(shape(arr)) - 1, one per element of arr.
    """
    thresholds = 2*sample(shape(arr)) - 1
    return sign(arr - thresholds)
28 |
29 |
class BRNN(RNN):
    """Binary recurrent network.

    self.params holds real-valued parameters: 'hi' (input -> hidden), 'h'
    (hidden -> hidden), 'ho' (hidden -> output) and the biases 'bi', 'bo'.
    self.aux holds tanh of the three weight matrices and self.aux_ the
    matching 1 - tanh**2 values; both are refreshed after every backprop.
    """

    def sigma(self, x):
        """Squashing function mapping parameters into (-1, 1)."""
        return tanh(x)

    def prime(self, x):
        """Derivative of tanh expressed through its output value x."""
        return 1-x**2

    def sigma_(self, x):
        """Derivative of tanh at x."""
        return 1 - tanh(x)**2

    def cost_deriv(self, z_out, y):
        """Top-level error signal for output z_out and +/-1 target y."""
        return y/phi(y*z_out)

    def cost(self, z_out, y):
        """Discrete error: True if any output sign differs from the target y."""
        return any(sign(z_out) != y)

    def __init__(self, input_size, recurrent_size, output_size):
        """Create a network with small Gaussian parameters and zero biases."""
        self.input_size = input_size
        self.recurrent_size = recurrent_size
        self.output_size = output_size
        self.params = {}
        self.params['h'] = randn(recurrent_size, recurrent_size) * 0.1 / sqrt(recurrent_size)
        self.params['hi'] = randn(recurrent_size, input_size) * 0.1 / sqrt(input_size)
        self.params['bi'] = zeros((recurrent_size,))
        self.params['ho'] = randn(output_size, recurrent_size) * 0.1 / sqrt(recurrent_size)
        self.params['bo'] = zeros((output_size,))
        self.aux, self.aux_ = {}, {}
        for key in ('h', 'ho', 'hi'):
            self.aux[key] = self.sigma(self.params[key])
            self.aux_[key] = self.sigma_(self.params[key])

    def _begin_pass(n, ins, init, tf):
        """Resolve default init/tf and install a fresh Context on n.

        Returns (init, tf, context).
        """
        if init is None:
            init = zeros((n.recurrent_size,))
        if tf is None:
            tf = len(ins)-1
        n.context = Context()
        return init, tf, n.context

    def _average_outputs(n, run_pass, reps):
        """Call run_pass() reps times and store the mean outputs in n.context.out.

        Raises ValueError if reps is smaller than 1. Returns n.
        """
        if reps < 1:
            raise ValueError('reps must be at least 1')
        total = run_pass().context.out
        for _ in range(1, reps):
            sampled = run_pass().context.out
            total = { k : total[k] + sampled[k] for k in total }
        n.context.out = { k : 1.0*total[k]/reps for k in total }
        return n

    def fprop(n, ins, init=None, ti=0, tf=None):
        """Forward pass over ins[ti..tf] using the (mean, std) propagation.

        init defaults to a zero hidden state and tf to the last index of ins.
        Replaces n.context; per time step it stores the hidden output (a), the
        hidden derivative term (a_), the output zmu/zsigma (out) and the
        output derivative term (out_). Returns n.
        """
        init, tf, c = n._begin_pass(ins, init, tf)
        a = init
        for t in range(ti, tf+1):
            a, a_ = binary_double_forward_prop(n.aux['hi'], n.aux_['hi'], ins[t], n.params['bi'],
                                               n.aux['h'] , n.aux_['h'] , a , 0, is_first1=True)
            c.a[t], c.a_[t] = a, a_
            zmu_out, zsigma_out = binary_linear_step(n.aux['ho'], n.aux_['ho'], a, n.params['bo'])
            c.out[t] = zmu_out / zsigma_out
            _, c.out_[t] = binary_nonlinear_step(zmu_out, zsigma_out)
        return n

    def backprop(n, ins, outs, eta, ti=0, tf=None):
        """Back-propagate through time once; update n.params, n.aux and n.aux_.

        Assumes fprop was already run on the same ins (uses n.context).
        Non-finite top-level signals are replaced by 0. The recurrent
        parameter 'h' receives no update from step ti, because the activation
        that preceded it is not stored in the context. Returns n.
        """
        c, tf = n.context, len(ins)-1 if tf is None else tf
        delta_from_right = zeros(n.params['bi'].shape)
        delta = {}
        sum_delta = dict(ho=0,bo=0,h=0,hi=0,bi=0)
        for t in range(tf, ti-1, -1):
            delta_from_top = top_backprop_signal(n.cost_deriv(c.out[t], outs[t]), c.out_[t])
            nanz = ~isfinite(delta_from_top)
            delta_from_top[nanz] = 0 #TODO???
            delta['ho'], delta['bo'] = layer_updates(delta_from_top, c.a[t], eta, binary=True)
            delta_to_left_or_down = backprop_step(delta_from_top, n.aux['ho'], c.a_[t], binary=True) + backprop_step(delta_from_right, n.aux['h'], c.a_[t], binary=True)
            delta['h'], _ = (0,0) if t == ti else layer_updates(delta_to_left_or_down, c.a[t-1], eta, binary=True)
            delta['hi'], delta['bi'] = layer_updates(delta_to_left_or_down, ins[t], eta, binary=True)
            sum_delta = { k : (sum_delta[k] + delta[k]) for k in delta.keys()}
            # Carry the hidden-layer signal to the previous time step; without
            # this the recurrent term above is always zero and no gradient
            # flows back through time.
            delta_from_right = delta_to_left_or_down
        n.params = { k : (n.params[k] + sum_delta[k]) for k in sum_delta.keys()}
        n.aux = { k : n.sigma(n.params[k]) for k in n.aux.keys()}
        n.aux_= { k : n.prime(n.aux[k]) for k in n.aux_.keys()}
        return n

    def fprop_det(n, ins, init=None, ti=0, tf=None):
        """Deterministic pass: weights are sign(params), activations are signs. Returns n."""
        init, tf, c = n._begin_pass(ins, init, tf)
        a = init
        for t in range(ti, tf+1):
            c.a[t] = a = sign( sign(n.params['hi']).dot(ins[t]) + sign(n.params['h']).dot(a) + n.params['bi'])
            c.out[t] = sign( sign(n.params['ho']).dot(a) + n.params['bo'])
        return n

    def fprop_resample(n, ins, init=None, ti=0, tf=None):
        """Stochastic pass that draws new weights with sample_array at every time step. Returns n."""
        init, tf, c = n._begin_pass(ins, init, tf)
        a = init
        for t in range(ti, tf+1):
            c.a[t] = a = sign( sample_array(n.aux['hi']).dot(ins[t]) + sample_array(n.aux['h']).dot(a) + n.params['bi'])
            c.out[t] = sign( sample_array(n.aux['ho']).dot(a) + n.params['bo'])
        return n

    def fprop_per_layer_avg(n, ins, init=None, ti=0, tf=None, reps=1000):
        """Stochastic pass averaging reps sampled activations at every layer and step.

        The averaged hidden activation is what is fed to the next time step
        and to the output layer. Returns n.
        """
        init, tf, c = n._begin_pass(ins, init, tf)
        a = init
        for t in range(ti, tf+1):
            hidden_sum = 0
            for _ in range(reps):
                hidden_sum += sign( sample_array(n.aux['hi']).dot(ins[t]) + sample_array(n.aux['h']).dot(a) + n.params['bi'])
            a = c.a[t] = 1.0 * hidden_sum / reps

            out_sum = 0
            for _ in range(reps):
                out_sum += sign( sample_array(n.aux['ho']).dot(a) + n.params['bo'])
            c.out[t] = 1.0 * out_sum / reps
        return n

    def fprop_single_sample(n, ins, init=None, ti=0, tf=None):
        """Stochastic pass with one set of sampled weights shared by all time steps. Returns n."""
        init, tf, c = n._begin_pass(ins, init, tf)
        a = init
        wi = sample_array(n.aux['hi']); w = sample_array(n.aux['h']); wo = sample_array(n.aux['ho'])
        for t in range(ti, tf+1):
            c.a[t] = a = sign( wi.dot(ins[t]) + w.dot(a) + n.params['bi'])
            c.out[t] = sign( wo.dot(a) + n.params['bo'])
        return n

    def fprop_multi_resample(n, ins, init=None, ti=0, tf=None, reps=1000):
        """Average the outputs of reps fprop_resample passes into n.context.out.

        init, ti and tf are forwarded to every pass. Returns n.
        """
        return n._average_outputs(lambda: n.fprop_resample(ins, init=init, ti=ti, tf=tf), reps)

    def fprop_multi_single_sample(n, ins, init=None, ti=0, tf=None, reps=1000):
        """Average the outputs of reps fprop_single_sample passes into n.context.out.

        init, ti and tf are forwarded to every pass. Returns n.
        """
        return n._average_outputs(lambda: n.fprop_single_sample(ins, init=init, ti=ti, tf=tf), reps)
148 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/histogram.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
1010 |
--------------------------------------------------------------------------------
/histogram_FAIL.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
1184 |
--------------------------------------------------------------------------------
/error_curve_FAIL.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
1281 |
--------------------------------------------------------------------------------
/error_curve.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
1388 |
--------------------------------------------------------------------------------