├── .gitignore ├── README.md ├── fig1.png ├── figE.png ├── figEHE.png ├── figFS.png ├── figH.png ├── gentext.py ├── inputeasy.txt ├── inputhard.txt ├── makefigE.py ├── makefigEHE.py ├── makefigFS.py ├── makefigH.py ├── makefigmulti.py ├── min-char-rnn-param.py ├── paper ├── IEEEtran.cls ├── bare_jrnl.tex ├── biblio.bib ├── figE.png ├── figEHE.png ├── figFS.png ├── figH.png ├── nips_2016.aux ├── nips_2016.dvi ├── nips_2016.log ├── nips_2016.out ├── nips_2016.pdf ├── nips_2016.sty ├── nips_2016.tex ├── paper-blx.bib ├── paper.aux ├── paper.bbl ├── paper.blg ├── paper.log ├── paper.out ├── paper.pdf ├── paper.run.xml ├── paper.tex ├── paper.tex.nips └── smallbiblio.bib ├── rnn.py ├── rnn.py.prev ├── rnnAltern.py └── runexp.py /.gitignore: -------------------------------------------------------------------------------- 1 | trial*/ 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This is the source code for the arXiv preprint ["Neural networks with differentiable structure"](https://arxiv.org/abs/1606.06216). 2 | 3 | This code implements recurrent neural networks with differentiable structure: the number of neurons in the network undergoes gradient descent, just like the weights of the 4 | network. The network adjusts its number of neurons to the complexity of the task at hand. 5 | 6 | This code is based on Andrej Karpathy's [`min-char-rnn.py`](https://gist.github.com/karpathy/d4dee566867f8291f086) program. 7 | 8 | `rnn.py` is the main program. You can run it "as is" (`python rnn.py`) to train 9 | the model on the "hard" problem for 100,000 cycles. It will generate an output 10 | file called `output.txt`, updated every 1000 cycles, which logs the current 11 | cycle number, position in the input file, loss, number of neurons, and total absolute sum of multipliers (see the code for details). 12 | 13 | The other 14 | Python files in the repository generate input data, produce figures, or submit jobs to a cluster.
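
For a quick look at a single run, the log can be plotted directly. The sketch below is not part of the repository; it simply reads `output.txt` using the column order described above (cycle, position in input file, loss, number of neurons, sum of multipliers), which is also the layout the `makefig*.py` scripts rely on (loss in column 2, neuron count in column 3). It assumes `numpy` and `matplotlib` are installed and that `output.txt` is in the current directory.

```python
# Minimal sketch: plot loss and neuron count from a single run's output.txt.
# Column layout (per the README and the makefig*.py scripts):
#   0: cycle, 1: position in input file, 2: loss, 3: number of neurons, 4: sum of multipliers
import numpy as np
import matplotlib.pyplot as plt

t = np.loadtxt("output.txt")              # one row per 1000 cycles
plt.plot(t[:, 0], t[:, 2], label="Loss")
plt.plot(t[:, 0], t[:, 3], label="# Neurons")
plt.xlabel("Cycle")
plt.legend()
plt.tight_layout()
plt.show()
```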
15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/fig1.png -------------------------------------------------------------------------------- /figE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/figE.png -------------------------------------------------------------------------------- /figEHE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/figEHE.png -------------------------------------------------------------------------------- /figFS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/figFS.png -------------------------------------------------------------------------------- /figH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/figH.png -------------------------------------------------------------------------------- /gentext.py: -------------------------------------------------------------------------------- 1 | import random 2 | import sys 3 | n = 0 4 | s1 = ["a", "b", "b", "a", "a", "b"] 5 | with open('inputhard.txt', 'w') as f: 6 | while True: 7 | s2 = s1[::-1] 8 | pos = random.randint(0, len(s2)-1) 9 | if s2[pos] == "a": 10 | s2[pos] = "b" 11 | else: 12 | s2[pos] = "a" 13 | s1 = s2 14 | n += len(s2)+2 15 | #sys.stdout.write("("+"".join(s2)+")") 16 | f.write("("+"".join(s2)+")") 17 | if n > 1200000: 18 | break 19 | with open('inputeasy.txt', 'w') as f: 20 | for n in range (200000): 21 | f.write("(ab") 22 | while (random.random() < .6): 23 | f.write("ab") 24 | f.write(")") 25 | 26 | #for n in range (200000): 27 | # if random.random() < .5: 28 | # sys.stdout.write("aa") 29 | # else: 30 | # sys.stdout.write("bb") 31 | -------------------------------------------------------------------------------- /makefigE.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | font = {#'family' : 'normal', 6 | # 'weight' : 'bold', 7 | 'size' : 9} 8 | plt.rc('font', **font) 9 | 10 | plt.ion() 11 | np.set_printoptions(precision=3, suppress=True) 12 | 13 | #dirz = glob.glob('trial-max10*') 14 | #dirz = glob.glob('trial-new-hardeasy*') 15 | #dirz = glob.glob('trial-easyhardeasy*') 16 | #dirz = glob.glob('trial-fixedsize*') 17 | dirz = glob.glob('trial-ref*-EASY-*') 18 | dirz.sort() 19 | NBPLOTS = len(dirz) 20 | SS = np.ceil(np.sqrt(NBPLOTS)) 21 | 22 | plt.figure(1, figsize=(3, 2), dpi=100, facecolor='w', edgecolor='k') 23 | 24 | nplot = 1 25 | thards= [] 26 | teasys=[] 27 | colorz=['b', 'b', 'b', 'r', 'g'] 28 | labelz = ['', '', 'Loss', '# Neurons'] 29 | for (num, droot) in enumerate(dirz): 30 | t = [] 31 | for v in range(20): 32 | dfull = droot + "/v" + str(v) 33 | #t.append(np.loadtxt(dfull+"/test.txt")[:200,:]) 34 | t.append(np.loadtxt(dfull+"/output.txt")) 35 | t = np.dstack(t) 36 | tmean = np.mean(t, axis=2) 37 | tstd = np.std(t, 
axis=2) 38 | tmedian = np.median(t, axis=2) 39 | tq25 = np.percentile(t, 25, axis=2) 40 | tq75 = np.percentile(t, 75, axis=2) 41 | 42 | ax = plt.subplot(SS, SS, nplot) 43 | ax.set_title('Easy problem') 44 | for vari in [3, 2]: # range(2, tmean.shape[1]): 45 | plt.fill_between(range(tmean.shape[0]), tq25[:, vari], tq75[:, vari], linewidth=0.0, alpha=0.3, facecolor=colorz[vari]) 46 | plt.plot(tmedian[:, vari], color=colorz[vari], label=labelz[vari], linewidth=2) 47 | plt.axis([0, tmean.shape[0], 0, 50]) 48 | 49 | print num, tmean[90, :], tmean[190, :], tmean[-1, :], droot 50 | thards.append(tmean[90,:]) 51 | teasys.append(tmean[-1,:]) 52 | 53 | nplot += 1 54 | 55 | plt.xlabel('Iterations (x1000)') 56 | #plt.ylabel('Loss', color='b') 57 | plt.legend(fontsize=8) 58 | plt.tight_layout() 59 | 60 | print "Data read." 61 | 62 | plt.show() 63 | 64 | plt.savefig('figE.png', bbox_inches='tight') 65 | -------------------------------------------------------------------------------- /makefigEHE.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | font = {#'family' : 'normal', 6 | # 'weight' : 'bold', 7 | 'size' : 9} 8 | plt.rc('font', **font) 9 | 10 | plt.ion() 11 | np.set_printoptions(precision=3, suppress=True) 12 | 13 | #dirz = glob.glob('trial-max10*') 14 | #dirz = glob.glob('trial-new-hardeasy*') 15 | #dirz = glob.glob('trial-easyhardeasy*') 16 | #dirz = glob.glob('trial-fixedsize*') 17 | dirz = glob.glob('trial-ref*EASYHARDEASY*') 18 | dirz.sort() 19 | NBPLOTS = len(dirz) 20 | SS = np.ceil(np.sqrt(NBPLOTS)) 21 | 22 | plt.figure(3, figsize=(4, 2.5), dpi=100, facecolor='w', edgecolor='k') 23 | 24 | nplot = 1 25 | thards= [] 26 | teasys=[] 27 | colorz=['b', 'b', 'b', 'r', 'g'] 28 | labelz = ['', '', 'Loss', '# Neurons'] 29 | for (num, droot) in enumerate(dirz): 30 | t = [] 31 | for v in range(20): 32 | dfull = droot + "/v" + str(v) 33 | t.append(np.loadtxt(dfull+"/output.txt")) 34 | t = np.dstack(t) 35 | tmean = np.mean(t, axis=2) 36 | tstd = np.std(t, axis=2) 37 | tmedian = np.median(t, axis=2) 38 | tq25 = np.percentile(t, 25, axis=2) 39 | tq75 = np.percentile(t, 75, axis=2) 40 | 41 | ax = plt.subplot(SS, SS, nplot) 42 | ax.set_title('Easy-Hard-Easy transition') 43 | for vari in [3, 2]: # range(2, tmean.shape[1]): 44 | plt.fill_between(range(tmean.shape[0]), tq25[:, vari], tq75[:, vari], linewidth=0.0, alpha=0.3, facecolor=colorz[vari]) 45 | plt.plot(tmedian[:, vari], color=colorz[vari], label=labelz[vari], linewidth=2) 46 | plt.axis([0, tmean.shape[0], 0, 50]) 47 | 48 | print num, tmean[90, :], tmean[190, :], tmean[-1, :], droot 49 | thards.append(tmean[90,:]) 50 | teasys.append(tmean[-1,:]) 51 | 52 | nplot += 1 53 | 54 | print "Data read." 
55 | 56 | plt.axvline(100, linestyle='--', c='k') 57 | plt.axvline(200, linestyle='--', c='k') 58 | plt.text(40, 30, 'Easy') 59 | plt.text(140, 30, 'Hard') 60 | plt.text(240, 30, 'Easy') 61 | 62 | plt.xlabel('Iterations (x1000)') 63 | #plt.ylabel('Loss', color='b') 64 | plt.legend(fontsize=8) 65 | plt.tight_layout() 66 | 67 | 68 | plt.show() 69 | 70 | plt.savefig('figEHE.png', bbox_inches='tight') 71 | -------------------------------------------------------------------------------- /makefigFS.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | font = {#'family' : 'normal', 6 | # 'weight' : 'bold', 7 | 'size' : 9} 8 | plt.rc('font', **font) 9 | 10 | plt.ion() 11 | np.set_printoptions(precision=3, suppress=True) 12 | 13 | #dirz = glob.glob('trial-max10*') 14 | #dirz = glob.glob('trial-new-hardeasy*') 15 | #dirz = glob.glob('trial-easyhardeasy*') 16 | dirz = glob.glob('trial-fixedsize*') 17 | dirz2 = glob.glob('trial-ref-*-HARD-*') 18 | 19 | dirz = dirz + dirz2 20 | dirz.sort() 21 | NBPLOTS = len(dirz) 22 | SS = np.ceil(np.sqrt(NBPLOTS)) 23 | linez=[] 24 | 25 | plt.figure(1, figsize=(4, 2.5), dpi=100, facecolor='w', edgecolor='k') 26 | 27 | nplot = 1 28 | thards= [] 29 | teasys=[] 30 | colorz=['b', 'r', 'g', 'm', 'c', 'orange'] 31 | labelz=['10 neurons', '100 neurons', '27 neurons', '30 neurons', '50 neurons', 'Variable Size'] 32 | for (num, droot) in enumerate(dirz): 33 | t = [] 34 | for v in range(20): 35 | dfull = droot + "/v" + str(v) 36 | t.append(np.loadtxt(dfull+"/output.txt")[:200, :]) 37 | t = np.dstack(t) 38 | tmean = np.mean(t, axis=2) 39 | tstd = np.std(t, axis=2) 40 | tmedian = np.median(t, axis=2) 41 | tq25 = np.percentile(t, 25, axis=2) 42 | tq75 = np.percentile(t, 75, axis=2) 43 | 44 | for vari in [2]: # range(2, tmean.shape[1]): 45 | #plt.fill_between(range(tmean.shape[0]), tq25[:, vari], tq75[:, vari], linewidth=0.0, alpha=0.3, facecolor=colorz[vari]) 46 | if num == len(dirz)-1: # The last curve is that of the variable-size runs 47 | linez.append(plt.plot(tmedian[:, vari], color='k', linewidth=2, label=labelz[num])) 48 | else: 49 | linez.append(plt.plot(tmedian[:, vari], color=colorz[num], label=labelz[num])) 50 | plt.axis([0, tmean.shape[0], 0, 50]) 51 | 52 | print num, tmean[90, :], tmean[190, :], tmean[-1, :], droot 53 | thards.append(tmean[90,:]) 54 | teasys.append(tmean[-1,:]) 55 | 56 | nplot += 1 57 | 58 | plt.xlabel('Iterations (x1000)') 59 | plt.ylabel('Loss') 60 | plt.legend(fontsize=8) 61 | plt.tight_layout() 62 | 63 | print "Data read." 
64 | 65 | plt.show() 66 | 67 | plt.savefig('figFS.png', bbox_inches='tight') 68 | -------------------------------------------------------------------------------- /makefigH.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | font = {#'family' : 'normal', 6 | # 'weight' : 'bold', 7 | 'size' : 10} 8 | plt.rc('font', **font) 9 | 10 | plt.ion() 11 | np.set_printoptions(precision=3, suppress=True) 12 | 13 | #dirz = glob.glob('trial-max10*') 14 | #dirz = glob.glob('trial-new-hardeasy*') 15 | #dirz = glob.glob('trial-easyhardeasy*') 16 | #dirz = glob.glob('trial-fixedsize*') 17 | dirz = glob.glob('trial-ref*HARD-*') 18 | dirz.sort() 19 | NBPLOTS = len(dirz) 20 | SS = np.ceil(np.sqrt(NBPLOTS)) 21 | 22 | plt.figure(2, figsize=(3, 2), dpi=100, facecolor='w', edgecolor='k') 23 | 24 | nplot = 1 25 | thards= [] 26 | teasys=[] 27 | colorz=['b', 'b', 'b', 'r', 'g'] 28 | labelz = ['', '', 'Loss', '# Neurons'] 29 | for (num, droot) in enumerate(dirz): 30 | t = [] 31 | for v in range(20): 32 | dfull = droot + "/v" + str(v) 33 | #t.append(np.loadtxt(dfull+"/output.txt")[:200,:]) 34 | t.append(np.loadtxt(dfull+"/output.txt")) 35 | t = np.dstack(t) 36 | tmean = np.mean(t, axis=2) 37 | tstd = np.std(t, axis=2) 38 | tmedian = np.median(t, axis=2) 39 | tq25 = np.percentile(t, 25, axis=2) 40 | tq75 = np.percentile(t, 75, axis=2) 41 | 42 | ax = plt.subplot(SS, SS, nplot) 43 | ax.set_title('Hard problem') 44 | for vari in [3, 2]: # range(2, tmean.shape[1]): 45 | plt.fill_between(range(tmean.shape[0]), tq25[:, vari], tq75[:, vari], linewidth=0.0, alpha=0.3, facecolor=colorz[vari]) 46 | plt.plot(tmedian[:, vari], color=colorz[vari], label=labelz[vari], linewidth=2) 47 | plt.axis([0, tmean.shape[0], 0, 50]) 48 | 49 | print num, tmean[90, :], tmean[190, :], tmean[-1, :], droot 50 | thards.append(tmean[90,:]) 51 | teasys.append(tmean[-1,:]) 52 | 53 | nplot += 1 54 | 55 | plt.xlabel('Iterations (x1000)') 56 | #plt.ylabel('Loss', color='b') 57 | #plt.legend(fontsize=8) 58 | plt.tight_layout() 59 | 60 | print "Data read." 
61 | 62 | plt.show() 63 | 64 | plt.savefig('figH.png', bbox_inches='tight') 65 | -------------------------------------------------------------------------------- /makefigmulti.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | font = {#'family' : 'normal', 6 | # 'weight' : 'bold', 7 | 'size' : 10} 8 | plt.rc('font', **font) 9 | 10 | plt.ion() 11 | np.set_printoptions(precision=3, suppress=True) 12 | 13 | #dirz = glob.glob('trial-max10*') 14 | #dirz = glob.glob('trial-new-hardeasy*') 15 | #dirz = glob.glob('trial-EHE*MULTIPGRAD-1*') 16 | dirz = glob.glob('trial-EHE*900000*') 17 | #dirz = glob.glob('trial-fixedsize*') 18 | #dirz = glob.glob('trial-ref*EASYHARDEASY*') 19 | dirz.sort() 20 | NBPLOTS = len(dirz) 21 | SS = np.ceil(np.sqrt(NBPLOTS)) 22 | 23 | plt.figure(1, figsize=(4, 3), dpi=100, facecolor='w', edgecolor='k') 24 | 25 | nplot = 1 26 | perfs = [] 27 | nbneurs = [] 28 | dirs = [] 29 | colorz=['b', 'b', 'b', 'r', 'g'] 30 | for (num, droot) in enumerate(dirz): 31 | t = [] 32 | for v in range(10): 33 | dfull = droot + "/v" + str(v) 34 | t.append(np.loadtxt(dfull+"/output.txt")) 35 | t = np.dstack(t) 36 | tmean = np.mean(t, axis=2) 37 | tstd = np.std(t, axis=2) 38 | tmedian = np.median(t, axis=2) 39 | tq25 = np.percentile(t, 25, axis=2) 40 | tq75 = np.percentile(t, 75, axis=2) 41 | 42 | ax = plt.subplot(SS, SS, nplot) 43 | ax.set_title(num) 44 | for vari in [3, 2]: # range(2, tmean.shape[1]): 45 | plt.fill_between(range(tmean.shape[0]), tq25[:, vari], tq75[:, vari], linewidth=0.0, alpha=0.3, facecolor=colorz[vari]) 46 | plt.plot(tmedian[:, vari], color=colorz[vari]) 47 | plt.axis([0, tmean.shape[0], 0, 50]) 48 | 49 | p1 = int(tmean.shape[0] / 3) 50 | p2 = 2*int(tmean.shape[0] / 3) 51 | p3 = -1 52 | 53 | print num, tmean[p1, :], tmean[p2, :], tmean[p3, :], droot 54 | perfs.append([tmean[p1,2], tmean[p2, 2], tmean[p3, 2]]) 55 | nbneurs.append([tmean[p1,3], tmean[p2, 3], tmean[p3, 3]]) 56 | dirs.append(droot) 57 | 58 | nplot += 1 59 | 60 | print "Data read." 61 | 62 | perfs = np.array(perfs) 63 | p = perfs[:,1] 64 | nbneurs = np.array(nbneurs) 65 | dneur = nbneurs[:, 1] - nbneurs[:,2] 66 | ord = np.argsort(p) 67 | data = np.vstack((ord, dneur[ord], p[ord])).T 68 | 69 | 70 | plt.show() 71 | 72 | #plt.savefig('fig1.png', bbox_inches='tight') 73 | -------------------------------------------------------------------------------- /min-char-rnn-param.py: -------------------------------------------------------------------------------- 1 | """ 2 | Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) 3 | Modified to take parameters from the command line. 4 | BSD License 5 | """ 6 | import numpy as np 7 | import sys 8 | 9 | g = { 10 | 'NBSTEPS' : 300000, 11 | 'HIDDENSIZE' : 100, 12 | 'RNGSEED' : 0 13 | } 14 | 15 | argpairs = [sys.argv[i:i+2] for i in range(1, len(sys.argv), 2)] 16 | 17 | for argpair in argpairs: 18 | if not (argpair[0] in g): 19 | sys.exit("Error, tried to pass value of non-existent parameter "+argpair[0]) 20 | g[argpair[0]] = int(argpair[1]) 21 | print g 22 | 23 | # data I/O 24 | myf = open("output.txt", "w") 25 | myf.close() 26 | data = open('../../inputhard.txt', 'r').read() # should be simple plain text file 27 | chars = list(set(data)) 28 | data_size, vocab_size = len(data), len(chars) 29 | print 'data has %d characters, %d unique.' 
% (data_size, vocab_size) 30 | char_to_ix = { ch:i for i,ch in enumerate(chars) } 31 | ix_to_char = { i:ch for i,ch in enumerate(chars) } 32 | 33 | # hyperparameters 34 | hidden_size = g['HIDDENSIZE'] # size of hidden layer of neurons 35 | seq_length = 40 # number of steps to unroll the RNN for 36 | learning_rate = 1e-1 37 | 38 | # model parameters 39 | Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden 40 | Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden 41 | Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output 42 | bh = np.zeros((hidden_size, 1)) # hidden bias 43 | by = np.zeros((vocab_size, 1)) # output bias 44 | 45 | def lossFun(inputs, targets, hprev): 46 | """ 47 | inputs,targets are both list of integers. 48 | hprev is Hx1 array of initial hidden state 49 | returns the loss, gradients on model parameters, and last hidden state 50 | """ 51 | xs, hs, ys, ps = {}, {}, {}, {} 52 | hs[-1] = np.copy(hprev) 53 | loss = 0 54 | # forward pass 55 | for t in xrange(len(inputs)): 56 | xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation 57 | xs[t][inputs[t]] = 1 58 | hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state 59 | ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars 60 | ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars 61 | loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) 62 | # backward pass: compute gradients going backwards 63 | dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) 64 | dbh, dby = np.zeros_like(bh), np.zeros_like(by) 65 | dhnext = np.zeros_like(hs[0]) 66 | for t in reversed(xrange(len(inputs))): 67 | dy = np.copy(ps[t]) 68 | dy[targets[t]] -= 1 # backprop into y. 
see http://cs231n.github.io/neural-networks-case-study/#grad if confused here 69 | dWhy += np.dot(dy, hs[t].T) 70 | dby += dy 71 | dh = np.dot(Why.T, dy) + dhnext # backprop into h 72 | dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 73 | dbh += dhraw 74 | dWxh += np.dot(dhraw, xs[t].T) 75 | dWhh += np.dot(dhraw, hs[t-1].T) 76 | dhnext = np.dot(Whh.T, dhraw) 77 | for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 78 | np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 79 | return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1] 80 | 81 | def sample(h, seed_ix, n): 82 | """ 83 | sample a sequence of integers from the model 84 | h is memory state, seed_ix is seed letter for first time step 85 | """ 86 | x = np.zeros((vocab_size, 1)) 87 | x[seed_ix] = 1 88 | ixes = [] 89 | for t in xrange(n): 90 | h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh) 91 | y = np.dot(Why, h) + by 92 | p = np.exp(y) / np.sum(np.exp(y)) 93 | ix = np.random.choice(range(vocab_size), p=p.ravel()) 94 | x = np.zeros((vocab_size, 1)) 95 | x[ix] = 1 96 | ixes.append(ix) 97 | return ixes 98 | 99 | n, p = 0, 0 100 | mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) 101 | mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad 102 | smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 103 | while True: 104 | # prepare inputs (we're sweeping from left to right in steps seq_length long) 105 | if p+seq_length+1 >= len(data) or n == 0: 106 | hprev = np.zeros((hidden_size,1)) # reset RNN memory 107 | p = 0 # go from start of data 108 | inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] 109 | targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] 110 | 111 | # sample from the model now and then 112 | if n % 100 == 0: 113 | sample_ix = sample(hprev, inputs[0], 200) 114 | txt = ''.join(ix_to_char[ix] for ix in sample_ix) 115 | print '----\n %s \n----' % (txt, ) 116 | 117 | # forward seq_length characters through the net and fetch gradient 118 | loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev) 119 | smooth_loss = smooth_loss * 0.999 + loss * 0.001 120 | if n % 100 == 0: print 'iter %d, loss: %f' % (n, smooth_loss) # print progress 121 | 122 | if n % 1000 == 0: 123 | with open("output.txt", "a") as myf: 124 | msg = "%d %d %f %d " % (n, p, smooth_loss, hidden_size) # print progress 125 | myf.write(msg+"\n") 126 | 127 | # perform parameter update with Adagrad 128 | for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 129 | [dWxh, dWhh, dWhy, dbh, dby], 130 | [mWxh, mWhh, mWhy, mbh, mby]): 131 | mem += dparam * dparam 132 | param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update 133 | 134 | p += seq_length # move data pointer 135 | n += 1 # iteration counter 136 | if n > g['NBSTEPS']: 137 | sys.exit(0) 138 | 139 | -------------------------------------------------------------------------------- /paper/bare_jrnl.tex: -------------------------------------------------------------------------------- 1 | 2 | %% bare_jrnl.tex 3 | %% V1.4b 4 | %% 2015/08/26 5 | %% by Michael Shell 6 | %% see http://www.michaelshell.org/ 7 | %% for current contact information. 8 | %% 9 | %% This is a skeleton file demonstrating the use of IEEEtran.cls 10 | %% (requires IEEEtran.cls version 1.8b or later) with an IEEE 11 | %% journal paper. 
12 | %% 13 | %% Support sites: 14 | %% http://www.michaelshell.org/tex/ieeetran/ 15 | %% http://www.ctan.org/pkg/ieeetran 16 | %% and 17 | %% http://www.ieee.org/ 18 | 19 | %%************************************************************************* 20 | %% Legal Notice: 21 | %% This code is offered as-is without any warranty either expressed or 22 | %% implied; without even the implied warranty of MERCHANTABILITY or 23 | %% FITNESS FOR A PARTICULAR PURPOSE! 24 | %% User assumes all risk. 25 | %% In no event shall the IEEE or any contributor to this code be liable for 26 | %% any damages or losses, including, but not limited to, incidental, 27 | %% consequential, or any other damages, resulting from the use or misuse 28 | %% of any information contained here. 29 | %% 30 | %% All comments are the opinions of their respective authors and are not 31 | %% necessarily endorsed by the IEEE. 32 | %% 33 | %% This work is distributed under the LaTeX Project Public License (LPPL) 34 | %% ( http://www.latex-project.org/ ) version 1.3, and may be freely used, 35 | %% distributed and modified. A copy of the LPPL, version 1.3, is included 36 | %% in the base LaTeX documentation of all distributions of LaTeX released 37 | %% 2003/12/01 or later. 38 | %% Retain all contribution notices and credits. 39 | %% ** Modified files should be clearly indicated as such, including ** 40 | %% ** renaming them and changing author support contact information. ** 41 | %%************************************************************************* 42 | 43 | 44 | % *** Authors should verify (and, if needed, correct) their LaTeX system *** 45 | % *** with the testflow diagnostic prior to trusting their LaTeX platform *** 46 | % *** with production work. The IEEE's font choices and paper sizes can *** 47 | % *** trigger bugs that do not appear when using other class files. *** *** 48 | % The testflow support page is at: 49 | % http://www.michaelshell.org/tex/testflow/ 50 | 51 | 52 | 53 | \documentclass[journal]{IEEEtran} 54 | % 55 | % If IEEEtran.cls has not been installed into the LaTeX system files, 56 | % manually specify the path to it like: 57 | % \documentclass[journal]{../sty/IEEEtran} 58 | 59 | 60 | 61 | 62 | 63 | % Some very useful LaTeX packages include: 64 | % (uncomment the ones you want to load) 65 | 66 | 67 | % *** MISC UTILITY PACKAGES *** 68 | % 69 | %\usepackage{ifpdf} 70 | % Heiko Oberdiek's ifpdf.sty is very useful if you need conditional 71 | % compilation based on whether the output is pdf or dvi. 72 | % usage: 73 | % \ifpdf 74 | % % pdf code 75 | % \else 76 | % % dvi code 77 | % \fi 78 | % The latest version of ifpdf.sty can be obtained from: 79 | % http://www.ctan.org/pkg/ifpdf 80 | % Also, note that IEEEtran.cls V1.7 and later provides a builtin 81 | % \ifCLASSINFOpdf conditional that works the same way. 82 | % When switching from latex to pdflatex and vice-versa, the compiler may 83 | % have to be run twice to clear warning/error messages. 84 | 85 | 86 | 87 | 88 | 89 | 90 | % *** CITATION PACKAGES *** 91 | % 92 | %\usepackage{cite} 93 | % cite.sty was written by Donald Arseneau 94 | % V1.6 and later of IEEEtran pre-defines the format of the cite.sty package 95 | % \cite{} output to follow that of the IEEE. Loading the cite package will 96 | % result in citation numbers being automatically sorted and properly 97 | % "compressed/ranged". e.g., [1], [9], [2], [7], [5], [6] without using 98 | % cite.sty will become [1], [2], [5]--[7], [9] using cite.sty. 
cite.sty's 99 | % \cite will automatically add leading space, if needed. Use cite.sty's 100 | % noadjust option (cite.sty V3.8 and later) if you want to turn this off 101 | % such as if a citation ever needs to be enclosed in parenthesis. 102 | % cite.sty is already installed on most LaTeX systems. Be sure and use 103 | % version 5.0 (2009-03-20) and later if using hyperref.sty. 104 | % The latest version can be obtained at: 105 | % http://www.ctan.org/pkg/cite 106 | % The documentation is contained in the cite.sty file itself. 107 | 108 | 109 | 110 | 111 | 112 | 113 | % *** GRAPHICS RELATED PACKAGES *** 114 | % 115 | \ifCLASSINFOpdf 116 | % \usepackage[pdftex]{graphicx} 117 | % declare the path(s) where your graphic files are 118 | % \graphicspath{{../pdf/}{../jpeg/}} 119 | % and their extensions so you won't have to specify these with 120 | % every instance of \includegraphics 121 | % \DeclareGraphicsExtensions{.pdf,.jpeg,.png} 122 | \else 123 | % or other class option (dvipsone, dvipdf, if not using dvips). graphicx 124 | % will default to the driver specified in the system graphics.cfg if no 125 | % driver is specified. 126 | % \usepackage[dvips]{graphicx} 127 | % declare the path(s) where your graphic files are 128 | % \graphicspath{{../eps/}} 129 | % and their extensions so you won't have to specify these with 130 | % every instance of \includegraphics 131 | % \DeclareGraphicsExtensions{.eps} 132 | \fi 133 | % graphicx was written by David Carlisle and Sebastian Rahtz. It is 134 | % required if you want graphics, photos, etc. graphicx.sty is already 135 | % installed on most LaTeX systems. The latest version and documentation 136 | % can be obtained at: 137 | % http://www.ctan.org/pkg/graphicx 138 | % Another good source of documentation is "Using Imported Graphics in 139 | % LaTeX2e" by Keith Reckdahl which can be found at: 140 | % http://www.ctan.org/pkg/epslatex 141 | % 142 | % latex, and pdflatex in dvi mode, support graphics in encapsulated 143 | % postscript (.eps) format. pdflatex in pdf mode supports graphics 144 | % in .pdf, .jpeg, .png and .mps (metapost) formats. Users should ensure 145 | % that all non-photo figures use a vector format (.eps, .pdf, .mps) and 146 | % not a bitmapped formats (.jpeg, .png). The IEEE frowns on bitmapped formats 147 | % which can result in "jaggedy"/blurry rendering of lines and letters as 148 | % well as large increases in file sizes. 149 | % 150 | % You can find documentation about the pdfTeX application at: 151 | % http://www.tug.org/applications/pdftex 152 | 153 | 154 | 155 | 156 | 157 | % *** MATH PACKAGES *** 158 | % 159 | %\usepackage{amsmath} 160 | % A popular package from the American Mathematical Society that provides 161 | % many useful and powerful commands for dealing with mathematics. 162 | % 163 | % Note that the amsmath package sets \interdisplaylinepenalty to 10000 164 | % thus preventing page breaks from occurring within multiline equations. Use: 165 | %\interdisplaylinepenalty=2500 166 | % after loading amsmath to restore such page breaks as IEEEtran.cls normally 167 | % does. amsmath.sty is already installed on most LaTeX systems. The latest 168 | % version and documentation can be obtained at: 169 | % http://www.ctan.org/pkg/amsmath 170 | 171 | 172 | 173 | 174 | 175 | % *** SPECIALIZED LIST PACKAGES *** 176 | % 177 | %\usepackage{algorithmic} 178 | % algorithmic.sty was written by Peter Williams and Rogerio Brito. 179 | % This package provides an algorithmic environment fo describing algorithms. 
180 | % You can use the algorithmic environment in-text or within a figure 181 | % environment to provide for a floating algorithm. Do NOT use the algorithm 182 | % floating environment provided by algorithm.sty (by the same authors) or 183 | % algorithm2e.sty (by Christophe Fiorio) as the IEEE does not use dedicated 184 | % algorithm float types and packages that provide these will not provide 185 | % correct IEEE style captions. The latest version and documentation of 186 | % algorithmic.sty can be obtained at: 187 | % http://www.ctan.org/pkg/algorithms 188 | % Also of interest may be the (relatively newer and more customizable) 189 | % algorithmicx.sty package by Szasz Janos: 190 | % http://www.ctan.org/pkg/algorithmicx 191 | 192 | 193 | 194 | 195 | % *** ALIGNMENT PACKAGES *** 196 | % 197 | %\usepackage{array} 198 | % Frank Mittelbach's and David Carlisle's array.sty patches and improves 199 | % the standard LaTeX2e array and tabular environments to provide better 200 | % appearance and additional user controls. As the default LaTeX2e table 201 | % generation code is lacking to the point of almost being broken with 202 | % respect to the quality of the end results, all users are strongly 203 | % advised to use an enhanced (at the very least that provided by array.sty) 204 | % set of table tools. array.sty is already installed on most systems. The 205 | % latest version and documentation can be obtained at: 206 | % http://www.ctan.org/pkg/array 207 | 208 | 209 | % IEEEtran contains the IEEEeqnarray family of commands that can be used to 210 | % generate multiline equations as well as matrices, tables, etc., of high 211 | % quality. 212 | 213 | 214 | 215 | 216 | % *** SUBFIGURE PACKAGES *** 217 | %\ifCLASSOPTIONcompsoc 218 | % \usepackage[caption=false,font=normalsize,labelfont=sf,textfont=sf]{subfig} 219 | %\else 220 | % \usepackage[caption=false,font=footnotesize]{subfig} 221 | %\fi 222 | % subfig.sty, written by Steven Douglas Cochran, is the modern replacement 223 | % for subfigure.sty, the latter of which is no longer maintained and is 224 | % incompatible with some LaTeX packages including fixltx2e. However, 225 | % subfig.sty requires and automatically loads Axel Sommerfeldt's caption.sty 226 | % which will override IEEEtran.cls' handling of captions and this will result 227 | % in non-IEEE style figure/table captions. To prevent this problem, be sure 228 | % and invoke subfig.sty's "caption=false" package option (available since 229 | % subfig.sty version 1.3, 2005/06/28) as this is will preserve IEEEtran.cls 230 | % handling of captions. 231 | % Note that the Computer Society format requires a larger sans serif font 232 | % than the serif footnote size font used in traditional IEEE formatting 233 | % and thus the need to invoke different subfig.sty package options depending 234 | % on whether compsoc mode has been enabled. 235 | % 236 | % The latest version and documentation of subfig.sty can be obtained at: 237 | % http://www.ctan.org/pkg/subfig 238 | 239 | 240 | 241 | 242 | % *** FLOAT PACKAGES *** 243 | % 244 | %\usepackage{fixltx2e} 245 | % fixltx2e, the successor to the earlier fix2col.sty, was written by 246 | % Frank Mittelbach and David Carlisle. This package corrects a few problems 247 | % in the LaTeX2e kernel, the most notable of which is that in current 248 | % LaTeX2e releases, the ordering of single and double column floats is not 249 | % guaranteed to be preserved. 
Thus, an unpatched LaTeX2e can allow a 250 | % single column figure to be placed prior to an earlier double column 251 | % figure. 252 | % Be aware that LaTeX2e kernels dated 2015 and later have fixltx2e.sty's 253 | % corrections already built into the system in which case a warning will 254 | % be issued if an attempt is made to load fixltx2e.sty as it is no longer 255 | % needed. 256 | % The latest version and documentation can be found at: 257 | % http://www.ctan.org/pkg/fixltx2e 258 | 259 | 260 | %\usepackage{stfloats} 261 | % stfloats.sty was written by Sigitas Tolusis. This package gives LaTeX2e 262 | % the ability to do double column floats at the bottom of the page as well 263 | % as the top. (e.g., "\begin{figure*}[!b]" is not normally possible in 264 | % LaTeX2e). It also provides a command: 265 | %\fnbelowfloat 266 | % to enable the placement of footnotes below bottom floats (the standard 267 | % LaTeX2e kernel puts them above bottom floats). This is an invasive package 268 | % which rewrites many portions of the LaTeX2e float routines. It may not work 269 | % with other packages that modify the LaTeX2e float routines. The latest 270 | % version and documentation can be obtained at: 271 | % http://www.ctan.org/pkg/stfloats 272 | % Do not use the stfloats baselinefloat ability as the IEEE does not allow 273 | % \baselineskip to stretch. Authors submitting work to the IEEE should note 274 | % that the IEEE rarely uses double column equations and that authors should try 275 | % to avoid such use. Do not be tempted to use the cuted.sty or midfloat.sty 276 | % packages (also by Sigitas Tolusis) as the IEEE does not format its papers in 277 | % such ways. 278 | % Do not attempt to use stfloats with fixltx2e as they are incompatible. 279 | % Instead, use Morten Hogholm'a dblfloatfix which combines the features 280 | % of both fixltx2e and stfloats: 281 | % 282 | % \usepackage{dblfloatfix} 283 | % The latest version can be found at: 284 | % http://www.ctan.org/pkg/dblfloatfix 285 | 286 | 287 | 288 | 289 | %\ifCLASSOPTIONcaptionsoff 290 | % \usepackage[nomarkers]{endfloat} 291 | % \let\MYoriglatexcaption\caption 292 | % \renewcommand{\caption}[2][\relax]{\MYoriglatexcaption[#2]{#2}} 293 | %\fi 294 | % endfloat.sty was written by James Darrell McCauley, Jeff Goldberg and 295 | % Axel Sommerfeldt. This package may be useful when used in conjunction with 296 | % IEEEtran.cls' captionsoff option. Some IEEE journals/societies require that 297 | % submissions have lists of figures/tables at the end of the paper and that 298 | % figures/tables without any captions are placed on a page by themselves at 299 | % the end of the document. If needed, the draftcls IEEEtran class option or 300 | % \CLASSINPUTbaselinestretch interface can be used to increase the line 301 | % spacing as well. Be sure and use the nomarkers option of endfloat to 302 | % prevent endfloat from "marking" where the figures would have been placed 303 | % in the text. The two hack lines of code above are a slight modification of 304 | % that suggested by in the endfloat docs (section 8.4.1) to ensure that 305 | % the full captions always appear in the list of figures/tables - even if 306 | % the user used the short optional argument of \caption[]{}. 307 | % IEEE papers do not typically make use of \caption[]'s optional argument, 308 | % so this should not be an issue. 
A similar trick can be used to disable 309 | % captions of packages such as subfig.sty that lack options to turn off 310 | % the subcaptions: 311 | % For subfig.sty: 312 | % \let\MYorigsubfloat\subfloat 313 | % \renewcommand{\subfloat}[2][\relax]{\MYorigsubfloat[]{#2}} 314 | % However, the above trick will not work if both optional arguments of 315 | % the \subfloat command are used. Furthermore, there needs to be a 316 | % description of each subfigure *somewhere* and endfloat does not add 317 | % subfigure captions to its list of figures. Thus, the best approach is to 318 | % avoid the use of subfigure captions (many IEEE journals avoid them anyway) 319 | % and instead reference/explain all the subfigures within the main caption. 320 | % The latest version of endfloat.sty and its documentation can obtained at: 321 | % http://www.ctan.org/pkg/endfloat 322 | % 323 | % The IEEEtran \ifCLASSOPTIONcaptionsoff conditional can also be used 324 | % later in the document, say, to conditionally put the References on a 325 | % page by themselves. 326 | 327 | 328 | 329 | 330 | % *** PDF, URL AND HYPERLINK PACKAGES *** 331 | % 332 | %\usepackage{url} 333 | % url.sty was written by Donald Arseneau. It provides better support for 334 | % handling and breaking URLs. url.sty is already installed on most LaTeX 335 | % systems. The latest version and documentation can be obtained at: 336 | % http://www.ctan.org/pkg/url 337 | % Basically, \url{my_url_here}. 338 | 339 | 340 | 341 | 342 | % *** Do not adjust lengths that control margins, column widths, etc. *** 343 | % *** Do not use packages that alter fonts (such as pslatex). *** 344 | % There should be no need to do such things with IEEEtran.cls V1.6 and later. 345 | % (Unless specifically asked to do so by the journal or conference you plan 346 | % to submit to, of course. ) 347 | 348 | 349 | % correct bad hyphenation here 350 | \hyphenation{op-tical net-works semi-conduc-tor} 351 | 352 | 353 | \begin{document} 354 | % 355 | % paper title 356 | % Titles are generally capitalized except for words such as a, an, and, as, 357 | % at, but, by, for, in, nor, of, on, or, the, to and up, which are usually 358 | % not capitalized unless they are the first or last word of the title. 359 | % Linebreaks \\ can be used within to get better formatting as desired. 360 | % Do not put math or special symbols in the title. 361 | \title{Bare Demo of IEEEtran.cls\\ for IEEE Journals} 362 | % 363 | % 364 | % author names and IEEE memberships 365 | % note positions of commas and nonbreaking spaces ( ~ ) LaTeX will not break 366 | % a structure at a ~ so this keeps an author's name from being broken across 367 | % two lines. 368 | % use \thanks{} to gain access to the first footnote area 369 | % a separate \thanks must be used for each paragraph as LaTeX2e's \thanks 370 | % was not built to handle multiple paragraphs 371 | % 372 | 373 | \author{Michael~Shell,~\IEEEmembership{Member,~IEEE,} 374 | John~Doe,~\IEEEmembership{Fellow,~OSA,} 375 | and~Jane~Doe,~\IEEEmembership{Life~Fellow,~IEEE}% <-this % stops a space 376 | \thanks{M. Shell was with the Department 377 | of Electrical and Computer Engineering, Georgia Institute of Technology, Atlanta, 378 | GA, 30332 USA e-mail: (see http://www.michaelshell.org/contact.html).}% <-this % stops a space 379 | \thanks{J. Doe and J. 
Doe are with Anonymous University.}% <-this % stops a space 380 | \thanks{Manuscript received April 19, 2005; revised August 26, 2015.}} 381 | 382 | % note the % following the last \IEEEmembership and also \thanks - 383 | % these prevent an unwanted space from occurring between the last author name 384 | % and the end of the author line. i.e., if you had this: 385 | % 386 | % \author{....lastname \thanks{...} \thanks{...} } 387 | % ^------------^------------^----Do not want these spaces! 388 | % 389 | % a space would be appended to the last name and could cause every name on that 390 | % line to be shifted left slightly. This is one of those "LaTeX things". For 391 | % instance, "\textbf{A} \textbf{B}" will typeset as "A B" not "AB". To get 392 | % "AB" then you have to do: "\textbf{A}\textbf{B}" 393 | % \thanks is no different in this regard, so shield the last } of each \thanks 394 | % that ends a line with a % and do not let a space in before the next \thanks. 395 | % Spaces after \IEEEmembership other than the last one are OK (and needed) as 396 | % you are supposed to have spaces between the names. For what it is worth, 397 | % this is a minor point as most people would not even notice if the said evil 398 | % space somehow managed to creep in. 399 | 400 | 401 | 402 | % The paper headers 403 | \markboth{Journal of \LaTeX\ Class Files,~Vol.~14, No.~8, August~2015}% 404 | {Shell \MakeLowercase{\textit{et al.}}: Bare Demo of IEEEtran.cls for IEEE Journals} 405 | % The only time the second header will appear is for the odd numbered pages 406 | % after the title page when using the twoside option. 407 | % 408 | % *** Note that you probably will NOT want to include the author's *** 409 | % *** name in the headers of peer review papers. *** 410 | % You can use \ifCLASSOPTIONpeerreview for conditional compilation here if 411 | % you desire. 412 | 413 | 414 | 415 | 416 | % If you want to put a publisher's ID mark on the page you can do it like 417 | % this: 418 | %\IEEEpubid{0000--0000/00\$00.00~\copyright~2015 IEEE} 419 | % Remember, if you use this you must call \IEEEpubidadjcol in the second 420 | % column for its text to clear the IEEEpubid mark. 421 | 422 | 423 | 424 | % use for special paper notices 425 | %\IEEEspecialpapernotice{(Invited Paper)} 426 | 427 | 428 | 429 | 430 | % make the title area 431 | \maketitle 432 | 433 | % As a general rule, do not put math, special symbols or citations 434 | % in the abstract or keywords. 435 | \begin{abstract} 436 | The abstract goes here. 437 | \end{abstract} 438 | 439 | % Note that keywords are not normally used for peerreview papers. 440 | \begin{IEEEkeywords} 441 | IEEE, IEEEtran, journal, \LaTeX, paper, template. 442 | \end{IEEEkeywords} 443 | 444 | 445 | 446 | 447 | 448 | 449 | % For peer review papers, you can put extra information on the cover 450 | % page as needed: 451 | % \ifCLASSOPTIONpeerreview 452 | % \begin{center} \bfseries EDICS Category: 3-BBND \end{center} 453 | % \fi 454 | % 455 | % For peerreview papers, this IEEEtran command inserts a page break and 456 | % creates the second title. It will be ignored for other modes. 457 | \IEEEpeerreviewmaketitle 458 | 459 | 460 | 461 | \section{Introduction} 462 | % The very first letter is a 2 line initial drop letter followed 463 | % by the rest of the first word in caps. 464 | % 465 | % form to use if the first word consists of a single letter: 466 | % \IEEEPARstart{A}{demo} file is .... 
467 | % 468 | % form to use if you need the single drop letter followed by 469 | % normal text (unknown if ever used by the IEEE): 470 | % \IEEEPARstart{A}{}demo file is .... 471 | % 472 | % Some journals put the first two words in caps: 473 | % \IEEEPARstart{T}{his demo} file is .... 474 | % 475 | % Here we have the typical use of a "T" for an initial drop letter 476 | % and "HIS" in caps to complete the first word. 477 | \IEEEPARstart{T}{his} demo file is intended to serve as a ``starter file'' 478 | for IEEE journal papers produced under \LaTeX\ using 479 | IEEEtran.cls version 1.8b and later. 480 | % You must have at least 2 lines in the paragraph with the drop letter 481 | % (should never be an issue) 482 | I wish you the best of success. 483 | 484 | \hfill mds 485 | 486 | \hfill August 26, 2015 487 | 488 | \subsection{Subsection Heading Here} 489 | Subsection text here. 490 | 491 | % needed in second column of first page if using \IEEEpubid 492 | %\IEEEpubidadjcol 493 | 494 | \subsubsection{Subsubsection Heading Here} 495 | Subsubsection text here. 496 | 497 | 498 | % An example of a floating figure using the graphicx package. 499 | % Note that \label must occur AFTER (or within) \caption. 500 | % For figures, \caption should occur after the \includegraphics. 501 | % Note that IEEEtran v1.7 and later has special internal code that 502 | % is designed to preserve the operation of \label within \caption 503 | % even when the captionsoff option is in effect. However, because 504 | % of issues like this, it may be the safest practice to put all your 505 | % \label just after \caption rather than within \caption{}. 506 | % 507 | % Reminder: the "draftcls" or "draftclsnofoot", not "draft", class 508 | % option should be used if it is desired that the figures are to be 509 | % displayed while in draft mode. 510 | % 511 | %\begin{figure}[!t] 512 | %\centering 513 | %\includegraphics[width=2.5in]{myfigure} 514 | % where an .eps filename suffix will be assumed under latex, 515 | % and a .pdf suffix will be assumed for pdflatex; or what has been declared 516 | % via \DeclareGraphicsExtensions. 517 | %\caption{Simulation results for the network.} 518 | %\label{fig_sim} 519 | %\end{figure} 520 | 521 | % Note that the IEEE typically puts floats only at the top, even when this 522 | % results in a large percentage of a column being occupied by floats. 523 | 524 | 525 | % An example of a double column floating figure using two subfigures. 526 | % (The subfig.sty package must be loaded for this to work.) 527 | % The subfigure \label commands are set within each subfloat command, 528 | % and the \label for the overall figure must come after \caption. 529 | % \hfil is used as a separator to get equal spacing. 530 | % Watch out that the combined width of all the subfigures on a 531 | % line do not exceed the text width or a line break will occur. 532 | % 533 | %\begin{figure*}[!t] 534 | %\centering 535 | %\subfloat[Case I]{\includegraphics[width=2.5in]{box}% 536 | %\label{fig_first_case}} 537 | %\hfil 538 | %\subfloat[Case II]{\includegraphics[width=2.5in]{box}% 539 | %\label{fig_second_case}} 540 | %\caption{Simulation results for the network.} 541 | %\label{fig_sim} 542 | %\end{figure*} 543 | % 544 | % Note that often IEEE papers with subfigures do not employ subfigure 545 | % captions (using the optional argument to \subfloat[]), but instead will 546 | % reference/describe all of them (a), (b), etc., within the main caption. 
547 | % Be aware that for subfig.sty to generate the (a), (b), etc., subfigure 548 | % labels, the optional argument to \subfloat must be present. If a 549 | % subcaption is not desired, just leave its contents blank, 550 | % e.g., \subfloat[]. 551 | 552 | 553 | % An example of a floating table. Note that, for IEEE style tables, the 554 | % \caption command should come BEFORE the table and, given that table 555 | % captions serve much like titles, are usually capitalized except for words 556 | % such as a, an, and, as, at, but, by, for, in, nor, of, on, or, the, to 557 | % and up, which are usually not capitalized unless they are the first or 558 | % last word of the caption. Table text will default to \footnotesize as 559 | % the IEEE normally uses this smaller font for tables. 560 | % The \label must come after \caption as always. 561 | % 562 | %\begin{table}[!t] 563 | %% increase table row spacing, adjust to taste 564 | %\renewcommand{\arraystretch}{1.3} 565 | % if using array.sty, it might be a good idea to tweak the value of 566 | % \extrarowheight as needed to properly center the text within the cells 567 | %\caption{An Example of a Table} 568 | %\label{table_example} 569 | %\centering 570 | %% Some packages, such as MDW tools, offer better commands for making tables 571 | %% than the plain LaTeX2e tabular which is used here. 572 | %\begin{tabular}{|c||c|} 573 | %\hline 574 | %One & Two\\ 575 | %\hline 576 | %Three & Four\\ 577 | %\hline 578 | %\end{tabular} 579 | %\end{table} 580 | 581 | 582 | % Note that the IEEE does not put floats in the very first column 583 | % - or typically anywhere on the first page for that matter. Also, 584 | % in-text middle ("here") positioning is typically not used, but it 585 | % is allowed and encouraged for Computer Society conferences (but 586 | % not Computer Society journals). Most IEEE journals/conferences use 587 | % top floats exclusively. 588 | % Note that, LaTeX2e, unlike IEEE journals/conferences, places 589 | % footnotes above bottom floats. This can be corrected via the 590 | % \fnbelowfloat command of the stfloats package. 591 | 592 | 593 | 594 | 595 | \section{Conclusion} 596 | The conclusion goes here. 597 | 598 | 599 | 600 | 601 | 602 | % if have a single appendix: 603 | %\appendix[Proof of the Zonklar Equations] 604 | % or 605 | %\appendix % for no appendix heading 606 | % do not use \section anymore after \appendix, only \section* 607 | % is possibly needed 608 | 609 | % use appendices with more than one appendix 610 | % then use \section to start each appendix 611 | % you must declare a \section before using any 612 | % \subsection or using \label (\appendices by itself 613 | % starts a section numbered zero.) 614 | % 615 | 616 | 617 | \appendices 618 | \section{Proof of the First Zonklar Equation} 619 | Appendix one text goes here. 620 | 621 | % you can choose not to have a title for an appendix 622 | % if you want by leaving the argument blank 623 | \section{} 624 | Appendix two text goes here. 625 | 626 | 627 | % use section* for acknowledgment 628 | \section*{Acknowledgment} 629 | 630 | 631 | The authors would like to thank... 632 | 633 | 634 | % Can use something like this to put references on a page 635 | % by themselves when using endfloat and the captionsoff option. 
636 | \ifCLASSOPTIONcaptionsoff 637 | \newpage 638 | \fi 639 | 640 | 641 | 642 | % trigger a \newpage just before the given reference 643 | % number - used to balance the columns on the last page 644 | % adjust value as needed - may need to be readjusted if 645 | % the document is modified later 646 | %\IEEEtriggeratref{8} 647 | % The "triggered" command can be changed if desired: 648 | %\IEEEtriggercmd{\enlargethispage{-5in}} 649 | 650 | % references section 651 | 652 | % can use a bibliography generated by BibTeX as a .bbl file 653 | % BibTeX documentation can be easily obtained at: 654 | % http://mirror.ctan.org/biblio/bibtex/contrib/doc/ 655 | % The IEEEtran BibTeX style support page is at: 656 | % http://www.michaelshell.org/tex/ieeetran/bibtex/ 657 | %\bibliographystyle{IEEEtran} 658 | % argument is your BibTeX string definitions and bibliography database(s) 659 | %\bibliography{IEEEabrv,../bib/paper} 660 | % 661 | % manually copy in the resultant .bbl file 662 | % set second argument of \begin to the number of references 663 | % (used to reserve space for the reference number labels box) 664 | \begin{thebibliography}{1} 665 | 666 | \bibitem{IEEEhowto:kopka} 667 | H.~Kopka and P.~W. Daly, \emph{A Guide to \LaTeX}, 3rd~ed.\hskip 1em plus 668 | 0.5em minus 0.4em\relax Harlow, England: Addison-Wesley, 1999. 669 | 670 | \end{thebibliography} 671 | 672 | % biography section 673 | % 674 | % If you have an EPS/PDF photo (graphicx package needed) extra braces are 675 | % needed around the contents of the optional argument to biography to prevent 676 | % the LaTeX parser from getting confused when it sees the complicated 677 | % \includegraphics command within an optional argument. (You could create 678 | % your own custom macro containing the \includegraphics command to make things 679 | % simpler here.) 680 | %\begin{IEEEbiography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{mshell}}]{Michael Shell} 681 | % or if you just want to reserve a space for a photo: 682 | 683 | \begin{IEEEbiography}{Michael Shell} 684 | Biography text here. 685 | \end{IEEEbiography} 686 | 687 | % if you will not have a photo at all: 688 | \begin{IEEEbiographynophoto}{John Doe} 689 | Biography text here. 690 | \end{IEEEbiographynophoto} 691 | 692 | % insert where needed to balance the two columns on the last page with 693 | % biographies 694 | %\newpage 695 | 696 | \begin{IEEEbiographynophoto}{Jane Doe} 697 | Biography text here. 698 | \end{IEEEbiographynophoto} 699 | 700 | % You can push biographies down or up by placing 701 | % a \vfill before or after them. The appropriate 702 | % use of \vfill depends on what kind of text is 703 | % on the last page and whether or not the columns 704 | % are being equalized. 705 | 706 | %\vfill 707 | 708 | % Can be used to pull up biographies so that the bottom of the last one 709 | % is flush with the other column. 
710 | %\enlargethispage{-5in} 711 | 712 | 713 | 714 | % that's all folks 715 | \end{document} 716 | 717 | 718 | -------------------------------------------------------------------------------- /paper/figE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/figE.png -------------------------------------------------------------------------------- /paper/figEHE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/figEHE.png -------------------------------------------------------------------------------- /paper/figFS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/figFS.png -------------------------------------------------------------------------------- /paper/figH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/figH.png -------------------------------------------------------------------------------- /paper/nips_2016.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand\hyper@newdestlabel[2]{} 3 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 4 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 5 | \global\let\oldcontentsline\contentsline 6 | \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} 7 | \global\let\oldnewlabel\newlabel 8 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 9 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 10 | \AtEndDocument{\ifx\hyper@anchor\@undefined 11 | \let\contentsline\oldcontentsline 12 | \let\newlabel\oldnewlabel 13 | \fi} 14 | \fi} 15 | \global\let\hyper@last\relax 16 | \gdef\HyperFirstAtBeginDocument#1{#1} 17 | \providecommand\HyField@AuxAddToFields[1]{} 18 | \providecommand\HyField@AuxAddToCoFields[2]{} 19 | \@writefile{toc}{\contentsline {section}{\numberline {1}Submission of papers to NIPS 2016}{1}{section.1}} 20 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Style}{1}{subsection.1.1}} 21 | \@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Retrieval of style files}{1}{subsection.1.2}} 22 | \@writefile{toc}{\contentsline {section}{\numberline {2}General formatting instructions}{2}{section.2}} 23 | \newlabel{gen_inst}{{2}{2}{General formatting instructions}{section.2}{}} 24 | \@writefile{toc}{\contentsline {section}{\numberline {3}Headings: first level}{2}{section.3}} 25 | \newlabel{headings}{{3}{2}{Headings: first level}{section.3}{}} 26 | \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Headings: second level}{2}{subsection.3.1}} 27 | \@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Headings: third level}{2}{subsubsection.3.1.1}} 28 | \@writefile{toc}{\contentsline {paragraph}{Paragraphs}{2}{section*.1}} 29 | \@writefile{toc}{\contentsline {section}{\numberline {4}Citations, figures, tables, references}{2}{section.4}} 30 | \newlabel{others}{{4}{2}{Citations, figures, tables, references}{section.4}{}} 31 | \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Citations within the text}{2}{subsection.4.1}} 32 | \@writefile{toc}{\contentsline {subsection}{\numberline 
{4.2}Footnotes}{3}{subsection.4.2}} 33 | \@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Figures}{3}{subsection.4.3}} 34 | \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Sample figure caption.}}{3}{figure.1}} 35 | \@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Sample table title}}{4}{table.1}} 36 | \newlabel{sample-table}{{1}{4}{Sample table title}{table.1}{}} 37 | \@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Tables}{4}{subsection.4.4}} 38 | \@writefile{toc}{\contentsline {section}{\numberline {5}Final instructions}{4}{section.5}} 39 | \@writefile{toc}{\contentsline {section}{\numberline {6}Preparing PDF files}{4}{section.6}} 40 | \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Margins in \LaTeX {}}{5}{subsection.6.1}} 41 | -------------------------------------------------------------------------------- /paper/nips_2016.dvi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/nips_2016.dvi -------------------------------------------------------------------------------- /paper/nips_2016.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.14159265-2.6-1.40.16 (TeX Live 2015) (preloaded format=pdflatex 2015.8.19) 16 JUN 2016 19:07 2 | entering extended mode 3 | restricted \write18 enabled. 4 | %&-line parsing enabled. 5 | **nips_2016.tex 6 | (./nips_2016.tex 7 | LaTeX2e <2015/01/01> patch level 2 8 | Babel <3.9m> and hyphenation patterns for 79 languages loaded. 9 | (/opt/texlive/2015/texmf-dist/tex/latex/base/article.cls 10 | Document Class: article 2014/09/29 v1.4h Standard LaTeX document class 11 | (/opt/texlive/2015/texmf-dist/tex/latex/base/size10.clo 12 | File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option) 13 | ) 14 | \c@part=\count79 15 | \c@section=\count80 16 | \c@subsection=\count81 17 | \c@subsubsection=\count82 18 | \c@paragraph=\count83 19 | \c@subparagraph=\count84 20 | \c@figure=\count85 21 | \c@table=\count86 22 | \abovecaptionskip=\skip41 23 | \belowcaptionskip=\skip42 24 | \bibindent=\dimen102 25 | ) (./nips_2016.sty 26 | Package: nips_2016 2016/03/07 NIPS 2016 submission/camera-ready style file 27 | 28 | (/opt/texlive/2015/texmf-dist/tex/latex/natbib/natbib.sty 29 | Package: natbib 2010/09/13 8.31b (PWD, AO) 30 | \bibhang=\skip43 31 | \bibsep=\skip44 32 | LaTeX Info: Redefining \cite on input line 694. 33 | \c@NAT@ctr=\count87 34 | ) 35 | (/opt/texlive/2015/texmf-dist/tex/latex/geometry/geometry.sty 36 | Package: geometry 2010/09/12 v5.6 Page Geometry 37 | 38 | (/opt/texlive/2015/texmf-dist/tex/latex/graphics/keyval.sty 39 | Package: keyval 2014/10/28 v1.15 key=value parser (DPC) 40 | \KV@toks@=\toks14 41 | ) 42 | (/opt/texlive/2015/texmf-dist/tex/generic/oberdiek/ifpdf.sty 43 | Package: ifpdf 2011/01/30 v2.3 Provides the ifpdf switch (HO) 44 | Package ifpdf Info: pdfTeX in PDF mode is detected. 45 | ) 46 | (/opt/texlive/2015/texmf-dist/tex/generic/oberdiek/ifvtex.sty 47 | Package: ifvtex 2010/03/01 v1.5 Detect VTeX and its facilities (HO) 48 | Package ifvtex Info: VTeX not detected. 
49 | ) 50 | (/opt/texlive/2015/texmf-dist/tex/generic/ifxetex/ifxetex.sty 51 | Package: ifxetex 2010/09/12 v0.6 Provides ifxetex conditional 52 | ) 53 | \Gm@cnth=\count88 54 | \Gm@cntv=\count89 55 | \c@Gm@tempcnt=\count90 56 | \Gm@bindingoffset=\dimen103 57 | \Gm@wd@mp=\dimen104 58 | \Gm@odd@mp=\dimen105 59 | \Gm@even@mp=\dimen106 60 | \Gm@layoutwidth=\dimen107 61 | \Gm@layoutheight=\dimen108 62 | \Gm@layouthoffset=\dimen109 63 | \Gm@layoutvoffset=\dimen110 64 | \Gm@dimlist=\toks15 65 | ) 66 | \@nipsabovecaptionskip=\skip45 67 | \@nipsbelowcaptionskip=\skip46 68 | ) 69 | (/opt/texlive/2015/texmf-dist/tex/latex/base/inputenc.sty 70 | Package: inputenc 2015/03/17 v1.2c Input encoding file 71 | \inpenc@prehook=\toks16 72 | \inpenc@posthook=\toks17 73 | 74 | (/opt/texlive/2015/texmf-dist/tex/latex/base/utf8.def 75 | File: utf8.def 2015/06/27 v1.1n UTF-8 support for inputenc 76 | Now handling font encoding OML ... 77 | ... no UTF-8 mapping file for font encoding OML 78 | Now handling font encoding T1 ... 79 | ... processing UTF-8 mapping file for font encoding T1 80 | 81 | (/opt/texlive/2015/texmf-dist/tex/latex/base/t1enc.dfu 82 | File: t1enc.dfu 2015/06/27 v1.1n UTF-8 support for inputenc 83 | defining Unicode char U+00A1 (decimal 161) 84 | defining Unicode char U+00A3 (decimal 163) 85 | defining Unicode char U+00AB (decimal 171) 86 | defining Unicode char U+00BB (decimal 187) 87 | defining Unicode char U+00BF (decimal 191) 88 | defining Unicode char U+00C0 (decimal 192) 89 | defining Unicode char U+00C1 (decimal 193) 90 | defining Unicode char U+00C2 (decimal 194) 91 | defining Unicode char U+00C3 (decimal 195) 92 | defining Unicode char U+00C4 (decimal 196) 93 | defining Unicode char U+00C5 (decimal 197) 94 | defining Unicode char U+00C6 (decimal 198) 95 | defining Unicode char U+00C7 (decimal 199) 96 | defining Unicode char U+00C8 (decimal 200) 97 | defining Unicode char U+00C9 (decimal 201) 98 | defining Unicode char U+00CA (decimal 202) 99 | defining Unicode char U+00CB (decimal 203) 100 | defining Unicode char U+00CC (decimal 204) 101 | defining Unicode char U+00CD (decimal 205) 102 | defining Unicode char U+00CE (decimal 206) 103 | defining Unicode char U+00CF (decimal 207) 104 | defining Unicode char U+00D0 (decimal 208) 105 | defining Unicode char U+00D1 (decimal 209) 106 | defining Unicode char U+00D2 (decimal 210) 107 | defining Unicode char U+00D3 (decimal 211) 108 | defining Unicode char U+00D4 (decimal 212) 109 | defining Unicode char U+00D5 (decimal 213) 110 | defining Unicode char U+00D6 (decimal 214) 111 | defining Unicode char U+00D8 (decimal 216) 112 | defining Unicode char U+00D9 (decimal 217) 113 | defining Unicode char U+00DA (decimal 218) 114 | defining Unicode char U+00DB (decimal 219) 115 | defining Unicode char U+00DC (decimal 220) 116 | defining Unicode char U+00DD (decimal 221) 117 | defining Unicode char U+00DE (decimal 222) 118 | defining Unicode char U+00DF (decimal 223) 119 | defining Unicode char U+00E0 (decimal 224) 120 | defining Unicode char U+00E1 (decimal 225) 121 | defining Unicode char U+00E2 (decimal 226) 122 | defining Unicode char U+00E3 (decimal 227) 123 | defining Unicode char U+00E4 (decimal 228) 124 | defining Unicode char U+00E5 (decimal 229) 125 | defining Unicode char U+00E6 (decimal 230) 126 | defining Unicode char U+00E7 (decimal 231) 127 | defining Unicode char U+00E8 (decimal 232) 128 | defining Unicode char U+00E9 (decimal 233) 129 | defining Unicode char U+00EA (decimal 234) 130 | defining Unicode char U+00EB (decimal 235) 131 | defining 
Unicode char U+00EC (decimal 236) 132 | defining Unicode char U+00ED (decimal 237) 133 | defining Unicode char U+00EE (decimal 238) 134 | defining Unicode char U+00EF (decimal 239) 135 | defining Unicode char U+00F0 (decimal 240) 136 | defining Unicode char U+00F1 (decimal 241) 137 | defining Unicode char U+00F2 (decimal 242) 138 | defining Unicode char U+00F3 (decimal 243) 139 | defining Unicode char U+00F4 (decimal 244) 140 | defining Unicode char U+00F5 (decimal 245) 141 | defining Unicode char U+00F6 (decimal 246) 142 | defining Unicode char U+00F8 (decimal 248) 143 | defining Unicode char U+00F9 (decimal 249) 144 | defining Unicode char U+00FA (decimal 250) 145 | defining Unicode char U+00FB (decimal 251) 146 | defining Unicode char U+00FC (decimal 252) 147 | defining Unicode char U+00FD (decimal 253) 148 | defining Unicode char U+00FE (decimal 254) 149 | defining Unicode char U+00FF (decimal 255) 150 | defining Unicode char U+0102 (decimal 258) 151 | defining Unicode char U+0103 (decimal 259) 152 | defining Unicode char U+0104 (decimal 260) 153 | defining Unicode char U+0105 (decimal 261) 154 | defining Unicode char U+0106 (decimal 262) 155 | defining Unicode char U+0107 (decimal 263) 156 | defining Unicode char U+010C (decimal 268) 157 | defining Unicode char U+010D (decimal 269) 158 | defining Unicode char U+010E (decimal 270) 159 | defining Unicode char U+010F (decimal 271) 160 | defining Unicode char U+0110 (decimal 272) 161 | defining Unicode char U+0111 (decimal 273) 162 | defining Unicode char U+0118 (decimal 280) 163 | defining Unicode char U+0119 (decimal 281) 164 | defining Unicode char U+011A (decimal 282) 165 | defining Unicode char U+011B (decimal 283) 166 | defining Unicode char U+011E (decimal 286) 167 | defining Unicode char U+011F (decimal 287) 168 | defining Unicode char U+0130 (decimal 304) 169 | defining Unicode char U+0131 (decimal 305) 170 | defining Unicode char U+0132 (decimal 306) 171 | defining Unicode char U+0133 (decimal 307) 172 | defining Unicode char U+0139 (decimal 313) 173 | defining Unicode char U+013A (decimal 314) 174 | defining Unicode char U+013D (decimal 317) 175 | defining Unicode char U+013E (decimal 318) 176 | defining Unicode char U+0141 (decimal 321) 177 | defining Unicode char U+0142 (decimal 322) 178 | defining Unicode char U+0143 (decimal 323) 179 | defining Unicode char U+0144 (decimal 324) 180 | defining Unicode char U+0147 (decimal 327) 181 | defining Unicode char U+0148 (decimal 328) 182 | defining Unicode char U+014A (decimal 330) 183 | defining Unicode char U+014B (decimal 331) 184 | defining Unicode char U+0150 (decimal 336) 185 | defining Unicode char U+0151 (decimal 337) 186 | defining Unicode char U+0152 (decimal 338) 187 | defining Unicode char U+0153 (decimal 339) 188 | defining Unicode char U+0154 (decimal 340) 189 | defining Unicode char U+0155 (decimal 341) 190 | defining Unicode char U+0158 (decimal 344) 191 | defining Unicode char U+0159 (decimal 345) 192 | defining Unicode char U+015A (decimal 346) 193 | defining Unicode char U+015B (decimal 347) 194 | defining Unicode char U+015E (decimal 350) 195 | defining Unicode char U+015F (decimal 351) 196 | defining Unicode char U+0160 (decimal 352) 197 | defining Unicode char U+0161 (decimal 353) 198 | defining Unicode char U+0162 (decimal 354) 199 | defining Unicode char U+0163 (decimal 355) 200 | defining Unicode char U+0164 (decimal 356) 201 | defining Unicode char U+0165 (decimal 357) 202 | defining Unicode char U+016E (decimal 366) 203 | defining Unicode char U+016F 
(decimal 367) 204 | defining Unicode char U+0170 (decimal 368) 205 | defining Unicode char U+0171 (decimal 369) 206 | defining Unicode char U+0178 (decimal 376) 207 | defining Unicode char U+0179 (decimal 377) 208 | defining Unicode char U+017A (decimal 378) 209 | defining Unicode char U+017B (decimal 379) 210 | defining Unicode char U+017C (decimal 380) 211 | defining Unicode char U+017D (decimal 381) 212 | defining Unicode char U+017E (decimal 382) 213 | defining Unicode char U+200C (decimal 8204) 214 | defining Unicode char U+2013 (decimal 8211) 215 | defining Unicode char U+2014 (decimal 8212) 216 | defining Unicode char U+2018 (decimal 8216) 217 | defining Unicode char U+2019 (decimal 8217) 218 | defining Unicode char U+201A (decimal 8218) 219 | defining Unicode char U+201C (decimal 8220) 220 | defining Unicode char U+201D (decimal 8221) 221 | defining Unicode char U+201E (decimal 8222) 222 | defining Unicode char U+2030 (decimal 8240) 223 | defining Unicode char U+2031 (decimal 8241) 224 | defining Unicode char U+2039 (decimal 8249) 225 | defining Unicode char U+203A (decimal 8250) 226 | defining Unicode char U+2423 (decimal 9251) 227 | ) 228 | Now handling font encoding OT1 ... 229 | ... processing UTF-8 mapping file for font encoding OT1 230 | 231 | (/opt/texlive/2015/texmf-dist/tex/latex/base/ot1enc.dfu 232 | File: ot1enc.dfu 2015/06/27 v1.1n UTF-8 support for inputenc 233 | defining Unicode char U+00A1 (decimal 161) 234 | defining Unicode char U+00A3 (decimal 163) 235 | defining Unicode char U+00B8 (decimal 184) 236 | defining Unicode char U+00BF (decimal 191) 237 | defining Unicode char U+00C5 (decimal 197) 238 | defining Unicode char U+00C6 (decimal 198) 239 | defining Unicode char U+00D8 (decimal 216) 240 | defining Unicode char U+00DF (decimal 223) 241 | defining Unicode char U+00E6 (decimal 230) 242 | defining Unicode char U+00EC (decimal 236) 243 | defining Unicode char U+00ED (decimal 237) 244 | defining Unicode char U+00EE (decimal 238) 245 | defining Unicode char U+00EF (decimal 239) 246 | defining Unicode char U+00F8 (decimal 248) 247 | defining Unicode char U+0131 (decimal 305) 248 | defining Unicode char U+0141 (decimal 321) 249 | defining Unicode char U+0142 (decimal 322) 250 | defining Unicode char U+0152 (decimal 338) 251 | defining Unicode char U+0153 (decimal 339) 252 | defining Unicode char U+2013 (decimal 8211) 253 | defining Unicode char U+2014 (decimal 8212) 254 | defining Unicode char U+2018 (decimal 8216) 255 | defining Unicode char U+2019 (decimal 8217) 256 | defining Unicode char U+201C (decimal 8220) 257 | defining Unicode char U+201D (decimal 8221) 258 | ) 259 | Now handling font encoding OMS ... 260 | ... processing UTF-8 mapping file for font encoding OMS 261 | 262 | (/opt/texlive/2015/texmf-dist/tex/latex/base/omsenc.dfu 263 | File: omsenc.dfu 2015/06/27 v1.1n UTF-8 support for inputenc 264 | defining Unicode char U+00A7 (decimal 167) 265 | defining Unicode char U+00B6 (decimal 182) 266 | defining Unicode char U+00B7 (decimal 183) 267 | defining Unicode char U+2020 (decimal 8224) 268 | defining Unicode char U+2021 (decimal 8225) 269 | defining Unicode char U+2022 (decimal 8226) 270 | ) 271 | Now handling font encoding OMX ... 272 | ... no UTF-8 mapping file for font encoding OMX 273 | Now handling font encoding U ... 274 | ... 
no UTF-8 mapping file for font encoding U 275 | defining Unicode char U+00A9 (decimal 169) 276 | defining Unicode char U+00AA (decimal 170) 277 | defining Unicode char U+00AE (decimal 174) 278 | defining Unicode char U+00BA (decimal 186) 279 | defining Unicode char U+02C6 (decimal 710) 280 | defining Unicode char U+02DC (decimal 732) 281 | defining Unicode char U+200C (decimal 8204) 282 | defining Unicode char U+2026 (decimal 8230) 283 | defining Unicode char U+2122 (decimal 8482) 284 | defining Unicode char U+2423 (decimal 9251) 285 | )) 286 | (/opt/texlive/2015/texmf-dist/tex/latex/base/fontenc.sty 287 | Package: fontenc 2005/09/27 v1.99g Standard LaTeX package 288 | 289 | (/opt/texlive/2015/texmf-dist/tex/latex/base/t1enc.def 290 | File: t1enc.def 2005/09/27 v1.99g Standard LaTeX file 291 | LaTeX Font Info: Redeclaring font encoding T1 on input line 48. 292 | )) 293 | (/opt/texlive/2015/texmf-dist/tex/latex/hyperref/hyperref.sty 294 | Package: hyperref 2012/11/06 v6.83m Hypertext links for LaTeX 295 | 296 | (/opt/texlive/2015/texmf-dist/tex/generic/oberdiek/hobsub-hyperref.sty 297 | Package: hobsub-hyperref 2012/05/28 v1.13 Bundle oberdiek, subset hyperref (HO) 298 | 299 | 300 | (/opt/texlive/2015/texmf-dist/tex/generic/oberdiek/hobsub-generic.sty 301 | Package: hobsub-generic 2012/05/28 v1.13 Bundle oberdiek, subset generic (HO) 302 | Package: hobsub 2012/05/28 v1.13 Construct package bundles (HO) 303 | Package: infwarerr 2010/04/08 v1.3 Providing info/warning/error messages (HO) 304 | Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO) 305 | Package: ifluatex 2010/03/01 v1.3 Provides the ifluatex switch (HO) 306 | Package ifluatex Info: LuaTeX not detected. 307 | Package hobsub Info: Skipping package `ifvtex' (already loaded). 308 | Package: intcalc 2007/09/27 v1.1 Expandable calculations with integers (HO) 309 | Package hobsub Info: Skipping package `ifpdf' (already loaded). 310 | Package: etexcmds 2011/02/16 v1.5 Avoid name clashes with e-TeX commands (HO) 311 | Package etexcmds Info: Could not find \expanded. 312 | (etexcmds) That can mean that you are not using pdfTeX 1.50 or 313 | (etexcmds) that some package has redefined \expanded. 314 | (etexcmds) In the latter case, load this package earlier. 315 | Package: kvsetkeys 2012/04/25 v1.16 Key value parser (HO) 316 | Package: kvdefinekeys 2011/04/07 v1.3 Define keys (HO) 317 | Package: pdftexcmds 2011/11/29 v0.20 Utility functions of pdfTeX for LuaTeX (HO 318 | ) 319 | Package pdftexcmds Info: LuaTeX not detected. 320 | Package pdftexcmds Info: \pdf@primitive is available. 321 | Package pdftexcmds Info: \pdf@ifprimitive is available. 322 | Package pdftexcmds Info: \pdfdraftmode found. 323 | Package: pdfescape 2011/11/25 v1.13 Implements pdfTeX's escape features (HO) 324 | Package: bigintcalc 2012/04/08 v1.3 Expandable calculations on big integers (HO 325 | ) 326 | Package: bitset 2011/01/30 v1.1 Handle bit-vector datatype (HO) 327 | Package: uniquecounter 2011/01/30 v1.2 Provide unlimited unique counter (HO) 328 | ) 329 | Package hobsub Info: Skipping package `hobsub' (already loaded). 330 | Package: letltxmacro 2010/09/02 v1.4 Let assignment for LaTeX macros (HO) 331 | Package: hopatch 2012/05/28 v1.2 Wrapper for package hooks (HO) 332 | Package: xcolor-patch 2011/01/30 xcolor patch 333 | Package: atveryend 2011/06/30 v1.8 Hooks at the very end of document (HO) 334 | Package atveryend Info: \enddocument detected (standard20110627). 
335 | Package: atbegshi 2011/10/05 v1.16 At begin shipout hook (HO) 336 | Package: refcount 2011/10/16 v3.4 Data extraction from label references (HO) 337 | Package: hycolor 2011/01/30 v1.7 Color options for hyperref/bookmark (HO) 338 | ) 339 | (/opt/texlive/2015/texmf-dist/tex/latex/oberdiek/auxhook.sty 340 | Package: auxhook 2011/03/04 v1.3 Hooks for auxiliary files (HO) 341 | ) 342 | (/opt/texlive/2015/texmf-dist/tex/latex/oberdiek/kvoptions.sty 343 | Package: kvoptions 2011/06/30 v3.11 Key value format for package options (HO) 344 | ) 345 | \@linkdim=\dimen111 346 | \Hy@linkcounter=\count91 347 | \Hy@pagecounter=\count92 348 | 349 | (/opt/texlive/2015/texmf-dist/tex/latex/hyperref/pd1enc.def 350 | File: pd1enc.def 2012/11/06 v6.83m Hyperref: PDFDocEncoding definition (HO) 351 | Now handling font encoding PD1 ... 352 | ... no UTF-8 mapping file for font encoding PD1 353 | ) 354 | \Hy@SavedSpaceFactor=\count93 355 | 356 | (/opt/texlive/2015/texmf-dist/tex/latex/latexconfig/hyperref.cfg 357 | File: hyperref.cfg 2002/06/06 v1.2 hyperref configuration of TeXLive 358 | ) 359 | Package hyperref Info: Hyper figures OFF on input line 4443. 360 | Package hyperref Info: Link nesting OFF on input line 4448. 361 | Package hyperref Info: Hyper index ON on input line 4451. 362 | Package hyperref Info: Plain pages OFF on input line 4458. 363 | Package hyperref Info: Backreferencing OFF on input line 4463. 364 | Package hyperref Info: Implicit mode ON; LaTeX internals redefined. 365 | Package hyperref Info: Bookmarks ON on input line 4688. 366 | \c@Hy@tempcnt=\count94 367 | 368 | (/opt/texlive/2015/texmf-dist/tex/latex/url/url.sty 369 | \Urlmuskip=\muskip10 370 | Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. 371 | ) 372 | LaTeX Info: Redefining \url on input line 5041. 373 | \XeTeXLinkMargin=\dimen112 374 | \Fld@menulength=\count95 375 | \Field@Width=\dimen113 376 | \Fld@charsize=\dimen114 377 | Package hyperref Info: Hyper figures OFF on input line 6295. 378 | Package hyperref Info: Link nesting OFF on input line 6300. 379 | Package hyperref Info: Hyper index ON on input line 6303. 380 | Package hyperref Info: backreferencing OFF on input line 6310. 381 | Package hyperref Info: Link coloring OFF on input line 6315. 382 | Package hyperref Info: Link coloring with OCG OFF on input line 6320. 383 | Package hyperref Info: PDF/A mode OFF on input line 6325. 384 | LaTeX Info: Redefining \ref on input line 6365. 385 | LaTeX Info: Redefining \pageref on input line 6369. 386 | \Hy@abspage=\count96 387 | \c@Item=\count97 388 | \c@Hfootnote=\count98 389 | ) 390 | 391 | Package hyperref Message: Driver (autodetected): hpdftex. 392 | 393 | (/opt/texlive/2015/texmf-dist/tex/latex/hyperref/hpdftex.def 394 | File: hpdftex.def 2012/11/06 v6.83m Hyperref driver for pdfTeX 395 | \Fld@listcount=\count99 396 | \c@bookmark@seq@number=\count100 397 | 398 | (/opt/texlive/2015/texmf-dist/tex/latex/oberdiek/rerunfilecheck.sty 399 | Package: rerunfilecheck 2011/04/15 v1.7 Rerun checks for auxiliary files (HO) 400 | Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 401 | 82. 
402 | ) 403 | \Hy@SectionHShift=\skip47 404 | ) 405 | (/opt/texlive/2015/texmf-dist/tex/latex/booktabs/booktabs.sty 406 | Package: booktabs 2005/04/14 v1.61803 publication quality tables 407 | \heavyrulewidth=\dimen115 408 | \lightrulewidth=\dimen116 409 | \cmidrulewidth=\dimen117 410 | \belowrulesep=\dimen118 411 | \belowbottomsep=\dimen119 412 | \aboverulesep=\dimen120 413 | \abovetopsep=\dimen121 414 | \cmidrulesep=\dimen122 415 | \cmidrulekern=\dimen123 416 | \defaultaddspace=\dimen124 417 | \@cmidla=\count101 418 | \@cmidlb=\count102 419 | \@aboverulesep=\dimen125 420 | \@belowrulesep=\dimen126 421 | \@thisruleclass=\count103 422 | \@lastruleclass=\count104 423 | \@thisrulewidth=\dimen127 424 | ) 425 | (/opt/texlive/2015/texmf-dist/tex/latex/amsfonts/amsfonts.sty 426 | Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support 427 | \@emptytoks=\toks18 428 | \symAMSa=\mathgroup4 429 | \symAMSb=\mathgroup5 430 | LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' 431 | (Font) U/euf/m/n --> U/euf/b/n on input line 106. 432 | ) 433 | (/opt/texlive/2015/texmf-dist/tex/latex/units/nicefrac.sty 434 | Package: nicefrac 1998/08/04 v0.9b Nice fractions 435 | \L@UnitsRaiseDisplaystyle=\skip48 436 | \L@UnitsRaiseTextstyle=\skip49 437 | \L@UnitsRaiseScriptstyle=\skip50 438 | 439 | (/opt/texlive/2015/texmf-dist/tex/latex/base/ifthen.sty 440 | Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) 441 | )) 442 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/microtype.sty 443 | Package: microtype 2013/05/23 v2.5a Micro-typographical refinements (RS) 444 | \MT@toks=\toks19 445 | \MT@count=\count105 446 | LaTeX Info: Redefining \textls on input line 766. 447 | \MT@outer@kern=\dimen128 448 | LaTeX Info: Redefining \textmicrotypecontext on input line 1285. 449 | \MT@listname@count=\count106 450 | 451 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/microtype-pdftex.def 452 | File: microtype-pdftex.def 2013/05/23 v2.5a Definitions specific to pdftex (RS) 453 | 454 | LaTeX Info: Redefining \lsstyle on input line 915. 455 | LaTeX Info: Redefining \lslig on input line 915. 456 | \MT@outer@space=\skip51 457 | ) 458 | Package microtype Info: Loading configuration file microtype.cfg. 459 | 460 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/microtype.cfg 461 | File: microtype.cfg 2013/05/23 v2.5a microtype main configuration file (RS) 462 | )) 463 | (./nips_2016.aux) 464 | \openout1 = `nips_2016.aux'. 465 | 466 | LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 67. 467 | LaTeX Font Info: ... okay on input line 67. 468 | LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 67. 469 | LaTeX Font Info: ... okay on input line 67. 470 | LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 67. 471 | LaTeX Font Info: ... okay on input line 67. 472 | LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 67. 473 | LaTeX Font Info: ... okay on input line 67. 474 | LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 67. 475 | LaTeX Font Info: ... okay on input line 67. 476 | LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 67. 477 | LaTeX Font Info: ... okay on input line 67. 478 | LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 67. 479 | LaTeX Font Info: ... okay on input line 67. 480 | LaTeX Font Info: Try loading font information for T1+ptm on input line 67. 481 | (/opt/texlive/2015/texmf-dist/tex/latex/psnfss/t1ptm.fd 482 | File: t1ptm.fd 2001/06/04 font definitions for T1/ptm. 
483 | ) 484 | *geometry* driver: auto-detecting 485 | *geometry* detected driver: pdftex 486 | *geometry* verbose mode - [ preamble ] result: 487 | * driver: pdftex 488 | * paper: letterpaper 489 | * layout: 490 | * layoutoffset:(h,v)=(0.0pt,0.0pt) 491 | * modes: 492 | * h-part:(L,W,R)=(108.405pt, 397.48499pt, 108.40501pt) 493 | * v-part:(T,H,B)=(72.26999pt, 650.43pt, 72.27pt) 494 | * \paperwidth=614.295pt 495 | * \paperheight=794.96999pt 496 | * \textwidth=397.48499pt 497 | * \textheight=650.43pt 498 | * \oddsidemargin=36.13501pt 499 | * \evensidemargin=36.13501pt 500 | * \topmargin=-37.0pt 501 | * \headheight=12.0pt 502 | * \headsep=25.0pt 503 | * \topskip=10.0pt 504 | * \footskip=30.0pt 505 | * \marginparwidth=65.0pt 506 | * \marginparsep=11.0pt 507 | * \columnsep=10.0pt 508 | * \skip\footins=9.0pt plus 4.0pt minus 2.0pt 509 | * \hoffset=0.0pt 510 | * \voffset=0.0pt 511 | * \mag=1000 512 | * \@twocolumnfalse 513 | * \@twosidefalse 514 | * \@mparswitchfalse 515 | * \@reversemarginfalse 516 | * (1in=72.27pt=25.4mm, 1cm=28.453pt) 517 | 518 | \AtBeginShipoutBox=\box26 519 | Package hyperref Info: Link coloring OFF on input line 67. 520 | (/opt/texlive/2015/texmf-dist/tex/latex/hyperref/nameref.sty 521 | Package: nameref 2012/10/27 v2.43 Cross-referencing by name of section 522 | 523 | (/opt/texlive/2015/texmf-dist/tex/generic/oberdiek/gettitlestring.sty 524 | Package: gettitlestring 2010/12/03 v1.4 Cleanup title references (HO) 525 | ) 526 | \c@section@level=\count107 527 | ) 528 | LaTeX Info: Redefining \ref on input line 67. 529 | LaTeX Info: Redefining \pageref on input line 67. 530 | LaTeX Info: Redefining \nameref on input line 67. 531 | 532 | (./nips_2016.out) (./nips_2016.out) 533 | \@outlinefile=\write3 534 | \openout3 = `nips_2016.out'. 535 | 536 | LaTeX Info: Redefining \microtypecontext on input line 67. 537 | Package microtype Info: Generating PDF output. 538 | Package microtype Info: Character protrusion enabled (level 2). 539 | Package microtype Info: Using default protrusion set `alltext'. 540 | Package microtype Info: Automatic font expansion enabled (level 2), 541 | (microtype) stretch: 20, shrink: 20, step: 1, non-selected. 542 | Package microtype Info: Using default expansion set `basictext'. 543 | Package microtype Info: No adjustment of tracking. 544 | Package microtype Info: No adjustment of interword spacing. 545 | Package microtype Info: No adjustment of character kerning. 546 | 547 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/mt-ptm.cfg 548 | File: mt-ptm.cfg 2006/04/20 v1.7 microtype config. file: Times (RS) 549 | ) 550 | LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <17.28> not available 551 | (Font) Font shape `T1/ptm/b/n' tried instead on input line 71. 552 | 553 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/mt-cmr.cfg 554 | File: mt-cmr.cfg 2013/05/19 v2.2 microtype config. file: Computer Modern Roman 555 | (RS) 556 | ) 557 | LaTeX Font Info: Try loading font information for U+msa on input line 71. 558 | 559 | (/opt/texlive/2015/texmf-dist/tex/latex/amsfonts/umsa.fd 560 | File: umsa.fd 2013/01/14 v3.01 AMS symbols A 561 | ) 562 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/mt-msa.cfg 563 | File: mt-msa.cfg 2006/02/04 v1.1 microtype config. file: AMS symbols (a) (RS) 564 | ) 565 | LaTeX Font Info: Try loading font information for U+msb on input line 71. 
566 | 567 | (/opt/texlive/2015/texmf-dist/tex/latex/amsfonts/umsb.fd 568 | File: umsb.fd 2013/01/14 v3.01 AMS symbols B 569 | ) 570 | (/opt/texlive/2015/texmf-dist/tex/latex/microtype/mt-msb.cfg 571 | File: mt-msb.cfg 2005/06/01 v1.0 microtype config. file: AMS symbols (b) (RS) 572 | ) 573 | LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <10> not available 574 | (Font) Font shape `T1/ptm/b/n' tried instead on input line 71. 575 | LaTeX Font Info: Try loading font information for T1+cmtt on input line 71. 576 | 577 | (/opt/texlive/2015/texmf-dist/tex/latex/base/t1cmtt.fd 578 | File: t1cmtt.fd 2014/09/29 v2.5h Standard LaTeX font definitions 579 | ) 580 | Package microtype Info: Loading generic settings for font family 581 | (microtype) `cmtt' (encoding: T1). 582 | (microtype) For optimal results, create family-specific settings. 583 | (microtype) See the microtype manual for details. 584 | LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <12> not available 585 | (Font) Font shape `T1/ptm/b/n' tried instead on input line 72. 586 | LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <7> not available 587 | (Font) Font shape `T1/ptm/b/n' tried instead on input line 124. 588 | [1 589 | 590 | {/opt/texlive/2015/texmf-var/fonts/map/pdftex/updmap/pdftex.map}] [2] [3] 591 | LaTeX Font Info: Try loading font information for OMS+ptm on input line 320. 592 | 593 | 594 | (/opt/texlive/2015/texmf-dist/tex/latex/psnfss/omsptm.fd 595 | File: omsptm.fd 596 | ) 597 | LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available 598 | (Font) Font shape `OMS/cmsy/m/n' tried instead on input line 320. 599 | [4] 600 | LaTeX Font Info: Font shape `T1/ptm/bx/it' in size <10> not available 601 | (Font) Font shape `T1/ptm/b/it' tried instead on input line 387. 602 | LaTeX Font Info: Font shape `T1/ptm/bx/n' in size <9> not available 603 | (Font) Font shape `T1/ptm/b/n' tried instead on input line 404. 604 | Package atveryend Info: Empty hook `BeforeClearDocument' on input line 406. 605 | [5] 606 | Package atveryend Info: Empty hook `AfterLastShipout' on input line 406. 607 | 608 | (./nips_2016.aux) 609 | Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 406. 610 | Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 406. 611 | Package rerunfilecheck Info: File `nips_2016.out' has not changed. 612 | (rerunfilecheck) Checksum: 351AF28D8766A694F09B69678630ECDB;962. 613 | Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 406. 614 | ) 615 | Here is how much of TeX's memory you used: 616 | 7400 strings out of 493091 617 | 111987 string characters out of 6137640 618 | 228400 words of memory out of 5000000 619 | 10648 multiletter control sequences out of 15000+600000 620 | 51815 words of font info for 137 fonts, out of 8000000 for 9000 621 | 1141 hyphenation exceptions out of 8191 622 | 31i,10n,35p,210b,376s stack positions out of 5000i,500n,10000p,200000b,80000s 623 | {/opt/texlive/2015/texmf-dist/fonts/enc/dvips/base/8r.enc}{/opt/texlive/2015/ 624 | texmf-dist/fonts/enc/dvips/cm-super/cm-super-t1.enc} 636 | Output written on nips_2016.pdf (5 pages, 152670 bytes). 637 | PDF statistics: 638 | 169 PDF objects out of 1000 (max. 8388607) 639 | 148 compressed objects within 2 object streams 640 | 28 named destinations out of 1000 (max. 500000) 641 | 26233 words of extra memory for PDF output out of 29859 (max. 
10000000) 642 | 643 | -------------------------------------------------------------------------------- /paper/nips_2016.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [1][-]{section.1}{Submission of papers to NIPS 2016}{}% 1 2 | \BOOKMARK [2][-]{subsection.1.1}{Style}{section.1}% 2 3 | \BOOKMARK [2][-]{subsection.1.2}{Retrieval of style files}{section.1}% 3 4 | \BOOKMARK [1][-]{section.2}{General formatting instructions}{}% 4 5 | \BOOKMARK [1][-]{section.3}{Headings: first level}{}% 5 6 | \BOOKMARK [2][-]{subsection.3.1}{Headings: second level}{section.3}% 6 7 | \BOOKMARK [3][-]{subsubsection.3.1.1}{Headings: third level}{subsection.3.1}% 7 8 | \BOOKMARK [1][-]{section.4}{Citations, figures, tables, references}{}% 8 9 | \BOOKMARK [2][-]{subsection.4.1}{Citations within the text}{section.4}% 9 10 | \BOOKMARK [2][-]{subsection.4.2}{Footnotes}{section.4}% 10 11 | \BOOKMARK [2][-]{subsection.4.3}{Figures}{section.4}% 11 12 | \BOOKMARK [2][-]{subsection.4.4}{Tables}{section.4}% 12 13 | \BOOKMARK [1][-]{section.5}{Final instructions}{}% 13 14 | \BOOKMARK [1][-]{section.6}{Preparing PDF files}{}% 14 15 | \BOOKMARK [2][-]{subsection.6.1}{Margins in LaTeX}{section.6}% 15 16 | -------------------------------------------------------------------------------- /paper/nips_2016.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/nips_2016.pdf -------------------------------------------------------------------------------- /paper/nips_2016.sty: -------------------------------------------------------------------------------- 1 | % partial rewrite of the LaTeX2e package for submissions to the 2 | % Conference on Neural Information Processing Systems (NIPS): 3 | % 4 | % - uses more LaTeX conventions 5 | % - line numbers at submission time replaced with aligned numbers from 6 | % lineno package 7 | % - \nipsfinalcopy replaced with [final] package option 8 | % - automatically loads times package for authors 9 | % - loads natbib automatically; this can be suppressed with the 10 | % [nonatbib] package option 11 | % - adds foot line to first page identifying the conference 12 | % 13 | % Roman Garnett (garnett@wustl.edu) and the many authors of 14 | % nips15submit_e.sty, including MK and drstrip@sandia 15 | % 16 | % last revision: March 2016 17 | 18 | \NeedsTeXFormat{LaTeX2e} 19 | \ProvidesPackage{nips_2016}[2016/03/07 NIPS 2016 submission/camera-ready style file] 20 | 21 | % declare final option, which creates camera-ready copy 22 | \newif\if@nipsfinal\@nipsfinalfalse 23 | \DeclareOption{final}{ 24 | \@nipsfinaltrue 25 | } 26 | 27 | % declare nonatbib option, which does not load natbib in case of 28 | % package clash (users can pass options to natbib via 29 | % \PassOptionsToPackage) 30 | \newif\if@natbib\@natbibtrue 31 | \DeclareOption{nonatbib}{ 32 | \@natbibfalse 33 | } 34 | 35 | \ProcessOptions\relax 36 | 37 | % fonts 38 | \renewcommand{\rmdefault}{ptm} 39 | \renewcommand{\sfdefault}{phv} 40 | 41 | % change this every year for notice string at bottom 42 | \newcommand{\@nipsordinal}{30th} 43 | \newcommand{\@nipsyear}{2016} 44 | \newcommand{\@nipslocation}{Barcelona, Spain} 45 | 46 | % handle tweaks for camera-ready copy vs. 
submission copy 47 | \if@nipsfinal 48 | \newcommand{\@noticestring}{% 49 | \@nipsordinal\/ Conference on Neural Information Processing Systems 50 | (NIPS \@nipsyear), \@nipslocation.% 51 | } 52 | \else 53 | \newcommand{\@noticestring}{% 54 | Submitted to \@nipsordinal\/ Conference on Neural Information 55 | Processing Systems (NIPS \@nipsyear). Do not distribute.% 56 | } 57 | 58 | % line numbers for submission 59 | \RequirePackage{lineno} 60 | \linenumbers 61 | 62 | % fix incompatibilities between lineno and amsmath, if required, by 63 | % transparently wrapping linenomath environments around amsmath 64 | % environments 65 | \AtBeginDocument{% 66 | \@ifpackageloaded{amsmath}{% 67 | \newcommand*\patchAmsMathEnvironmentForLineno[1]{% 68 | \expandafter\let\csname old#1\expandafter\endcsname\csname #1\endcsname 69 | \expandafter\let\csname oldend#1\expandafter\endcsname\csname end#1\endcsname 70 | \renewenvironment{#1}% 71 | {\linenomath\csname old#1\endcsname}% 72 | {\csname oldend#1\endcsname\endlinenomath}% 73 | }% 74 | \newcommand*\patchBothAmsMathEnvironmentsForLineno[1]{% 75 | \patchAmsMathEnvironmentForLineno{#1}% 76 | \patchAmsMathEnvironmentForLineno{#1*}% 77 | }% 78 | \patchBothAmsMathEnvironmentsForLineno{equation}% 79 | \patchBothAmsMathEnvironmentsForLineno{align}% 80 | \patchBothAmsMathEnvironmentsForLineno{flalign}% 81 | \patchBothAmsMathEnvironmentsForLineno{alignat}% 82 | \patchBothAmsMathEnvironmentsForLineno{gather}% 83 | \patchBothAmsMathEnvironmentsForLineno{multline}% 84 | }{} 85 | } 86 | \fi 87 | 88 | % load natbib unless told otherwise 89 | \if@natbib 90 | \RequirePackage{natbib} 91 | \fi 92 | 93 | % set page geometry 94 | \usepackage[ 95 | letterpaper, 96 | textheight=9in, 97 | textwidth=5.5in, 98 | top=1in 99 | ]{geometry} 100 | 101 | \widowpenalty=10000 102 | \clubpenalty=10000 103 | \flushbottom 104 | \sloppy 105 | 106 | % font sizes with reduced leading 107 | \renewcommand{\normalsize}{% 108 | \@setfontsize\normalsize\@xpt\@xipt 109 | \abovedisplayskip 7\p@ \@plus 2\p@ \@minus 5\p@ 110 | \abovedisplayshortskip \z@ \@plus 3\p@ 111 | \belowdisplayskip \abovedisplayskip 112 | \belowdisplayshortskip 4\p@ \@plus 3\p@ \@minus 3\p@ 113 | } 114 | \normalsize 115 | \renewcommand{\small}{% 116 | \@setfontsize\small\@ixpt\@xpt 117 | \abovedisplayskip 6\p@ \@plus 1.5\p@ \@minus 4\p@ 118 | \abovedisplayshortskip \z@ \@plus 2\p@ 119 | \belowdisplayskip \abovedisplayskip 120 | \belowdisplayshortskip 3\p@ \@plus 2\p@ \@minus 2\p@ 121 | } 122 | \renewcommand{\footnotesize}{\@setfontsize\footnotesize\@ixpt\@xpt} 123 | \renewcommand{\scriptsize}{\@setfontsize\scriptsize\@viipt\@viiipt} 124 | \renewcommand{\tiny}{\@setfontsize\tiny\@vipt\@viipt} 125 | \renewcommand{\large}{\@setfontsize\large\@xiipt{14}} 126 | \renewcommand{\Large}{\@setfontsize\Large\@xivpt{16}} 127 | \renewcommand{\LARGE}{\@setfontsize\LARGE\@xviipt{20}} 128 | \renewcommand{\huge}{\@setfontsize\huge\@xxpt{23}} 129 | \renewcommand{\Huge}{\@setfontsize\Huge\@xxvpt{28}} 130 | 131 | % sections with less space 132 | \providecommand{\section}{} 133 | \renewcommand{\section}{% 134 | \@startsection{section}{1}{\z@}% 135 | {-2.0ex \@plus -0.5ex \@minus -0.2ex}% 136 | { 1.5ex \@plus 0.3ex \@minus 0.2ex}% 137 | {\large\bf\raggedright}% 138 | } 139 | \providecommand{\subsection}{} 140 | \renewcommand{\subsection}{% 141 | \@startsection{subsection}{2}{\z@}% 142 | {-1.8ex \@plus -0.5ex \@minus -0.2ex}% 143 | { 0.8ex \@plus 0.2ex}% 144 | {\normalsize\bf\raggedright}% 145 | } 146 | \providecommand{\subsubsection}{} 147 | 
\renewcommand{\subsubsection}{% 148 | \@startsection{subsubsection}{3}{\z@}% 149 | {-1.5ex \@plus -0.5ex \@minus -0.2ex}% 150 | { 0.5ex \@plus 0.2ex}% 151 | {\normalsize\bf\raggedright}% 152 | } 153 | \providecommand{\paragraph}{} 154 | \renewcommand{\paragraph}{% 155 | \@startsection{paragraph}{4}{\z@}% 156 | {1.5ex \@plus 0.5ex \@minus 0.2ex}% 157 | {-1em}% 158 | {\normalsize\bf}% 159 | } 160 | \providecommand{\subparagraph}{} 161 | \renewcommand{\subparagraph}{% 162 | \@startsection{subparagraph}{5}{\z@}% 163 | {1.5ex \@plus 0.5ex \@minus 0.2ex}% 164 | {-1em}% 165 | {\normalsize\bf}% 166 | } 167 | \providecommand{\subsubsubsection}{} 168 | \renewcommand{\subsubsubsection}{% 169 | \vskip5pt{\noindent\normalsize\rm\raggedright}% 170 | } 171 | 172 | % float placement 173 | \renewcommand{\topfraction }{0.85} 174 | \renewcommand{\bottomfraction }{0.4} 175 | \renewcommand{\textfraction }{0.1} 176 | \renewcommand{\floatpagefraction}{0.7} 177 | 178 | \newlength{\@nipsabovecaptionskip}\setlength{\@nipsabovecaptionskip}{7\p@} 179 | \newlength{\@nipsbelowcaptionskip}\setlength{\@nipsbelowcaptionskip}{\z@} 180 | 181 | \setlength{\abovecaptionskip}{\@nipsabovecaptionskip} 182 | \setlength{\belowcaptionskip}{\@nipsbelowcaptionskip} 183 | 184 | % swap above/belowcaptionskip lengths for tables 185 | \renewenvironment{table} 186 | {\setlength{\abovecaptionskip}{\@nipsbelowcaptionskip}% 187 | \setlength{\belowcaptionskip}{\@nipsabovecaptionskip}% 188 | \@float{table}} 189 | {\end@float} 190 | 191 | % footnote formatting 192 | \setlength{\footnotesep }{6.65\p@} 193 | \setlength{\skip\footins}{9\p@ \@plus 4\p@ \@minus 2\p@} 194 | \renewcommand{\footnoterule}{\kern-3\p@ \hrule width 12pc \kern 2.6\p@} 195 | \setcounter{footnote}{0} 196 | 197 | % paragraph formatting 198 | \setlength{\parindent}{\z@} 199 | \setlength{\parskip }{5.5\p@} 200 | 201 | % list formatting 202 | \setlength{\topsep }{4\p@ \@plus 1\p@ \@minus 2\p@} 203 | \setlength{\partopsep }{1\p@ \@plus 0.5\p@ \@minus 0.5\p@} 204 | \setlength{\itemsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} 205 | \setlength{\parsep }{2\p@ \@plus 1\p@ \@minus 0.5\p@} 206 | \setlength{\leftmargin }{3pc} 207 | \setlength{\leftmargini }{\leftmargin} 208 | \setlength{\leftmarginii }{2em} 209 | \setlength{\leftmarginiii}{1.5em} 210 | \setlength{\leftmarginiv }{1.0em} 211 | \setlength{\leftmarginv }{0.5em} 212 | \def\@listi {\leftmargin\leftmargini} 213 | \def\@listii {\leftmargin\leftmarginii 214 | \labelwidth\leftmarginii 215 | \advance\labelwidth-\labelsep 216 | \topsep 2\p@ \@plus 1\p@ \@minus 0.5\p@ 217 | \parsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ 218 | \itemsep \parsep} 219 | \def\@listiii{\leftmargin\leftmarginiii 220 | \labelwidth\leftmarginiii 221 | \advance\labelwidth-\labelsep 222 | \topsep 1\p@ \@plus 0.5\p@ \@minus 0.5\p@ 223 | \parsep \z@ 224 | \partopsep 0.5\p@ \@plus 0\p@ \@minus 0.5\p@ 225 | \itemsep \topsep} 226 | \def\@listiv {\leftmargin\leftmarginiv 227 | \labelwidth\leftmarginiv 228 | \advance\labelwidth-\labelsep} 229 | \def\@listv {\leftmargin\leftmarginv 230 | \labelwidth\leftmarginv 231 | \advance\labelwidth-\labelsep} 232 | \def\@listvi {\leftmargin\leftmarginvi 233 | \labelwidth\leftmarginvi 234 | \advance\labelwidth-\labelsep} 235 | 236 | % create title 237 | \providecommand{\maketitle}{} 238 | \renewcommand{\maketitle}{% 239 | \par 240 | \begingroup 241 | \renewcommand{\thefootnote}{\fnsymbol{footnote}} 242 | % for perfect author name centering 243 | \renewcommand{\@makefnmark}{\hbox to \z@{$^{\@thefnmark}$\hss}} 244 | % The footnote-mark was 
overlapping the footnote-text, 245 | % added the following to fix this problem (MK) 246 | \long\def\@makefntext##1{% 247 | \parindent 1em\noindent 248 | \hbox to 1.8em{\hss $\m@th ^{\@thefnmark}$}##1 249 | } 250 | \thispagestyle{empty} 251 | \@maketitle 252 | \@thanks 253 | % \@notice 254 | \endgroup 255 | \let\maketitle\relax 256 | \let\thanks\relax 257 | } 258 | 259 | % rules for title box at top of first page 260 | \newcommand{\@toptitlebar}{ 261 | \hrule height 4\p@ 262 | \vskip 0.25in 263 | \vskip -\parskip% 264 | } 265 | \newcommand{\@bottomtitlebar}{ 266 | \vskip 0.29in 267 | \vskip -\parskip 268 | \hrule height 1\p@ 269 | \vskip 0.09in% 270 | } 271 | 272 | % create title (includes both anonymized and non-anonymized versions) 273 | \providecommand{\@maketitle}{} 274 | \renewcommand{\@maketitle}{% 275 | \vbox{% 276 | \hsize\textwidth 277 | \linewidth\hsize 278 | \vskip 0.1in 279 | \@toptitlebar 280 | \centering 281 | {\LARGE\bf \@title\par} 282 | \@bottomtitlebar 283 | \if@nipsfinal 284 | \def\And{% 285 | \end{tabular}\hfil\linebreak[0]\hfil% 286 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% 287 | } 288 | \def\AND{% 289 | \end{tabular}\hfil\linebreak[4]\hfil% 290 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\ignorespaces% 291 | } 292 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@}\@author\end{tabular}% 293 | \else 294 | \begin{tabular}[t]{c}\bf\rule{\z@}{24\p@} 295 | Anonymous Author(s) \\ 296 | Affiliation \\ 297 | Address \\ 298 | \texttt{email} \\ 299 | \end{tabular}% 300 | \fi 301 | \vskip 0.3in \@minus 0.1in 302 | } 303 | } 304 | 305 | % add conference notice to bottom of first page 306 | \newcommand{\ftype@noticebox}{8} 307 | \newcommand{\@notice}{% 308 | % give a bit of extra room back to authors on first page 309 | \enlargethispage{2\baselineskip}% 310 | \@float{noticebox}[b]% 311 | \footnotesize\@noticestring% 312 | \end@float% 313 | } 314 | 315 | % abstract styling 316 | \renewenvironment{abstract}% 317 | {% 318 | \vskip 0.075in% 319 | \centerline% 320 | {\large\bf Abstract}% 321 | \vspace{0.5ex}% 322 | \begin{quote}% 323 | } 324 | { 325 | \par% 326 | \end{quote}% 327 | \vskip 1ex% 328 | } 329 | 330 | \endinput 331 | -------------------------------------------------------------------------------- /paper/nips_2016.tex: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | % if you need to pass options to natbib, use, e.g.: 4 | % \PassOptionsToPackage{numbers, compress}{natbib} 5 | % before loading nips_2016 6 | % 7 | % to avoid loading the natbib package, add option nonatbib: 8 | % \usepackage[nonatbib]{nips_2016} 9 | 10 | \usepackage[final]{nips_2016} 11 | 12 | % to compile a camera-ready version, add the [final] option, e.g.: 13 | % \usepackage[final]{nips_2016} 14 | 15 | \usepackage[utf8]{inputenc} % allow utf-8 input 16 | \usepackage[T1]{fontenc} % use 8-bit T1 fonts 17 | \usepackage{hyperref} % hyperlinks 18 | \usepackage{url} % simple URL typesetting 19 | \usepackage{booktabs} % professional-quality tables 20 | \usepackage{amsfonts} % blackboard math symbols 21 | \usepackage{nicefrac} % compact symbols for 1/2, etc. 22 | \usepackage{microtype} % microtypography 23 | 24 | \title{Formatting instructions for NIPS 2016} 25 | 26 | % The \author macro works with any number of authors. There are two 27 | % commands used to separate the names and addresses of multiple 28 | % authors: \And and \AND. 29 | % 30 | % Using \And between authors leaves it to LaTeX to determine where to 31 | % break the lines. 
Using \AND forces a line break at that point. So, 32 | % if LaTeX puts 3 of 4 authors names on the first line, and the last 33 | % on the second line, try using \AND instead of \And before the third 34 | % author name. 35 | 36 | \author{ 37 | David S.~Hippocampus\thanks{Use footnote for providing further 38 | information about author (webpage, alternative 39 | address)---\emph{not} for acknowledging funding agencies.} \\ 40 | Department of Computer Science\\ 41 | Cranberry-Lemon University\\ 42 | Pittsburgh, PA 15213 \\ 43 | \texttt{hippo@cs.cranberry-lemon.edu} \\ 44 | %% examples of more authors 45 | %% \And 46 | %% Coauthor \\ 47 | %% Affiliation \\ 48 | %% Address \\ 49 | %% \texttt{email} \\ 50 | %% \AND 51 | %% Coauthor \\ 52 | %% Affiliation \\ 53 | %% Address \\ 54 | %% \texttt{email} \\ 55 | %% \And 56 | %% Coauthor \\ 57 | %% Affiliation \\ 58 | %% Address \\ 59 | %% \texttt{email} \\ 60 | %% \And 61 | %% Coauthor \\ 62 | %% Affiliation \\ 63 | %% Address \\ 64 | %% \texttt{email} \\ 65 | } 66 | 67 | \begin{document} 68 | % \nipsfinalcopy is no longer used 69 | 70 | \maketitle 71 | 72 | \begin{abstract} 73 | The abstract paragraph should be indented \nicefrac{1}{2}~inch 74 | (3~picas) on both the left- and right-hand margins. Use 10~point 75 | type, with a vertical spacing (leading) of 11~points. The word 76 | \textbf{Abstract} must be centered, bold, and in point size 12. Two 77 | line spaces precede the abstract. The abstract must be limited to 78 | one paragraph. 79 | \end{abstract} 80 | 81 | \section{Submission of papers to NIPS 2016} 82 | 83 | \textbf{There is a new style file for papers submitted in 2016!} 84 | 85 | NIPS requires electronic submissions. The electronic submission site 86 | is 87 | \begin{center} 88 | \url{https://cmt.research.microsoft.com/NIPS2016/} 89 | \end{center} 90 | 91 | Please read carefully the instructions below and follow them 92 | faithfully. 93 | 94 | \subsection{Style} 95 | 96 | Papers to be submitted to NIPS 2016 must be prepared according to the 97 | instructions presented here. Papers may only be up to eight pages 98 | long, including figures. Since 2009 an additional ninth page 99 | \emph{containing only acknowledgments and/or cited references} is 100 | allowed. Papers that exceed nine pages will not be reviewed, or in any 101 | other way considered for presentation at the conference. 102 | 103 | The margins in 2016 are the same as since 2007, which allow for 104 | $\sim$$15\%$ more words in the paper compared to earlier years. 105 | 106 | Authors are required to use the NIPS \LaTeX{} style files obtainable 107 | at the NIPS website as indicated below. Please make sure you use the 108 | current files and not previous versions. Tweaking the style files may 109 | be grounds for rejection. 110 | 111 | \subsection{Retrieval of style files} 112 | 113 | The style files for NIPS and other conference information are 114 | available on the World Wide Web at 115 | \begin{center} 116 | \url{http://www.nips.cc/} 117 | \end{center} 118 | The file \verb+nips_2016.pdf+ contains these instructions and 119 | illustrates the various formatting requirements your NIPS paper must 120 | satisfy. 121 | 122 | The only supported style file for NIPS 2016 is \verb+nips_2016.sty+, 123 | rewritten for \LaTeXe{}. 
\textbf{Previous style files for \LaTeX{} 124 | 2.09, Microsoft Word, and RTF are no longer supported!} 125 | 126 | The new \LaTeX{} style file contains two optional arguments: 127 | \verb+final+, which creates a camera-ready copy, and \verb+nonatbib+, 128 | which will not load the \verb+natbib+ package for you in case of 129 | package clash. 130 | 131 | At submission time, please omit the \verb+final+ option. This will 132 | anonymize your submission and add line numbers to aid review. Please 133 | do \emph{not} refer to these line numbers in your paper as they will 134 | be removed during generation of camera-ready copies. 135 | 136 | The file \verb+nips_2016.tex+ may be used as a ``shell'' for writing 137 | your paper. All you have to do is replace the author, title, abstract, 138 | and text of the paper with your own. 139 | 140 | The formatting instructions contained in these style files are 141 | summarized in Sections \ref{gen_inst}, \ref{headings}, and 142 | \ref{others} below. 143 | 144 | \section{General formatting instructions} 145 | \label{gen_inst} 146 | 147 | The text must be confined within a rectangle 5.5~inches (33~picas) 148 | wide and 9~inches (54~picas) long. The left margin is 1.5~inch 149 | (9~picas). Use 10~point type with a vertical spacing (leading) of 150 | 11~points. Times New Roman is the preferred typeface throughout, and 151 | will be selected for you by default. Paragraphs are separated by 152 | \nicefrac{1}{2}~line space (5.5 points), with no indentation. 153 | 154 | The paper title should be 17~point, initial caps/lower case, bold, 155 | centered between two horizontal rules. The top rule should be 4~points 156 | thick and the bottom rule should be 1~point thick. Allow 157 | \nicefrac{1}{4}~inch space above and below the title to rules. All 158 | pages should start at 1~inch (6~picas) from the top of the page. 159 | 160 | For the final version, authors' names are set in boldface, and each 161 | name is centered above the corresponding address. The lead author's 162 | name is to be listed first (left-most), and the co-authors' names (if 163 | different address) are set to follow. If there is only one co-author, 164 | list both author and co-author side by side. 165 | 166 | Please pay special attention to the instructions in Section \ref{others} 167 | regarding figures, tables, acknowledgments, and references. 168 | 169 | \section{Headings: first level} 170 | \label{headings} 171 | 172 | All headings should be lower case (except for first word and proper 173 | nouns), flush left, and bold. 174 | 175 | First-level headings should be in 12-point type. 176 | 177 | \subsection{Headings: second level} 178 | 179 | Second-level headings should be in 10-point type. 180 | 181 | \subsubsection{Headings: third level} 182 | 183 | Third-level headings should be in 10-point type. 184 | 185 | \paragraph{Paragraphs} 186 | 187 | There is also a \verb+\paragraph+ command available, which sets the 188 | heading in bold, flush left, and inline with the text, with the 189 | heading followed by 1\,em of space. 190 | 191 | \section{Citations, figures, tables, references} 192 | \label{others} 193 | 194 | These instructions apply to everyone. 195 | 196 | \subsection{Citations within the text} 197 | 198 | The \verb+natbib+ package will be loaded for you by default. 199 | Citations may be author/year or numeric, as long as you maintain 200 | internal consistency. As to the format of the references themselves, 201 | any style is acceptable as long as it is used consistently. 
202 | 203 | The documentation for \verb+natbib+ may be found at 204 | \begin{center} 205 | \url{http://mirrors.ctan.org/macros/latex/contrib/natbib/natnotes.pdf} 206 | \end{center} 207 | Of note is the command \verb+\citet+, which produces citations 208 | appropriate for use in inline text. For example, 209 | \begin{verbatim} 210 | \citet{hasselmo} investigated\dots 211 | \end{verbatim} 212 | produces 213 | \begin{quote} 214 | Hasselmo, et al.\ (1995) investigated\dots 215 | \end{quote} 216 | 217 | If you wish to load the \verb+natbib+ package with options, you may 218 | add the following before loading the \verb+nips_2016+ package: 219 | \begin{verbatim} 220 | \PassOptionsToPackage{options}{natbib} 221 | \end{verbatim} 222 | 223 | If \verb+natbib+ clashes with another package you load, you can add 224 | the optional argument \verb+nonatbib+ when loading the style file: 225 | \begin{verbatim} 226 | \usepackage[nonatbib]{nips_2016} 227 | \end{verbatim} 228 | 229 | As submission is double blind, refer to your own published work in the 230 | third person. That is, use ``In the previous work of Jones et 231 | al.\ [4],'' not ``In our previous work [4].'' If you cite your other 232 | papers that are not widely available (e.g., a journal paper under 233 | review), use anonymous author names in the citation, e.g., an author 234 | of the form ``A.\ Anonymous.'' 235 | 236 | \subsection{Footnotes} 237 | 238 | Footnotes should be used sparingly. If you do require a footnote, 239 | indicate footnotes with a number\footnote{Sample of the first 240 | footnote.} in the text. Place the footnotes at the bottom of the 241 | page on which they appear. Precede the footnote with a horizontal 242 | rule of 2~inches (12~picas). 243 | 244 | Note that footnotes are properly typeset \emph{after} punctuation 245 | marks.\footnote{As in this example.} 246 | 247 | \subsection{Figures} 248 | 249 | All artwork must be neat, clean, and legible. Lines should be dark 250 | enough for purposes of reproduction. The figure number and caption 251 | always appear after the figure. Place one line space before the figure 252 | caption and one line space after the figure. The figure caption should 253 | be lower case (except for first word and proper nouns); figures are 254 | numbered consecutively. 255 | 256 | You may use color figures. However, it is best for the figure 257 | captions and the paper body to be legible if the paper is printed in 258 | either black/white or in color. 259 | \begin{figure}[h] 260 | \centering 261 | \fbox{\rule[-.5cm]{0cm}{4cm} \rule[-.5cm]{4cm}{0cm}} 262 | \caption{Sample figure caption.} 263 | \end{figure} 264 | 265 | \subsection{Tables} 266 | 267 | All tables must be centered, neat, clean and legible. The table 268 | number and title always appear before the table. See 269 | Table~\ref{sample-table}. 270 | 271 | Place one line space before the table title, one line space after the 272 | table title, and one line space after the table. The table title must 273 | be lower case (except for first word and proper nouns); tables are 274 | numbered consecutively. 275 | 276 | Note that publication-quality tables \emph{do not contain vertical 277 | rules.} We strongly suggest the use of the \verb+booktabs+ package, 278 | which allows for typesetting high-quality, professional tables: 279 | \begin{center} 280 | \url{https://www.ctan.org/pkg/booktabs} 281 | \end{center} 282 | This package was used to typeset Table~\ref{sample-table}. 
283 | 284 | \begin{table}[t] 285 | \caption{Sample table title} 286 | \label{sample-table} 287 | \centering 288 | \begin{tabular}{lll} 289 | \toprule 290 | \multicolumn{2}{c}{Part} \\ 291 | \cmidrule{1-2} 292 | Name & Description & Size ($\mu$m) \\ 293 | \midrule 294 | Dendrite & Input terminal & $\sim$100 \\ 295 | Axon & Output terminal & $\sim$10 \\ 296 | Soma & Cell body & up to $10^6$ \\ 297 | \bottomrule 298 | \end{tabular} 299 | \end{table} 300 | 301 | \section{Final instructions} 302 | 303 | Do not change any aspects of the formatting parameters in the style 304 | files. In particular, do not modify the width or length of the 305 | rectangle the text should fit into, and do not change font sizes 306 | (except perhaps in the \textbf{References} section; see below). Please 307 | note that pages should be numbered. 308 | 309 | \section{Preparing PDF files} 310 | 311 | Please prepare submission files with paper size ``US Letter,'' and 312 | not, for example, ``A4.'' 313 | 314 | Fonts were the main cause of problems in the past years. Your PDF file 315 | must only contain Type 1 or Embedded TrueType fonts. Here are a few 316 | instructions to achieve this. 317 | 318 | \begin{itemize} 319 | 320 | \item You should directly generate PDF files using \verb+pdflatex+. 321 | 322 | \item You can check which fonts a PDF file uses. In Acrobat Reader, 323 | select the menu Files$>$Document Properties$>$Fonts and select Show 324 | All Fonts. You can also use the program \verb+pdffonts+ which comes 325 | with \verb+xpdf+ and is available out-of-the-box on most Linux 326 | machines. 327 | 328 | \item The IEEE has recommendations for generating PDF files whose 329 | fonts are also acceptable for NIPS. Please see 330 | \url{http://www.emfield.org/icuwb2010/downloads/IEEE-PDF-SpecV32.pdf} 331 | 332 | \item \verb+xfig+ "patterned" shapes are implemented with bitmap 333 | fonts. Use "solid" shapes instead. 334 | 335 | \item The \verb+\bbold+ package almost always uses bitmap fonts. You 336 | should use the equivalent AMS Fonts: 337 | \begin{verbatim} 338 | \usepackage{amsfonts} 339 | \end{verbatim} 340 | followed by, e.g., \verb+\mathbb{R}+, \verb+\mathbb{N}+, or 341 | \verb+\mathbb{C}+ for $\mathbb{R}$, $\mathbb{N}$ or $\mathbb{C}$. You 342 | can also use the following workaround for reals, naturals, and complex numbers: 343 | \begin{verbatim} 344 | \newcommand{\RR}{I\!\!R} %real numbers 345 | \newcommand{\Nat}{I\!\!N} %natural numbers 346 | \newcommand{\CC}{I\!\!\!\!C} %complex numbers 347 | \end{verbatim} 348 | Note that \verb+amsfonts+ is automatically loaded by the 349 | \verb+amssymb+ package. 350 | 351 | \end{itemize} 352 | 353 | If your file contains Type 3 fonts or non-embedded TrueType fonts, we 354 | will ask you to fix it. 355 | 356 | \subsection{Margins in \LaTeX{}} 357 | 358 | Most of the margin problems come from figures positioned by hand using 359 | \verb+\special+ or other commands. We suggest using the command 360 | \verb+\includegraphics+ from the \verb+graphicx+ package. Always 361 | specify the figure width as a multiple of the line width as in the 362 | example below: 363 | \begin{verbatim} 364 | \usepackage[pdftex]{graphicx} ... 365 | \includegraphics[width=0.8\linewidth]{myfile.pdf} 366 | \end{verbatim} 367 | See Section 4.4 in the graphics bundle documentation 368 | (\url{http://mirrors.ctan.org/macros/latex/required/graphics/grfguide.pdf}) 369 | 370 | A number of width problems arise when \LaTeX{} cannot properly 371 | hyphenate a line.
Please give LaTeX hyphenation hints using the 372 | \verb+\-+ command when necessary. 373 | 374 | \subsubsection*{Acknowledgments} 375 | 376 | Use unnumbered third level headings for the acknowledgments. All 377 | acknowledgments go at the end of the paper. Do not include 378 | acknowledgments in the anonymized submission, only in the final paper. 379 | 380 | \section*{References} 381 | 382 | References follow the acknowledgments. Use unnumbered first-level 383 | heading for the references. Any choice of citation style is acceptable 384 | as long as you are consistent. It is permissible to reduce the font 385 | size to \verb+small+ (9 point) when listing the references. {\bf 386 | Remember that you can use a ninth page as long as it contains 387 | \emph{only} cited references.} 388 | \medskip 389 | 390 | \small 391 | 392 | [1] Alexander, J.A.\ \& Mozer, M.C.\ (1995) Template-based algorithms 393 | for connectionist rule extraction. In G.\ Tesauro, D.S.\ Touretzky and 394 | T.K.\ Leen (eds.), {\it Advances in Neural Information Processing 395 | Systems 7}, pp.\ 609--616. Cambridge, MA: MIT Press. 396 | 397 | [2] Bower, J.M.\ \& Beeman, D.\ (1995) {\it The Book of GENESIS: 398 | Exploring Realistic Neural Models with the GEneral NEural SImulation 399 | System.} New York: TELOS/Springer--Verlag. 400 | 401 | [3] Hasselmo, M.E., Schnell, E.\ \& Barkai, E.\ (1995) Dynamics of 402 | learning and recall at excitatory recurrent synapses and cholinergic 403 | modulation in rat hippocampal region CA3. {\it Journal of 404 | Neuroscience} {\bf 15}(7):5249-5262. 405 | 406 | \end{document} 407 | -------------------------------------------------------------------------------- /paper/paper-blx.bib: -------------------------------------------------------------------------------- 1 | @Comment{$ biblatex control file $} 2 | @Comment{$ biblatex version 2.5 $} 3 | Do not modify this file! 4 | 5 | This is an auxiliary file used by the 'biblatex' package. 6 | This file may safely be deleted. It will be recreated as 7 | required. 
8 | 9 | @Control{biblatex-control, 10 | options = {2.5:0:0:1:0:0:1:1:0:0:0:0:1:1:3:1:79:+}, 11 | } 12 | -------------------------------------------------------------------------------- /paper/paper.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \providecommand\hyper@newdestlabel[2]{} 3 | \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} 4 | \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined 5 | \global\let\oldcontentsline\contentsline 6 | \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} 7 | \global\let\oldnewlabel\newlabel 8 | \gdef\newlabel#1#2{\newlabelxx{#1}#2} 9 | \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} 10 | \AtEndDocument{\ifx\hyper@anchor\@undefined 11 | \let\contentsline\oldcontentsline 12 | \let\newlabel\oldnewlabel 13 | \fi} 14 | \fi} 15 | \global\let\hyper@last\relax 16 | \gdef\HyperFirstAtBeginDocument#1{#1} 17 | \providecommand\HyField@AuxAddToFields[1]{} 18 | \providecommand\HyField@AuxAddToCoFields[2]{} 19 | \bibstyle{biblatex} 20 | \bibdata{paper-blx,smallbiblio} 21 | \citation{biblatex-control} 22 | \citation{Stanley2002-ug} 23 | \citation{Yamins2014-us} 24 | \citation{Stanley2002-ug} 25 | \citation{Olshausen1996-vz} 26 | \@writefile{toc}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } 27 | \@writefile{lof}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } 28 | \@writefile{lot}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax } 29 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}{section.1}} 30 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {II}Method}{1}{section.2}} 31 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-A}}Description of the algorithm}{1}{subsection.2.1}} 32 | \citation{Stanley2002-ug} 33 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-B}}Implementation details}{2}{subsection.2.2}} 34 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {III}Experiments}{2}{section.3}} 35 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-A}}Tasks}{2}{subsection.3.1}} 36 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {IV}Results}{2}{section.4}} 37 | \newlabel{fig:easyandhard}{{IV}{2}{Results}{section.4}{}} 38 | \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Model performance on an easy task (left panel) and a hard task (right panel). Both performance (cross-entropy loss between predicted and actual character) and number of neurons are shown as a function of time. Dark curves and shaded areas indicate median and inter-quartile range over 20 runs, respectively. The model settles on larger network size for the more complex problem. 
}}{2}{figure.1}} 39 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Performance and network size in hard and easy tasks}{2}{subsection.4.1}} 40 | \citation{Stanley2002-ug} 41 | \citation{He2015-gk} 42 | \newlabel{fig:fixedsize}{{\unhbox \voidb@x \hbox {IV-B}}{3}{Dynamical adjustment of network size in response to changing conditions}{subsection.4.2}{}} 43 | \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Comparison of performance for variable and fixed size, for the ``hard'' problem. The thick black line shows variable-size network performance and is identical to the blue curve in Fig. \ref {fig:easyandhard}, right panel. Thin colored curves indicate performance of fixed-size networks of various sizes. Curves show medians over 20 runs; inter-quartile ranges (not shown for clarity) are comparable to those seen in Fig. \ref {fig:easyandhard}. Variable-size networks outperform fixed-size networks for the problem described here. }}{3}{figure.2}} 44 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}Dynamical adjustment of network size in response to changing conditions}{3}{subsection.4.2}} 45 | \@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and future work}{3}{section.5}} 46 | \newlabel{fig:easyhardeasy}{{\unhbox \voidb@x \hbox {IV-B}}{3}{Dynamical adjustment of network size in response to changing conditions}{figure.2}{}} 47 | \@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Dynamic adjustment of network size in response to abrupt complexification and simplification of an ongoing task.}}{3}{figure.3}} 48 | -------------------------------------------------------------------------------- /paper/paper.bbl: -------------------------------------------------------------------------------- 1 | % $ biblatex auxiliary file $ 2 | % $ biblatex version 2.5 $ 3 | % Do not modify the above lines! 4 | % 5 | % This is an auxiliary file used by the 'biblatex' package. 6 | % This file may safely be deleted. It will be recreated as 7 | % required. 8 | % 9 | \begingroup 10 | \makeatletter 11 | \@ifundefined{ver@biblatex.sty} 12 | {\@latex@error 13 | {Missing 'biblatex' package} 14 | {The bibliography requires the 'biblatex' package.} 15 | \aftergroup\endinput} 16 | {} 17 | \endgroup 18 | 19 | \entry{He2015-gk}{article}{} 20 | \name{author}{4}{}{% 21 | {{}% 22 | {He}{H.}% 23 | {Kaiming}{K.}% 24 | {}{}% 25 | {}{}}% 26 | {{}% 27 | {Zhang}{Z.}% 28 | {Xiangyu}{X.}% 29 | {}{}% 30 | {}{}}% 31 | {{}% 32 | {Ren}{R.}% 33 | {Shaoqing}{S.}% 34 | {}{}% 35 | {}{}}% 36 | {{}% 37 | {Sun}{S.}% 38 | {Jian}{J.}% 39 | {}{}% 40 | {}{}}% 41 | } 42 | \strng{namehash}{HK+1} 43 | \strng{fullhash}{HKZXRSSJ1} 44 | \field{sortinit}{H} 45 | \field{abstract}{% 46 | Deeper neural networks are more difficult to train. We present a residual 47 | learning framework to ease the training of networks that are substantially 48 | deeper than those used previously. We explicitly reformulate the layers as 49 | learning residual functions with reference to the layer inputs, instead of 50 | learning unreferenced functions. 
We provide comprehensive empirical evidence 51 | showing that these residual networks are easier to optimize, and can gain 52 | accuracy from considerably increased depth. On the ImageNet dataset we 53 | evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG 54 | nets but still having lower complexity. An ensemble of these residual nets 55 | achieves 3.57\% error on the ImageNet test set. This result won the 1st place 56 | on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 57 | with 100 and 1000 layers. The depth of representations is of central 58 | importance for many visual recognition tasks. Solely due to our extremely 59 | deep representations, we obtain a 28\% relative improvement on the COCO 60 | object detection dataset. Deep residual nets are foundations of our 61 | submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st 62 | places on the tasks of ImageNet detection, ImageNet localization, COCO 63 | detection, and COCO segmentation.% 64 | } 65 | \verb{eprint} 66 | \verb 1512.03385 67 | \endverb 68 | \field{title}{Deep Residual Learning for Image Recognition} 69 | \field{eprinttype}{arXiv} 70 | \field{eprintclass}{cs.CV} 71 | \field{year}{2015} 72 | \warn{\item Invalid format of field 'month'} 73 | \endentry 74 | 75 | \entry{Olshausen1996-vz}{article}{} 76 | \name{author}{2}{}{% 77 | {{}% 78 | {Olshausen}{O.}% 79 | {Bruno~A}{B.~A.}% 80 | {}{}% 81 | {}{}}% 82 | {{}% 83 | {Field}{F.}% 84 | {David~J}{D.~J.}% 85 | {}{}% 86 | {}{}}% 87 | } 88 | \list{language}{1}{% 89 | {en}% 90 | } 91 | \strng{namehash}{OBAFDJ1} 92 | \strng{fullhash}{OBAFDJ1} 93 | \field{sortinit}{O} 94 | \field{number}{6583} 95 | \field{pages}{607\bibrangedash 609} 96 | \field{title}{Emergence of simple-cell receptive field properties by learning 97 | a sparse code for natural images} 98 | \field{volume}{381} 99 | \field{journaltitle}{Nature} 100 | \field{year}{1996} 101 | \warn{\item Invalid format of field 'month'} 102 | \endentry 103 | 104 | \entry{Stanley2002-ug}{article}{} 105 | \name{author}{2}{}{% 106 | {{}% 107 | {Stanley}{S.}% 108 | {Kenneth~O}{K.~O.}% 109 | {}{}% 110 | {}{}}% 111 | {{}% 112 | {Miikkulainen}{M.}% 113 | {Risto}{R.}% 114 | {}{}% 115 | {}{}}% 116 | } 117 | \list{language}{1}{% 118 | {en}% 119 | } 120 | \strng{namehash}{SKOMR1} 121 | \strng{fullhash}{SKOMR1} 122 | \field{sortinit}{S} 123 | \field{abstract}{% 124 | An important question in neuroevolution is how to gain an advantage from 125 | evolving neural network topologies along with weights. We present a method, 126 | NeuroEvolution of Augmenting Topologies (NEAT), which outperforms the best 127 | fixed-topology method on a challenging benchmark reinforcement learning task. 128 | We claim that the increased efficiency is due to (1) employing a principled 129 | method of crossover of different topologies, (2) protecting structural 130 | innovation using speciation, and (3) incrementally growing from minimal 131 | structure. We test this claim through a series of ablation studies that 132 | demonstrate that each component is necessary to the system as a whole and to 133 | each other. What results is significantly faster learning. 
NEAT is also an 134 | important contribution to GAs because it shows how it is possible for 135 | evolution to both optimize and complexify solutions simultaneously, offering 136 | the possibility of evolving increasingly complex solutions over generations, 137 | and strengthening the analogy with biological evolution.% 138 | } 139 | \field{number}{2} 140 | \field{pages}{99\bibrangedash 127} 141 | \field{title}{Evolving neural networks through augmenting topologies} 142 | \field{volume}{10} 143 | \field{journaltitle}{Evol. Comput.} 144 | \field{year}{2002} 145 | \endentry 146 | 147 | \entry{Yamins2014-us}{article}{} 148 | \name{author}{6}{}{% 149 | {{}% 150 | {Yamins}{Y.}% 151 | {Daniel L~K}{D.~L.~K.}% 152 | {}{}% 153 | {}{}}% 154 | {{}% 155 | {Hong}{H.}% 156 | {Ha}{H.}% 157 | {}{}% 158 | {}{}}% 159 | {{}% 160 | {Cadieu}{C.}% 161 | {Charles~F}{C.~F.}% 162 | {}{}% 163 | {}{}}% 164 | {{}% 165 | {Solomon}{S.}% 166 | {Ethan~A}{E.~A.}% 167 | {}{}% 168 | {}{}}% 169 | {{}% 170 | {Seibert}{S.}% 171 | {Darren}{D.}% 172 | {}{}% 173 | {}{}}% 174 | {{}% 175 | {DiCarlo}{D.}% 176 | {James~J}{J.~J.}% 177 | {}{}% 178 | {}{}}% 179 | } 180 | \list{language}{1}{% 181 | {en}% 182 | } 183 | \strng{namehash}{YDLK+1} 184 | \strng{fullhash}{YDLKHHCCFSEASDDJJ1} 185 | \field{sortinit}{Y} 186 | \field{abstract}{% 187 | The ventral visual stream underlies key human visual object recognition 188 | abilities. However, neural encoding in the higher areas of the ventral stream 189 | remains poorly understood. Here, we describe a modeling approach that yields 190 | a quantitatively accurate model of inferior temporal (IT) cortex, the highest 191 | ventral cortical area. Using high-throughput computational techniques, we 192 | discovered that, within a class of biologically plausible hierarchical neural 193 | network models, there is a strong correlation between a model’s 194 | categorization performance and its ability to predict individual IT neural 195 | unit response data. To pursue this idea, we then identified a high-performing 196 | neural network that matches human performance on a range of recognition 197 | tasks. Critically, even though we did not constrain this model to match 198 | neural data, its top output layer turns out to be highly predictive of IT 199 | spiking responses to complex naturalistic images at both the single site and 200 | population levels. Moreover, the model’s intermediate layers are highly 201 | predictive of neural responses in the V4 cortex, a midlevel visual area that 202 | provides the dominant cortical input to IT. These results show that 203 | performance optimization---applied in a biologically appropriate model 204 | class---can be used to build quantitative predictive models of neural 205 | processing.% 206 | } 207 | \field{number}{23} 208 | \field{pages}{8619\bibrangedash 8624} 209 | \field{title}{Performance-optimized hierarchical models predict neural 210 | responses in higher visual cortex} 211 | \field{volume}{111} 212 | \field{journaltitle}{Proc. Natl. Acad. Sci. U. S. 
A.} 213 | \field{year}{2014} 214 | \warn{\item Invalid format of field 'month'} 215 | \endentry 216 | 217 | \lossort 218 | \endlossort 219 | 220 | \endinput 221 | -------------------------------------------------------------------------------- /paper/paper.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99d (TeX Live 2015) 2 | Capacity: max_strings=35307, hash_size=35307, hash_prime=30011 3 | The top-level auxiliary file: paper.aux 4 | The style file: biblatex.bst 5 | Reallocated singl_function (elt_size=4) to 100 items from 50. 6 | Reallocated singl_function (elt_size=4) to 100 items from 50. 7 | Reallocated singl_function (elt_size=4) to 100 items from 50. 8 | Reallocated singl_function (elt_size=4) to 100 items from 50. 9 | Reallocated singl_function (elt_size=4) to 100 items from 50. 10 | Reallocated wiz_functions (elt_size=4) to 6000 items from 3000. 11 | Reallocated singl_function (elt_size=4) to 100 items from 50. 12 | Reallocated singl_function (elt_size=4) to 100 items from 50. 13 | Reallocated singl_function (elt_size=4) to 100 items from 50. 14 | Reallocated singl_function (elt_size=4) to 100 items from 50. 15 | Reallocated singl_function (elt_size=4) to 100 items from 50. 16 | Database file #1: paper-blx.bib 17 | Database file #2: smallbiblio.bib 18 | Biblatex version: 3.0 19 | Reallocated singl_function (elt_size=4) to 100 items from 50. 20 | Reallocated wiz_functions (elt_size=4) to 9000 items from 6000. 21 | You've used 5 entries, 22 | 6047 wiz_defined-function locations, 23 | 1169 strings with 12524 characters, 24 | and the built_in function-call counts, 14691 in all, are: 25 | = -- 504 26 | > -- 526 27 | < -- 118 28 | + -- 175 29 | - -- 200 30 | * -- 1253 31 | := -- 1086 32 | add.period$ -- 0 33 | call.type$ -- 5 34 | change.case$ -- 48 35 | chr.to.int$ -- 43 36 | cite$ -- 8 37 | duplicate$ -- 1588 38 | empty$ -- 1515 39 | format.name$ -- 333 40 | if$ -- 3230 41 | int.to.chr$ -- 0 42 | int.to.str$ -- 11 43 | missing$ -- 0 44 | newline$ -- 170 45 | num.names$ -- 179 46 | pop$ -- 1337 47 | preamble$ -- 1 48 | purify$ -- 64 49 | quote$ -- 0 50 | skip$ -- 805 51 | stack$ -- 0 52 | substring$ -- 310 53 | swap$ -- 580 54 | text.length$ -- 128 55 | text.prefix$ -- 4 56 | top$ -- 1 57 | type$ -- 178 58 | warning$ -- 0 59 | while$ -- 127 60 | width$ -- 0 61 | write$ -- 164 62 | -------------------------------------------------------------------------------- /paper/paper.out: -------------------------------------------------------------------------------- 1 | \BOOKMARK [1][-]{section.1}{Introduction}{}% 1 2 | \BOOKMARK [1][-]{section.2}{Method}{}% 2 3 | \BOOKMARK [2][-]{subsection.2.1}{Description of the algorithm}{section.2}% 3 4 | \BOOKMARK [2][-]{subsection.2.2}{Implementation details}{section.2}% 4 5 | \BOOKMARK [1][-]{section.3}{Experiments}{}% 5 6 | \BOOKMARK [2][-]{subsection.3.1}{Tasks}{section.3}% 6 7 | \BOOKMARK [1][-]{section.4}{Results}{}% 7 8 | \BOOKMARK [2][-]{subsection.4.1}{Performance and network size in hard and easy tasks}{section.4}% 8 9 | \BOOKMARK [2][-]{subsection.4.2}{Dynamical adjustment of network size in response to changing conditions}{section.4}% 9 10 | \BOOKMARK [1][-]{section.5}{Conclusions and future work}{}% 10 11 | -------------------------------------------------------------------------------- /paper/paper.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ThomasMiconi/DiffRNN/c81eb14ab9d51191b4d7891e0472036077f49bcc/paper/paper.pdf -------------------------------------------------------------------------------- /paper/paper.run.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 23 | 28 | 33 | 36 | 39 | 42 | ]> 43 | 44 | 45 | latex 46 | 47 | paper.aux 48 | paper-blx.bib 49 | 50 | 51 | paper.bbl 52 | 53 | 54 | blx-compat.def 55 | biblatex.def 56 | numeric.bbx 57 | standard.bbx 58 | numeric.cbx 59 | biblatex.cfg 60 | english.lbx 61 | 62 | 63 | 64 | bibtex 65 | 66 | bibtex 67 | 68 | paper 69 | 70 | 71 | paper.aux 72 | 73 | 74 | paper.bbl 75 | 76 | 77 | paper.bbl 78 | 79 | 80 | paper.aux 81 | paper-blx.bib 82 | 83 | 84 | smallbiblio.bib 85 | 86 | 87 | biblatex.bst 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /paper/paper.tex: -------------------------------------------------------------------------------- 1 | %\documentclass{article} 2 | \documentclass[journal]{IEEEtran} 3 | 4 | %\usepackage[final,nonatbib]{nips_2016} 5 | 6 | 7 | \usepackage[utf8]{inputenc} % allow utf-8 input 8 | \usepackage[T1]{fontenc} % use 8-bit T1 fonts 9 | \usepackage{hyperref} % hyperlinks 10 | \usepackage{url} % simple URL typesetting 11 | \usepackage{booktabs} % professional-quality tables 12 | \usepackage{amsfonts} % blackboard math symbols 13 | \usepackage{nicefrac} % compact symbols for 1/2, etc. 14 | \usepackage{microtype} % microtypography 15 | \usepackage{graphicx} 16 | 17 | \usepackage[backend=bibtex]{biblatex} 18 | \bibliography{smallbiblio} 19 | \AtEveryBibitem{% 20 | \clearlist{language}% 21 | } 22 | 23 | 24 | \title{Neural networks with differentiable structure} 25 | 26 | \author{ 27 | Thomas Miconi\\%\thanks{Use footnote for providing further 28 | % information about author (webpage, alternative 29 | % address)---\emph{not} for acknowledging funding agencies.} \\ 30 | The Neurosciences Institute\\ 31 | La Jolla, CA, USA \\ 32 | \texttt{miconi@nsi.edu} \\ 33 | } 34 | 35 | \begin{document} 36 | 37 | \maketitle 38 | 39 | \begin{abstract} 40 | 41 | While gradient descent has proven highly successful in learning connection 42 | weights for neural networks, the actual structure of these networks is usually determined by hand, or by 43 | other optimization algorithms. Here we describe a simple method to make 44 | network structure differentiable, and therefore accessible to gradient descent. 45 | We test this method on recurrent neural networks applied to simple 46 | sequence prediction problems. Starting with initial networks containing only 47 | one node, the method automatically grows networks that successfully solve the 48 | tasks. The number of nodes in the final network correlates with task 49 | difficulty. The method can dynamically increase network size in response to an 50 | abrupt complexification in the task. 51 | Variable-size networks grown with the method outperform fixed-size 52 | networks of higher, lower or identical size, hinting at a possible advantage of growing networks. We conclude by discussing how this 53 | method could be applied to more complex networks, such as feedforward layered 54 | networks, or multiple-area networks of arbitrary shape. 
55 | 56 | \end{abstract} 57 | 58 | \section{Introduction} 59 | 60 | Neural networks are usually optimized by applying some form of gradient descent to 61 | the numerical parameters of a fixed connectivity graph. This method can 62 | successfully train very large networks for complex tasks. However, the actual 63 | structure of the network itself (number of neurons, connectivity graph, etc.) is usually not modified by the gradient 64 | descent algorithm. Most often, network structure is designed by hand, in a delicate process of parameter tuning. 65 | When network structure is optimized, it is generally with a different 66 | algorithm, including evolutionary techniques such as NEAT \cite{Stanley2002-ug} or heuristic-based methods such as HyperOpt \cite{Yamins2014-us}. 67 | 68 | Manual design of network structure is time-consuming and subject to arbitrary 69 | choices that may or may not reflect the demands of the task at hand. 70 | Furthermore, letting the size of the network grow autonomously may actually 71 | improve learning performance, as posited in the NEAT framework 72 | \cite{Stanley2002-ug}. It would therefore be desirable to extend the process 73 | of gradient descent to network structure itself. This requires making network 74 | structure differentiable, at least to a usable approximation. Here we describe 75 | a simple method for performing gradient descent over network structure, and 76 | show that this method can adaptively design recurrent networks of a few dozen 77 | units for simple sequence prediction tasks. 78 | 79 | 80 | \section{Method} 81 | 82 | \subsection{Description of the algorithm} 83 | 84 | Here we describe our method, in the context of recurrent networks with 85 | all-to-all potential connectivity (in the conclusion, we suggest how the method 86 | could be extended to more complex architectures, including layered feedforward 87 | networks). In this situation, structure is determined by the number of nodes in 88 | the network $N$, which automatically determines the connectivity graph as a 89 | simple square matrix of size $N \times N$. Our goal is to make the number of nodes 90 | differentiable and amenable to gradient descent and backpropagation. 91 | 92 | The first step in our method is to impose a penalty on the L1-norm (sum of absolute values) of \textit{outgoing} weights from each neuron. This includes both lateral and feedforward weights. 93 | As is well-known, minimizing the L1-norm 94 | tends to concentrate the remaining total weight among the fewest possible 95 | elements, in comparison to Euclidean L2-norm minimization. As a result, 96 | backpropagation will tend to minimize the number of neurons with non-zero total output, and 97 | thus of ``active'' neurons: each neuron must ``earn its keep'', by contributing 98 | to overall network performance, to counter-balance the effect of L1-norm 99 | minimization, or else face effective ``soft'' deletion by having its outgoing weights 100 | fall to zero.\footnote{Importantly, note that L1 regularization on outgoing weights is quite different from directly imposing an 101 | L1 regularization on neuron activities themselves. L1 regularization of 102 | neuron activities ensures that few neurons will be active \textit{at any 103 | given time}, but does not ensure that any neuron will become fully silent 104 | over extended time.
Instead, L1 regularization of neuron activities may 105 | encourage neurons to distribute and decorrelate their activations over 106 | time so that each neuron responds to a small proportion of inputs; this is 107 | precisely the (intended) effect of L1-regularization in \textit{sparse 108 | coding} schemes \cite{Olshausen1996-vz}. By contrast, penalizing outgoing weights can truly 109 | turn neurons ``on'' or ``off'' in a time-independent fashion: a neuron with 110 | zero output weights is guaranteed to be silent for any input. } 111 | 112 | This method creates a ``soft'' 113 | structural variability, whereby gradient descent tries to solve the task at 114 | hand under the constraint of minimizing the number of neurons with non-zero 115 | outgoing weights. We want to turn these ``soft'' structure changes into hard 116 | structural changes in the actual number of neurons and size of the weight 117 | matrix. To this end, we first specify a \textit{deletion threshold} $T_D$, such 118 | that any neuron for which the L1-norm of outgoing weights falls below this threshold is marked for 119 | potential deletion. Then, we simply specify that at any given time, the network 120 | must only contain a fixed, small number $k$ of neurons below the deletion 121 | threshold. If the number of sub-threshold neurons exceeds $k$, then ``excess'' 122 | sub-threshold neurons are actually deleted from the network. Conversely, if 123 | backpropagation finds it necessary to inflate neuron output weights to the extent that 124 | fewer than $k$ neurons have sub-threshold output weight norm, then we add a new neuron 125 | to the simulation, with initially random connectivity and outgoing weights 126 | initially chosen to have L1-norm exactly equal to the deletion threshold. Note that, because the threshold 127 | value is low, new neurons initially have a very small effect on overall network 128 | behavior. 129 | 130 | This mechanism allows backpropagation to adjust network size to problem 131 | demands. If more neurons are needed to solve the problem at hand, 132 | backpropagation will simply expand the outgoing weights of currently sub-threshold 133 | neurons, so as to allow them to have an impact on output computation, while adjusting their connectivity. By 134 | contrast, if new neurons fail to contribute to network performance, 135 | L1-minimization will reduce their outgoing weights and eventually drive them below 136 | deletion threshold. The sub-threshold neurons thus act as a computational 137 | reserve, ready to be mobilized if the problem at hand demands it. 138 | 139 | Finally, as a stabilization measure, we make 140 | addition and deletion probabilistic, so that whenever a neuron is to be added 141 | or deleted, the event only occurs with a certain fixed probability $P_{add}$ or 142 | $P_{del}$. As a result, the network will occasionally possess more or fewer than 143 | $k$ subthreshold neurons. All networks in our experiment start with only one 144 | node, following the philosophy of ``augmenting topologies'' expounded in NEAT 145 | \cite{Stanley2002-ug}. 146 | 147 | \subsection{Implementation details} 148 | 149 | Our implementation is based on Andrej Karpathy's \verb+min-char-rnn.py+ and 150 | inherits most of its parameters. The networks are trained for 100000 cycles, 151 | where each cycle consists of reading a sequence of 40 characters while trying 152 | to predict the next character, followed by a parameter update based on 153 | backpropagation through time.
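For concreteness, the structural update described above can be sketched in a few lines of NumPy. This is an illustrative sketch only, using the notation of this section and the parameter values given below; it is not the repository's actual update code.
\begin{verbatim}
import numpy as np

def structural_update(Wxh, Whh, Why, T_D=0.05, k=1,
                      P_add=0.01, P_del=0.05):
    # L1-norm of each hidden neuron's outgoing weights:
    # column j of Whh (recurrent) and of Why (to the output layer)
    out_norms = np.abs(Whh).sum(axis=0) + np.abs(Why).sum(axis=0)
    sub = np.where(out_norms < T_D)[0]   # marked for potential deletion
    if len(sub) > k and np.random.rand() < P_del:
        # Delete one "excess" sub-threshold neuron
        j = np.random.choice(sub)
        Wxh = np.delete(Wxh, j, axis=0)
        Whh = np.delete(np.delete(Whh, j, axis=0), j, axis=1)
        Why = np.delete(Why, j, axis=1)
    elif len(sub) < k and np.random.rand() < P_add:
        # Add one neuron with random connectivity, then rescale its
        # outgoing weights so that their L1-norm equals T_D
        n = Whh.shape[0]
        Wxh = np.vstack([Wxh, 0.01 * np.random.randn(1, Wxh.shape[1])])
        Whh = np.hstack([np.vstack([Whh, 0.01 * np.random.randn(1, n)]),
                         0.01 * np.random.randn(n + 1, 1)])
        Why = np.hstack([Why, 0.01 * np.random.randn(Why.shape[0], 1)])
        norm = np.abs(Whh[:, -1]).sum() + np.abs(Why[:, -1]).sum()
        Whh[:, -1] *= T_D / norm
        Why[:, -1] *= T_D / norm
    return Wxh, Whh, Why
\end{verbatim}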
Network output is provided by a single output 154 | layer with 4 nodes (one per possible character), each of which reports the 155 | predicted probability that the corresponding character is next in the sequence. The output layer is fully connected to the variable-size recurrent layer. 156 | Loss is defined as cross-entropy between the predicted distribution and the 157 | actual (one-hot) outcome. Any addition or deletion also occurs at the same time as parameter 158 | update (that is, at the end of each successive 40-char sequence). 159 | 160 | %% NOTE: This does not apply in the current version of the code 161 | %All multipliers are bounded from below by a low, but not trivial 162 | %value $M_{min}$. If a parameter update drives the value of a multiplier below $M_{min}$, it is automatically set to $M_{min}$. The intended effect is that every neuron (even those with multipliers 163 | %below deletion threshold) should still have a small, but not negligible effect 164 | %on network output, so that a reasonable gradient of error over any neuron's 165 | %parameters can always be computed. This allows all neurons to always be ``ready to help'' if needed. An additional side-effect is to make all multipliers strictly positive, although this is not a critical component of our method. 166 | 167 | 168 | There are thus 5 additional parameters in our method: $k$, $T_D$, $P_{add}$, 169 | $P_{del}$, and $A_{L1reg}$ (the strength of the L1-norm penalty 170 | over the weights). In all simulations shown here, those were set to $k=1$, 171 | $T_D=0.05$, $P_{add}=0.01$, $P_{del}=0.05$, and 172 | $A_{L1reg}=10^{-4}$. 173 | 174 | 175 | All code is available on GitHub at \url{https://github.com/ThomasMiconi/DiffRNN}. 176 | 177 | 178 | \section{Experiments} 179 | 180 | \subsection{Tasks} 181 | 182 | To test the plausibility of our method, we choose two simple sequence prediction 183 | problems. In each problem, the task of the network is to predict the next 184 | character in an ongoing sequence of characters. Both problems use the same 185 | alphabet, consisting of characters $a$, $b$, $($ and $)$. 186 | 187 | The first problem (``easy problem'') is composed of groups of one or more $ab$ 188 | digraphs, enclosed in matching parentheses. After every $ab$ digraph, there is 189 | a constant probability of adding an additional $ab$ digraph (p=0.75), or of closing the 190 | group with a closing parenthesis instead (p=0.25). Thus the number of digraphs in each 191 | group follows an exponential distribution. A typical sequence looks like this: 192 | 193 | \begin{center} 194 | $(abab)(ab)(ab)(ababab)(abababababab)(abab)(abababab)\ldots$ 195 | \end{center} 196 | 197 | Note that the problem is highly constrained: the only choice occurs after a 198 | $b$, when the network must decide whether to insert a $)$ or an $a$, which has 199 | a well-defined probability. Every other choice is unambiguously specified by the problem. 200 | 201 | 202 | The second problem (``hard problem'') is composed of groups of six letters 203 | enclosed in matching parentheses. The rule is that each new group must be the 204 | reverse of the previous group, with one randomly chosen letter changed.
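As an illustration, such sequences could be produced by a generator along the following lines (a schematic sketch, not the code used to produce the repository's actual input files):
\begin{verbatim}
import random

def hard_sequence(n_groups, group_len=6):
    # Each group is the reverse of the previous one,
    # with a single randomly chosen letter flipped.
    group = [random.choice("ab") for _ in range(group_len)]
    out = []
    for _ in range(n_groups):
        out.append("(" + "".join(group) + ")")
        group = group[::-1]
        i = random.randrange(group_len)
        group[i] = "a" if group[i] == "b" else "b"
    return "".join(out)
\end{verbatim}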
A 205 | typical sequence looks like this: 206 | 207 | \begin{center} 208 | $(aabbab)(babaaa)(aaabbb)(bbaaaa)(abaabb)(baaaba) \ldots$ 209 | \end{center} 210 | 211 | To reach optimal performance on this task, the network must maintain a memory 212 | of the previous sequence of six characters, and then reverse it, in addition to opening and closing parentheses. This is a 213 | more difficult problem than the previous one, and thus we expect that 214 | optimal networks for either task would look quite different from each other. 215 | 216 | \section{Results} 217 | 218 | \begin{figure}[ht] 219 | \label{fig:easyandhard} 220 | \centering 221 | \includegraphics[scale=0.9]{figE.png} 222 | \includegraphics[scale=0.9]{figH.png} 223 | \caption{Model performance on an easy task (left panel) and a hard task 224 | (right panel). Both performance (cross-entropy loss between predicted and 225 | actual character) and number of neurons are shown as a function of time. Dark curves and shaded areas indicate median and inter-quartile range over 20 runs, respectively. The 226 | model settles on larger network size for the more complex problem. } 227 | \end{figure} 228 | 229 | 230 | \subsection{Performance and network size in hard and easy tasks} 231 | 232 | 233 | Results are shown in Figure \ref{fig:easyandhard}. We show both median 234 | performance (cross-entropy loss) and median number of neurons as a function of 235 | time, over 20 runs. As expected, the hard problem leads to somewhat higher loss 236 | than the easy problem. Importantly, the hard problem elicits larger networks 237 | than the easy problem (37 neurons vs. 14 neurons after 100000 learning cycles). 238 | Thus, the algorithm appropriately allocated more neurons to solve a more difficult 239 | task. 240 | 241 | An important question is whether the use of variable-size networks has an 242 | impact on performance. We compared the performance of our algorithm against 243 | fixed-size networks with various numbers of neurons, ranging from 10 to 100, 244 | including one with the same network size as was eventually preferred by our 245 | algorithm (i.e. 37 neurons). Results are shown in figure \ref{fig:fixedsize}, 246 | again showing the median loss among 20 runs as a function of time. 247 | Intriguingly, the variable-size network actually outperforms fixed-size 248 | networks of any size. This result may reflect the advantages of ``augmenting 249 | topologies'' (starting with a minimal network and only adding complexity as 250 | needed), as expounded in NEAT \cite{Stanley2002-ug}, at least for the simple 251 | problems tackled here. 252 | 253 | 254 | 255 | \subsection{Dynamical adjustment of network size in response to changing conditions} 256 | 257 | What happens if task difficulty suddenly changes? We tested our network by 258 | switching from the ``easy'' to the ``hard'' sequence after 33000 259 | cycles, and then back again to the ``easy'' sequence after 66000 cycles. 260 | Results are shown in Figure \ref{fig:easyhardeasy}. Interestingly, the network 261 | successfully handles the abrupt complexification of the problem by allocating 262 | more neurons. Following a large increase, the network then sheds off excess 263 | neurons, without damaging performance. This process continues when the problem 264 | switches back to the ``easy'' sequence (note that performance quickly returns 265 | to optimal levels). Thus, the network successfully adapts its size to the complexity of the problem at hand. 
266 | 267 | 268 | \begin{figure}[t] 269 | \label{fig:fixedsize} 270 | \centering 271 | \includegraphics[scale=0.9]{figFS.png} 272 | \caption{Comparison of performance for variable and fixed size, for the 273 | ``hard'' problem. The thick black line shows variable-size network performance 274 | and is identical to the blue curve in Fig. \ref{fig:easyandhard}, right panel. 275 | Thin colored curves indicate performance of fixed-size networks of various 276 | sizes. Curves show medians over 20 277 | runs; inter-quartile ranges (not shown for clarity) are comparable to those 278 | seen in Fig. \ref{fig:easyandhard}. Variable-size networks outperform fixed-size 279 | networks for the problem described here. } 280 | \end{figure} 281 | 282 | \begin{figure}[b] 283 | \label{fig:easyhardeasy} 284 | \centering 285 | \includegraphics[scale=0.9]{figEHE.png} 286 | \caption{Dynamic adjustment of network size in response to abrupt complexification and simplification of an ongoing task.} 287 | \end{figure} 288 | 289 | 290 | 291 | \section{Conclusions and future work} 292 | 293 | We have described a method through which the size of a recurrent network can be 294 | modified by gradient descent. The method described here can successfully build 295 | networks of appropriate size to handle simple problems. This simple method 296 | immediately suggests several alternatives and possible extensions. 297 | 298 | For example, deletion of neurons could be biased by neuron ``age'' (i.e. how 299 | long the neuron has been present), rather than being random. Deleted neurons 300 | could be partially preserved, so that newly added neurons could actually 301 | inherit connectivity of previously deleted ones, rather than being randomly 302 | initialized. Such adaptations were not necessary for the problems considered 303 | here, but might be considered in future applications to more challenging tasks. 304 | 305 | The method described here extends naturally to layered feedforward networks. 306 | Within each layer, the method can be applied essentially unchanged to adjust 307 | layer size. The number of layers can also be made differentiable, by adding and deleting 308 | \textit{residual} layers \cite{He2015-gk} with initially low 309 | pre-additive output weights. These residual layers, which would initially have minimal impact 310 | on the network's output, would play the same role as sub-threshold neurons in 311 | the method described above. 312 | Similarly, by considering each layer as a higher-order 313 | ``node'', subject to a global outgoing norm penalty, the method described above could in 314 | principle be extended to arbitrary networks composed of multiple areas, with 315 | arbitrary connectivity between areas. Further work is needed to assess the 316 | practicality of these and other possible extensions. 317 | 318 | 319 | \small 320 | 321 | \printbibliography 322 | 323 | \end{document} 324 | -------------------------------------------------------------------------------- /paper/paper.tex.nips: -------------------------------------------------------------------------------- 1 | \documentclass{article} 2 | 3 | \usepackage[final,nonatbib]{nips_2016} 4 | 5 | 6 | \usepackage[utf8]{inputenc} % allow utf-8 input 7 | \usepackage[T1]{fontenc} % use 8-bit T1 fonts 8 | \usepackage{hyperref} % hyperlinks 9 | \usepackage{url} % simple URL typesetting 10 | \usepackage{booktabs} % professional-quality tables 11 | \usepackage{amsfonts} % blackboard math symbols 12 | \usepackage{nicefrac} % compact symbols for 1/2, etc. 
13 | \usepackage{microtype} % microtypography 14 | \usepackage{graphicx} 15 | 16 | \usepackage[backend=bibtex]{biblatex} 17 | \bibliography{smallbiblio} 18 | \AtEveryBibitem{% 19 | \clearlist{language}% 20 | } 21 | 22 | 23 | \title{Neural networks with differentiable structure} 24 | 25 | \author{ 26 | Thomas Miconi\\%\thanks{Use footnote for providing further 27 | % information about author (webpage, alternative 28 | % address)---\emph{not} for acknowledging funding agencies.} \\ 29 | The Neurosciences Institute\\ 30 | La Jolla, CA, USA \\ 31 | \texttt{miconi@nsi.edu} \\ 32 | } 33 | 34 | \begin{document} 35 | 36 | \maketitle 37 | 38 | \begin{abstract} 39 | 40 | While gradient descent has proven highly successful in learning connection 41 | weights for neural networks, the actual structure of these networks is usually determined by hand, or by 42 | other optimization algorithms. Here we describe a simple method to make 43 | network structure differentiable, and therefore accessible to gradient descent. 44 | We test this method on recurrent neural networks applied to simple 45 | sequence prediction problems. Starting with initial networks containing only 46 | one node, the method automatically grows networks that successfully solve the 47 | tasks. The number of nodes in the final network correlates with task 48 | difficulty. The method can dynamically increase network size in response to an 49 | abrupt complexification in the task. 50 | Variable-size networks grown with the method outperform fixed-size 51 | networks of higher, lower or identical size, hinting at a possible advantage of growing networks. We conclude by discussing how this 52 | method could be applied to more complex networks, such as feedforward layered 53 | networks, or multiple-area networks of arbitrary shape. 54 | 55 | \end{abstract} 56 | 57 | \section{Introduction} 58 | 59 | Neural networks are usually optimized by applying some form gradient descent to 60 | the numerical parameters of a fixed connectivity graph. This method can 61 | successfully train very large networks for complex tasks. However, the actual 62 | structure of the network itself (number of neurons, connectivity graph, etc.) is usually not modified by the gradient 63 | descent algorithm. Most often, network structure is designed by hand, in a delicate process of parameter tuning. 64 | When network structure is optimized, it is generally with a different 65 | algorithm, including evolutionary techniques such as NEAT \cite{Stanley2002-ug} or heuristic-based methods such as HyperOpt \cite{Yamins2014-us}. 66 | 67 | Manual design of network structure is time-consuming and subject to arbitrary 68 | choices that may or may not reflect the demands of the task at hand. 69 | Furthermore, letting the size of the network grow autonomously may actually 70 | improve learning performance, as posited in the NEAT framework 71 | \cite{Stanley2002-ug}. It would therefore be desirable to extend the process 72 | of gradient descent to network structure itself. This requires making network 73 | structure differentiable, at least to a usable approximation. Here we describe 74 | a simple method for performing gradient descent over network structure, and 75 | show that this method can adaptively design recurrent networks of a few dozen 76 | units for simple sequence prediction tasks. 
77 | 78 | 79 | \section{Method} 80 | 81 | \subsection{Description of the algorithm} 82 | 83 | Here we describe our method, in the context of recurrent networks with 84 | all-to-all potential connectivity (in the conclusion, we suggest how the method 85 | could be extended to more complex architectures, including layered feedforward 86 | networks). In this situation, structure is determined by the number of nodes in 87 | the network $N$, which automatically determines the connectivity graph as a 88 | simple square matrix of size $N*N$. Our goal is to make the number of nodes 89 | differentiable and amenable to gradient descent and backpropagation. 90 | 91 | The first step in our method is to impose a penalty on the L1-norm (sum of absolute values) of \textit{outgoing} weights from each neuron. This includes both lateral and feedforward weights. 92 | As is well-known, minimizing the L1-norm 93 | tends to concentrate the remaining total weight among the fewest possible 94 | elements, in comparison to Euclidean L2-norm minimization. As a result, 95 | backpropagation will tend to minimize the number of neurons with non-zero total output, and 96 | thus of ``active'' neurons: each neuron must ``earn its keep'', by contributing 97 | to overall network performance, to counter-balance the effect of L1-norm 98 | minimization, or else face effective ``soft'' deletion by having its outgoing weights 99 | fall to zero.\footnote{Importantly, note that L1 regularization on outgoing weights is quite different from directly imposing an 100 | L1 regularization on neuron activities themselves. L1 regularization of 101 | neuron activities ensures that few neurons will be active \textit{at any 102 | given time}, but does not ensure that any neuron will become fully silent 103 | over extended time. Instead, L1 regularization of neuron activities may 104 | encourage neurons to distribute and decorrelate their activations other 105 | time so that each neuron responds to a small proportion of inputs; this is 106 | precisely the (intended) effect of L1-regularization in \textit{sparse 107 | coding} schemes \cite{Olshausen1996-vz}. By contrast, penalizing outgoing weights can truly 108 | turn neurons ``on'' or ``off'' in a time-independent fashion: a neuron with 109 | zero output weights is guaranteed to be silent for any input. } 110 | 111 | This method creates a ``soft'' 112 | structural variability, whereby gradient descent tries to solve the task at 113 | hand under the constraint of minimizing the number of neurons with non-zero 114 | outgoing weights. We want to turn these ``soft'' structure changes into hard 115 | structural changes in the actual number of neurons and size of the weight 116 | matrix. To this end, we first specify a \textit{deletion threshold} $T_D$, such 117 | that any neuron for which the L1-norm of outgoing weights fall below this threshold is marked for 118 | potential deletion. Then, we simply specify that at any given time, the network 119 | must only contain a fixed, small number $k$ of neurons below the deletion 120 | threshold. If the number of sub-threshold neurons exceeds $k$, then ``excess'' 121 | sub-threshold neurons are actually deleted from the network. 
Conversely, if 122 | backpropagation finds it necessary to inflate neuron output weights to the extent that 123 | fewer than $k$ neurons have sub-threshold output weight norm, then we add a new neuron 124 | to the simulation, with initially random connectivity and outgoing weights 125 | initially chosen to have L1-norm exactly equal to the deletion threshold. Note that, because the threshold 126 | value is low, new neurons initially have a very small effect on overall network 127 | behavior. 128 | 129 | This mechanism allows backpropagation to adjust network size to problem 130 | demands. If more neurons are needed to solve the problem at hand, 131 | backpropagation will simply expand the outgoing weights of currently sub-threshold 132 | neurons, so as to allow them to have an impact on output computation, while adjusting their connectivity. By 133 | contrast, if new neurons fail to contribute to network performance, 134 | L1-minimization will reduce their outgoing weights and eventually drive them below 135 | deletion threshold. The sub-threshold neurons thus act as a computational 136 | reserve, ready to be mobilized if the problem at hand demands it. 137 | 138 | Finally, as a stabilization measure, we make 139 | addition and deletion probabilistic, so that whenever a neuron is to be added 140 | or deleted, the event only occur with a certain fixed probability $P_{add}$ or 141 | $P_{del}$. As a result, the network will occasionally possess more or less than 142 | $k$ subthreshold neurons. All networks in our experiment start with only one 143 | node, following the philosophy of ``augmenting topologies'' expounded in NEAT 144 | \cite{Stanley2002-ug}. 145 | 146 | \subsection{Implementation details} 147 | 148 | Our implementation is based on Andrej Karpathy's \verb+min-char-rnn.py+ and 149 | inherits most of its parameters. The networks are trained for 100000 cycles, 150 | where each cycle consists of reading a sequence of 40 characters while trying 151 | to predict the next character, followed by a parameter update based on 152 | backpropagation through time. Network output is provided by a single output 153 | layer with 4 nodes (one per possible character), each of which reports the 154 | predicted probability that the corresponding character is next in the sequence. The output layer is fully connected with the variable-size recurrent layer. 155 | Loss is defined as cross-entropy between the predicted distribution and the 156 | actual (one-hot) outcome. Any addition or deletion also occurs at the same time as parameter 157 | update (that is, at the end of each successive 40-char sequence). 158 | 159 | %% NOTE: This does not apply in the current version of the code 160 | %All multipliers are bounded from below by a low, but not trivial 161 | %value $M_{min}$. If a parameter update drives the value of a multiplier below $M_{min}$, it is automatically set to $M_{min}$. The intended effect is that every neuron (even those with multipliers 162 | %below deletion threshold) should still have a small, but not negligible effect 163 | %on network output, so that a reasonable gradient of error over any neuron's 164 | %parameters can always be computed. This allows all neurons to always be ``ready to help'' if needed. An additional side-effect is to make all multipliers strictly positive, although this is not a critical component of our method. 
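To make the training objective concrete, the per-character loss can be sketched as follows (illustrative NumPy with hypothetical variable names; the loss reported in the figures is the cross-entropy term, while the L1 penalty on outgoing weights is added to the quantity being minimized):
\begin{verbatim}
import numpy as np

def char_loss(y, target_ix, Whh, Why, A_L1reg=1e-4):
    # y: raw output-layer activations (one per possible character)
    p = np.exp(y - np.max(y))
    p = p / np.sum(p)                      # predicted probabilities
    cross_entropy = -np.log(p[target_ix])  # one-hot target
    l1 = A_L1reg * (np.abs(Whh).sum() + np.abs(Why).sum())
    return cross_entropy + l1, cross_entropy
\end{verbatim}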
165 | 166 | 167 | There are thus 5 additional parameters in our method: $k$, $T_D$, $P_{add}$, 168 | $P_{del}$, and $A_{L1reg}$ (the strength of the L1 regularization 169 | over the multipliers). In all simulations shown here, those were set to $k=1$, 170 | $T_D=0.05$, $P_{add}=0.01$, $P_{del}=0.05$, and 171 | $A_{L1reg}=10^{-4}$. 172 | 173 | 174 | All code is available on GitHub at \url{https://github.com/ThomasMiconi/DiffRNN}. 175 | 176 | 177 | \section{Experiments} 178 | 179 | \subsection{Tasks} 180 | 181 | To test the plausibility of our method, we choose two simple sequence prediction 182 | problems. In each problem, the task of the network is to predict the next 183 | character in an ongoing sequence of characters. Both problems use the same 184 | alphabet, consisting of characters $a$, $b$, $($ and $)$. 185 | 186 | The first problem (``easy problem'') is composed of groups of one or more $ab$ 187 | digraphs, enclosed in matching parentheses. After every $ab$ digraph, there is 188 | a constant probability of adding an additional $ab$ digraph (p=0.75), or to close the 189 | group with a closing parenthesis instead (p=0.25). Thus the number of digraphs in each 190 | group follows an exponential distribution. A typical sequence looks like this: 191 | 192 | \begin{center} 193 | $(abab)(ab)(ab)(ababab)(abababababab)(abab)(abababab)\ldots$ 194 | \end{center} 195 | 196 | Note that the problem is highly constrained: the only choice occurs after a 197 | $b$, when the network must decide whether to insert a $)$ or an $a$, which has 198 | a well-defined probability. Every other choice is unambiguously specified by the problem. 199 | 200 | 201 | The second problem (''hard problem'') is composed of groups of six letters 202 | enclosed in matching parentheses. The rule is that each new group must be the 203 | reverse of the previous group, with one randomly chosen letter changed. A 204 | typical sequence looks like this: 205 | 206 | \begin{center} 207 | $(aabbab)(babaaa)(aaabbb)(bbaaaa)(abaabb)(baaaba) \ldots$ 208 | \end{center} 209 | 210 | To reach optimal performance on this task, the network must maintain a memory 211 | of the previous sequence of six characters, and then reverse it, in addition to opening and closing parentheses. This is a 212 | more difficult problem than the previous one, and thus we expect that 213 | optimal networks for either task would look quite different from each other. 214 | 215 | \section{Results} 216 | 217 | \begin{figure}[ht] 218 | \label{fig:easyandhard} 219 | \centering 220 | \includegraphics[scale=0.9]{figE.png} 221 | \includegraphics[scale=0.9]{figH.png} 222 | \caption{Model performance on an easy task (left panel) and a hard task 223 | (right panel). Both performance (cross-entropy loss between predicted and 224 | actual character) and number of neurons are shown as a function of time. Dark curves and shaded areas indicate median and inter-quartile range over 20 runs, respectively. The 225 | model settles on larger network size for the more complex problem. } 226 | \end{figure} 227 | 228 | 229 | \subsection{Performance and network size in hard and easy tasks} 230 | 231 | 232 | Results are shown in Figure \ref{fig:easyandhard}. We show both median 233 | performance (cross-entropy loss) and median number of neurons as a function of 234 | time, over 20 runs. As expected, the hard problem leads to somewhat higher loss 235 | than the easy problem. Importantly, the hard problem elicits larger networks 236 | than the easy problem (37 neurons vs. 
14 neurons after 100000 learning cycles). 237 | Thus, the algorithm appropriately allocated more neurons to solve a more difficult 238 | task. 239 | 240 | An important question is whether the use of variable-size networks has an 241 | impact on performance. We compared the performance of our algorithm against 242 | fixed-size networks with various numbers of neurons, ranging from 10 to 100, 243 | including one with the same network size as was eventually preferred by our 244 | algorithm (i.e. 37 neurons). Results are shown in figure \ref{fig:fixedsize}, 245 | again showing the median loss among 20 runs as a function of time. 246 | Intriguingly, the variable-size network actually outperforms fixed-size 247 | networks of any size. This result may reflect the advantages of ``augmenting 248 | topologies'' (starting with a minimal network and only adding complexity as 249 | needed), as expounded in NEAT \cite{Stanley2002-ug}, at least for the simple 250 | problems tackled here. 251 | 252 | 253 | 254 | \subsection{Dynamical adjustment of network size in response to changing conditions} 255 | 256 | What happens if task difficulty suddenly changes? We tested our network by 257 | switching from the ``easy'' to the ``hard'' sequence after 33000 258 | cycles, and then back again to the ``easy'' sequence after 66000 cycles. 259 | Results are shown in Figure \ref{fig:easyhardeasy}. Interestingly, the network 260 | successfully handles the abrupt complexification of the problem by allocating 261 | more neurons. Following a large increase, the network then sheds off excess 262 | neurons, without damaging performance. This process continues when the problem 263 | switches back to the ``easy'' sequence (note that performance quickly returns 264 | to optimal levels). Thus, the network successfully adapts its size to the complexity of the problem at hand. 265 | 266 | 267 | \begin{figure}[t] 268 | \label{fig:fixedsize} 269 | \centering 270 | \includegraphics[scale=0.9]{figFS.png} 271 | \caption{Comparison of performance for variable and fixed size, for the 272 | ``hard'' problem. The thick black line shows variable-size network performance 273 | and is identical to the blue curve in Fig. \ref{fig:easyandhard}, right panel. 274 | Thin colored curves indicate performance of fixed-size networks of various 275 | sizes. Curves show medians over 20 276 | runs; inter-quartile ranges (not shown for clarity) are comparable to those 277 | seen in Fig. \ref{fig:easyandhard}. Variable-size networks outperform fixed-size 278 | networks for the problem described here. } 279 | \end{figure} 280 | 281 | \begin{figure}[b] 282 | \label{fig:easyhardeasy} 283 | \centering 284 | \includegraphics[scale=0.9]{figEHE.png} 285 | \caption{Dynamic adjustment of network size in response to abrupt complexification and simplification of an ongoing task.} 286 | \end{figure} 287 | 288 | 289 | 290 | \section{Conclusions and future work} 291 | 292 | We have described a method through which the size of a recurrent network can be 293 | modified by gradient descent. The method described here can successfully build 294 | networks of appropriate size to handle simple problems. This simple method 295 | immediately suggests several alternatives and possible extensions. 296 | 297 | For example, deletion of neurons could be biased by neuron ``age'' (i.e. how 298 | long the neuron has been present), rather than being random. 
Deleted neurons 299 | could be partially preserved, so that newly added neurons could actually 300 | inherit connectivity of previously deleted ones, rather than being randomly 301 | initialized. Such adaptations were not necessary for the problems considered 302 | here, but might be considered in future applications to more challenging tasks. 303 | 304 | The method described here extends naturally to layered feedforward networks. 305 | Within each layer, the method can be applied essentially unchanged to adjust 306 | layer size. The number of layers can also be made differentiable, by adding and deleting 307 | \textit{residual} layers \cite{He2015-gk} with initially low 308 | pre-additive output weights. These residual layers, which would initially have minimal impact 309 | on the network's output, would play the same role as sub-threshold neurons in 310 | the method described above. 311 | Similarly, by considering each layer as a higher-order 312 | ``node'', subject to a global outgoing norm penalty, the method described above could in 313 | principle be extended to arbitrary networks composed of multiple areas, with 314 | arbitrary connectivity between areas. Further work is needed to assess the 315 | practicality of these and other possible extensions. 316 | 317 | 318 | \small 319 | 320 | \printbibliography 321 | 322 | \end{document} 323 | -------------------------------------------------------------------------------- /paper/smallbiblio.bib: -------------------------------------------------------------------------------- 1 | 2 | @INPROCEEDINGS{Bergstra2013-lr, 3 | title = "Hyperopt: A Python Library for Optimizing the Hyperparameters of Machine Learning Algorithms", 4 | booktitle = "Proceedings of the 12th Python in Science Conference", 5 | author = "Bergstra, James and Yamins, Dan and Cox, David D", 6 | editor = "der Walt, St\'{e}fan van and Millman, Jarrod and Huff, Katy", 7 | pages = "13--20", 8 | year = 2013 9 | } 10 | 11 | @ARTICLE{Stanley2002-ug, 12 | title = "Evolving neural networks through augmenting topologies", 13 | author = "Stanley, Kenneth O and Miikkulainen, Risto", 14 | affiliation = "Department of Computer Sciences, The University of Texas at Austin, Austin, TX 78712, USA. kstanley@cs.utexas.edu", 15 | abstract = "An important question in neuroevolution is how to gain an advantage from evolving neural network topologies along with weights. We present a method, NeuroEvolution of Augmenting Topologies (NEAT), which outperforms the best fixed-topology method on a challenging benchmark reinforcement learning task. We claim that the increased efficiency is due to (1) employing a principled method of crossover of different topologies, (2) protecting structural innovation using speciation, and (3) incrementally growing from minimal structure. We test this claim through a series of ablation studies that demonstrate that each component is necessary to the system as a whole and to each other. What results is significantly faster learning. NEAT is also an important contribution to GAs because it shows how it is possible for evolution to both optimize and complexify solutions simultaneously, offering the possibility of evolving increasingly complex solutions over generations, and strengthening the analogy with biological evolution.", 16 | journal = "Evol. 
Comput.", 17 | volume = 10, 18 | number = 2, 19 | pages = "99--127", 20 | year = 2002, 21 | language = "en" 22 | } 23 | 24 | @ARTICLE{He2015-gk, 25 | title = "Deep Residual Learning for Image Recognition", 26 | author = "He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian", 27 | abstract = "Deeper neural networks are more difficult to train. We present a residual learning framework to ease the training of networks that are substantially deeper than those used previously. We explicitly reformulate the layers as learning residual functions with reference to the layer inputs, instead of learning unreferenced functions. We provide comprehensive empirical evidence showing that these residual networks are easier to optimize, and can gain accuracy from considerably increased depth. On the ImageNet dataset we evaluate residual nets with a depth of up to 152 layers---8x deeper than VGG nets but still having lower complexity. An ensemble of these residual nets achieves 3.57\% error on the ImageNet test set. This result won the 1st place on the ILSVRC 2015 classification task. We also present analysis on CIFAR-10 with 100 and 1000 layers. The depth of representations is of central importance for many visual recognition tasks. Solely due to our extremely deep representations, we obtain a 28\% relative improvement on the COCO object detection dataset. Deep residual nets are foundations of our submissions to ILSVRC \& COCO 2015 competitions, where we also won the 1st places on the tasks of ImageNet detection, ImageNet localization, COCO detection, and COCO segmentation.", 28 | month = "10~" # dec, 29 | year = 2015, 30 | archivePrefix = "arXiv", 31 | primaryClass = "cs.CV", 32 | eprint = "1512.03385" 33 | } 34 | 35 | % The entry below contains non-ASCII chars that could not be converted 36 | % to a LaTeX equivalent. 37 | @ARTICLE{Yamins2014-us, 38 | title = "Performance-optimized hierarchical models predict neural responses in higher visual cortex", 39 | author = "Yamins, Daniel L K and Hong, Ha and Cadieu, Charles F and Solomon, Ethan A and Seibert, Darren and DiCarlo, James J", 40 | abstract = "The ventral visual stream underlies key human visual object recognition abilities. However, neural encoding in the higher areas of the ventral stream remains poorly understood. Here, we describe a modeling approach that yields a quantitatively accurate model of inferior temporal (IT) cortex, the highest ventral cortical area. Using high-throughput computational techniques, we discovered that, within a class of biologically plausible hierarchical neural network models, there is a strong correlation between a model’s categorization performance and its ability to predict individual IT neural unit response data. To pursue this idea, we then identified a high-performing neural network that matches human performance on a range of recognition tasks. Critically, even though we did not constrain this model to match neural data, its top output layer turns out to be highly predictive of IT spiking responses to complex naturalistic images at both the single site and population levels. Moreover, the model’s intermediate layers are highly predictive of neural responses in the V4 cortex, a midlevel visual area that provides the dominant cortical input to IT. These results show that performance optimization---applied in a biologically appropriate model class---can be used to build quantitative predictive models of neural processing.", 41 | journal = "Proc. Natl. Acad. Sci. U. S. 
A.", 42 | volume = 111, 43 | number = 23, 44 | pages = "8619--8624", 45 | month = "10~" # jun, 46 | year = 2014, 47 | language = "en" 48 | } 49 | 50 | @ARTICLE{Olshausen1996-vz, 51 | title = "Emergence of simple-cell receptive field properties by learning a sparse code for natural images", 52 | author = "Olshausen, Bruno A and Field, David J", 53 | journal = "Nature", 54 | volume = 381, 55 | number = 6583, 56 | pages = "607--609", 57 | month = "13~" # jun, 58 | year = 1996, 59 | language = "en" 60 | } 61 | -------------------------------------------------------------------------------- /rnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable-structure RNN, by Thomas Miconi. 3 | 4 | Mostly based on minimal character-level Vanilla RNN model by Andrej Karpathy 5 | (@karpathy): https://gist.github.com/karpathy/d4dee566867f8291f086 6 | 7 | BSD License 8 | 9 | """ 10 | import numpy as np 11 | import math 12 | import sys 13 | 14 | # Global meta-parameters, modifiable by command line 15 | g = { 16 | 'ADDDEL': 1, 17 | 'ETA' : .01, 18 | 'NBNEUR': 40, # Number of neurons for fixed-size experiments (ignored if adddel is 1) 19 | 'MAXDW': .01, 20 | 'DIR' : '.', # The directory of input text files 21 | 'NBSTEPS' : 100000, 22 | 'COEFFWPEN' : 1e-4, 23 | 'EXPTYPE' : 'HARD', 24 | 'DELETIONTHRESHOLD': .05, 25 | 'MINMULTIP': .025, # Must be lower than DELETIONTHRESHOLD ! NOTE: Has no effect in the current version of the code. 26 | 'NBMARGIN' : 1, 27 | 'PROBADEL': .05, 28 | 'PROBAADD': .01, 29 | 'RNGSEED' : 0 30 | } 31 | 32 | # Command line parameters parsing 33 | 34 | argpairs = [sys.argv[i:i+2] for i in range(1, len(sys.argv), 2)] 35 | for argpair in argpairs: 36 | if not (argpair[0] in g): 37 | raise Exception("Error, tried to pass value of non-existent parameter "+argpair[0]) 38 | if argpair[0] == 'EXPTYPE' or argpair[0] == 'DIR': 39 | g[argpair[0]] = argpair[1] 40 | else: 41 | g[argpair[0]] = float(argpair[1]) 42 | 43 | if (g['EXPTYPE'] not in ['HARD', 'EASY', 'HARDEASY', 'EASYHARDEASY']): 44 | raise Exception('Wrong EXPTYPE value') 45 | g['NBMARGIN'] = int(g['NBMARGIN']) 46 | g['RNGSEED'] = int(g['RNGSEED']) 47 | print g 48 | 49 | np.random.seed(g['RNGSEED']) 50 | 51 | 52 | # data I/O 53 | myf = open("output.txt", "w") 54 | myf.close() 55 | if (g['EXPTYPE'] == 'EASY') | (g['EXPTYPE'] == 'EASYHARDEASY'): 56 | data = open(g['DIR'] + '/inputeasy.txt', 'r').read() # should be simple plain text file 57 | else: 58 | data = open(g['DIR'] + '/inputhard.txt', 'r').read() # should be simple plain text file 59 | chars = list(set(data)) 60 | data_size, vocab_size = len(data), len(chars) 61 | print 'data has', data_size, 'characters,', vocab_size, 'unique.'# % (data_size, vocab_size) 62 | char_to_ix = { ch:i for i,ch in enumerate(chars) } 63 | ix_to_char = { i:ch for i,ch in enumerate(chars) } 64 | 65 | # hyperparameters 66 | MAX_HIDDEN_SIZE = 100 # Maximum size of hidden layer of neurons (same as fixed size in original min-char-rnn.py) 67 | if g['ADDDEL']: 68 | hidden_size = 1 # size of hidden layer of neurons - start from 1 node. 69 | else: 70 | hidden_size = g['NBNEUR'] # fixed size 71 | seq_length = 40 # number of steps to unroll the RNN for 72 | learning_rate = g['ETA'] 73 | 74 | # network parameters 75 | Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden 76 | Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden 77 | Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden (after multiplier) to output. 
See below 78 | bh = np.zeros((hidden_size, 1)) # hidden bias 79 | by = np.zeros((vocab_size, 1)) # output bias 80 | normz = np.zeros_like(bh) 81 | 82 | ages = np.zeros(hidden_size) # Ages of all neurons. Not used at present. 83 | 84 | def lossFun(inputs, targets, hprev): 85 | """ 86 | inputs,targets are both list of integers. 87 | hprev is Hx1 array of initial hidden state 88 | returns the loss, gradients on model parameters, and last hidden state 89 | """ 90 | xs, hs, ys, ps = {}, {}, {}, {} 91 | hs[-1] = np.copy(hprev) 92 | loss = 0 93 | # forward pass 94 | for t in xrange(len(inputs)): 95 | xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation 96 | xs[t][inputs[t]] = 1 97 | 98 | hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state 99 | ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars 100 | ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars 101 | loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) 102 | 103 | # backward pass: compute gradients going backwards 104 | dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why) 105 | dbh, dby = np.zeros_like(bh), np.zeros_like(by) 106 | dhnext = np.zeros_like(hs[0]) 107 | for t in reversed(xrange(len(inputs))): 108 | dy = np.copy(ps[t]) 109 | dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here 110 | dWhy += np.dot(dy, hs[t].T) 111 | dby += dy 112 | dh = np.dot(Why.T, dy) + dhnext 113 | dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 114 | dbh += dhraw 115 | dWxh += np.dot(dhraw, xs[t].T) 116 | dWhh += np.dot(dhraw, hs[t-1].T) 117 | dhnext = np.dot(Whh.T, dhraw) 118 | #for dparam in [dWxh, dWhh, dWhy, dbh, dby]: 119 | # np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients - clipping is actually done just before the update, after learning_rate has been applied - see below 120 | return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1] 121 | 122 | def sample(h, seed_ix, n): 123 | """ 124 | sample a sequence of integers from the model 125 | h is memory state, seed_ix is seed letter for first time step 126 | """ 127 | x = np.zeros((vocab_size, 1)) 128 | x[seed_ix] = 1 129 | ixes = [] 130 | for t in xrange(n): 131 | h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh) 132 | y = np.dot(Why, h) + by 133 | p = np.exp(y) / np.sum(np.exp(y)) 134 | ix = np.random.choice(range(vocab_size), p=p.ravel()) 135 | x = np.zeros((vocab_size, 1)) 136 | x[ix] = 1 137 | ixes.append(ix) 138 | return ixes 139 | 140 | n, p = 0, 0 141 | mWxh, mWhh, mWhy = .01 * np.ones_like(Wxh), .01 * np.zeros_like(Whh), .01 * np.zeros_like(Why) 142 | mbh, mby = .01 * np.ones_like(bh), .01 * np.zeros_like(by) # memory variables for RMSProp 143 | smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 144 | 145 | 146 | while True: 147 | # prepare inputs (we're sweeping from left to right in steps seq_length long) 148 | if p+seq_length+1 >= len(data) or n == 0: 149 | hprev = np.zeros((hidden_size,1)) # reset RNN memory 150 | p = 0 # go from start of data 151 | inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] 152 | targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] 153 | 154 | # sample from the model now and then 155 | if n % 100 == 0: 156 | sample_ix = sample(hprev, inputs[0], 200) 157 | txt = ''.join(ix_to_char[ix] for ix in sample_ix) 158 | print '----\n %s \n----' % (txt, ) 159 | 160 | # forward seq_length characters 
through the net and fetch gradient 161 | loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev) 162 | smooth_loss = smooth_loss * 0.99 + loss * 0.01 163 | if n % 100 == 0: 164 | print 'iter %d, position in data %d, loss: %f , nb hidden neurons %d, sum-abs norms: %f' % (n, p, smooth_loss, hidden_size, sum(abs(normz))), # print progress 165 | print normz.T 166 | if n % 1000 == 0: 167 | with open("output.txt", "a") as myf: 168 | msg = "%d %d %f %d %f" % (n, p, smooth_loss, hidden_size, sum(abs(normz))) # print progress 169 | myf.write(msg+"\n") 170 | 171 | 172 | # perform parameter update with Adagrad 173 | 174 | for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 175 | [dWxh, dWhh, dWhy, dbh, dby], 176 | [mWxh, mWhh, mWhy, mbh, mby]): 177 | # mem += dparam * dparam # Adagrad 178 | mem += .01 * (dparam * dparam - mem) # RMSProp 179 | RMSdelta = -learning_rate * dparam / np.sqrt(mem + 1e-8) # RMSProp update 180 | np.clip(RMSdelta, -g['MAXDW'], g['MAXDW'], out = RMSdelta) # Clipping the weight modifications 181 | param += RMSdelta 182 | 183 | # Note that 1-norm penalty on weights is applied even for fized-size! If you want to have no penalty, set COEFFWPEN to 0 (but this will decrease performance). 184 | Why -= g['COEFFWPEN'] * np.sign(Why) 185 | Whh -= g['COEFFWPEN'] * np.sign(Whh) 186 | 187 | # Computing the L1-norm of outgoing weights for each neuron. 188 | # The norm of lateral weights is scaled by the number of neurons and multiplied by 4, so it should remain roughly similar to the norm of feedforward weights as the network changes size (there are 4 output neurons) 189 | normz = .5 * (np.sum(np.abs(Why), axis = 0) + 4.0 * np.sum(np.abs(Whh), axis = 0) / hidden_size) 190 | 191 | 192 | if g['ADDDEL']: 193 | 194 | 195 | # Neuron addition / deletion 196 | # Deletable neurons are those whose outgoing weights fall below a certain threshold in L1-norm. 197 | # We want to delete excess below-threshold neurons, keeping only NBMARGIN below-threshold neuron at any time; or add one new neuron if no below-threshold neuron remains. (Both with a certain probability) 198 | 199 | ages += 1 200 | 201 | #normz[normz < g['MINMULTIP']] = g['MINMULTIP'] # outgoing weight norms are clipped from below 202 | 203 | 204 | # Which neurons are above threshold ('selected' for preservation) ? 205 | sel = abs(normz) > g['DELETIONTHRESHOLD']#[0] # | (ages < 500) 206 | 207 | if sum(sel) < hidden_size - g['NBMARGIN'] : 208 | 209 | # Preserve 1-PROBADEL% of the below-threshold neurons, in addition to NBMARGIN below-threshold neurons (NBMARGIN is usually set to 1). 210 | # (Perhaps select the most recent neurons for deletion? Future work.) 
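# The bookkeeping below shuffles the below-threshold neurons, force-preserves
# NBMARGIN of them, then preserves each remaining one with probability
# 1 - PROBADEL; every weight matrix, bias, hidden state and RMSProp memory
# variable is then sliced with the same boolean mask 'sel'.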
211 | deletable = np.where(sel == False)[0] 212 | np.random.shuffle(deletable) 213 | for xx in range(g['NBMARGIN']): 214 | sel[deletable[xx]] = True 215 | deletable = deletable[g['NBMARGIN']:] 216 | for x in deletable: 217 | if np.random.rand() > g['PROBADEL']: # Note that this is a test for preservation rather than deletion, hence the > 218 | sel[x] = True 219 | 220 | 221 | # Delete all other deletable neurons 222 | hidden_size = sum(sel) 223 | Whh = Whh[sel,:][:, sel] 224 | Wxh = Wxh[sel, :] 225 | normz = normz[sel] 226 | Why = Why[:, sel] 227 | bh = bh[sel] 228 | hprev = hprev[sel] 229 | ages = ages[sel] 230 | 231 | mWxh = mWxh[sel, :] 232 | mWhh = mWhh[sel,:][:, sel] 233 | mWhy = mWhy[:, sel] 234 | mbh = mbh[sel] 235 | 236 | 237 | # Addition of new neurons, if appropriate: 238 | if hidden_size < MAX_HIDDEN_SIZE -1: 239 | if ( (sum((abs(normz) > g['DELETIONTHRESHOLD'])) > hidden_size - g['NBMARGIN']) & (np.random.rand() < g['PROBAADD'])) \ 240 | | (np.random.rand() < 1e-4): 241 | 242 | Whh = np.append(Whh, np.random.randn(1, hidden_size)*0.01, axis=0) 243 | Wxh = np.append(Wxh, np.random.randn(1, vocab_size)*0.01, axis=0) 244 | 245 | # The (absolute values of) outgoing weights of the added neuron must sum to g['DELETIONTHRESHOLD'] 246 | newWhy = np.random.randn(vocab_size,1) 247 | newWhy = .5 * g['DELETIONTHRESHOLD'] * newWhy / (1e-8 + np.sum(abs(newWhy))) 248 | Why = np.append(Why, newWhy, axis=1) 249 | 250 | newWhh = np.random.randn(hidden_size+1, 1) 251 | newWhh = .5 * hidden_size * g['DELETIONTHRESHOLD'] * newWhh / (1e-8 + 4.0 * np.sum(abs(newWhh))) 252 | #newWhh *= .01 253 | Whh = np.append(Whh, newWhh, axis=1) 254 | 255 | bh = np.append(bh, np.zeros((1,1)), axis=0) 256 | hprev = np.append(hprev, np.zeros((1,1)), axis=0) 257 | #normz = np.append(normz, g['DELETIONTHRESHOLD'] ) 258 | ages = np.append(ages, 0) 259 | 260 | mWhh = np.append(mWhh, .01 * np.ones((1, hidden_size)), axis=0) 261 | mWhh = np.append(mWhh, .01 * np.ones((hidden_size+1, 1)), axis=1) 262 | mWxh = np.append(mWxh, .01 * np.ones((1, vocab_size)), axis=0) 263 | mWhy = np.append(mWhy, .01 * np.ones((vocab_size,1)), axis=1) 264 | mbh = np.append(mbh, .01 * np.ones((1,1)), axis=0) 265 | 266 | hidden_size += 1 267 | print "Adding Neuron" 268 | 269 | 270 | 271 | p += seq_length # move data pointer 272 | n += 1 # iteration counter 273 | if (n == int(g['NBSTEPS'] / 3)) & (g['EXPTYPE'] == 'EASYHARDEASY'): 274 | data = open(g['DIR'] + '/inputhard.txt', 'r').read() # should be simple plain text file 275 | p = 0 276 | if (n == int(g['NBSTEPS'] / 2)) & (g['EXPTYPE'] == 'HARDEASY'): 277 | data = open(g['DIR'] + '/inputeasy.txt', 'r').read() # should be simple plain text file 278 | p = 0 279 | if (n == int(2 * g['NBSTEPS'] / 3)) & (g['EXPTYPE'] == 'EASYHARDEASY'): 280 | data = open(g['DIR'] + '/inputeasy.txt', 'r').read() # should be simple plain text file 281 | p = 0 282 | if n > g['NBSTEPS']: 283 | print "Done!" 284 | sys.exit(0) 285 | 286 | -------------------------------------------------------------------------------- /rnn.py.prev: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable-structure RNN, by Thomas Miconi. 
3 | 4 | Largely based on minimal character-level Vanilla RNN model by Andrej Karpathy (@karpathy): https://gist.github.com/karpathy/d4dee566867f8291f086 5 | 6 | BSD License 7 | 8 | """ 9 | import numpy as np 10 | import math 11 | import sys 12 | 13 | # Global meta-parameters, modifiable by command line 14 | g = { 15 | 'NBSTEPS' : 300000, 16 | 'COEFFMULTIPNORM' : 3e-5, 17 | 'EXPTYPE' : 'HARD', 18 | 'DELETIONTHRESHOLD': .05, 19 | 'MINMULTIP': .025, # Must be lower than DELETIONTHRESHOLD ! 20 | 'NBMARGIN' : 1, 21 | 'PROBADEL': .25, 22 | 'PROBAADD': .05, 23 | 'RNGSEED' : 0 24 | } 25 | 26 | # Command line parameters parsing 27 | 28 | argpairs = [sys.argv[i:i+2] for i in range(1, len(sys.argv), 2)] 29 | for argpair in argpairs: 30 | if not (argpair[0] in g): 31 | sys.exit("Error, tried to pass value of non-existent parameter "+argpair[0]) 32 | if argpair[0] == 'EXPTYPE': 33 | g['EXPTYPE'] = argpair[1] 34 | else: 35 | g[argpair[0]] = float(argpair[1]) 36 | 37 | if (g['EXPTYPE'] not in ['HARD', 'EASY', 'HARDEASY', 'EASYHARDEASY']): 38 | sys.exit('Wrong EXPTYPE value') 39 | g['NBMARGIN'] = int(g['NBMARGIN']) 40 | g['RNGSEED'] = int(g['RNGSEED']) 41 | print g 42 | 43 | np.random.seed(g['RNGSEED']) 44 | 45 | 46 | # data I/O 47 | # NOTE: the input files are specified two directories up because I generally use the program with a different working directory. Modify as needed. 48 | myf = open("test.txt", "w") 49 | myf.close() 50 | if (g['EXPTYPE'] == 'EASY') | (g['EXPTYPE'] == 'EASYHARDEASY'): 51 | data = open('./inputeasy.txt', 'r').read() # should be simple plain text file 52 | else: 53 | data = open('./inputhard.txt', 'r').read() # should be simple plain text file 54 | chars = list(set(data)) 55 | data_size, vocab_size = len(data), len(chars) 56 | print 'data has', data_size, 'characters,', vocab_size, 'unique.'# % (data_size, vocab_size) 57 | char_to_ix = { ch:i for i,ch in enumerate(chars) } 58 | ix_to_char = { i:ch for i,ch in enumerate(chars) } 59 | 60 | # hyperparameters 61 | MAX_HIDDEN_SIZE = 100 # Maximum size of hidden layer of neurons (same as fixed size in original min-char-rnn.py) 62 | hidden_size = 1 # size of hidden layer of neurons - start from 1 node. 63 | seq_length = 40 # number of steps to unroll the RNN for 64 | learning_rate = 1e-1 65 | 66 | # network parameters 67 | Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden 68 | Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden 69 | multips = .001 * np.ones((hidden_size, 1)); # multipliers 70 | multips[0,0] = 1.0 # Start with a multiplier of 1 on the single starting node. 71 | Wiy = np.random.randn(vocab_size, hidden_size)*0.01 # hidden (after multiplier) to output. See below 72 | bh = np.zeros((hidden_size, 1)) # hidden bias 73 | by = np.zeros((vocab_size, 1)) # output bias 74 | 75 | ages = np.zeros(hidden_size) # Ages of all neurons. Not used at present. 76 | 77 | def lossFun(inputs, targets, hprev): 78 | """ 79 | inputs,targets are both list of integers. 
80 | hprev is Hx1 array of initial hidden state 81 | returns the loss, gradients on model parameters, and last hidden state 82 | """ 83 | xs, hs, intoys, ys, ps = {}, {}, {}, {}, {} 84 | hs[-1] = np.copy(hprev) 85 | loss = 0 86 | # forward pass 87 | for t in xrange(len(inputs)): 88 | xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation 89 | xs[t][inputs[t]] = 1 90 | 91 | hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state 92 | intoys[t] = multips * hs[t] # "intoy" is the output of the hidden layer after the multipliers, which is to be fed "into" y (through the Wiy weight matrix) 93 | ys[t] = np.dot(Wiy, intoys[t]) + by # unnormalized log probabilities for next chars 94 | ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars 95 | loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) 96 | 97 | # backward pass: compute gradients going backwards 98 | dWxh, dWhh, dmultips, dWiy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(multips), np.zeros_like(Wiy) 99 | dbh, dby = np.zeros_like(bh), np.zeros_like(by) 100 | dhnext = np.zeros_like(hs[0]) 101 | for t in reversed(xrange(len(inputs))): 102 | dy = np.copy(ps[t]) 103 | dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here 104 | dWiy += np.dot(dy, intoys[t].T) 105 | dby += dy 106 | dintoy = np.dot(Wiy.T, dy) # dE/dIntoY, as a function of dE/dy 107 | 108 | # Gradient to be applied to the multipliers 109 | dmultips += (1.0 * dintoy * multips # This part descends the error gradient 110 | + g['COEFFMULTIPNORM'] * np.sign(multips)) # L1-norm regularization. The derivative of abs(x) is sign(x). Thus, descending the gradient of abs(x) over x is simply subtracting a constant multiple of sign(x). 111 | # + .001 * multips) # This would add an L2-regularization term, which we don't use here. 
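# Backprop into the hidden state: the gradient arriving from the output side
# (dintoy) is scaled elementwise by the multipliers, then the recurrent
# gradient from the next time step (dhnext) is added.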
112 | 113 | dh = dintoy * multips + dhnext 114 | dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 115 | dbh += dhraw 116 | dWxh += np.dot(dhraw, xs[t].T) 117 | dWhh += np.dot(dhraw, hs[t-1].T) 118 | dhnext = np.dot(Whh.T, dhraw) 119 | for dparam in [dWxh, dWhh, dmultips, dWiy, dbh, dby]: 120 | np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 121 | return loss, dWxh, dWhh, dmultips, dWiy, dbh, dby, hs[len(inputs)-1] 122 | 123 | def sample(h, seed_ix, n): 124 | """ 125 | sample a sequence of integers from the model 126 | h is memory state, seed_ix is seed letter for first time step 127 | """ 128 | x = np.zeros((vocab_size, 1)) 129 | x[seed_ix] = 1 130 | ixes = [] 131 | for t in xrange(n): 132 | h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh) 133 | y = np.dot(Wiy, multips * h) + by 134 | p = np.exp(y) / np.sum(np.exp(y)) 135 | ix = np.random.choice(range(vocab_size), p=p.ravel()) 136 | x = np.zeros((vocab_size, 1)) 137 | x[ix] = 1 138 | ixes.append(ix) 139 | return ixes 140 | 141 | n, p = 0, 0 142 | mWxh, mWhh, mmultips, mWiy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(multips), np.zeros_like(Wiy) 143 | mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad 144 | smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 145 | 146 | 147 | while True: 148 | # prepare inputs (we're sweeping from left to right in steps seq_length long) 149 | if p+seq_length+1 >= len(data) or n == 0: 150 | hprev = np.zeros((hidden_size,1)) # reset RNN memory 151 | p = 0 # go from start of data 152 | inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] 153 | targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] 154 | 155 | # sample from the model now and then 156 | if n % 100 == 0: 157 | sample_ix = sample(hprev, inputs[0], 200) 158 | txt = ''.join(ix_to_char[ix] for ix in sample_ix) 159 | print '----\n %s \n----' % (txt, ) 160 | 161 | # forward seq_length characters through the net and fetch gradient 162 | loss, dWxh, dWhh, dmultips, dWiy, dbh, dby, hprev = lossFun(inputs, targets, hprev) 163 | smooth_loss = smooth_loss * 0.999 + loss * 0.001 164 | if n % 100 == 0: 165 | print 'iter %d, position in data %d, loss: %f , nb hidden neurons %d, sum-abs multips: %f' % (n, p, smooth_loss, hidden_size, sum(abs(multips))), # print progress 166 | print multips.T 167 | if n % 1000 == 0: 168 | with open("output.txt", "a") as myf: 169 | msg = "%d %d %f %d %f" % (n, p, smooth_loss, hidden_size, sum(abs(multips))) # print progress 170 | myf.write(msg+"\n") 171 | 172 | 173 | # perform parameter update with Adagrad 174 | for param, dparam, mem in zip([Wxh, Whh, multips, Wiy, bh, by], 175 | [dWxh, dWhh, dmultips, dWiy, dbh, dby], 176 | [mWxh, mWhh, mmultips, mWiy, mbh, mby]): 177 | mem += dparam * dparam 178 | param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update 179 | 180 | 181 | # Neuron addition / deletion 182 | # Deletable neurons are those whose multipliers fall below threshold. 183 | # We want to delete excess below-threshold neurons, keeping only NBMARGIN below-threshold neuron at any time; or add one new neuron if no below-threshold neuron remains. 184 | 185 | ages += 1 186 | 187 | multips[multips < g['MINMULTIP']] = g['MINMULTIP'] # multipliers are clipped from below 188 | 189 | 190 | # Which neurons are above threshold ('selected' for preservation) ? 
191 | sel = (abs(multips) > g['DELETIONTHRESHOLD'])[:,0] # | (ages < 500) 192 | 193 | if sum(sel) < hidden_size - g['NBMARGIN'] : 194 | 195 | # Preserve 1-PROBADEL% of the below-threshold neurons, in addition to NBMARGIN below-threshold neurons (NBMARGIN is usually set to 1). 196 | # (Perhaps select the most recent neurons for deletion? Future work.) 197 | deletable = np.where(sel == False)[0] 198 | np.random.shuffle(deletable) 199 | for xx in range(g['NBMARGIN']): 200 | sel[deletable[xx]] = True 201 | deletable = deletable[g['NBMARGIN']:] 202 | for x in deletable: 203 | if np.random.rand() > g['PROBADEL']: # Note that this is a test for preservation rather than deletion, hence the > 204 | sel[x] = True 205 | 206 | 207 | # Delete all other deletable neurons 208 | hidden_size = sum(sel) 209 | Whh = Whh[sel,:][:, sel] 210 | Wxh = Wxh[sel, :] 211 | multips = multips[sel] 212 | Wiy = Wiy[:, sel] 213 | bh = bh[sel] 214 | hprev = hprev[sel] 215 | ages = ages[sel] 216 | 217 | mWxh = mWxh[sel, :] 218 | mWhh = mWhh[sel,:][:, sel] 219 | mmultips = mmultips[sel] 220 | mWiy = mWiy[:, sel] 221 | mbh = mbh[sel] 222 | 223 | if hidden_size < MAX_HIDDEN_SIZE -1: 224 | if ( (sum((abs(multips) > g['DELETIONTHRESHOLD'])[:,0]) > hidden_size - g['NBMARGIN']) & (np.random.rand() < g['PROBAADD'])) \ 225 | | (np.random.rand() < 1e-4): 226 | # Add a new neuron 227 | Whh = np.append(Whh, np.random.randn(1, hidden_size)*0.01, axis=0) 228 | Whh = np.append(Whh, np.random.randn(hidden_size+1, 1)*0.01, axis=1) 229 | Wxh = np.append(Wxh, np.random.randn(1, vocab_size)*0.01, axis=0) 230 | Wiy = np.append(Wiy, np.random.randn(vocab_size,1)*0.01, axis=1) 231 | bh = np.append(bh, np.zeros((1,1)), axis=0) 232 | hprev = np.append(hprev, np.zeros((1,1)), axis=0) 233 | multips = np.append(multips, g['DELETIONTHRESHOLD'] * np.ones((1,1)), axis=0) # Initial multiplier for new neurons is set to deletion threshold 234 | ages = np.append(ages, 0) 235 | 236 | mWhh = np.append(mWhh, np.zeros((1, hidden_size)), axis=0) 237 | mWhh = np.append(mWhh, np.zeros((hidden_size+1, 1)), axis=1) 238 | mWxh = np.append(mWxh, np.zeros((1, vocab_size)), axis=0) 239 | mWiy = np.append(mWiy, np.zeros((vocab_size,1)), axis=1) 240 | mbh = np.append(mbh, np.zeros((1,1)), axis=0) 241 | mmultips = np.append(mmultips, np.zeros((1,1)), axis=0) 242 | 243 | hidden_size += 1 244 | print "Adding Neuron" 245 | 246 | 247 | 248 | p += seq_length # move data pointer 249 | n += 1 # iteration counter 250 | if (n == 100000) & (g['EXPTYPE'] == 'EASYHARDEASY'): 251 | data = open('./inputhard.txt', 'r').read() # should be simple plain text file 252 | p = 0 253 | if (n == 100000) & (g['EXPTYPE'] == 'HARDEASY'): 254 | data = open('./inputeasy.txt', 'r').read() # should be simple plain text file 255 | p = 0 256 | if (n == 200000) & (g['EXPTYPE'] == 'EASYHARDEASY'): 257 | data = open('./inputeasy.txt', 'r').read() # should be simple plain text file 258 | p = 0 259 | if n > g['NBSTEPS']: 260 | sys.exit(0) 261 | 262 | -------------------------------------------------------------------------------- /rnnAltern.py: -------------------------------------------------------------------------------- 1 | """ 2 | Differentiable-structure RNN, by Thomas Miconi. 3 | 4 | This is an alternative version in which the multipliers apply directly to the 5 | output of each hidden neuron, and thus also affect the recurrent connections. 6 | It also works, but produces noticeably lower performance. 
7 | 8 | Largely based on minimal character-level Vanilla RNN model by Andrej Karpathy (@karpathy): https://gist.github.com/karpathy/d4dee566867f8291f086 9 | 10 | REMINDER: if you modify something in the forward pass, remember to modify it also in the sampling function! 11 | 12 | BSD License 13 | 14 | """ 15 | import numpy as np 16 | import math 17 | import sys 18 | 19 | # Global meta-parameters, modifiable by command line 20 | g = { 21 | 'DIR': '../..', 22 | 'NBSTEPS' : 300000, 23 | 'COEFFMULTIPGRAD' : 1.0, 24 | 'COEFFMULTIPNORM' : 3e-5, 25 | 'EXPTYPE' : 'EASYHARDEASY', 26 | 'DELETIONTHRESHOLD': .05, 27 | 'MINMULTIP':.025, # Must be lower than DELETIONTHRESHOLD ! 28 | 'NBMARGIN' : 1, 29 | 'PROBADEL': .25, 30 | 'PROBAADD': .05, 31 | 'RNGSEED' : 0 32 | } 33 | 34 | # Command line parameters parsing 35 | 36 | argpairs = [sys.argv[i:i+2] for i in range(1, len(sys.argv), 2)] 37 | for argpair in argpairs: 38 | if not (argpair[0] in g): 39 | sys.exit("Error, tried to pass value of non-existent parameter "+argpair[0]) 40 | if (argpair[0] == 'EXPTYPE') or (argpair[0] == 'DIR'): 41 | g[argpair[0]] = argpair[1] 42 | else: 43 | g[argpair[0]] = float(argpair[1]) 44 | 45 | if (g['EXPTYPE'] not in ['HARD', 'EASY', 'HARDEASY', 'EASYHARDEASY']): 46 | sys.exit('Wrong EXPTYPE value') 47 | g['NBMARGIN'] = int(g['NBMARGIN']) 48 | g['RNGSEED'] = int(g['RNGSEED']) 49 | print g 50 | 51 | np.random.seed(g['RNGSEED']) 52 | 53 | 54 | # data I/O 55 | # NOTE: the input files are specified two directories up because I generally use the program with a different working directory. Modify as needed. 56 | myf = open("test.txt", "w") 57 | myf.close() 58 | if (g['EXPTYPE'] == 'EASY') | (g['EXPTYPE'] == 'EASYHARDEASY'): 59 | data = open(g['DIR']+'/inputeasy.txt', 'r').read() # should be simple plain text file 60 | else: 61 | #data = open('./inputhard.txt', 'r').read() # should be simple plain text file 62 | data = open(g['DIR']+'/inputhard.txt', 'r').read() # should be simple plain text file 63 | chars = list(set(data)) 64 | data_size, vocab_size = len(data), len(chars) 65 | print 'data has', data_size, 'characters,', vocab_size, 'unique.'# % (data_size, vocab_size) 66 | char_to_ix = { ch:i for i,ch in enumerate(chars) } 67 | ix_to_char = { i:ch for i,ch in enumerate(chars) } 68 | 69 | # hyperparameters 70 | MAX_HIDDEN_SIZE = 100 # Maximum size of hidden layer of neurons (same as fixed size in original min-char-rnn.py) 71 | hidden_size = 1 # 1 # size of hidden layer of neurons - start from 1 node. 72 | seq_length = 40 # number of steps to unroll the RNN for 73 | learning_rate = 1e-1 74 | 75 | # network parameters 76 | Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden 77 | Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden 78 | multips = .001 * np.ones((hidden_size, 1)); # multipliers 79 | multips.fill(1.0) # Start with a multiplier of 1 on the single starting node. 80 | 81 | Wiy = np.random.randn(vocab_size, hidden_size)*0.01 # hidden (after multiplier) to output. See below 82 | bh = np.zeros((hidden_size, 1)) # hidden bias 83 | by = np.zeros((vocab_size, 1)) # output bias 84 | 85 | ages = np.zeros(hidden_size) # Ages of all neurons. Not used at present. 86 | 87 | def lossFun(inputs, targets, postmultipprev): 88 | """ 89 | inputs,targets are both list of integers. 
90 | hprev is Hx1 array of initial hidden state 91 | returns the loss, gradients on model parameters, and last hidden state 92 | """ 93 | xs, hs, postmultips, ys, ps = {}, {}, {}, {}, {} 94 | postmultips[-1] = np.copy(postmultipprev) 95 | loss = 0 96 | # forward pass 97 | for t in xrange(len(inputs)): 98 | xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation 99 | xs[t][inputs[t]] = 1 100 | 101 | hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, postmultips[t-1]) + bh) # hidden state 102 | postmultips[t] = multips * hs[t] # "postmultip" is the output of the hidden layer after the multipliers, which is to be fed "into" y (through the Wiy weight matrix) 103 | ys[t] = np.dot(Wiy, postmultips[t]) + by # unnormalized log probabilities for next chars 104 | ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars 105 | loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss) 106 | 107 | # backward pass: compute gradients going backwards 108 | dWxh, dWhh, dmultips, dWiy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(multips), np.zeros_like(Wiy) 109 | dbh, dby = np.zeros_like(bh), np.zeros_like(by) 110 | dpostmultipnext = np.zeros_like(postmultips[0]) 111 | for t in reversed(xrange(len(inputs))): 112 | dy = np.copy(ps[t]) 113 | dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here 114 | dWiy += np.dot(dy, postmultips[t].T) 115 | dby += dy 116 | dpostmultip = np.dot(Wiy.T, dy) + dpostmultipnext # dE/dIntoY, as a function of dE/dy 117 | 118 | # Gradient to be applied to the multipliers 119 | dmultips += (g['COEFFMULTIPGRAD'] * dpostmultip * hs[t] # This part descends the error gradient 120 | + g['COEFFMULTIPNORM'] * np.sign(multips)) # L1-norm regularization. The derivative of abs(x) is sign(x). 121 | # + .001 * multips) # This would add an L2-regularization term, which we don't use here. 
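# Note: unlike rnn.py.prev, dpostmultip already includes the recurrent
# gradient (dpostmultipnext), because here the multipliers also gate the
# recurrent connections; only the elementwise scaling by multips remains.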
122 | 123 | dh = dpostmultip * multips 124 | dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity 125 | dbh += dhraw 126 | dWxh += np.dot(dhraw, xs[t].T) 127 | dWhh += np.dot(dhraw, postmultips[t-1].T) 128 | dpostmultipnext = np.dot(Whh.T, dhraw) 129 | for dparam in [dWxh, dWhh, dmultips, dWiy, dbh, dby]: 130 | np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients 131 | return loss, dWxh, dWhh, dmultips, dWiy, dbh, dby, postmultips[len(inputs)-1] 132 | 133 | def sample(h, seed_ix, n): 134 | """ 135 | sample a sequence of integers from the model 136 | h is memory state, seed_ix is seed letter for first time step 137 | """ 138 | x = np.zeros((vocab_size, 1)) 139 | x[seed_ix] = 1 140 | ixes = [] 141 | for t in xrange(n): 142 | h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, multips*h) + bh) 143 | y = np.dot(Wiy, multips * h) + by 144 | p = np.exp(y) / np.sum(np.exp(y)) 145 | ix = np.random.choice(range(vocab_size), p=p.ravel()) 146 | x = np.zeros((vocab_size, 1)) 147 | x[ix] = 1 148 | ixes.append(ix) 149 | return ixes 150 | 151 | n, p = 0, 0 152 | mWxh, mWhh, mmultips, mWiy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(multips), np.zeros_like(Wiy) 153 | mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad 154 | smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 155 | 156 | 157 | while True: 158 | # prepare inputs (we're sweeping from left to right in steps seq_length long) 159 | if p+seq_length+1 >= len(data) or n == 0: 160 | postmultipprev = np.zeros((hidden_size,1)) # reset RNN memory 161 | p = 0 # go from start of data 162 | inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]] 163 | targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]] 164 | 165 | # sample from the model now and then 166 | if n % 100 == 0: 167 | sample_ix = sample(postmultipprev, inputs[0], 200) 168 | txt = ''.join(ix_to_char[ix] for ix in sample_ix) 169 | print '----\n %s \n----' % (txt, ) 170 | 171 | # forward seq_length characters through the net and fetch gradient 172 | loss, dWxh, dWhh, dmultips, dWiy, dbh, dby, postmultipprev = lossFun(inputs, targets, postmultipprev) 173 | smooth_loss = smooth_loss * 0.999 + loss * 0.001 174 | if n % 100 == 0: 175 | print 'iter %d, position in data %d, loss: %f , nb hidden neurons %d, sum-abs multips: %f' % (n, p, smooth_loss, hidden_size, sum(abs(multips))), # print progress 176 | print multips.T 177 | if n % 1000 == 0: 178 | with open("output.txt", "a") as myf: 179 | msg = "%d %d %f %d %f" % (n, p, smooth_loss, hidden_size, sum(abs(multips))) # print progress 180 | myf.write(msg+"\n") 181 | 182 | 183 | # perform parameter update with Adagrad 184 | for param, dparam, mem in zip([Wxh, Whh, multips, Wiy, bh, by], 185 | [dWxh, dWhh, dmultips, dWiy, dbh, dby], 186 | [mWxh, mWhh, mmultips, mWiy, mbh, mby]): 187 | mem += dparam * dparam 188 | param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update 189 | 190 | 191 | # Neuron addition / deletion 192 | # Deletable neurons are those whose multipliers fall below threshold. 193 | # We want to delete excess below-threshold neurons, keeping only NBMARGIN below-threshold neuron at any time; or add one new neuron if no below-threshold neuron remains. 194 | 195 | ages += 1 196 | 197 | multips[multips < g['MINMULTIP']] = g['MINMULTIP'] # multipliers are clipped from below 198 | 199 | #""" 200 | 201 | # Addition and deletion of neurons 202 | # Which neurons are above threshold ('selected' for preservation) ? 
203 | sel = (abs(multips) > g['DELETIONTHRESHOLD'])[:,0] # | (ages < 500) 204 | 205 | if sum(sel) < hidden_size - g['NBMARGIN'] : 206 | 207 | # Preserve 1-PROBADEL% of the below-threshold neurons, in addition to NBMARGIN below-threshold neurons (NBMARGIN is usually set to 1). 208 | # (Perhaps select the most recent neurons for deletion? Future work.) 209 | deletable = np.where(sel == False)[0] 210 | np.random.shuffle(deletable) 211 | for xx in range(g['NBMARGIN']): 212 | sel[deletable[xx]] = True 213 | deletable = deletable[g['NBMARGIN']:] 214 | for x in deletable: 215 | if np.random.rand() > g['PROBADEL']: # Note that this is a test for preservation rather than deletion, hence the > 216 | sel[x] = True 217 | 218 | 219 | # Delete all other deletable neurons 220 | hidden_size = sum(sel) 221 | Whh = Whh[sel,:][:, sel] 222 | Wxh = Wxh[sel, :] 223 | multips = multips[sel] 224 | Wiy = Wiy[:, sel] 225 | bh = bh[sel] 226 | postmultipprev = postmultipprev[sel] 227 | ages = ages[sel] 228 | 229 | mWxh = mWxh[sel, :] 230 | mWhh = mWhh[sel,:][:, sel] 231 | mmultips = mmultips[sel] 232 | mWiy = mWiy[:, sel] 233 | mbh = mbh[sel] 234 | 235 | if hidden_size < MAX_HIDDEN_SIZE -1: 236 | if ( (sum((abs(multips) > g['DELETIONTHRESHOLD'])[:,0]) > hidden_size - g['NBMARGIN']) & (np.random.rand() < g['PROBAADD'])) \ 237 | | (np.random.rand() < 1e-4): 238 | # Add a new neuron 239 | Whh = np.append(Whh, np.random.randn(1, hidden_size)*0.01, axis=0) 240 | Whh = np.append(Whh, np.random.randn(hidden_size+1, 1)*0.01, axis=1) 241 | Wxh = np.append(Wxh, np.random.randn(1, vocab_size)*0.01, axis=0) 242 | Wiy = np.append(Wiy, np.random.randn(vocab_size,1)*0.01, axis=1) 243 | bh = np.append(bh, np.zeros((1,1)), axis=0) 244 | postmultipprev = np.append(postmultipprev, np.zeros((1,1)), axis=0) 245 | multips = np.append(multips, g['DELETIONTHRESHOLD'] * np.ones((1,1)), axis=0) # Initial multiplier for new neurons is set to deletion threshold 246 | ages = np.append(ages, 0) 247 | 248 | mWhh = np.append(mWhh, np.zeros((1, hidden_size)), axis=0) 249 | mWhh = np.append(mWhh, np.zeros((hidden_size+1, 1)), axis=1) 250 | mWxh = np.append(mWxh, np.zeros((1, vocab_size)), axis=0) 251 | mWiy = np.append(mWiy, np.zeros((vocab_size,1)), axis=1) 252 | mbh = np.append(mbh, np.zeros((1,1)), axis=0) 253 | mmultips = np.append(mmultips, np.zeros((1,1)), axis=0) 254 | 255 | hidden_size += 1 256 | print "Adding Neuron" 257 | 258 | #""" 259 | 260 | p += seq_length # move data pointer 261 | n += 1 # iteration counter 262 | if (n == int(g['NBSTEPS'] / 3)) & (g['EXPTYPE'] == 'EASYHARDEASY'): 263 | data = open(g['DIR']+'/inputhard.txt', 'r').read() # should be simple plain text file 264 | p = 0 265 | if (n == int(g['NBSTEPS'] / 3)) & (g['EXPTYPE'] == 'HARDEASY'): 266 | data = open(g['DIR']+'/inputeasy.txt', 'r').read() # should be simple plain text file 267 | p = 0 268 | if (n == 2 * int(g['NBSTEPS'] / 3)) & (g['EXPTYPE'] == 'EASYHARDEASY'): 269 | data = open(g['DIR']+'/inputeasy.txt', 'r').read() # should be simple plain text file 270 | p = 0 271 | if n > g['NBSTEPS']: 272 | sys.exit(0) 273 | 274 | -------------------------------------------------------------------------------- /runexp.py: -------------------------------------------------------------------------------- 1 | # Submit jobs to the cluster. 2 | 3 | # /opt/python-2.7.10/bin/python 4 | 5 | 6 | import sys 7 | import os 8 | import shutil 9 | 10 | """ 11 | g = { 12 | 'COEFFMULTIPNORM' : 3e-5, 13 | 'DELETIONTHRESHOLD': .01, 14 | 'MINMULTIP': .01*.25, # Must be lower than DELETIONTHRESHOLD ! 
15 | 'NBMARGIN' : 1, 16 | 'PROBADEL': .003, 17 | 'PROBAADD': .1, 18 | 'RNGSEED' : 0 19 | } 20 | """ 21 | allopts = [ 22 | 23 | #"HIDDENSIZE 10 NBSTEPS 300000", 24 | #"HIDDENSIZE 30 NBSTEPS 300000", 25 | #"HIDDENSIZE 22 NBSTEPS 300000", 26 | #"HIDDENSIZE 50 NBSTEPS 300000", 27 | ##"HIDDENSIZE 70 NBSTEPS 300000", 28 | #"HIDDENSIZE 100 NBSTEPS 300000", 29 | 30 | 31 | "EXPTYPE EASY COEFFMULTIPNORM 3e-5 NBMARGIN 1 DELETIONTHRESHOLD .05 MINMULTIP .025 PROBADEL .25 PROBAADD .05 NBSTEPS 300000", 32 | #"EXPTYPE HARD COEFFMULTIPNORM 3e-5 NBMARGIN 1 DELETIONTHRESHOLD .05 MINMULTIP .025 PROBADEL .25 PROBAADD .05 NBSTEPS 300000", 33 | #"EXPTYPE EASYHARDEASY COEFFMULTIPNORM 3e-5 NBMARGIN 1 DELETIONTHRESHOLD .05 MINMULTIP .025 PROBADEL .25 PROBAADD .05 NBSTEPS 300000", 34 | 35 | 36 | ] 37 | 38 | 39 | for optionz in allopts: 40 | 41 | #dirname = "trial-ref-" + optionz.replace(' ', '-') 42 | #dirname = "trial-fixedsize-CMN-" + optionz.replace(' ', '-') 43 | dirname = "trial-ref-CMN-" + optionz.replace(' ', '-') 44 | 45 | if os.path.exists(dirname): 46 | shutil.rmtree(dirname) 47 | os.mkdir(dirname) 48 | os.chdir(dirname) 49 | print os.getcwd() 50 | 51 | for v in range(20): 52 | os.mkdir("v"+str(v)) 53 | os.chdir("v"+str(v)) 54 | CMD = "bsub -q short -W 4:00 -eo e.txt -g /rnn /opt/python-2.7.10/bin/python ../../rnn.py " + optionz + " RNGSEED " + str(v) 55 | #CMD = "bsub -q short -W 4:00 -eo e.txt -oo o.txt -g /rnn /opt/python-2.7.10/bin/python ../../rnn.py " + optionz + " RNGSEED " + str(v) 56 | #CMD = "bsub -q short -W 6:00 -eo e.txt -oo o.txt -g /rnn /opt/python-2.7.10/bin/python ../../min-char-rnn-param.py " + optionz + " RNGSEED " + str(v) # For fixed-size 57 | #print CMD 58 | retval = os.system(CMD) 59 | print retval 60 | os.chdir('..') 61 | 62 | os.chdir('..') 63 | 64 | 65 | #print dirname 66 | #for RNGSEED in range(2): 67 | #st = "python rnn.py COEFFMULTIPNORM " + str(CMN) + " DELETIONTHRESHOLD " + str(DT) + " MINMULTIP " \ 68 | #+ str(MMmultiplierofDT*DT) + " PROBADEL " + str(PD) + " PROBAADD " + str(PAmultiplierofPD * PD) \ 69 | #+ " RNGSEED " + str(RNGSEED) + " NUMBERMARGIN " + str(NM) 70 | 71 | 72 | 73 | 74 | --------------------------------------------------------------------------------