├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── LICENSE
├── README
└── src
├── Test.java
└── com
└── evolvingstuff
├── DistractedSequenceRecall.java
├── IAgentSupervised.java
├── IdentityNeuron.java
├── Neuron.java
├── NeuronType.java
├── SigmoidNeuron.java
├── SimpleLSTM.java
├── TanhNeuron.java
└── util.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/*
2 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | SimpleLSTM
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Mon Jun 04 21:08:35 PDT 2012
2 | eclipse.preferences.version=1
3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.6
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.source=1.6
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright (c) 2013 Thomas Lahore
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | This is a recurrent neural network inspired by Long Short-Term Memory (LSTM), but with a much simpler architecture.
2 |
3 | I will be adding more documentation to describe this architecture shortly. For now, a brief summary is that it has only a single gate (similar to the Forget Gate in LSTM) and an input squashing function. Rather than acting to reset the state of the cell, the gate modulates a weighted average of the current input with the state of the cell at the previous time step. So if the gate is fully active, the cell will ignore its current input and fully retain its state. If the gate is inactive then the cell will lose all traces of its previous state and shift to match the current input.
4 |
5 | So far on a few tests I've done, it appears to perform fairly well, although it tends to need more cell blocks than LSTM to do the same thing.
6 |
--------------------------------------------------------------------------------
/src/Test.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import com.evolvingstuff.DistractedSequenceRecall;
3 | import com.evolvingstuff.SimpleLSTM;
4 |
5 | public class Test {
6 | public static void main(String[] args) throws Exception {
7 |
8 | System.out.println("Test of SimpleLSTM\n");
9 |
10 | Random r = new Random(1234);
11 | DistractedSequenceRecall task = new DistractedSequenceRecall(r);
12 |
13 | int cell_blocks = 15;
14 | SimpleLSTM slstm = new SimpleLSTM(r, task.GetObservationDimension(), task.GetActionDimension(), cell_blocks);
15 |
16 | for (int epoch = 0; epoch < 5000; epoch++) {
17 | double fit = task.EvaluateFitnessSupervised(slstm);
18 | if (epoch % 10 == 0)
19 | System.out.println("["+epoch+"] error = " + (1 - fit));
20 | }
21 | System.out.println("done.");
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/DistractedSequenceRecall.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Random;
6 |
7 | public class DistractedSequenceRecall {
8 |
9 | int tests = 1000;
10 | int observation_dimension = 10;
11 | int action_dimension = 4;
12 | boolean validation_mode = false;
13 | Random r;
14 |
15 | public DistractedSequenceRecall(Random r) {
16 | this.r = r;
17 | }
18 |
19 | class Interaction {
20 | double[] observation;
21 | double[] target_output;
22 | boolean do_reset;
23 | }
24 |
25 | private List GenerateInteractions(int tests) {
26 | List result = new ArrayList();
27 | for (int test = 0; test < tests; test++) {
28 | int[] seq = new int[22];
29 | int target1 = r.nextInt(4);
30 | int target2 = r.nextInt(4);
31 | for (int t = 0; t < 22; t++) {
32 | seq[t] = r.nextInt(4)+4;//+4 so as not to overlap with target symbols
33 | }
34 | int loc1 = r.nextInt(22);
35 | int loc2 = r.nextInt(22);
36 | while (loc1 == loc2)
37 | loc2 = r.nextInt(22);
38 | if (loc1 > loc2) {
39 | int temp = loc1;
40 | loc1 = loc2;
41 | loc2 = temp;
42 | }
43 | seq[loc1] = target1;
44 | seq[loc2] = target2;
45 |
46 | for (int t = 0; t < seq.length; t++) {
47 | double[] input = new double[observation_dimension];
48 | input[seq[t]] = 1.0;
49 |
50 | Interaction inter = new Interaction();
51 | if (t == 0)
52 | inter.do_reset = true;
53 | inter.observation = input;
54 | result.add(inter);
55 | }
56 | //final 2 steps
57 | double[] input1 = new double[observation_dimension];
58 | input1[8] = 1.0;
59 | double[] target_output1 = new double[action_dimension];
60 | target_output1[target1] = 1.0;
61 | Interaction inter1 = new Interaction();
62 | inter1.observation = input1;
63 | inter1.target_output = target_output1;
64 | result.add(inter1);
65 |
66 | double[] input2 = new double[observation_dimension];
67 | input2[9] = 1.0;
68 | double[] target_output2 = new double[action_dimension];
69 | target_output2[target2] = 1.0;
70 | Interaction inter2 = new Interaction();
71 | inter2.observation = input2;
72 | inter2.target_output = target_output2;
73 | result.add(inter2);
74 | }
75 | return result;
76 | }
77 |
78 | public double EvaluateFitnessSupervised(IAgentSupervised agent) throws Exception {
79 |
80 | List interactions = this.GenerateInteractions(tests);
81 |
82 | double fit = 0;
83 | double max_fit = 0;
84 |
85 | for (Interaction inter : interactions) {
86 |
87 | if (inter.do_reset)
88 | agent.Reset();
89 |
90 | if (inter.target_output == null)
91 | agent.Next(inter.observation);
92 | else {
93 | double[] actual_output = null;
94 |
95 | if (validation_mode == true)
96 | actual_output = agent.Next(inter.observation);
97 | else
98 | actual_output = agent.Next(inter.observation, inter.target_output);
99 |
100 | if (util.argmax(actual_output) == util.argmax(inter.target_output))
101 | fit++;
102 |
103 | max_fit++;
104 | }
105 | }
106 | return fit/max_fit;
107 | }
108 |
109 |
110 | public int GetActionDimension() {
111 | return action_dimension;
112 | }
113 |
114 | public int GetObservationDimension() {
115 | return observation_dimension;
116 | }
117 |
118 | }
119 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/IAgentSupervised.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * A supervised agent that processes observations one time step at a time
 * and can be reset between sequences.
 */
public interface IAgentSupervised
{
	/** Clears all internal state (e.g. recurrent context) before a new sequence. */
	void Reset();
	/** Processes one observation AND learns from the supplied target; returns the agent's output. */
	double[] Next(double[] input, double[] target_output) throws Exception;
	/** Processes one observation without learning; returns the agent's output. */
	double[] Next(double[] input) throws Exception;
}
9 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/IdentityNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class IdentityNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x)
7 | {
8 | return x;
9 | }
10 |
11 | @Override
12 | public double Derivative(double x) {
13 | // TODO Auto-generated method stub
14 | return 1;
15 | }
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/Neuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public abstract class Neuron
4 | {
5 | public static Neuron Factory(NeuronType neuron_type)
6 | {
7 | if (neuron_type == NeuronType.Sigmoid)
8 | return new SigmoidNeuron();
9 | else if (neuron_type == NeuronType.Identity)
10 | return new IdentityNeuron();
11 | else if (neuron_type == NeuronType.Tanh)
12 | return new TanhNeuron();
13 | else
14 | System.out.println("ERROR: unknown neuron type");
15 | return null;
16 | }
17 |
18 | abstract public double Activate(double x);
19 | abstract public double Derivative(double x);
20 | }
21 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/NeuronType.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * The activation-function kinds that {@link Neuron#Factory} can construct.
 */
public enum NeuronType
{
	Sigmoid,   // logistic: 1 / (1 + e^-x)
	Identity,  // pass-through: f(x) = x
	Tanh       // hyperbolic tangent
}
9 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/SigmoidNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class SigmoidNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x) {
7 | return 1 / (1 + Math.exp(-x));
8 | }
9 |
10 | @Override
11 | public double Derivative(double x) {
12 | double act = Activate(x);
13 | return act * (1 - act);
14 | }
15 |
16 |
17 | }
18 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/SimpleLSTM.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | import java.util.*;
4 |
/**
 * A simplified LSTM-style recurrent network with a single gate per cell.
 *
 * Each memory cell keeps a scalar state ("context"). At every step a gate F
 * and an input squasher G are computed from the concatenated
 * [external input, previous context, bias] vector, and the new cell state is
 * the gated blend h = f*h_prev + (1-f)*g: a fully open gate (f=1) retains the
 * old state, a closed gate (f=0) replaces it with the squashed input.
 * Output weights are trained by online gradient descent; the recurrent
 * weights use forward-accumulated partials (RTRL-style) held in dSdF/dSdG.
 *
 * NOTE(review): learning happens inside Next(input, target_output); there is
 * no separate training method.
 */
public class SimpleLSTM implements IAgentSupervised
{

	private double init_weight_range = 0.1;   // weights start uniform in [-0.1, 0.1]
	private int full_input_dimension;         // external input + recurrent context + bias
	private int output_dimension;
	private int cell_blocks;
	private Neuron F;                         // gate activation
	private Neuron G;                         // input-squashing activation

	private double [] context;                // cell states from the previous time step

	private double [][] weightsF;             // [cell][full_input] gate weights
	private double [][] weightsG;             // [cell][full_input] input weights
	private double [][] weightsOut;           // [output][cell + bias] readout weights

	// Accumulated partials dS/dw of each cell state w.r.t. its incoming
	// weights (forward-mode credit assignment through time).
	private double [][] dSdF;
	private double [][] dSdG;

	private NeuronType neuron_type_F = NeuronType.Sigmoid;
	private NeuronType neuron_type_G = NeuronType.Sigmoid;

	private double SCALE_OUTPUT_DELTA = 1.0;  // multiplier on the output error signal

	public static double learningRate = 0.07;//0.07

	/**
	 * @param r                source of randomness for weight initialization
	 * @param input_dimension  size of the external observation vector
	 * @param output_dimension size of the output vector
	 * @param cell_blocks      number of memory cells
	 */
	public SimpleLSTM(Random r, int input_dimension, int output_dimension, int cell_blocks)
	{
		this.output_dimension = output_dimension;
		this.cell_blocks = cell_blocks;

		context = new double[cell_blocks];

		full_input_dimension = input_dimension + cell_blocks + 1; //+1 for bias

		F = Neuron.Factory(neuron_type_F);
		G = Neuron.Factory(neuron_type_G);

		weightsF = new double[cell_blocks][full_input_dimension];
		weightsG = new double[cell_blocks][full_input_dimension];

		dSdF = new double[cell_blocks][full_input_dimension];
		dSdG = new double[cell_blocks][full_input_dimension];

		for (int i = 0; i < full_input_dimension; i++) {
			for (int j = 0; j < cell_blocks; j++) {
				weightsF[j][i] = (r.nextDouble() * 2 - 1) * init_weight_range;
				weightsG[j][i] = (r.nextDouble() * 2 - 1) * init_weight_range;
			}
		}

		weightsOut = new double[output_dimension][cell_blocks + 1];

		for (int j = 0; j < cell_blocks + 1; j++) {
			for (int k = 0; k < output_dimension; k++)
				weightsOut[k][j] = (r.nextDouble() * 2 - 1) * init_weight_range;
		}
	}

	/** Clears the recurrent state and accumulated partials (start of a new sequence). */
	public void Reset()
	{
		for (int c = 0; c < context.length; c++)
			context[c] = 0.0;
		//reset accumulated partials
		for (int c = 0; c < cell_blocks; c++) {
			for (int i = 0; i < full_input_dimension; i++) {
				this.dSdG[c][i] = 0;
				this.dSdF[c][i] = 0;
			}
		}
	}

	/** Forward pass only — no weight update. */
	public double[] Next(double[] input)
	{
		return Next(input, null);
	}

	/** Debug placeholder; prints no useful information yet. */
	public void Display()
	{
		System.out.println("==============================");
		System.out.println("DAGate: todo...");
		System.out.println("\n==============================");
	}

	/**
	 * One time step: forward pass, then (if target_output != null) an online
	 * weight update toward the target.
	 *
	 * @param input         external observation for this step
	 * @param target_output desired output, or null for a pure forward pass
	 * @return the output vector for this step (linear, not squashed)
	 */
	public double[] Next(double[] input, double[] target_output) {

		//setup input vector: [external input, previous context, bias]
		double[] full_input = new double[full_input_dimension];
		int loc = 0;
		for (int i = 0; i < input.length; i++)
			full_input[loc++] = input[i];
		for (int c = 0; c < context.length; c++)
			full_input[loc++] = context[c];
		full_input[loc++] = 1.0; //bias

		//cell block arrays
		double[] sumF = new double[cell_blocks];
		double[] actF = new double[cell_blocks];
		double[] sumG = new double[cell_blocks];
		double[] actG = new double[cell_blocks];
		double[] actH = new double[cell_blocks];

		//inputs to cell blocks (weighted sums for gate F and input G)
		for (int i = 0; i < full_input_dimension; i++)
		{
			for (int j = 0; j < cell_blocks; j++)
			{
				sumF[j] += weightsF[j][i] * full_input[i];
				sumG[j] += weightsG[j][i] * full_input[i];
			}
		}

		for (int j = 0; j < cell_blocks; j++) {
			actF[j] = F.Activate(sumF[j]);
			actG[j] = G.Activate(sumG[j]);
			// gated blend of previous state and squashed input
			actH[j] = actF[j] * context[j] + (1 - actF[j]) * actG[j];
		}

		//prepare hidden layer plus bias
		double [] full_hidden = new double[cell_blocks + 1];
		loc = 0;
		for (int j = 0; j < cell_blocks; j++)
			full_hidden[loc++] = actH[j];
		full_hidden[loc++] = 1.0; //bias

		//calculate output (linear readout)
		double[] output = new double[output_dimension];
		for (int k = 0; k < output_dimension; k++)
		{
			for (int j = 0; j < cell_blocks + 1; j++)
				output[k] += weightsOut[k][j] * full_hidden[j];
			//output not squashed
		}

		//////////////////////////////////////////////////////////////
		//////////////////////////////////////////////////////////////
		//BACKPROP
		//////////////////////////////////////////////////////////////
		//////////////////////////////////////////////////////////////

		//update accumulated partials dS/dw (forward-mode); done on every
		//step so they track the state even when no target is given
		for (int j = 0; j < cell_blocks; j++) {

			double f = actF[j];
			double df = F.Derivative(sumF[j]);
			double g = actG[j];
			double dg = G.Derivative(sumG[j]);
			double h_ = context[j]; //prev value of h

			for (int i = 0; i < full_input_dimension; i++) {

				double prevdSdF = dSdF[j][i];
				double prevdSdG = dSdG[j][i];
				double in = full_input[i];

				// (1-f) gates the fresh-input path; f carries the old partial forward
				dSdG[j][i] = ((1 - f)*dg*in) + (f*prevdSdG);
				// the gate trades off the old state h_ against the new input g
				dSdF[j][i] = ((h_- g)*df*in) + (f*prevdSdF);
			}
		}

		if (target_output != null) {

			//output to hidden
			double [] deltaOutput = new double[output_dimension];
			double [] deltaH = new double[cell_blocks];
			for (int k = 0; k < output_dimension; k++) {
				deltaOutput[k] = (target_output[k] - output[k]) * SCALE_OUTPUT_DELTA;
				for (int j = 0; j < cell_blocks; j++) {
					// accumulate deltaH with the pre-update weight, then update it
					deltaH[j] += deltaOutput[k] * weightsOut[k][j];
					weightsOut[k][j] += deltaOutput[k] * actH[j] * learningRate;
				}
				//bias
				weightsOut[k][cell_blocks] += deltaOutput[k] * 1.0 * learningRate;
			}

			//input to hidden: apply the accumulated partials
			for (int j = 0; j < cell_blocks; j++) {
				for (int i = 0; i < full_input_dimension; i++) {
					weightsF[j][i] += deltaH[j] * dSdF[j][i] * learningRate;
					weightsG[j][i] += deltaH[j] * dSdG[j][i] * learningRate;
				}
			}
		}

		//////////////////////////////////////////////////////////////

		//roll-over context to next time step
		for (int j = 0; j < cell_blocks; j++) {
			context[j] = actH[j];
		}

		//give results
		return output;
	}
}
201 |
202 |
203 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/TanhNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class TanhNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x) {
7 | return Math.tanh(x);
8 | }
9 |
10 | @Override
11 | public double Derivative(double x) {
12 | double coshx = Math.cosh(x);
13 | double denom = (Math.cosh(2*x) + 1);
14 | return 4 * coshx * coshx / (denom * denom);
15 | }
16 |
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/util.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * Small numeric helpers shared by the package.
 */
public final class util
{
	private util() {
		// utility class; not instantiable
	}

	/**
	 * Index of the largest element of {@code vec}.
	 * Ties resolve to the first (lowest-index) occurrence.
	 *
	 * @param vec values to scan
	 * @return index of the maximum, or -1 if the array is empty
	 */
	public static int argmax(double[] vec) {
		int result = -1;
		double max = Double.NEGATIVE_INFINITY;
		for (int i = 0; i < vec.length; i++) {
			if (vec[i] > max)
			{
				max = vec[i];
				result = i;
			}
		}
		return result;
	}

	/**
	 * Index of the smallest element of {@code vec}.
	 * Ties resolve to the first (lowest-index) occurrence.
	 *
	 * @param vec values to scan
	 * @return index of the minimum, or -1 if the array is empty
	 */
	public static int argmin(double[] vec) {
		int result = -1;
		double min = Double.POSITIVE_INFINITY;
		for (int i = 0; i < vec.length; i++) {
			if (vec[i] < min)
			{
				min = vec[i];
				result = i;
			}
		}
		return result;
	}
}
31 |
--------------------------------------------------------------------------------