├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── LICENSE
├── README
└── src
├── Test.java
└── com
└── evolvingstuff
├── DistractedSequenceRecall.java
├── IAgentSupervised.java
├── IdentityNeuron.java
├── Neuron.java
├── NeuronType.java
├── SigmoidNeuron.java
├── SimpleLSTM.java
├── TanhNeuron.java
└── util.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/*
2 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | SimpleLSTM
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Mon Jun 04 21:08:35 PDT 2012
2 | eclipse.preferences.version=1
3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.6
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.source=1.6
13 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Copyright (c) 2013 Thomas Lahore
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
  1 | This is a recurrent neural network inspired by Long Short-Term Memory (LSTM), but with a much simpler architecture.
2 |
3 | I will be adding more documentation to describe this architecture shortly. For now, a brief summary is that it has only a single gate (similar to the Forget Gate in LSTM) and an input squashing function. Rather than acting to reset the state of the cell, the gate modulates a weighted average of the current input with the state of the cell at the previous time step. So if the gate is fully active, the cell will ignore its current input and fully retain its state. If the gate is inactive then the cell will lose all traces of its previous state and shift to match the current input.
4 |
5 | So far on a few tests I've done, it appears to perform fairly well, although it tends to need more cell blocks than LSTM to do the same thing.
6 |
--------------------------------------------------------------------------------
/src/Test.java:
--------------------------------------------------------------------------------
1 | import java.util.Random;
2 | import com.evolvingstuff.DistractedSequenceRecall;
3 | import com.evolvingstuff.SimpleLSTM;
4 |
5 | public class Test {
6 | public static void main(String[] args) throws Exception {
7 |
8 | System.out.println("Test of SimpleLSTM\n");
9 |
10 | Random r = new Random(1234);
11 | DistractedSequenceRecall task = new DistractedSequenceRecall(r);
12 |
13 | int cell_blocks = 15;
14 | SimpleLSTM slstm = new SimpleLSTM(r, task.GetObservationDimension(), task.GetActionDimension(), cell_blocks);
15 |
16 | for (int epoch = 0; epoch < 5000; epoch++) {
17 | double fit = task.EvaluateFitnessSupervised(slstm);
18 | if (epoch % 10 == 0)
19 | System.out.println("["+epoch+"] error = " + (1 - fit));
20 | }
21 | System.out.println("done.");
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/DistractedSequenceRecall.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Random;
6 |
7 | public class DistractedSequenceRecall {
8 |
9 | int tests = 1000;
10 | int observation_dimension = 10;
11 | int action_dimension = 4;
12 | boolean validation_mode = false;
13 | Random r;
14 |
15 | public DistractedSequenceRecall(Random r) {
16 | this.r = r;
17 | }
18 |
19 | class Interaction {
20 | double[] observation;
21 | double[] target_output;
22 | boolean do_reset;
23 | }
24 |
25 | private List GenerateInteractions(int tests) {
26 | List result = new ArrayList();
27 | for (int test = 0; test < tests; test++) {
28 | int[] seq = new int[22];
29 | int target1 = r.nextInt(4);
30 | int target2 = r.nextInt(4);
31 | for (int t = 0; t < 22; t++) {
32 | seq[t] = r.nextInt(4)+4;//+4 so as not to overlap with target symbols
33 | }
34 | int loc1 = r.nextInt(22);
35 | int loc2 = r.nextInt(22);
36 | while (loc1 == loc2)
37 | loc2 = r.nextInt(22);
38 | if (loc1 > loc2) {
39 | int temp = loc1;
40 | loc1 = loc2;
41 | loc2 = temp;
42 | }
43 | seq[loc1] = target1;
44 | seq[loc2] = target2;
45 |
46 | for (int t = 0; t < seq.length; t++) {
47 | double[] input = new double[observation_dimension];
48 | input[seq[t]] = 1.0;
49 |
50 | Interaction inter = new Interaction();
51 | if (t == 0)
52 | inter.do_reset = true;
53 | inter.observation = input;
54 | result.add(inter);
55 | }
56 | //final 2 steps
57 | double[] input1 = new double[observation_dimension];
58 | input1[8] = 1.0;
59 | double[] target_output1 = new double[action_dimension];
60 | target_output1[target1] = 1.0;
61 | Interaction inter1 = new Interaction();
62 | inter1.observation = input1;
63 | inter1.target_output = target_output1;
64 | result.add(inter1);
65 |
66 | double[] input2 = new double[observation_dimension];
67 | input2[9] = 1.0;
68 | double[] target_output2 = new double[action_dimension];
69 | target_output2[target2] = 1.0;
70 | Interaction inter2 = new Interaction();
71 | inter2.observation = input2;
72 | inter2.target_output = target_output2;
73 | result.add(inter2);
74 | }
75 | return result;
76 | }
77 |
78 | public double EvaluateFitnessSupervised(IAgentSupervised agent) throws Exception {
79 |
80 | List interactions = this.GenerateInteractions(tests);
81 |
82 | double fit = 0;
83 | double max_fit = 0;
84 |
85 | for (Interaction inter : interactions) {
86 |
87 | if (inter.do_reset)
88 | agent.Reset();
89 |
90 | if (inter.target_output == null)
91 | agent.Next(inter.observation);
92 | else {
93 | double[] actual_output = null;
94 |
95 | if (validation_mode == true)
96 | actual_output = agent.Next(inter.observation);
97 | else
98 | actual_output = agent.Next(inter.observation, inter.target_output);
99 |
100 | if (util.argmax(actual_output) == util.argmax(inter.target_output))
101 | fit++;
102 |
103 | max_fit++;
104 | }
105 | }
106 | return fit/max_fit;
107 | }
108 |
109 |
110 | public int GetActionDimension() {
111 | return action_dimension;
112 | }
113 |
114 | public int GetObservationDimension() {
115 | return observation_dimension;
116 | }
117 |
118 | }
119 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/IAgentSupervised.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * A supervised agent that processes observations one time step at a time
 * and can be reset between sequences.
 */
public interface IAgentSupervised
{
	/** Clears all internal state (e.g. recurrent context) before a new sequence. */
	void Reset();
	/** Processes one observation AND learns from the supplied target; returns the agent's output. */
	double[] Next(double[] input, double[] target_output) throws Exception;
	/** Processes one observation without learning; returns the agent's output. */
	double[] Next(double[] input) throws Exception;
}
9 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/IdentityNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class IdentityNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x)
7 | {
8 | return x;
9 | }
10 |
11 | @Override
12 | public double Derivative(double x) {
13 | // TODO Auto-generated method stub
14 | return 1;
15 | }
16 | }
17 |
18 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/Neuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public abstract class Neuron
4 | {
5 | public static Neuron Factory(NeuronType neuron_type)
6 | {
7 | if (neuron_type == NeuronType.Sigmoid)
8 | return new SigmoidNeuron();
9 | else if (neuron_type == NeuronType.Identity)
10 | return new IdentityNeuron();
11 | else if (neuron_type == NeuronType.Tanh)
12 | return new TanhNeuron();
13 | else
14 | System.out.println("ERROR: unknown neuron type");
15 | return null;
16 | }
17 |
18 | abstract public double Activate(double x);
19 | abstract public double Derivative(double x);
20 | }
21 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/NeuronType.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * The activation-function kinds that {@link Neuron#Factory} can construct.
 */
public enum NeuronType
{
	Sigmoid,   // logistic: 1 / (1 + e^-x)
	Identity,  // pass-through: f(x) = x
	Tanh       // hyperbolic tangent
}
9 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/SigmoidNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class SigmoidNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x) {
7 | return 1 / (1 + Math.exp(-x));
8 | }
9 |
10 | @Override
11 | public double Derivative(double x) {
12 | double act = Activate(x);
13 | return act * (1 - act);
14 | }
15 |
16 |
17 | }
18 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/SimpleLSTM.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | import java.util.*;
4 |
/**
 * A simplified LSTM-style recurrent network with a single gate per cell.
 *
 * Each memory cell keeps a scalar state ("context"). At every step a gate F
 * and an input squasher G are computed from the concatenated
 * [external input, previous context, bias] vector, and the new cell state is
 * the gated blend h = f*h_prev + (1-f)*g: a fully open gate (f=1) retains the
 * old state, a closed gate (f=0) replaces it with the squashed input.
 * Output weights are trained by online gradient descent; the recurrent
 * weights use forward-accumulated partials (RTRL-style) held in dSdF/dSdG.
 *
 * NOTE(review): learning happens inside Next(input, target_output); there is
 * no separate training method.
 */
public class SimpleLSTM implements IAgentSupervised
{

	private double init_weight_range = 0.1;   // weights start uniform in [-0.1, 0.1]
	private int full_input_dimension;         // external input + recurrent context + bias
	private int output_dimension;
	private int cell_blocks;
	private Neuron F;                         // gate activation
	private Neuron G;                         // input-squashing activation

	private double [] context;                // cell states from the previous time step

	private double [][] weightsF;             // [cell][full_input] gate weights
	private double [][] weightsG;             // [cell][full_input] input weights
	private double [][] weightsOut;           // [output][cell + bias] readout weights

	// Accumulated partials dS/dw of each cell state w.r.t. its incoming
	// weights (forward-mode credit assignment through time).
	private double [][] dSdF;
	private double [][] dSdG;

	private NeuronType neuron_type_F = NeuronType.Sigmoid;
	private NeuronType neuron_type_G = NeuronType.Sigmoid;

	private double SCALE_OUTPUT_DELTA = 1.0;  // multiplier on the output error signal

	public static double learningRate = 0.07;//0.07

	/**
	 * @param r                source of randomness for weight initialization
	 * @param input_dimension  size of the external observation vector
	 * @param output_dimension size of the output vector
	 * @param cell_blocks      number of memory cells
	 */
	public SimpleLSTM(Random r, int input_dimension, int output_dimension, int cell_blocks)
	{
		this.output_dimension = output_dimension;
		this.cell_blocks = cell_blocks;

		context = new double[cell_blocks];

		full_input_dimension = input_dimension + cell_blocks + 1; //+1 for bias

		F = Neuron.Factory(neuron_type_F);
		G = Neuron.Factory(neuron_type_G);

		weightsF = new double[cell_blocks][full_input_dimension];
		weightsG = new double[cell_blocks][full_input_dimension];

		dSdF = new double[cell_blocks][full_input_dimension];
		dSdG = new double[cell_blocks][full_input_dimension];

		for (int i = 0; i < full_input_dimension; i++) {
			for (int j = 0; j < cell_blocks; j++) {
				weightsF[j][i] = (r.nextDouble() * 2 - 1) * init_weight_range;
				weightsG[j][i] = (r.nextDouble() * 2 - 1) * init_weight_range;
			}
		}

		weightsOut = new double[output_dimension][cell_blocks + 1];

		for (int j = 0; j < cell_blocks + 1; j++) {
			for (int k = 0; k < output_dimension; k++)
				weightsOut[k][j] = (r.nextDouble() * 2 - 1) * init_weight_range;
		}
	}

	/** Clears the recurrent state and accumulated partials (start of a new sequence). */
	public void Reset()
	{
		for (int c = 0; c < context.length; c++)
			context[c] = 0.0;
		//reset accumulated partials
		for (int c = 0; c < cell_blocks; c++) {
			for (int i = 0; i < full_input_dimension; i++) {
				this.dSdG[c][i] = 0;
				this.dSdF[c][i] = 0;
			}
		}
	}

	/** Forward pass only — no weight update. */
	public double[] Next(double[] input)
	{
		return Next(input, null);
	}

	/** Debug placeholder; prints no useful information yet. */
	public void Display()
	{
		System.out.println("==============================");
		System.out.println("DAGate: todo...");
		System.out.println("\n==============================");
	}

	/**
	 * One time step: forward pass, then (if target_output != null) an online
	 * weight update toward the target.
	 *
	 * @param input         external observation for this step
	 * @param target_output desired output, or null for a pure forward pass
	 * @return the output vector for this step (linear, not squashed)
	 */
	public double[] Next(double[] input, double[] target_output) {

		//setup input vector: [external input, previous context, bias]
		double[] full_input = new double[full_input_dimension];
		int loc = 0;
		for (int i = 0; i < input.length; i++)
			full_input[loc++] = input[i];
		for (int c = 0; c < context.length; c++)
			full_input[loc++] = context[c];
		full_input[loc++] = 1.0; //bias

		//cell block arrays
		double[] sumF = new double[cell_blocks];
		double[] actF = new double[cell_blocks];
		double[] sumG = new double[cell_blocks];
		double[] actG = new double[cell_blocks];
		double[] actH = new double[cell_blocks];

		//inputs to cell blocks (weighted sums for gate F and input G)
		for (int i = 0; i < full_input_dimension; i++)
		{
			for (int j = 0; j < cell_blocks; j++)
			{
				sumF[j] += weightsF[j][i] * full_input[i];
				sumG[j] += weightsG[j][i] * full_input[i];
			}
		}

		for (int j = 0; j < cell_blocks; j++) {
			actF[j] = F.Activate(sumF[j]);
			actG[j] = G.Activate(sumG[j]);
			// gated blend of previous state and squashed input
			actH[j] = actF[j] * context[j] + (1 - actF[j]) * actG[j];
		}

		//prepare hidden layer plus bias
		double [] full_hidden = new double[cell_blocks + 1];
		loc = 0;
		for (int j = 0; j < cell_blocks; j++)
			full_hidden[loc++] = actH[j];
		full_hidden[loc++] = 1.0; //bias

		//calculate output (linear readout)
		double[] output = new double[output_dimension];
		for (int k = 0; k < output_dimension; k++)
		{
			for (int j = 0; j < cell_blocks + 1; j++)
				output[k] += weightsOut[k][j] * full_hidden[j];
			//output not squashed
		}

		//////////////////////////////////////////////////////////////
		//////////////////////////////////////////////////////////////
		//BACKPROP
		//////////////////////////////////////////////////////////////
		//////////////////////////////////////////////////////////////

		//update accumulated partials dS/dw (forward-mode); done on every
		//step so they track the state even when no target is given
		for (int j = 0; j < cell_blocks; j++) {

			double f = actF[j];
			double df = F.Derivative(sumF[j]);
			double g = actG[j];
			double dg = G.Derivative(sumG[j]);
			double h_ = context[j]; //prev value of h

			for (int i = 0; i < full_input_dimension; i++) {

				double prevdSdF = dSdF[j][i];
				double prevdSdG = dSdG[j][i];
				double in = full_input[i];

				// (1-f) gates the fresh-input path; f carries the old partial forward
				dSdG[j][i] = ((1 - f)*dg*in) + (f*prevdSdG);
				// the gate trades off the old state h_ against the new input g
				dSdF[j][i] = ((h_- g)*df*in) + (f*prevdSdF);
			}
		}

		if (target_output != null) {

			//output to hidden
			double [] deltaOutput = new double[output_dimension];
			double [] deltaH = new double[cell_blocks];
			for (int k = 0; k < output_dimension; k++) {
				deltaOutput[k] = (target_output[k] - output[k]) * SCALE_OUTPUT_DELTA;
				for (int j = 0; j < cell_blocks; j++) {
					// accumulate deltaH with the pre-update weight, then update it
					deltaH[j] += deltaOutput[k] * weightsOut[k][j];
					weightsOut[k][j] += deltaOutput[k] * actH[j] * learningRate;
				}
				//bias
				weightsOut[k][cell_blocks] += deltaOutput[k] * 1.0 * learningRate;
			}

			//input to hidden: apply the accumulated partials
			for (int j = 0; j < cell_blocks; j++) {
				for (int i = 0; i < full_input_dimension; i++) {
					weightsF[j][i] += deltaH[j] * dSdF[j][i] * learningRate;
					weightsG[j][i] += deltaH[j] * dSdG[j][i] * learningRate;
				}
			}
		}

		//////////////////////////////////////////////////////////////

		//roll-over context to next time step
		for (int j = 0; j < cell_blocks; j++) {
			context[j] = actH[j];
		}

		//give results
		return output;
	}
}
201 |
202 |
203 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/TanhNeuron.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
3 | public class TanhNeuron extends Neuron
4 | {
5 | @Override
6 | public double Activate(double x) {
7 | return Math.tanh(x);
8 | }
9 |
10 | @Override
11 | public double Derivative(double x) {
12 | double coshx = Math.cosh(x);
13 | double denom = (Math.cosh(2*x) + 1);
14 | return 4 * coshx * coshx / (denom * denom);
15 | }
16 |
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/src/com/evolvingstuff/util.java:
--------------------------------------------------------------------------------
1 | package com.evolvingstuff;
2 |
/**
 * Small numeric helpers shared by the package.
 */
public final class util
{
	private util() {
		// utility class; not instantiable
	}

	/**
	 * Index of the largest element of {@code vec}.
	 * Ties resolve to the first (lowest-index) occurrence.
	 *
	 * @param vec values to scan
	 * @return index of the maximum, or -1 if the array is empty
	 */
	public static int argmax(double[] vec) {
		int result = -1;
		double max = Double.NEGATIVE_INFINITY;
		for (int i = 0; i < vec.length; i++) {
			if (vec[i] > max)
			{
				max = vec[i];
				result = i;
			}
		}
		return result;
	}

	/**
	 * Index of the smallest element of {@code vec}.
	 * Ties resolve to the first (lowest-index) occurrence.
	 *
	 * @param vec values to scan
	 * @return index of the minimum, or -1 if the array is empty
	 */
	public static int argmin(double[] vec) {
		int result = -1;
		double min = Double.POSITIVE_INFINITY;
		for (int i = 0; i < vec.length; i++) {
			if (vec[i] < min)
			{
				min = vec[i];
				result = i;
			}
		}
		return result;
	}
}
31 |
--------------------------------------------------------------------------------