/*
Repository layout (reconstructed from the dump this file was recovered from):

DeepQLearning
├── App.config
├── ConvnetSharp
│   ├── Gradient.cs
│   ├── Layers
│   │   ├── ConvLayer.cs
│   │   ├── DropoutLayer.cs
│   │   ├── FullyConnectedLayer.cs
│   │   ├── InputLayer.cs
│   │   ├── Layer.cs
│   │   ├── LayerDefinition.cs
│   │   ├── ReLULayer.cs
│   │   ├── RegressionLayer.cs
│   │   └── SVMLayer.cs
│   ├── Net.cs
│   ├── Trainer
│   │   ├── Options.cs
│   │   ├── Trainer.cs
│   │   └── TrainingOptions.cs
│   ├── Util.cs
│   └── Volume.cs
├── DRLAgent
│   ├── DeepQLearn.cs
│   └── QAgent.cs
├── DeepQLearning.csproj
├── DeepQLearning.sln
├── DeepQLearning.v11.suo
├── Form1.Designer.cs
├── Form1.cs
├── Form1.resx
├── Program.cs
├── Properties
│   ├── AssemblyInfo.cs
│   ├── Resources.Designer.cs
│   ├── Resources.resx
│   ├── Settings.Designer.cs
│   └── Settings.settings
├── bin/Debug/deepQnet.dat
├── obj/Debug/(build artifacts)
├── LICENSE
└── README.md
*/

// ==================== /DeepQLearning/App.config ====================
// NOTE(review): the XML content of App.config was stripped by the dump/escaping
// process (only empty numbered lines remain in the source); the original
// configuration cannot be reconstructed from this file.

// ==================== /DeepQLearning/ConvnetSharp/Gradient.cs ====================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// A parameter/gradient pair handed to the Trainer by each layer's
    /// getParamsAndGrads(). w and dw are shared references into the owning
    /// layer's Volume arrays, so the Trainer mutates layer weights in place.
    /// </summary>
    /// <remarks>
    /// double.MinValue is used throughout this code base as an "unset" sentinel
    /// (see LayerDefinition / Options); the Trainer substitutes its own defaults
    /// when it sees the sentinel.
    /// </remarks>
    [Serializable]
    public class Gradient
    {
        public double[] w;   // parameter values (shared with the layer)
        public double[] dw;  // accumulated gradients for w
        public double l1_decay_mul = double.MinValue; // per-parameter L1 decay multiplier ("unset" sentinel)
        public double l2_decay_mul = double.MinValue; // per-parameter L2 decay multiplier ("unset" sentinel)
    }
}
// ==================== /DeepQLearning/ConvnetSharp/Layers/ConvLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // - ConvLayer does convolutions (so weight sharing spatially)
    // NOTE(review): generic type arguments (e.g. List<Volume>) were stripped by
    // the dump this file was recovered from and have been restored here. The
    // JavaScript-leftover "| 0" truncation no-ops from the ConvNetJS original
    // have been removed (they are identity operations on C# ints).
    [Serializable]
    public class ConvLayer : LayerBase
    {
        Util util = new Util();

        Volume biases;    // one bias per output filter
        int stride, pad;  // filter stride / zero-padding around the input

        /// <summary>
        /// Builds a convolutional layer from a LayerDefinition. Required fields:
        /// n_filters, sx, in_depth, in_sx, in_sy. Optional fields use the
        /// project-wide int.MinValue / double.MinValue "unset" sentinels and
        /// fall back to the defaults below.
        /// </summary>
        public ConvLayer(LayerDefinition def) : base()
        {
            // required
            this.out_depth = def.n_filters;
            this.sx = def.sx; // filter size. Should be odd if possible, it's cleaner.
            this.in_depth = def.in_depth;
            this.in_sx = def.in_sx;
            this.in_sy = def.in_sy;

            // optional
            this.sy = def.sy != int.MinValue ? def.sy : this.sx;
            this.stride = def.stride != int.MinValue ? def.stride : 1; // stride at which we apply filters to input volume
            this.pad = def.pad != int.MinValue ? def.pad : 0; // amount of 0 padding to add around borders of input volume
            this.l1_decay_mul = def.l1_decay_mul != double.MinValue ? def.l1_decay_mul : 0.0;
            this.l2_decay_mul = def.l2_decay_mul != double.MinValue ? def.l2_decay_mul : 1.0;

            // computed
            // note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
            // volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
            // final application.
            this.out_sx = (int)Math.Floor((double)(def.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
            this.out_sy = (int)Math.Floor((double)(def.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
            this.type = "conv";

            // initializations
            var bias = def.bias_pref != double.MinValue ? def.bias_pref : 0.0;
            this.filters = new List<Volume>();
            for (var i = 0; i < this.out_depth; i++) { this.filters.Add(new Volume(this.sx, this.sy, this.in_depth)); }
            this.biases = new Volume(1, 1, this.out_depth, bias);
        }

        /// <summary>
        /// Forward pass: convolves every filter over the (zero-padded) input
        /// volume V, writing the result to out_act. is_training is unused here.
        /// </summary>
        public override Volume forward(Volume V, bool is_training)
        {
            // optimized code by @mdda that achieves 2x speedup over previous version
            this.in_act = V;
            var A = new Volume(this.out_sx, this.out_sy, this.out_depth, 0.0);

            var V_sx = V.sx;
            var V_sy = V.sy;
            var xy_stride = this.stride;

            for (var d = 0; d < this.out_depth; d++)
            {
                var f = this.filters[d];
                var y = -this.pad;
                for (var ay = 0; ay < this.out_sy; y += xy_stride, ay++)
                {
                    var x = -this.pad;
                    for (var ax = 0; ax < this.out_sx; x += xy_stride, ax++)
                    {
                        // convolve centered at this particular location
                        var a = 0.0;
                        for (var fy = 0; fy < f.sy; fy++)
                        {
                            var oy = y + fy; // coordinates in the original input array coordinates
                            for (var fx = 0; fx < f.sx; fx++)
                            {
                                var ox = x + fx;
                                // skip positions that fall into the zero padding
                                if (oy >= 0 && oy < V_sy && ox >= 0 && ox < V_sx)
                                {
                                    for (var fd = 0; fd < f.depth; fd++)
                                    {
                                        // avoid function call overhead (x2) for efficiency, compromise modularity :(
                                        a += f.w[((f.sx * fy) + fx) * f.depth + fd] * V.w[((V_sx * oy) + ox) * V.depth + fd];
                                    }
                                }
                            }
                        }
                        a += this.biases.w[d];
                        A.set(ax, ay, d, a);
                    }
                }
            }
            this.out_act = A;
            return this.out_act;
        }

        /// <summary>
        /// Backward pass: reads the chain-rule gradients already stored in
        /// out_act.dw and accumulates filter/bias gradients, filling in_act.dw.
        /// The _y argument is ignored (only loss layers use it); returns 0.
        /// </summary>
        public override double backward(object _y)
        {
            var V = this.in_act;
            V.dw = util.zeros(V.w.Length); // zero out gradient wrt bottom data, we're about to fill it

            var V_sx = V.sx;
            var V_sy = V.sy;
            var xy_stride = this.stride;

            for (var d = 0; d < this.out_depth; d++)
            {
                var f = this.filters[d];
                var y = -this.pad;
                for (var ay = 0; ay < this.out_sy; y += xy_stride, ay++)
                {
                    var x = -this.pad;
                    for (var ax = 0; ax < this.out_sx; x += xy_stride, ax++)
                    {
                        // convolve centered at this particular location
                        var chain_grad = this.out_act.get_grad(ax, ay, d); // gradient from above, from chain rule
                        for (var fy = 0; fy < f.sy; fy++)
                        {
                            var oy = y + fy; // coordinates in the original input array coordinates
                            for (var fx = 0; fx < f.sx; fx++)
                            {
                                var ox = x + fx;
                                if (oy >= 0 && oy < V_sy && ox >= 0 && ox < V_sx)
                                {
                                    for (var fd = 0; fd < f.depth; fd++)
                                    {
                                        // avoid function call overhead (x2) for efficiency, compromise modularity :(
                                        var ix1 = ((V_sx * oy) + ox) * V.depth + fd;
                                        var ix2 = ((f.sx * fy) + fx) * f.depth + fd;
                                        f.dw[ix2] += V.w[ix1] * chain_grad;
                                        V.dw[ix1] += f.w[ix2] * chain_grad;
                                    }
                                }
                            }
                        }
                        this.biases.dw[d] += chain_grad;
                    }
                }
            }

            return 0.0;
        }

        /// <summary>
        /// Exposes filter weights/gradients (with this layer's decay
        /// multipliers) plus the biases (no weight decay) to the Trainer.
        /// </summary>
        public override Gradient[] getParamsAndGrads()
        {
            var response = new List<Gradient>();
            for (var i = 0; i < this.out_depth; i++)
            {
                response.Add(new Gradient { w = this.filters[i].w, dw = this.filters[i].dw, l2_decay_mul = this.l2_decay_mul, l1_decay_mul = this.l1_decay_mul });
            }
            response.Add(new Gradient { w = this.biases.w, dw = this.biases.dw, l1_decay_mul = 0.0, l2_decay_mul = 0.0 });
            return response.ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/DropoutLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // An inefficient dropout layer
    // Note this is not most efficient implementation since the layer before
    // computed all these activations and now we're just going to drop them :(
    // same goes for backward pass. Also, if we wanted to be efficient at test time
    // we could equivalently be clever and upscale during train and copy pointers during test
    // todo: make more efficient.
    [Serializable]
    public class DropoutLayer : LayerBase
    {
        bool[] dropped; // which activations were zeroed in the last training forward pass

        Util util = new Util();

        public DropoutLayer(LayerDefinition def) : base()
        {
            // computed
            this.out_sx = def.in_sx;
            this.out_sy = def.in_sy;
            this.out_depth = def.in_depth;
            this.type = "dropout";
            // BUGFIX(review): the original compared def.drop_prob != double.NaN,
            // which is ALWAYS true (every comparison with NaN is false, so !=
            // always yields true). An unset drop_prob therefore kept the
            // double.MinValue sentinel instead of the intended 0.5 default.
            // Compare against the sentinel, as every other optional field in
            // this code base does (and as Net.makeLayers does for drop_prob).
            this.drop_prob = def.drop_prob != double.MinValue ? def.drop_prob : 0.5;
            this.dropped = new bool[this.out_sx * this.out_sy * this.out_depth];
        }

        /// <summary>
        /// Training: zeroes each activation independently with probability
        /// drop_prob. Prediction: scales all activations by drop_prob.
        /// NOTE(review): scaling by drop_prob (rather than the keep probability
        /// 1 - drop_prob) follows the ConvNetJS original and is only exact for
        /// drop_prob == 0.5 — confirm before using other drop probabilities.
        /// </summary>
        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;

            var V2 = V.clone();
            var N = V.w.Length;
            if (is_training)
            {
                // do dropout
                for (var i = 0; i < N; i++)
                {
                    if (util.random.NextDouble() < this.drop_prob)
                    {
                        // drop!
                        V2.w[i] = 0;
                        this.dropped[i] = true;
                    }
                    else
                    {
                        this.dropped[i] = false;
                    }
                }
            }
            else
            {
                // scale the activations during prediction
                for (var i = 0; i < N; i++) { V2.w[i] *= this.drop_prob; }
            }
            this.out_act = V2;
            return this.out_act; // dummy identity function for now
        }

        /// <summary>
        /// Passes gradients straight through for surviving units; dropped units
        /// get zero gradient. y is ignored; returns 0.
        /// </summary>
        public override double backward(object y)
        {
            var V = this.in_act; // we need to set dw of this
            var chain_grad = this.out_act;
            var N = V.w.Length;
            V.dw = util.zeros(N); // zero out gradient wrt data
            for (var i = 0; i < N; i++)
            {
                if (!(this.dropped[i]))
                {
                    V.dw[i] = chain_grad.dw[i]; // copy over the gradient
                }
            }

            return 0.0;
        }

        // Dropout has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/FullyConnectedLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Dense layer: every output neuron is connected to every input activation.
    // NOTE(review): generic type arguments (List<Volume>, List<Gradient>) were
    // stripped by the dump this file was recovered from and have been restored.
    [Serializable]
    public class FullyConnectedLayer : LayerBase
    {
        Volume biases; // one bias per output neuron

        Util util = new Util();

        public FullyConnectedLayer(LayerDefinition def) : base()
        {
            // required
            this.out_depth = def.num_neurons;

            // optional (double.MinValue = "unset" sentinel)
            this.l1_decay_mul = def.l1_decay_mul != double.MinValue ? def.l1_decay_mul : 0.0;
            this.l2_decay_mul = def.l2_decay_mul != double.MinValue ? def.l2_decay_mul : 1.0;

            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "fc";

            // initializations: one 1x1xnum_inputs weight Volume per neuron
            var bias = def.bias_pref != double.MinValue ? def.bias_pref : 0.0;
            this.filters = new List<Volume>();
            for (var i = 0; i < this.out_depth; i++) { this.filters.Add(new Volume(1, 1, this.num_inputs)); }
            this.biases = new Volume(1, 1, this.out_depth, bias);
        }

        /// <summary>Forward pass: out[i] = dot(V.w, filters[i].w) + bias[i].</summary>
        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            var A = new Volume(1, 1, this.out_depth, 0.0);
            var Vw = V.w;
            for (var i = 0; i < this.out_depth; i++)
            {
                var a = 0.0;
                var wi = this.filters[i].w;
                for (var d = 0; d < this.num_inputs; d++)
                {
                    a += Vw[d] * wi[d]; // for efficiency use Vols directly for now
                }
                a += this.biases.w[i];
                A.w[i] = a;
            }
            this.out_act = A;
            return this.out_act;
        }

        /// <summary>
        /// Backward pass: accumulates weight/bias gradients from out_act.dw and
        /// fills in_act.dw. y is ignored; returns 0.
        /// </summary>
        public override double backward(object y)
        {
            var V = this.in_act;
            V.dw = util.zeros(V.w.Length); // zero out the gradient in input Vol

            // compute gradient wrt weights and data
            for (var i = 0; i < this.out_depth; i++)
            {
                var tfi = this.filters[i];
                var chain_grad = this.out_act.dw[i];
                for (var d = 0; d < this.num_inputs; d++)
                {
                    V.dw[d] += tfi.w[d] * chain_grad;  // grad wrt input data
                    tfi.dw[d] += V.w[d] * chain_grad;  // grad wrt params
                }
                this.biases.dw[i] += chain_grad;
            }

            return 0.0;
        }

        /// <summary>Weights (with decay multipliers) plus biases (no decay).</summary>
        public override Gradient[] getParamsAndGrads()
        {
            var response = new List<Gradient>();
            for (var i = 0; i < this.out_depth; i++)
            {
                response.Add(new Gradient { w = this.filters[i].w, dw = this.filters[i].dw, l1_decay_mul = this.l1_decay_mul, l2_decay_mul = this.l2_decay_mul });
            }

            response.Add(new Gradient { w = this.biases.w, dw = this.biases.dw, l1_decay_mul = 0.0, l2_decay_mul = 0.0 });
            return response.ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/InputLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Identity layer that declares the input dimensions of the network.
    [Serializable]
    public class InputLayer : LayerBase
    {
        Util util = new Util();

        public InputLayer(LayerDefinition def) : base()
        {
            // required: depth
            this.out_depth = def.out_depth;

            // NOTE(review): original comment said these dimensions "default to 1",
            // but the code copies them verbatim (an unset definition leaves the
            // int.MinValue sentinel) — confirm callers always set out_sx/out_sy.
            this.out_sx = def.out_sx;
            this.out_sy = def.out_sy;

            // computed
            this.type = "input";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V;
            return this.out_act; // simply identity function for now
        }

        // Identity: nothing to backpropagate, no parameters.
        public override double backward(object y) { return 0.0; }
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/Layer.cs ====================
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Base class for all layers. Fields mirror ConvNetJS layer properties;
    /// not every field is meaningful for every layer type.
    /// </summary>
    [Serializable]
    public abstract class LayerBase
    {
        public string type;        // layer discriminator, e.g. "conv", "fc", "relu"
        public string activation;
        public int group_size;
        public int num_neurons;
        public int num_classes;
        public int num_inputs;
        public double bias_pref;
        public double drop_prob;

        public int out_depth;
        public int out_sx;
        public int out_sy;
        public int in_depth;
        public int in_sx;
        public int in_sy;
        public int sx;             // filter width (conv layers)
        public int sy;             // filter height (conv layers)

        public Volume in_act;      // input of the last forward() call
        public Volume out_act;     // output of the last forward() call

        public double l1_decay_mul;
        public double l2_decay_mul;

        // trainable weight volumes (restored generic argument: List<Volume>)
        public List<Volume> filters;

        public abstract Gradient[] getParamsAndGrads();
        public abstract Volume forward(Volume V, bool is_training);
        public abstract double backward(object y);
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/LayerDefinition.cs ====================
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Declarative description of a layer, consumed by Net.makeLayers.
    /// int.MinValue / double.MinValue mean "not set" — each layer constructor
    /// substitutes its own default when it sees the sentinel.
    /// </summary>
    [Serializable]
    public class LayerDefinition
    {
        public string type;
        public string activation;
        public int group_size = int.MinValue;
        public int num_neurons = int.MinValue;
        public int num_classes = int.MinValue;
        public int num_inputs = int.MinValue;
        public double bias_pref = double.MinValue;
        public double drop_prob = double.MinValue;

        public int out_depth = int.MinValue;
        public int out_sx = int.MinValue;
        public int out_sy = int.MinValue;
        public int in_depth = int.MinValue;
        public int in_sx = int.MinValue;
        public int in_sy = int.MinValue;
        public int sx = int.MinValue;
        public int sy = int.MinValue;

        public double l1_decay_mul = double.MinValue;
        public double l2_decay_mul = double.MinValue;

        public List<Volume> filters;
        public int n_filters = int.MinValue;
        public int stride = int.MinValue;
        public int pad = int.MinValue;
    }
}
// ==================== /DeepQLearning/ConvnetSharp/Layers/ReLULayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Rectified linear unit: elementwise max(0, x).
    [Serializable]
    public class ReLULayer : LayerBase
    {
        Util util = new Util();

        public ReLULayer(LayerDefinition def) : base()
        {
            // computed: same shape as the input
            this.out_sx = def.in_sx;
            this.out_sy = def.in_sy;
            this.out_depth = def.in_depth;
            this.type = "relu";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            var V2 = V.clone();
            var N = V.w.Length;
            var V2w = V2.w;
            for (var i = 0; i < N; i++)
            {
                if (V2w[i] < 0) V2w[i] = 0; // threshold at 0
            }
            this.out_act = V2;
            return this.out_act;
        }

        // Gradient passes through where the output was positive, else zero.
        public override double backward(object y)
        {
            var V = this.in_act;   // we need to set dw of this
            var V2 = this.out_act;
            var N = V.w.Length;
            V.dw = util.zeros(N);  // zero out gradient wrt data
            for (var i = 0; i < N; i++)
            {
                if (V2.w[i] <= 0) V.dw[i] = 0; // threshold
                else V.dw[i] = V2.dw[i];
            }

            return 0.0;
        }

        // ReLU has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/RegressionLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // L2 regression loss layer (identity forward, squared-error backward).
    [Serializable]
    public class RegressionLayer : LayerBase
    {
        Util util = new Util();

        public RegressionLayer(LayerDefinition def) : base()
        {
            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_depth = this.num_inputs;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "regression";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V;
            return this.out_act; // simply identity function for now
        }

        // y is a list here of size num_inputs
        // or it can be a number if only one value is regressed
        // or it can be a struct {dim: i, val: x} where we only want to
        // regress on dimension i and asking it to have value x
        /// <summary>
        /// Computes 0.5 * sum (x - y)^2 over the targeted dimensions, writes
        /// the error into in_act.dw and returns the loss.
        /// </summary>
        public override double backward(object y)
        {
            // compute and accumulate gradient wrt weights and bias of this layer
            var x = this.in_act;
            x.dw = util.zeros(x.w.Length); // zero out the gradient of input Vol
            var loss = 0.0;

            // BUGFIX(review): the original tested y.GetType().Equals(typeof(Array)),
            // which is never true for a double[] — its runtime type is
            // System.Double[], not System.Array — so array targets fell through
            // to the (Entry) cast below and threw InvalidCastException.
            var Y = y as double[];
            if (Y != null)
            {
                for (var i = 0; i < this.out_depth; i++)
                {
                    var dy = x.w[i] - Y[i];
                    x.dw[i] = dy;
                    loss += 0.5 * dy * dy;
                }
            }
            else if (y is double)
            {
                // only one number is being regressed (dimension 0)
                var dy = x.w[0] - (double)y;
                x.dw[0] = dy;
                loss += 0.5 * dy * dy;
            }
            else
            {
                // assume it is a struct with entries .dim and .val
                // and we pass gradient only along dimension dim to be equal to val
                var e = (Entry)y;
                var dy = x.w[e.dim] - e.val;
                x.dw[e.dim] = dy;
                loss += 0.5 * dy * dy;
            }

            return loss;
        }

        // Regression loss has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/SVMLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Structured (hinge) loss layer over raw class scores.
    [Serializable]
    public class SVMLayer : LayerBase
    {
        Util util = new Util();

        public SVMLayer(LayerDefinition def) : base()
        {
            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_depth = this.num_inputs;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "svm";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V; // nothing to do, output raw scores
            return V;
        }

        /// <summary>
        /// y is the ground-truth class index. Applies a margin-1 structured
        /// loss: every class scoring within the margin of the true class
        /// contributes gradient and loss.
        /// </summary>
        public override double backward(object y)
        {
            var index = (int)y;

            // compute and accumulate gradient wrt weights and bias of this layer
            var x = this.in_act;
            x.dw = util.zeros(x.w.Length); // zero out the gradient of input Vol

            // we're using structured loss here, which means that the score
            // of the ground truth should be higher than the score of any other
            // class, by a margin
            var yscore = x.w[index]; // score of ground truth
            var margin = 1.0;
            var loss = 0.0;
            for (var i = 0; i < this.out_depth; i++)
            {
                if (index == i) { continue; }
                var ydiff = -yscore + x.w[i] + margin;
                if (ydiff > 0)
                {
                    // violating dimension, apply loss
                    x.dw[i] += 1;
                    x.dw[index] -= 1;
                    loss += ydiff;
                }
            }

            return loss;
        }

        // SVM loss has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Net.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Target for RegressionLayer: regress only dimension `dim` toward `val`.
    [Serializable]
    public struct Entry
    {
        public int dim;
        public double val;
    };

    /// <summary>
    /// A network: an ordered list of layers plus forward/backward plumbing.
    /// NOTE(review): generic type arguments (List&lt;LayerBase&gt; etc.) were
    /// stripped by the dump this file was recovered from and have been restored.
    /// </summary>
    [Serializable]
    public class Net
    {
        public List<LayerBase> layers = new List<LayerBase>();
        Util util = new Util();

        // constructor
        public Net()
        {
        }

        // takes a list of layer definitions and creates the network layer objects
        public void makeLayers(List<LayerDefinition> defs)
        {
            // few checks
            util.assert(defs.Count >= 2, "Error! At least one input layer and one loss layer are required.");
            util.assert(defs[0].type == "input", "Error! First layer must be the input layer, to declare size of inputs");

            // desugar: inject implied fc/activation/dropout layers around the declared ones
            var new_defs = new List<LayerDefinition>();
            for (var i = 0; i < defs.Count; i++)
            {
                var def = defs[i];

                if (def.type == "softmax" || def.type == "svm")
                {
                    // add an fc layer here, there is no reason the user should
                    // have to worry about this and we almost always want to
                    new_defs.Add(new LayerDefinition { type = "fc", num_neurons = def.num_classes });
                }

                if (def.type == "regression")
                {
                    // add an fc layer here, there is no reason the user should
                    // have to worry about this and we almost always want to
                    new_defs.Add(new LayerDefinition { type = "fc", num_neurons = def.num_neurons });
                }

                // BUGFIX(review): bias_pref is a double initialized to the
                // double.MinValue sentinel, but the original compared it with
                // int.MinValue — the comparison was always false, so ReLU layers
                // never received the intended 0.1 starting bias.
                if ((def.type == "fc" || def.type == "conv") && def.bias_pref == double.MinValue)
                {
                    def.bias_pref = 0.0;
                    if (!string.IsNullOrEmpty(def.activation) && def.activation == "relu")
                    {
                        // relus like a bit of positive bias to get gradients early
                        // otherwise it's technically possible that a relu unit will never turn on (by chance)
                        // and will never get any gradient and never contribute any computation. Dead relu.
                        def.bias_pref = 0.1;
                    }
                }

                new_defs.Add(def);

                if (!string.IsNullOrEmpty(def.activation))
                {
                    if (def.activation == "relu") { new_defs.Add(new LayerDefinition { type = "relu" }); }
                    else if (def.activation == "sigmoid") { new_defs.Add(new LayerDefinition { type = "sigmoid" }); }
                    else if (def.activation == "tanh") { new_defs.Add(new LayerDefinition { type = "tanh" }); }
                    else if (def.activation == "maxout")
                    {
                        // create maxout activation, and pass along group size, if provided
                        var gs = def.group_size != int.MinValue ? def.group_size : 2;
                        new_defs.Add(new LayerDefinition { type = "maxout", group_size = gs });
                    }
                    else { Console.WriteLine("ERROR unsupported activation " + def.activation); }
                }

                if (def.drop_prob != double.MinValue && def.type != "dropout")
                {
                    new_defs.Add(new LayerDefinition { type = "dropout", drop_prob = def.drop_prob });
                }
            }

            defs = new_defs;

            // create the layers
            this.layers = new List<LayerBase>();
            for (var i = 0; i < defs.Count; i++)
            {
                var def = defs[i];
                if (i > 0)
                {
                    // each layer's input shape is the previous layer's output shape
                    var prev = this.layers[i - 1];
                    def.in_sx = prev.out_sx;
                    def.in_sy = prev.out_sy;
                    def.in_depth = prev.out_depth;
                }

                switch (def.type)
                {
                    case "fc": this.layers.Add(new FullyConnectedLayer(def)); break;
                    //case "lrn": this.layers.Add(new LocalResponseNormalizationLayer(def)); break;
                    case "dropout": this.layers.Add(new DropoutLayer(def)); break;
                    case "input": this.layers.Add(new InputLayer(def)); break;
                    //case "softmax": this.layers.Add(new SoftmaxLayer(def)); break;
                    case "regression": this.layers.Add(new RegressionLayer(def)); break;
                    case "conv": this.layers.Add(new ConvLayer(def)); break;
                    //case "pool": this.layers.Add(new PoolLayer(def)); break;
                    case "relu": this.layers.Add(new ReLULayer(def)); break;
                    //case "sigmoid": this.layers.Add(new SigmoidLayer(def)); break;
                    //case "tanh": this.layers.Add(new TanhLayer(def)); break;
                    //case "maxout": this.layers.Add(new MaxoutLayer(def)); break;
                    case "svm": this.layers.Add(new SVMLayer(def)); break;
                    default: Console.WriteLine("ERROR: UNRECOGNIZED LAYER TYPE: " + def.type); break;
                }
            }
        }

        // forward prop the network.
        // The trainer class passes is_training = true, but when this function is
        // called from outside (not from the trainer), it defaults to prediction mode
        public Volume forward(Volume V, bool is_training)
        {
            var act = this.layers[0].forward(V, is_training);

            for (int i = 1; i < this.layers.Count; i++)
            {
                act = this.layers[i].forward(act, is_training);
            }
            return act;
        }

        // evaluate loss of a single example without updating any gradients upstream
        public double getCostLoss(Volume V, int y)
        {
            this.forward(V, false);
            var N = this.layers.Count;
            var loss = this.layers[N - 1].backward(y);
            return loss;
        }

        // backprop: compute gradients wrt all parameters
        public double backward(object y)
        {
            var N = this.layers.Count;
            var loss = this.layers[N - 1].backward(y); // last layer assumed to be loss layer
            for (var i = N - 2; i >= 0; i--)
            {
                // first layer assumed input; hidden layers ignore y
                this.layers[i].backward(y);
            }

            return loss;
        }

        public Gradient[] getParamsAndGrads()
        {
            // accumulate parameters and gradients for the entire network
            var response = new List<Gradient>();
            for (var i = 0; i < this.layers.Count; i++)
            {
                var layer_reponse = this.layers[i].getParamsAndGrads();
                for (var j = 0; j < layer_reponse.Length; j++)
                {
                    response.Add(layer_reponse[j]);
                }
            }

            return response.ToArray();
        }

        // this is a convenience function for returning the argmax
        // prediction, assuming the last layer of the net is a softmax
        // NOTE(review): the softmax layer case is commented out in makeLayers
        // above, so this assert will currently always fire — confirm whether
        // getPrediction is reachable in this port.
        public int getPrediction()
        {
            var S = this.layers[this.layers.Count - 1];
            util.assert(S.type == "softmax", "getPrediction function assumes softmax as last layer of the net!");

            var p = S.out_act.w;
            var maxv = p[0];
            var maxi = 0;
            for (var i = 1; i < p.Length; i++)
            {
                if (p[i] > maxv) { maxv = p[i]; maxi = i; }
            }

            return maxi; // return index of the class with highest class probability
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Trainer/Options.cs ====================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Hyper-parameter bag for Trainer. double.MinValue / int.MinValue mean
    /// "not set"; Trainer substitutes its documented defaults.
    /// </summary>
    [Serializable]
    public class Options
    {
        public string method = string.Empty;          // sgd/adam/adagrad/adadelta/windowgrad/netsterov
        public int batch_size = int.MinValue;

        public double learning_rate = double.MinValue;
        public double l1_decay = double.MinValue;
        public double l2_decay = double.MinValue;
        public double momentum = double.MinValue;
        public double beta1 = double.MinValue;        // adam
        public double beta2 = double.MinValue;        // adam
        public double ro = double.MinValue;           // adadelta
        public double eps = double.MinValue;          // adam/adadelta
    }
}

-------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Trainer/Trainer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | [Serializable] 10 | public class Trainer 11 | { 12 | Net net; 13 | Options options; 14 | 15 | double learning_rate; 16 | double
l1_decay; 17 | double l2_decay; 18 | public double batch_size; 19 | string method; 20 | 21 | double momentum; 22 | double ro; 23 | double eps; 24 | double beta1; 25 | double beta2; 26 | 27 | double k = 0; // iteration counter 28 | List gsum; // last iteration gradients (used for momentum calculations) 29 | List xsum; // used in adam or adadelta 30 | public bool regression; 31 | 32 | Util util; 33 | 34 | public Trainer(Net net, Options options) 35 | { 36 | this.util = new Util(); 37 | this.net = net; 38 | 39 | this.options = options; 40 | this.learning_rate = options.learning_rate != double.MinValue ? options.learning_rate : 0.01; 41 | this.l1_decay = options.l1_decay != double.MinValue ? options.l1_decay : 0.0; 42 | this.l2_decay = options.l2_decay != double.MinValue ? options.l2_decay : 0.0; 43 | this.batch_size = options.batch_size != int.MinValue ? options.batch_size : 1; 44 | 45 | // methods: sgd/adam/adagrad/adadelta/windowgrad/netsterov 46 | this.method = string.IsNullOrEmpty(options.method) ? "sgd" : options.method; 47 | 48 | this.momentum = options.momentum != double.MinValue ? options.momentum : 0.9; 49 | this.ro = options.ro != double.MinValue ? options.ro : 0.95; // used in adadelta 50 | this.eps = options.eps != double.MinValue ? options.eps : 1e-8; // used in adam or adadelta 51 | this.beta1 = options.beta1 != double.MinValue ? options.beta1 : 0.9; // used in adam 52 | this.beta2 = options.beta2 != double.MinValue ? 
options.beta2 : 0.999; // used in adam 53 | 54 | this.gsum = new List(); 55 | this.xsum = new List(); 56 | 57 | // check if regression is expected 58 | if (this.net.layers[this.net.layers.Count - 1].type == "regression") 59 | this.regression = true; 60 | else 61 | this.regression = false; 62 | } 63 | 64 | public Dictionary train(Volume x, object y) 65 | { 66 | var start = new DateTime(); 67 | this.net.forward(x, true); // also set the flag that lets the net know we're just training 68 | var end = new DateTime(); 69 | var fwd_time = end - start; 70 | 71 | start = new DateTime(); 72 | var cost_loss = this.net.backward(y); 73 | var l2_decay_loss = 0.0; 74 | var l1_decay_loss = 0.0; 75 | end = new DateTime(); 76 | var bwd_time = end - start; 77 | 78 | //if (this.regression && y.GetType().Equals(typeof(Array)) == false) 79 | //Console.WriteLine("Warning: a regression net requires an array as training output vector."); 80 | 81 | this.k++; 82 | if (this.k % this.batch_size == 0) 83 | { 84 | var pglist = this.net.getParamsAndGrads(); 85 | 86 | // initialize lists for accumulators. 
Will only be done once on first iteration 87 | if (this.gsum.Count == 0 && (this.method != "sgd" || this.momentum > 0.0)) 88 | { 89 | // only vanilla sgd doesnt need either lists 90 | // momentum needs gsum 91 | // adagrad needs gsum 92 | // adam and adadelta needs gsum and xsum 93 | for (var i = 0; i < pglist.Length; i++) 94 | { 95 | this.gsum.Add(util.zeros(pglist[i].w.Length)); 96 | if (this.method == "adam" || this.method == "adadelta") 97 | { 98 | this.xsum.Add(util.zeros(pglist[i].w.Length)); 99 | } 100 | else 101 | { 102 | this.xsum.Add(new List().ToArray()); // conserve memory 103 | } 104 | } 105 | } 106 | 107 | // perform an update for all sets of weights 108 | for (var i = 0; i < pglist.Length; i++) 109 | { 110 | var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc) 111 | var p = pg.w; 112 | var g = pg.dw; 113 | 114 | // learning rate for some parameters. 115 | var l2_decay_mul =pg.l2_decay_mul != double.MinValue ? pg.l2_decay_mul : 1.0; 116 | var l1_decay_mul = pg.l1_decay_mul != double.MinValue ? pg.l1_decay_mul : 1.0; 117 | var l2_decay = this.l2_decay * l2_decay_mul; 118 | var l1_decay = this.l1_decay * l1_decay_mul; 119 | 120 | var plen = p.Length; 121 | for (var j = 0; j < plen; j++) 122 | { 123 | l2_decay_loss += l2_decay * p[j] * p[j] / 2; // accumulate weight decay loss 124 | l1_decay_loss += l1_decay * Math.Abs(p[j]); 125 | var l1grad = l1_decay * (p[j] > 0 ? 
1 : -1); 126 | var l2grad = l2_decay * (p[j]); 127 | 128 | var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient 129 | 130 | var gsumi = this.gsum[i]; 131 | var xsumi = this.xsum[i]; 132 | if (this.method == "adam") 133 | { 134 | // adam update 135 | gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij; // update biased first moment estimate 136 | xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // update biased second moment estimate 137 | var biasCorr1 = gsumi[j] * (1 - Math.Pow(this.beta1, this.k)); // correct bias first moment estimate 138 | var biasCorr2 = xsumi[j] * (1 - Math.Pow(this.beta2, this.k)); // correct bias second moment estimate 139 | var dx = -this.learning_rate * biasCorr1 / (Math.Sqrt(biasCorr2) + this.eps); 140 | p[j] += dx; 141 | } 142 | else if (this.method == "adagrad") 143 | { 144 | // adagrad update 145 | gsumi[j] = gsumi[j] + gij * gij; 146 | var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; 147 | p[j] += dx; 148 | } 149 | else if (this.method == "windowgrad") 150 | { 151 | // this is adagrad but with a moving window weighted average 152 | // so the gradient is not accumulated over the entire history of the run. 153 | // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me! 154 | gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij; 155 | var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning 156 | p[j] += dx; 157 | } 158 | else if (this.method == "adadelta") 159 | { 160 | gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij; 161 | var dx = -Math.Sqrt((xsumi[j] + this.eps) / (gsumi[j] + this.eps)) * gij; 162 | xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1. 
163 | p[j] += dx; 164 | } 165 | else if (this.method == "nesterov") 166 | { 167 | var dx = gsumi[j]; 168 | gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij; 169 | dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j]; 170 | p[j] += dx; 171 | } 172 | else 173 | { 174 | // assume SGD 175 | if (this.momentum > 0.0) 176 | { 177 | // momentum update 178 | var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step 179 | gsumi[j] = dx; // back this up for next iteration of momentum 180 | p[j] += dx; // apply corrected gradient 181 | } 182 | else 183 | { 184 | // vanilla sgd 185 | p[j] += -this.learning_rate * gij; 186 | } 187 | } 188 | g[j] = 0.0; // zero out gradient so that we can begin accumulating anew 189 | } 190 | } 191 | } 192 | 193 | // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss 194 | // in future, TODO: have to completely redo the way loss is done around the network as currently 195 | // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer 196 | // and it should all be computed correctly and automatically. 
197 | 198 | var result = new Dictionary(); 199 | result.Add("fwd_time", fwd_time.TotalMilliseconds + " millisec"); 200 | result.Add("bwd_time", bwd_time.TotalMilliseconds + " millisec"); 201 | result.Add("l2_decay_loss", l2_decay_loss.ToString()); 202 | result.Add("l1_decay_loss", l1_decay_loss.ToString()); 203 | result.Add("cost_loss", cost_loss.ToString()); 204 | result.Add("loss", (cost_loss + l1_decay_loss + l2_decay_loss).ToString()); 205 | 206 | return result; 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Trainer/TrainingOptions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | [Serializable] 10 | public class TrainingOptions 11 | { 12 | public int temporal_window = int.MinValue; 13 | public int experience_size = int.MinValue; 14 | public int start_learn_threshold = int.MinValue; 15 | public int learning_steps_total = int.MinValue; 16 | public int learning_steps_burnin = int.MinValue; 17 | public int[] hidden_layer_sizes; 18 | 19 | public double gamma = double.MinValue; 20 | public double learning_rate = double.MinValue; 21 | public double epsilon_min = double.MinValue; 22 | public double epsilon_test_time = double.MinValue; 23 | 24 | public Options options; 25 | public List layer_defs; 26 | public List random_action_distribution; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Util.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace ConvnetSharp 9 | { 10 | // a window 
stores _size_ number of values 11 | // and returns averages. Useful for keeping running 12 | // track of validation or training accuracy during SGD 13 | [Serializable] 14 | public class TrainingWindow 15 | { 16 | public List v; 17 | public int size; 18 | public int minsize; 19 | public double sum; 20 | 21 | public TrainingWindow(int size, int minsize) 22 | { 23 | this.v = new List(); 24 | this.size = size <= minsize ? 100 : size; 25 | this.minsize = minsize <= 2 ? 20 : minsize; 26 | this.sum = 0; 27 | } 28 | 29 | public void add(double x) 30 | { 31 | this.v.Add(x); 32 | this.sum += x; 33 | if (this.v.Count > this.size) 34 | { 35 | var xold = this.v[0]; 36 | v.RemoveAt(0); 37 | this.sum -= xold; 38 | } 39 | } 40 | 41 | public double get_average() 42 | { 43 | if (this.v.Count < this.minsize) 44 | return -1; 45 | else 46 | return this.sum / this.v.Count; 47 | } 48 | 49 | public void reset() 50 | { 51 | this.v = new List(); 52 | this.sum = 0; 53 | } 54 | } 55 | 56 | [Serializable] 57 | public class Util 58 | { 59 | // Random number utilities 60 | bool return_v = false; 61 | double v_val = 0.0; 62 | public Random random = new Random(); 63 | 64 | public double gaussRandom() 65 | { 66 | if(return_v) { 67 | return_v = false; 68 | return v_val; 69 | } 70 | 71 | var u = 2 * random.NextDouble() - 1; 72 | var v = 2 * random.NextDouble() - 1; 73 | var r = u * u + v * v; 74 | if(r == 0 || r > 1) return gaussRandom(); 75 | var c = Math.Sqrt(-2 * Math.Log(r) / r); 76 | v_val = v* c; // cache this 77 | return_v = true; 78 | return u* c; 79 | } 80 | 81 | public double randf(double a, double b) { return random.NextDouble() * (b-a)+a; } 82 | public int randi(int a, int b) { return random.Next(a, b); } 83 | public double randn(double mu, double std) { return mu+gaussRandom()* std; } 84 | 85 | // Array utilities 86 | public double[] zeros(int n) 87 | { 88 | if (n <= 0) 89 | { 90 | return new double[] { 0.0 }; 91 | } 92 | else 93 | { 94 | var arr = new double[n]; 95 | for (var i = 0; i < 
n; i++) { arr[i] = 0; } 96 | return arr; 97 | } 98 | } 99 | 100 | public bool arrContains(object[] arr, object elt) 101 | { 102 | for (int i = 0, n = arr.Length; i < n; i++) 103 | { 104 | if (arr[i] == elt) 105 | return true; 106 | } 107 | 108 | return false; 109 | } 110 | 111 | public object[] arrUnique(object[] arr) 112 | { 113 | var b = new List(); 114 | for (int i = 0, n = arr.Length; i < n; i++) 115 | { 116 | if (!arrContains(b.ToArray(), arr[i])) 117 | { 118 | b.Add(arr[i]); 119 | } 120 | } 121 | return b.ToArray(); 122 | } 123 | 124 | // sample from list lst according to probabilities in list probs 125 | // the two lists are of same size, and probs adds up to 1 126 | public double weightedSample(double[] lst, double[] probs) { 127 | double p = randf(0, 1.0); 128 | var cumprob = 0.0; 129 | 130 | for (int k = 0, n = lst.Length; k < n; k++) { 131 | cumprob += probs[k]; 132 | if (p < cumprob) { return lst[k]; } 133 | } 134 | 135 | return p; 136 | } 137 | 138 | // syntactic sugar function for getting default parameter values 139 | public string getopt(string opt_obj, object field_name, string default_value) { 140 | 141 | var opt = JsonConvert.DeserializeObject>(opt_obj); 142 | 143 | if (field_name.GetType().Equals(typeof(string))) { 144 | 145 | // case of single string 146 | if (opt.ContainsKey((string)field_name)) 147 | { 148 | return (string.IsNullOrEmpty(opt[(string)field_name])) ? 
opt[(string)field_name] : default_value; 149 | } 150 | else 151 | { 152 | return default_value; 153 | } 154 | } else { 155 | // assume we are given an array of string instead 156 | var ret = default_value; 157 | var fields = (string[])field_name; 158 | 159 | for (var i = 0; i< fields.Length;i++) { 160 | var field = fields[i]; 161 | 162 | if(opt.ContainsKey(field)) 163 | ret = opt[field]; 164 | } 165 | 166 | return ret; 167 | } 168 | } 169 | 170 | public void assert(Boolean condition, string message) 171 | { 172 | if (!condition) 173 | { 174 | throw new Exception(message); 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Volume.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | // Vol is the basic building block of all data in a net. 10 | // it is essentially just a 3D volume of numbers, with a 11 | // width (sx), height (sy), and depth (depth). 12 | // it is used to hold data for all filters, all volumes, 13 | // all weights, and also stores all gradients w.r.t. 14 | // the data. c is optionally a value to initialize the volume 15 | // with. If c is missing, fills the Vol with random numbers. 
16 | [Serializable] 17 | public class Volume 18 | { 19 | public int sx, sy, depth; 20 | 21 | public double[] w; 22 | public double[] dw; 23 | 24 | Util util = new Util(); 25 | 26 | public Volume(int sx, int sy, int depth) 27 | { 28 | Init(sx, sy, depth, double.MinValue); 29 | } 30 | 31 | public Volume(int sx, int sy, int depth, double c) 32 | { 33 | Init(sx, sy, depth, c); 34 | } 35 | 36 | private void Init(int sx, int sy, int depth, double c) 37 | { 38 | // we were given dimensions of the vol 39 | if (sx == 1073741823) 40 | sx = sx; 41 | this.sx = sx; 42 | this.sy = sy; 43 | this.depth = depth; 44 | 45 | var n = sx * sy * depth; 46 | this.w = util.zeros(n); 47 | this.dw = util.zeros(n); 48 | 49 | if (c == double.MinValue) 50 | { 51 | // weight normalization is done to equalize the output 52 | // variance of every neuron, otherwise neurons with a lot 53 | // of incoming connections have outputs of larger variance 54 | var scale = Math.Sqrt(1.0 / (sx * sy * depth)); 55 | for (var i = 0; i < n; i++) 56 | { 57 | this.w[i] = util.randn(0.0, scale); 58 | } 59 | } 60 | else 61 | { 62 | for (int i = 0; i < n; i++) 63 | { 64 | this.w[i] = c; 65 | } 66 | } 67 | } 68 | 69 | public double get(int x, int y, int d) 70 | { 71 | var ix = ((this.sx * y) + x) * this.depth + d; 72 | return this.w[ix]; 73 | } 74 | 75 | public void set(int x, int y, int d, double v) 76 | { 77 | var ix = ((this.sx * y) + x) * this.depth + d; 78 | this.w[ix] = v; 79 | } 80 | 81 | public void add(int x, int y, int d, double v) 82 | { 83 | var ix = ((this.sx * y) + x) * this.depth + d; 84 | this.w[ix] += v; 85 | } 86 | 87 | public double get_grad(int x, int y, int d) 88 | { 89 | var ix = ((this.sx * y) + x) * this.depth + d; 90 | return this.dw[ix]; 91 | } 92 | 93 | public void set_grad(int x, int y, int d, double v) 94 | { 95 | var ix = ((this.sx * y) + x) * this.depth + d; 96 | this.dw[ix] = v; 97 | } 98 | 99 | public void add_grad(int x, int y, int d, double v) 100 | { 101 | var ix = ((this.sx * y) + 
x) * this.depth + d; 102 | this.dw[ix] += v; 103 | } 104 | 105 | public Volume cloneAndZero() { return new Volume(this.sx, this.sy, this.depth, 0.0); } 106 | public Volume clone() 107 | { 108 | var V = new Volume(this.sx, this.sy, this.depth, 0.0); 109 | var n = this.w.Length; 110 | for (var i = 0; i < n; i++) { V.w[i] = this.w[i]; } 111 | return V; 112 | } 113 | 114 | public void addFrom(Volume V) { for (var k = 0; k < this.w.Length; k++) { this.w[k] += V.w[k]; } } 115 | public void addFromScaled(Volume V, double a) { for (var k = 0; k < this.w.Length; k++) { this.w[k] += a * V.w[k]; } } 116 | public void setConst(double a) { for (var k = 0; k < this.w.Length; k++) { this.w[k] = a; } } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /DeepQLearning/DRLAgent/DeepQLearn.cs: -------------------------------------------------------------------------------- 1 | using ConvnetSharp; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace DeepQLearning.DRLAgent 9 | { 10 | // An agent is in state0 and does action0 11 | // environment then assigns reward0 and provides new state, state1 12 | // Experience nodes store all this information, which is used in the 13 | // Q-learning update step 14 | [Serializable] 15 | public class Experience 16 | { 17 | public double[] state0; 18 | public int action0; 19 | public double reward0; 20 | public double[] state1; 21 | 22 | public Experience() 23 | { 24 | 25 | } 26 | 27 | public Experience(double[] state0, int action0, double reward0, double[] state1) 28 | { 29 | this.state0 = state0; 30 | this.action0 = action0; 31 | this.reward0 = reward0; 32 | this.state1 = state1; 33 | } 34 | } 35 | 36 | [Serializable] 37 | public struct Action 38 | { 39 | public int action; 40 | public double value; 41 | }; 42 | 43 | // A Brain object does all the magic. 
44 | // over time it receives some inputs and some rewards 45 | // and its job is to set the outputs to maximize the expected reward 46 | [Serializable] 47 | public class DeepQLearn 48 | { 49 | TrainingOptions opt; 50 | 51 | int temporal_window; 52 | int experience_size; 53 | double start_learn_threshold; 54 | double gamma; 55 | double learning_steps_total; 56 | double learning_steps_burnin; 57 | double epsilon_min; 58 | public double epsilon_test_time; 59 | 60 | int net_inputs; 61 | int num_states; 62 | int num_actions; 63 | int window_size; 64 | List state_window; 65 | List action_window; 66 | List reward_window; 67 | List net_window; 68 | 69 | double age; 70 | double forward_passes; 71 | public double epsilon; 72 | double latest_reward; 73 | Volume last_input; 74 | TrainingWindow average_reward_window; 75 | TrainingWindow average_loss_window; 76 | public bool learning; 77 | 78 | Net value_net; 79 | public Trainer tdtrainer; 80 | 81 | Util util; 82 | 83 | List random_action_distribution; 84 | List experience; 85 | 86 | public DeepQLearn(int num_states, int num_actions, TrainingOptions opt) 87 | { 88 | this.util = new Util(); 89 | this.opt = opt; 90 | 91 | // in number of time steps, of temporal memory 92 | // the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x 93 | // so to have no information from previous time step going into value function, set to 0. 94 | this.temporal_window = opt.temporal_window != int.MinValue ? opt.temporal_window : 1; 95 | // size of experience replay memory 96 | this.experience_size = opt.experience_size != int.MinValue ? opt.experience_size : 30000; 97 | // number of examples in experience replay memory before we begin learning 98 | this.start_learn_threshold = opt.start_learn_threshold != double.MinValue ? opt.start_learn_threshold : Math.Floor(Math.Min(this.experience_size * 0.1, 1000)); 99 | // gamma is a crucial parameter that controls how much plan-ahead the agent does. 
In [0,1] 100 | this.gamma = opt.gamma != double.MinValue ? opt.gamma : 0.8; 101 | 102 | // number of steps we will learn for 103 | this.learning_steps_total = opt.learning_steps_total != int.MinValue ? opt.learning_steps_total : 100000; 104 | // how many steps of the above to perform only random actions (in the beginning)? 105 | this.learning_steps_burnin = opt.learning_steps_burnin != int.MinValue ? opt.learning_steps_burnin : 3000; 106 | // what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end 107 | this.epsilon_min = opt.epsilon_min != double.MinValue ? opt.epsilon_min : 0.05; 108 | // what epsilon to use at test time? (i.e. when learning is disabled) 109 | this.epsilon_test_time = opt.epsilon_test_time != double.MinValue ? opt.epsilon_test_time : 0.00; 110 | 111 | // advanced feature. Sometimes a random action should be biased towards some values 112 | // for example in flappy bird, we may want to choose to not flap more often 113 | if (opt.random_action_distribution != null) 114 | { 115 | // this better sum to 1 by the way, and be of length this.num_actions 116 | this.random_action_distribution = opt.random_action_distribution; 117 | if (this.random_action_distribution.Count != num_actions) 118 | { 119 | Console.WriteLine("TROUBLE. random_action_distribution should be same length as num_actions."); 120 | } 121 | 122 | var sum_of_dist = this.random_action_distribution.Sum(); 123 | if (Math.Abs(sum_of_dist - 1.0) > 0.0001) { Console.WriteLine("TROUBLE. random_action_distribution should sum to 1!"); } 124 | } 125 | else 126 | { 127 | this.random_action_distribution = new List(); 128 | } 129 | 130 | // states that go into neural net to predict optimal action look as 131 | // x0,a0,x1,a1,x2,a2,...xt 132 | // this variable controls the size of that temporal window. 
Actions are 133 | // encoded as 1-of-k hot vectors 134 | this.net_inputs = num_states * this.temporal_window + num_actions * this.temporal_window + num_states; 135 | this.num_states = num_states; 136 | this.num_actions = num_actions; 137 | this.window_size = Math.Max(this.temporal_window, 2); // must be at least 2, but if we want more context even more 138 | this.state_window = new List(); 139 | this.action_window = new List(); 140 | this.reward_window = new List(); 141 | this.net_window = new List(); 142 | 143 | // Init wth dummy data 144 | for (int i = 0; i < window_size; i++) this.state_window.Add(new Volume(1, 1, 1)); 145 | for (int i = 0; i < window_size; i++) this.action_window.Add(0); 146 | for (int i = 0; i < window_size; i++) this.reward_window.Add(0.0); 147 | for (int i = 0; i < window_size; i++) this.net_window.Add(new double[] { 0.0 }); 148 | 149 | // create [state -> value of all possible actions] modeling net for the value function 150 | var layer_defs = new List(); 151 | if (opt.layer_defs != null) 152 | { 153 | // this is an advanced usage feature, because size of the input to the network, and number of 154 | // actions must check out. This is not very pretty Object Oriented programming but I can't see 155 | // a way out of it :( 156 | layer_defs = opt.layer_defs; 157 | if (layer_defs.Count < 2) { Console.WriteLine("TROUBLE! must have at least 2 layers"); } 158 | if (layer_defs[0].type != "input") { Console.WriteLine("TROUBLE! first layer must be input layer!"); } 159 | if (layer_defs[layer_defs.Count - 1].type != "regression") { Console.WriteLine("TROUBLE! last layer must be input regression!"); } 160 | if (layer_defs[0].out_depth * layer_defs[0].out_sx * layer_defs[0].out_sy != this.net_inputs) 161 | { 162 | Console.WriteLine("TROUBLE! 
Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!"); 163 | } 164 | if (layer_defs[layer_defs.Count - 1].num_neurons != this.num_actions) 165 | { 166 | Console.WriteLine("TROUBLE! Number of regression neurons should be num_actions!"); 167 | } 168 | } 169 | else 170 | { 171 | // create a very simple neural net by default 172 | layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = this.net_inputs }); 173 | if (opt.hidden_layer_sizes != null) 174 | { 175 | // allow user to specify this via the option, for convenience 176 | var hl = opt.hidden_layer_sizes; 177 | for (var k = 0; k < hl.Length; k++) 178 | { 179 | layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = hl[k], activation = "relu" }); // relu by default 180 | } 181 | } 182 | } 183 | 184 | // Create the network 185 | this.value_net = new Net(); 186 | this.value_net.makeLayers(layer_defs); 187 | 188 | // and finally we need a Temporal Difference Learning trainer! 189 | var options = new Options { learning_rate = 0.01, momentum = 0.0, batch_size = 64, l2_decay = 0.01 }; 190 | if (opt.options != null) 191 | { 192 | options = opt.options; // allow user to overwrite this 193 | } 194 | 195 | this.tdtrainer = new Trainer(this.value_net, options); 196 | 197 | // experience replay 198 | this.experience = new List(); 199 | 200 | // various housekeeping variables 201 | this.age = 0; // incremented every backward() 202 | this.forward_passes = 0; // incremented every forward() 203 | this.epsilon = 1.0; // controls exploration exploitation tradeoff. Should be annealed over time 204 | this.latest_reward = 0; 205 | //this.last_input = []; 206 | this.average_reward_window = new TrainingWindow(1000, 10); 207 | this.average_loss_window = new TrainingWindow(1000, 10); 208 | this.learning = true; 209 | } 210 | 211 | public int random_action() 212 | { 213 | // a bit of a helper function. 
It returns a random action 214 | // we are abstracting this away because in future we may want to 215 | // do more sophisticated things. For example some actions could be more 216 | // or less likely at "rest"/default state. 217 | 218 | int action = util.randi(0, this.num_actions); 219 | 220 | if (this.random_action_distribution.Count != 0) 221 | { 222 | // okay, lets do some fancier sampling: 223 | var p = util.randf(0, 1.0); 224 | var cumprob = 0.0; 225 | for (var k = 0; k < this.num_actions; k++) 226 | { 227 | cumprob += this.random_action_distribution[k]; 228 | if (p < cumprob) { action = k; break; } 229 | } 230 | } 231 | 232 | return action; 233 | } 234 | 235 | public Action policy(double[] s) 236 | { 237 | // compute the value of doing any action in this state 238 | // and return the argmax action and its value 239 | var svol = new Volume(1, 1, this.net_inputs); 240 | svol.w = s; 241 | var action_values = this.value_net.forward(svol, false); 242 | var maxk = 0; 243 | var maxval = action_values.w[0]; 244 | for (var k = 1; k < this.num_actions; k++) 245 | { 246 | if (action_values.w[k] > maxval) { maxk = k; maxval = action_values.w[k]; } 247 | } 248 | return new Action { action = maxk, value = maxval }; 249 | } 250 | 251 | public double[] getNetInput(Volume xt) 252 | { 253 | // return s = (x,a,x,a,x,a,xt) state vector. 254 | // It's a concatenation of last window_size (x,a) pairs and current state x 255 | List w = new List(); 256 | 257 | // start with current state and now go backwards and append states and actions from history temporal_window times 258 | w.AddRange(xt.w); 259 | 260 | var n = this.window_size; 261 | for (var k = 0; k < this.temporal_window; k++) 262 | { 263 | // state 264 | w.AddRange(this.state_window[n - 1 - k].w); 265 | // action, encoded as 1-of-k indicator vector. 
We scale it up a bit because 266 | // we dont want weight regularization to undervalue this information, as it only exists once 267 | var action1ofk = new double[this.num_actions]; 268 | for (var q = 0; q < this.num_actions; q++) action1ofk[q] = 0.0; 269 | action1ofk[this.action_window[n - 1 - k]] = 1.0 * this.num_states; 270 | w.AddRange(action1ofk); 271 | } 272 | 273 | return w.ToArray(); 274 | } 275 | 276 | public int forward(Volume input_array) 277 | { 278 | // compute forward (behavior) pass given the input neuron signals from body 279 | this.forward_passes += 1; 280 | this.last_input = input_array; // back this up 281 | 282 | // create network input 283 | int action; 284 | double[] net_input; 285 | if (this.forward_passes > this.temporal_window) 286 | { 287 | // we have enough to actually do something reasonable 288 | net_input = this.getNetInput(input_array); 289 | if (this.learning) 290 | { 291 | // compute epsilon for the epsilon-greedy policy 292 | this.epsilon = Math.Min(1.0, Math.Max(this.epsilon_min, 1.0 - (this.age - this.learning_steps_burnin) / (this.learning_steps_total - this.learning_steps_burnin))); 293 | } 294 | else 295 | { 296 | this.epsilon = this.epsilon_test_time; // use test-time value 297 | } 298 | 299 | var rf = util.randf(0, 1); 300 | if (rf < this.epsilon) 301 | { 302 | // choose a random action with epsilon probability 303 | action = this.random_action(); 304 | } 305 | else 306 | { 307 | // otherwise use our policy to make decision 308 | var maxact = this.policy(net_input); 309 | action = maxact.action; 310 | } 311 | } 312 | else 313 | { 314 | // pathological case that happens first few iterations 315 | // before we accumulate window_size inputs 316 | net_input = new List().ToArray(); 317 | action = this.random_action(); 318 | } 319 | 320 | // remember the state and action we took for backward pass 321 | this.net_window.RemoveAt(0); 322 | this.net_window.Add(net_input); 323 | this.state_window.RemoveAt(0); 324 | 
this.state_window.Add(input_array); 325 | this.action_window.RemoveAt(0); 326 | this.action_window.Add(action); 327 | 328 | return action; 329 | } 330 | 331 | public void backward(double reward) 332 | { 333 | this.latest_reward = reward; 334 | this.average_reward_window.add(reward); 335 | 336 | this.reward_window.RemoveAt(0); 337 | this.reward_window.Add(reward); 338 | 339 | if (!this.learning) { return; } 340 | 341 | // various book-keeping 342 | this.age += 1; 343 | 344 | // it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience 345 | // (given that an appropriate number of state measurements already exist, of course) 346 | if (this.forward_passes > this.temporal_window + 1) 347 | { 348 | var e = new Experience(); 349 | var n = this.window_size; 350 | e.state0 = this.net_window[n - 2]; 351 | e.action0 = this.action_window[n - 2]; 352 | e.reward0 = this.reward_window[n - 2]; 353 | e.state1 = this.net_window[n - 1]; 354 | 355 | if (this.experience.Count < this.experience_size) 356 | { 357 | this.experience.Add(e); 358 | } 359 | else 360 | { 361 | // replace. finite memory! 362 | var ri = util.randi(0, this.experience_size); 363 | this.experience[ri] = e; 364 | } 365 | } 366 | 367 | // learn based on experience, once we have some samples to go on 368 | // this is where the magic happens... 
369 | if (this.experience.Count > this.start_learn_threshold) 370 | { 371 | var avcost = 0.0; 372 | for (var k = 0; k < this.tdtrainer.batch_size; k++) 373 | { 374 | var re = util.randi(0, this.experience.Count); 375 | var e = this.experience[re]; 376 | var x = new Volume(1, 1, this.net_inputs); 377 | x.w = e.state0; 378 | var maxact = this.policy(e.state1); 379 | var r = e.reward0 + this.gamma * maxact.value; 380 | 381 | var ystruct = new Entry { dim=e.action0, val=r}; 382 | var loss = this.tdtrainer.train(x, ystruct); 383 | avcost += double.Parse(loss["loss"]); 384 | } 385 | 386 | avcost = avcost / this.tdtrainer.batch_size; 387 | this.average_loss_window.add(avcost); 388 | } 389 | } 390 | 391 | public string visSelf() 392 | { 393 | var t = ""; 394 | t += "experience replay size: " + this.experience.Count + Environment.NewLine; 395 | t += "exploration epsilon: " + this.epsilon + Environment.NewLine; 396 | t += "age: " + this.age + Environment.NewLine; 397 | t += "average Q-learning loss: " + this.average_loss_window.get_average() + Environment.NewLine; 398 | t += "smooth-ish reward: " + this.average_reward_window.get_average() + Environment.NewLine; 399 | 400 | return t; 401 | } 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /DeepQLearning/DRLAgent/QAgent.cs: -------------------------------------------------------------------------------- 1 | using ConvnetSharp; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Drawing; 5 | 6 | namespace DeepQLearning.DRLAgent 7 | { 8 | [Serializable] 9 | public struct Intersect 10 | { 11 | public double ua; 12 | public double ub; 13 | public Vec up; 14 | public int type; 15 | public bool intersect; 16 | }; 17 | 18 | // A 2D vector utility 19 | [Serializable] 20 | public class Vec 21 | { 22 | public double x, y; 23 | 24 | public Vec (double x, double y) 25 | { 26 | this.x = x; 27 | this.y = y; 28 | } 29 | 30 | // utilities 31 | public double 
dist_from(Vec v) { return Math.Sqrt(Math.Pow(this.x - v.x, 2) + Math.Pow(this.y - v.y, 2)); } 32 | public double length() { return Math.Sqrt(Math.Pow(this.x, 2) + Math.Pow(this.y, 2)); } 33 | 34 | // new vector returning operations 35 | public Vec add(Vec v) { return new Vec(this.x + v.x, this.y + v.y); } 36 | public Vec sub(Vec v) { return new Vec(this.x - v.x, this.y - v.y); } 37 | public Vec rotate(double a) 38 | { // CLOCKWISE 39 | return new Vec(this.x * Math.Cos(a) + this.y * Math.Sin(a), 40 | -this.x * Math.Sin(a) + this.y * Math.Cos(a)); 41 | } 42 | 43 | // in place operations 44 | public void scale(double s) { this.x *= s; this.y *= s; } 45 | public void normalize() { var d = this.length(); this.scale(1.0 / d); } 46 | } 47 | 48 | // Wall is made up of two points 49 | [Serializable] 50 | public class Wall 51 | { 52 | public Vec p1, p2; 53 | 54 | public Wall(Vec p1, Vec p2) 55 | { 56 | this.p1 = p1; 57 | this.p2 = p2; 58 | } 59 | } 60 | 61 | // Eye sensor has a maximum range and senses walls 62 | [Serializable] 63 | public class Eye 64 | { 65 | public double angle; 66 | public double max_range; 67 | public double sensed_proximity; 68 | public int sensed_type; 69 | 70 | public Eye(double angle) 71 | { 72 | this.angle = angle; // angle of the eye relative to the agent 73 | this.max_range = 85; // maximum proximity range 74 | this.sensed_proximity = 85; // proximity of what the eye is seeing. will be set in world.tick() 75 | this.sensed_type = -1; // what type of object does the eye see? 
76 | } 77 | } 78 | 79 | // item is circle thing on the floor that agent can interact with (see or eat, etc) 80 | [Serializable] 81 | public class Item 82 | { 83 | public Vec p; 84 | public int type; 85 | public double rad; 86 | public int age; 87 | public bool cleanup_; 88 | 89 | public Item(double x, double y, int type) 90 | { 91 | this.p = new Vec(x, y); // position 92 | this.type = type; 93 | this.rad = 10; // default radius 94 | this.age = 0; 95 | this.cleanup_ = false; 96 | } 97 | } 98 | 99 | // A single agent 100 | [Serializable] 101 | public class Agent 102 | { 103 | public List eyes; 104 | public List actions; 105 | public double angle, oangle, reward_bonus, digestion_signal; 106 | public double rad, rot1, rot2, prevactionix; 107 | public Vec p, op; 108 | public int actionix; 109 | public DeepQLearn brain; 110 | 111 | public Agent(DeepQLearn brain) 112 | { 113 | this.brain = brain; 114 | 115 | // positional information 116 | this.p = new Vec(50, 50); 117 | this.op = this.p; // old position 118 | this.angle = 0; // direction facing 119 | 120 | this.actions = new List(); 121 | this.actions.Add(new double[] { 1, 1 }); 122 | this.actions.Add(new double[] { 0.8, 1 }); 123 | this.actions.Add(new double[] { 1, 0.8 }); 124 | this.actions.Add(new double[] { 0.5, 0 }); 125 | this.actions.Add(new double[] { 0, 0.5 }); 126 | 127 | // properties 128 | this.rad = 10; 129 | this.eyes = new List(); 130 | for (var k = 0; k < 9; k++) { this.eyes.Add(new Eye((k - 3) * 0.25)); } 131 | 132 | this.reward_bonus = 0.0; 133 | this.digestion_signal = 0.0; 134 | 135 | // outputs on world 136 | this.rot1 = 0.0; // rotation speed of 1st wheel 137 | this.rot2 = 0.0; // rotation speed of 2nd wheel 138 | 139 | this.prevactionix = -1; 140 | } 141 | 142 | public void forward() 143 | { 144 | // in forward pass the agent simply behaves in the environment 145 | // create input to brain 146 | var num_eyes = this.eyes.Count; 147 | var input_array = new double[num_eyes * 3]; 148 | for (var i = 0; 
i < num_eyes; i++) 149 | { 150 | var e = this.eyes[i]; 151 | input_array[i * 3] = 1.0; 152 | input_array[i * 3 + 1] = 1.0; 153 | input_array[i * 3 + 2] = 1.0; 154 | if (e.sensed_type != -1) 155 | { 156 | // sensed_type is 0 for wall, 1 for food and 2 for poison. 157 | // lets do a 1-of-k encoding into the input array 158 | input_array[i * 3 + e.sensed_type] = e.sensed_proximity / e.max_range; // normalize to [0,1] 159 | } 160 | } 161 | 162 | Volume input = new Volume(num_eyes, 3, 1); 163 | input.w = input_array; 164 | 165 | // get action from brain 166 | var actionix = this.brain.forward(input); 167 | var action = this.actions[actionix]; 168 | this.actionix = actionix; //back this up 169 | 170 | // demultiplex into behavior variables 171 | this.rot1 = action[0] * 1; 172 | this.rot2 = action[1] * 1; 173 | 174 | //this.rot1 = 0; 175 | //this.rot2 = 0; 176 | } 177 | 178 | public void backward() 179 | { 180 | // in backward pass agent learns. 181 | // compute reward 182 | var proximity_reward = 0.0; 183 | var num_eyes = this.eyes.Count; 184 | for (var i = 0; i < num_eyes; i++) 185 | { 186 | var e = this.eyes[i]; 187 | // agents dont like to see walls, especially up close 188 | proximity_reward += e.sensed_type == 0 ? 
e.sensed_proximity / e.max_range : 1.0; 189 | } 190 | proximity_reward = proximity_reward / num_eyes; 191 | proximity_reward = Math.Min(1.0, proximity_reward * 2); 192 | 193 | // agents like to go straight forward 194 | var forward_reward = 0.0; 195 | if (this.actionix == 0 && proximity_reward > 0.75) forward_reward = 0.1 * proximity_reward; 196 | 197 | // agents like to eat good things 198 | var digestion_reward = this.digestion_signal; 199 | this.digestion_signal = 0.0; 200 | 201 | var reward = proximity_reward + forward_reward + digestion_reward; 202 | 203 | // pass to brain for learning 204 | this.brain.backward(reward); 205 | } 206 | } 207 | 208 | // World object contains many agents and walls and food and stuff 209 | [Serializable] 210 | public class World 211 | { 212 | Util util; 213 | 214 | int W, H; 215 | int clock; 216 | 217 | public List walls; 218 | public List items; 219 | public List agents; 220 | 221 | List collpoints; 222 | 223 | public World(DeepQLearn brain, int canvas_Width, int canvas_Height) 224 | { 225 | this.agents = new List(); 226 | this.W = canvas_Width; 227 | this.H = canvas_Height; 228 | 229 | this.util = new Util(); 230 | this.clock = 0; 231 | 232 | // set up walls in the world 233 | this.walls = new List(); 234 | var pad = 10; 235 | 236 | util_add_box(this.walls, pad, pad, this.W - pad * 2, this.H - pad * 2); 237 | util_add_box(this.walls, 100, 100, 200, 300); // inner walls 238 | 239 | this.walls.RemoveAt(walls.Count - 1); 240 | util_add_box(this.walls, 400, 100, 200, 300); 241 | this.walls.RemoveAt(walls.Count - 1); 242 | 243 | // set up food and poison 244 | this.items = new List(); 245 | for (var k = 0; k < 30; k++) 246 | { 247 | var x = util.randf(20, this.W - 20); 248 | var y = util.randf(20, this.H - 20); 249 | var t = util.randi(1, 3); // food or poison (1 and 2) 250 | var it = new Item(x, y, t); 251 | this.items.Add(it); 252 | } 253 | 254 | // set up food and poison 255 | this.agents = new List(); 256 | this.agents.Add(new 
Agent(brain));
        }

        // Axis-aligned box helper: appends the four edge walls of the
        // rectangle at (x, y) with size (w, h) to lst, traced clockwise
        // so consecutive walls share a corner.
        private void util_add_box(List<Wall> lst, double x, double y, double w, double h)
        {
            var topLeft = new Vec(x, y);
            var topRight = new Vec(x + w, y);
            var bottomRight = new Vec(x + w, y + h);
            var bottomLeft = new Vec(x, y + h);

            lst.Add(new Wall(topLeft, topRight));
            lst.Add(new Wall(topRight, bottomRight));
            lst.Add(new Wall(bottomRight, bottomLeft));
            lst.Add(new Wall(bottomLeft, topLeft));
        }

        // Casts the segment (p1, p2) against the world and returns the nearest
        // hit along it. Walls report type 0; items report their own type
        // (1 = food, 2 = poison). The result has intersect == false when
        // nothing is hit.
        public Intersect stuff_collide_(Vec p1, Vec p2, bool check_walls, bool check_items)
        {
            var closest = new Intersect() { intersect = false };

            // collide with walls
            if (check_walls)
            {
                foreach (var wall in this.walls)
                {
                    var hit = line_intersect(p1, p2, wall.p1, wall.p2);
                    if (!hit.intersect) continue;

                    hit.type = 0; // 0 marks a wall
                    // keep only the hit nearest to p1 (smallest ua)
                    if (!closest.intersect || hit.ua < closest.ua) closest = hit;
                }
            }

            // collide with items
            if (check_items)
            {
                foreach (var it in this.items)
                {
                    var hit = line_point_intersect(p1, p2, it.p, it.rad);
                    if (!hit.intersect) continue;

                    hit.type = it.type; // remember what kind of item was hit
                    if (!closest.intersect || hit.ua < closest.ua) closest = hit;
                }
            }

            return closest;
        }

        // line intersection helper function: does line segment (p1,p2) intersect segment (p3,p4) ?
325 | public Intersect line_intersect(Vec p1, Vec p2, Vec p3, Vec p4) 326 | { 327 | Intersect result = new Intersect() { intersect= false }; 328 | 329 | var denom = (p4.y - p3.y) * (p2.x - p1.x) - (p4.x - p3.x) * (p2.y - p1.y); 330 | if (denom == 0.0) { result.intersect = false; } // parallel lines 331 | 332 | var ua = ((p4.x - p3.x) * (p1.y - p3.y) - (p4.y - p3.y) * (p1.x - p3.x)) / denom; 333 | var ub = ((p2.x - p1.x) * (p1.y - p3.y) - (p2.y - p1.y) * (p1.x - p3.x)) / denom; 334 | if (ua > 0.0 && ua < 1.0 && ub > 0.0 && ub < 1.0) 335 | { 336 | var up = new Vec(p1.x + ua * (p2.x - p1.x), p1.y + ua * (p2.y - p1.y)); 337 | return new Intersect { ua = ua, ub = ub, up = up, intersect = true }; // up is intersection point 338 | } 339 | return result; 340 | } 341 | 342 | public Intersect line_point_intersect(Vec A, Vec B, Vec C, double rad) { 343 | 344 | Intersect result = new Intersect { intersect = false }; 345 | 346 | var v = new Vec(B.y-A.y,-(B.x-A.x)); // perpendicular vector 347 | var d = Math.Abs((B.x-A.x)*(A.y-C.y)-(A.x-C.x)*(B.y-A.y)); 348 | d = d / v.length(); 349 | if(d > rad) { return result; } 350 | 351 | v.normalize(); 352 | v.scale(d); 353 | double ua = 0.0; 354 | var up = C.add(v); 355 | if(Math.Abs(B.x-A.x)>Math.Abs(B.y-A.y)) { 356 | ua = (up.x - A.x) / (B.x - A.x); 357 | } else { 358 | ua = (up.y - A.y) / (B.y - A.y); 359 | } 360 | if(ua>0.0 && ua<1.0) { 361 | result = new Intersect { ua = ua, up = up, intersect = true }; 362 | } 363 | return result; 364 | } 365 | 366 | private Boolean AreSimilar(double a, double b, double tolerance) 367 | { 368 | // Values are within specified tolerance of each other.... 
369 | return Math.Abs(a - b) < tolerance; 370 | } 371 | 372 | public void tick() 373 | { 374 | // tick the environment 375 | this.clock++; 376 | 377 | // fix input to all agents based on environment process eyes 378 | this.collpoints = new List(); 379 | for (int i = 0, n = this.agents.Count; i < n; i++) 380 | { 381 | var a = this.agents[i]; 382 | for (int ei = 0, ne = a.eyes.Count; ei < ne; ei++) 383 | { 384 | var e = a.eyes[ei]; 385 | // we have a line from p to p->eyep 386 | var eyep = new Vec(a.p.x + e.max_range * Math.Sin(a.angle + e.angle), a.p.y + e.max_range * Math.Cos(a.angle + e.angle)); 387 | var res = this.stuff_collide_(a.p, eyep, true, true); 388 | 389 | if (res.intersect) 390 | { 391 | // eye collided with wall 392 | e.sensed_proximity = res.up.dist_from(a.p); 393 | e.sensed_type = res.type; 394 | } 395 | else 396 | { 397 | e.sensed_proximity = e.max_range; 398 | e.sensed_type = -1; 399 | } 400 | } 401 | } 402 | 403 | // let the agents behave in the world based on their input 404 | for (int i = 0, n = this.agents.Count; i < n; i++) 405 | { 406 | this.agents[i].forward(); 407 | } 408 | 409 | // apply outputs of agents on evironment 410 | for (int i = 0, n = this.agents.Count; i < n; i++) 411 | { 412 | var a = this.agents[i]; 413 | a.op = a.p; // back up old position 414 | a.oangle = a.angle; // and angle 415 | 416 | // steer the agent according to outputs of wheel velocities 417 | var v = new Vec(0, a.rad / 2.0); 418 | v = v.rotate(a.angle + Math.PI / 2); 419 | var w1p = a.p.add(v); // positions of wheel 1 and 2 420 | var w2p = a.p.sub(v); 421 | var vv = a.p.sub(w2p); 422 | vv = vv.rotate(-a.rot1); 423 | var vv2 = a.p.sub(w1p); 424 | vv2 = vv2.rotate(a.rot2); 425 | var np = w2p.add(vv); 426 | np.scale(0.5); 427 | var np2 = w1p.add(vv2); 428 | np2.scale(0.5); 429 | a.p = np.add(np2); 430 | 431 | a.angle -= a.rot1; 432 | if (a.angle < 0) a.angle += 2 * Math.PI; 433 | a.angle += a.rot2; 434 | if (a.angle > 2 * Math.PI) a.angle -= 2 * Math.PI; 435 | 436 | 
// agent is trying to move from p to op. Check walls 437 | var res = this.stuff_collide_(a.op, a.p, true, false); 438 | if (res.intersect) 439 | { 440 | // wall collision! reset position 441 | a.p = a.op; 442 | } 443 | 444 | // handle boundary conditions 445 | if (a.p.x < 0) a.p.x = 0; 446 | if (a.p.x > this.W) a.p.x = this.W; 447 | if (a.p.y < 0) a.p.y = 0; 448 | if (a.p.y > this.H) a.p.y = this.H; 449 | } 450 | 451 | // tick all items 452 | var update_items = false; 453 | for (int i = 0, n = this.items.Count; i < n; i++) 454 | { 455 | var it = this.items[i]; 456 | it.age += 1; 457 | 458 | // see if some agent gets lunch 459 | for (int j = 0, m = this.agents.Count; j < m; j++) 460 | { 461 | var a = this.agents[j]; 462 | var d = a.p.dist_from(it.p); 463 | if (d < it.rad + a.rad) 464 | { 465 | 466 | // wait lets just make sure that this isn't through a wall 467 | var rescheck = this.stuff_collide_(a.p, it.p, true, false); 468 | if (!rescheck.intersect) 469 | { 470 | // ding! nom nom nom 471 | if (it.type == 1) a.digestion_signal += 5.0; // mmm delicious apple 472 | if (it.type == 2) a.digestion_signal += -6.0; // ewww poison 473 | it.cleanup_ = true; 474 | update_items = true; 475 | break; // break out of loop, item was consumed 476 | } 477 | } 478 | } 479 | 480 | if (it.age > 5000 && this.clock % 100 == 0 && util.randf(0, 1) < 0.1) 481 | { 482 | it.cleanup_ = true; // replace this one, has been around too long 483 | update_items = true; 484 | } 485 | } 486 | if (update_items) 487 | { 488 | var nt = new List(); 489 | for (int i = 0, n = this.items.Count; i < n; i++) 490 | { 491 | var it = this.items[i]; 492 | if (!it.cleanup_) nt.Add(it); 493 | } 494 | this.items = nt; // swap 495 | } 496 | if (this.items.Count < 30 && this.clock % 10 == 0 && util.randf(0, 1) < 0.25) 497 | { 498 | var newitx = util.randf(20, this.W - 20); 499 | var newity = util.randf(20, this.H - 20); 500 | var newitt = util.randi(1, 3); // food or poison (1 and 2) 501 | var newit = new 
Item(newitx, newity, newitt); 502 | this.items.Add(newit); 503 | } 504 | 505 | // agents are given the opportunity to learn based on feedback of their action on environment 506 | for (int i = 0, n = this.agents.Count; i < n; i++) 507 | { 508 | this.agents[i].backward(); 509 | } 510 | } 511 | } 512 | 513 | [Serializable] 514 | public class QAgent 515 | { 516 | public int simspeed = 1; 517 | World w; 518 | 519 | [NonSerialized] 520 | Pen greenPen = new Pen(Color.LightGreen, 2); 521 | 522 | [NonSerialized] 523 | Pen redPen = new Pen(Color.Red, 2); 524 | 525 | [NonSerialized] 526 | Pen greenPen2 = new Pen(Color.LightGreen, 1); 527 | 528 | [NonSerialized] 529 | Pen redPen2 = new Pen(Color.Red, 1); 530 | 531 | [NonSerialized] 532 | Pen bluePen = new Pen(Color.Blue, 2); 533 | 534 | [NonSerialized] 535 | Pen blackPen = new Pen(Color.Black); 536 | 537 | public QAgent(DeepQLearn brain, int canvas_W, int canvas_H) 538 | { 539 | this.w = new World(brain, canvas_W, canvas_H); 540 | } 541 | 542 | public void Reinitialize() 543 | { 544 | greenPen = new Pen(Color.LightGreen, 2); 545 | redPen = new Pen(Color.Red, 2); 546 | greenPen2 = new Pen(Color.LightGreen, 1); 547 | redPen2 = new Pen(Color.Red, 1); 548 | bluePen = new Pen(Color.Blue, 2); 549 | blackPen = new Pen(Color.Black); 550 | 551 | this.simspeed = 1; 552 | this.w.agents[0].brain.learning = false; 553 | this.w.agents[0].brain.epsilon_test_time = 0.01; 554 | 555 | this.w.agents[0].op.x = 500; 556 | this.w.agents[0].op.y = 500; 557 | } 558 | 559 | public void tick() 560 | { 561 | w.tick(); 562 | } 563 | 564 | // Draw everything and return stats 565 | public string draw_world(Graphics g) 566 | { 567 | var agents = w.agents; 568 | 569 | // draw walls in environment 570 | for (int i = 0, n = w.walls.Count; i < n; i++) 571 | { 572 | var q = w.walls[i]; 573 | drawLine(g, q.p1, q.p2, blackPen); 574 | } 575 | 576 | // draw agents 577 | for (int i = 0, n = agents.Count; i < n; i++) 578 | { 579 | // draw agent's body 580 | var a = 
agents[i]; 581 | drawArc(g, a.op, (int)a.rad, 0, (float)(Math.PI * 2), blackPen); 582 | 583 | // draw agent's sight 584 | for (int ei = 0, ne = a.eyes.Count; ei < ne; ei++) 585 | { 586 | var e = a.eyes[ei]; 587 | var sr = e.sensed_proximity; 588 | Pen pen; 589 | 590 | if (e.sensed_type == 1) pen = redPen2; // apples 591 | else if (e.sensed_type == 2) pen = greenPen2; // poison 592 | else pen = blackPen; // wall 593 | 594 | //var new_x = a.op.x + sr * Math.Sin(radToDegree((float)a.oangle) + radToDegree((float)e.angle)); 595 | //var new_y = a.op.y + sr * Math.Cos(radToDegree((float)a.oangle) + radToDegree((float)e.angle)); 596 | 597 | var new_x = a.op.x + sr * Math.Sin(a.oangle + e.angle); 598 | var new_y = a.op.y + sr * Math.Cos(a.oangle + e.angle); 599 | Vec b = new Vec(new_x, new_y); 600 | 601 | drawLine(g, a.op, b, pen); 602 | } 603 | } 604 | 605 | // draw items 606 | for (int i = 0, n = w.items.Count; i < n; i++) 607 | { 608 | Pen pen = blackPen; 609 | var it = w.items[i]; 610 | if (it.type == 1) pen = redPen; 611 | if (it.type == 2) pen = greenPen; 612 | 613 | drawArc(g, it.p, (int)it.rad, 0, (float)(Math.PI * 2), pen); 614 | } 615 | 616 | return w.agents[0].brain.visSelf(); 617 | } 618 | 619 | public void goveryfast() 620 | { 621 | simspeed = 3; 622 | } 623 | 624 | public void gofast() 625 | { 626 | simspeed = 2; 627 | } 628 | 629 | public void gonormal() 630 | { 631 | simspeed = 1; 632 | } 633 | 634 | public void goslow() 635 | { 636 | simspeed = 0; 637 | } 638 | 639 | public void startlearn() 640 | { 641 | this.w.agents[0].brain.learning = true; 642 | } 643 | 644 | public void stoplearn() 645 | { 646 | this.w.agents[0].brain.learning = false; 647 | } 648 | 649 | private void drawCircle(Graphics g, Vec center, int radius, Pen pen) 650 | { 651 | var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2); 652 | g.DrawEllipse(pen, rect); 653 | } 654 | 655 | private void drawArc(Graphics g, Vec center, int radius, float 
startAngle, float sweepAngle, Pen pen) 656 | { 657 | var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2); 658 | g.DrawArc(pen, rect, radToDegree(startAngle), radToDegree(sweepAngle)); 659 | } 660 | 661 | private void drawLine(Graphics g, Vec a, Vec b, Pen pen) 662 | { 663 | Point[] points = 664 | { 665 | new Point((int)a.x, (int)a.y), 666 | new Point((int)b.x, (int)b.y) 667 | }; 668 | 669 | g.DrawLines(pen, points); 670 | } 671 | 672 | private float radToDegree(float rad) 673 | { 674 | return (float)(rad * 180 / Math.PI); 675 | } 676 | } 677 | } 678 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8} 8 | WinExe 9 | Properties 10 | DeepQLearning 11 | DeepQLearning 12 | v4.5.2 13 | 512 14 | true 15 | 16 | 17 | AnyCPU 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | AnyCPU 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | 37 | ..\..\..\packages\Newtonsoft.Json.6.0.8\lib\net45\Newtonsoft.Json.dll 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | Form 73 | 74 | 75 | Form1.cs 76 | 77 | 78 | 79 | 80 | Form1.cs 81 | 82 | 83 | ResXFileCodeGenerator 84 | Resources.Designer.cs 85 | Designer 86 | 87 | 88 | True 89 | Resources.resx 90 | 91 | 92 | SettingsSingleFileGenerator 93 | Settings.Designer.cs 94 | 95 | 96 | True 97 | Settings.settings 98 | True 99 | 100 | 101 | 102 | 103 | 104 | 105 | 112 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.sln: -------------------------------------------------------------------------------- 1 |  2 | 
Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2012 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepQLearning", "DeepQLearning.csproj", "{141003E8-8E41-4C66-AFBB-A1C725F51DF8}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Any CPU = Debug|Any CPU 9 | Release|Any CPU = Release|Any CPU 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 13 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Debug|Any CPU.Build.0 = Debug|Any CPU 14 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Release|Any CPU.ActiveCfg = Release|Any CPU 15 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Release|Any CPU.Build.0 = Release|Any CPU 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.v11.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/DeepQLearning.v11.suo -------------------------------------------------------------------------------- /DeepQLearning/Form1.Designer.cs: -------------------------------------------------------------------------------- 1 | namespace DeepQLearning 2 | { 3 | partial class Form1 4 | { 5 | /// 6 | /// Required designer variable. 7 | /// 8 | private System.ComponentModel.IContainer components = null; 9 | 10 | /// 11 | /// Clean up any resources being used. 12 | /// 13 | /// true if managed resources should be disposed; otherwise, false. 
14 | protected override void Dispose(bool disposing) 15 | { 16 | if (disposing && (components != null)) 17 | { 18 | components.Dispose(); 19 | } 20 | base.Dispose(disposing); 21 | } 22 | 23 | #region Windows Form Designer generated code 24 | 25 | /// 26 | /// Required method for Designer support - do not modify 27 | /// the contents of this method with the code editor. 28 | /// 29 | private void InitializeComponent() 30 | { 31 | System.Windows.Forms.DataVisualization.Charting.ChartArea chartArea7 = new System.Windows.Forms.DataVisualization.Charting.ChartArea(); 32 | System.Windows.Forms.DataVisualization.Charting.Legend legend7 = new System.Windows.Forms.DataVisualization.Charting.Legend(); 33 | System.Windows.Forms.DataVisualization.Charting.Series series7 = new System.Windows.Forms.DataVisualization.Charting.Series(); 34 | this.startLearning = new System.Windows.Forms.Button(); 35 | this.displayBox = new System.Windows.Forms.TextBox(); 36 | this.groupBox1 = new System.Windows.Forms.GroupBox(); 37 | this.goSlow = new System.Windows.Forms.Button(); 38 | this.goNormal = new System.Windows.Forms.Button(); 39 | this.goFast = new System.Windows.Forms.Button(); 40 | this.goVeryFast = new System.Windows.Forms.Button(); 41 | this.StopLearning = new System.Windows.Forms.Button(); 42 | this.chart1 = new System.Windows.Forms.DataVisualization.Charting.Chart(); 43 | this.groupBox2 = new System.Windows.Forms.GroupBox(); 44 | this.canvas = new System.Windows.Forms.Panel(); 45 | this.groupBox3 = new System.Windows.Forms.GroupBox(); 46 | this.PauseBtn = new System.Windows.Forms.Button(); 47 | this.saveNet = new System.Windows.Forms.Button(); 48 | this.loadNet = new System.Windows.Forms.Button(); 49 | this.groupBox1.SuspendLayout(); 50 | ((System.ComponentModel.ISupportInitialize)(this.chart1)).BeginInit(); 51 | this.groupBox2.SuspendLayout(); 52 | this.groupBox3.SuspendLayout(); 53 | this.SuspendLayout(); 54 | // 55 | // startLearning 56 | // 57 | this.startLearning.Location = 
new System.Drawing.Point(8, 21); 58 | this.startLearning.Name = "startLearning"; 59 | this.startLearning.Size = new System.Drawing.Size(134, 27); 60 | this.startLearning.TabIndex = 0; 61 | this.startLearning.Text = "Start Learning"; 62 | this.startLearning.UseVisualStyleBackColor = true; 63 | this.startLearning.Click += new System.EventHandler(this.startLearning_Click); 64 | // 65 | // displayBox 66 | // 67 | this.displayBox.Location = new System.Drawing.Point(8, 305); 68 | this.displayBox.Multiline = true; 69 | this.displayBox.Name = "displayBox"; 70 | this.displayBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; 71 | this.displayBox.Size = new System.Drawing.Size(360, 204); 72 | this.displayBox.TabIndex = 1; 73 | // 74 | // groupBox1 75 | // 76 | this.groupBox1.Controls.Add(this.loadNet); 77 | this.groupBox1.Controls.Add(this.saveNet); 78 | this.groupBox1.Controls.Add(this.PauseBtn); 79 | this.groupBox1.Controls.Add(this.goSlow); 80 | this.groupBox1.Controls.Add(this.goNormal); 81 | this.groupBox1.Controls.Add(this.goFast); 82 | this.groupBox1.Controls.Add(this.goVeryFast); 83 | this.groupBox1.Controls.Add(this.StopLearning); 84 | this.groupBox1.Controls.Add(this.startLearning); 85 | this.groupBox1.Location = new System.Drawing.Point(976, 534); 86 | this.groupBox1.Name = "groupBox1"; 87 | this.groupBox1.Size = new System.Drawing.Size(374, 127); 88 | this.groupBox1.TabIndex = 3; 89 | this.groupBox1.TabStop = false; 90 | this.groupBox1.Text = "Controls"; 91 | // 92 | // goSlow 93 | // 94 | this.goSlow.Location = new System.Drawing.Point(282, 54); 95 | this.goSlow.Name = "goSlow"; 96 | this.goSlow.Size = new System.Drawing.Size(86, 27); 97 | this.goSlow.TabIndex = 5; 98 | this.goSlow.Text = "Go slow"; 99 | this.goSlow.UseVisualStyleBackColor = true; 100 | this.goSlow.Click += new System.EventHandler(this.goSlow_Click); 101 | // 102 | // goNormal 103 | // 104 | this.goNormal.Location = new System.Drawing.Point(188, 54); 105 | this.goNormal.Name = "goNormal"; 
106 | this.goNormal.Size = new System.Drawing.Size(88, 27); 107 | this.goNormal.TabIndex = 4; 108 | this.goNormal.Text = "Go normal"; 109 | this.goNormal.UseVisualStyleBackColor = true; 110 | this.goNormal.Click += new System.EventHandler(this.goNormal_Click); 111 | // 112 | // goFast 113 | // 114 | this.goFast.Location = new System.Drawing.Point(112, 54); 115 | this.goFast.Name = "goFast"; 116 | this.goFast.Size = new System.Drawing.Size(70, 27); 117 | this.goFast.TabIndex = 3; 118 | this.goFast.Text = "Go fast"; 119 | this.goFast.UseVisualStyleBackColor = true; 120 | this.goFast.Click += new System.EventHandler(this.goFast_Click); 121 | // 122 | // goVeryFast 123 | // 124 | this.goVeryFast.Location = new System.Drawing.Point(8, 54); 125 | this.goVeryFast.Name = "goVeryFast"; 126 | this.goVeryFast.Size = new System.Drawing.Size(98, 27); 127 | this.goVeryFast.TabIndex = 2; 128 | this.goVeryFast.Text = "Go very fast"; 129 | this.goVeryFast.UseVisualStyleBackColor = true; 130 | this.goVeryFast.Click += new System.EventHandler(this.goVeryFast_Click); 131 | // 132 | // StopLearning 133 | // 134 | this.StopLearning.Location = new System.Drawing.Point(235, 21); 135 | this.StopLearning.Name = "StopLearning"; 136 | this.StopLearning.Size = new System.Drawing.Size(133, 27); 137 | this.StopLearning.TabIndex = 1; 138 | this.StopLearning.Text = "Stop Learning"; 139 | this.StopLearning.UseVisualStyleBackColor = true; 140 | this.StopLearning.Click += new System.EventHandler(this.StopLearning_Click); 141 | // 142 | // chart1 143 | // 144 | chartArea7.Name = "ChartArea1"; 145 | this.chart1.ChartAreas.Add(chartArea7); 146 | legend7.Name = "Legend1"; 147 | this.chart1.Legends.Add(legend7); 148 | this.chart1.Location = new System.Drawing.Point(8, 21); 149 | this.chart1.Name = "chart1"; 150 | series7.ChartArea = "ChartArea1"; 151 | series7.Legend = "Legend1"; 152 | series7.Name = "Series1"; 153 | this.chart1.Series.Add(series7); 154 | this.chart1.Size = new System.Drawing.Size(360, 
278); 155 | this.chart1.TabIndex = 4; 156 | this.chart1.Text = "chart1"; 157 | // 158 | // groupBox2 159 | // 160 | this.groupBox2.Controls.Add(this.canvas); 161 | this.groupBox2.Location = new System.Drawing.Point(12, 12); 162 | this.groupBox2.Name = "groupBox2"; 163 | this.groupBox2.Size = new System.Drawing.Size(958, 649); 164 | this.groupBox2.TabIndex = 5; 165 | this.groupBox2.TabStop = false; 166 | this.groupBox2.Text = "Visualization"; 167 | // 168 | // canvas 169 | // 170 | this.canvas.BackColor = System.Drawing.SystemColors.Info; 171 | this.canvas.Location = new System.Drawing.Point(6, 21); 172 | this.canvas.Name = "canvas"; 173 | this.canvas.Size = new System.Drawing.Size(946, 618); 174 | this.canvas.TabIndex = 0; 175 | this.canvas.Paint += new System.Windows.Forms.PaintEventHandler(this.canvas_Paint); 176 | // 177 | // groupBox3 178 | // 179 | this.groupBox3.Controls.Add(this.chart1); 180 | this.groupBox3.Controls.Add(this.displayBox); 181 | this.groupBox3.Location = new System.Drawing.Point(976, 12); 182 | this.groupBox3.Name = "groupBox3"; 183 | this.groupBox3.Size = new System.Drawing.Size(374, 516); 184 | this.groupBox3.TabIndex = 0; 185 | this.groupBox3.TabStop = false; 186 | this.groupBox3.Text = "Output"; 187 | // 188 | // PauseBtn 189 | // 190 | this.PauseBtn.Location = new System.Drawing.Point(148, 21); 191 | this.PauseBtn.Name = "PauseBtn"; 192 | this.PauseBtn.Size = new System.Drawing.Size(81, 27); 193 | this.PauseBtn.TabIndex = 6; 194 | this.PauseBtn.Text = "Pause"; 195 | this.PauseBtn.UseVisualStyleBackColor = true; 196 | this.PauseBtn.Click += new System.EventHandler(this.PauseBtn_Click); 197 | // 198 | // saveNet 199 | // 200 | this.saveNet.Location = new System.Drawing.Point(8, 88); 201 | this.saveNet.Name = "saveNet"; 202 | this.saveNet.Size = new System.Drawing.Size(174, 29); 203 | this.saveNet.TabIndex = 7; 204 | this.saveNet.Text = "Save QNetwork"; 205 | this.saveNet.UseVisualStyleBackColor = true; 206 | this.saveNet.Click += new 
System.EventHandler(this.saveNet_Click); 207 | // 208 | // loadNet 209 | // 210 | this.loadNet.Location = new System.Drawing.Point(188, 88); 211 | this.loadNet.Name = "loadNet"; 212 | this.loadNet.Size = new System.Drawing.Size(180, 29); 213 | this.loadNet.TabIndex = 8; 214 | this.loadNet.Text = "Load QNetwork"; 215 | this.loadNet.UseVisualStyleBackColor = true; 216 | this.loadNet.Click += new System.EventHandler(this.loadNet_Click); 217 | // 218 | // Form1 219 | // 220 | this.AutoScaleDimensions = new System.Drawing.SizeF(8F, 16F); 221 | this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; 222 | this.ClientSize = new System.Drawing.Size(1362, 673); 223 | this.Controls.Add(this.groupBox3); 224 | this.Controls.Add(this.groupBox2); 225 | this.Controls.Add(this.groupBox1); 226 | this.MaximizeBox = false; 227 | this.Name = "Form1"; 228 | this.SizeGripStyle = System.Windows.Forms.SizeGripStyle.Show; 229 | this.Text = "Deep Q Learning Demo"; 230 | this.FormClosed += new System.Windows.Forms.FormClosedEventHandler(this.Form1_FormClosed); 231 | this.groupBox1.ResumeLayout(false); 232 | ((System.ComponentModel.ISupportInitialize)(this.chart1)).EndInit(); 233 | this.groupBox2.ResumeLayout(false); 234 | this.groupBox3.ResumeLayout(false); 235 | this.groupBox3.PerformLayout(); 236 | this.ResumeLayout(false); 237 | 238 | } 239 | 240 | #endregion 241 | 242 | private System.Windows.Forms.Button startLearning; 243 | private System.Windows.Forms.TextBox displayBox; 244 | private System.Windows.Forms.GroupBox groupBox1; 245 | private System.Windows.Forms.DataVisualization.Charting.Chart chart1; 246 | private System.Windows.Forms.GroupBox groupBox2; 247 | private System.Windows.Forms.GroupBox groupBox3; 248 | private System.Windows.Forms.Button StopLearning; 249 | private System.Windows.Forms.Button goSlow; 250 | private System.Windows.Forms.Button goNormal; 251 | private System.Windows.Forms.Button goFast; 252 | private System.Windows.Forms.Button goVeryFast; 253 | private 
using ConvnetSharp;
using DeepQLearning.DRLAgent;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace DeepQLearning
{
    /// <summary>
    /// Main window: hosts the simulation canvas, drives the background
    /// learning loop, and saves/loads the trained Q-network.
    /// </summary>
    public partial class Form1 : Form
    {
        Pen blackPen = new Pen(Color.Black);
        Pen greenPen = new Pen(Color.LightGreen, 5);

        // Worker thread that advances the simulation.
        private Thread workerThread = null;

        // volatile: both flags are written on the UI thread and polled by the
        // worker thread, so reads must not be cached.
        volatile bool needToStop = false;
        volatile bool paused = false;

        QAgent qAgent;

        // Delay in ms between simulation ticks; lower = faster simulation.
        int interval = 30;

        string netFile = Path.Combine(Environment.CurrentDirectory, "deepQnet.dat");

        public Form1()
        {
            InitializeComponent();

            // Panel is not double-buffered by default, which causes heavy
            // flicker while redrawing the world. The property is protected,
            // so it is enabled via reflection.
            typeof(Panel).InvokeMember("DoubleBuffered",
                BindingFlags.SetProperty | BindingFlags.Instance | BindingFlags.NonPublic,
                null, canvas, new object[] { true });
        }

        /// <summary>
        /// Repaints the world and mirrors the agent's status text plus the
        /// current simulation speed into the display box.
        /// </summary>
        private void canvas_Paint(object sender, PaintEventArgs e)
        {
            if (qAgent != null)
            {
                displayBox.Text = qAgent.draw_world(e.Graphics);

                // simspeed: 0 = Slow .. 3 = Very Fast (set by the go*_Click handlers).
                switch (qAgent.simspeed)
                {
                    case 0:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Slow";
                        break;

                    case 1:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Normal";
                        break;

                    case 2:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Fast";
                        break;

                    case 3:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Very Fast";
                        break;
                }
            }

            canvas.Update();
        }

        #region // Button Controls
        private void StopLearning_Click(object sender, EventArgs e)
        {
            // Guard: clicking Stop before Start must not crash.
            if (qAgent != null)
                qAgent.stoplearn();
        }

        private void startLearning_Click(object sender, EventArgs e)
        {
            if (qAgent == null)
            {
                var num_inputs = 27; // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
                var num_actions = 5; // 5 possible angles agent can turn
                var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

                // The value function network computes a value of taking any of
                // the possible actions given an input state. Specified explicitly
                // here the hard way, but one could equivalently use
                // opt.hidden_layer_sizes = [20,20] to insert simple relu layers.
                var layer_defs = new List<LayerDefinition>();
                layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

                // Options for the Temporal Difference learner that trains the
                // above net by backpropping the temporal difference learning rule.
                var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

                var tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window = temporal_window;
                tdtrainer_options.experience_size = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min = 0.05;
                tdtrainer_options.epsilon_test_time = 0.00;
                tdtrainer_options.layer_defs = layer_defs;
                tdtrainer_options.options = opt;

                var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain, canvas.Width, canvas.Height);
            }
            else
                qAgent.startlearn();

            EnsureWorkerStarted();
        }

        // Starts the background simulation thread if it is not already running.
        private void EnsureWorkerStarted()
        {
            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                // Background thread: never keep the process alive on its own.
                workerThread.IsBackground = true;
                workerThread.Start();
            }
        }

        private void PauseBtn_Click(object sender, EventArgs e)
        {
            // Toggle pause state; the button label always shows the NEXT action.
            if (paused)
            {
                PauseBtn.Text = "Pause";
                paused = false;
            }
            else
            {
                PauseBtn.Text = "Continue";
                paused = true;
            }
        }

        private void saveNet_Click(object sender, EventArgs e)
        {
            // Guard: nothing to serialize before the agent exists.
            if (qAgent == null)
            {
                displayBox.Text = "Nothing to save - start learning first";
                return;
            }

            // Save the network to file.
            // NOTE(review): BinaryFormatter is insecure and obsolete in modern
            // .NET; it is kept here only for compatibility with the existing
            // deepQnet.dat format. Do not load files from untrusted sources.
            using (FileStream fstream = new FileStream(netFile, FileMode.Create))
            {
                new BinaryFormatter().Serialize(fstream, qAgent);
            }

            displayBox.Text = "QNetwork saved successfully";
        }

        private void loadNet_Click(object sender, EventArgs e)
        {
            // Guard: avoid FileNotFoundException when no snapshot exists yet.
            if (!File.Exists(netFile))
            {
                displayBox.Text = "No saved QNetwork found at " + netFile;
                return;
            }

            // Load the network from file (same BinaryFormatter caveat as saving:
            // only open files this application produced itself).
            using (FileStream fstream = new FileStream(netFile, FileMode.Open))
            {
                qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
                qAgent.Reinitialize();
            }

            EnsureWorkerStarted();
        }

        private void Form1_FormClosed(object sender, FormClosedEventArgs e)
        {
            // Signal the worker loop to exit.
            needToStop = true;

            if (workerThread != null)
            {
                // Pump UI messages while waiting so a pending Invoke from the
                // worker thread cannot deadlock the join.
                while (!workerThread.Join(100))
                    Application.DoEvents();
                workerThread = null;
            }
        }

        private void goNormal_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.gonormal();
            interval = 25;
        }

        private void goFast_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.gofast();
            interval = 10;
        }

        private void goVeryFast_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.goveryfast();
            interval = 0;
        }

        private void goSlow_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.goslow();
            interval = 50;
        }
        #endregion

        // Delegate to enable async calls for setting control properties.
        private delegate void UpdateUICallback(Panel panel);

        // Thread-safe refresh of a panel: marshals to the UI thread if needed.
        private void UpdateUI(Panel panel)
        {
            if (needToStop)
                return;

            if (panel.InvokeRequired)
            {
                UpdateUICallback d = new UpdateUICallback(UpdateUI);
                Invoke(d, new object[] { panel });
            }
            else
            {
                panel.Refresh();
            }
        }

        // Worker loop: tick the simulation and repaint until told to stop.
        private void BackgroundThread()
        {
            while (!needToStop)
            {
                if (!paused)
                {
                    qAgent.tick();
                    UpdateUI(canvas);
                }

                Thread.Sleep(interval);
            }
        }

        /// <summary>
        /// Intersects the segment A->B with a circle of radius <paramref name="rad"/>
        /// centred at C. Returns the intersection point closest to A (with its
        /// distance from the circle's chord midpoint in <c>ua</c>), or
        /// <c>intersect == false</c> when the segment misses the circle.
        /// </summary>
        public Intersect line_point_intersect(Graphics g, Vec A, Vec B, Vec C, double rad)
        {
            const double Epsilon = 1e-9; // tolerance for floating-point comparisons

            Intersect result = new Intersect { intersect = false };

            // Euclidean length of segment AB.
            var LAB = Math.Sqrt(Math.Pow(B.x - A.x, 2) + Math.Pow(B.y - A.y, 2));

            // Degenerate segment (A == B): nothing to intersect, and the unit
            // vector below would divide by zero.
            if (LAB < Epsilon)
                return result;

            // Unit direction vector D from A to B.
            var Dx = (B.x - A.x) / LAB;
            var Dy = (B.y - A.y) / LAB;

            // Parametrise the line as (x, y) = (Ax, Ay) + t * (Dx, Dy);
            // t is the signed distance along AB of the projection of C.
            var t = Dx * (C.x - A.x) + Dy * (C.y - A.y);

            // E = projection of C onto the line (closest point on line to C).
            var Ex = t * Dx + A.x;
            var Ey = t * Dy + A.y;

            // Distance from the line to the circle centre.
            var LEC = Math.Sqrt(Math.Pow(Ex - C.x, 2) + Math.Pow(Ey - C.y, 2));

            // Line intersects the circle only if it passes closer than rad;
            // tangency (LEC == rad) and misses both yield no intersection,
            // exactly as in the original implementation.
            if (LEC < rad)
            {
                // Half-chord length from E to each intersection point.
                var dt = Math.Sqrt(rad * rad - LEC * LEC);

                // First intersection point F (the one closest to A) sits at
                // parameter tF = t - dt along the segment.
                var tF = t - dt;
                var F = new Vec(tF * Dx + A.x, tF * Dy + A.y);

                // Accept only if F lies on the segment: 0 <= tF <= LAB.
                // The original code compared Math.Atan2 angles with == to test
                // that F is in front of A; exact double equality is unreliable,
                // so the parameter range is checked directly instead.
                if (tF >= 0 && tF <= LAB)
                {
                    result = new Intersect { ua = dt, up = F, intersect = true };
                }
            }

            return result;
        }

        // Draws a circle outline of the given radius around center.
        private void drawCircle(Graphics g, Vec center, int radius, Pen pen)
        {
            var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2);
            g.DrawEllipse(pen, rect);
        }

        // Draws a straight line segment from a to b.
        private void drawLine(Graphics g, Vec a, Vec b, Pen pen)
        {
            g.DrawLine(pen, (int)a.x, (int)a.y, (int)b.x, (int)b.y);
        }
    }
}
13 | /// 14 | [STAThread] 15 | static void Main() 16 | { 17 | Application.EnableVisualStyles(); 18 | Application.SetCompatibleTextRenderingDefault(false); 19 | Application.Run(new Form1()); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("DeepQLearning")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("DeepQLearning")] 13 | [assembly: AssemblyCopyright("Copyright © 2016")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("141003e8-8e41-4c66-afbb-a1c725f51df8")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace DeepQLearning.Properties 12 | { 13 | 14 | 15 | /// 16 | /// A strongly-typed resource class, for looking up localized strings, etc. 17 | /// 18 | // This class was auto-generated by the StronglyTypedResourceBuilder 19 | // class via a tool like ResGen or Visual Studio. 20 | // To add or remove a member, edit your .ResX file then rerun ResGen 21 | // with the /str option, or rebuild your VS project. 
22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] 23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 25 | internal class Resources 26 | { 27 | 28 | private static global::System.Resources.ResourceManager resourceMan; 29 | 30 | private static global::System.Globalization.CultureInfo resourceCulture; 31 | 32 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 33 | internal Resources() 34 | { 35 | } 36 | 37 | /// 38 | /// Returns the cached ResourceManager instance used by this class. 39 | /// 40 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 41 | internal static global::System.Resources.ResourceManager ResourceManager 42 | { 43 | get 44 | { 45 | if ((resourceMan == null)) 46 | { 47 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepQLearning.Properties.Resources", typeof(Resources).Assembly); 48 | resourceMan = temp; 49 | } 50 | return resourceMan; 51 | } 52 | } 53 | 54 | /// 55 | /// Overrides the current thread's CurrentUICulture property for all 56 | /// resource lookups using this strongly typed resource class. 
57 | /// 58 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 59 | internal static global::System.Globalization.CultureInfo Culture 60 | { 61 | get 62 | { 63 | return resourceCulture; 64 | } 65 | set 66 | { 67 | resourceCulture = value; 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Resources.resx: -------------------------------------------------------------------------------- 1 |  2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | text/microsoft-resx 107 | 108 | 109 | 2.0 110 | 111 | 112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 113 | 114 | 115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Settings.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 
8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace DeepQLearning.Properties 12 | { 13 | 14 | 15 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 16 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "11.0.0.0")] 17 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase 18 | { 19 | 20 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); 21 | 22 | public static Settings Default 23 | { 24 | get 25 | { 26 | return defaultInstance; 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Settings.settings: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /DeepQLearning/bin/Debug/deepQnet.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/bin/Debug/deepQnet.dat -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.Form1.resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.Form1.resources -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.Properties.Resources.resources: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.Properties.Resources.resources -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csproj.FileListAbsolute.txt: -------------------------------------------------------------------------------- 1 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.exe.config 2 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.exe 3 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.pdb 4 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\Newtonsoft.Json.dll 5 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 6 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 7 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 8 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 9 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 10 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.exe 11 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.pdb 12 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe.config 13 | C:\Users\Shelton\Documents\Visual Studio 
2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.exe 14 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.pdb 15 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe 16 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.pdb 17 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 18 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 19 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 20 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 21 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 22 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe.config 23 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.exe 24 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.pdb 25 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe 26 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.pdb 27 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 28 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 29 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 30 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 31 | 
C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 32 | -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csproj.GenerateResource.Cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.csproj.GenerateResource.Cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csprojResolveAssemblyReference.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.csprojResolveAssemblyReference.cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.exe -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.pdb -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferences.cache: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferences.cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 dubezOniner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep QLearning demo 2 | This demo is a C# port of [ConvNetJS Reinforcement Learning Demo](https://cs.stanford.edu/people/karpathy/convnetjs/demo/rldemo.html) by Andrej Karpathy. 3 | Watch the demo video on [YouTube](https://www.youtube.com/watch?v=pTgI_-yYlBA&t=9s). 4 | This project also uses Cedric Bovar's [ConvNetSharp library](https://github.com/cbovar/ConvNetSharp) for the Convolutional Neural Networks. 5 | 6 | # Pretrained model 7 | I included a pretrained model that I trained for a few hours; you can load and use it if you don't like waiting :) 8 | --------------------------------------------------------------------------------