/*
Repository layout (reconstructed from the dump this file was recovered from):

DeepQLearning
├── App.config
├── ConvnetSharp
│   ├── Gradient.cs
│   ├── Layers
│   │   ├── ConvLayer.cs
│   │   ├── DropoutLayer.cs
│   │   ├── FullyConnectedLayer.cs
│   │   ├── InputLayer.cs
│   │   ├── Layer.cs
│   │   ├── LayerDefinition.cs
│   │   ├── ReLULayer.cs
│   │   ├── RegressionLayer.cs
│   │   └── SVMLayer.cs
│   ├── Net.cs
│   ├── Trainer
│   │   ├── Options.cs
│   │   ├── Trainer.cs
│   │   └── TrainingOptions.cs
│   ├── Util.cs
│   └── Volume.cs
├── DRLAgent
│   ├── DeepQLearn.cs
│   └── QAgent.cs
├── DeepQLearning.csproj
├── DeepQLearning.sln
├── DeepQLearning.v11.suo
├── Form1.Designer.cs
├── Form1.cs
├── Form1.resx
├── Program.cs
├── Properties
│   ├── AssemblyInfo.cs
│   ├── Resources.Designer.cs
│   ├── Resources.resx
│   ├── Settings.Designer.cs
│   └── Settings.settings
├── bin/Debug/deepQnet.dat
├── obj/Debug/(build artifacts)
├── LICENSE
└── README.md
*/

// ==================== /DeepQLearning/App.config ====================
// NOTE(review): the XML content of App.config was stripped by the dump/escaping
// process (only empty numbered lines remain in the source); the original
// configuration cannot be reconstructed from this file.

// ==================== /DeepQLearning/ConvnetSharp/Gradient.cs ====================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// A parameter/gradient pair handed to the Trainer by each layer's
    /// getParamsAndGrads(). w and dw are shared references into the owning
    /// layer's Volume arrays, so the Trainer mutates layer weights in place.
    /// </summary>
    /// <remarks>
    /// double.MinValue is used throughout this code base as an "unset" sentinel
    /// (see LayerDefinition / Options); the Trainer substitutes its own defaults
    /// when it sees the sentinel.
    /// </remarks>
    [Serializable]
    public class Gradient
    {
        public double[] w;   // parameter values (shared with the layer)
        public double[] dw;  // accumulated gradients for w
        public double l1_decay_mul = double.MinValue; // per-parameter L1 decay multiplier ("unset" sentinel)
        public double l2_decay_mul = double.MinValue; // per-parameter L2 decay multiplier ("unset" sentinel)
    }
}
// ==================== /DeepQLearning/ConvnetSharp/Layers/ConvLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // - ConvLayer does convolutions (so weight sharing spatially)
    // NOTE(review): generic type arguments (e.g. List<Volume>) were stripped by
    // the dump this file was recovered from and have been restored here. The
    // JavaScript-leftover "| 0" truncation no-ops from the ConvNetJS original
    // have been removed (they are identity operations on C# ints).
    [Serializable]
    public class ConvLayer : LayerBase
    {
        Util util = new Util();

        Volume biases;    // one bias per output filter
        int stride, pad;  // filter stride / zero-padding around the input

        /// <summary>
        /// Builds a convolutional layer from a LayerDefinition. Required fields:
        /// n_filters, sx, in_depth, in_sx, in_sy. Optional fields use the
        /// project-wide int.MinValue / double.MinValue "unset" sentinels and
        /// fall back to the defaults below.
        /// </summary>
        public ConvLayer(LayerDefinition def) : base()
        {
            // required
            this.out_depth = def.n_filters;
            this.sx = def.sx; // filter size. Should be odd if possible, it's cleaner.
            this.in_depth = def.in_depth;
            this.in_sx = def.in_sx;
            this.in_sy = def.in_sy;

            // optional
            this.sy = def.sy != int.MinValue ? def.sy : this.sx;
            this.stride = def.stride != int.MinValue ? def.stride : 1; // stride at which we apply filters to input volume
            this.pad = def.pad != int.MinValue ? def.pad : 0; // amount of 0 padding to add around borders of input volume
            this.l1_decay_mul = def.l1_decay_mul != double.MinValue ? def.l1_decay_mul : 0.0;
            this.l2_decay_mul = def.l2_decay_mul != double.MinValue ? def.l2_decay_mul : 1.0;

            // computed
            // note we are doing floor, so if the strided convolution of the filter doesnt fit into the input
            // volume exactly, the output volume will be trimmed and not contain the (incomplete) computed
            // final application.
            this.out_sx = (int)Math.Floor((double)(def.in_sx + this.pad * 2 - this.sx) / this.stride + 1);
            this.out_sy = (int)Math.Floor((double)(def.in_sy + this.pad * 2 - this.sy) / this.stride + 1);
            this.type = "conv";

            // initializations
            var bias = def.bias_pref != double.MinValue ? def.bias_pref : 0.0;
            this.filters = new List<Volume>();
            for (var i = 0; i < this.out_depth; i++) { this.filters.Add(new Volume(this.sx, this.sy, this.in_depth)); }
            this.biases = new Volume(1, 1, this.out_depth, bias);
        }

        /// <summary>
        /// Forward pass: convolves every filter over the (zero-padded) input
        /// volume V, writing the result to out_act. is_training is unused here.
        /// </summary>
        public override Volume forward(Volume V, bool is_training)
        {
            // optimized code by @mdda that achieves 2x speedup over previous version
            this.in_act = V;
            var A = new Volume(this.out_sx, this.out_sy, this.out_depth, 0.0);

            var V_sx = V.sx;
            var V_sy = V.sy;
            var xy_stride = this.stride;

            for (var d = 0; d < this.out_depth; d++)
            {
                var f = this.filters[d];
                var y = -this.pad;
                for (var ay = 0; ay < this.out_sy; y += xy_stride, ay++)
                {
                    var x = -this.pad;
                    for (var ax = 0; ax < this.out_sx; x += xy_stride, ax++)
                    {
                        // convolve centered at this particular location
                        var a = 0.0;
                        for (var fy = 0; fy < f.sy; fy++)
                        {
                            var oy = y + fy; // coordinates in the original input array coordinates
                            for (var fx = 0; fx < f.sx; fx++)
                            {
                                var ox = x + fx;
                                // skip positions that fall into the zero padding
                                if (oy >= 0 && oy < V_sy && ox >= 0 && ox < V_sx)
                                {
                                    for (var fd = 0; fd < f.depth; fd++)
                                    {
                                        // avoid function call overhead (x2) for efficiency, compromise modularity :(
                                        a += f.w[((f.sx * fy) + fx) * f.depth + fd] * V.w[((V_sx * oy) + ox) * V.depth + fd];
                                    }
                                }
                            }
                        }
                        a += this.biases.w[d];
                        A.set(ax, ay, d, a);
                    }
                }
            }
            this.out_act = A;
            return this.out_act;
        }

        /// <summary>
        /// Backward pass: reads the chain-rule gradients already stored in
        /// out_act.dw and accumulates filter/bias gradients, filling in_act.dw.
        /// The _y argument is ignored (only loss layers use it); returns 0.
        /// </summary>
        public override double backward(object _y)
        {
            var V = this.in_act;
            V.dw = util.zeros(V.w.Length); // zero out gradient wrt bottom data, we're about to fill it

            var V_sx = V.sx;
            var V_sy = V.sy;
            var xy_stride = this.stride;

            for (var d = 0; d < this.out_depth; d++)
            {
                var f = this.filters[d];
                var y = -this.pad;
                for (var ay = 0; ay < this.out_sy; y += xy_stride, ay++)
                {
                    var x = -this.pad;
                    for (var ax = 0; ax < this.out_sx; x += xy_stride, ax++)
                    {
                        // convolve centered at this particular location
                        var chain_grad = this.out_act.get_grad(ax, ay, d); // gradient from above, from chain rule
                        for (var fy = 0; fy < f.sy; fy++)
                        {
                            var oy = y + fy; // coordinates in the original input array coordinates
                            for (var fx = 0; fx < f.sx; fx++)
                            {
                                var ox = x + fx;
                                if (oy >= 0 && oy < V_sy && ox >= 0 && ox < V_sx)
                                {
                                    for (var fd = 0; fd < f.depth; fd++)
                                    {
                                        // avoid function call overhead (x2) for efficiency, compromise modularity :(
                                        var ix1 = ((V_sx * oy) + ox) * V.depth + fd;
                                        var ix2 = ((f.sx * fy) + fx) * f.depth + fd;
                                        f.dw[ix2] += V.w[ix1] * chain_grad;
                                        V.dw[ix1] += f.w[ix2] * chain_grad;
                                    }
                                }
                            }
                        }
                        this.biases.dw[d] += chain_grad;
                    }
                }
            }

            return 0.0;
        }

        /// <summary>
        /// Exposes filter weights/gradients (with this layer's decay
        /// multipliers) plus the biases (no weight decay) to the Trainer.
        /// </summary>
        public override Gradient[] getParamsAndGrads()
        {
            var response = new List<Gradient>();
            for (var i = 0; i < this.out_depth; i++)
            {
                response.Add(new Gradient { w = this.filters[i].w, dw = this.filters[i].dw, l2_decay_mul = this.l2_decay_mul, l1_decay_mul = this.l1_decay_mul });
            }
            response.Add(new Gradient { w = this.biases.w, dw = this.biases.dw, l1_decay_mul = 0.0, l2_decay_mul = 0.0 });
            return response.ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/DropoutLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // An inefficient dropout layer
    // Note this is not most efficient implementation since the layer before
    // computed all these activations and now we're just going to drop them :(
    // same goes for backward pass. Also, if we wanted to be efficient at test time
    // we could equivalently be clever and upscale during train and copy pointers during test
    // todo: make more efficient.
    [Serializable]
    public class DropoutLayer : LayerBase
    {
        bool[] dropped; // which activations were zeroed in the last training forward pass

        Util util = new Util();

        public DropoutLayer(LayerDefinition def) : base()
        {
            // computed
            this.out_sx = def.in_sx;
            this.out_sy = def.in_sy;
            this.out_depth = def.in_depth;
            this.type = "dropout";
            // BUGFIX(review): the original compared def.drop_prob != double.NaN,
            // which is ALWAYS true (every comparison with NaN is false, so !=
            // always yields true). An unset drop_prob therefore kept the
            // double.MinValue sentinel instead of the intended 0.5 default.
            // Compare against the sentinel, as every other optional field in
            // this code base does (and as Net.makeLayers does for drop_prob).
            this.drop_prob = def.drop_prob != double.MinValue ? def.drop_prob : 0.5;
            this.dropped = new bool[this.out_sx * this.out_sy * this.out_depth];
        }

        /// <summary>
        /// Training: zeroes each activation independently with probability
        /// drop_prob. Prediction: scales all activations by drop_prob.
        /// NOTE(review): scaling by drop_prob (rather than the keep probability
        /// 1 - drop_prob) follows the ConvNetJS original and is only exact for
        /// drop_prob == 0.5 — confirm before using other drop probabilities.
        /// </summary>
        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;

            var V2 = V.clone();
            var N = V.w.Length;
            if (is_training)
            {
                // do dropout
                for (var i = 0; i < N; i++)
                {
                    if (util.random.NextDouble() < this.drop_prob)
                    {
                        // drop!
                        V2.w[i] = 0;
                        this.dropped[i] = true;
                    }
                    else
                    {
                        this.dropped[i] = false;
                    }
                }
            }
            else
            {
                // scale the activations during prediction
                for (var i = 0; i < N; i++) { V2.w[i] *= this.drop_prob; }
            }
            this.out_act = V2;
            return this.out_act; // dummy identity function for now
        }

        /// <summary>
        /// Passes gradients straight through for surviving units; dropped units
        /// get zero gradient. y is ignored; returns 0.
        /// </summary>
        public override double backward(object y)
        {
            var V = this.in_act; // we need to set dw of this
            var chain_grad = this.out_act;
            var N = V.w.Length;
            V.dw = util.zeros(N); // zero out gradient wrt data
            for (var i = 0; i < N; i++)
            {
                if (!(this.dropped[i]))
                {
                    V.dw[i] = chain_grad.dw[i]; // copy over the gradient
                }
            }

            return 0.0;
        }

        // Dropout has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/FullyConnectedLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Dense layer: every output neuron is connected to every input activation.
    // NOTE(review): generic type arguments (List<Volume>, List<Gradient>) were
    // stripped by the dump this file was recovered from and have been restored.
    [Serializable]
    public class FullyConnectedLayer : LayerBase
    {
        Volume biases; // one bias per output neuron

        Util util = new Util();

        public FullyConnectedLayer(LayerDefinition def) : base()
        {
            // required
            this.out_depth = def.num_neurons;

            // optional (double.MinValue = "unset" sentinel)
            this.l1_decay_mul = def.l1_decay_mul != double.MinValue ? def.l1_decay_mul : 0.0;
            this.l2_decay_mul = def.l2_decay_mul != double.MinValue ? def.l2_decay_mul : 1.0;

            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "fc";

            // initializations: one 1x1xnum_inputs weight Volume per neuron
            var bias = def.bias_pref != double.MinValue ? def.bias_pref : 0.0;
            this.filters = new List<Volume>();
            for (var i = 0; i < this.out_depth; i++) { this.filters.Add(new Volume(1, 1, this.num_inputs)); }
            this.biases = new Volume(1, 1, this.out_depth, bias);
        }

        /// <summary>Forward pass: out[i] = dot(V.w, filters[i].w) + bias[i].</summary>
        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            var A = new Volume(1, 1, this.out_depth, 0.0);
            var Vw = V.w;
            for (var i = 0; i < this.out_depth; i++)
            {
                var a = 0.0;
                var wi = this.filters[i].w;
                for (var d = 0; d < this.num_inputs; d++)
                {
                    a += Vw[d] * wi[d]; // for efficiency use Vols directly for now
                }
                a += this.biases.w[i];
                A.w[i] = a;
            }
            this.out_act = A;
            return this.out_act;
        }

        /// <summary>
        /// Backward pass: accumulates weight/bias gradients from out_act.dw and
        /// fills in_act.dw. y is ignored; returns 0.
        /// </summary>
        public override double backward(object y)
        {
            var V = this.in_act;
            V.dw = util.zeros(V.w.Length); // zero out the gradient in input Vol

            // compute gradient wrt weights and data
            for (var i = 0; i < this.out_depth; i++)
            {
                var tfi = this.filters[i];
                var chain_grad = this.out_act.dw[i];
                for (var d = 0; d < this.num_inputs; d++)
                {
                    V.dw[d] += tfi.w[d] * chain_grad;  // grad wrt input data
                    tfi.dw[d] += V.w[d] * chain_grad;  // grad wrt params
                }
                this.biases.dw[i] += chain_grad;
            }

            return 0.0;
        }

        /// <summary>Weights (with decay multipliers) plus biases (no decay).</summary>
        public override Gradient[] getParamsAndGrads()
        {
            var response = new List<Gradient>();
            for (var i = 0; i < this.out_depth; i++)
            {
                response.Add(new Gradient { w = this.filters[i].w, dw = this.filters[i].dw, l1_decay_mul = this.l1_decay_mul, l2_decay_mul = this.l2_decay_mul });
            }

            response.Add(new Gradient { w = this.biases.w, dw = this.biases.dw, l1_decay_mul = 0.0, l2_decay_mul = 0.0 });
            return response.ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/InputLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Identity layer that declares the input dimensions of the network.
    [Serializable]
    public class InputLayer : LayerBase
    {
        Util util = new Util();

        public InputLayer(LayerDefinition def) : base()
        {
            // required: depth
            this.out_depth = def.out_depth;

            // NOTE(review): original comment said these dimensions "default to 1",
            // but the code copies them verbatim (an unset definition leaves the
            // int.MinValue sentinel) — confirm callers always set out_sx/out_sy.
            this.out_sx = def.out_sx;
            this.out_sy = def.out_sy;

            // computed
            this.type = "input";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V;
            return this.out_act; // simply identity function for now
        }

        // Identity: nothing to backpropagate, no parameters.
        public override double backward(object y) { return 0.0; }
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/Layer.cs ====================
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Base class for all layers. Fields mirror ConvNetJS layer properties;
    /// not every field is meaningful for every layer type.
    /// </summary>
    [Serializable]
    public abstract class LayerBase
    {
        public string type;        // layer discriminator, e.g. "conv", "fc", "relu"
        public string activation;
        public int group_size;
        public int num_neurons;
        public int num_classes;
        public int num_inputs;
        public double bias_pref;
        public double drop_prob;

        public int out_depth;
        public int out_sx;
        public int out_sy;
        public int in_depth;
        public int in_sx;
        public int in_sy;
        public int sx;             // filter width (conv layers)
        public int sy;             // filter height (conv layers)

        public Volume in_act;      // input of the last forward() call
        public Volume out_act;     // output of the last forward() call

        public double l1_decay_mul;
        public double l2_decay_mul;

        // trainable weight volumes (restored generic argument: List<Volume>)
        public List<Volume> filters;

        public abstract Gradient[] getParamsAndGrads();
        public abstract Volume forward(Volume V, bool is_training);
        public abstract double backward(object y);
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/LayerDefinition.cs ====================
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Declarative description of a layer, consumed by Net.makeLayers.
    /// int.MinValue / double.MinValue mean "not set" — each layer constructor
    /// substitutes its own default when it sees the sentinel.
    /// </summary>
    [Serializable]
    public class LayerDefinition
    {
        public string type;
        public string activation;
        public int group_size = int.MinValue;
        public int num_neurons = int.MinValue;
        public int num_classes = int.MinValue;
        public int num_inputs = int.MinValue;
        public double bias_pref = double.MinValue;
        public double drop_prob = double.MinValue;

        public int out_depth = int.MinValue;
        public int out_sx = int.MinValue;
        public int out_sy = int.MinValue;
        public int in_depth = int.MinValue;
        public int in_sx = int.MinValue;
        public int in_sy = int.MinValue;
        public int sx = int.MinValue;
        public int sy = int.MinValue;

        public double l1_decay_mul = double.MinValue;
        public double l2_decay_mul = double.MinValue;

        public List<Volume> filters;
        public int n_filters = int.MinValue;
        public int stride = int.MinValue;
        public int pad = int.MinValue;
    }
}
// ==================== /DeepQLearning/ConvnetSharp/Layers/ReLULayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Rectified linear unit: elementwise max(0, x).
    [Serializable]
    public class ReLULayer : LayerBase
    {
        Util util = new Util();

        public ReLULayer(LayerDefinition def) : base()
        {
            // computed: same shape as the input
            this.out_sx = def.in_sx;
            this.out_sy = def.in_sy;
            this.out_depth = def.in_depth;
            this.type = "relu";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            var V2 = V.clone();
            var N = V.w.Length;
            var V2w = V2.w;
            for (var i = 0; i < N; i++)
            {
                if (V2w[i] < 0) V2w[i] = 0; // threshold at 0
            }
            this.out_act = V2;
            return this.out_act;
        }

        // Gradient passes through where the output was positive, else zero.
        public override double backward(object y)
        {
            var V = this.in_act;   // we need to set dw of this
            var V2 = this.out_act;
            var N = V.w.Length;
            V.dw = util.zeros(N);  // zero out gradient wrt data
            for (var i = 0; i < N; i++)
            {
                if (V2.w[i] <= 0) V.dw[i] = 0; // threshold
                else V.dw[i] = V2.dw[i];
            }

            return 0.0;
        }

        // ReLU has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/RegressionLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // L2 regression loss layer (identity forward, squared-error backward).
    [Serializable]
    public class RegressionLayer : LayerBase
    {
        Util util = new Util();

        public RegressionLayer(LayerDefinition def) : base()
        {
            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_depth = this.num_inputs;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "regression";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V;
            return this.out_act; // simply identity function for now
        }

        // y is a list here of size num_inputs
        // or it can be a number if only one value is regressed
        // or it can be a struct {dim: i, val: x} where we only want to
        // regress on dimension i and asking it to have value x
        /// <summary>
        /// Computes 0.5 * sum (x - y)^2 over the targeted dimensions, writes
        /// the error into in_act.dw and returns the loss.
        /// </summary>
        public override double backward(object y)
        {
            // compute and accumulate gradient wrt weights and bias of this layer
            var x = this.in_act;
            x.dw = util.zeros(x.w.Length); // zero out the gradient of input Vol
            var loss = 0.0;

            // BUGFIX(review): the original tested y.GetType().Equals(typeof(Array)),
            // which is never true for a double[] — its runtime type is
            // System.Double[], not System.Array — so array targets fell through
            // to the (Entry) cast below and threw InvalidCastException.
            var Y = y as double[];
            if (Y != null)
            {
                for (var i = 0; i < this.out_depth; i++)
                {
                    var dy = x.w[i] - Y[i];
                    x.dw[i] = dy;
                    loss += 0.5 * dy * dy;
                }
            }
            else if (y is double)
            {
                // only one number is being regressed (dimension 0)
                var dy = x.w[0] - (double)y;
                x.dw[0] = dy;
                loss += 0.5 * dy * dy;
            }
            else
            {
                // assume it is a struct with entries .dim and .val
                // and we pass gradient only along dimension dim to be equal to val
                var e = (Entry)y;
                var dy = x.w[e.dim] - e.val;
                x.dw[e.dim] = dy;
                loss += 0.5 * dy * dy;
            }

            return loss;
        }

        // Regression loss has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Layers/SVMLayer.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Structured (hinge) loss layer over raw class scores.
    [Serializable]
    public class SVMLayer : LayerBase
    {
        Util util = new Util();

        public SVMLayer(LayerDefinition def) : base()
        {
            // computed
            this.num_inputs = def.in_sx * def.in_sy * def.in_depth;
            this.out_depth = this.num_inputs;
            this.out_sx = 1;
            this.out_sy = 1;
            this.type = "svm";
        }

        public override Volume forward(Volume V, bool is_training)
        {
            this.in_act = V;
            this.out_act = V; // nothing to do, output raw scores
            return V;
        }

        /// <summary>
        /// y is the ground-truth class index. Applies a margin-1 structured
        /// loss: every class scoring within the margin of the true class
        /// contributes gradient and loss.
        /// </summary>
        public override double backward(object y)
        {
            var index = (int)y;

            // compute and accumulate gradient wrt weights and bias of this layer
            var x = this.in_act;
            x.dw = util.zeros(x.w.Length); // zero out the gradient of input Vol

            // we're using structured loss here, which means that the score
            // of the ground truth should be higher than the score of any other
            // class, by a margin
            var yscore = x.w[index]; // score of ground truth
            var margin = 1.0;
            var loss = 0.0;
            for (var i = 0; i < this.out_depth; i++)
            {
                if (index == i) { continue; }
                var ydiff = -yscore + x.w[i] + margin;
                if (ydiff > 0)
                {
                    // violating dimension, apply loss
                    x.dw[i] += 1;
                    x.dw[index] -= 1;
                    loss += ydiff;
                }
            }

            return loss;
        }

        // SVM loss has no trainable parameters.
        public override Gradient[] getParamsAndGrads()
        {
            return new List<Gradient>().ToArray();
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Net.cs ====================
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    // Target for RegressionLayer: regress only dimension `dim` toward `val`.
    [Serializable]
    public struct Entry
    {
        public int dim;
        public double val;
    };

    /// <summary>
    /// A network: an ordered list of layers plus forward/backward plumbing.
    /// NOTE(review): generic type arguments (List&lt;LayerBase&gt; etc.) were
    /// stripped by the dump this file was recovered from and have been restored.
    /// </summary>
    [Serializable]
    public class Net
    {
        public List<LayerBase> layers = new List<LayerBase>();
        Util util = new Util();

        // constructor
        public Net()
        {
        }

        // takes a list of layer definitions and creates the network layer objects
        public void makeLayers(List<LayerDefinition> defs)
        {
            // few checks
            util.assert(defs.Count >= 2, "Error! At least one input layer and one loss layer are required.");
            util.assert(defs[0].type == "input", "Error! First layer must be the input layer, to declare size of inputs");

            // desugar: inject implied fc/activation/dropout layers around the declared ones
            var new_defs = new List<LayerDefinition>();
            for (var i = 0; i < defs.Count; i++)
            {
                var def = defs[i];

                if (def.type == "softmax" || def.type == "svm")
                {
                    // add an fc layer here, there is no reason the user should
                    // have to worry about this and we almost always want to
                    new_defs.Add(new LayerDefinition { type = "fc", num_neurons = def.num_classes });
                }

                if (def.type == "regression")
                {
                    // add an fc layer here, there is no reason the user should
                    // have to worry about this and we almost always want to
                    new_defs.Add(new LayerDefinition { type = "fc", num_neurons = def.num_neurons });
                }

                // BUGFIX(review): bias_pref is a double initialized to the
                // double.MinValue sentinel, but the original compared it with
                // int.MinValue — the comparison was always false, so ReLU layers
                // never received the intended 0.1 starting bias.
                if ((def.type == "fc" || def.type == "conv") && def.bias_pref == double.MinValue)
                {
                    def.bias_pref = 0.0;
                    if (!string.IsNullOrEmpty(def.activation) && def.activation == "relu")
                    {
                        // relus like a bit of positive bias to get gradients early
                        // otherwise it's technically possible that a relu unit will never turn on (by chance)
                        // and will never get any gradient and never contribute any computation. Dead relu.
                        def.bias_pref = 0.1;
                    }
                }

                new_defs.Add(def);

                if (!string.IsNullOrEmpty(def.activation))
                {
                    if (def.activation == "relu") { new_defs.Add(new LayerDefinition { type = "relu" }); }
                    else if (def.activation == "sigmoid") { new_defs.Add(new LayerDefinition { type = "sigmoid" }); }
                    else if (def.activation == "tanh") { new_defs.Add(new LayerDefinition { type = "tanh" }); }
                    else if (def.activation == "maxout")
                    {
                        // create maxout activation, and pass along group size, if provided
                        var gs = def.group_size != int.MinValue ? def.group_size : 2;
                        new_defs.Add(new LayerDefinition { type = "maxout", group_size = gs });
                    }
                    else { Console.WriteLine("ERROR unsupported activation " + def.activation); }
                }

                if (def.drop_prob != double.MinValue && def.type != "dropout")
                {
                    new_defs.Add(new LayerDefinition { type = "dropout", drop_prob = def.drop_prob });
                }
            }

            defs = new_defs;

            // create the layers
            this.layers = new List<LayerBase>();
            for (var i = 0; i < defs.Count; i++)
            {
                var def = defs[i];
                if (i > 0)
                {
                    // each layer's input shape is the previous layer's output shape
                    var prev = this.layers[i - 1];
                    def.in_sx = prev.out_sx;
                    def.in_sy = prev.out_sy;
                    def.in_depth = prev.out_depth;
                }

                switch (def.type)
                {
                    case "fc": this.layers.Add(new FullyConnectedLayer(def)); break;
                    //case "lrn": this.layers.Add(new LocalResponseNormalizationLayer(def)); break;
                    case "dropout": this.layers.Add(new DropoutLayer(def)); break;
                    case "input": this.layers.Add(new InputLayer(def)); break;
                    //case "softmax": this.layers.Add(new SoftmaxLayer(def)); break;
                    case "regression": this.layers.Add(new RegressionLayer(def)); break;
                    case "conv": this.layers.Add(new ConvLayer(def)); break;
                    //case "pool": this.layers.Add(new PoolLayer(def)); break;
                    case "relu": this.layers.Add(new ReLULayer(def)); break;
                    //case "sigmoid": this.layers.Add(new SigmoidLayer(def)); break;
                    //case "tanh": this.layers.Add(new TanhLayer(def)); break;
                    //case "maxout": this.layers.Add(new MaxoutLayer(def)); break;
                    case "svm": this.layers.Add(new SVMLayer(def)); break;
                    default: Console.WriteLine("ERROR: UNRECOGNIZED LAYER TYPE: " + def.type); break;
                }
            }
        }

        // forward prop the network.
        // The trainer class passes is_training = true, but when this function is
        // called from outside (not from the trainer), it defaults to prediction mode
        public Volume forward(Volume V, bool is_training)
        {
            var act = this.layers[0].forward(V, is_training);

            for (int i = 1; i < this.layers.Count; i++)
            {
                act = this.layers[i].forward(act, is_training);
            }
            return act;
        }

        // evaluate loss of a single example without updating any gradients upstream
        public double getCostLoss(Volume V, int y)
        {
            this.forward(V, false);
            var N = this.layers.Count;
            var loss = this.layers[N - 1].backward(y);
            return loss;
        }

        // backprop: compute gradients wrt all parameters
        public double backward(object y)
        {
            var N = this.layers.Count;
            var loss = this.layers[N - 1].backward(y); // last layer assumed to be loss layer
            for (var i = N - 2; i >= 0; i--)
            {
                // first layer assumed input; hidden layers ignore y
                this.layers[i].backward(y);
            }

            return loss;
        }

        public Gradient[] getParamsAndGrads()
        {
            // accumulate parameters and gradients for the entire network
            var response = new List<Gradient>();
            for (var i = 0; i < this.layers.Count; i++)
            {
                var layer_reponse = this.layers[i].getParamsAndGrads();
                for (var j = 0; j < layer_reponse.Length; j++)
                {
                    response.Add(layer_reponse[j]);
                }
            }

            return response.ToArray();
        }

        // this is a convenience function for returning the argmax
        // prediction, assuming the last layer of the net is a softmax
        // NOTE(review): the softmax layer case is commented out in makeLayers
        // above, so this assert will currently always fire — confirm whether
        // getPrediction is reachable in this port.
        public int getPrediction()
        {
            var S = this.layers[this.layers.Count - 1];
            util.assert(S.type == "softmax", "getPrediction function assumes softmax as last layer of the net!");

            var p = S.out_act.w;
            var maxv = p[0];
            var maxi = 0;
            for (var i = 1; i < p.Length; i++)
            {
                if (p[i] > maxv) { maxv = p[i]; maxi = i; }
            }

            return maxi; // return index of the class with highest class probability
        }
    }
}

// ==================== /DeepQLearning/ConvnetSharp/Trainer/Options.cs ====================
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ConvnetSharp
{
    /// <summary>
    /// Hyper-parameter bag for Trainer. double.MinValue / int.MinValue mean
    /// "not set"; Trainer substitutes its documented defaults.
    /// </summary>
    [Serializable]
    public class Options
    {
        public string method = string.Empty;          // sgd/adam/adagrad/adadelta/windowgrad/netsterov
        public int batch_size = int.MinValue;

        public double learning_rate = double.MinValue;
        public double l1_decay = double.MinValue;
        public double l2_decay = double.MinValue;
        public double momentum = double.MinValue;
        public double beta1 = double.MinValue;        // adam
        public double beta2 = double.MinValue;        // adam
        public double ro = double.MinValue;           // adadelta
        public double eps = double.MinValue;          // adam/adadelta
    }
}

-------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Trainer/Trainer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | [Serializable] 10 | public class Trainer 11 | { 12 | Net net; 13 | Options options; 14 | 15 | double learning_rate; 16 | double
l1_decay; 17 | double l2_decay; 18 | public double batch_size; 19 | string method; 20 | 21 | double momentum; 22 | double ro; 23 | double eps; 24 | double beta1; 25 | double beta2; 26 | 27 | double k = 0; // iteration counter 28 | List gsum; // last iteration gradients (used for momentum calculations) 29 | List xsum; // used in adam or adadelta 30 | public bool regression; 31 | 32 | Util util; 33 | 34 | public Trainer(Net net, Options options) 35 | { 36 | this.util = new Util(); 37 | this.net = net; 38 | 39 | this.options = options; 40 | this.learning_rate = options.learning_rate != double.MinValue ? options.learning_rate : 0.01; 41 | this.l1_decay = options.l1_decay != double.MinValue ? options.l1_decay : 0.0; 42 | this.l2_decay = options.l2_decay != double.MinValue ? options.l2_decay : 0.0; 43 | this.batch_size = options.batch_size != int.MinValue ? options.batch_size : 1; 44 | 45 | // methods: sgd/adam/adagrad/adadelta/windowgrad/netsterov 46 | this.method = string.IsNullOrEmpty(options.method) ? "sgd" : options.method; 47 | 48 | this.momentum = options.momentum != double.MinValue ? options.momentum : 0.9; 49 | this.ro = options.ro != double.MinValue ? options.ro : 0.95; // used in adadelta 50 | this.eps = options.eps != double.MinValue ? options.eps : 1e-8; // used in adam or adadelta 51 | this.beta1 = options.beta1 != double.MinValue ? options.beta1 : 0.9; // used in adam 52 | this.beta2 = options.beta2 != double.MinValue ? 
options.beta2 : 0.999; // used in adam 53 | 54 | this.gsum = new List(); 55 | this.xsum = new List(); 56 | 57 | // check if regression is expected 58 | if (this.net.layers[this.net.layers.Count - 1].type == "regression") 59 | this.regression = true; 60 | else 61 | this.regression = false; 62 | } 63 | 64 | public Dictionary train(Volume x, object y) 65 | { 66 | var start = new DateTime(); 67 | this.net.forward(x, true); // also set the flag that lets the net know we're just training 68 | var end = new DateTime(); 69 | var fwd_time = end - start; 70 | 71 | start = new DateTime(); 72 | var cost_loss = this.net.backward(y); 73 | var l2_decay_loss = 0.0; 74 | var l1_decay_loss = 0.0; 75 | end = new DateTime(); 76 | var bwd_time = end - start; 77 | 78 | //if (this.regression && y.GetType().Equals(typeof(Array)) == false) 79 | //Console.WriteLine("Warning: a regression net requires an array as training output vector."); 80 | 81 | this.k++; 82 | if (this.k % this.batch_size == 0) 83 | { 84 | var pglist = this.net.getParamsAndGrads(); 85 | 86 | // initialize lists for accumulators. 
Will only be done once on first iteration 87 | if (this.gsum.Count == 0 && (this.method != "sgd" || this.momentum > 0.0)) 88 | { 89 | // only vanilla sgd doesnt need either lists 90 | // momentum needs gsum 91 | // adagrad needs gsum 92 | // adam and adadelta needs gsum and xsum 93 | for (var i = 0; i < pglist.Length; i++) 94 | { 95 | this.gsum.Add(util.zeros(pglist[i].w.Length)); 96 | if (this.method == "adam" || this.method == "adadelta") 97 | { 98 | this.xsum.Add(util.zeros(pglist[i].w.Length)); 99 | } 100 | else 101 | { 102 | this.xsum.Add(new List().ToArray()); // conserve memory 103 | } 104 | } 105 | } 106 | 107 | // perform an update for all sets of weights 108 | for (var i = 0; i < pglist.Length; i++) 109 | { 110 | var pg = pglist[i]; // param, gradient, other options in future (custom learning rate etc) 111 | var p = pg.w; 112 | var g = pg.dw; 113 | 114 | // learning rate for some parameters. 115 | var l2_decay_mul =pg.l2_decay_mul != double.MinValue ? pg.l2_decay_mul : 1.0; 116 | var l1_decay_mul = pg.l1_decay_mul != double.MinValue ? pg.l1_decay_mul : 1.0; 117 | var l2_decay = this.l2_decay * l2_decay_mul; 118 | var l1_decay = this.l1_decay * l1_decay_mul; 119 | 120 | var plen = p.Length; 121 | for (var j = 0; j < plen; j++) 122 | { 123 | l2_decay_loss += l2_decay * p[j] * p[j] / 2; // accumulate weight decay loss 124 | l1_decay_loss += l1_decay * Math.Abs(p[j]); 125 | var l1grad = l1_decay * (p[j] > 0 ? 
1 : -1); 126 | var l2grad = l2_decay * (p[j]); 127 | 128 | var gij = (l2grad + l1grad + g[j]) / this.batch_size; // raw batch gradient 129 | 130 | var gsumi = this.gsum[i]; 131 | var xsumi = this.xsum[i]; 132 | if (this.method == "adam") 133 | { 134 | // adam update 135 | gsumi[j] = gsumi[j] * this.beta1 + (1 - this.beta1) * gij; // update biased first moment estimate 136 | xsumi[j] = xsumi[j] * this.beta2 + (1 - this.beta2) * gij * gij; // update biased second moment estimate 137 | var biasCorr1 = gsumi[j] * (1 - Math.Pow(this.beta1, this.k)); // correct bias first moment estimate 138 | var biasCorr2 = xsumi[j] * (1 - Math.Pow(this.beta2, this.k)); // correct bias second moment estimate 139 | var dx = -this.learning_rate * biasCorr1 / (Math.Sqrt(biasCorr2) + this.eps); 140 | p[j] += dx; 141 | } 142 | else if (this.method == "adagrad") 143 | { 144 | // adagrad update 145 | gsumi[j] = gsumi[j] + gij * gij; 146 | var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; 147 | p[j] += dx; 148 | } 149 | else if (this.method == "windowgrad") 150 | { 151 | // this is adagrad but with a moving window weighted average 152 | // so the gradient is not accumulated over the entire history of the run. 153 | // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me! 154 | gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij; 155 | var dx = -this.learning_rate / Math.Sqrt(gsumi[j] + this.eps) * gij; // eps added for better conditioning 156 | p[j] += dx; 157 | } 158 | else if (this.method == "adadelta") 159 | { 160 | gsumi[j] = this.ro * gsumi[j] + (1 - this.ro) * gij * gij; 161 | var dx = -Math.Sqrt((xsumi[j] + this.eps) / (gsumi[j] + this.eps)) * gij; 162 | xsumi[j] = this.ro * xsumi[j] + (1 - this.ro) * dx * dx; // yes, xsum lags behind gsum by 1. 
163 | p[j] += dx; 164 | } 165 | else if (this.method == "nesterov") 166 | { 167 | var dx = gsumi[j]; 168 | gsumi[j] = gsumi[j] * this.momentum + this.learning_rate * gij; 169 | dx = this.momentum * dx - (1.0 + this.momentum) * gsumi[j]; 170 | p[j] += dx; 171 | } 172 | else 173 | { 174 | // assume SGD 175 | if (this.momentum > 0.0) 176 | { 177 | // momentum update 178 | var dx = this.momentum * gsumi[j] - this.learning_rate * gij; // step 179 | gsumi[j] = dx; // back this up for next iteration of momentum 180 | p[j] += dx; // apply corrected gradient 181 | } 182 | else 183 | { 184 | // vanilla sgd 185 | p[j] += -this.learning_rate * gij; 186 | } 187 | } 188 | g[j] = 0.0; // zero out gradient so that we can begin accumulating anew 189 | } 190 | } 191 | } 192 | 193 | // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss 194 | // in future, TODO: have to completely redo the way loss is done around the network as currently 195 | // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer 196 | // and it should all be computed correctly and automatically. 
197 | 198 | var result = new Dictionary(); 199 | result.Add("fwd_time", fwd_time.TotalMilliseconds + " millisec"); 200 | result.Add("bwd_time", bwd_time.TotalMilliseconds + " millisec"); 201 | result.Add("l2_decay_loss", l2_decay_loss.ToString()); 202 | result.Add("l1_decay_loss", l1_decay_loss.ToString()); 203 | result.Add("cost_loss", cost_loss.ToString()); 204 | result.Add("loss", (cost_loss + l1_decay_loss + l2_decay_loss).ToString()); 205 | 206 | return result; 207 | } 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Trainer/TrainingOptions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | [Serializable] 10 | public class TrainingOptions 11 | { 12 | public int temporal_window = int.MinValue; 13 | public int experience_size = int.MinValue; 14 | public int start_learn_threshold = int.MinValue; 15 | public int learning_steps_total = int.MinValue; 16 | public int learning_steps_burnin = int.MinValue; 17 | public int[] hidden_layer_sizes; 18 | 19 | public double gamma = double.MinValue; 20 | public double learning_rate = double.MinValue; 21 | public double epsilon_min = double.MinValue; 22 | public double epsilon_test_time = double.MinValue; 23 | 24 | public Options options; 25 | public List layer_defs; 26 | public List random_action_distribution; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Util.cs: -------------------------------------------------------------------------------- 1 | using Newtonsoft.Json; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace ConvnetSharp 9 | { 10 | // a window 
stores _size_ number of values 11 | // and returns averages. Useful for keeping running 12 | // track of validation or training accuracy during SGD 13 | [Serializable] 14 | public class TrainingWindow 15 | { 16 | public List v; 17 | public int size; 18 | public int minsize; 19 | public double sum; 20 | 21 | public TrainingWindow(int size, int minsize) 22 | { 23 | this.v = new List(); 24 | this.size = size <= minsize ? 100 : size; 25 | this.minsize = minsize <= 2 ? 20 : minsize; 26 | this.sum = 0; 27 | } 28 | 29 | public void add(double x) 30 | { 31 | this.v.Add(x); 32 | this.sum += x; 33 | if (this.v.Count > this.size) 34 | { 35 | var xold = this.v[0]; 36 | v.RemoveAt(0); 37 | this.sum -= xold; 38 | } 39 | } 40 | 41 | public double get_average() 42 | { 43 | if (this.v.Count < this.minsize) 44 | return -1; 45 | else 46 | return this.sum / this.v.Count; 47 | } 48 | 49 | public void reset() 50 | { 51 | this.v = new List(); 52 | this.sum = 0; 53 | } 54 | } 55 | 56 | [Serializable] 57 | public class Util 58 | { 59 | // Random number utilities 60 | bool return_v = false; 61 | double v_val = 0.0; 62 | public Random random = new Random(); 63 | 64 | public double gaussRandom() 65 | { 66 | if(return_v) { 67 | return_v = false; 68 | return v_val; 69 | } 70 | 71 | var u = 2 * random.NextDouble() - 1; 72 | var v = 2 * random.NextDouble() - 1; 73 | var r = u * u + v * v; 74 | if(r == 0 || r > 1) return gaussRandom(); 75 | var c = Math.Sqrt(-2 * Math.Log(r) / r); 76 | v_val = v* c; // cache this 77 | return_v = true; 78 | return u* c; 79 | } 80 | 81 | public double randf(double a, double b) { return random.NextDouble() * (b-a)+a; } 82 | public int randi(int a, int b) { return random.Next(a, b); } 83 | public double randn(double mu, double std) { return mu+gaussRandom()* std; } 84 | 85 | // Array utilities 86 | public double[] zeros(int n) 87 | { 88 | if (n <= 0) 89 | { 90 | return new double[] { 0.0 }; 91 | } 92 | else 93 | { 94 | var arr = new double[n]; 95 | for (var i = 0; i < 
n; i++) { arr[i] = 0; } 96 | return arr; 97 | } 98 | } 99 | 100 | public bool arrContains(object[] arr, object elt) 101 | { 102 | for (int i = 0, n = arr.Length; i < n; i++) 103 | { 104 | if (arr[i] == elt) 105 | return true; 106 | } 107 | 108 | return false; 109 | } 110 | 111 | public object[] arrUnique(object[] arr) 112 | { 113 | var b = new List(); 114 | for (int i = 0, n = arr.Length; i < n; i++) 115 | { 116 | if (!arrContains(b.ToArray(), arr[i])) 117 | { 118 | b.Add(arr[i]); 119 | } 120 | } 121 | return b.ToArray(); 122 | } 123 | 124 | // sample from list lst according to probabilities in list probs 125 | // the two lists are of same size, and probs adds up to 1 126 | public double weightedSample(double[] lst, double[] probs) { 127 | double p = randf(0, 1.0); 128 | var cumprob = 0.0; 129 | 130 | for (int k = 0, n = lst.Length; k < n; k++) { 131 | cumprob += probs[k]; 132 | if (p < cumprob) { return lst[k]; } 133 | } 134 | 135 | return p; 136 | } 137 | 138 | // syntactic sugar function for getting default parameter values 139 | public string getopt(string opt_obj, object field_name, string default_value) { 140 | 141 | var opt = JsonConvert.DeserializeObject>(opt_obj); 142 | 143 | if (field_name.GetType().Equals(typeof(string))) { 144 | 145 | // case of single string 146 | if (opt.ContainsKey((string)field_name)) 147 | { 148 | return (string.IsNullOrEmpty(opt[(string)field_name])) ? 
opt[(string)field_name] : default_value; 149 | } 150 | else 151 | { 152 | return default_value; 153 | } 154 | } else { 155 | // assume we are given an array of string instead 156 | var ret = default_value; 157 | var fields = (string[])field_name; 158 | 159 | for (var i = 0; i< fields.Length;i++) { 160 | var field = fields[i]; 161 | 162 | if(opt.ContainsKey(field)) 163 | ret = opt[field]; 164 | } 165 | 166 | return ret; 167 | } 168 | } 169 | 170 | public void assert(Boolean condition, string message) 171 | { 172 | if (!condition) 173 | { 174 | throw new Exception(message); 175 | } 176 | } 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /DeepQLearning/ConvnetSharp/Volume.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | using System.Threading.Tasks; 6 | 7 | namespace ConvnetSharp 8 | { 9 | // Vol is the basic building block of all data in a net. 10 | // it is essentially just a 3D volume of numbers, with a 11 | // width (sx), height (sy), and depth (depth). 12 | // it is used to hold data for all filters, all volumes, 13 | // all weights, and also stores all gradients w.r.t. 14 | // the data. c is optionally a value to initialize the volume 15 | // with. If c is missing, fills the Vol with random numbers. 
16 | [Serializable] 17 | public class Volume 18 | { 19 | public int sx, sy, depth; 20 | 21 | public double[] w; 22 | public double[] dw; 23 | 24 | Util util = new Util(); 25 | 26 | public Volume(int sx, int sy, int depth) 27 | { 28 | Init(sx, sy, depth, double.MinValue); 29 | } 30 | 31 | public Volume(int sx, int sy, int depth, double c) 32 | { 33 | Init(sx, sy, depth, c); 34 | } 35 | 36 | private void Init(int sx, int sy, int depth, double c) 37 | { 38 | // we were given dimensions of the vol 39 | if (sx == 1073741823) 40 | sx = sx; 41 | this.sx = sx; 42 | this.sy = sy; 43 | this.depth = depth; 44 | 45 | var n = sx * sy * depth; 46 | this.w = util.zeros(n); 47 | this.dw = util.zeros(n); 48 | 49 | if (c == double.MinValue) 50 | { 51 | // weight normalization is done to equalize the output 52 | // variance of every neuron, otherwise neurons with a lot 53 | // of incoming connections have outputs of larger variance 54 | var scale = Math.Sqrt(1.0 / (sx * sy * depth)); 55 | for (var i = 0; i < n; i++) 56 | { 57 | this.w[i] = util.randn(0.0, scale); 58 | } 59 | } 60 | else 61 | { 62 | for (int i = 0; i < n; i++) 63 | { 64 | this.w[i] = c; 65 | } 66 | } 67 | } 68 | 69 | public double get(int x, int y, int d) 70 | { 71 | var ix = ((this.sx * y) + x) * this.depth + d; 72 | return this.w[ix]; 73 | } 74 | 75 | public void set(int x, int y, int d, double v) 76 | { 77 | var ix = ((this.sx * y) + x) * this.depth + d; 78 | this.w[ix] = v; 79 | } 80 | 81 | public void add(int x, int y, int d, double v) 82 | { 83 | var ix = ((this.sx * y) + x) * this.depth + d; 84 | this.w[ix] += v; 85 | } 86 | 87 | public double get_grad(int x, int y, int d) 88 | { 89 | var ix = ((this.sx * y) + x) * this.depth + d; 90 | return this.dw[ix]; 91 | } 92 | 93 | public void set_grad(int x, int y, int d, double v) 94 | { 95 | var ix = ((this.sx * y) + x) * this.depth + d; 96 | this.dw[ix] = v; 97 | } 98 | 99 | public void add_grad(int x, int y, int d, double v) 100 | { 101 | var ix = ((this.sx * y) + 
x) * this.depth + d; 102 | this.dw[ix] += v; 103 | } 104 | 105 | public Volume cloneAndZero() { return new Volume(this.sx, this.sy, this.depth, 0.0); } 106 | public Volume clone() 107 | { 108 | var V = new Volume(this.sx, this.sy, this.depth, 0.0); 109 | var n = this.w.Length; 110 | for (var i = 0; i < n; i++) { V.w[i] = this.w[i]; } 111 | return V; 112 | } 113 | 114 | public void addFrom(Volume V) { for (var k = 0; k < this.w.Length; k++) { this.w[k] += V.w[k]; } } 115 | public void addFromScaled(Volume V, double a) { for (var k = 0; k < this.w.Length; k++) { this.w[k] += a * V.w[k]; } } 116 | public void setConst(double a) { for (var k = 0; k < this.w.Length; k++) { this.w[k] = a; } } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /DeepQLearning/DRLAgent/DeepQLearn.cs: -------------------------------------------------------------------------------- 1 | using ConvnetSharp; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Linq; 5 | using System.Text; 6 | using System.Threading.Tasks; 7 | 8 | namespace DeepQLearning.DRLAgent 9 | { 10 | // An agent is in state0 and does action0 11 | // environment then assigns reward0 and provides new state, state1 12 | // Experience nodes store all this information, which is used in the 13 | // Q-learning update step 14 | [Serializable] 15 | public class Experience 16 | { 17 | public double[] state0; 18 | public int action0; 19 | public double reward0; 20 | public double[] state1; 21 | 22 | public Experience() 23 | { 24 | 25 | } 26 | 27 | public Experience(double[] state0, int action0, double reward0, double[] state1) 28 | { 29 | this.state0 = state0; 30 | this.action0 = action0; 31 | this.reward0 = reward0; 32 | this.state1 = state1; 33 | } 34 | } 35 | 36 | [Serializable] 37 | public struct Action 38 | { 39 | public int action; 40 | public double value; 41 | }; 42 | 43 | // A Brain object does all the magic. 
44 | // over time it receives some inputs and some rewards 45 | // and its job is to set the outputs to maximize the expected reward 46 | [Serializable] 47 | public class DeepQLearn 48 | { 49 | TrainingOptions opt; 50 | 51 | int temporal_window; 52 | int experience_size; 53 | double start_learn_threshold; 54 | double gamma; 55 | double learning_steps_total; 56 | double learning_steps_burnin; 57 | double epsilon_min; 58 | public double epsilon_test_time; 59 | 60 | int net_inputs; 61 | int num_states; 62 | int num_actions; 63 | int window_size; 64 | List state_window; 65 | List action_window; 66 | List reward_window; 67 | List net_window; 68 | 69 | double age; 70 | double forward_passes; 71 | public double epsilon; 72 | double latest_reward; 73 | Volume last_input; 74 | TrainingWindow average_reward_window; 75 | TrainingWindow average_loss_window; 76 | public bool learning; 77 | 78 | Net value_net; 79 | public Trainer tdtrainer; 80 | 81 | Util util; 82 | 83 | List random_action_distribution; 84 | List experience; 85 | 86 | public DeepQLearn(int num_states, int num_actions, TrainingOptions opt) 87 | { 88 | this.util = new Util(); 89 | this.opt = opt; 90 | 91 | // in number of time steps, of temporal memory 92 | // the ACTUAL input to the net will be (x,a) temporal_window times, and followed by current x 93 | // so to have no information from previous time step going into value function, set to 0. 94 | this.temporal_window = opt.temporal_window != int.MinValue ? opt.temporal_window : 1; 95 | // size of experience replay memory 96 | this.experience_size = opt.experience_size != int.MinValue ? opt.experience_size : 30000; 97 | // number of examples in experience replay memory before we begin learning 98 | this.start_learn_threshold = opt.start_learn_threshold != double.MinValue ? opt.start_learn_threshold : Math.Floor(Math.Min(this.experience_size * 0.1, 1000)); 99 | // gamma is a crucial parameter that controls how much plan-ahead the agent does. 
In [0,1] 100 | this.gamma = opt.gamma != double.MinValue ? opt.gamma : 0.8; 101 | 102 | // number of steps we will learn for 103 | this.learning_steps_total = opt.learning_steps_total != int.MinValue ? opt.learning_steps_total : 100000; 104 | // how many steps of the above to perform only random actions (in the beginning)? 105 | this.learning_steps_burnin = opt.learning_steps_burnin != int.MinValue ? opt.learning_steps_burnin : 3000; 106 | // what epsilon value do we bottom out on? 0.0 => purely deterministic policy at end 107 | this.epsilon_min = opt.epsilon_min != double.MinValue ? opt.epsilon_min : 0.05; 108 | // what epsilon to use at test time? (i.e. when learning is disabled) 109 | this.epsilon_test_time = opt.epsilon_test_time != double.MinValue ? opt.epsilon_test_time : 0.00; 110 | 111 | // advanced feature. Sometimes a random action should be biased towards some values 112 | // for example in flappy bird, we may want to choose to not flap more often 113 | if (opt.random_action_distribution != null) 114 | { 115 | // this better sum to 1 by the way, and be of length this.num_actions 116 | this.random_action_distribution = opt.random_action_distribution; 117 | if (this.random_action_distribution.Count != num_actions) 118 | { 119 | Console.WriteLine("TROUBLE. random_action_distribution should be same length as num_actions."); 120 | } 121 | 122 | var sum_of_dist = this.random_action_distribution.Sum(); 123 | if (Math.Abs(sum_of_dist - 1.0) > 0.0001) { Console.WriteLine("TROUBLE. random_action_distribution should sum to 1!"); } 124 | } 125 | else 126 | { 127 | this.random_action_distribution = new List(); 128 | } 129 | 130 | // states that go into neural net to predict optimal action look as 131 | // x0,a0,x1,a1,x2,a2,...xt 132 | // this variable controls the size of that temporal window. 
Actions are 133 | // encoded as 1-of-k hot vectors 134 | this.net_inputs = num_states * this.temporal_window + num_actions * this.temporal_window + num_states; 135 | this.num_states = num_states; 136 | this.num_actions = num_actions; 137 | this.window_size = Math.Max(this.temporal_window, 2); // must be at least 2, but if we want more context even more 138 | this.state_window = new List(); 139 | this.action_window = new List(); 140 | this.reward_window = new List(); 141 | this.net_window = new List(); 142 | 143 | // Init wth dummy data 144 | for (int i = 0; i < window_size; i++) this.state_window.Add(new Volume(1, 1, 1)); 145 | for (int i = 0; i < window_size; i++) this.action_window.Add(0); 146 | for (int i = 0; i < window_size; i++) this.reward_window.Add(0.0); 147 | for (int i = 0; i < window_size; i++) this.net_window.Add(new double[] { 0.0 }); 148 | 149 | // create [state -> value of all possible actions] modeling net for the value function 150 | var layer_defs = new List(); 151 | if (opt.layer_defs != null) 152 | { 153 | // this is an advanced usage feature, because size of the input to the network, and number of 154 | // actions must check out. This is not very pretty Object Oriented programming but I can't see 155 | // a way out of it :( 156 | layer_defs = opt.layer_defs; 157 | if (layer_defs.Count < 2) { Console.WriteLine("TROUBLE! must have at least 2 layers"); } 158 | if (layer_defs[0].type != "input") { Console.WriteLine("TROUBLE! first layer must be input layer!"); } 159 | if (layer_defs[layer_defs.Count - 1].type != "regression") { Console.WriteLine("TROUBLE! last layer must be input regression!"); } 160 | if (layer_defs[0].out_depth * layer_defs[0].out_sx * layer_defs[0].out_sy != this.net_inputs) 161 | { 162 | Console.WriteLine("TROUBLE! 
Number of inputs must be num_states * temporal_window + num_actions * temporal_window + num_states!"); 163 | } 164 | if (layer_defs[layer_defs.Count - 1].num_neurons != this.num_actions) 165 | { 166 | Console.WriteLine("TROUBLE! Number of regression neurons should be num_actions!"); 167 | } 168 | } 169 | else 170 | { 171 | // create a very simple neural net by default 172 | layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = this.net_inputs }); 173 | if (opt.hidden_layer_sizes != null) 174 | { 175 | // allow user to specify this via the option, for convenience 176 | var hl = opt.hidden_layer_sizes; 177 | for (var k = 0; k < hl.Length; k++) 178 | { 179 | layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = hl[k], activation = "relu" }); // relu by default 180 | } 181 | } 182 | } 183 | 184 | // Create the network 185 | this.value_net = new Net(); 186 | this.value_net.makeLayers(layer_defs); 187 | 188 | // and finally we need a Temporal Difference Learning trainer! 189 | var options = new Options { learning_rate = 0.01, momentum = 0.0, batch_size = 64, l2_decay = 0.01 }; 190 | if (opt.options != null) 191 | { 192 | options = opt.options; // allow user to overwrite this 193 | } 194 | 195 | this.tdtrainer = new Trainer(this.value_net, options); 196 | 197 | // experience replay 198 | this.experience = new List(); 199 | 200 | // various housekeeping variables 201 | this.age = 0; // incremented every backward() 202 | this.forward_passes = 0; // incremented every forward() 203 | this.epsilon = 1.0; // controls exploration exploitation tradeoff. Should be annealed over time 204 | this.latest_reward = 0; 205 | //this.last_input = []; 206 | this.average_reward_window = new TrainingWindow(1000, 10); 207 | this.average_loss_window = new TrainingWindow(1000, 10); 208 | this.learning = true; 209 | } 210 | 211 | public int random_action() 212 | { 213 | // a bit of a helper function. 
It returns a random action 214 | // we are abstracting this away because in future we may want to 215 | // do more sophisticated things. For example some actions could be more 216 | // or less likely at "rest"/default state. 217 | 218 | int action = util.randi(0, this.num_actions); 219 | 220 | if (this.random_action_distribution.Count != 0) 221 | { 222 | // okay, lets do some fancier sampling: 223 | var p = util.randf(0, 1.0); 224 | var cumprob = 0.0; 225 | for (var k = 0; k < this.num_actions; k++) 226 | { 227 | cumprob += this.random_action_distribution[k]; 228 | if (p < cumprob) { action = k; break; } 229 | } 230 | } 231 | 232 | return action; 233 | } 234 | 235 | public Action policy(double[] s) 236 | { 237 | // compute the value of doing any action in this state 238 | // and return the argmax action and its value 239 | var svol = new Volume(1, 1, this.net_inputs); 240 | svol.w = s; 241 | var action_values = this.value_net.forward(svol, false); 242 | var maxk = 0; 243 | var maxval = action_values.w[0]; 244 | for (var k = 1; k < this.num_actions; k++) 245 | { 246 | if (action_values.w[k] > maxval) { maxk = k; maxval = action_values.w[k]; } 247 | } 248 | return new Action { action = maxk, value = maxval }; 249 | } 250 | 251 | public double[] getNetInput(Volume xt) 252 | { 253 | // return s = (x,a,x,a,x,a,xt) state vector. 254 | // It's a concatenation of last window_size (x,a) pairs and current state x 255 | List w = new List(); 256 | 257 | // start with current state and now go backwards and append states and actions from history temporal_window times 258 | w.AddRange(xt.w); 259 | 260 | var n = this.window_size; 261 | for (var k = 0; k < this.temporal_window; k++) 262 | { 263 | // state 264 | w.AddRange(this.state_window[n - 1 - k].w); 265 | // action, encoded as 1-of-k indicator vector. 
We scale it up a bit because 266 | // we dont want weight regularization to undervalue this information, as it only exists once 267 | var action1ofk = new double[this.num_actions]; 268 | for (var q = 0; q < this.num_actions; q++) action1ofk[q] = 0.0; 269 | action1ofk[this.action_window[n - 1 - k]] = 1.0 * this.num_states; 270 | w.AddRange(action1ofk); 271 | } 272 | 273 | return w.ToArray(); 274 | } 275 | 276 | public int forward(Volume input_array) 277 | { 278 | // compute forward (behavior) pass given the input neuron signals from body 279 | this.forward_passes += 1; 280 | this.last_input = input_array; // back this up 281 | 282 | // create network input 283 | int action; 284 | double[] net_input; 285 | if (this.forward_passes > this.temporal_window) 286 | { 287 | // we have enough to actually do something reasonable 288 | net_input = this.getNetInput(input_array); 289 | if (this.learning) 290 | { 291 | // compute epsilon for the epsilon-greedy policy 292 | this.epsilon = Math.Min(1.0, Math.Max(this.epsilon_min, 1.0 - (this.age - this.learning_steps_burnin) / (this.learning_steps_total - this.learning_steps_burnin))); 293 | } 294 | else 295 | { 296 | this.epsilon = this.epsilon_test_time; // use test-time value 297 | } 298 | 299 | var rf = util.randf(0, 1); 300 | if (rf < this.epsilon) 301 | { 302 | // choose a random action with epsilon probability 303 | action = this.random_action(); 304 | } 305 | else 306 | { 307 | // otherwise use our policy to make decision 308 | var maxact = this.policy(net_input); 309 | action = maxact.action; 310 | } 311 | } 312 | else 313 | { 314 | // pathological case that happens first few iterations 315 | // before we accumulate window_size inputs 316 | net_input = new List().ToArray(); 317 | action = this.random_action(); 318 | } 319 | 320 | // remember the state and action we took for backward pass 321 | this.net_window.RemoveAt(0); 322 | this.net_window.Add(net_input); 323 | this.state_window.RemoveAt(0); 324 | 
this.state_window.Add(input_array); 325 | this.action_window.RemoveAt(0); 326 | this.action_window.Add(action); 327 | 328 | return action; 329 | } 330 | 331 | public void backward(double reward) 332 | { 333 | this.latest_reward = reward; 334 | this.average_reward_window.add(reward); 335 | 336 | this.reward_window.RemoveAt(0); 337 | this.reward_window.Add(reward); 338 | 339 | if (!this.learning) { return; } 340 | 341 | // various book-keeping 342 | this.age += 1; 343 | 344 | // it is time t+1 and we have to store (s_t, a_t, r_t, s_{t+1}) as new experience 345 | // (given that an appropriate number of state measurements already exist, of course) 346 | if (this.forward_passes > this.temporal_window + 1) 347 | { 348 | var e = new Experience(); 349 | var n = this.window_size; 350 | e.state0 = this.net_window[n - 2]; 351 | e.action0 = this.action_window[n - 2]; 352 | e.reward0 = this.reward_window[n - 2]; 353 | e.state1 = this.net_window[n - 1]; 354 | 355 | if (this.experience.Count < this.experience_size) 356 | { 357 | this.experience.Add(e); 358 | } 359 | else 360 | { 361 | // replace. finite memory! 362 | var ri = util.randi(0, this.experience_size); 363 | this.experience[ri] = e; 364 | } 365 | } 366 | 367 | // learn based on experience, once we have some samples to go on 368 | // this is where the magic happens... 
369 | if (this.experience.Count > this.start_learn_threshold) 370 | { 371 | var avcost = 0.0; 372 | for (var k = 0; k < this.tdtrainer.batch_size; k++) 373 | { 374 | var re = util.randi(0, this.experience.Count); 375 | var e = this.experience[re]; 376 | var x = new Volume(1, 1, this.net_inputs); 377 | x.w = e.state0; 378 | var maxact = this.policy(e.state1); 379 | var r = e.reward0 + this.gamma * maxact.value; 380 | 381 | var ystruct = new Entry { dim=e.action0, val=r}; 382 | var loss = this.tdtrainer.train(x, ystruct); 383 | avcost += double.Parse(loss["loss"]); 384 | } 385 | 386 | avcost = avcost / this.tdtrainer.batch_size; 387 | this.average_loss_window.add(avcost); 388 | } 389 | } 390 | 391 | public string visSelf() 392 | { 393 | var t = ""; 394 | t += "experience replay size: " + this.experience.Count + Environment.NewLine; 395 | t += "exploration epsilon: " + this.epsilon + Environment.NewLine; 396 | t += "age: " + this.age + Environment.NewLine; 397 | t += "average Q-learning loss: " + this.average_loss_window.get_average() + Environment.NewLine; 398 | t += "smooth-ish reward: " + this.average_reward_window.get_average() + Environment.NewLine; 399 | 400 | return t; 401 | } 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /DeepQLearning/DRLAgent/QAgent.cs: -------------------------------------------------------------------------------- 1 | using ConvnetSharp; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.Drawing; 5 | 6 | namespace DeepQLearning.DRLAgent 7 | { 8 | [Serializable] 9 | public struct Intersect 10 | { 11 | public double ua; 12 | public double ub; 13 | public Vec up; 14 | public int type; 15 | public bool intersect; 16 | }; 17 | 18 | // A 2D vector utility 19 | [Serializable] 20 | public class Vec 21 | { 22 | public double x, y; 23 | 24 | public Vec (double x, double y) 25 | { 26 | this.x = x; 27 | this.y = y; 28 | } 29 | 30 | // utilities 31 | public double 
dist_from(Vec v) { return Math.Sqrt(Math.Pow(this.x - v.x, 2) + Math.Pow(this.y - v.y, 2)); } 32 | public double length() { return Math.Sqrt(Math.Pow(this.x, 2) + Math.Pow(this.y, 2)); } 33 | 34 | // new vector returning operations 35 | public Vec add(Vec v) { return new Vec(this.x + v.x, this.y + v.y); } 36 | public Vec sub(Vec v) { return new Vec(this.x - v.x, this.y - v.y); } 37 | public Vec rotate(double a) 38 | { // CLOCKWISE 39 | return new Vec(this.x * Math.Cos(a) + this.y * Math.Sin(a), 40 | -this.x * Math.Sin(a) + this.y * Math.Cos(a)); 41 | } 42 | 43 | // in place operations 44 | public void scale(double s) { this.x *= s; this.y *= s; } 45 | public void normalize() { var d = this.length(); this.scale(1.0 / d); } 46 | } 47 | 48 | // Wall is made up of two points 49 | [Serializable] 50 | public class Wall 51 | { 52 | public Vec p1, p2; 53 | 54 | public Wall(Vec p1, Vec p2) 55 | { 56 | this.p1 = p1; 57 | this.p2 = p2; 58 | } 59 | } 60 | 61 | // Eye sensor has a maximum range and senses walls 62 | [Serializable] 63 | public class Eye 64 | { 65 | public double angle; 66 | public double max_range; 67 | public double sensed_proximity; 68 | public int sensed_type; 69 | 70 | public Eye(double angle) 71 | { 72 | this.angle = angle; // angle of the eye relative to the agent 73 | this.max_range = 85; // maximum proximity range 74 | this.sensed_proximity = 85; // proximity of what the eye is seeing. will be set in world.tick() 75 | this.sensed_type = -1; // what type of object does the eye see? 
76 | } 77 | } 78 | 79 | // item is circle thing on the floor that agent can interact with (see or eat, etc) 80 | [Serializable] 81 | public class Item 82 | { 83 | public Vec p; 84 | public int type; 85 | public double rad; 86 | public int age; 87 | public bool cleanup_; 88 | 89 | public Item(double x, double y, int type) 90 | { 91 | this.p = new Vec(x, y); // position 92 | this.type = type; 93 | this.rad = 10; // default radius 94 | this.age = 0; 95 | this.cleanup_ = false; 96 | } 97 | } 98 | 99 | // A single agent 100 | [Serializable] 101 | public class Agent 102 | { 103 | public List eyes; 104 | public List actions; 105 | public double angle, oangle, reward_bonus, digestion_signal; 106 | public double rad, rot1, rot2, prevactionix; 107 | public Vec p, op; 108 | public int actionix; 109 | public DeepQLearn brain; 110 | 111 | public Agent(DeepQLearn brain) 112 | { 113 | this.brain = brain; 114 | 115 | // positional information 116 | this.p = new Vec(50, 50); 117 | this.op = this.p; // old position 118 | this.angle = 0; // direction facing 119 | 120 | this.actions = new List(); 121 | this.actions.Add(new double[] { 1, 1 }); 122 | this.actions.Add(new double[] { 0.8, 1 }); 123 | this.actions.Add(new double[] { 1, 0.8 }); 124 | this.actions.Add(new double[] { 0.5, 0 }); 125 | this.actions.Add(new double[] { 0, 0.5 }); 126 | 127 | // properties 128 | this.rad = 10; 129 | this.eyes = new List(); 130 | for (var k = 0; k < 9; k++) { this.eyes.Add(new Eye((k - 3) * 0.25)); } 131 | 132 | this.reward_bonus = 0.0; 133 | this.digestion_signal = 0.0; 134 | 135 | // outputs on world 136 | this.rot1 = 0.0; // rotation speed of 1st wheel 137 | this.rot2 = 0.0; // rotation speed of 2nd wheel 138 | 139 | this.prevactionix = -1; 140 | } 141 | 142 | public void forward() 143 | { 144 | // in forward pass the agent simply behaves in the environment 145 | // create input to brain 146 | var num_eyes = this.eyes.Count; 147 | var input_array = new double[num_eyes * 3]; 148 | for (var i = 0; 
i < num_eyes; i++) 149 | { 150 | var e = this.eyes[i]; 151 | input_array[i * 3] = 1.0; 152 | input_array[i * 3 + 1] = 1.0; 153 | input_array[i * 3 + 2] = 1.0; 154 | if (e.sensed_type != -1) 155 | { 156 | // sensed_type is 0 for wall, 1 for food and 2 for poison. 157 | // lets do a 1-of-k encoding into the input array 158 | input_array[i * 3 + e.sensed_type] = e.sensed_proximity / e.max_range; // normalize to [0,1] 159 | } 160 | } 161 | 162 | Volume input = new Volume(num_eyes, 3, 1); 163 | input.w = input_array; 164 | 165 | // get action from brain 166 | var actionix = this.brain.forward(input); 167 | var action = this.actions[actionix]; 168 | this.actionix = actionix; //back this up 169 | 170 | // demultiplex into behavior variables 171 | this.rot1 = action[0] * 1; 172 | this.rot2 = action[1] * 1; 173 | 174 | //this.rot1 = 0; 175 | //this.rot2 = 0; 176 | } 177 | 178 | public void backward() 179 | { 180 | // in backward pass agent learns. 181 | // compute reward 182 | var proximity_reward = 0.0; 183 | var num_eyes = this.eyes.Count; 184 | for (var i = 0; i < num_eyes; i++) 185 | { 186 | var e = this.eyes[i]; 187 | // agents dont like to see walls, especially up close 188 | proximity_reward += e.sensed_type == 0 ? 
e.sensed_proximity / e.max_range : 1.0; 189 | } 190 | proximity_reward = proximity_reward / num_eyes; 191 | proximity_reward = Math.Min(1.0, proximity_reward * 2); 192 | 193 | // agents like to go straight forward 194 | var forward_reward = 0.0; 195 | if (this.actionix == 0 && proximity_reward > 0.75) forward_reward = 0.1 * proximity_reward; 196 | 197 | // agents like to eat good things 198 | var digestion_reward = this.digestion_signal; 199 | this.digestion_signal = 0.0; 200 | 201 | var reward = proximity_reward + forward_reward + digestion_reward; 202 | 203 | // pass to brain for learning 204 | this.brain.backward(reward); 205 | } 206 | } 207 | 208 | // World object contains many agents and walls and food and stuff 209 | [Serializable] 210 | public class World 211 | { 212 | Util util; 213 | 214 | int W, H; 215 | int clock; 216 | 217 | public List walls; 218 | public List items; 219 | public List agents; 220 | 221 | List collpoints; 222 | 223 | public World(DeepQLearn brain, int canvas_Width, int canvas_Height) 224 | { 225 | this.agents = new List(); 226 | this.W = canvas_Width; 227 | this.H = canvas_Height; 228 | 229 | this.util = new Util(); 230 | this.clock = 0; 231 | 232 | // set up walls in the world 233 | this.walls = new List(); 234 | var pad = 10; 235 | 236 | util_add_box(this.walls, pad, pad, this.W - pad * 2, this.H - pad * 2); 237 | util_add_box(this.walls, 100, 100, 200, 300); // inner walls 238 | 239 | this.walls.RemoveAt(walls.Count - 1); 240 | util_add_box(this.walls, 400, 100, 200, 300); 241 | this.walls.RemoveAt(walls.Count - 1); 242 | 243 | // set up food and poison 244 | this.items = new List(); 245 | for (var k = 0; k < 30; k++) 246 | { 247 | var x = util.randf(20, this.W - 20); 248 | var y = util.randf(20, this.H - 20); 249 | var t = util.randi(1, 3); // food or poison (1 and 2) 250 | var it = new Item(x, y, t); 251 | this.items.Add(it); 252 | } 253 | 254 | // set up food and poison 255 | this.agents = new List(); 256 | this.agents.Add(new 
Agent(brain));
        }

        // Axis-aligned box helper: appends the four edge walls of the
        // rectangle at (x, y) with size (w, h) to lst, traced clockwise
        // so consecutive walls share a corner.
        private void util_add_box(List<Wall> lst, double x, double y, double w, double h)
        {
            var topLeft = new Vec(x, y);
            var topRight = new Vec(x + w, y);
            var bottomRight = new Vec(x + w, y + h);
            var bottomLeft = new Vec(x, y + h);

            lst.Add(new Wall(topLeft, topRight));
            lst.Add(new Wall(topRight, bottomRight));
            lst.Add(new Wall(bottomRight, bottomLeft));
            lst.Add(new Wall(bottomLeft, topLeft));
        }

        // Casts the segment (p1, p2) against the world and returns the nearest
        // hit along it. Walls report type 0; items report their own type
        // (1 = food, 2 = poison). The result has intersect == false when
        // nothing is hit.
        public Intersect stuff_collide_(Vec p1, Vec p2, bool check_walls, bool check_items)
        {
            var closest = new Intersect() { intersect = false };

            // collide with walls
            if (check_walls)
            {
                foreach (var wall in this.walls)
                {
                    var hit = line_intersect(p1, p2, wall.p1, wall.p2);
                    if (!hit.intersect) continue;

                    hit.type = 0; // 0 marks a wall
                    // keep only the hit nearest to p1 (smallest ua)
                    if (!closest.intersect || hit.ua < closest.ua) closest = hit;
                }
            }

            // collide with items
            if (check_items)
            {
                foreach (var it in this.items)
                {
                    var hit = line_point_intersect(p1, p2, it.p, it.rad);
                    if (!hit.intersect) continue;

                    hit.type = it.type; // remember what kind of item was hit
                    if (!closest.intersect || hit.ua < closest.ua) closest = hit;
                }
            }

            return closest;
        }

        // line intersection helper function: does line segment (p1,p2) intersect segment (p3,p4) ?
325 | public Intersect line_intersect(Vec p1, Vec p2, Vec p3, Vec p4) 326 | { 327 | Intersect result = new Intersect() { intersect= false }; 328 | 329 | var denom = (p4.y - p3.y) * (p2.x - p1.x) - (p4.x - p3.x) * (p2.y - p1.y); 330 | if (denom == 0.0) { result.intersect = false; } // parallel lines 331 | 332 | var ua = ((p4.x - p3.x) * (p1.y - p3.y) - (p4.y - p3.y) * (p1.x - p3.x)) / denom; 333 | var ub = ((p2.x - p1.x) * (p1.y - p3.y) - (p2.y - p1.y) * (p1.x - p3.x)) / denom; 334 | if (ua > 0.0 && ua < 1.0 && ub > 0.0 && ub < 1.0) 335 | { 336 | var up = new Vec(p1.x + ua * (p2.x - p1.x), p1.y + ua * (p2.y - p1.y)); 337 | return new Intersect { ua = ua, ub = ub, up = up, intersect = true }; // up is intersection point 338 | } 339 | return result; 340 | } 341 | 342 | public Intersect line_point_intersect(Vec A, Vec B, Vec C, double rad) { 343 | 344 | Intersect result = new Intersect { intersect = false }; 345 | 346 | var v = new Vec(B.y-A.y,-(B.x-A.x)); // perpendicular vector 347 | var d = Math.Abs((B.x-A.x)*(A.y-C.y)-(A.x-C.x)*(B.y-A.y)); 348 | d = d / v.length(); 349 | if(d > rad) { return result; } 350 | 351 | v.normalize(); 352 | v.scale(d); 353 | double ua = 0.0; 354 | var up = C.add(v); 355 | if(Math.Abs(B.x-A.x)>Math.Abs(B.y-A.y)) { 356 | ua = (up.x - A.x) / (B.x - A.x); 357 | } else { 358 | ua = (up.y - A.y) / (B.y - A.y); 359 | } 360 | if(ua>0.0 && ua<1.0) { 361 | result = new Intersect { ua = ua, up = up, intersect = true }; 362 | } 363 | return result; 364 | } 365 | 366 | private Boolean AreSimilar(double a, double b, double tolerance) 367 | { 368 | // Values are within specified tolerance of each other.... 
369 | return Math.Abs(a - b) < tolerance; 370 | } 371 | 372 | public void tick() 373 | { 374 | // tick the environment 375 | this.clock++; 376 | 377 | // fix input to all agents based on environment process eyes 378 | this.collpoints = new List(); 379 | for (int i = 0, n = this.agents.Count; i < n; i++) 380 | { 381 | var a = this.agents[i]; 382 | for (int ei = 0, ne = a.eyes.Count; ei < ne; ei++) 383 | { 384 | var e = a.eyes[ei]; 385 | // we have a line from p to p->eyep 386 | var eyep = new Vec(a.p.x + e.max_range * Math.Sin(a.angle + e.angle), a.p.y + e.max_range * Math.Cos(a.angle + e.angle)); 387 | var res = this.stuff_collide_(a.p, eyep, true, true); 388 | 389 | if (res.intersect) 390 | { 391 | // eye collided with wall 392 | e.sensed_proximity = res.up.dist_from(a.p); 393 | e.sensed_type = res.type; 394 | } 395 | else 396 | { 397 | e.sensed_proximity = e.max_range; 398 | e.sensed_type = -1; 399 | } 400 | } 401 | } 402 | 403 | // let the agents behave in the world based on their input 404 | for (int i = 0, n = this.agents.Count; i < n; i++) 405 | { 406 | this.agents[i].forward(); 407 | } 408 | 409 | // apply outputs of agents on evironment 410 | for (int i = 0, n = this.agents.Count; i < n; i++) 411 | { 412 | var a = this.agents[i]; 413 | a.op = a.p; // back up old position 414 | a.oangle = a.angle; // and angle 415 | 416 | // steer the agent according to outputs of wheel velocities 417 | var v = new Vec(0, a.rad / 2.0); 418 | v = v.rotate(a.angle + Math.PI / 2); 419 | var w1p = a.p.add(v); // positions of wheel 1 and 2 420 | var w2p = a.p.sub(v); 421 | var vv = a.p.sub(w2p); 422 | vv = vv.rotate(-a.rot1); 423 | var vv2 = a.p.sub(w1p); 424 | vv2 = vv2.rotate(a.rot2); 425 | var np = w2p.add(vv); 426 | np.scale(0.5); 427 | var np2 = w1p.add(vv2); 428 | np2.scale(0.5); 429 | a.p = np.add(np2); 430 | 431 | a.angle -= a.rot1; 432 | if (a.angle < 0) a.angle += 2 * Math.PI; 433 | a.angle += a.rot2; 434 | if (a.angle > 2 * Math.PI) a.angle -= 2 * Math.PI; 435 | 436 | 
// agent is trying to move from p to op. Check walls 437 | var res = this.stuff_collide_(a.op, a.p, true, false); 438 | if (res.intersect) 439 | { 440 | // wall collision! reset position 441 | a.p = a.op; 442 | } 443 | 444 | // handle boundary conditions 445 | if (a.p.x < 0) a.p.x = 0; 446 | if (a.p.x > this.W) a.p.x = this.W; 447 | if (a.p.y < 0) a.p.y = 0; 448 | if (a.p.y > this.H) a.p.y = this.H; 449 | } 450 | 451 | // tick all items 452 | var update_items = false; 453 | for (int i = 0, n = this.items.Count; i < n; i++) 454 | { 455 | var it = this.items[i]; 456 | it.age += 1; 457 | 458 | // see if some agent gets lunch 459 | for (int j = 0, m = this.agents.Count; j < m; j++) 460 | { 461 | var a = this.agents[j]; 462 | var d = a.p.dist_from(it.p); 463 | if (d < it.rad + a.rad) 464 | { 465 | 466 | // wait lets just make sure that this isn't through a wall 467 | var rescheck = this.stuff_collide_(a.p, it.p, true, false); 468 | if (!rescheck.intersect) 469 | { 470 | // ding! nom nom nom 471 | if (it.type == 1) a.digestion_signal += 5.0; // mmm delicious apple 472 | if (it.type == 2) a.digestion_signal += -6.0; // ewww poison 473 | it.cleanup_ = true; 474 | update_items = true; 475 | break; // break out of loop, item was consumed 476 | } 477 | } 478 | } 479 | 480 | if (it.age > 5000 && this.clock % 100 == 0 && util.randf(0, 1) < 0.1) 481 | { 482 | it.cleanup_ = true; // replace this one, has been around too long 483 | update_items = true; 484 | } 485 | } 486 | if (update_items) 487 | { 488 | var nt = new List(); 489 | for (int i = 0, n = this.items.Count; i < n; i++) 490 | { 491 | var it = this.items[i]; 492 | if (!it.cleanup_) nt.Add(it); 493 | } 494 | this.items = nt; // swap 495 | } 496 | if (this.items.Count < 30 && this.clock % 10 == 0 && util.randf(0, 1) < 0.25) 497 | { 498 | var newitx = util.randf(20, this.W - 20); 499 | var newity = util.randf(20, this.H - 20); 500 | var newitt = util.randi(1, 3); // food or poison (1 and 2) 501 | var newit = new 
Item(newitx, newity, newitt); 502 | this.items.Add(newit); 503 | } 504 | 505 | // agents are given the opportunity to learn based on feedback of their action on environment 506 | for (int i = 0, n = this.agents.Count; i < n; i++) 507 | { 508 | this.agents[i].backward(); 509 | } 510 | } 511 | } 512 | 513 | [Serializable] 514 | public class QAgent 515 | { 516 | public int simspeed = 1; 517 | World w; 518 | 519 | [NonSerialized] 520 | Pen greenPen = new Pen(Color.LightGreen, 2); 521 | 522 | [NonSerialized] 523 | Pen redPen = new Pen(Color.Red, 2); 524 | 525 | [NonSerialized] 526 | Pen greenPen2 = new Pen(Color.LightGreen, 1); 527 | 528 | [NonSerialized] 529 | Pen redPen2 = new Pen(Color.Red, 1); 530 | 531 | [NonSerialized] 532 | Pen bluePen = new Pen(Color.Blue, 2); 533 | 534 | [NonSerialized] 535 | Pen blackPen = new Pen(Color.Black); 536 | 537 | public QAgent(DeepQLearn brain, int canvas_W, int canvas_H) 538 | { 539 | this.w = new World(brain, canvas_W, canvas_H); 540 | } 541 | 542 | public void Reinitialize() 543 | { 544 | greenPen = new Pen(Color.LightGreen, 2); 545 | redPen = new Pen(Color.Red, 2); 546 | greenPen2 = new Pen(Color.LightGreen, 1); 547 | redPen2 = new Pen(Color.Red, 1); 548 | bluePen = new Pen(Color.Blue, 2); 549 | blackPen = new Pen(Color.Black); 550 | 551 | this.simspeed = 1; 552 | this.w.agents[0].brain.learning = false; 553 | this.w.agents[0].brain.epsilon_test_time = 0.01; 554 | 555 | this.w.agents[0].op.x = 500; 556 | this.w.agents[0].op.y = 500; 557 | } 558 | 559 | public void tick() 560 | { 561 | w.tick(); 562 | } 563 | 564 | // Draw everything and return stats 565 | public string draw_world(Graphics g) 566 | { 567 | var agents = w.agents; 568 | 569 | // draw walls in environment 570 | for (int i = 0, n = w.walls.Count; i < n; i++) 571 | { 572 | var q = w.walls[i]; 573 | drawLine(g, q.p1, q.p2, blackPen); 574 | } 575 | 576 | // draw agents 577 | for (int i = 0, n = agents.Count; i < n; i++) 578 | { 579 | // draw agent's body 580 | var a = 
agents[i]; 581 | drawArc(g, a.op, (int)a.rad, 0, (float)(Math.PI * 2), blackPen); 582 | 583 | // draw agent's sight 584 | for (int ei = 0, ne = a.eyes.Count; ei < ne; ei++) 585 | { 586 | var e = a.eyes[ei]; 587 | var sr = e.sensed_proximity; 588 | Pen pen; 589 | 590 | if (e.sensed_type == 1) pen = redPen2; // apples 591 | else if (e.sensed_type == 2) pen = greenPen2; // poison 592 | else pen = blackPen; // wall 593 | 594 | //var new_x = a.op.x + sr * Math.Sin(radToDegree((float)a.oangle) + radToDegree((float)e.angle)); 595 | //var new_y = a.op.y + sr * Math.Cos(radToDegree((float)a.oangle) + radToDegree((float)e.angle)); 596 | 597 | var new_x = a.op.x + sr * Math.Sin(a.oangle + e.angle); 598 | var new_y = a.op.y + sr * Math.Cos(a.oangle + e.angle); 599 | Vec b = new Vec(new_x, new_y); 600 | 601 | drawLine(g, a.op, b, pen); 602 | } 603 | } 604 | 605 | // draw items 606 | for (int i = 0, n = w.items.Count; i < n; i++) 607 | { 608 | Pen pen = blackPen; 609 | var it = w.items[i]; 610 | if (it.type == 1) pen = redPen; 611 | if (it.type == 2) pen = greenPen; 612 | 613 | drawArc(g, it.p, (int)it.rad, 0, (float)(Math.PI * 2), pen); 614 | } 615 | 616 | return w.agents[0].brain.visSelf(); 617 | } 618 | 619 | public void goveryfast() 620 | { 621 | simspeed = 3; 622 | } 623 | 624 | public void gofast() 625 | { 626 | simspeed = 2; 627 | } 628 | 629 | public void gonormal() 630 | { 631 | simspeed = 1; 632 | } 633 | 634 | public void goslow() 635 | { 636 | simspeed = 0; 637 | } 638 | 639 | public void startlearn() 640 | { 641 | this.w.agents[0].brain.learning = true; 642 | } 643 | 644 | public void stoplearn() 645 | { 646 | this.w.agents[0].brain.learning = false; 647 | } 648 | 649 | private void drawCircle(Graphics g, Vec center, int radius, Pen pen) 650 | { 651 | var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2); 652 | g.DrawEllipse(pen, rect); 653 | } 654 | 655 | private void drawArc(Graphics g, Vec center, int radius, float 
startAngle, float sweepAngle, Pen pen) 656 | { 657 | var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2); 658 | g.DrawArc(pen, rect, radToDegree(startAngle), radToDegree(sweepAngle)); 659 | } 660 | 661 | private void drawLine(Graphics g, Vec a, Vec b, Pen pen) 662 | { 663 | Point[] points = 664 | { 665 | new Point((int)a.x, (int)a.y), 666 | new Point((int)b.x, (int)b.y) 667 | }; 668 | 669 | g.DrawLines(pen, points); 670 | } 671 | 672 | private float radToDegree(float rad) 673 | { 674 | return (float)(rad * 180 / Math.PI); 675 | } 676 | } 677 | } 678 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8} 8 | WinExe 9 | Properties 10 | DeepQLearning 11 | DeepQLearning 12 | v4.5.2 13 | 512 14 | true 15 | 16 | 17 | AnyCPU 18 | true 19 | full 20 | false 21 | bin\Debug\ 22 | DEBUG;TRACE 23 | prompt 24 | 4 25 | 26 | 27 | AnyCPU 28 | pdbonly 29 | true 30 | bin\Release\ 31 | TRACE 32 | prompt 33 | 4 34 | 35 | 36 | 37 | ..\..\..\packages\Newtonsoft.Json.6.0.8\lib\net45\Newtonsoft.Json.dll 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | Form 73 | 74 | 75 | Form1.cs 76 | 77 | 78 | 79 | 80 | Form1.cs 81 | 82 | 83 | ResXFileCodeGenerator 84 | Resources.Designer.cs 85 | Designer 86 | 87 | 88 | True 89 | Resources.resx 90 | 91 | 92 | SettingsSingleFileGenerator 93 | Settings.Designer.cs 94 | 95 | 96 | True 97 | Settings.settings 98 | True 99 | 100 | 101 | 102 | 103 | 104 | 105 | 112 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.sln: -------------------------------------------------------------------------------- 1 |  2 | 
Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 2012 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DeepQLearning", "DeepQLearning.csproj", "{141003E8-8E41-4C66-AFBB-A1C725F51DF8}" 5 | EndProject 6 | Global 7 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 8 | Debug|Any CPU = Debug|Any CPU 9 | Release|Any CPU = Release|Any CPU 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 13 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Debug|Any CPU.Build.0 = Debug|Any CPU 14 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Release|Any CPU.ActiveCfg = Release|Any CPU 15 | {141003E8-8E41-4C66-AFBB-A1C725F51DF8}.Release|Any CPU.Build.0 = Release|Any CPU 16 | EndGlobalSection 17 | GlobalSection(SolutionProperties) = preSolution 18 | HideSolutionNode = FALSE 19 | EndGlobalSection 20 | EndGlobal 21 | -------------------------------------------------------------------------------- /DeepQLearning/DeepQLearning.v11.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/DeepQLearning.v11.suo -------------------------------------------------------------------------------- /DeepQLearning/Form1.Designer.cs: -------------------------------------------------------------------------------- 1 | namespace DeepQLearning 2 | { 3 | partial class Form1 4 | { 5 | /// 6 | /// Required designer variable. 7 | /// 8 | private System.ComponentModel.IContainer components = null; 9 | 10 | /// 11 | /// Clean up any resources being used. 12 | /// 13 | /// true if managed resources should be disposed; otherwise, false. 
14 | protected override void Dispose(bool disposing) 15 | { 16 | if (disposing && (components != null)) 17 | { 18 | components.Dispose(); 19 | } 20 | base.Dispose(disposing); 21 | } 22 | 23 | #region Windows Form Designer generated code 24 | 25 | /// 26 | /// Required method for Designer support - do not modify 27 | /// the contents of this method with the code editor. 28 | /// 29 | private void InitializeComponent() 30 | { 31 | System.Windows.Forms.DataVisualization.Charting.ChartArea chartArea7 = new System.Windows.Forms.DataVisualization.Charting.ChartArea(); 32 | System.Windows.Forms.DataVisualization.Charting.Legend legend7 = new System.Windows.Forms.DataVisualization.Charting.Legend(); 33 | System.Windows.Forms.DataVisualization.Charting.Series series7 = new System.Windows.Forms.DataVisualization.Charting.Series(); 34 | this.startLearning = new System.Windows.Forms.Button(); 35 | this.displayBox = new System.Windows.Forms.TextBox(); 36 | this.groupBox1 = new System.Windows.Forms.GroupBox(); 37 | this.goSlow = new System.Windows.Forms.Button(); 38 | this.goNormal = new System.Windows.Forms.Button(); 39 | this.goFast = new System.Windows.Forms.Button(); 40 | this.goVeryFast = new System.Windows.Forms.Button(); 41 | this.StopLearning = new System.Windows.Forms.Button(); 42 | this.chart1 = new System.Windows.Forms.DataVisualization.Charting.Chart(); 43 | this.groupBox2 = new System.Windows.Forms.GroupBox(); 44 | this.canvas = new System.Windows.Forms.Panel(); 45 | this.groupBox3 = new System.Windows.Forms.GroupBox(); 46 | this.PauseBtn = new System.Windows.Forms.Button(); 47 | this.saveNet = new System.Windows.Forms.Button(); 48 | this.loadNet = new System.Windows.Forms.Button(); 49 | this.groupBox1.SuspendLayout(); 50 | ((System.ComponentModel.ISupportInitialize)(this.chart1)).BeginInit(); 51 | this.groupBox2.SuspendLayout(); 52 | this.groupBox3.SuspendLayout(); 53 | this.SuspendLayout(); 54 | // 55 | // startLearning 56 | // 57 | this.startLearning.Location = 
new System.Drawing.Point(8, 21); 58 | this.startLearning.Name = "startLearning"; 59 | this.startLearning.Size = new System.Drawing.Size(134, 27); 60 | this.startLearning.TabIndex = 0; 61 | this.startLearning.Text = "Start Learning"; 62 | this.startLearning.UseVisualStyleBackColor = true; 63 | this.startLearning.Click += new System.EventHandler(this.startLearning_Click); 64 | // 65 | // displayBox 66 | // 67 | this.displayBox.Location = new System.Drawing.Point(8, 305); 68 | this.displayBox.Multiline = true; 69 | this.displayBox.Name = "displayBox"; 70 | this.displayBox.ScrollBars = System.Windows.Forms.ScrollBars.Vertical; 71 | this.displayBox.Size = new System.Drawing.Size(360, 204); 72 | this.displayBox.TabIndex = 1; 73 | // 74 | // groupBox1 75 | // 76 | this.groupBox1.Controls.Add(this.loadNet); 77 | this.groupBox1.Controls.Add(this.saveNet); 78 | this.groupBox1.Controls.Add(this.PauseBtn); 79 | this.groupBox1.Controls.Add(this.goSlow); 80 | this.groupBox1.Controls.Add(this.goNormal); 81 | this.groupBox1.Controls.Add(this.goFast); 82 | this.groupBox1.Controls.Add(this.goVeryFast); 83 | this.groupBox1.Controls.Add(this.StopLearning); 84 | this.groupBox1.Controls.Add(this.startLearning); 85 | this.groupBox1.Location = new System.Drawing.Point(976, 534); 86 | this.groupBox1.Name = "groupBox1"; 87 | this.groupBox1.Size = new System.Drawing.Size(374, 127); 88 | this.groupBox1.TabIndex = 3; 89 | this.groupBox1.TabStop = false; 90 | this.groupBox1.Text = "Controls"; 91 | // 92 | // goSlow 93 | // 94 | this.goSlow.Location = new System.Drawing.Point(282, 54); 95 | this.goSlow.Name = "goSlow"; 96 | this.goSlow.Size = new System.Drawing.Size(86, 27); 97 | this.goSlow.TabIndex = 5; 98 | this.goSlow.Text = "Go slow"; 99 | this.goSlow.UseVisualStyleBackColor = true; 100 | this.goSlow.Click += new System.EventHandler(this.goSlow_Click); 101 | // 102 | // goNormal 103 | // 104 | this.goNormal.Location = new System.Drawing.Point(188, 54); 105 | this.goNormal.Name = "goNormal"; 
106 | this.goNormal.Size = new System.Drawing.Size(88, 27); 107 | this.goNormal.TabIndex = 4; 108 | this.goNormal.Text = "Go normal"; 109 | this.goNormal.UseVisualStyleBackColor = true; 110 | this.goNormal.Click += new System.EventHandler(this.goNormal_Click); 111 | // 112 | // goFast 113 | // 114 | this.goFast.Location = new System.Drawing.Point(112, 54); 115 | this.goFast.Name = "goFast"; 116 | this.goFast.Size = new System.Drawing.Size(70, 27); 117 | this.goFast.TabIndex = 3; 118 | this.goFast.Text = "Go fast"; 119 | this.goFast.UseVisualStyleBackColor = true; 120 | this.goFast.Click += new System.EventHandler(this.goFast_Click); 121 | // 122 | // goVeryFast 123 | // 124 | this.goVeryFast.Location = new System.Drawing.Point(8, 54); 125 | this.goVeryFast.Name = "goVeryFast"; 126 | this.goVeryFast.Size = new System.Drawing.Size(98, 27); 127 | this.goVeryFast.TabIndex = 2; 128 | this.goVeryFast.Text = "Go very fast"; 129 | this.goVeryFast.UseVisualStyleBackColor = true; 130 | this.goVeryFast.Click += new System.EventHandler(this.goVeryFast_Click); 131 | // 132 | // StopLearning 133 | // 134 | this.StopLearning.Location = new System.Drawing.Point(235, 21); 135 | this.StopLearning.Name = "StopLearning"; 136 | this.StopLearning.Size = new System.Drawing.Size(133, 27); 137 | this.StopLearning.TabIndex = 1; 138 | this.StopLearning.Text = "Stop Learning"; 139 | this.StopLearning.UseVisualStyleBackColor = true; 140 | this.StopLearning.Click += new System.EventHandler(this.StopLearning_Click); 141 | // 142 | // chart1 143 | // 144 | chartArea7.Name = "ChartArea1"; 145 | this.chart1.ChartAreas.Add(chartArea7); 146 | legend7.Name = "Legend1"; 147 | this.chart1.Legends.Add(legend7); 148 | this.chart1.Location = new System.Drawing.Point(8, 21); 149 | this.chart1.Name = "chart1"; 150 | series7.ChartArea = "ChartArea1"; 151 | series7.Legend = "Legend1"; 152 | series7.Name = "Series1"; 153 | this.chart1.Series.Add(series7); 154 | this.chart1.Size = new System.Drawing.Size(360, 
278); 155 | this.chart1.TabIndex = 4; 156 | this.chart1.Text = "chart1"; 157 | // 158 | // groupBox2 159 | // 160 | this.groupBox2.Controls.Add(this.canvas); 161 | this.groupBox2.Location = new System.Drawing.Point(12, 12); 162 | this.groupBox2.Name = "groupBox2"; 163 | this.groupBox2.Size = new System.Drawing.Size(958, 649); 164 | this.groupBox2.TabIndex = 5; 165 | this.groupBox2.TabStop = false; 166 | this.groupBox2.Text = "Visualization"; 167 | // 168 | // canvas 169 | // 170 | this.canvas.BackColor = System.Drawing.SystemColors.Info; 171 | this.canvas.Location = new System.Drawing.Point(6, 21); 172 | this.canvas.Name = "canvas"; 173 | this.canvas.Size = new System.Drawing.Size(946, 618); 174 | this.canvas.TabIndex = 0; 175 | this.canvas.Paint += new System.Windows.Forms.PaintEventHandler(this.canvas_Paint); 176 | // 177 | // groupBox3 178 | // 179 | this.groupBox3.Controls.Add(this.chart1); 180 | this.groupBox3.Controls.Add(this.displayBox); 181 | this.groupBox3.Location = new System.Drawing.Point(976, 12); 182 | this.groupBox3.Name = "groupBox3"; 183 | this.groupBox3.Size = new System.Drawing.Size(374, 516); 184 | this.groupBox3.TabIndex = 0; 185 | this.groupBox3.TabStop = false; 186 | this.groupBox3.Text = "Output"; 187 | // 188 | // PauseBtn 189 | // 190 | this.PauseBtn.Location = new System.Drawing.Point(148, 21); 191 | this.PauseBtn.Name = "PauseBtn"; 192 | this.PauseBtn.Size = new System.Drawing.Size(81, 27); 193 | this.PauseBtn.TabIndex = 6; 194 | this.PauseBtn.Text = "Pause"; 195 | this.PauseBtn.UseVisualStyleBackColor = true; 196 | this.PauseBtn.Click += new System.EventHandler(this.PauseBtn_Click); 197 | // 198 | // saveNet 199 | // 200 | this.saveNet.Location = new System.Drawing.Point(8, 88); 201 | this.saveNet.Name = "saveNet"; 202 | this.saveNet.Size = new System.Drawing.Size(174, 29); 203 | this.saveNet.TabIndex = 7; 204 | this.saveNet.Text = "Save QNetwork"; 205 | this.saveNet.UseVisualStyleBackColor = true; 206 | this.saveNet.Click += new 
System.EventHandler(this.saveNet_Click); 207 | // 208 | // loadNet 209 | // 210 | this.loadNet.Location = new System.Drawing.Point(188, 88); 211 | this.loadNet.Name = "loadNet"; 212 | this.loadNet.Size = new System.Drawing.Size(180, 29); 213 | this.loadNet.TabIndex = 8; 214 | this.loadNet.Text = "Load QNetwork"; 215 | this.loadNet.UseVisualStyleBackColor = true; 216 | this.loadNet.Click += new System.EventHandler(this.loadNet_Click); 217 | // 218 | // Form1 219 | // 220 | this.AutoScaleDimensions = new System.Drawing.SizeF(8F, 16F); 221 | this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font; 222 | this.ClientSize = new System.Drawing.Size(1362, 673); 223 | this.Controls.Add(this.groupBox3); 224 | this.Controls.Add(this.groupBox2); 225 | this.Controls.Add(this.groupBox1); 226 | this.MaximizeBox = false; 227 | this.Name = "Form1"; 228 | this.SizeGripStyle = System.Windows.Forms.SizeGripStyle.Show; 229 | this.Text = "Deep Q Learning Demo"; 230 | this.FormClosed += new System.Windows.Forms.FormClosedEventHandler(this.Form1_FormClosed); 231 | this.groupBox1.ResumeLayout(false); 232 | ((System.ComponentModel.ISupportInitialize)(this.chart1)).EndInit(); 233 | this.groupBox2.ResumeLayout(false); 234 | this.groupBox3.ResumeLayout(false); 235 | this.groupBox3.PerformLayout(); 236 | this.ResumeLayout(false); 237 | 238 | } 239 | 240 | #endregion 241 | 242 | private System.Windows.Forms.Button startLearning; 243 | private System.Windows.Forms.TextBox displayBox; 244 | private System.Windows.Forms.GroupBox groupBox1; 245 | private System.Windows.Forms.DataVisualization.Charting.Chart chart1; 246 | private System.Windows.Forms.GroupBox groupBox2; 247 | private System.Windows.Forms.GroupBox groupBox3; 248 | private System.Windows.Forms.Button StopLearning; 249 | private System.Windows.Forms.Button goSlow; 250 | private System.Windows.Forms.Button goNormal; 251 | private System.Windows.Forms.Button goFast; 252 | private System.Windows.Forms.Button goVeryFast; 253 | private 
using ConvnetSharp;
using DeepQLearning.DRLAgent;
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.Serialization.Formatters.Binary;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace DeepQLearning
{
    /// <summary>
    /// Main window: hosts the simulation canvas, drives the background
    /// learning loop, and saves/loads the trained Q-network.
    /// </summary>
    public partial class Form1 : Form
    {
        Pen blackPen = new Pen(Color.Black);
        Pen greenPen = new Pen(Color.LightGreen, 5);

        // Worker thread that advances the simulation.
        private Thread workerThread = null;

        // volatile: both flags are written on the UI thread and polled by the
        // worker thread, so reads must not be cached.
        volatile bool needToStop = false;
        volatile bool paused = false;

        QAgent qAgent;

        // Delay in ms between simulation ticks; lower = faster simulation.
        int interval = 30;

        string netFile = Path.Combine(Environment.CurrentDirectory, "deepQnet.dat");

        public Form1()
        {
            InitializeComponent();

            // Panel is not double-buffered by default, which causes heavy
            // flicker while redrawing the world. The property is protected,
            // so it is enabled via reflection.
            typeof(Panel).InvokeMember("DoubleBuffered",
                BindingFlags.SetProperty | BindingFlags.Instance | BindingFlags.NonPublic,
                null, canvas, new object[] { true });
        }

        /// <summary>
        /// Repaints the world and mirrors the agent's status text plus the
        /// current simulation speed into the display box.
        /// </summary>
        private void canvas_Paint(object sender, PaintEventArgs e)
        {
            if (qAgent != null)
            {
                displayBox.Text = qAgent.draw_world(e.Graphics);

                // simspeed: 0 = Slow .. 3 = Very Fast (set by the go*_Click handlers).
                switch (qAgent.simspeed)
                {
                    case 0:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Slow";
                        break;

                    case 1:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Normal";
                        break;

                    case 2:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Fast";
                        break;

                    case 3:
                        displayBox.Text += Environment.NewLine + "Simulation speed: Very Fast";
                        break;
                }
            }

            canvas.Update();
        }

        #region // Button Controls
        private void StopLearning_Click(object sender, EventArgs e)
        {
            // Guard: clicking Stop before Start must not crash.
            if (qAgent != null)
                qAgent.stoplearn();
        }

        private void startLearning_Click(object sender, EventArgs e)
        {
            if (qAgent == null)
            {
                var num_inputs = 27; // 9 eyes, each sees 3 numbers (wall, green, red thing proximity)
                var num_actions = 5; // 5 possible angles agent can turn
                var temporal_window = 4; // amount of temporal memory. 0 = agent lives in-the-moment :)
                var network_size = num_inputs * temporal_window + num_actions * temporal_window + num_inputs;

                // The value function network computes a value of taking any of
                // the possible actions given an input state. Specified explicitly
                // here the hard way, but one could equivalently use
                // opt.hidden_layer_sizes = [20,20] to insert simple relu layers.
                var layer_defs = new List<LayerDefinition>();
                layer_defs.Add(new LayerDefinition { type = "input", out_sx = 1, out_sy = 1, out_depth = network_size });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "fc", num_neurons = 96, activation = "relu" });
                layer_defs.Add(new LayerDefinition { type = "regression", num_neurons = num_actions });

                // Options for the Temporal Difference learner that trains the
                // above net by backpropping the temporal difference learning rule.
                var opt = new Options { method = "adadelta", l2_decay = 0.001, batch_size = 10 };

                var tdtrainer_options = new TrainingOptions();
                tdtrainer_options.temporal_window = temporal_window;
                tdtrainer_options.experience_size = 30000;
                tdtrainer_options.start_learn_threshold = 1000;
                tdtrainer_options.gamma = 0.7;
                tdtrainer_options.learning_steps_total = 200000;
                tdtrainer_options.learning_steps_burnin = 3000;
                tdtrainer_options.epsilon_min = 0.05;
                tdtrainer_options.epsilon_test_time = 0.00;
                tdtrainer_options.layer_defs = layer_defs;
                tdtrainer_options.options = opt;

                var brain = new DeepQLearn(num_inputs, num_actions, tdtrainer_options);
                qAgent = new QAgent(brain, canvas.Width, canvas.Height);
            }
            else
                qAgent.startlearn();

            EnsureWorkerStarted();
        }

        // Starts the background simulation thread if it is not already running.
        private void EnsureWorkerStarted()
        {
            if (workerThread == null)
            {
                workerThread = new Thread(new ThreadStart(BackgroundThread));
                // Background thread: never keep the process alive on its own.
                workerThread.IsBackground = true;
                workerThread.Start();
            }
        }

        private void PauseBtn_Click(object sender, EventArgs e)
        {
            // Toggle pause state; the button label always shows the NEXT action.
            if (paused)
            {
                PauseBtn.Text = "Pause";
                paused = false;
            }
            else
            {
                PauseBtn.Text = "Continue";
                paused = true;
            }
        }

        private void saveNet_Click(object sender, EventArgs e)
        {
            // Guard: nothing to serialize before the agent exists.
            if (qAgent == null)
            {
                displayBox.Text = "Nothing to save - start learning first";
                return;
            }

            // Save the network to file.
            // NOTE(review): BinaryFormatter is insecure and obsolete in modern
            // .NET; it is kept here only for compatibility with the existing
            // deepQnet.dat format. Do not load files from untrusted sources.
            using (FileStream fstream = new FileStream(netFile, FileMode.Create))
            {
                new BinaryFormatter().Serialize(fstream, qAgent);
            }

            displayBox.Text = "QNetwork saved successfully";
        }

        private void loadNet_Click(object sender, EventArgs e)
        {
            // Guard: avoid FileNotFoundException when no snapshot exists yet.
            if (!File.Exists(netFile))
            {
                displayBox.Text = "No saved QNetwork found at " + netFile;
                return;
            }

            // Load the network from file (same BinaryFormatter caveat as saving:
            // only open files this application produced itself).
            using (FileStream fstream = new FileStream(netFile, FileMode.Open))
            {
                qAgent = new BinaryFormatter().Deserialize(fstream) as QAgent;
                qAgent.Reinitialize();
            }

            EnsureWorkerStarted();
        }

        private void Form1_FormClosed(object sender, FormClosedEventArgs e)
        {
            // Signal the worker loop to exit.
            needToStop = true;

            if (workerThread != null)
            {
                // Pump UI messages while waiting so a pending Invoke from the
                // worker thread cannot deadlock the join.
                while (!workerThread.Join(100))
                    Application.DoEvents();
                workerThread = null;
            }
        }

        private void goNormal_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.gonormal();
            interval = 25;
        }

        private void goFast_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.gofast();
            interval = 10;
        }

        private void goVeryFast_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.goveryfast();
            interval = 0;
        }

        private void goSlow_Click(object sender, EventArgs e)
        {
            if (qAgent == null) return;
            qAgent.goslow();
            interval = 50;
        }
        #endregion

        // Delegate to enable async calls for setting control properties.
        private delegate void UpdateUICallback(Panel panel);

        // Thread-safe refresh of a panel: marshals to the UI thread if needed.
        private void UpdateUI(Panel panel)
        {
            if (needToStop)
                return;

            if (panel.InvokeRequired)
            {
                UpdateUICallback d = new UpdateUICallback(UpdateUI);
                Invoke(d, new object[] { panel });
            }
            else
            {
                panel.Refresh();
            }
        }

        // Worker loop: tick the simulation and repaint until told to stop.
        private void BackgroundThread()
        {
            while (!needToStop)
            {
                if (!paused)
                {
                    qAgent.tick();
                    UpdateUI(canvas);
                }

                Thread.Sleep(interval);
            }
        }

        /// <summary>
        /// Intersects the segment A->B with a circle of radius <paramref name="rad"/>
        /// centred at C. Returns the intersection point closest to A (with its
        /// distance from the circle's chord midpoint in <c>ua</c>), or
        /// <c>intersect == false</c> when the segment misses the circle.
        /// </summary>
        public Intersect line_point_intersect(Graphics g, Vec A, Vec B, Vec C, double rad)
        {
            const double Epsilon = 1e-9; // tolerance for floating-point comparisons

            Intersect result = new Intersect { intersect = false };

            // Euclidean length of segment AB.
            var LAB = Math.Sqrt(Math.Pow(B.x - A.x, 2) + Math.Pow(B.y - A.y, 2));

            // Degenerate segment (A == B): nothing to intersect, and the unit
            // vector below would divide by zero.
            if (LAB < Epsilon)
                return result;

            // Unit direction vector D from A to B.
            var Dx = (B.x - A.x) / LAB;
            var Dy = (B.y - A.y) / LAB;

            // Parametrise the line as (x, y) = (Ax, Ay) + t * (Dx, Dy);
            // t is the signed distance along AB of the projection of C.
            var t = Dx * (C.x - A.x) + Dy * (C.y - A.y);

            // E = projection of C onto the line (closest point on line to C).
            var Ex = t * Dx + A.x;
            var Ey = t * Dy + A.y;

            // Distance from the line to the circle centre.
            var LEC = Math.Sqrt(Math.Pow(Ex - C.x, 2) + Math.Pow(Ey - C.y, 2));

            // Line intersects the circle only if it passes closer than rad;
            // tangency (LEC == rad) and misses both yield no intersection,
            // exactly as in the original implementation.
            if (LEC < rad)
            {
                // Half-chord length from E to each intersection point.
                var dt = Math.Sqrt(rad * rad - LEC * LEC);

                // First intersection point F (the one closest to A) sits at
                // parameter tF = t - dt along the segment.
                var tF = t - dt;
                var F = new Vec(tF * Dx + A.x, tF * Dy + A.y);

                // Accept only if F lies on the segment: 0 <= tF <= LAB.
                // The original code compared Math.Atan2 angles with == to test
                // that F is in front of A; exact double equality is unreliable,
                // so the parameter range is checked directly instead.
                if (tF >= 0 && tF <= LAB)
                {
                    result = new Intersect { ua = dt, up = F, intersect = true };
                }
            }

            return result;
        }

        // Draws a circle outline of the given radius around center.
        private void drawCircle(Graphics g, Vec center, int radius, Pen pen)
        {
            var rect = new Rectangle((int)center.x - radius, (int)center.y - radius, radius * 2, radius * 2);
            g.DrawEllipse(pen, rect);
        }

        // Draws a straight line segment from a to b.
        private void drawLine(Graphics g, Vec a, Vec b, Pen pen)
        {
            g.DrawLine(pen, (int)a.x, (int)a.y, (int)b.x, (int)b.y);
        }
    }
}
13 | /// 14 | [STAThread] 15 | static void Main() 16 | { 17 | Application.EnableVisualStyles(); 18 | Application.SetCompatibleTextRenderingDefault(false); 19 | Application.Run(new Form1()); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("DeepQLearning")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("DeepQLearning")] 13 | [assembly: AssemblyCopyright("Copyright © 2016")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 
20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("141003e8-8e41-4c66-afbb-a1c725f51df8")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace DeepQLearning.Properties 12 | { 13 | 14 | 15 | /// 16 | /// A strongly-typed resource class, for looking up localized strings, etc. 17 | /// 18 | // This class was auto-generated by the StronglyTypedResourceBuilder 19 | // class via a tool like ResGen or Visual Studio. 20 | // To add or remove a member, edit your .ResX file then rerun ResGen 21 | // with the /str option, or rebuild your VS project. 
22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] 23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 25 | internal class Resources 26 | { 27 | 28 | private static global::System.Resources.ResourceManager resourceMan; 29 | 30 | private static global::System.Globalization.CultureInfo resourceCulture; 31 | 32 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 33 | internal Resources() 34 | { 35 | } 36 | 37 | /// 38 | /// Returns the cached ResourceManager instance used by this class. 39 | /// 40 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 41 | internal static global::System.Resources.ResourceManager ResourceManager 42 | { 43 | get 44 | { 45 | if ((resourceMan == null)) 46 | { 47 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("DeepQLearning.Properties.Resources", typeof(Resources).Assembly); 48 | resourceMan = temp; 49 | } 50 | return resourceMan; 51 | } 52 | } 53 | 54 | /// 55 | /// Overrides the current thread's CurrentUICulture property for all 56 | /// resource lookups using this strongly typed resource class. 
57 | /// 58 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 59 | internal static global::System.Globalization.CultureInfo Culture 60 | { 61 | get 62 | { 63 | return resourceCulture; 64 | } 65 | set 66 | { 67 | resourceCulture = value; 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Resources.resx: -------------------------------------------------------------------------------- 1 |  2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | text/microsoft-resx 107 | 108 | 109 | 2.0 110 | 111 | 112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 113 | 114 | 115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Settings.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.42000 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 
8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace DeepQLearning.Properties 12 | { 13 | 14 | 15 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 16 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "11.0.0.0")] 17 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase 18 | { 19 | 20 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); 21 | 22 | public static Settings Default 23 | { 24 | get 25 | { 26 | return defaultInstance; 27 | } 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /DeepQLearning/Properties/Settings.settings: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /DeepQLearning/bin/Debug/deepQnet.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/bin/Debug/deepQnet.dat -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.Form1.resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.Form1.resources -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.Properties.Resources.resources: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.Properties.Resources.resources -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csproj.FileListAbsolute.txt: -------------------------------------------------------------------------------- 1 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.exe.config 2 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.exe 3 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\DeepQLearning.pdb 4 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\Newtonsoft.Json.dll 5 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 6 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 7 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 8 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 9 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 10 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.exe 11 | C:\Users\Shelton\documents\visual studio 2015\Projects\MachineLearning\DeepQLearning\obj\Debug\DeepQLearning.pdb 12 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe.config 13 | C:\Users\Shelton\Documents\Visual Studio 
2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.exe 14 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.pdb 15 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe 16 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\DeepQLearning.pdb 17 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 18 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 19 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 20 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 21 | C:\Users\Shelton\Documents\Visual Studio 2012\Projects\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 22 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe.config 23 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.exe 24 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.pdb 25 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.exe 26 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\DeepQLearning.pdb 27 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\bin\Debug\Newtonsoft.Json.xml 28 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.csprojResolveAssemblyReference.cache 29 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.Form1.resources 30 | C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.Properties.Resources.resources 31 | 
C:\Users\Shelton\Documents\Visual Studio 2015\Projects\DeepQLearning\obj\Debug\DeepQLearning.csproj.GenerateResource.Cache 32 | -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csproj.GenerateResource.Cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.csproj.GenerateResource.Cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.csprojResolveAssemblyReference.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.csprojResolveAssemblyReference.cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.exe -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DeepQLearning.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DeepQLearning.pdb -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferences.cache: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferences.cache -------------------------------------------------------------------------------- /DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dubezOniner/Deep-QLearning-Demo-csharp/5a6d756cdc5548c23bcaa0dff4cb0921044ff47a/DeepQLearning/obj/Debug/DesignTimeResolveAssemblyReferencesInput.cache -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2017 dubezOniner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep QLearning demo 2 | This demo is a C# port of [ConvNetJS Reinforcement Learning Demo](https://cs.stanford.edu/people/karpathy/convnetjs/demo/rldemo.html) by Andrej Karpathy. 3 | Watch the demo video on [YouTube](https://www.youtube.com/watch?v=pTgI_-yYlBA&t=9s). 4 | This project also uses Cedric Bovar's [ConvNetSharp library](https://github.com/cbovar/ConvNetSharp) for the Convolutional Neural Networks. 5 | 6 | # Pretrained model 7 | I included a pretrained model that I trained for a few hours; you can load and use it if you don't like waiting :) 8 | --------------------------------------------------------------------------------