├── .gitignore ├── README.md ├── composer.json └── src ├── ConvLayer.php ├── DropoutLayer.php ├── FullyConnLayer.php ├── InputLayer.php ├── Layer.php ├── LocalResponseNormalizationLayer.php ├── MaxoutLayer.php ├── Net.php ├── PoolLayer.php ├── RegressionLayer.php ├── ReluLayer.php ├── SVMLayer.php ├── SigmoidLayer.php ├── SoftmaxLayer.php ├── TanhLayer.php ├── Trainer.php ├── Util.php └── Vol.php /.gitignore: -------------------------------------------------------------------------------- 1 | composer.lock 2 | vendor/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PHP ConvNet (Work in Progress) 2 | 3 | Pure PHP ConvNet Implementation, a PHP port of the amazing [convnetjs](https://github.com/karpathy/convnetjs), thank you @karpathy :-) 4 | 5 | ## TO-DO 6 | 7 | The "Issues" page from this repository is being used for TO-DO management, just search for the "to-do" tag. 8 | 9 | ## Credits 10 | 11 | [@gabrielrcouto](http://www.twitter.com/gabrielrcouto) 12 | 13 | ## License 14 | 15 | [MIT License](http://gabrielrcouto.mit-license.org/) 16 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gabrielrcouto/php-convnet", 3 | "description": "Pure PHP Convolutional Neural Network", 4 | "keywords": ["php","cnn","convnet","convolutional", "neural network"], 5 | "homepage": "http://github.com/gabrielrcouto/php-convnet", 6 | "license": "MIT", 7 | "authors": [ 8 | { 9 | "name": "Gabriel Rodrigues Couto", 10 | "email": "gabrielrcouto@gmail.com" 11 | } 12 | ], 13 | "require": { 14 | "php": ">=7.0" 15 | }, 16 | "require-dev" : { 17 | }, 18 | "autoload": { 19 | "psr-4": { 20 | "ConvNet\\": "src/" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/ConvLayer.php: -------------------------------------------------------------------------------- 1 | out_depth = $opt['filters']; 14 | // filter size. Should be odd if possible, it's cleaner. 15 | $this->sx = $opt['sx']; 16 | $this->in_depth = $opt['in_depth']; 17 | $this->in_sx = $opt['in_sx']; 18 | $this->in_sy = $opt['in_sy']; 19 | 20 | // optional 21 | $this->sy = array_key_exists('sy', $opt) ? $opt['sy'] : $this->sx; 22 | // stride at which we apply filters to input volume 23 | $this->stride = array_key_exists('stride', $opt) ? $opt['stride'] : 1; 24 | // amount of 0 padding to add around borders of input volume 25 | $this->pad = array_key_exists('pad', $opt) ? $opt['pad'] : 0; 26 | $this->l1_decay_mul = array_key_exists('l1_decay_mul', $opt) ? $opt['l1_decay_mul'] : 0.0; 27 | $this->l2_decay_mul = array_key_exists('l2_decay_mul', $opt) ? $opt['l2_decay_mul'] : 1.0; 28 | 29 | // computed 30 | // note we are doing floor, so if the strided convolution of the filter doesnt fit into the input 31 | // volume exactly, the output volume will be trimmed and not contain the (incomplete) computed 32 | // final application. 33 | $this->out_sx = floor(($this->in_sx + $this->pad * 2 - $this->sx) / $this->stride + 1); 34 | $this->out_sy = floor(($this->in_sy + $this->pad * 2 - $this->sy) / $this->stride + 1); 35 | $this->layer_type = 'conv'; 36 | 37 | // initializations 38 | $this->bias = array_key_exists('bias_pref', $opt) ? 
$opt['bias_pref'] : 0.0; 39 | $this->filters = []; 40 | 41 | for ($i = 0; $i < $this->out_depth; $i++) { 42 | $this->filters[] = new Vol($this->sx, $this->sy, $this->in_depth); 43 | } 44 | 45 | $this->biases = new Vol(1, 1, $this->out_depth, $this->bias); 46 | } 47 | 48 | public function forward($V, $is_training) 49 | { 50 | // optimized code by @mdda that achieves 2x speedup over previous version 51 | $this->in_act = $V; 52 | $A = new Vol($this->out_sx | 0, $this->out_sy | 0, $this->out_depth | 0, 0.0); 53 | 54 | $V_sx = $V->sx | 0; 55 | $V_sy = $V->sy |0; 56 | $xy_stride = $this->stride |0; 57 | 58 | for ($d = 0; $d < $this->out_depth; $d++) { 59 | $f = $this->filters[$d]; 60 | $x = -$this->pad | 0; 61 | $y = -$this->pad | 0; 62 | 63 | for ($ay = 0; $ay < $this->out_sy; $y += $xy_stride, $ay++) { // xy_stride 64 | $x = -$this->pad |0; 65 | 66 | for ($ax = 0; $ax < $this->out_sx; $x += $xy_stride, $ax++) { // xy_stride 67 | // convolve centered at this particular location 68 | $a = 0.0; 69 | 70 | for ($fy = 0; $fy < $f->sy; $fy++) { 71 | $oy = $y + $fy; // coordinates in the original input array coordinates 72 | 73 | for ($fx = 0; $fx<$f->sx; $fx++) { 74 | $ox = $x + $fx; 75 | if ($oy >= 0 && $oy < $V_sy && $ox >= 0 && $ox < $V_sx) { 76 | for ($fd = 0; $fd < $f->depth; $fd++) { 77 | // avoid function call overhead (x2) for efficiency, compromise modularity :( 78 | $a += $f->w[(($f->sx * $fy) + $fx) * $f->depth + $fd] * $V->w[(($V_sx * $oy) + $ox) * $V->depth + $fd]; 79 | } 80 | } 81 | } 82 | } 83 | 84 | $a += $this->biases->w[$d]; 85 | $A->set($ax, $ay, $d, $a); 86 | } 87 | } 88 | } 89 | 90 | $this->out_act = $A; 91 | 92 | return $this->out_act; 93 | } 94 | 95 | public function backward($y = null) 96 | { 97 | $V = $this->in_act; 98 | 99 | // zero out gradient wrt bottom data, we're about to fill it 100 | $V->dw = array_fill(0, count($V->w), 0); 101 | 102 | $V_sx = $V->sx | 0; 103 | $V_sy = $V->sy | 0; 104 | $xy_stride = $this->stride | 0; 105 | 106 | for ($d = 0; $d < $this->out_depth; $d++) { 107 | $f = $this->filters[$d]; 108 | $x = -$this->pad | 0; 109 | $y = -$this->pad | 0; 110 | for ($ay = 0; $ay < $this->out_sy; $y += $xy_stride, $ay++) { // xy_stride 111 | $x = -$this->pad | 0; 112 | for ($ax = 0; $ax < $this->out_sx; $x += $xy_stride, $ax++) { // xy_stride 113 | // convolve centered at this particular location 114 | $chain_grad = $this->out_act->get_grad($ax, $ay, $d); // gradient from above, from chain rule 115 | for ($fy = 0; $fy < $f->sy; $fy++) { 116 | $oy = $y + $fy; // coordinates in the original input array coordinates 117 | for ($fx = 0; $fx < $f->sx; $fx++) { 118 | $ox = $x + $fx; 119 | if ($oy >= 0 && $oy < $V_sy && $ox >= 0 && $ox < $V_sx) { 120 | for ($fd = 0; $fd < $f->depth; $fd++) { 121 | // avoid function call overhead (x2) for efficiency, compromise modularity :( 122 | $ix1 = (($V_sx * $oy) + $ox) * $V->depth + $fd; 123 | $ix2 = (($f->sx * $fy) + $fx) * $f->depth + $fd; 124 | $f->dw[$ix2] += $V->w[$ix1] * $chain_grad; 125 | $V->dw[$ix1] += $f->w[$ix2] * $chain_grad; 126 | } 127 | } 128 | } 129 | } 130 | $this->biases->dw[$d] += $chain_grad; 131 | } 132 | } 133 | } 134 | } 135 | 136 | public function getParamsAndGrads() 137 | { 138 | $response = []; 139 | 140 | for ($i = 0; $i < $this->out_depth; $i++) { 141 | $response[] = [ 142 | 'params' => &$this->filters[$i]->w, 143 | 'grads' => &$this->filters[$i]->dw, 144 | 'l2_decay_mul' => &$this->l2_decay_mul, 145 | 'l1_decay_mul' => &$this->l1_decay_mul 146 | ]; 147 | } 148 | 149 | $response[] = [ 150 | 'params' => 
&$this->biases->w, 151 | 'grads' => &$this->biases->dw, 152 | 'l1_decay_mul' => 0.0, 153 | 'l2_decay_mul' => 0.0 154 | ]; 155 | 156 | return $response; 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/DropoutLayer.php: -------------------------------------------------------------------------------- 1 | out_sx = $opt['in_sx']; 13 | $this->out_sy = $opt['in_sy']; 14 | $this->out_depth = $opt['in_depth']; 15 | $this->layer_type = 'dropout'; 16 | $this->drop_prob = array_key_exists('drop_prob', $opt) ? $opt['drop_prob'] : 0.5; 17 | $this->dropped = array_fill(0, $this->out_sx * $this->out_sy * $this->out_depth, 0); 18 | } 19 | 20 | public function forward($V, $is_training = false) 21 | { 22 | $this->in_act = $V; 23 | 24 | $V2 = $V->clone(); 25 | $N = count($V->w); 26 | 27 | if ($is_training) { 28 | // do dropout 29 | for ($i = 0; $i < $N; $i++) { 30 | if (rand() < $this->drop_prob) { 31 | $V2->w[$i] = 0; 32 | $this->dropped[$i] = true; 33 | // drop! 34 | } else { 35 | $this->dropped[$i] = false; 36 | } 37 | } 38 | } else { 39 | // scale the activations during prediction 40 | for ($i = 0; $i < $N; $i++) { 41 | $V2->w[$i] *= $this->drop_prob; 42 | } 43 | } 44 | 45 | $this->out_act = $V2; 46 | 47 | return $this->out_act; // dummy identity function for now 48 | } 49 | 50 | public function backward($y = null) 51 | { 52 | $V = $this->in_act; // we need to set dw of this 53 | $chain_grad = $this->out_act; 54 | $N = count($V->w); 55 | 56 | $V->dw = array_fill(0, $N, 0); // zero out gradient wrt data 57 | for ($i = 0; $i < $N; $i++) { 58 | if (!($this->dropped[$i])) { 59 | $V->dw[$i] = $chain_grad->dw[$i]; // copy over the gradient 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/FullyConnLayer.php: -------------------------------------------------------------------------------- 1 | out_depth = array_key_exists('num_neurons', $opt) ? $opt['num_neurons'] : $opt['filters']; 15 | 16 | // optional 17 | $this->l1_decay_mul = array_key_exists('l1_decay_mul', $opt) ? $opt['l1_decay_mul'] : 0.0; 18 | $this->l2_decay_mul = array_key_exists('l2_decay_mul', $opt) ? $opt['l2_decay_mul'] : 1.0; 19 | 20 | // computed 21 | $this->num_inputs = $opt['in_sx'] * $opt['in_sy'] * $opt['in_depth']; 22 | $this->out_sx = 1; 23 | $this->out_sy = 1; 24 | $this->layer_type = 'fc'; 25 | 26 | // initializations 27 | $this->bias = array_key_exists('bias_pref', $opt) ? 
$opt['bias_pref'] : 0.0; 28 | $this->filters = []; 29 | 30 | for ($i = 0; $i < $this->out_depth; $i++) { 31 | $this->filters[] = new Vol(1, 1, $this->num_inputs); 32 | } 33 | 34 | $this->biases = new Vol(1, 1, $this->out_depth, $this->bias); 35 | } 36 | 37 | public function forward($V, $is_training) 38 | { 39 | $this->in_act = $V; 40 | $A = new Vol(1, 1, $this->out_depth, 0.0); 41 | $Vw = $V->w; 42 | 43 | for ($i = 0; $i < $this->out_depth; $i++) { 44 | $a = 0.0; 45 | $wi = $this->filters[$i]->w; 46 | 47 | for ($d = 0; $d < $this->num_inputs; $d++) { 48 | $a += $Vw[$d] * $wi[$d]; // for efficiency use Vols directly for now 49 | } 50 | 51 | $a += $this->biases->w[$i]; 52 | $A->w[$i] = $a; 53 | } 54 | 55 | $this->out_act = $A; 56 | 57 | return $this->out_act; 58 | } 59 | 60 | public function backward($y = null) 61 | { 62 | $V = $this->in_act; 63 | // zero out the gradient in input Vol 64 | $V->dw = array_fill(0, count($V->w), 0); 65 | 66 | // compute gradient wrt weights and data 67 | for ($i = 0; $i < $this->out_depth; $i++) { 68 | $tfi = $this->filters[$i]; 69 | $chain_grad = $this->out_act->dw[$i]; 70 | 71 | for ($d = 0; $d < $this->num_inputs; $d++) { 72 | $V->dw[$d] += $tfi->w[$d] * $chain_grad; // grad wrt input data 73 | $tfi->dw[$d] += $V->w[$d] * $chain_grad; // grad wrt params 74 | } 75 | 76 | $this->biases->dw[$i] += $chain_grad; 77 | } 78 | } 79 | 80 | public function getParamsAndGrads() 81 | { 82 | $response = []; 83 | 84 | for ($i = 0; $i < $this->out_depth; $i++) { 85 | $response[] = [ 86 | 'params' => &$this->filters[$i]->w, 87 | 'grads' => &$this->filters[$i]->dw, 88 | 'l1_decay_mul' => &$this->l1_decay_mul, 89 | 'l2_decay_mul' => &$this->l2_decay_mul 90 | ]; 91 | } 92 | 93 | $response[] = [ 94 | 'params' => &$this->biases->w, 95 | 'grads' => &$this->biases->dw, 96 | 'l1_decay_mul' => 0.0, 97 | 'l2_decay_mul' => 0.0 98 | ]; 99 | 100 | return $response; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/InputLayer.php: -------------------------------------------------------------------------------- 1 | out_depth = Util::getopt($opt, ['out_depth', 'depth'], 0); 14 | 15 | // optional: default these dimensions to 1 16 | $this->out_sx = Util::getopt($opt, ['out_sx', 'sx', 'width'], 1); 17 | $this->out_sy = Util::getopt($opt, ['out_sy', 'sy', 'height'], 1); 18 | 19 | // computed 20 | $this->layer_type = 'input'; 21 | } 22 | 23 | public function forward($V, $is_training) 24 | { 25 | $this->in_act = $V; 26 | $this->out_act = $V; 27 | 28 | return $this->out_act; // simply identity function for now 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/Layer.php: -------------------------------------------------------------------------------- 1 | $value) { 42 | if ($key === 'filters' && is_array($value)) { 43 | foreach ($value as $filter) { 44 | if ($this->layer_type === 'conv') { 45 | $vol = new Vol($json['sx'], $json['sy'], $json['in_depth']); 46 | } else if ($this->layer_type === 'fc') { 47 | $vol = new Vol(1, 1, $json['num_inputs']); 48 | } 49 | 50 | $vol->fromJson($filter); 51 | $this->filters[] = $vol; 52 | } 53 | } else if ($key === 'biases') { 54 | $bias = array_key_exists('bias', $json) ? 
$json['bias'] : $json['biases']; 55 | 56 | $vol = new Vol(1, 1, $json['out_depth'], $bias); 57 | $vol->fromJson($value); 58 | $this->biases = $vol; 59 | } else { 60 | $this->$key = $value; 61 | } 62 | } 63 | } 64 | 65 | public function getParamsAndGrads() 66 | { 67 | return []; 68 | } 69 | 70 | public function setParamsAndGrads($params, $grads) 71 | { 72 | return; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/LocalResponseNormalizationLayer.php: -------------------------------------------------------------------------------- 1 | k = $opt['k']; 14 | $this->n = $opt['n']; 15 | $this->alpha = $opt['alpha']; 16 | $this->beta = $opt['beta']; 17 | 18 | // computed 19 | $this->out_sx = $opt['in_sx']; 20 | $this->out_sy = $opt['in_sy']; 21 | $this->out_depth = $opt['in_depth']; 22 | $this->layer_type = 'lrn'; 23 | 24 | // checks 25 | if ($this->n % 2 === 0) { 26 | echo 'WARNING n should be odd for LRN layer' . PHP_EOL; 27 | } 28 | } 29 | 30 | public function forward($V, $is_training) 31 | { 32 | $this->in_act = $V; 33 | 34 | $A = $V->cloneAndZero(); 35 | $this->S_cache_ = $V.cloneAndZero(); 36 | $n2 = floor($this->n / 2); 37 | 38 | for ($x = 0; $x < $V->sx; $x++) { 39 | for ($y = 0; $y < $V->sy; $y++) { 40 | for ($i = 0; $i < $V->depth; $i++) { 41 | $ai = $V->get($x, $y, $i); 42 | 43 | // normalize in a window of size n 44 | $den = 0.0; 45 | 46 | for ($j = max(0, $i - $n2); $j <= min($i + $n2, $V->depth - 1); $j++) { 47 | $aa = $V->get($x, $y, $j); 48 | $den += $aa * $aa; 49 | } 50 | 51 | $den *= $this->alpha / $this->n; 52 | $den += $this->k; 53 | $this->S_cache_.set($x, $y, $i, $den); // will be useful for backprop 54 | $den = pow($den, $this->beta); 55 | $A->set($x, $y, $i, $ai / $den); 56 | } 57 | } 58 | } 59 | 60 | $this->out_act = $A; 61 | 62 | return $this->out_act; // dummy identity function for now 63 | } 64 | 65 | public function backward($y = null) 66 | { 67 | // evaluate gradient wrt data 68 | $V = $this->in_act; // we need to set dw of this 69 | $V->dw = array_fill(0, count($V->w), 0); // zero out gradient wrt data 70 | $A = $this->out_act; // computed in forward pass 71 | 72 | $n2 = floor($this->n / 2); 73 | 74 | for ($x = 0; $x < $V->sx; $x++) { 75 | for ($y = 0; $y < $V->sy; $y++) { 76 | for ($i = 0; $i < $V->depth; $i++) { 77 | $chain_grad = $this->out_act->get_grad($x, $y, $i); 78 | $S = $this->S_cache_.get($x, $y, $i); 79 | $SB = pow($S, $this->beta); 80 | $SB2 = $SB * $SB; 81 | 82 | // normalize in a window of size n 83 | for ($j = max(0, $i - $n2); $j <= min($i + $n2, $V->depth - 1); $j++) { 84 | $aj = $V->get($x, $y, $j); 85 | $g = -$aj * $this->beta * pow($S, $this->beta - 1) * $this->alpha / $this->n * 2 * $aj; 86 | 87 | if ($j === $i) { 88 | $g += $SB; 89 | } 90 | 91 | $g /= $SB2; 92 | $g *= $chain_grad; 93 | $V->add_grad($x, $y, $j, $g); 94 | } 95 | } 96 | } 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/MaxoutLayer.php: -------------------------------------------------------------------------------- 1 | out_sx = $opt['in_sx']; 13 | $this->out_sy = $opt['in_sy']; 14 | $this->out_depth = floor($opt['in_depth'] / $this->group_size); 15 | $this->layer_type = 'maxout'; 16 | 17 | $this->switches = array_fill(0, $this->out_sx * $this->out_sy * $this->out_depth, 0); // useful for backprop 18 | } 19 | 20 | public function forward($V, $is_training) 21 | { 22 | $this->in_act = $V; 23 | $N = $this->out_depth; 24 | $V2 = new Vol($this->out_sx, $this->out_sy, 
$this->out_depth, 0.0); 25 | 26 | // optimization branch. If we're operating on 1D arrays we dont have 27 | // to worry about keeping track of x,y,d coordinates inside 28 | // input volumes. In convnets we do :( 29 | if ($this->out_sx === 1 && $this->out_sy === 1) { 30 | for ($i = 0; $i < $N; $i++) { 31 | $ix = $i * $this->group_size; // base index offset 32 | $a = $V->w[$ix]; 33 | $ai = 0; 34 | 35 | for ($j = 1; $j < $this->group_size; $j++) { 36 | $a2 = $V->w[$ix + $j]; 37 | 38 | if ($a2 > $a) { 39 | $a = $a2; 40 | $ai = $j; 41 | } 42 | } 43 | 44 | $V2->w[$i] = $a; 45 | $this->switches[$i] = $ix + $ai; 46 | } 47 | } else { 48 | $n = 0; // counter for switches 49 | for ($x = 0; $x < $V->sx; $x++) { 50 | for ($y = 0; $y < $V->sy; $y++) { 51 | for ($i = 0; $i < $N; $i++) { 52 | $ix = $i * $this->group_size; 53 | $a = $V->get($x, $y, $ix); 54 | $ai = 0; 55 | 56 | for ($j = 1; $j < $this->group_size; $j++) { 57 | $a2 = $V->get($x, $y, $ix + $j); 58 | if ($a2 > $a) { 59 | $a = $a2; 60 | $ai = $j; 61 | } 62 | } 63 | 64 | $V2->set($x, $y, $i, $a); 65 | $this->switches[$n] = $ix + $ai; 66 | $n++; 67 | } 68 | } 69 | } 70 | } 71 | 72 | $this->out_act = $V2; 73 | 74 | return $this->out_act; 75 | } 76 | 77 | public function backward($y = null) 78 | { 79 | $V = $this->in_act; // we need to set dw of this 80 | $V2 = $this->out_act; 81 | $N = $this->out_depth; 82 | $V->dw = array_fill(0, count($V->w), 0); // zero out gradient wrt data 83 | 84 | // pass the gradient through the appropriate switch 85 | if ($this->out_sx === 1 && $this->out_sy === 1) { 86 | for ($i = 0; $i < $N; $i++) { 87 | $chain_grad = $V2->dw[$i]; 88 | $V->dw[$this->switches[$i]] = $chain_grad; 89 | } 90 | } else { 91 | // bleh okay, lets do this the hard way 92 | $n = 0; // counter for switches 93 | for ($x = 0; $x < $V2->sx; $x++) { 94 | for ($y = 0; $y < $V2->sy; $y++) { 95 | for ($i = 0; $i < $N; $i++) { 96 | $chain_grad = $V2->get_grad($x, $y, $i); 97 | $V->set_grad($x, $y, $this->switches[$n], $chain_grad); 98 | $n++; 99 | } 100 | } 101 | } 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/Net.php: -------------------------------------------------------------------------------- 1 | layers = []; 11 | } 12 | 13 | public function makeLayers($defs) 14 | { 15 | // few checks 16 | if (count($defs) < 2) { 17 | throw new \Exception('Error! At least one input layer and one loss layer are required.', 1); 18 | } 19 | 20 | if ($defs[0]['type'] !== 'input') { 21 | throw new \Exception('Error! First layer must be the input layer, to declare size of inputs', 1); 22 | } 23 | 24 | // desugar layer_defs for adding activation, dropout layers etc 25 | $desugar = function ($defs) { 26 | $new_defs = []; 27 | 28 | for ($i = 0; $i < count($defs); $i++) { 29 | $def = $defs[$i]; 30 | 31 | if ($def['type'] === 'softmax' || $def['type'] === 'svm') { 32 | // add an fc layer here, there is no reason the user should 33 | // have to worry about this and we almost always want to 34 | $new_defs[] = ['type' => 'fc', 'num_neurons' => $def['num_classes']]; 35 | } 36 | 37 | if ($def['type'] === 'regression') { 38 | // add an fc layer here, there is no reason the user should 39 | // have to worry about this and we almost always want to 40 | $new_defs[] = ['type' => 'fc', 'num_neurons' => $def['num_neurons']]; 41 | } 42 | 43 | if (($def['type'] === 'fc' || $def['type'] === 'conv') && ! 
array_key_exists('bias_pref', $def)) { 44 | $def['bias_pref'] = 0.0; 45 | 46 | if (array_key_exists('activation', $def) && $def['activation'] === 'relu') { 47 | $def['bias_pref'] = 0.1; // relus like a bit of positive bias to get gradients early 48 | // otherwise it's technically possible that a relu unit will never turn on (by chance) 49 | // and will never get any gradient and never contribute any computation. Dead relu. 50 | } 51 | } 52 | 53 | $new_defs[] = $def; 54 | 55 | if (array_key_exists('activation', $def)) { 56 | if ($def['activation'] === 'relu') { 57 | $new_defs[] = ['type' => 'relu']; 58 | } else if ($def['activation'] === 'sigmoid') { 59 | $new_defs[] = ['type' => 'sigmoid']; 60 | } else if ($def['activation'] === 'tanh') { 61 | $new_defs[] = ['type' => 'tanh']; 62 | } else if ($def['activation'] === 'maxout') { 63 | // create maxout activation, and pass along group size, if provided 64 | $gs = array_key_exists('group_size', $def) ? $def['group_size'] : 2; 65 | $new_defs[] = ['type' => 'maxout', 'group_size' => $gs]; 66 | } else { 67 | echo 'ERROR unsupported activation ' + $def['activation'] . PHP_EOL; 68 | } 69 | } 70 | 71 | if (array_key_exists('drop_prob', $def) && $def['type'] !== 'dropout') { 72 | $new_defs[] = ['type' => 'dropout', 'drop_prob' => $def['drop_prob']]; 73 | } 74 | } 75 | 76 | return $new_defs; 77 | }; 78 | 79 | $defs = $desugar($defs); 80 | 81 | // create the layers 82 | $this->layers = []; 83 | 84 | for ($i = 0; $i < count($defs); $i++) { 85 | $def = $defs[$i]; 86 | 87 | if ($i > 0) { 88 | $prev = $this->layers[$i - 1]; 89 | $def['in_sx'] = $prev->out_sx; 90 | $def['in_sy'] = $prev->out_sy; 91 | $def['in_depth'] = $prev->out_depth; 92 | } 93 | 94 | switch ($def['type']) { 95 | case 'fc': 96 | $this->layers[] = new FullyConnLayer($def); 97 | break; 98 | case 'lrn': 99 | $this->layers[] = new LocalResponseNormalizationLayer($def); 100 | break; 101 | case 'dropout': 102 | $this->layers[] = new DropoutLayer($def); 103 | break; 104 | case 'input': 105 | $this->layers[] = new InputLayer($def); 106 | break; 107 | case 'softmax': 108 | $this->layers[] = new SoftmaxLayer($def); 109 | break; 110 | case 'regression': 111 | $this->layers[] = new RegressionLayer($def); 112 | break; 113 | case 'conv': 114 | $this->layers[] = new ConvLayer($def); 115 | break; 116 | case 'pool': 117 | $this->layers[] = new PoolLayer($def); 118 | break; 119 | case 'relu': 120 | $this->layers[] = new ReluLayer($def); 121 | break; 122 | case 'sigmoid': 123 | $this->layers[] = new SigmoidLayer($def); 124 | break; 125 | case 'tanh': 126 | $this->layers[] = new TanhLayer($def); 127 | break; 128 | case 'maxout': 129 | $this->layers[] = new MaxoutLayer($def); 130 | break; 131 | case 'svm': 132 | $this->layers[] = new SVMLayer($def); 133 | break; 134 | default: 135 | echo 'ERROR: UNRECOGNIZED LAYER TYPE: ' . 
$def['type']; 136 | } 137 | } 138 | } 139 | 140 | public function forward($V, $is_training = false) 141 | { 142 | $act = $this->layers[0]->forward($V, $is_training); 143 | 144 | for ($i = 1; $i < count($this->layers); $i++) { 145 | $act = $this->layers[$i]->forward($act, $is_training); 146 | } 147 | 148 | return $act; 149 | } 150 | 151 | public function getCostLoss($V, $y) 152 | { 153 | $this->forward($V, false); 154 | $N = count($this->layers); 155 | $loss = $this->layers[$N - 1]->backward($y); 156 | 157 | return $loss; 158 | } 159 | 160 | public function backward($y) 161 | { 162 | $N = count($this->layers); 163 | $loss = $this->layers[$N - 1]->backward($y); // last layer assumed to be loss layer 164 | 165 | for ($i = $N - 2; $i >= 0; $i--) { // first layer assumed input 166 | $this->layers[$i]->backward(); 167 | } 168 | 169 | return $loss; 170 | } 171 | 172 | public function getParamsAndGrads() 173 | { 174 | // accumulate parameters and gradients for the entire network 175 | $response = []; 176 | 177 | for ($i = 0; $i < count($this->layers); $i++) { 178 | $layer_reponse = $this->layers[$i]->getParamsAndGrads(); 179 | 180 | for ($j = 0; $j < count($layer_reponse); $j++) { 181 | $response[] = &$layer_reponse[$j]; 182 | } 183 | } 184 | 185 | return $response; 186 | } 187 | 188 | public function getPrediction() 189 | { 190 | // this is a convenience function for returning the argmax 191 | // prediction, assuming the last layer of the net is a softmax 192 | $S = $this->layers[count($this->layers) - 1]; 193 | 194 | if ($S->layer_type !== 'softmax') { 195 | throw new \Exception('getPrediction function assumes softmax as last layer of the net!', 1); 196 | } 197 | 198 | $p = $S->out_act->w; 199 | $maxv = $p[0]; 200 | $maxi = 0; 201 | 202 | for ($i = 1; $i < count($p); $i++) { 203 | if ($p[$i] > $maxv) { 204 | $maxv = $p[$i]; 205 | $maxi = $i; 206 | } 207 | } 208 | 209 | return $maxi; // return index of the class with highest class probability 210 | } 211 | 212 | public function save($file) 213 | { 214 | if (file_exists($file)) { 215 | unlink($file); 216 | } 217 | 218 | file_put_contents($file, json_encode($this)); 219 | } 220 | 221 | public function load($file) 222 | { 223 | if (! 
file_exists($file)) { 224 | throw new \Exception('File not found', 1); 225 | } 226 | 227 | $json = json_decode(file_get_contents($file), true); 228 | 229 | $this->layers = []; 230 | 231 | foreach ($json['layers'] as $key => $layer) { 232 | switch ($layer['layer_type']) { 233 | case 'input': 234 | $L = new InputLayer(); 235 | break; 236 | case 'relu': 237 | $L = new ReluLayer(); 238 | break; 239 | case 'sigmoid': 240 | $L = new SigmoidLayer(); 241 | break; 242 | case 'tanh': 243 | $L = new TanhLayer(); 244 | break; 245 | case 'dropout': 246 | $L = new DropoutLayer(); 247 | break; 248 | case 'conv': 249 | $L = new ConvLayer(); 250 | break; 251 | case 'pool': 252 | $L = new PoolLayer(); 253 | break; 254 | case 'lrn': 255 | $L = new LocalResponseNormalizationLayer(); 256 | break; 257 | case 'softmax': 258 | $L = new SoftmaxLayer(); 259 | break; 260 | case 'regression': 261 | $L = new RegressionLayer(); 262 | break; 263 | case 'fc': 264 | $L = new FullyConnLayer(); 265 | break; 266 | case 'maxout': 267 | $L = new MaxoutLayer(); 268 | break; 269 | case 'svm': 270 | $L = new SVMLayer(); 271 | break; 272 | default: 273 | throw new \Exception('Invalid Layer Type ', 1); 274 | } 275 | 276 | $L->fromJSON($layer); 277 | 278 | $this->layers[] = $L; 279 | } 280 | } 281 | } 282 | -------------------------------------------------------------------------------- /src/PoolLayer.php: -------------------------------------------------------------------------------- 1 | sx = $opt['sx']; // filter size 14 | $this->in_depth = $opt['in_depth']; 15 | $this->in_sx = $opt['in_sx']; 16 | $this->in_sy = $opt['in_sy']; 17 | 18 | // optional 19 | $this->sy = array_key_exists('sy', $opt) ? $opt['sy'] : $this->sx; 20 | $this->stride = array_key_exists('stride', $opt) ? $opt['stride'] : 2; 21 | $this->pad = array_key_exists('pad', $opt) ? $opt->pad : 0; // amount of 0 padding to add around borders of input volume 22 | 23 | // computed 24 | $this->out_depth = $this->in_depth; 25 | $this->out_sx = floor(($this->in_sx + $this->pad * 2 - $this->sx) / $this->stride + 1); 26 | $this->out_sy = floor(($this->in_sy + $this->pad * 2 - $this->sy) / $this->stride + 1); 27 | $this->layer_type = 'pool'; 28 | // store switches for x,y coordinates for where the max comes from, for each output neuron 29 | $this->switchx = array_fill(0, $this->out_sx * $this->out_sy * $this->out_depth, 0); 30 | $this->switchy = array_fill(0, $this->out_sx * $this->out_sy * $this->out_depth, 0); 31 | } 32 | 33 | public function forward($V, $is_training) 34 | { 35 | $this->in_act = $V; 36 | 37 | $A = new Vol($this->out_sx, $this->out_sy, $this->out_depth, 0.0); 38 | 39 | $n = 0; // a counter for switches 40 | 41 | for ($d = 0; $d < $this->out_depth; $d++) { 42 | $x = -$this->pad; 43 | $y = -$this->pad; 44 | 45 | for ($ax = 0; $ax < $this->out_sx; $x += $this->stride, $ax++) { 46 | $y = -$this->pad; 47 | 48 | for ($ay = 0; $ay < $this->out_sy; $y += $this->stride, $ay++) { 49 | // convolve centered at this particular location 50 | $a = -99999; // hopefully small enough ;\ 51 | $winx = -1; 52 | $winy = -1; 53 | 54 | for ($fx = 0; $fx < $this->sx; $fx++) { 55 | for ($fy = 0; $fy < $this->sy; $fy++) { 56 | $oy = $y + $fy; 57 | $ox = $x + $fx; 58 | 59 | if ($oy >= 0 && $oy < $V->sy && $ox >= 0 && $ox < $V->sx) { 60 | $v = $V->get($ox, $oy, $d); 61 | // perform max pooling and store pointers to where 62 | // the max came from. 
This will speed up backprop 63 | // and can help make nice visualizations in future 64 | if ($v > $a) { 65 | $a = $v; 66 | $winx = $ox; 67 | $winy = $oy; 68 | } 69 | } 70 | } 71 | } 72 | 73 | $this->switchx[$n] = $winx; 74 | $this->switchy[$n] = $winy; 75 | $n++; 76 | $A->set($ax, $ay, $d, $a); 77 | } 78 | } 79 | } 80 | 81 | $this->out_act = $A; 82 | 83 | return $this->out_act; 84 | } 85 | 86 | public function backward($y = null) 87 | { 88 | // pooling layers have no parameters, so simply compute 89 | // gradient wrt data here 90 | $V = $this->in_act; 91 | $V->dw = array_fill(0, count($V->w), 0); // zero out gradient wrt data 92 | $A = $this->out_act; // computed in forward pass 93 | $n = 0; 94 | 95 | for ($d = 0; $d < $this->out_depth; $d++) { 96 | $x = -$this->pad; 97 | $y = -$this->pad; 98 | for ($ax = 0; $ax < $this->out_sx; $x += $this->stride, $ax++) { 99 | $y = -$this->pad; 100 | for ($ay = 0; $ay < $this->out_sy; $y += $this->stride, $ay++) { 101 | $chain_grad = $this->out_act->get_grad($ax, $ay, $d); 102 | $V->add_grad($this->switchx[$n], $this->switchy[$n], $d, $chain_grad); 103 | $n++; 104 | } 105 | } 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/RegressionLayer.php: -------------------------------------------------------------------------------- 1 | num_inputs = $opt['in_sx'] * $opt['in_sy'] * $opt['in_depth']; 13 | $this->out_depth = $this->num_inputs; 14 | $this->out_sx = 1; 15 | $this->out_sy = 1; 16 | $this->layer_type = 'regression'; 17 | } 18 | 19 | public function forward($V, $is_training) 20 | { 21 | $this->in_act = $V; 22 | $this->out_act = $V; 23 | return $V; // identity function 24 | } 25 | 26 | public function backward($y = null) 27 | { 28 | // compute and accumulate gradient wrt weights and bias of this layer 29 | $x = $this->in_act; 30 | $x->dw = array_fill(0, count($x->w), 0); // zero out the gradient of input Vol 31 | $loss = 0.0; 32 | 33 | if (is_array($y)) { 34 | for ($i = 0; $i < $this->out_depth; $i++) { 35 | $dy = $x->w[$i] - $y[$i]; 36 | $x->dw[$i] = $dy; 37 | $loss += 0.5 * $dy * $dy; 38 | } 39 | } else if (is_numeric($y)) { 40 | // lets hope that only one number is being regressed 41 | $dy = $x->w[0] - $y; 42 | $x->dw[0] = $dy; 43 | $loss += 0.5 * $dy * $dy; 44 | } else { 45 | // assume it is a struct with entries .dim and .val 46 | // and we pass gradient only along dimension dim to be equal to val 47 | $i = $y->dim; 48 | $yi = $y->val; 49 | $dy = $x->w[$i] - $yi; 50 | $x->dw[$i] = $dy; 51 | $loss += 0.5 * $dy * $dy; 52 | } 53 | 54 | return $loss; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/ReluLayer.php: -------------------------------------------------------------------------------- 1 | out_sx = $opt['in_sx']; 13 | $this->out_sy = $opt['in_sy']; 14 | $this->out_depth = $opt['in_depth']; 15 | $this->layer_type = 'relu'; 16 | } 17 | 18 | public function forward($V, $is_training) 19 | { 20 | $this->in_act = $V; 21 | $V2 = $V->clone(); 22 | $N = count($V->w); 23 | $V2w = $V2->w; 24 | 25 | for ($i = 0; $i < $N; $i++) { 26 | if ($V2w[$i] < 0) { 27 | $V2w[$i] = 0; // threshold at 0 28 | } 29 | } 30 | 31 | $this->out_act = $V2; 32 | 33 | return $this->out_act; 34 | } 35 | 36 | public function backward($y = null) 37 | { 38 | $V = $this->in_act; // we need to set dw of this 39 | $V2 = $this->out_act; 40 | $N = count($V->w); 41 | $V->dw = array_fill(0, $N, 0); // zero out gradient wrt data 42 | 43 | for ($i = 0; $i < $N; $i++) { 44 | 
if ($V2->w[$i] <= 0) { 45 | $V->dw[$i] = 0; // threshold 46 | } else { 47 | $V->dw[$i] = $V2->dw[$i]; 48 | } 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/SVMLayer.php: -------------------------------------------------------------------------------- 1 | num_inputs = $opt['in_sx'] * $opt['in_sy'] * $opt['in_depth']; 13 | $this->out_depth = $this->num_inputs; 14 | $this->out_sx = 1; 15 | $this->out_sy = 1; 16 | $this->layer_type = 'svm'; 17 | } 18 | 19 | public function forward($V, $is_training) 20 | { 21 | $this->in_act = $V; 22 | $this->out_act = $V; 23 | return $V; // identity function 24 | } 25 | 26 | public function backward($y = null) 27 | { 28 | // compute and accumulate gradient wrt weights and bias of this layer 29 | $x = $this->in_act; 30 | $x->dw = array_fill(0, count($x->w), 0); // zero out the gradient of input Vol 31 | 32 | // we're using structured loss here, which means that the score 33 | // of the ground truth should be higher than the score of any other 34 | // class, by a margin 35 | $yscore = $x->w[$y]; // score of ground truth 36 | $margin = 1.0; 37 | $loss = 0.0; 38 | 39 | for ($i = 0; $i < $this->out_depth; $i++) { 40 | if ($y === $i) { 41 | continue; 42 | } 43 | 44 | $ydiff = -$yscore + $x->w[$i] + $margin; 45 | 46 | if ($ydiff > 0) { 47 | // violating dimension, apply loss 48 | $x->dw[$i] += 1; 49 | $x->dw[$y] -= 1; 50 | $loss += $ydiff; 51 | } 52 | } 53 | 54 | return $loss; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/SigmoidLayer.php: -------------------------------------------------------------------------------- 1 | out_sx = $opt['in_sx']; 13 | $this->out_sy = $opt['in_sy']; 14 | $this->out_depth = $opt['in_depth']; 15 | $this->layer_type = 'sigmoid'; 16 | } 17 | 18 | public function forward($V, $is_training) 19 | { 20 | $this->in_act = $V; 21 | $V2 = $V->cloneAndZero(); 22 | $N = count($V->w); 23 | $V2w = $V2->w; 24 | $Vw = $V->w; 25 | 26 | for ($i = 0; $i < $N; $i++) { 27 | $V2w[$i] = 1.0 / (1.0 + exp(-$Vw[$i])); 28 | } 29 | 30 | $this->out_act = $V2; 31 | 32 | return $this->out_act; 33 | } 34 | 35 | public function backward($y = null) 36 | { 37 | $V = $this->in_act; // we need to set dw of this 38 | $V2 = $this->out_act; 39 | $N = count($V->w); 40 | $V->dw = array_fill(0, $N, 0); // zero out gradient wrt data 41 | 42 | for ($i = 0; $i < $N; $i++) { 43 | $v2wi = $V2->w[$i]; 44 | $V->dw[$i] = $v2wi * (1.0 - $v2wi) * $V2->dw[$i]; 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/SoftmaxLayer.php: -------------------------------------------------------------------------------- 1 | num_inputs = $opt['in_sx'] * $opt['in_sy'] * $opt['in_depth']; 13 | $this->out_depth = $this->num_inputs; 14 | $this->out_sx = 1; 15 | $this->out_sy = 1; 16 | $this->layer_type = 'softmax'; 17 | } 18 | 19 | public function forward($V, $is_training) 20 | { 21 | $this->in_act = $V; 22 | 23 | $A = new Vol(1, 1, $this->out_depth, 0.0); 24 | 25 | // compute max activation 26 | $as = $V->w; 27 | $amax = $V->w[0]; 28 | 29 | for ($i = 1; $i < $this->out_depth; $i++) { 30 | if ($as[$i] > $amax) { 31 | $amax = $as[$i]; 32 | } 33 | } 34 | 35 | // compute exponentials (carefully to not blow up) 36 | $es = array_fill(0, $this->out_depth, 0); 37 | $esum = 0.0; 38 | 39 | for ($i = 0; $i < $this->out_depth; $i++) { 40 | $e = exp($as[$i] - $amax); 41 | $esum += $e; 42 | $es[$i] = $e; 43 | } 44 | 45 | // normalize and 
output to sum to one 46 | for ($i = 0; $i < $this->out_depth; $i++) { 47 | $es[$i] /= $esum; 48 | $A->w[$i] = $es[$i]; 49 | } 50 | 51 | $this->es = $es; // save these for backprop 52 | $this->out_act = $A; 53 | 54 | return $this->out_act; 55 | } 56 | 57 | public function backward($y = null) 58 | { 59 | // compute and accumulate gradient wrt weights and bias of this layer 60 | $x = $this->in_act; 61 | $x->dw = array_fill(0, count($x->w), 0); // zero out the gradient of input Vol 62 | 63 | for ($i = 0; $i < $this->out_depth; $i++) { 64 | $indicator = $i === $y ? 1.0 : 0.0; 65 | $mul = -($indicator - $this->es[$i]); 66 | $x->dw[$i] = $mul; 67 | } 68 | 69 | // loss is the class negative log likelihood 70 | return -log($this->es[$y]); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/TanhLayer.php: -------------------------------------------------------------------------------- 1 | out_sx = $opt['in_sx']; 13 | $this->out_sy = $opt['in_sy']; 14 | $this->out_depth = $opt['in_depth']; 15 | $this->layer_type = 'tanh'; 16 | } 17 | 18 | public function forward($V, $is_training) 19 | { 20 | $this->in_act = $V; 21 | $V2 = $V->cloneAndZero(); 22 | $N = count($V->w); 23 | 24 | for ($i = 0; $i < $N; $i++) { 25 | $V2->w[$i] = tanh($V->w[$i]); 26 | } 27 | 28 | $this->out_act = $V2; 29 | 30 | return $this->out_act; 31 | } 32 | 33 | public function backward($y = null) 34 | { 35 | $V = $this->in_act; // we need to set dw of this 36 | $V2 = $this->out_act; 37 | $N = count($V->w); 38 | $V->dw = array_fill(0, $N, 0); // zero out gradient wrt data 39 | 40 | for ($i = 0; $i < $N; $i++) { 41 | $v2wi = $V2->w[$i]; 42 | $V->dw[$i] = (1.0 - $v2wi * $v2wi) * $V2->dw[$i]; 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/Trainer.php: -------------------------------------------------------------------------------- 1 | net = $net; 26 | 27 | $this->learning_rate = array_key_exists('learning_rate', $options) ? $options['learning_rate'] : 0.01; 28 | $this->l1_decay = array_key_exists('l1_decay', $options) ? $options['l1_decay'] : 0.0; 29 | $this->l2_decay = array_key_exists('l2_decay', $options) ? $options['l2_decay'] : 0.0; 30 | $this->batch_size = array_key_exists('batch_size', $options) ? $options['batch_size'] : 1; 31 | $this->method = array_key_exists('method', $options) ? $options['method'] : 'sgd'; // sgd/adam/adagrad/adadelta/windowgrad/netsterov 32 | 33 | $this->momentum = array_key_exists('momentum', $options) ? $options['momentum'] : 0.9; 34 | $this->ro = array_key_exists('ro', $options) ? $options['ro'] : 0.95; // used in adadelta 35 | $this->eps = array_key_exists('eps', $options) ? $options['eps'] : 1e-8; // used in adam or adadelta 36 | $this->beta1 = array_key_exists('beta1', $options) ? $options['beta1'] : 0.9; // used in adam 37 | $this->beta2 = array_key_exists('beta2', $options) ? 
$options['beta2'] : 0.999; // used in adam 38 | 39 | $this->k = 0; // iteration counter 40 | $this->gsum = []; // last iteration gradients (used for momentum calculations) 41 | $this->xsum = []; // used in adam or adadelta 42 | 43 | // check if regression is expected 44 | if ($this->net->layers[count($this->net->layers) - 1]->layer_type === 'regression') { 45 | $this->regression = true; 46 | } else { 47 | $this->regression = false; 48 | } 49 | } 50 | 51 | public function train($x, $y) 52 | { 53 | $start = microtime(true); 54 | $this->net->forward($x, true); // also set the flag that lets the net know we're just training 55 | $end = microtime(true); 56 | $fwd_time = $end - $start; 57 | 58 | $start = microtime(true); 59 | $cost_loss = $this->net->backward($y); 60 | $l2_decay_loss = 0.0; 61 | $l1_decay_loss = 0.0; 62 | $end = microtime(true); 63 | $bwd_time = $end - $start; 64 | 65 | if ($this->regression && ! is_array($y)) { 66 | echo 'Warning: a regression net requires an array as training output vector.' . PHP_EOL; 67 | } 68 | 69 | $this->k++; 70 | 71 | if ($this->k % $this->batch_size === 0) { 72 | $pglist = $this->net->getParamsAndGrads(); 73 | // initialize lists for accumulators. Will only be done once on first iteration 74 | if (count($this->gsum) === 0 && ($this->method !== 'sgd' || $this->momentum > 0.0)) { 75 | // only vanilla sgd doesnt need either lists 76 | // momentum needs gsum 77 | // adagrad needs gsum 78 | // adam and adadelta needs gsum and xsum 79 | for ($i = 0; $i < count($pglist); $i++) { 80 | $this->gsum[] = array_fill(0, count($pglist[$i]['params']), 0); 81 | 82 | if ($this->method === 'adam' || $this->method === 'adadelta') { 83 | $this->xsum[] = array_fill(0, count($pglist[$i]['params']), 0); 84 | } else { 85 | $this->xsum[] = []; // conserve memory 86 | } 87 | } 88 | } 89 | 90 | // perform an update for all sets of weights 91 | for ($i = 0; $i < count($pglist); $i++) { 92 | $pg = &$pglist[$i]; // param, gradient, other options in future (custom learning rate etc) 93 | $p = &$pg['params']; 94 | $g = &$pg['grads']; 95 | 96 | // learning rate for some parameters. 97 | $l2_decay_mul = array_key_exists('l2_decay_mul', $pg) ? $pg['l2_decay_mul'] : 1.0; 98 | $l1_decay_mul = array_key_exists('l1_decay_mul', $pg) ? $pg['l1_decay_mul'] : 1.0; 99 | $l2_decay = $this->l2_decay * $l2_decay_mul; 100 | $l1_decay = $this->l1_decay * $l1_decay_mul; 101 | 102 | $plen = count($p); 103 | 104 | for ($j = 0; $j < $plen; $j++) { 105 | $l2_decay_loss += $l2_decay * $p[$j] * $p[$j] / 2; // accumulate weight decay loss 106 | $l1_decay_loss += $l1_decay * abs($p[$j]); 107 | $l1grad = $l1_decay * ($p[$j] > 0 ? 
1 : -1); 108 | $l2grad = $l2_decay * ($p[$j]); 109 | 110 | $gij = ($l2grad + $l1grad + $g[$j]) / $this->batch_size; // raw batch gradient 111 | 112 | $gsumi = $this->gsum[$i]; 113 | $xsumi = $this->xsum[$i]; 114 | 115 | if ($this->method === 'adam') { 116 | // adam update 117 | $gsumi[$j] = $gsumi[$j] * $this->beta1 + (1 - $this->beta1) * $gij; // update biased first moment estimate 118 | $xsumi[$j] = $xsumi[$j] * $this->beta2 + (1 - $this->beta2) * $gij * $gij; // update biased second moment estimate 119 | $biasCorr1 = $gsumi[$j] * (1 - pow($this->beta1, $this->k)); // correct bias first moment estimate 120 | $biasCorr2 = $xsumi[$j] * (1 - pow($this->beta2, $this->k)); // correct bias second moment estimate 121 | $dx = - $this->learning_rate * $biasCorr1 / (sqrt($biasCorr2) + $this->eps); 122 | $p[$j] += $dx; 123 | } else if ($this->method === 'adagrad') { 124 | // adagrad update 125 | $gsumi[$j] = $gsumi[j] + $gij * $gij; 126 | $dx = - $this->learning_rate / sqrt($gsumi[$j] + $this->eps) * $gij; 127 | $p[$j] += $dx; 128 | } else if ($this->method === 'windowgrad') { 129 | // this is adagrad but with a moving window weighted average 130 | // so the gradient is not accumulated over the entire history of the run. 131 | // it's also referred to as Idea #1 in Zeiler paper on Adadelta. Seems reasonable to me! 132 | $gsumi[$j] = $this->ro * $gsumi[$j] + (1 - $this->ro) * $gij * $gij; 133 | $dx = - $this->learning_rate / sqrt($gsumi[$j] + $this->eps) * $gij; // eps added for better conditioning 134 | $p[$j] += $dx; 135 | } else if ($this->method === 'adadelta') { 136 | $gsumi[$j] = $this->ro * $gsumi[$j] + (1 - $this->ro) * $gij * $gij; 137 | $dx = - sqrt(($xsumi[$j] + $this->eps) / ($gsumi[$j] + $this->eps)) * $gij; 138 | $xsumi[$j] = $this->ro * $xsumi[$j] + (1 - $this->ro) * $dx * $dx; // yes, xsum lags behind gsum by 1. 139 | $p[$j] += $dx; 140 | } else if ($this->method === 'nesterov') { 141 | $dx = $gsumi[$j]; 142 | $gsumi[$j] = $gsumi[$j] * $this->momentum + $this->learning_rate * $gij; 143 | $dx = $this->momentum * $dx - (1.0 + $this->momentum) * $gsumi[$j]; 144 | $p[$j] += $dx; 145 | } else { 146 | // assume SGD 147 | if ($this->momentum > 0.0) { 148 | // momentum update 149 | $dx = $this->momentum * $gsumi[$j] - $this->learning_rate * $gij; // step 150 | $gsumi[$j] = $dx; // back this up for next iteration of momentum 151 | $p[$j] += $dx; // apply corrected gradient 152 | } else { 153 | // vanilla sgd 154 | $p[$j] += $this->learning_rate * $gij * (-1); 155 | } 156 | } 157 | 158 | $g[$j] = 0.0; // zero out gradient so that we can begin accumulating anew 159 | } 160 | } 161 | } 162 | 163 | // appending softmax_loss for backwards compatibility, but from now on we will always use cost_loss 164 | // in future, TODO: have to completely redo the way loss is done around the network as currently 165 | // loss is a bit of a hack. Ideally, user should specify arbitrary number of loss functions on any layer 166 | // and it should all be computed correctly and automatically. 
167 | return [ 168 | 'fwd_time' => $fwd_time, 169 | 'bwd_time' => $bwd_time, 170 | 'l2_decay_loss' => $l2_decay_loss, 171 | 'l1_decay_loss' => $l1_decay_loss, 172 | 'cost_loss' => $cost_loss, 173 | 'softmax_loss' => $cost_loss, 174 | 'loss' => $cost_loss + $l1_decay_loss + $l2_decay_loss 175 | ]; 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/Util.php: -------------------------------------------------------------------------------- 1 | sx = 1; 19 | $this->sy = 1; 20 | $this->depth = count($sx); 21 | // we have to do the following copy because we want to use 22 | // fast typed arrays, not an ordinary javascript array 23 | $this->w = array_fill(0, $this->depth, 0); 24 | $this->dw = array_fill(0, $this->depth, 0); 25 | 26 | for ($i = 0; $i < $this->depth; $i++) { 27 | $this->w[$i] = $sx[$i]; 28 | } 29 | } else { 30 | // we were given dimensions of the vol 31 | $this->sx = $sx; 32 | $this->sy = $sy; 33 | $this->depth = $depth; 34 | $n = $sx * $sy * $depth; 35 | 36 | $this->w = array_fill(0, $n, 0); 37 | $this->dw = array_fill(0, $n, 0); 38 | 39 | if ($c === null) { 40 | // weight normalization is done to equalize the output 41 | // variance of every neuron, otherwise neurons with a lot 42 | // of incoming connections have outputs of larger variance 43 | $scale = sqrt(1.0 / ($sx * $sy * $depth)); 44 | 45 | for ($i = 0; $i < $n; $i++) { 46 | $this->w[$i] = rand(0.0, $scale); 47 | } 48 | } else { 49 | for ($i = 0; $i < $n; $i++) { 50 | $this->w[$i] = $c; 51 | } 52 | } 53 | } 54 | } 55 | 56 | public function get($x, $y, $d) 57 | { 58 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 59 | return $this->w[$ix]; 60 | } 61 | 62 | public function set($x, $y, $d, $v) 63 | { 64 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 65 | $this->w[$ix] = $v; 66 | } 67 | 68 | public function add($x, $y, $d, $v) 69 | { 70 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 71 | $this->w[$ix] += $v; 72 | } 73 | 74 | public function get_grad($x, $y, $d) 75 | { 76 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 77 | return $this->dw[$ix]; 78 | } 79 | 80 | public function set_grad($x, $y, $d, $v) 81 | { 82 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 83 | $this->dw[$ix] = $v; 84 | } 85 | 86 | public function add_grad($x, $y, $d, $v) 87 | { 88 | $ix = (($this->sx * $y) + $x) * $this->depth + $d; 89 | $this->dw[$ix] += $v; 90 | } 91 | 92 | public function cloneAndZero() 93 | { 94 | return new Vol($this->sx, $this->sy, $this->depth, 0.0); 95 | } 96 | 97 | public function clone() 98 | { 99 | $V = new Vol($this->sx, $this->sy, $this->depth, 0.0); 100 | $n = count($this->w); 101 | 102 | for ($i = 0; $i < $n; $i++) { 103 | $V->w[$i] = $this->w[$i]; 104 | } 105 | 106 | return $V; 107 | } 108 | 109 | public function addFrom($V) 110 | { 111 | for ($k = 0; $k < count($this->w); $k++) { 112 | $this->w[$k] += $V->w[$k]; 113 | } 114 | } 115 | 116 | public function addFromScaled($V, $a) 117 | { 118 | for ($k = 0; $k < count($this->w); $k++) { 119 | $this->w[$k] += $a * $V->w[$k]; 120 | } 121 | } 122 | 123 | public function setConst($a) 124 | { 125 | for ($k = 0; $k < count($this->w); $k++) { 126 | $this->w[$k] = $a; 127 | } 128 | } 129 | 130 | public static function img_to_vol($img, $convert_grayscale = false) 131 | { 132 | if (gettype($img) === 'resource') { 133 | $image = $img; 134 | } 135 | 136 | if (gettype($img) === 'string') { 137 | $pathinfo = pathinfo($img); 138 | 139 | if ($pathinfo['extension'] === 'png') { 140 | $image = 
imagecreatefrompng($img); 141 | } 142 | 143 | if ($pathinfo['extension'] === 'jpg') { 144 | $image = imagecreatefromjpeg($img); 145 | } 146 | 147 | if ($pathinfo['extension'] === 'pgm') { 148 | $image = Pgm::loadPGM($img); 149 | } 150 | } 151 | 152 | // prepare the input: get pixels and normalize them 153 | $pv = []; 154 | $H = imagesy($image); 155 | $W = imagesx($image); 156 | 157 | for ($y = 0; $y < $H; $y++) { 158 | for ($x = 0; $x < $W; $x++) { 159 | $pixelRgb = imagecolorat($image, $x, $y); 160 | $r = ($pixelRgb >> 16) & 0xFF; 161 | $g = ($pixelRgb >> 8) & 0xFF; 162 | $b = $pixelRgb & 0xFF; 163 | 164 | // normalize image pixels to [-0.5, 0.5] 165 | $pv[] = $r / 255.0 - 0.5; 166 | $pv[] = $g / 255.0 - 0.5; 167 | $pv[] = $b / 255.0 - 0.5; 168 | } 169 | } 170 | 171 | $x = new Vol($W, $H, 3, 0.0); //input volume (image) 172 | $x->w = $pv; 173 | 174 | if ($convert_grayscale) { 175 | // flatten into depth=1 array 176 | $x1 = new Vol($W, $H, 1, 0.0); 177 | 178 | for ($i = 0; $i < $W; $i++) { 179 | for ($j = 0; $j < $H; $j++) { 180 | $x1->set($i, $j, 0, $x->get($i, $j, 0)); 181 | } 182 | } 183 | 184 | $x = $x1; 185 | } 186 | 187 | return $x; 188 | } 189 | 190 | public function fromJson($json) 191 | { 192 | if ($json === null) { 193 | return; 194 | } 195 | 196 | foreach ($json as $key => $value) { 197 | $this->$key = $value; 198 | } 199 | } 200 | } 201 | --------------------------------------------------------------------------------
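
For orientation, a minimal end-to-end usage sketch built only from the classes shown above (Net, Trainer, Vol), assuming Composer's autoloader and the ConvNet\ namespace mapped in composer.json. The layer definitions, hyperparameters, file name and toy dataset below are illustrative placeholders, not part of the repository:

<?php
// usage sketch (illustrative; layer sizes, options and data are placeholder values)
require 'vendor/autoload.php';

use ConvNet\Net;
use ConvNet\Trainer;
use ConvNet\Vol;

// a tiny classifier: 2 inputs -> 6 tanh neurons -> 2-class softmax
// makeLayers() desugars this into input, fc, tanh, fc, softmax layers
$net = new Net();
$net->makeLayers([
    ['type' => 'input', 'out_sx' => 1, 'out_sy' => 1, 'out_depth' => 2],
    ['type' => 'fc', 'num_neurons' => 6, 'activation' => 'tanh'],
    ['type' => 'softmax', 'num_classes' => 2],
]);

// SGD with momentum; Trainer also accepts adam, adagrad, adadelta, windowgrad and nesterov
$trainer = new Trainer($net, [
    'method' => 'sgd',
    'learning_rate' => 0.01,
    'momentum' => 0.9,
    'batch_size' => 10,
    'l2_decay' => 0.001,
]);

// toy dataset: 1x1x2 input volumes and their class labels
$data = [[0.3, -0.5], [-0.4, 0.7], [0.8, 0.2], [-0.9, -0.1]];
$labels = [0, 1, 0, 1];

for ($epoch = 0; $epoch < 50; $epoch++) {
    foreach ($data as $i => $point) {
        $trainer->train(new Vol($point), $labels[$i]); // Vol built from a plain array
    }
}

// forward a sample and read off the argmax class (getPrediction assumes a softmax last layer)
$net->forward(new Vol([0.3, -0.5]));
echo 'predicted class: ' . $net->getPrediction() . PHP_EOL;

// persist and restore the trained network as JSON
$net->save('network.json');
$net->load('network.json');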