├── .gitignore
├── README.md
├── composer.json
├── examples
    └── 2d-dots.php
└── src
    └── Svm.php


/.gitignore:
--------------------------------------------------------------------------------
1 | composer.lock
2 | vendor/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PHP SVM (Work in Progress)
 2 | 
 3 | Pure PHP SVM implementation, a simple PHP port of [svmjs](https://github.com/karpathy/svmjs). Thank you, @karpathy :-)
 4 | 
 5 | ## TO-DO
 6 | 
 7 | The "Issues" page of this repository is used for TO-DO management; just search for the "to-do" tag.
 8 | 
 9 | ## Credits
10 | 
11 | [@gabrielrcouto](http://www.twitter.com/gabrielrcouto)
12 | 
13 | ## License
14 | 
15 | [MIT License](http://gabrielrcouto.mit-license.org/)
16 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "gabrielrcouto/php-svm",
 3 |     "description": "Pure PHP Support Vector Machine",
 4 |     "keywords": ["php","svm","support","vector", "machine"],
 5 |     "homepage": "http://github.com/gabrielrcouto/php-svm",
 6 |     "license": "MIT",
 7 |     "authors": [
 8 |         {
 9 |             "name": "Gabriel Rodrigues Couto",
10 |             "email": "gabrielrcouto@gmail.com"
11 |         }
12 |     ],
13 |     "require": {
14 |         "php": ">=7.0"
15 |     },
16 |     "require-dev" : {
17 |     },
18 |     "autoload": {
19 |         "psr-4": {
20 |             "Svm\\": "src/"
21 |         }
22 |     }
23 | }
24 | 


--------------------------------------------------------------------------------
/examples/2d-dots.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | foreach (['vendor/autoload.php', '../vendor/autoload.php', '../../autoload.php'] as $autoload) {
 3 |     $autoload = __DIR__.'/'.$autoload;
 4 |     if (file_exists($autoload)) {
 5 |         require $autoload;
 6 |         break;
 7 |     }
 8 | }
 9 | 
10 | unset($autoload);
11 | 
12 | use Svm\Svm;
13 | 
14 | // Data below [0.5, 0.5] is -1, above is 1
15 | $data = [[0, 0], [0.5, 0.5], [0.7, 0.7], [1, 1]];
16 | $labels = [-1, -1, 1, 1];
17 | $svm = new Svm();
18 | $svm->train($data, $labels);
19 | 
20 | $predictions = $svm->predict([[0.1, 0.1], [0.8, 0.8]]);
21 | var_dump($predictions);
22 | 


--------------------------------------------------------------------------------
/src/Svm.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | namespace Svm;
  3 | 
  4 | class Svm
  5 | {
  6 |     protected $alpha;
  7 |     protected $b;
  8 |     protected $D;
  9 |     protected $data;
 10 |     protected $kernel;
 11 |     protected $kernelResults;
 12 |     protected $kernelType;
 13 |     protected $labels;
 14 |     protected $N;
 15 |     protected $usew_;
 16 |     protected $w;
 17 | 
 18 |     public function train($data, $labels, $options = [])
 19 |     {
 20 |         // we need these in helper functions
 21 |         $this->data = $data;
 22 |         $this->labels = $labels;
 23 | 
 24 |         // parameters
 25 |         // C value. Decrease for more regularization
 26 |         $C = array_key_exists('C', $options) ? $options['C'] : 1.0;
 27 |         // numerical tolerance. Don't touch unless you're pro
 28 |         $tol = array_key_exists('tol', $options) ? $options['tol'] : 1e-4;
 29 |         // non-support vectors for space and time efficiency are truncated. To guarantee correct result set this to 0 to do no truncating. If you want to increase efficiency, experiment with setting this little higher, up to maybe 1e-4 or so.
 30 |         $alphatol = array_key_exists('alphatol', $options) ? $options['alphatol'] : 1e-7;
 31 |         // max number of iterations
 32 |         $maxiter = array_key_exists('maxiter', $options) ? $options['maxiter'] : 10000;
 33 |         // how many passes over data with no change before we halt? Increase for more precision.
 34 |         $numpasses = array_key_exists('numpasses', $options) ? $options['numpasses'] : 20;
 35 | 
 36 |         // instantiate kernel according to options. kernel can be given as string or as a custom function
 37 |         $kernel = [$this, 'linearKernel'];
 38 |         $this->kernelType = 'linear';
 39 | 
 40 |         if (array_key_exists('kernel', $options)) {
 41 |             if (is_string($options['kernel'])) {
 42 |               // kernel was specified as a string. Handle these special cases appropriately
 43 |                 if ($options['kernel'] === 'linear') {
 44 |                     $kernel = [$this, 'linearKernel'];
 45 |                     $this->kernelType = 'linear';
 46 |                 }
 47 |             }
 48 | 
 49 |             if (is_callable($options['kernel'])) {
 50 |                 // assume kernel was specified as a function. Let's just use it
 51 |                 $kernel = $options['kernel'];
 52 |                 $this->kernelType = 'custom';
 53 |             }
 54 |         }
 55 | 
 56 |         // initializations
 57 |         $this->kernel = $kernel;
 58 |         $this->N = $N = count($data);
 59 |         $this->D = $D = count($data[0]);
 60 |         $this->alpha = array_fill(0, $N, 0);
 61 |         $this->b = 0.0;
 62 |         $this->usew_ = false; // internal efficiency flag
 63 | 
 64 |         // Cache kernel computations to avoid expensive recomputation.
 65 |         // This could use too much memory if N is large.
 66 |         if (array_key_exists('memoize', $options) && $options['memoize']) {
 67 |             $this->kernelResults = array_fill(0, $N, []);
 68 | 
 69 |             for ($i = 0; $i < $N; $i++) {
 70 |                 $this->kernelResults[$i] = array_fill(0, $N, []);
 71 | 
 72 |                 for ($j = 0; $j< $N; $j++) {
 73 |                     $this->kernelResults[$i][$j] = $kernel($data[$i], $data[$j]);
 74 |                 }
 75 |             }
 76 |         }
 77 | 
 78 |         // run SMO algorithm
 79 |         $iter = 0;
 80 |         $passes = 0;
 81 | 
 82 |         while ($passes < $numpasses && $iter < $maxiter) {
 83 |             $alphaChanged = 0;
 84 | 
 85 |             for ($i = 0; $i < $N; $i++) {
 86 |                 $Ei = $this->marginOne($data[$i]) - $labels[$i];
 87 | 
 88 |                 if (($labels[$i] * $Ei < -$tol && $this->alpha[$i] < $C)
 89 |                     || ($labels[$i] * $Ei > $tol && $this->alpha[$i] > 0)
 90 |                 ) {
 91 |                     // alpha_i needs updating! Pick a j to update it with
 92 |                     $j = $i;
 93 | 
 94 |                     while ($j === $i) {
 95 |                         $j = rand(0, $this->N - 1);
 96 |                     }
 97 | 
 98 |                     $Ej = $this->marginOne($data[$j]) - $labels[$j];
 99 | 
100 |                     // calculate L and H bounds for j to ensure we're in [0 C]x[0 C] box
101 |                     $ai = $this->alpha[$i];
102 |                     $aj = $this->alpha[$j];
103 |                     $L = 0;
104 |                     $H = $C;
105 | 
106 |                     if ($labels[$i] === $labels[$j]) {
107 |                         $L = max(0, $ai + $aj - $C);
108 |                         $H = min($C, $ai + $aj);
109 |                     } else {
110 |                         $L = max(0, $aj - $ai);
111 |                         $H = min($C, $C + $aj - $ai);
112 |                     }
113 | 
114 |                     if (abs($L - $H) < 1e-4) {
115 |                         continue;
116 |                     }
117 | 
118 |                     $eta = 2 * $this->kernelResult($i, $j) - $this->kernelResult($i, $i) - $this->kernelResult($j, $j);
119 | 
120 |                     if ($eta >= 0) {
121 |                         continue;
122 |                     }
123 | 
124 |                     // compute new alpha_j and clip it inside [0 C]x[0 C] box
125 |                     // then compute alpha_i based on it.
126 |                     $newaj = $aj - (($labels[$j] * ($Ei - $Ej)) / $eta);
127 | 
128 |                     if ($newaj > $H) {
129 |                         $newaj = $H;
130 |                     }
131 | 
132 |                     if ($newaj < $L) {
133 |                         $newaj = $L;
134 |                     }
135 | 
136 |                     if (abs($aj - $newaj) < 1e-4) {
137 |                         continue;
138 |                     }
139 | 
140 |                     $this->alpha[$j] = $newaj;
141 |                     $newai = $ai + $labels[$i] * $labels[$j] * ($aj - $newaj);
142 |                     $this->alpha[$i] = $newai;
143 | 
144 |                     // update the bias term
145 |                     $b1 = $this->b - $Ei - $labels[$i] * ($newai - $ai) * $this->kernelResult($i, $i)
146 |                              - $labels[$j] * ($newaj - $aj) * $this->kernelResult($i, $j);
147 | 
148 |                     $b2 = $this->b - $Ej - $labels[$i] * ($newai - $ai) * $this->kernelResult($i, $j)
149 |                              - $labels[$j] * ($newaj - $aj) * $this->kernelResult($j, $j);
150 | 
151 |                     $this->b = 0.5 * ($b1 + $b2);
152 | 
153 |                     if ($newai > 0 && $newai < $C) {
154 |                         $this->b = $b1;
155 |                     }
156 | 
157 |                     if ($newaj > 0 && $newaj < $C) {
158 |                         $this->b = $b2;
159 |                     }
160 | 
161 |                     $alphaChanged++;
162 |                 }
163 |             }
164 | 
165 |             $iter++;
166 | 
167 |             //echo 'iter: ' . $iter . ' alphaChanged: ' . $alphaChanged . PHP_EOL;
168 | 
169 |             //console.log("iter number %d, alphaChanged = %d", iter, alphaChanged);
170 |             $passes = ($alphaChanged == 0) ? $passes + 1 : 0;
171 |         }
172 | 
173 |         // if the user was using a linear kernel, lets also compute and store the
174 |         // weights. This will speed up evaluations during testing time
175 |         if ($this->kernelType === 'linear') {
176 |             // compute weights and store them
177 |             $this->w = array_fill(0, $this->D, 0);
178 | 
179 |             for ($j = 0; $j < $this->D; $j++) {
180 |                 $s = 0.0;
181 | 
182 |                 for ($i = 0; $i < $this->N; $i++) {
183 |                     $s += $this->alpha[$i] * $labels[$i] * $data[$i][$j];
184 |                 }
185 | 
186 |                 $this->w[$j] = $s;
187 |                 $this->usew_ = true;
188 |             }
189 |         } else {
190 |             // okay, we need to retain all the support vectors in the training data,
191 |             // we can't just get away with computing the weights and throwing it out
192 | 
193 |             // But! We only need to store the support vectors for evaluation of testing
194 |             // instances. So filter here based on this.alpha[i]. The training data
195 |             // for which this.alpha[i] = 0 is irrelevant for future.
196 |             $newdata = [];
197 |             $newlabels = [];
198 |             $newalpha = [];
199 | 
200 |             for ($i = 0; $i < $this->N; $i++) {
201 |                 //console.log("alpha=%f", this.alpha[i]);
202 |                 if ($this->alpha[$i] > $alphatol) {
203 |                     $newdata[] = $this->data[$i];
204 |                     $newlabels[] = $this->labels[$i];
205 |                     $newalpha[] = $this->alpha[$i];
206 |                 }
207 |             }
208 | 
209 |             // store data and labels
210 |             $this->data = $newdata;
211 |             $this->labels = $newlabels;
212 |             $this->alpha = $newalpha;
213 |             $this->N = count($this->data);
214 |             //console.log("filtered training data from %d to %d support vectors.", data.length, this.data.length);
215 |         }
216 | 
217 |         $trainstats = [];
218 |         $trainstats['iters'] = $iter;
219 | 
220 |         return $trainstats;
221 |     }
222 | 
223 |     // inst is an array of length D. Returns margin of given example
224 |     // this is the core prediction function. All others are for convenience mostly
225 |     // and end up calling this one somehow.
226 |     protected function marginOne($inst)
227 |     {
228 |         $f = $this->b;
229 | 
230 |         // if the linear kernel was used and w was computed and stored,
231 |         // (i.e. the svm has fully finished training)
232 |         // the internal class variable usew_ will be set to true.
233 |         if ($this->usew_) {
234 |             // we can speed this up a lot by using the computed weights
235 |             // we computed these during train(). This is significantly faster
236 |             // than the version below
237 |             for ($j = 0; $j < $this->D; $j++) {
238 |                 $f += $inst[$j] * $this->w[$j];
239 |             }
240 |         } else {
241 |             for ($i = 0; $i < $this->N; $i++) {
242 |                 $kernel = $this->kernel;
243 |                 $f += $this->alpha[$i] * $this->labels[$i] * $kernel($inst, $this->data[$i]);
244 |             }
245 |         }
246 | 
247 |         return $f;
248 |     }
249 | 
250 |     public function predictOne($inst)
251 |     {
252 |         return $this->marginOne($inst) > 0 ? 1 : -1;
253 |     }
254 | 
255 |     // data is an NxD array. Returns array of margins.
256 |     protected function margins($data)
257 |     {
258 |         // go over support vectors and accumulate the prediction.
259 |         $N = count($data);
260 |         $margins = array_fill(0, $N, 0);
261 | 
262 |         for ($i = 0; $i < $N; $i++) {
263 |             $margins[$i] = $this->marginOne($data[$i]);
264 |         }
265 | 
266 |         return $margins;
267 |     }
268 | 
269 |     protected function kernelResult($i, $j)
270 |     {
271 |         if ($this->kernelResults) {
272 |             return $this->kernelResults[$i][$j];
273 |         }
274 | 
275 |         $kernel = $this->kernel;
276 | 
277 |         return $kernel($this->data[$i], $this->data[$j]);
278 |     }
279 | 
280 |     // data is NxD array. Returns array of 1 or -1, predictions
281 |     public function predict($data)
282 |     {
283 |         $margs = $this->margins($data);
284 | 
285 |         for ($i = 0; $i < count($margs); $i++) {
286 |             $margs[$i] = $margs[$i] > 0 ? 1 : -1;
287 |         }
288 | 
289 |         return $margs;
290 |     }
291 | 
292 |     protected function linearKernel($v1, $v2)
293 |     {
294 |         $s = 0;
295 | 
296 |         for ($q = 0; $q < count($v1); $q++) {
297 |             $s += $v1[$q] * $v2[$q];
298 |         }
299 | 
300 |         return $s;
301 |     }
302 | 
303 |     public function save($file)
304 |     {
305 |         if (file_exists($file)) {
306 |             unlink($file);
307 |         }
308 | 
309 |         file_put_contents($file, serialize($this));
310 |     }
311 | 
312 |     public static function load($file)
313 |     {
314 |         if (! file_exists($file)) {
315 |             throw new \Exception('File not found', 1);
316 |         }
317 | 
318 |         return unserialize(file_get_contents($file));
319 |     }
320 | }
321 | 


--------------------------------------------------------------------------------