├── .gitignore ├── README.md ├── composer.json ├── examples └── 2d-dots.php └── src └── Svm.php /.gitignore: -------------------------------------------------------------------------------- 1 | composer.lock 2 | vendor/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PHP SVM (Work in Progress) 2 | 3 | Pure PHP SVM implementation: a simple PHP port of [svmjs](https://github.com/karpathy/svmjs). Thank you, @karpathy :-) 4 | 5 | ## TO-DO 6 | 7 | The "Issues" page of this repository is used for TO-DO management; just search for the "to-do" tag. 8 | 9 | ## Credits 10 | 11 | [@gabrielrcouto](http://www.twitter.com/gabrielrcouto) 12 | 13 | ## License 14 | 15 | [MIT License](http://gabrielrcouto.mit-license.org/) 16 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gabrielrcouto/php-svm", 3 | "description": "Pure PHP Support Vector Machine", 4 | "keywords": ["php","svm","support","vector", "machine"], 5 | "homepage": "http://github.com/gabrielrcouto/php-svm", 6 | "license": "MIT", 7 | "authors": [ 8 | { 9 | "name": "Gabriel Rodrigues Couto", 10 | "email": "gabrielrcouto@gmail.com" 11 | } 12 | ], 13 | "require": { 14 | "php": ">=7.0" 15 | }, 16 | "require-dev" : { 17 | }, 18 | "autoload": { 19 | "psr-4": { 20 | "Svm\\": "src/" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/2d-dots.php: -------------------------------------------------------------------------------- 1 | train($data, $labels); 19 | 20 | $predictions = $svm->predict([[0.1, 0.1], [0.8, 0.8]]); 21 | var_dump($predictions); 22 | -------------------------------------------------------------------------------- /src/Svm.php:
-------------------------------------------------------------------------------- 1 | data = $data; 22 | $this->labels = $labels; 23 | 24 | // parameters 25 | // C value. Decrease for more regularization 26 | $C = array_key_exists('C', $options) ? $options['C'] : 1.0; 27 | // numerical tolerance. Don't touch unless you're pro 28 | $tol = array_key_exists('tol', $options) ? $options['tol'] : 1e-4; 29 | // non-support vectors for space and time efficiency are truncated. To guarantee correct result set this to 0 to do no truncating. If you want to increase efficiency, experiment with setting this little higher, up to maybe 1e-4 or so. 30 | $alphatol = array_key_exists('alphatol', $options) ? $options['alphatol'] : 1e-7; 31 | // max number of iterations 32 | $maxiter = array_key_exists('maxiter', $options) ? $options['maxiter'] : 10000; 33 | // how many passes over data with no change before we halt? Increase for more precision. 34 | $numpasses = array_key_exists('numpasses', $options) ? $options['numpasses'] : 20; 35 | 36 | // instantiate kernel according to options. kernel can be given as string or as a custom function 37 | $kernel = [$this, 'linearKernel']; 38 | $this->kernelType = 'linear'; 39 | 40 | if (array_key_exists('kernel', $options)) { 41 | if (is_string($options['kernel'])) { 42 | // kernel was specified as a string. Handle these special cases appropriately 43 | if ($options['kernel'] === 'linear') { 44 | $kernel = [$this, 'linearKernel']; 45 | $this->kernelType = 'linear'; 46 | } 47 | } 48 | 49 | if (is_callable($options['kernel'])) { 50 | // assume kernel was specified as a function. 
Let's just use it 51 | $kernel = $options['kernel']; 52 | $this->kernelType = 'custom'; 53 | } 54 | } 55 | 56 | // initializations 57 | $this->kernel = $kernel; 58 | $this->N = $N = count($data); 59 | $this->D = $D = count($data[0]); 60 | $this->alpha = array_fill(0, $N, 0); 61 | $this->b = 0.0; 62 | $this->usew_ = false; // internal efficiency flag 63 | 64 | // Cache kernel computations to avoid expensive recomputation. 65 | // This could use too much memory if N is large. 66 | if (array_key_exists('memoize', $options) && $options['memoize']) { 67 | $this->kernelResults = array_fill(0, $N, []); 68 | 69 | for ($i = 0; $i < $N; $i++) { 70 | $this->kernelResults[$i] = array_fill(0, $N, []); 71 | 72 | for ($j = 0; $j< $N; $j++) { 73 | $this->kernelResults[$i][$j] = $kernel($data[$i], $data[$j]); 74 | } 75 | } 76 | } 77 | 78 | // run SMO algorithm 79 | $iter = 0; 80 | $passes = 0; 81 | 82 | while ($passes < $numpasses && $iter < $maxiter) { 83 | $alphaChanged = 0; 84 | 85 | for ($i = 0; $i < $N; $i++) { 86 | $Ei = $this->marginOne($data[$i]) - $labels[$i]; 87 | 88 | if (($labels[$i] * $Ei < -$tol && $this->alpha[$i] < $C) 89 | || ($labels[$i] * $Ei > $tol && $this->alpha[$i] > 0) 90 | ) { 91 | // alpha_i needs updating! 
Pick a j to update it with 92 | $j = $i; 93 | 94 | while ($j === $i) { 95 | $j = rand(0, $this->N - 1); 96 | } 97 | 98 | $Ej = $this->marginOne($data[$j]) - $labels[$j]; 99 | 100 | // calculate L and H bounds for j to ensure we're in [0 C]x[0 C] box 101 | $ai = $this->alpha[$i]; 102 | $aj = $this->alpha[$j]; 103 | $L = 0; 104 | $H = $C; 105 | 106 | if ($labels[$i] === $labels[$j]) { 107 | $L = max(0, $ai + $aj - $C); 108 | $H = min($C, $ai + $aj); 109 | } else { 110 | $L = max(0, $aj - $ai); 111 | $H = min($C, $C + $aj - $ai); 112 | } 113 | 114 | if (abs($L - $H) < 1e-4) { 115 | continue; 116 | } 117 | 118 | $eta = 2 * $this->kernelResult($i, $j) - $this->kernelResult($i, $i) - $this->kernelResult($j, $j); 119 | 120 | if ($eta >= 0) { 121 | continue; 122 | } 123 | 124 | // compute new alpha_j and clip it inside [0 C]x[0 C] box 125 | // then compute alpha_i based on it. 126 | $newaj = $aj - (($labels[$j] * ($Ei - $Ej)) / $eta); 127 | 128 | if ($newaj > $H) { 129 | $newaj = $H; 130 | } 131 | 132 | if ($newaj < $L) { 133 | $newaj = $L; 134 | } 135 | 136 | if (abs($aj - $newaj) < 1e-4) { 137 | continue; 138 | } 139 | 140 | $this->alpha[$j] = $newaj; 141 | $newai = $ai + $labels[$i] * $labels[$j] * ($aj - $newaj); 142 | $this->alpha[$i] = $newai; 143 | 144 | // update the bias term 145 | $b1 = $this->b - $Ei - $labels[$i] * ($newai - $ai) * $this->kernelResult($i, $i) 146 | - $labels[$j] * ($newaj - $aj) * $this->kernelResult($i, $j); 147 | 148 | $b2 = $this->b - $Ej - $labels[$i] * ($newai - $ai) * $this->kernelResult($i, $j) 149 | - $labels[$j] * ($newaj - $aj) * $this->kernelResult($j, $j); 150 | 151 | $this->b = 0.5 * ($b1 + $b2); 152 | 153 | if ($newai > 0 && $newai < $C) { 154 | $this->b = $b1; 155 | } 156 | 157 | if ($newaj > 0 && $newaj < $C) { 158 | $this->b = $b2; 159 | } 160 | 161 | $alphaChanged++; 162 | } 163 | } 164 | 165 | $iter++; 166 | 167 | //echo 'iter: ' . $iter . ' alphaChanged: ' . $alphaChanged . 
PHP_EOL; 168 | 169 | //console.log("iter number %d, alphaChanged = %d", iter, alphaChanged); 170 | $passes = ($alphaChanged == 0) ? $passes + 1 : 0; 171 | } 172 | 173 | // if the user was using a linear kernel, lets also compute and store the 174 | // weights. This will speed up evaluations during testing time 175 | if ($this->kernelType === 'linear') { 176 | // compute weights and store them 177 | $this->w = array_fill(0, $this->D, 0); 178 | 179 | for ($j = 0; $j < $this->D; $j++) { 180 | $s = 0.0; 181 | 182 | for ($i = 0; $i < $this->N; $i++) { 183 | $s += $this->alpha[$i] * $labels[$i] * $data[$i][$j]; 184 | } 185 | 186 | $this->w[$j] = $s; 187 | $this->usew_ = true; 188 | } 189 | } else { 190 | // okay, we need to retain all the support vectors in the training data, 191 | // we can't just get away with computing the weights and throwing it out 192 | 193 | // But! We only need to store the support vectors for evaluation of testing 194 | // instances. So filter here based on this.alpha[i]. The training data 195 | // for which this.alpha[i] = 0 is irrelevant for future. 196 | $newdata = []; 197 | $newlabels = []; 198 | $newalpha = []; 199 | 200 | for ($i = 0; $i < $this->N; $i++) { 201 | //console.log("alpha=%f", this.alpha[i]); 202 | if ($this->alpha[$i] > $alphatol) { 203 | $newdata[] = $this->data[$i]; 204 | $newlabels[] = $this->labels[$i]; 205 | $newalpha[] = $this->alpha[$i]; 206 | } 207 | } 208 | 209 | // store data and labels 210 | $this->data = $newdata; 211 | $this->labels = $newlabels; 212 | $this->alpha = $newalpha; 213 | $this->N = count($this->data); 214 | //console.log("filtered training data from %d to %d support vectors.", data.length, this.data.length); 215 | } 216 | 217 | $trainstats = []; 218 | $trainstats['iters'] = $iter; 219 | 220 | return $trainstats; 221 | } 222 | 223 | // inst is an array of length D. Returns margin of given example 224 | // this is the core prediction function. 
// All others are for convenience mostly and end up calling this one somehow.

// Computes the raw decision value (margin) of a single instance.
//
// The result starts at the bias $this->b. When $this->usew_ is set (train()
// sets it after computing $this->w for the linear kernel) the dot product of
// $inst with $this->w is added; otherwise the kernel expansion
// sum_i alpha[i] * labels[i] * kernel($inst, data[i]) over the stored rows is added.
//
// $inst: array holding the instance's features; offsets 0..D-1 are read on
//        the weight path.
// Returns the numeric margin; its sign is what predictOne()/predict() use.
protected function marginOne($inst)
{
    $margin = $this->b;

    if ($this->usew_) {
        // Fast path: plain dot product with the precomputed weights.
        for ($d = 0; $d < $this->D; $d++) {
            $margin += $inst[$d] * $this->w[$d];
        }

        return $margin;
    }

    // The kernel callable does not change while we iterate; fetch it once.
    $kernel = $this->kernel;

    for ($i = 0; $i < $this->N; $i++) {
        $margin += $this->alpha[$i] * $this->labels[$i] * $kernel($inst, $this->data[$i]);
    }

    return $margin;
}

// Classifies a single instance.
// Returns 1 when the margin is strictly positive, -1 otherwise (a margin of
// exactly 0 therefore maps to -1).
public function predictOne($inst)
{
    return $this->marginOne($inst) > 0 ? 1 : -1;
}

// $data is an array of instances (each one as accepted by marginOne()).
// Returns a 0-indexed array with one margin per instance, in iteration order.
protected function margins($data)
{
    $margins = [];

    foreach ($data as $inst) {
        $margins[] = $this->marginOne($inst);
    }

    return $margins;
}

// Kernel value for the stored rows $i and $j.
// Uses the table in $this->kernelResults when it is populated (train() fills
// it when the 'memoize' option is truthy); otherwise evaluates the kernel on
// $this->data[$i] and $this->data[$j].
protected function kernelResult($i, $j)
{
    // empty() has the same truthiness as the bare property test but does not
    // raise a notice when the cache was never created.
    if (! empty($this->kernelResults)) {
        return $this->kernelResults[$i][$j];
    }

    $kernel = $this->kernel;

    return $kernel($this->data[$i], $this->data[$j]);
}

// $data is an array of instances. Returns a 0-indexed array of 1 / -1
// predictions, using the same rule as predictOne() (margin > 0 => 1).
public function predict($data)
{
    $predictions = [];

    foreach ($this->margins($data) as $margin) {
        $predictions[] = $margin > 0 ? 1 : -1;
    }

    return $predictions;
}

// Linear kernel: dot product of $v1 and $v2 over offsets 0..count($v1)-1.
// $v2 must provide at least the same offsets as $v1.
protected function linearKernel($v1, $v2)
{
    $sum = 0;
    $length = count($v1);

    for ($q = 0; $q < $length; $q++) {
        $sum += $v1[$q] * $v2[$q];
    }

    return $sum;
}

// Serializes this object and writes it to $file, replacing any existing file.
// Throws \Exception when the file cannot be written. serialize() itself may
// throw for objects that cannot be serialized (e.g. a Closure kernel).
public function save($file)
{
    // Serialize before touching the filesystem: if serialization fails, a
    // previously saved model must not already have been deleted.
    $payload = serialize($this);

    if (file_exists($file)) {
        unlink($file);
    }

    if (file_put_contents($file, $payload) === false) {
        throw new \Exception('Unable to write file', 2);
    }
}

// Reads $file and returns the unserialized value stored in it (as written by
// save()).
// Throws \Exception when the file is missing (code 1), cannot be read
// (code 2) or does not contain valid serialized data (code 3).
public static function load($file)
{
    if (! file_exists($file)) {
        throw new \Exception('File not found', 1);
    }

    $payload = file_get_contents($file);

    if ($payload === false) {
        throw new \Exception('Unable to read file', 2);
    }

    // NOTE(review): unserialize() can instantiate any class named in the
    // payload; only call load() on files from a trusted source.
    $model = unserialize($payload);

    if ($model === false) {
        throw new \Exception('Invalid model file', 3);
    }

    return $model;
}
}