├── README.md ├── class.apriori.php ├── dataset.txt └── example.php /README.md: -------------------------------------------------------------------------------- 1 | Apriori Algorithm 2 | =============== 3 | 4 | Implementation of the Apriori algorithm in PHP. [Main Page](http://vtwo.org/algorithm/apriori/) 5 | 6 | ## Usage 7 | First step: 8 | ```php 9 | include 'class.apriori.php'; 10 | $Apriori = new Apriori(); 11 | ``` 12 | ## Methods 13 | setMaxScan(int), setMinSup(int), setMinConf(int), setDelimiter(string), getMinSup(void), getMinConf(void), getMaxScan(void), getDelimiter(void), process(string or array), printFreqItemsets(void), getFreqItemsets(void), printAssociationRules(void), getAssociationRules(void), saveFreqItemsets(string), saveAssociationRules(string) 14 | 15 | ## Initialize 16 | Initialize options: 17 | ```php 18 | $Apriori->setMaxScan(20); //Scan 2, 3, ... 19 | $Apriori->setMinSup(2); //Minimum support 1, 2, 3, ... 20 | $Apriori->setMinConf(75); //Minimum confidence - Percent 1, 2, ..., 100 21 | $Apriori->setDelimiter(','); //Delimiter 22 | ``` 23 | ## dataset.txt 24 | ```txt 25 | A, B, C, D 26 | A, D, C 27 | B, C 28 | A, E, C 29 | ``` 30 | ## Example 31 | minSup = 2, minConf = 75(%) 32 | ### Coding 33 | ```php 34 | setMaxScan(20); //Scan 2, 3, ... 40 | $Apriori->setMinSup(2); //Minimum support 1, 2, 3, ... 41 | $Apriori->setMinConf(75); //Minimum confidence - Percent 1, 2, ..., 100 42 | $Apriori->setDelimiter(','); //Delimiter 43 | 44 | /* 45 | Use Array: 46 | $dataset = array(); 47 | $dataset[] = array('A', 'B', 'C', 'D'); 48 | $dataset[] = array('A', 'D', 'C'); 49 | $dataset[] = array('B', 'C'); 50 | $dataset[] = array('A', 'E', 'C'); 51 | $Apriori->process($dataset); 52 | */ 53 | $Apriori->process('dataset.txt'); 54 | 55 | //Frequent Itemsets 56 | echo '

Frequent Itemsets

'; 57 | $Apriori->printFreqItemsets(); 58 | 59 | echo '

Frequent Itemsets Array

'; 60 | print_r($Apriori->getFreqItemsets()); 61 | 62 | //Association Rules 63 | echo '

Association Rules

'; 64 | $Apriori->printAssociationRules(); 65 | 66 | echo '

Association Rules Array

'; 67 | print_r($Apriori->getAssociationRules()); 68 | 69 | //Save to file 70 | $Apriori->saveFreqItemsets('freqItemsets.txt'); 71 | $Apriori->saveAssociationRules('associationRules.txt'); 72 | ?> 73 | ``` 74 | ### Result 75 | #### Frequent Itemsets 76 | ```txt 77 | Time: 0 second(s) 78 | =============================================================================== 79 | {B,C} = 2 80 | {A,C,D} = 2 81 | ``` 82 | #### Frequent Itemsets Array 83 | ```txt 84 | Array 85 | ( 86 | [0] => Array 87 | ( 88 | [sup] => 2 89 | [0] => B 90 | [1] => C 91 | ) 92 | 93 | [1] => Array 94 | ( 95 | [sup] => 2 96 | [0] => A 97 | [1] => C 98 | [2] => D 99 | ) 100 | 101 | ) 102 | ``` 103 | #### Association Rules 104 | ```txt 105 | Time: 0 second(s) 106 | =============================================================================== 107 | B => C = 100% 108 | D => C = 100% 109 | D => A = 100% 110 | D => A,C = 100% 111 | C => A = 75% 112 | A => C = 100% 113 | A,D => C = 100% 114 | C,D => A = 100% 115 | ``` 116 | #### Association Rules Array 117 | ```txt 118 | Array 119 | ( 120 | [B] => Array 121 | ( 122 | [C] => 100 123 | ) 124 | 125 | [D] => Array 126 | ( 127 | [C] => 100 128 | [A] => 100 129 | [A,C] => 100 130 | ) 131 | 132 | [C] => Array 133 | ( 134 | [A] => 75 135 | ) 136 | 137 | [A] => Array 138 | ( 139 | [C] => 100 140 | ) 141 | 142 | [A,D] => Array 143 | ( 144 | [C] => 100 145 | ) 146 | 147 | [C,D] => Array 148 | ( 149 | [A] => 100 150 | ) 151 | 152 | ) 153 | ``` 154 | -------------------------------------------------------------------------------- /class.apriori.php: -------------------------------------------------------------------------------- 1 | =2 27 | private $maxPhase = 20; 28 | 29 | private $fiTime = 0; 30 | private $arTime = 0; 31 | 32 | public function setDelimiter($char) 33 | { 34 | $this->delimiter = $char; 35 | } 36 | 37 | public function setMinSup($int) 38 | { 39 | $this->minSup = $int; 40 | } 41 | 42 | public function setMinConf($int) 43 | { 44 | $this->minConf = 
$int;
    }

    /**
     * Sets the maximum phase number; freqItemsets() stops combining once
     * $this->phase reaches this value.
     *
     * @param int $int Maximum phase number
     */
    public function setMaxScan($int)
    {
        $this->maxPhase = $int;
    }

    /**
     * @return mixed The configured delimiter
     */
    public function getDelimiter()
    {
        return $this->delimiter;
    }

    /**
     * @return mixed The configured minimum support
     */
    public function getMinSup()
    {
        return $this->minSup;
    }

    /**
     * @return mixed The configured minimum confidence
     */
    public function getMinConf()
    {
        return $this->minConf;
    }

    /**
     * @return mixed The configured maximum phase number
     */
    public function getMaxScan()
    {
        return $this->maxPhase;
    }

    /**
     * 1. Builds the transaction table ($this->table): one row per transaction, keyed by item ID.
     * 2. Assigns a numeric ID to every distinct item name ($this->keys['v->k'] / ['k->v']).
     * 3. Counts the support of every single item (level 1) into $this->allsups.
     * Note: only items whose support reaches $this->minSup are stored in
     * $this->allthings[$this->phase].
     *
     * Each row of $db may be a delimiter-separated string or an array of item
     * names. Blank rows are dropped so that the table stays densely indexed, and
     * an item repeated inside one transaction is counted once.
     *
     * @param string|array $db Path of the dataset file, or the list of transactions
     */
    private function makeTable($db)
    {
        $table   = array();
        $sups    = array();
        $counter = 1;

        if(!is_array($db))
        {
            $db = file($db);
            if($db===false)
            {
                // file() has already raised a warning; continue with an empty dataset.
                $db = array();
            }
        }

        foreach($db as $transaction)
        {
            // Array rows are used as they are; explode() only accepts strings.
            $items = is_array($transaction) ? $transaction : explode($this->delimiter, $transaction);

            $row = array();
            foreach($items as $item)
            {
                $name = trim((string)$item);
                if($name==='')
                {
                    continue;
                }

                if(!isset($this->keys['v->k'][$name]))
                {
                    $this->keys['v->k'][$name]    = $counter;
                    $this->keys['k->v'][$counter] = $name;
                    $counter++;
                }

                $id = $this->keys['v->k'][$name];
                if(isset($row[$id]))
                {
                    // Support counts transactions, not occurrences.
                    continue;
                }

                $row[$id] = 1;
                $sups[$id] = isset($sups[$id]) ? $sups[$id] + 1 : 1;
            }

            if($row)
            {
                $table[] = $row;
            }
        }

        $frequent = array();
        foreach($sups as $id => $sup)
        {
            $this->allsups[$id] = $sup;
            if($sup>=$this->minSup)
            {
                $frequent[] = array($id);
            }
        }

        $this->allthings[$this->phase] = $frequent;
        $this->table = $table;
    }

    /**
     1. 
Counts the support of an itemset: the number of rows of $this->table that contain every given item ID.
     The result is cached in $this->allsups under the delimiter-joined ID string.

     $arr        - list of item IDs
     $implodeArr - optional cache key (the IDs joined with the delimiter); when empty, $arr is sorted and joined here
     Returns the support count.
     **/
    private function scan($arr, $implodeArr = '')
    {
        if(!$implodeArr)
        {
            sort($arr);
            $implodeArr = implode($this->delimiter, $arr);
        }

        if(isset($this->allsups[$implodeArr]))
        {
            return $this->allsups[$implodeArr];
        }

        $cr = 0;

        // Iterate the rows themselves rather than indexes 0..count-1, so that a
        // table with missing row indexes is still scanned completely.
        foreach($this->table as $row)
        {
            $hasAll = true;
            foreach($arr as $id)
            {
                if(!isset($row[$id]))
                {
                    $hasAll = false;
                    break;
                }
            }

            if($hasAll)
            {
                $cr++;
            }
        }

        $this->allsups[$implodeArr] = $cr;

        return $cr;
    }

    /**
     1. Merges two lists and drops duplicate values, keeping first-seen order.
        Returns an empty array when both inputs are empty.
     **/
    private function combine($arr1, $arr2)
    {
        $values = array();
        $seen   = array();

        foreach(array($arr1, $arr2) as $source)
        {
            foreach($source as $value)
            {
                if(!isset($seen[$value]))
                {
                    $seen[$value] = 1;
                    $values[]     = $value;
                }
            }
        }

        return $values;
    }

    /**
     1. 
Returns the item name(s), joined with the delimiter, for the given item ID or list of item IDs.
     {1,2,3,4} => {A,B,C,D}
     A single scalar ID is accepted as well as a list.
     **/
    private function realName($arr)
    {
        $names = array();

        // The (array) cast lets callers pass one scalar ID; count() and offset
        // access on a string would fail or read a single character.
        foreach((array)$arr as $id)
        {
            $names[] = $this->keys['k->v'][$id];
        }

        return implode($this->delimiter, $names);
    }

    // Returns true only when the two ID lists share no element.
    //1-2=>2-3 : false
    //1-2=>5-6 : true
    private function checkRule($a, $b)
    {
        foreach($a as $left)
        {
            foreach($b as $right)
            {
                if($left==$right)
                {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     Confidence of a rule as a percentage rounded to two decimals: sup_ab / sup_a * 100.
     Returns 0.0 when $sup_a is zero instead of dividing by zero.
     **/
    private function confidence($sup_a, $sup_ab)
    {
        if(!$sup_a)
        {
            return 0.0;
        }

        return round(($sup_ab / $sup_a) * 100, 2);
    }

    /**
     Returns every non-empty subset of $items, each sorted.
     Subsets are produced in binary-counter order with $items[0] as the most
     significant bit, so the full set is always the last element.
     **/
    private function subsets($items)
    {
        $result = array();
        $num    = count($items);
        $last   = (1 << $num) - 1;

        for($mask=1; $mask<=$last; $mask++)
        {
            $subset = array();
            for($j=0; $j<$num; $j++)
            {
                // Bit ($num-1-$j) of the mask selects $items[$j].
                if($mask & (1 << ($num - 1 - $j)))
                {
                    $subset[] = $items[$j];
                }
            }

            sort($subset);
            $result[] = $subset;
        }

        return $result;
    }

    /**
     1. 
Builds the frequent itemsets and stores their keys in $this->freqItmsts;
        the elapsed time is stored in $this->fiTime.
     **/
    private function freqItemsets($db)
    {
        $this->fiTime = $this->startTimer();
        $this->makeTable($db);

        while($this->phase<$this->maxPhase)
        {
            $current = $this->allthings[$this->phase];
            $total   = count($current);
            $found   = 0;

            // Try every unordered pair of itemsets of the current phase.
            for($i=0; $i<$total; $i++)
            {
                for($j=$i+1; $j<$total; $j++)
                {
                    $candidate = $this->combine($current[$i], $current[$j]);
                    sort($candidate);
                    $key = implode($this->delimiter, $candidate);

                    if(isset($this->freqItmsts[$key]))
                    {
                        continue;
                    }

                    if($this->scan($candidate, $key)>=$this->minSup)
                    {
                        $this->allthings[$this->phase+1][] = $candidate;
                        $this->freqItmsts[$key] = 1;
                        $found++;
                    }
                }
            }

            // Fewer than two new itemsets leaves nothing to pair up in the next phase.
            if($found<=1)
            {
                break;
            }

            $this->phase++;
        }

        // Remove the subsets that belong to larger frequent itemsets.
        foreach($this->freqItmsts as $key => $unused)
        {
            $ids  = explode($this->delimiter, $key);
            $size = count($ids);
            if($size<3)
            {
                continue;
            }

            foreach($this->subsets($ids) as $subset)
            {
                if(count($subset)>=$size)
                {
                    break;
                }

                unset($this->freqItmsts[implode($this->delimiter, $subset)]);
            }
        }

        $this->fiTime = $this->stopTimer($this->fiTime);
    }

    /**
     1. 
Computes the final association rules according to the minimum confidence.
        Runs freqItemsets($db) first, then for every frequent itemset pairs up all
        of its disjoint subsets and keeps the rules whose confidence reaches
        $this->minConf. The result is stored in $this->rules and returned as
        array[left side][right side] = confidence; the elapsed time goes to $this->arTime.
     **/
    public function process($db)
    {
        $result = array();

        $this->freqItemsets($db);
        $this->arTime = $this->startTimer();

        foreach($this->freqItmsts as $key => $unused)
        {
            $subsets = $this->subsets(explode($this->delimiter, $key));

            foreach($subsets as $left)
            {
                foreach($subsets as $right)
                {
                    // A rule needs disjoint sides.
                    if(!$this->checkRule($left, $right))
                    {
                        continue;
                    }

                    $n1 = $this->realName($left);
                    $n2 = $this->realName($right);

                    $both = $this->scan($this->combine($left, $right));
                    $c1   = $this->confidence($this->scan($left), $both);
                    $c2   = $this->confidence($this->scan($right), $both);

                    if($c1>=$this->minConf)
                    {
                        $result[$n1][$n2] = $c1;
                    }

                    if($c2>=$this->minConf)
                    {
                        $result[$n2][$n1] = $c2;
                    }
                }
            }
        }

        $this->arTime = $this->stopTimer($this->arTime);

        return $this->rules = $result;
    }

    /**
     Echoes the frequent-itemset timing header followed by one
     "{names} = support" entry per frequent itemset.
     **/
    public function printFreqItemsets()
    {
        echo 'Time: '.$this->fiTime.' second(s)
===============================================================================
';

        foreach($this->freqItmsts as $key => $unused)
        {
            // realName() is given the whole ID list: it joins the names with the
            // delimiter itself, and passing it one scalar ID at a time is not
            // valid input for its count()/offset access.
            $ids = explode($this->delimiter, $key);

            echo '{'.$this->realName($ids).'} = '.$this->allsups[$key].'
';
        }
    }

    /**
     Writes one "{names} = support" line per frequent itemset to $filename.
     **/
    public function saveFreqItemsets($filename)
    {
        $content = '';

        foreach($this->freqItmsts as $key => $unused)
        {
            // The whole ID list goes to realName(), which joins the names itself.
            $ids = explode($this->delimiter, $key);

            $content .= '{'.$this->realName($ids).'} = '.$this->allsups[$key]."\n";
        }

        file_put_contents($filename, $content);
    }

    /**
     Returns the frequent itemsets as a list of arrays; each entry holds the
     support under 'sup' followed by the item names at numeric indexes.
     **/
    public function getFreqItemsets()
    {
        $result = array();

        foreach($this->freqItmsts as $key => $unused)
        {
            $entry = array('sup' => $this->allsups[$key]);
            foreach(explode($this->delimiter, $key) as $id)
            {
                // Wrap the single ID in an array, the input shape realName() iterates over.
                $entry[] = $this->realName(array($id));
            }

            $result[] = $entry;
        }

        return $result;
    }

    /**
     Echoes the association-rule timing header followed by one
     "left => right = confidence%" entry per stored rule.
     **/
    public function printAssociationRules()
    {
        echo 'Time: '.$this->arTime.' second(s)
===============================================================================
'; 499 | 500 | /* Walk the two-level $this->rules array (outer key $a, inner key $b, value $conf) and echo one entry per rule. */ foreach($this->rules as $a => $arr) 501 | { 502 | foreach($arr as $b => $conf) 503 | { 504 | echo "$a => $b = $conf%
"; 505 | } 506 | } 507 | } 508 | 509 | /** Builds one line per entry of $this->rules (outer key, inner key, value, then a newline) and writes the text to $filename with file_put_contents(). */ public function saveAssociationRules($filename) 510 | { 511 | $content = ''; 512 | 513 | foreach($this->rules as $a => $arr) 514 | { 515 | foreach($arr as $b => $conf) 516 | { 517 | $content .= "$a => $b = $conf%\n"; 518 | } 519 | } 520 | 521 | file_put_contents($filename, $content); 522 | } 523 | 524 | /** Returns $this->rules as stored. */ public function getAssociationRules() 525 | { 526 | return $this->rules; 527 | } 528 | 529 | /** Returns the current time as a float: the two space-separated parts of microtime() are cast to float and summed. */ private function startTimer() 530 | { 531 | list($usec, $sec) = explode(" ", microtime()); 532 | return ((float)$usec + (float)$sec); 533 | } 534 | 535 | /** Returns the time elapsed since $start (a value from startTimer()), rounded to $round decimal places (default 2). */ private function stopTimer($start, $round=2) 536 | { 537 | $endtime = $this->startTimer()-$start; 538 | $round = pow(10, $round); 539 | return round($endtime*$round)/$round; 540 | } 541 | } 542 | ?> 543 | -------------------------------------------------------------------------------- /dataset.txt: -------------------------------------------------------------------------------- 1 | A, B, C, D 2 | A, D, C 3 | B, C 4 | A, E, C 5 | -------------------------------------------------------------------------------- /example.php: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Apriori Alghoritm 6 | 7 | 8 | setMaxScan(20); //Scan 2, 3, ... 14 | $Apriori->setMinSup(2); //Minimum support 1, 2, 3, ... 15 | $Apriori->setMinConf(75); //Minimum confidence - Percent 1, 2, ..., 100 16 | $Apriori->setDelimiter(','); //Delimiter 17 | 18 | /* 19 | Use Array: 20 | $dataset = array(); 21 | $dataset[] = array('A', 'B', 'C', 'D'); 22 | $dataset[] = array('A', 'D', 'C'); 23 | $dataset[] = array('B', 'C'); 24 | $dataset[] = array('A', 'E', 'C'); 25 | $Apriori->process($dataset); 26 | */ 27 | $Apriori->process('dataset.txt'); 28 | 29 | //Frequent Itemsets 30 | echo '

Frequent Itemsets

'; 31 | $Apriori->printFreqItemsets(); 32 | 33 | echo '

Frequent Itemsets Array

'; 34 | print_r($Apriori->getFreqItemsets()); 35 | 36 | //Association Rules 37 | echo '

Association Rules

'; 38 | $Apriori->printAssociationRules(); 39 | 40 | echo '

Association Rules Array

'; 41 | print_r($Apriori->getAssociationRules()); 42 | 43 | //Save to file 44 | $Apriori->saveFreqItemsets('freqItemsets.txt'); 45 | $Apriori->saveAssociationRules('associationRules.txt'); 46 | ?> 47 | 48 | 49 | --------------------------------------------------------------------------------