├── README.md
├── class.apriori.php
├── dataset.txt
└── example.php
/README.md:
--------------------------------------------------------------------------------
1 | Apriori Algorithm
2 | ===============
3 |
4 | Implementation of the Apriori algorithm in PHP. [Main Page](http://vtwo.org/algorithm/apriori/)
5 |
6 | ## Usage
7 | First step:
8 | ```php
9 | include 'class.apriori.php';
10 | $Apriori = new Apriori();
11 | ```
12 | ## Methods
13 | setMaxScan(int), setMinSup(int), setMinConf(int), setDelimiter(string), getMinSup(void), getMinConf(void), getMaxScan(void), getDelimiter(void), process(string or array), printFreqItemsets(void), getFreqItemsets(void), printAssociationRules(void), getAssociationRules(void), saveFreqItemsets(string), saveAssociationRules(string)
14 |
15 | ## Initialize
16 | Initialize options:
17 | ```php
18 | $Apriori->setMaxScan(20); //Scan 2, 3, ...
19 | $Apriori->setMinSup(2); //Minimum support 1, 2, 3, ...
20 | $Apriori->setMinConf(75); //Minimum confidence - Percent 1, 2, ..., 100
21 | $Apriori->setDelimiter(','); //Delimiter
22 | ```
23 | ## dataset.txt
24 | ```txt
25 | A, B, C, D
26 | A, D, C
27 | B, C
28 | A, E, C
29 | ```
30 | ## Example
31 | minSup = 2, minConf = 75(%)
32 | ### Coding
33 | ```php
34 | <?php
35 | include 'class.apriori.php';
36 |
37 | $Apriori = new Apriori();
38 |
39 | $Apriori->setMaxScan(20); //Scan 2, 3, ...
40 | $Apriori->setMinSup(2); //Minimum support 1, 2, 3, ...
41 | $Apriori->setMinConf(75); //Minimum confidence - Percent 1, 2, ..., 100
42 | $Apriori->setDelimiter(','); //Delimiter
43 |
44 | /*
45 | Use Array:
46 | $dataset = array();
47 | $dataset[] = array('A', 'B', 'C', 'D');
48 | $dataset[] = array('A', 'D', 'C');
49 | $dataset[] = array('B', 'C');
50 | $dataset[] = array('A', 'E', 'C');
51 | $Apriori->process($dataset);
52 | */
53 | $Apriori->process('dataset.txt');
54 |
55 | //Frequent Itemsets
56 | echo '
Frequent Itemsets
';
57 | $Apriori->printFreqItemsets();
58 |
59 | echo 'Frequent Itemsets Array
';
60 | print_r($Apriori->getFreqItemsets());
61 |
62 | //Association Rules
63 | echo 'Association Rules
';
64 | $Apriori->printAssociationRules();
65 |
66 | echo 'Association Rules Array
';
67 | print_r($Apriori->getAssociationRules());
68 |
69 | //Save to file
70 | $Apriori->saveFreqItemsets('freqItemsets.txt');
71 | $Apriori->saveAssociationRules('associationRules.txt');
72 | ?>
73 | ```
74 | ### Result
75 | #### Frequent Itemsets
76 | ```txt
77 | Time: 0 second(s)
78 | ===============================================================================
79 | {B,C} = 2
80 | {A,C,D} = 2
81 | ```
82 | #### Frequent Itemsets Array
83 | ```txt
84 | Array
85 | (
86 | [0] => Array
87 | (
88 | [sup] => 2
89 | [0] => B
90 | [1] => C
91 | )
92 |
93 | [1] => Array
94 | (
95 | [sup] => 2
96 | [0] => A
97 | [1] => C
98 | [2] => D
99 | )
100 |
101 | )
102 | ```
103 | #### Association Rules
104 | ```txt
105 | Time: 0 second(s)
106 | ===============================================================================
107 | B => C = 100%
108 | D => C = 100%
109 | D => A = 100%
110 | D => A,C = 100%
111 | C => A = 75%
112 | A => C = 100%
113 | A,D => C = 100%
114 | C,D => A = 100%
115 | ```
116 | #### Association Rules Array
117 | ```txt
118 | Array
119 | (
120 | [B] => Array
121 | (
122 | [C] => 100
123 | )
124 |
125 | [D] => Array
126 | (
127 | [C] => 100
128 | [A] => 100
129 | [A,C] => 100
130 | )
131 |
132 | [C] => Array
133 | (
134 | [A] => 75
135 | )
136 |
137 | [A] => Array
138 | (
139 | [C] => 100
140 | )
141 |
142 | [A,D] => Array
143 | (
144 | [C] => 100
145 | )
146 |
147 | [C,D] => Array
148 | (
149 | [A] => 100
150 | )
151 |
152 | )
153 | ```
154 |
--------------------------------------------------------------------------------
/class.apriori.php:
--------------------------------------------------------------------------------
1 | =2
27 | private $maxPhase = 20;
28 |
29 | private $fiTime = 0;
30 | private $arTime = 0;
31 |
/**
 * Sets the separator string.
 *
 * The delimiter is used to split each transaction line into items and to
 * join item ids/names when itemsets are stored or printed.
 *
 * @param string $char Separator, e.g. ','.
 */
public function setDelimiter($char)
{
    $this->delimiter = $char;
}
36 |
/**
 * Sets the minimum support.
 *
 * An itemset is kept as frequent when the number of transactions
 * containing it is greater than or equal to this value.
 *
 * @param int $int Minimum support count.
 */
public function setMinSup($int)
{
    $this->minSup = $int;
}
41 |
/**
 * Sets the minimum confidence, expressed as a percentage.
 *
 * A rule is reported when its confidence is greater than or equal to
 * this value.
 *
 * @param int $int Minimum confidence in percent.
 */
public function setMinConf($int)
{
    $this->minConf = $int;
}
46 |
/**
 * Sets the upper bound on the number of scan phases.
 *
 * Frequent-itemset generation stops as soon as the current phase
 * reaches this value.
 *
 * @param int $int Maximum phase number.
 */
public function setMaxScan($int)
{
    $this->maxPhase = $int;
}
51 |
/**
 * Returns the separator string currently in use.
 *
 * @return string
 */
public function getDelimiter()
{
    return $this->delimiter;
}
56 |
/**
 * Returns the configured minimum support count.
 *
 * @return int
 */
public function getMinSup()
{
    return $this->minSup;
}
61 |
/**
 * Returns the configured minimum confidence (percent).
 *
 * @return int
 */
public function getMinConf()
{
    return $this->minConf;
}
66 |
/**
 * Returns the configured maximum number of scan phases.
 *
 * @return int
 */
public function getMaxScan()
{
    return $this->maxPhase;
}
71 |
/**
 * Builds the transaction table and the level-1 itemsets.
 *
 * 1. Builds the item table: one row per non-empty transaction, keyed by item id.
 * 2. Assigns every distinct item a numeric id and records both lookup
 *    directions in $this->keys ('v->k' name to id, 'k->v' id to name).
 * 3. Counts the support of every single item (level 1).
 * Note: only items whose support reaches the minimum support are stored
 * as level-1 itemsets in $this->allthings.
 *
 * @param string|array $db Path of a dataset file (one transaction per line)
 *                         or an array of transactions. Each transaction is
 *                         either a delimited string or an array of item names.
 */
private function makeTable($db)
{
    $table   = array();
    $support = array();
    $counter = 1;

    if (!is_array($db)) {
        $db = file($db);
        if ($db === false) {
            // file() has already raised a warning; treat an unreadable
            // file as an empty dataset instead of failing in the loop below.
            $db = array();
        }
    }

    foreach ($db as $transaction) {
        // Array datasets carry their items directly; only strings need splitting.
        $items = is_array($transaction)
            ? $transaction
            : explode($this->delimiter, $transaction);

        $row = array();
        foreach ($items as $item) {
            $name = trim((string) $item);
            if ($name === '') {
                continue;
            }

            if (!isset($this->keys['v->k'][$name])) {
                $this->keys['v->k'][$name]    = $counter;
                $this->keys['k->v'][$counter] = $name;
                $counter++;
            }

            $id = $this->keys['v->k'][$name];
            if (isset($row[$id])) {
                // Support counts transactions, so an item repeated inside
                // one transaction must only be counted once.
                continue;
            }
            $row[$id] = 1;

            if (!isset($support[$id])) {
                $support[$id]       = 1;
                $this->allsups[$id] = 1;
            } else {
                $support[$id]++;
                $this->allsups[$id]++;
            }
        }

        if ($row) {
            // Append so that row indices stay contiguous (0..n-1) even when
            // the input contains blank lines.
            $table[] = $row;
        }
    }

    $frequent = array();
    foreach ($support as $id => $sup) {
        if ($sup >= $this->minSup) {
            $frequent[] = array($id);
        }
    }

    $this->allthings[$this->phase] = $frequent;
    $this->table = $table;
}
138 |
/**
 * Counts the support of an itemset: the number of table rows that contain
 * every one of the given item ids.
 *
 * Results are cached in $this->allsups under the delimiter-joined id list,
 * so each itemset is counted against the table at most once.
 *
 * @param array  $arr        Item ids of the itemset.
 * @param string $implodeArr Optional cache key (the ids, already sorted and
 *                           joined with the delimiter). When empty, the key
 *                           is derived from $arr here.
 * @return int Support count.
 */
private function scan($arr, $implodeArr = '')
{
    if (!$implodeArr) {
        sort($arr);
        $implodeArr = implode($this->delimiter, $arr);
    }

    if (isset($this->allsups[$implodeArr])) {
        return $this->allsups[$implodeArr];
    }

    $cr = 0;
    // Iterate over the rows themselves rather than 0..count-1: the table may
    // have gaps in its indices (blank input lines), and an index loop would
    // then never reach the last rows.
    foreach ($this->table as $row) {
        $containsAll = true;
        foreach ($arr as $id) {
            if (!isset($row[$id])) {
                $containsAll = false;
                break;
            }
        }

        if ($containsAll) {
            $cr++;
        }
    }

    $this->allsups[$implodeArr] = $cr;

    return $cr;
}
187 |
/**
 * Merges two arrays and drops duplicate values.
 *
 * Values keep their first-seen order: all of $arr1, then the values of
 * $arr2 that were not already present.
 *
 * @param array $arr1 First list of item ids.
 * @param array $arr2 Second list of item ids.
 * @return array List of distinct values; an empty array when both inputs are empty.
 */
private function combine($arr1, $arr2)
{
    // Both accumulators are initialised up front so that two empty inputs
    // yield an empty array instead of an undefined-index notice and null.
    $values = array();
    $seen   = array();

    foreach (array($arr1, $arr2) as $source) {
        foreach ($source as $value) {
            if (!isset($seen[$value])) {
                $values[]     = $value;
                $seen[$value] = 1;
            }
        }
    }

    return $values;
}
217 |
/**
 * Returns the item name(s) for the given item id(s), joined with the delimiter.
 * {1,2,3,4} => {A,B,C,D}
 *
 * @param array|int|string $arr List of item ids, or a single id.
 * @return string Delimiter-joined item names.
 */
private function realName($arr)
{
    // Several callers pass a single id (a string) instead of a list.
    // count()/indexing on a string fails on PHP 8 and picks only the first
    // digit of multi-digit ids on older versions, so normalise to a list.
    if (!is_array($arr)) {
        $arr = array($arr);
    }

    $names = array();
    foreach ($arr as $id) {
        $names[] = $this->keys['k->v'][$id];
    }

    return implode($this->delimiter, $names);
}
239 |
/**
 * Tells whether two itemsets are disjoint, i.e. may form the two sides of a rule.
 *
 * 1-2 => 2-3 : false (item 2 appears on both sides)
 * 1-2 => 5-6 : true
 *
 * @param array $a Item ids of the first side.
 * @param array $b Item ids of the second side.
 * @return bool True when no item occurs in both arrays.
 */
private function checkRule($a, $b)
{
    foreach ($a as $left) {
        foreach ($b as $right) {
            if ($left == $right) {
                return false;
            }
        }
    }

    return true;
}
259 |
/**
 * Computes the confidence of a rule A => B as a percentage.
 *
 * @param int $sup_a  Support of the antecedent A.
 * @param int $sup_ab Support of A and B together.
 * @return float Confidence in percent, rounded to two decimals; 0.0 when
 *               the antecedent has no support (the ratio is undefined).
 */
private function confidence($sup_a, $sup_ab)
{
    if ($sup_a == 0) {
        // Avoid a division by zero (DivisionByZeroError on PHP 8).
        return 0.0;
    }

    return round(($sup_ab / $sup_a) * 100, 2);
}
264 |
/**
 * Enumerates every non-empty subset of the given items.
 *
 * Subsets are produced by counting a bit mask from 0 to 2^n - 1, where the
 * most significant bit selects the first item. Each subset is sorted, and
 * the full set is therefore always the last entry of the result.
 *
 * @param array $items List of items.
 * @return array List of subsets (each a sorted list).
 */
private function subsets($items)
{
    $size  = count($items);
    $total = 1 << $size;
    $all   = array();

    for ($mask = 0; $mask < $total; $mask++) {
        $picked = array();
        for ($pos = 0; $pos < $size; $pos++) {
            // Bit ($size - 1 - $pos) corresponds to character $pos of the
            // zero-padded binary representation of $mask.
            if (($mask >> ($size - 1 - $pos)) & 1) {
                $picked[] = $items[$pos];
            }
        }

        if (!$picked) {
            continue;
        }

        sort($picked);
        $all[] = $picked;
    }

    return $all;
}
291 |
/**
 * Finds the frequent itemsets of the dataset.
 *
 * Starting from the level-1 itemsets built by makeTable(), every pair of
 * itemsets of the current phase is merged; a merged itemset whose support
 * reaches the minimum support is recorded in $this->freqItmsts and becomes
 * a candidate for the next phase. Afterwards, every proper subset of a
 * frequent itemset with three or more items is removed from the result.
 * The elapsed time is stored in $this->fiTime.
 *
 * @param string|array $db Dataset file path or array of transactions.
 */
private function freqItemsets($db)
{
    $this->fiTime = $this->startTimer();
    $this->makeTable($db);

    while ($this->phase < $this->maxPhase) {
        $current = $this->allthings[$this->phase];
        $total   = count($current);
        $found   = 0;

        for ($a = 0; $a < $total; $a++) {
            for ($b = $a + 1; $b < $total; $b++) {
                $candidate = $this->combine($current[$a], $current[$b]);
                sort($candidate);
                $key = implode($this->delimiter, $candidate);

                if (isset($this->freqItmsts[$key])) {
                    continue;
                }

                if ($this->scan($candidate, $key) >= $this->minSup) {
                    $this->allthings[$this->phase + 1][] = $candidate;
                    $this->freqItmsts[$key] = 1;
                    $found++;
                }
            }
        }

        // With at most one new itemset there is nothing left to pair up.
        if ($found <= 1) {
            break;
        }

        $this->phase++;
    }

    // Remove the subsets that belong to larger itemsets.
    foreach ($this->freqItmsts as $key => $unused) {
        $ids  = explode($this->delimiter, $key);
        $size = count($ids);
        if ($size < 3) {
            continue;
        }

        foreach ($this->subsets($ids) as $subset) {
            if (count($subset) >= $size) {
                // The full set itself: stop here.
                break;
            }

            unset($this->freqItmsts[implode($this->delimiter, $subset)]);
        }
    }

    $this->fiTime = $this->stopTimer($this->fiTime);
}
366 |
/**
 * Runs the whole analysis: finds the frequent itemsets, then derives the
 * association rules whose confidence reaches the minimum confidence.
 *
 * For every frequent itemset, each pair of disjoint non-empty subsets
 * (X, Y) is evaluated in both directions, X => Y and Y => X. The elapsed
 * time of the rule generation is stored in $this->arTime.
 *
 * @param string|array $db Dataset file path or array of transactions.
 * @return array Rules as [antecedent][consequent] => confidence (percent),
 *               with item names joined by the delimiter.
 */
public function process($db)
{
    $result = array();

    $this->freqItemsets($db);
    $this->arTime = $this->startTimer();

    foreach ($this->freqItmsts as $k => $v) {
        $subsets = $this->subsets(explode($this->delimiter, $k));
        $num = count($subsets);

        for ($i = 0; $i < $num; $i++) {
            // Both directions of a pair are handled together below, so each
            // unordered pair only needs one visit (j > i). A subset is never
            // disjoint from itself, hence j == i can be skipped as well.
            for ($j = $i + 1; $j < $num; $j++) {
                if (!$this->checkRule($subsets[$i], $subsets[$j])) {
                    continue;
                }

                $n1 = $this->realName($subsets[$i]);
                $n2 = $this->realName($subsets[$j]);

                $scan = $this->scan($this->combine($subsets[$i], $subsets[$j]));
                $c1 = $this->confidence($this->scan($subsets[$i]), $scan);
                $c2 = $this->confidence($this->scan($subsets[$j]), $scan);

                if ($c1 >= $this->minConf) {
                    $result[$n1][$n2] = $c1;
                }

                if ($c2 >= $this->minConf) {
                    $result[$n2][$n1] = $c2;
                }
            }
        }
    }

    $this->arTime = $this->stopTimer($this->arTime);

    return $this->rules = $result;
}
416 |
/**
 * Prints the time spent finding the frequent itemsets, followed by one
 * line per frequent itemset in the form "{names} = support".
 */
public function printFreqItemsets()
{
    echo 'Time: '.$this->fiTime.' second(s)
===============================================================================
';

    foreach ($this->freqItmsts as $key => $unused) {
        // The key already is the delimiter-joined id list under which the
        // support is stored. realName() takes a list of ids, so the ids are
        // translated as one list rather than one string at a time.
        $ids = explode($this->delimiter, $key);

        echo '{'.$this->realName($ids).'} = '.$this->allsups[$key].'
';
    }
}
444 |
/**
 * Writes the frequent itemsets to a file, one "{names} = support" line
 * per itemset.
 *
 * @param string $filename Path of the file to write.
 */
public function saveFreqItemsets($filename)
{
    $content = '';

    foreach ($this->freqItmsts as $key => $unused) {
        // The key already is the delimiter-joined id list under which the
        // support is stored. realName() takes a list of ids, so the ids are
        // translated as one list rather than one string at a time.
        $ids = explode($this->delimiter, $key);

        $content .= '{'.$this->realName($ids).'} = '.$this->allsups[$key]."\n";
    }

    file_put_contents($filename, $content);
}
474 |
/**
 * Returns the frequent itemsets as an array.
 *
 * @return array One entry per itemset: key 'sup' holds the support,
 *               followed by the item names under keys 0, 1, 2, ...
 */
public function getFreqItemsets()
{
    $result = array();

    foreach ($this->freqItmsts as $key => $unused) {
        $entry = array('sup' => $this->allsups[$key]);

        foreach (explode($this->delimiter, $key) as $id) {
            // realName() takes a list of ids; wrap the single id accordingly.
            $entry[] = $this->realName(array($id));
        }

        $result[] = $entry;
    }

    return $result;
}
495 |
/**
 * Prints the time spent generating the rules, followed by one line per
 * association rule in the form "A => B = confidence%".
 */
public function printAssociationRules()
{
    echo 'Time: '.$this->arTime.' second(s)
===============================================================================
';

    foreach ($this->rules as $antecedent => $consequents) {
        foreach ($consequents as $consequent => $confidence) {
            echo $antecedent.' => '.$consequent.' = '.$confidence.'%
';
        }
    }
}
508 |
/**
 * Writes the association rules to a file, one "A => B = confidence%" line
 * per rule.
 *
 * @param string $filename Path of the file to write.
 */
public function saveAssociationRules($filename)
{
    $lines = '';

    foreach ($this->rules as $antecedent => $consequents) {
        foreach ($consequents as $consequent => $confidence) {
            $lines .= $antecedent.' => '.$consequent.' = '.$confidence."%\n";
        }
    }

    file_put_contents($filename, $lines);
}
523 |
/**
 * Returns the association rules computed by the last call to process().
 *
 * @return array Rules as [antecedent][consequent] => confidence (percent).
 */
public function getAssociationRules()
{
    return $this->rules;
}
528 |
/**
 * Returns the current Unix timestamp with microsecond resolution.
 *
 * @return float Seconds since the Unix epoch.
 */
private function startTimer()
{
    // microtime(true) yields the float directly; there is no need to parse
    // the "usec sec" string form and add the parts by hand.
    return microtime(true);
}
534 |
/**
 * Returns the time elapsed since $start, rounded to $round decimals.
 *
 * @param float $start Timestamp previously obtained from startTimer().
 * @param int   $round Number of decimal places to keep.
 * @return float Elapsed seconds.
 */
private function stopTimer($start, $round=2)
{
    $elapsed = $this->startTimer() - $start;
    $factor  = pow(10, $round);

    return round($elapsed * $factor) / $factor;
}
541 | }
542 | ?>
543 |
--------------------------------------------------------------------------------
/dataset.txt:
--------------------------------------------------------------------------------
1 | A, B, C, D
2 | A, D, C
3 | B, C
4 | A, E, C
5 |
--------------------------------------------------------------------------------
/example.php:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Apriori Algorithm
6 |
7 |
<?php
// Example: mine frequent itemsets and association rules from dataset.txt.
include 'class.apriori.php';

$Apriori = new Apriori();

$Apriori->setMaxScan(20);       //Scan 2, 3, ...
$Apriori->setMinSup(2);         //Minimum support 1, 2, 3, ...
$Apriori->setMinConf(75);       //Minimum confidence - Percent 1, 2, ..., 100
$Apriori->setDelimiter(',');    //Delimiter

/*
Use Array:
$dataset   = array();
$dataset[] = array('A', 'B', 'C', 'D');
$dataset[] = array('A', 'D', 'C');
$dataset[] = array('B', 'C');
$dataset[] = array('A', 'E', 'C');
$Apriori->process($dataset);
*/
$Apriori->process('dataset.txt');

//Frequent Itemsets
echo 'Frequent Itemsets
';
$Apriori->printFreqItemsets();

echo 'Frequent Itemsets Array
';
print_r($Apriori->getFreqItemsets());

//Association Rules
echo 'Association Rules
';
$Apriori->printAssociationRules();

echo 'Association Rules Array
';
print_r($Apriori->getAssociationRules());

//Save to file
$Apriori->saveFreqItemsets('freqItemsets.txt');
$Apriori->saveAssociationRules('associationRules.txt');
?>
47 |
48 |
49 |
--------------------------------------------------------------------------------