├── .editorconfig ├── .gitignore ├── LICENSE ├── Readme.md ├── composer.json ├── example.xlsx └── src ├── C45.php ├── Calculator ├── AbstractCalculator.php ├── GainCalculator.php ├── GainRatioCalculator.php └── SplitInfoCalculator.php ├── DataInput ├── DataInput.php └── DataInputInterface.php └── TreeNode.php /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | 4 | # Unix-style newlines with a newline ending every file 5 | [*] 6 | end_of_line = lf 7 | insert_final_newline = false 8 | 9 | # Matches multiple files with brace expansion notation 10 | # Set default charset 11 | [*] 12 | charset = utf-8 13 | 14 | # Tab indentation (no size specified) 15 | indent_style = tab -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | composer.lock 2 | vendor/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Juliardi and Agung Dirgantara 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # C45 Algorithm - PHP Language 2 | 3 | > [**Use Example File**](example.xlsx) 4 | 5 | ## Installation 6 | 7 | The recommended way to install the C45 PHP library is through [Composer](https://getcomposer.org) : 8 | 9 | ```bash 10 | composer require medansoftware/c45-algorithm-php 11 | ``` 12 | 13 | ## Manual Installation 14 | 15 | ```bash 16 | composer dump-autoload 17 | ``` 18 | 19 | ## Setup 20 | 21 | ```php 22 | $c45 = new Algorithm\C45('example.xlsx', 'PLAY'); 23 | $initialize = $c45->initialize(); // initialize 24 | $buildTree = $initialize->buildTree(); // build tree 25 | 26 | $arrayTree = $buildTree->toArray(); // set to array 27 | $stringTree = $buildTree->toString(); // set to string 28 | 29 | echo "
"; 30 | print_r ($arrayTree); 31 | echo ""; 32 | 33 | echo $stringTree; 34 | ``` 35 | or 36 | 37 | ```php 38 | $c45 = new Algorithm\C45(); 39 | $c45->loadFile('example.xlsx'); // load example file 40 | $c45->setTargetAttribute('PLAY'); // set target attribute 41 | 42 | $initialize = $c45->initialize(); // initialize 43 | $buildTree = $initialize->buildTree(); // build tree 44 | 45 | $arrayTree = $buildTree->toArray(); // set to array 46 | $stringTree = $buildTree->toString(); // set to string 47 | 48 | echo "
"; 49 | print_r ($arrayTree); 50 | echo ""; 51 | 52 | echo $stringTree; 53 | ``` 54 | 55 | ## Other Examples 56 | 57 | ```php 58 | $c45 = new Algorithm\C45(); 59 | $c45->loadFile('example.xlsx')->setTargetAttribute('PLAY')->initialize(); 60 | 61 | echo "
"; 62 | print_r ($c45->buildTree()->toString()); // print as string 63 | echo ""; 64 | 65 | echo "
"; 66 | print_r ($c45->buildTree()->toJson()); // print as JSON 67 | echo ""; 68 | 69 | echo "
"; 70 | print_r ($c45->buildTree()->toArray()); // print as array 71 | echo ""; 72 | ``` 73 | 74 | ## Initialize Data from Array 75 | 76 | ```php 77 | $c45 = new Algorithm\C45(); 78 | $input = new Algorithm\C45\DataInput; 79 | $data = array( 80 | array( 81 | "OUTLOOK" => "Sunny", 82 | "TEMPERATURE" => "Hot", 83 | "HUMIDITY" => "High", 84 | "WINDY" => "False", 85 | "PLAY" => "No" 86 | ), 87 | array( 88 | "OUTLOOK" => "Sunny", 89 | "TEMPERATURE" => "Hot", 90 | "HUMIDITY" => "High", 91 | "WINDY" => "True", 92 | "PLAY" => "No" 93 | ), 94 | array( 95 | "OUTLOOK" => "Cloudy", 96 | "TEMPERATURE" => "Hot", 97 | "HUMIDITY" => "High", 98 | "WINDY" => "False", 99 | "PLAY" => "Yes" 100 | ), 101 | array( 102 | "OUTLOOK" => "Rainy", 103 | "TEMPERATURE" => "Mild", 104 | "HUMIDITY" => "High", 105 | "WINDY" => "False", 106 | "PLAY" => "Yes" 107 | ), 108 | array( 109 | "OUTLOOK" => "Rainy", 110 | "TEMPERATURE" => "Cool", 111 | "HUMIDITY" => "Normal", 112 | "WINDY" => "False", 113 | "PLAY" => "Yes" 114 | ), 115 | array( 116 | "OUTLOOK" => "Rainy", 117 | "TEMPERATURE" => "Cool", 118 | "HUMIDITY" => "Normal", 119 | "WINDY" => "True", 120 | "PLAY" => "No" 121 | ), 122 | array( 123 | "OUTLOOK" => "Cloudy", 124 | "TEMPERATURE" => "Cool", 125 | "HUMIDITY" => "Normal", 126 | "WINDY" => "True", 127 | "PLAY" => "Yes" 128 | ), 129 | array( 130 | "OUTLOOK" => "Sunny", 131 | "TEMPERATURE" => "Mild", 132 | "HUMIDITY" => "High", 133 | "WINDY" => "False", 134 | "PLAY" => "No" 135 | ), 136 | array( 137 | "OUTLOOK" => "Sunny", 138 | "TEMPERATURE" => "Cool", 139 | "HUMIDITY" => "Normal", 140 | "WINDY" => "False", 141 | "PLAY" => "Yes" 142 | ), 143 | array( 144 | "OUTLOOK" => "Rainy", 145 | "TEMPERATURE" => "Mild", 146 | "HUMIDITY" => "Normal", 147 | "WINDY" => "False", 148 | "PLAY" => "Yes" 149 | ), 150 | array( 151 | "OUTLOOK" => "Sunny", 152 | "TEMPERATURE" => "Mild", 153 | "HUMIDITY" => "Normal", 154 | "WINDY" => "True", 155 | "PLAY" => "Yes" 156 | ), 157 | array( 158 | "OUTLOOK" => "Cloudy", 159 | 
"TEMPERATURE" => "Mild", 160 | "HUMIDITY" => "High", 161 | "WINDY" => "True", 162 | "PLAY" => "Yes" 163 | ), 164 | array( 165 | "OUTLOOK" => "Cloudy", 166 | "TEMPERATURE" => "Hot", 167 | "HUMIDITY" => "Normal", 168 | "WINDY" => "False", 169 | "PLAY" => "Yes" 170 | ), 171 | array( 172 | "OUTLOOK" => "Rainy", 173 | "TEMPERATURE" => "Mild", 174 | "HUMIDITY" => "High", 175 | "WINDY" => "True", 176 | "PLAY" => "No" 177 | ) 178 | ); 179 | 180 | // Initialize Data 181 | $input->setData($data); // Set data from array 182 | $input->setAttributes(array('OUTLOOK', 'TEMPERATURE', 'HUMIDITY', 'WINDY', 'PLAY')); // Set attributes of data 183 | 184 | // Initialize C4.5 185 | $c45->c45 = $input; // Set input data 186 | $c45->setTargetAttribute('PLAY'); // Set target attribute 187 | $initialize = $c45->initialize(); // initialize 188 | 189 | // Build Output 190 | $buildTree = $initialize->buildTree(); // Build tree 191 | $arrayTree = $buildTree->toArray(); // Set to array 192 | $stringTree = $buildTree->toString(); // Set to string 193 | 194 | echo "
"; 195 | print_r ($arrayTree); 196 | echo ""; 197 | 198 | echo $stringTree; 199 | ``` 200 | 201 | ```php 202 | 203 | $new_data = array( 204 | 'OUTLOOK' => 'Sunny', 205 | 'TEMPERATURE' => 'Hot', 206 | 'HUMIDITY' => 'High', 207 | 'WINDY' => FALSE 208 | ); 209 | 210 | echo $c45->initialize()->buildTree()->classify($new_data); // print "No" 211 | ``` 212 | 213 | [Reference](https://github.com/juliardi/C45) 214 | 215 | 
Made with ❤️ + ☕ ~ Agung Dirgantara
216 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "medansoftware/c45-algorithm-php", 3 | "type": "library", 4 | "license": "MIT", 5 | "authors": [ 6 | { 7 | "name": "Agung Dirgantara", 8 | "email": "agungmasda29@gmail.com" 9 | } 10 | ], 11 | "require": { 12 | "php": ">=5.5", 13 | "phpoffice/phpspreadsheet": "^1.9" 14 | }, 15 | "autoload": { 16 | "classmap": [ 17 | "src" 18 | ] 19 | } 20 | } -------------------------------------------------------------------------------- /example.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/medansoftware/C45-Algorithm-PHP/e0ca25b3e91a67741e72f73162425f0db65ce5fe/example.xlsx -------------------------------------------------------------------------------- /src/C45.php: -------------------------------------------------------------------------------- 1 | c45 = new \Algorithm\C45\DataInput($file); 71 | $this->setTargetAttribute($target_attribute); 72 | } 73 | 74 | /** 75 | * Load file 76 | * 77 | * @param string $file 78 | * @return Algorithm\C45 79 | */ 80 | public function loadFile($file) 81 | { 82 | $this->c45 = new \Algorithm\C45\DataInput($file); 83 | return $this; 84 | } 85 | 86 | /** 87 | * Set target attribute 88 | * 89 | * @param string $target_attribute 90 | * @return Algorithm\C45 91 | */ 92 | public function setTargetAttribute($target_attribute) 93 | { 94 | if (!empty($target_attribute)) 95 | { 96 | $attributes = $this->c45->getAttributes(); 97 | 98 | if (in_array($target_attribute, $attributes)) 99 | { 100 | $this->target_attribute = $target_attribute; 101 | } 102 | else 103 | { 104 | $this->target_attribute = end($attributes); 105 | } 106 | } 107 | 108 | return $this; 109 | } 110 | 111 | /** 112 | * Initialize class 113 | * 114 | * @return object Algorithm\C45 115 | */ 116 | public function 
initialize() 117 | { 118 | $this->target_values = $this->getAttributeValues($this->target_attribute); 119 | 120 | foreach ($this->target_values as $value) 121 | { 122 | $criteria[$this->target_attribute] = $value; 123 | $this->targetCount[$value] = $this->c45->countByCriteria($criteria); 124 | } 125 | 126 | $this->gainCalculator = new GainCalculator($this->c45, $this->target_attribute); 127 | $this->splitInfoCalculator = new SplitInfoCalculator($this->c45, $this->target_attribute); 128 | $this->gainRatioCalculator = new GainRatioCalculator($this->c45, $this->target_attribute); 129 | 130 | return $this; 131 | } 132 | 133 | /** 134 | * Build decision tree 135 | * 136 | * @param array $criteria 137 | * @return TreeNode 138 | */ 139 | public function buildTree($criteria = array()) 140 | { 141 | $tree_node = new TreeNode; 142 | 143 | $check_class = $this->isBelongToOneClass($criteria); 144 | 145 | if ($check_class['return']) 146 | { 147 | $tree_node->setAttribute($this->target_attribute); 148 | $tree_node->addChild('result', $check_class['class']); 149 | $tree_node->setIsLeaf(true); 150 | return $tree_node; 151 | } 152 | 153 | $split_criterion = $this->calculateSplitCriterion($criteria); 154 | $best_attribute_name = $this->getBiggestArrayAttribute($split_criterion); 155 | $best_attribute_values = $this->getAttributeValues($best_attribute_name); 156 | 157 | $tree_node->setAttribute($best_attribute_name); 158 | unset($split_criterion[$best_attribute_name]); 159 | 160 | foreach ($best_attribute_values as $value) 161 | { 162 | $criteria[$best_attribute_name] = $value; 163 | $targetCount = $this->countTargetByCriteria($criteria); 164 | $tree_node->addClassesCount($value, $targetCount); 165 | 166 | if (array_sum($targetCount) == 0) 167 | { 168 | $target_count2 = $this->countTargetByCriteria([$best_attribute_name => $value]); 169 | $biggest_class = $this->getBiggestArrayAttribute($target_count2); 170 | 171 | $child = new TreeNode(); 172 | $child->setParent($tree_node); 173 | 
$child->setAttribute($this->target_attribute); 174 | $child->addChild('result', $biggest_class); 175 | $child->setIsLeaf(true); 176 | 177 | $tree_node->addChild($value, $child); 178 | } 179 | elseif (!empty($split_criterion)) 180 | { 181 | $child = $this->buildTree($criteria); 182 | $child->setParent($tree_node); 183 | $tree_node->addChild($value, $child); 184 | } 185 | else 186 | { 187 | $class_probability = $this->calculateClassProbability($criteria); 188 | $biggest_class = $this->getBiggestArrayAttribute($class_probability); 189 | 190 | $child = new TreeNode(); 191 | $child->setParent($tree_node); 192 | $child->setAttribute($this->target_attribute); 193 | $child->addChild('result', $biggest_class); 194 | $child->setIsLeaf(true); 195 | 196 | $tree_node->addChild($value, $child); 197 | } 198 | } 199 | 200 | return $tree_node; 201 | } 202 | 203 | public function calculateSplitCriterion($criteria = array()) 204 | { 205 | $gain = $this->gainCalculator->calculateGainAllAttributes($criteria); 206 | 207 | if ($this->split_criterion == $this->split_gain) 208 | { 209 | return $gain; 210 | } 211 | else 212 | { 213 | $split_info = $this->splitInfoCalculator->calculateSplitInfoAllAttributes($criteria); 214 | $gain_ratio = $this->gainRatioCalculator->calculateGainRatio($gain, $split_info); 215 | 216 | return $gain_ratio; 217 | } 218 | } 219 | 220 | public function calculateClassProbability($criteria = array()) 221 | { 222 | $count_target = $this->countTargetByCriteria($criteria); 223 | $total = array_sum($count_target); 224 | $class_probability = []; 225 | 226 | foreach ($this->target_values as $value) 227 | { 228 | $class_probability[$value] = $this->classProbability($count_target[$value], $total); 229 | } 230 | 231 | return $class_probability; 232 | } 233 | 234 | public function classProbability($count_target_class, $total) 235 | { 236 | if ($total == 0) 237 | { 238 | return 0; 239 | } 240 | 241 | return $count_target_class / $total; 242 | } 243 | 244 | public function 
isBelongToOneClass($criteria = array()) 245 | { 246 | $countAll = $this->c45->countByCriteria($criteria); 247 | 248 | foreach ($this->target_values as $value) 249 | { 250 | $criteria[$this->target_attribute] = $value; 251 | $count_by_target = $this->c45->countByCriteria($criteria); 252 | unset($criteria[$this->target_attribute]); 253 | if ($countAll === $count_by_target) 254 | { 255 | return [ 256 | 'return' => true, 257 | 'class' => $value, 258 | ]; 259 | } 260 | } 261 | 262 | return ['return' => false]; 263 | } 264 | /* Return the key that holds the largest value in $array (null when $array is empty). */ 265 | public function getBiggestArrayAttribute($array = array()) 266 | { 267 | /* arsort() keeps key association; array_multisort() renumbers integer-like keys (e.g. a class labelled "1"), which made this return a position instead of the real key. */ 268 | arsort($array); 269 | reset($array); 270 | 271 | return key($array); 272 | } 273 | 274 | public function countTargetByCriteria($criteria = array()) 275 | { 276 | $target_count = []; 277 | 278 | foreach ($this->target_values as $value) 279 | { 280 | $criteria[$this->target_attribute] = $value; 281 | $target_count[$value] = $this->c45->countByCriteria($criteria); 282 | } 283 | 284 | unset($criteria[$this->target_attribute]); 285 | 286 | return $target_count; 287 | } 288 | 289 | public function getAttributeValues($attribute_name) 290 | { 291 | return $this->c45->getClasses([$attribute_name])[$attribute_name]; 292 | } 293 | } -------------------------------------------------------------------------------- /src/Calculator/AbstractCalculator.php: -------------------------------------------------------------------------------- 1 | data = $data; 17 | $this->setTargetAttribute($targetAttribute); 18 | } 19 | 20 | public function setTargetAttribute($targetAttributeName) 21 | { 22 | $this->targetAttribute = $targetAttributeName; 23 | $this->targetValues = $this->getAttributeValues($this->targetAttribute); 24 | 25 | foreach ($this->targetValues as $value) { 26 | $criteria[$this->targetAttribute] = $value; 27 | $this->targetCount[$value] = $this->data->countByCriteria($criteria); 28 | } 29 | } 30 | 31 | protected function 
getAttributeValues($attributeName) 32 | { 33 | return $this->data->getClasses([$attributeName])[$attributeName]; 34 | } 35 | 36 | protected function getAttributeNames($criteria) 37 | { 38 | $attributeNames = $this->data->getAttributes(); 39 | 40 | foreach ($criteria as $key => $value) { 41 | $idx = array_search($key, $attributeNames); 42 | if ($idx !== false) { 43 | unset($attributeNames[$idx]); 44 | } 45 | } 46 | 47 | return $attributeNames; 48 | } 49 | } -------------------------------------------------------------------------------- /src/Calculator/GainCalculator.php: -------------------------------------------------------------------------------- 1 | getAttributeNames($criteria); 10 | 11 | $gain = []; 12 | 13 | foreach ($attributeNames as $value) 14 | { 15 | if ($value != $this->targetAttribute) 16 | { 17 | $gain[$value] = $this->calculateGainOfAttribute($value, $criteria); 18 | } 19 | } 20 | 21 | return $gain; 22 | } 23 | 24 | public function calculateGainOfAttribute($attributeName, $criteria = []) 25 | { 26 | $gain = 0; 27 | $attributeCount = []; 28 | $attributeValues = $this->getAttributeValues($attributeName); 29 | 30 | foreach ($attributeValues as $value) 31 | { 32 | $criteria[$attributeName] = $value; 33 | foreach ($this->targetValues as $targetValue) 34 | { 35 | $criteria[$this->targetAttribute] = $targetValue; 36 | $attributeCount[$value][$targetValue] = $this->data->countByCriteria($criteria); 37 | } 38 | } 39 | 40 | $gain = $this->gain($this->targetCount, $attributeCount); 41 | 42 | return $gain; 43 | } 44 | 45 | private function gain($classifier_values, $values) 46 | { 47 | $entropy_all = $this->entropy($classifier_values); 48 | $total_records = 0; 49 | 50 | foreach ($values as $sub_values) 51 | { 52 | $total_records += array_sum($sub_values); 53 | } 54 | 55 | $gain = 0; 56 | 57 | foreach ($values as $sub_values) 58 | { 59 | try 60 | { 61 | $sub_total_values = array_sum($sub_values); 62 | $entropy = $this->entropy($sub_values); 63 | $gain += 
($sub_total_values / $total_records) * $entropy; 64 | } 65 | catch (\Exception $e) 66 | { 67 | error_log($e->getMessage()); 68 | error_log($e->getTraceAsString()); 69 | } 70 | } 71 | 72 | $gain = $entropy_all - $gain; 73 | 74 | return $gain; 75 | } 76 | 77 | private function entropy(array $values) 78 | { 79 | $result = 0; 80 | $sum = array_sum($values); 81 | 82 | foreach ($values as $value) 83 | { 84 | if ($value > 0) 85 | { 86 | $proportion = $value / $sum; 87 | $result += -($proportion * log($proportion, 2)); 88 | } 89 | } 90 | 91 | return $result; 92 | } 93 | } -------------------------------------------------------------------------------- /src/Calculator/GainRatioCalculator.php: -------------------------------------------------------------------------------- 1 | $value) 12 | { 13 | if ($splitInfo[$key] == 0) 14 | { 15 | $gainRatio[$key] = 0; 16 | } 17 | else 18 | { 19 | $gainRatio[$key] = $value / $splitInfo[$key]; 20 | } 21 | } 22 | 23 | return $gainRatio; 24 | } 25 | } -------------------------------------------------------------------------------- /src/Calculator/SplitInfoCalculator.php: -------------------------------------------------------------------------------- 1 | getAttributeNames($criteria); 10 | 11 | $splitInfo = []; 12 | 13 | foreach ($attributeNames as $value) 14 | { 15 | $splitInfo[$value] = $this->calculateSplitInfoOfAttribute($value, $criteria); 16 | } 17 | 18 | return $splitInfo; 19 | } 20 | /* Split info of one attribute: counts the rows matching $criteria for each value of $attributeName and passes those counts to splitInfo(). */ 21 | public function calculateSplitInfoOfAttribute($attributeName, $criteria = []) 22 | { 23 | $attributeCount = []; 24 | 25 | $attributeValues = $this->getAttributeValues($attributeName); 26 | 27 | foreach ($attributeValues as $value) 28 | { 29 | $criteria[$attributeName] = $value; 30 | $attributeCount[$value] = $this->data->countByCriteria($criteria); 31 | } 32 | /* Computed once; the previous code called splitInfo() twice and discarded the first result. */ 33 | $splitInfo = $this->splitInfo($attributeCount); 34 | return $splitInfo; 35 | } 36 | 37 | private function splitInfo(array $values) 38 | { 39 | $result = 0; 40 | 
$sum = array_sum($values); 41 | 42 | foreach ($values as $value) 43 | { 44 | if ($value > 0) 45 | { 46 | $proportion = $value / $sum; 47 | $result += -1 * ($proportion * log($proportion, 2)); 48 | } 49 | } 50 | 51 | return $result; 52 | } 53 | } -------------------------------------------------------------------------------- /src/DataInput/DataInput.php: -------------------------------------------------------------------------------- 1 | file = $file; 23 | $this->parseFile(); 24 | $this->populateClasses(); 25 | } 26 | } 27 | 28 | /** 29 | * Set file 30 | * 31 | * @param string $path_to_file 32 | */ 33 | public function setFile($path_to_file) 34 | { 35 | $this->file = $path_to_file; 36 | return $this; 37 | } 38 | 39 | /** 40 | * Read file 41 | * 42 | * @param mixed $spreadsheet instance of \PhpOffice\PhpSpreadsheet\Spreadsheet or null 43 | * @param integer $sheet set current sheet 44 | * @return array 45 | */ 46 | public function readFile($spreadsheet = null, $sheet = 0) 47 | { 48 | if (!empty($this->file) OR !empty($spreadsheet)) 49 | { 50 | if (empty($spreadsheet)) 51 | { 52 | $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($this->file); 53 | } 54 | 55 | return $spreadsheet->setActiveSheetIndex($sheet)->toArray(); 56 | } 57 | else 58 | { 59 | throw new \Exception('File not set'); 60 | } 61 | } 62 | 63 | /** 64 | * Parse file 65 | * 66 | * @param mixed $Spreadsheet instance of \PhpOffice\PhpSpreadsheet\Spreadsheet or null 67 | * @param integer $sheet set current sheet 68 | * @return array 69 | */ 70 | public function parseFile($spreadsheet = null, $sheet = 0) 71 | { 72 | if (!empty($this->file) OR !empty($spreadsheet)) 73 | { 74 | if (empty($spreadsheet)) 75 | { 76 | $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($this->file); 77 | } 78 | 79 | $data = $spreadsheet->setActiveSheetIndex($sheet)->toArray(); 80 | 81 | $result = array(); 82 | 83 | if ($data) 84 | { 85 | if (empty($this->attributes)) 86 | { 87 | $this->attributes = $data[0]; 88 | 
array_shift($data); 89 | } 90 | 91 | foreach ($data as $value) 92 | { 93 | $temp = array(); 94 | 95 | for ($i = 0; $i < count($this->attributes); $i++) 96 | { 97 | $value[$i] = (is_bool($value[$i]))?($value[$i])?'True':'False':$value[$i]; 98 | $attribute_name = $this->attributes[$i]; 99 | /* Cast before trimming: empty cells arrive as null and trim(null) is deprecated since PHP 8.1. */ 100 | $temp[$attribute_name] = trim((string) $value[$i]); 101 | } 102 | 103 | $result[] = $temp; 104 | } 105 | } 106 | 107 | $this->data = $result; 108 | 109 | return $result; 110 | } 111 | } 112 | 113 | /** 114 | * {@inheritdoc} 115 | */ 116 | public function setAttributes($attributes = array()) 117 | { 118 | $this->attributes = $attributes; 119 | } 120 | 121 | /** 122 | * {@inheritdoc} 123 | */ 124 | public function hasAttribute($attribute) 125 | { 126 | return array_search($attribute, $this->attributes) !== false; 127 | } 128 | 129 | /** 130 | * {@inheritdoc} 131 | */ 132 | public function getAttributes() 133 | { 134 | return $this->attributes; 135 | } 136 | 137 | /** 138 | * {@inheritdoc} 139 | */ 140 | public function setData($data = array()) 141 | { 142 | $this->data = $data; 143 | $this->populateClasses(); 144 | } 145 | 146 | /** 147 | * {@inheritdoc} 148 | */ 149 | public function getData($start = 0, $length = null) 150 | { /* Returns the stored rows, optionally limited to $length rows starting at offset $start. */ 151 | if ($length === null && $start == 0) /* nothing to slice: hand back the stored rows untouched */ 152 | { 153 | return $this->data; 154 | } 155 | else 156 | { 157 | return array_slice($this->data, $start, $length); /* a null $length means "up to the end", so $start is honoured even without $length; a $length of 0 now yields no rows instead of all rows */ 158 | } 159 | } 160 | 161 | /** 162 | * {@inheritdoc} 163 | */ 164 | public function getClasses($attributes = array()) 165 | { 166 | if (!empty($attributes)) 167 | { 168 | $result = []; 169 | 170 | foreach ($attributes as $value) 171 | { 172 | if ($this->hasAttribute($value)) 173 | { 174 | $result[$value] = $this->classes[$value]; 175 | } 176 | } 177 | 178 | return $result; 179 | } 180 | else 181 | { 182 | return $this->classes; 183 | } 184 | } 185 | 186 | /** 187 | * Populate classes 188 | */ 189 | protected function populateClasses() 190 | { 191 | if (is_array($this->data)) 192 | { 193 | $this->classes = []; 194 | 195 
| for ($i = 0; $i < count($this->data); ++$i) 196 | { 197 | $data = $this->data[$i]; 198 | 199 | foreach ($data as $key => $value) 200 | { 201 | if (array_key_exists($key, $this->classes)) 202 | { 203 | if (array_search($value, $this->classes[$key]) === false) 204 | { 205 | array_push($this->classes[$key], $value); 206 | } 207 | } 208 | else 209 | { 210 | $this->classes[$key] = [$value]; 211 | } 212 | } 213 | } 214 | } 215 | } 216 | 217 | /** 218 | * {@inheritdoc} 219 | */ 220 | public function getByCriteria($criteria = array(), $length = null) 221 | { 222 | $result = []; 223 | 224 | foreach ($this->data as $row) 225 | { 226 | if ($length === 0) 227 | { 228 | break; 229 | } 230 | if ($this->isMatch($row, $criteria)) 231 | { 232 | array_push($result, $row); 233 | --$length; 234 | } 235 | } 236 | 237 | return $result; 238 | } 239 | 240 | /** 241 | * {@inheritdoc} 242 | */ 243 | public function countByCriteria($criteria = array()) 244 | { 245 | $result = 0; 246 | 247 | foreach ($this->data as $row) 248 | { 249 | if ($this->isMatch($row, $criteria)) 250 | { 251 | ++$result; 252 | } 253 | } 254 | 255 | return $result; 256 | } 257 | 258 | /** 259 | * Checks whether $data matched $criteria. 
260 | * 261 | * @param array $data 262 | * @param array $criteria 263 | * @return boolean 264 | */ 265 | private function isMatch($data, $criteria) 266 | { 267 | $result = true; 268 | 269 | foreach ($criteria as $key => $value) 270 | { 271 | if ($this->hasAttribute($key)) 272 | { 273 | if (is_array($data)) 274 | { 275 | if ($data[$key] != $value) 276 | { 277 | $result = $result && false; 278 | } 279 | } 280 | elseif(is_object($data)) 281 | { 282 | if ($data->{$key} != $value) 283 | { 284 | $result = $result && false; 285 | } 286 | } 287 | else 288 | { 289 | $result = false; 290 | } 291 | } 292 | } 293 | 294 | return $result; 295 | } 296 | } -------------------------------------------------------------------------------- /src/DataInput/DataInputInterface.php: -------------------------------------------------------------------------------- 1 | {value}] 61 | * @param integer $length ammount of data 62 | * @return array 63 | */ 64 | public function getByCriteria($criteria = array(), $length = null); 65 | 66 | /** 67 | * Counts rows that matched the criteria. 
68 | * 69 | * @param array $criteria [description] 70 | * @return [type] [description] 71 | */ 72 | public function countByCriteria($criteria = array()); 73 | } -------------------------------------------------------------------------------- /src/TreeNode.php: -------------------------------------------------------------------------------- 1 | parent = $parent; 48 | } 49 | 50 | /** 51 | * Get parent 52 | * 53 | * @return Algorithm\C45\TreeNode|null 54 | */ 55 | public function getParent() 56 | { 57 | return $this->parent; 58 | } 59 | 60 | /** 61 | * Set attribute name 62 | * 63 | * @param string $attribute 64 | */ 65 | public function setAttribute($attribute) 66 | { 67 | $this->attribute = $attribute; 68 | } 69 | 70 | /** 71 | * Add classes count 72 | * 73 | * @param string $valueName 74 | * @param array $classesCount 75 | */ 76 | public function addClassesCount($valueName, $classesCount = array()) 77 | { 78 | $this->classes_count[$valueName] = $classesCount; 79 | } 80 | 81 | /** 82 | * Set is leaf 83 | * 84 | * @param boolean $is_leaf 85 | * @return Algorithm\C45\TreeNode 86 | */ 87 | public function setIsLeaf($is_leaf = false) 88 | { 89 | $this->is_leaf = $is_leaf; 90 | return $this; 91 | } 92 | 93 | /** 94 | * Get is leaf 95 | * 96 | * @return boolean 97 | */ 98 | public function getIsLeaf() 99 | { 100 | return $this->is_leaf; 101 | } 102 | 103 | /** 104 | * Add child TreeNode 105 | * 106 | * @param string $value 107 | * @param mixed $child 108 | */ 109 | public function addChild($value, $child) 110 | { 111 | if (!isset($this->values)) 112 | { 113 | $this->values = []; 114 | } 115 | 116 | $this->values[$value] = $child; 117 | return $this; 118 | } 119 | 120 | /** 121 | * Get child 122 | * 123 | * @param string $value 124 | * @return Algorithm\C45\TreeNode 125 | */ 126 | public function getChild($value) 127 | { 128 | if ($this->hasValue($value)) 129 | { 130 | return $this->values[$value]; 131 | } 132 | } 133 | 134 | /** 135 | * Get values 136 | * 137 | * @return 
array current of node value 138 | */ 139 | public function getValues() 140 | { 141 | return array_keys($this->values); 142 | } 143 | 144 | /** 145 | * Get attribute name 146 | * 147 | * @return string 148 | */ 149 | public function getAttributeName() 150 | { 151 | return $this->attribute; 152 | } 153 | 154 | /** 155 | * Remove value from TreeNode 156 | * 157 | * @param string $value 158 | * @return Algorithm\C45\TreeNode 159 | */ 160 | public function removeValue($value) 161 | { 162 | if ($this->hasValue($value)) 163 | { 164 | unset($this->values[$value]); 165 | } 166 | 167 | return $this; 168 | } 169 | 170 | /** 171 | * Check value in current node 172 | * 173 | * @param string $value 174 | * @return boolean 175 | */ 176 | public function hasValue($value) 177 | { 178 | if (!isset($this->values)) 179 | { 180 | return false; 181 | } 182 | 183 | return array_key_exists($value, $this->values); 184 | } 185 | 186 | /** 187 | * Classify data 188 | * 189 | * @param array $data 190 | * @return string 191 | */ 192 | public function classify(array $data) 193 | { 194 | if (isset($data[$this->attribute])) 195 | { 196 | $attrValue = $data[$this->attribute]; 197 | 198 | if (!$this->hasValue($attrValue)) 199 | { 200 | return 'unclassified'; 201 | } 202 | 203 | $child = $this->values[$attrValue]; 204 | 205 | if (!$child->getIsLeaf()) 206 | { 207 | return $child->classify($data); 208 | } 209 | else 210 | { 211 | return $child->getChild('result'); 212 | } 213 | } 214 | } 215 | 216 | /** 217 | * 218 | * Draw tree with array 219 | * 220 | * @return array 221 | */ 222 | public function toArray() 223 | { 224 | $data = []; 225 | $data['attribute'] = $this->attribute; 226 | foreach ($this->values as $key => $value) 227 | { 228 | if (!is_null($value)) 229 | { 230 | if ($value instanceof self) 231 | { 232 | $data['values'][$key] = $value->toArray(); 233 | } 234 | } 235 | } 236 | 237 | return $data; 238 | } 239 | 240 | /** 241 | * 242 | * Draw tree with array 243 | * 244 | * @return array 245 
| */ 246 | public function toJson() 247 | { /* Returns the tree as one JSON string with the same shape toArray() produces: 'attribute' plus nested 'values'. */ 248 | $data = []; 249 | $data['attribute'] = $this->attribute; 250 | foreach ($this->values as $key => $value) 251 | { 252 | if (!is_null($value)) 253 | { 254 | if ($value instanceof self) 255 | { /* Nest the child as an array; calling toJson() here embedded every subtree as an escaped JSON string (double encoding). */ 256 | $data['values'][$key] = $value->toArray(); 257 | } 258 | } 259 | } 260 | /* Encode the whole structure exactly once. */ 261 | return json_encode($data); 262 | } 263 | 264 | /** 265 | * Draw tree with string 266 | * 267 | * @param string $tabs 268 | * @return string 269 | */ 270 | public function toString($tabs = '') 271 | { 272 | $result = ''; 273 | 274 | foreach ($this->values as $key => $child) 275 | { 276 | $result .= $tabs.$this->attribute.' = '.$key; 277 | 278 | if ($child->getIsLeaf()) 279 | { 280 | $classCount = $this->getInstanceCountAsString($key); 281 | $result .= ' : '.$child->getChild('result').' '.$classCount."\n"; 282 | } 283 | else 284 | { 285 | $result .= "\n"; 286 | $result .= $child->toString($tabs."|\t"); 287 | } 288 | } 289 | 290 | return $result; 291 | } 292 | 293 | /** 294 | * Get classes count as string 295 | * 296 | * @param string $attribute_value 297 | * @return string 298 | */ 299 | private function getClassesCountAsString($attribute_value) 300 | { 301 | $result = '('; 302 | $total = array_sum($this->classes_count[$attribute_value]); 303 | 304 | foreach ($this->classes_count[$attribute_value] as $key => $value) { 305 | $result .= $value.'/'; 306 | } 307 | 308 | $result .= $total.')'; 309 | 310 | return $result; 311 | } 312 | 313 | /** 314 | * Get instance count as string 315 | * 316 | * @param string $attribute_value string 317 | * @return string 318 | */ 319 | private function getInstanceCountAsString($attribute_value) 320 | { 321 | $result = '('; 322 | $total = array_sum($this->classes_count[$attribute_value]); 323 | $child = $this->getChild($attribute_value); 324 | $className = $child->getChild('result'); 325 | $classCount = $this->classes_count[$attribute_value][$className]; 326 | 327 | if ($total > $classCount) { 328 | $result .= 
$total.'.0'; 329 | $result .= '/'.($total - $classCount).'.0'; 330 | } else { 331 | $result .= $classCount.'.0'; 332 | } 333 | 334 | $result .= ')'; 335 | 336 | return $result; 337 | } 338 | } --------------------------------------------------------------------------------