├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── nbproject ├── project.properties └── project.xml ├── src └── webd │ └── language │ ├── LCS.php │ ├── PorterStemmer.php │ ├── SpamSum.php │ └── StringDistance.php └── tests ├── bootstrap.php └── src └── webd └── language ├── LCSTest.php ├── PorterStemmerTest.php ├── SpamSumTest.php └── StringDistanceTest.php /.gitignore: -------------------------------------------------------------------------------- 1 | /nbproject/private/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Thibault Debatty and others. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # php-language-processing 2 | 3 | [![Latest Stable Version](https://poser.pugx.org/webd/language/v/stable)](https://packagist.org/packages/webd/language) [![Total Downloads](https://poser.pugx.org/webd/language/downloads)](https://packagist.org/packages/webd/language) 4 | 5 | A PHP library for language processing. Includes string distance function 6 | (Levenshtein, Jaro-Winkler, LCS-distance...), stemming, hashing etc. 7 | 8 | Installation using Composer 9 | --------------------------- 10 | 11 | in composer.json : 12 | ``` 13 | "require": { 14 | "webd/language": "dev-master" 15 | } 16 | ``` 17 | 18 | Then 19 | ``` 20 | composer install 21 | ``` 22 | 23 | Usage 24 | ----- 25 | 26 | ```php 27 | use webd\language\StringDistance; 28 | 29 | $string1 = "You won 10000$"; 30 | $string2 = "You won 15500$"; 31 | 32 | echo "Edit distance : " . StringDistance::EditDistance($string1, $string2); 33 | echo "Levenshtein : " . StringDistance::Levenshtein($string1, $string2); 34 | echo "Jaro-Winkler : " . StringDistance::JaroWinkler($string1, $string2); 35 | echo "Jaro-Winkler (prefix scale = 0.2) : " . StringDistance::JaroWinkler($string1, $string2, 0.2); 36 | 37 | use webd\language\PorterStemmer; 38 | echo "analyzing => " . PorterStemmer::Stem("analyzing"); 39 | echo "abandoned => " . PorterStemmer::Stem("abandoned"); 40 | echo "inclination => " . PorterStemmer::Stem("inclination"); 41 | 42 | $lcs = new \webd\language\LCS($str1, $str2); 43 | echo $lcs->value(); 44 | echo $lcs->length(); 45 | echo $lcs->distance(); 46 | 47 | // SpamSum, aka ssdeep, aka Context-Triggered Piecewize Hashing (CTPH): 48 | $s = new \webd\language\SpamSum; 49 | echo $s->HashString(file_get_contents($f)); 50 | ``` 51 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "webd/language", 3 | "description": "A library for language processing. Includes string distance function (Levenshtein, Jaro-Winkler,...), stemming, etc.", 4 | "autoload": { 5 | "psr-0": {"": "src/"} 6 | } 7 | } -------------------------------------------------------------------------------- /nbproject/project.properties: -------------------------------------------------------------------------------- 1 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_create_2e_tests=true 2 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_enabled=true 3 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_path=tests/bootstrap.php 4 | auxiliary.org-netbeans-modules-php-phpunit.configuration_2e_enabled=false 5 | auxiliary.org-netbeans-modules-php-phpunit.configuration_2e_path= 6 | auxiliary.org-netbeans-modules-php-phpunit.customSuite_2e_enabled=false 7 | auxiliary.org-netbeans-modules-php-phpunit.customSuite_2e_path= 8 | auxiliary.org-netbeans-modules-php-phpunit.phpUnit_2e_enabled=false 9 | auxiliary.org-netbeans-modules-php-phpunit.phpUnit_2e_path= 10 | auxiliary.org-netbeans-modules-php-phpunit.test_2e_groups_2e_ask=false 11 | auxiliary.org-netbeans-modules-php-phpunit.test_2e_run_2e_all=false 12 | include.path=${php.global.include.path} 13 | php.version=PHP_53 14 | source.encoding=UTF-8 15 | src.dir=. 16 | tags.asp=false 17 | tags.short=false 18 | test.src.dir=tests 19 | testing.providers=PhpUnit 20 | web.root=. 21 | -------------------------------------------------------------------------------- /nbproject/project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.netbeans.modules.php.project 4 | 5 | 6 | php-language-processing 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/webd/language/LCS.php: -------------------------------------------------------------------------------- 1 | X = $str1; 21 | $this->Y = $str2; 22 | 23 | $m = strlen($str1); 24 | $n = strlen($str2); 25 | 26 | $this->C = array(); 27 | 28 | for ($i = 0; $i <= $m; $i++) { 29 | $this->C[$i][0] = 0; 30 | } 31 | 32 | for ($j = 0; $j <= $n; $j++) { 33 | $this->C[0][$j] = 0; 34 | } 35 | 36 | for ($i = 1; $i <= $m; $i++) { 37 | for ($j = 1; $j <= $n; $j++) { 38 | if ($str1[$i-1] == $str2[$j-1]) { 39 | $this->C[$i][$j] = $this->C[$i-1][$j-1] + 1; 40 | 41 | } else { 42 | $this->C[$i][$j] = max($this->C[$i][$j-1], $this->C[$i-1][$j]); 43 | } 44 | } 45 | } 46 | 47 | //for i := 1..m 48 | // for j := 1..n 49 | // if X[i] = Y[j] 50 | // C[i,j] := C[i-1,j-1] + 1 51 | // else 52 | // C[i,j] := max(C[i,j-1], C[i-1,j]) 53 | } 54 | 55 | public function length() { 56 | return $this->C[strlen($this->X)][strlen($this->Y)]; 57 | } 58 | 59 | public function __toString() { 60 | return $this->value(); 61 | } 62 | 63 | public function value() { 64 | return $this->backtrack(strlen($this->X), strlen($this->Y)); 65 | } 66 | 67 | /** 68 | * Edit distance when only insertion and deletion is allowed (no 69 | * substitution) 70 | * = strlen(str1) + strlen(str2) - 2 * length(LCS(str1, str2)) 71 | * @param type $string1 72 | * @param type $string2 73 | */ 74 | public function distance() { 75 | return strlen($this->X) + strlen($this->Y) - 2 * $this->length(); 76 | } 77 | 78 | 79 | private function backtrack($i, $j) { 80 | if ($i == 0 || $j == 0) { 81 | return ""; 82 | } 83 | 84 | if ($this->X[$i-1] == $this->Y[$j-1]) { 85 | return $this->backtrack($i-1, $j-1) . $this->X[$i-1]; 86 | } 87 | 88 | if ($this->C[$i][$j-1] > $this->C[$i-1][$j]) { 89 | return $this->backtrack($i, $j-1); 90 | } 91 | 92 | return $this->backtrack($i-1, $j); 93 | 94 | // function backtrack(C[0..m,0..n], X[1..m], Y[1..n], i, j) 95 | // if i = 0 or j = 0 96 | // return "" 97 | // else if X[i] = Y[j] 98 | // return backtrack(C, X, Y, i-1, j-1) + X[i] 99 | // else 100 | // if C[i,j-1] > C[i-1,j] 101 | // return backtrack(C, X, Y, i, j-1) 102 | // else 103 | // return backtrack(C, X, Y, i-1, j) 104 | } 105 | 106 | // /** 107 | // * Edit distance when only insertion and deletion is allowed (no 108 | // * substitution) 109 | // * = strlen(str1) + strlen(str2) - 2 * length(LCS(str1, str2)) 110 | // * @param type $string1 111 | // * @param type $string2 112 | // */ 113 | // public static function distance($str1, $str2) { 114 | // return strlen($str1) + strlen($str2) - 2 * self::length($str1, $str2); 115 | // } 116 | // 117 | // public static function lcs($str1, $str2) { 118 | // $lcs = new LCS($str1, $str2); 119 | // return $lcs->backtrack(strlen($str1), strlen($str2)); 120 | // } 121 | // /** 122 | // * 123 | // * @param type $string1 124 | // * @param type $string2 125 | // */ 126 | // public static function length($string1, $string2) { 127 | // $lcs = new LCS($str1, $str2); 128 | // return $lcs->C; 129 | // 130 | // } 131 | } -------------------------------------------------------------------------------- /src/webd/language/PorterStemmer.php: -------------------------------------------------------------------------------- 1 | 1) { 256 | self::replace($word, 'e', ''); 257 | } else if (self::m(substr($word, 0, -1)) == 1) { 258 | 259 | if (!self::cvc(substr($word, 0, -1))) { 260 | self::replace($word, 'e', ''); 261 | } 262 | } 263 | } 264 | 265 | // Part b 266 | if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') { 267 | $word = substr($word, 0, -1); 268 | } 269 | 270 | return $word; 271 | } 272 | 273 | /** 274 | * Replaces the first string with the second, at the end of the string. If third 275 | * arg is given, then the preceding string must match that m count at least. 276 | * 277 | * @param string $str String to check 278 | * @param string $check Ending to check for 279 | * @param string $repl Replacement string 280 | * @param int $m Optional minimum number of m() to meet 281 | * @return bool Whether the $check string was at the end 282 | * of the $str string. True does not necessarily mean 283 | * that it was replaced. 284 | */ 285 | private static function replace(&$str, $check, $repl, $m = null) { 286 | $len = 0 - strlen($check); 287 | 288 | if (substr($str, $len) == $check) { 289 | $substr = substr($str, 0, $len); 290 | if (is_null($m) OR self::m($substr) > $m) { 291 | $str = $substr . $repl; 292 | } 293 | 294 | return true; 295 | } 296 | 297 | return false; 298 | } 299 | 300 | /** 301 | * What, you mean it's not obvious from the name? 302 | * 303 | * m() measures the number of consonant sequences in $str. if c is 304 | * a consonant sequence and v a vowel sequence, and <..> indicates arbitrary 305 | * presence, 306 | * 307 | * gives 0 308 | * vc gives 1 309 | * vcvc gives 2 310 | * vcvcvc gives 3 311 | * 312 | * @param string $str The string to return the m count for 313 | * @return int The m count 314 | */ 315 | private static function m($str) { 316 | $c = self::$regex_consonant; 317 | $v = self::$regex_vowel; 318 | 319 | $str = preg_replace("#^$c+#", '', $str); 320 | $str = preg_replace("#$v+$#", '', $str); 321 | 322 | preg_match_all("#($v+$c+)#", $str, $matches); 323 | 324 | return count($matches[1]); 325 | } 326 | 327 | /** 328 | * Returns true/false as to whether the given string contains two 329 | * of the same consonant next to each other at the end of the string. 330 | * 331 | * @param string $str String to check 332 | * @return bool Result 333 | */ 334 | private static function doubleConsonant($str) { 335 | $c = self::$regex_consonant; 336 | 337 | return preg_match("#$c{2}$#", $str, $matches) AND $matches[0]{0} == $matches[0]{1}; 338 | } 339 | 340 | /** 341 | * Checks for ending CVC sequence where second C is not W, X or Y 342 | * 343 | * @param string $str String to check 344 | * @return bool Result 345 | */ 346 | private static function cvc($str) { 347 | $c = self::$regex_consonant; 348 | $v = self::$regex_vowel; 349 | 350 | return preg_match("#($c$v$c)$#", $str, $matches) AND strlen($matches[1]) == 3 AND $matches[1]{2} != 'w' AND $matches[1]{2} != 'x' AND $matches[1]{2} != 'y'; 351 | } 352 | 353 | } 354 | 355 | ?> -------------------------------------------------------------------------------- /src/webd/language/SpamSum.php: -------------------------------------------------------------------------------- 1 | HashString($string); 23 | return $ss; 24 | } 25 | 26 | const HASH_PRIME = 0x01000193; 27 | const HASH_INIT = 0x28021967; 28 | const B64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 29 | 30 | protected $SPAMSUM_LENGTH = 64; 31 | protected $LETTERS = 64; 32 | protected $BLOCKSIZE = 0; 33 | protected $MIN_BLOCKSIZE = 3; 34 | protected $auto_blocksize = true; 35 | 36 | protected $left; 37 | protected $right; 38 | 39 | /** 40 | * Set a different hash length 41 | * min = 1, default = 64 42 | * 43 | * @param type $l 44 | */ 45 | public function SetHashLength($l) { 46 | $this->SPAMSUM_LENGTH = $l; 47 | } 48 | 49 | /** 50 | * Set the number of letters to use to create the hash 51 | * min = 2, max = 64, default = 64 (base64) 52 | * @param type $l 53 | */ 54 | public function SetLetters($l) { 55 | $this->LETTERS = $l; 56 | } 57 | 58 | /** 59 | * Manually set the minimum block size 60 | * min = 1, default = 3 61 | * @param type $s 62 | */ 63 | public function SetMinBlocksize($s) { 64 | $this->MIN_BLOCKSIZE = $s; 65 | } 66 | 67 | /** 68 | * Set the blok size manually, so that it won't be computed from the length of 69 | * the string 70 | * @param type $s 71 | */ 72 | public function SetBlockSize($s) { 73 | $this->BLOCKSIZE = $s; 74 | $this->auto_blocksize = false; 75 | } 76 | 77 | /** 78 | * 79 | * @param type $string 80 | * @return \webd\language\SpamSum 81 | */ 82 | public function HashString($string) { 83 | $b64 = self::B64; 84 | $length = strlen($string); 85 | 86 | $in = unpack('C*', $string); 87 | 88 | // Reindex (to start from 0) 89 | foreach ($in as $k => $v) { 90 | $in[$k - 1] = $v; 91 | } 92 | unset($in[count($in)]); 93 | 94 | // Guess a a reasonable block size 95 | if ($this->auto_blocksize) { 96 | $this->BLOCKSIZE = $this->MIN_BLOCKSIZE; 97 | 98 | while ($this->BLOCKSIZE * $this->SPAMSUM_LENGTH < $length) { 99 | $this->BLOCKSIZE = $this->BLOCKSIZE * 2; 100 | } 101 | } 102 | 103 | again: 104 | 105 | $this->left = array(); 106 | $this->right = array(); 107 | 108 | $k = $j = 0; 109 | $h3 = $h2 = self::HASH_INIT; 110 | $h = $this->rolling_hash_reset(); 111 | 112 | for ($i = 0; $i < $length; $i++) { 113 | 114 | /* at each character we update the rolling hash and the normal 115 | * hash. When the rolling hash hits the reset value then we emit 116 | * the normal hash as a element of the signature and reset both 117 | * hashes 118 | */ 119 | $h = $this->rolling_hash($in[$i]); 120 | $h2 = self::sum_hash($in[$i], $h2); 121 | $h3 = self::sum_hash($in[$i], $h3); 122 | 123 | if ($h % $this->BLOCKSIZE == ($this->BLOCKSIZE - 1)) { 124 | 125 | /* we have hit a reset point. We now emit a hash which is based 126 | * on all chacaters in the piece of the string between the last 127 | * reset point and this one 128 | */ 129 | $this->left[$j] = $b64[$h2 % $this->LETTERS]; 130 | if ($j < $this->SPAMSUM_LENGTH - 1) { 131 | 132 | /* we can have a problem with the tail overflowing. The easiest way 133 | * to cope with this is to only reset the second hash if we have 134 | * room for more characters in our signature. This has the effect of 135 | * combining the last few pieces of the message into a single piece 136 | */ 137 | $h2 = self::HASH_INIT; 138 | $j++; 139 | } 140 | } 141 | 142 | /* this produces a second signature with a block size of block_size*2. 143 | * By producing dual signatures in this way the effect of small changes 144 | * in the string near a block size boundary is greatly reduced. 145 | */ 146 | if ($h % ($this->BLOCKSIZE * 2) == (($this->BLOCKSIZE * 2) - 1)) { 147 | $this->right[$k] = $b64[$h3 % $this->LETTERS]; 148 | if ($k < $this->SPAMSUM_LENGTH / 2 - 1) { 149 | $h3 = self::HASH_INIT; 150 | $k++; 151 | } 152 | } 153 | } 154 | 155 | /* If we have anything left then add it to the end. This ensures that the 156 | * last part of the string is always considered 157 | */ 158 | if ($h != 0) { 159 | $this->left[$j] = $b64[$h2 % $this->LETTERS]; 160 | $this->right[$k] = $b64[$h3 % $this->LETTERS]; 161 | } 162 | 163 | /* Our blocksize guess may have been way off - repeat if necessary 164 | */ 165 | if ($this->auto_blocksize 166 | && $this->BLOCKSIZE > $this->MIN_BLOCKSIZE 167 | && $j < $this->SPAMSUM_LENGTH / 2) { 168 | 169 | $this->BLOCKSIZE = $this->BLOCKSIZE / 2; 170 | goto again; 171 | } 172 | 173 | return $this; 174 | } 175 | 176 | public function __toString() { 177 | return 178 | $this->BLOCKSIZE . ":" . $this->Left() . ":" . $this->Right(); 179 | } 180 | 181 | public function BlockSize() { 182 | return $this->BLOCKSIZE; 183 | } 184 | 185 | public function Left() { 186 | return implode("", $this->left); 187 | } 188 | 189 | public function Right() { 190 | return implode("", $this->right); 191 | } 192 | 193 | /* A simple non-rolling hash, based on the FNV hash 194 | */ 195 | protected static function sum_hash($c, $h) { 196 | $h = ($h * self::HASH_PRIME) % pow(2, 32); 197 | $h = ($h ^ $c) % pow(2, 32); 198 | return $h; 199 | } 200 | 201 | 202 | /* A rolling hash, based on the Adler checksum. By using a rolling hash 203 | * we can perform auto resynchronisation after inserts/deletes internally, 204 | * h1 is the sum of the bytes in the window and h2 is the sum of the bytes 205 | * times the index h3 is a shift/xor based rolling hash, and is mostly 206 | * needed to ensure that we can cope with large blocksize values 207 | */ 208 | const ROLLING_WINDOW = 7; 209 | 210 | protected $rolling_window = array(); 211 | protected $rolling_h1; 212 | protected $rolling_h2; 213 | protected $rolling_h3; 214 | protected $rolling_n; 215 | 216 | protected function rolling_hash($c) { 217 | $this->rolling_h2 -= $this->rolling_h1; 218 | $this->rolling_h2 += self::ROLLING_WINDOW * $c; 219 | 220 | $this->rolling_h1 += $c; 221 | $this->rolling_h1 -= $this->rolling_window[$this->rolling_n % self::ROLLING_WINDOW]; 222 | 223 | $this->rolling_window[$this->rolling_n % self::ROLLING_WINDOW] = $c; 224 | $this->rolling_n++; 225 | 226 | $this->rolling_h3 = ($this->rolling_h3 << 5) & 0xFFFFFFFF; 227 | $this->rolling_h3 ^= $c; 228 | 229 | return $this->rolling_h1 + $this->rolling_h2 + $this->rolling_h3; 230 | } 231 | 232 | protected function rolling_hash_reset() { 233 | for ($i = 0; $i < self::ROLLING_WINDOW; $i++) { 234 | $this->rolling_window[$i] = 0; 235 | } 236 | 237 | $this->rolling_h1 = 0; 238 | $this->rolling_h2 = 0; 239 | $this->rolling_h3 = 0; 240 | $this->rolling_n = 0; 241 | 242 | return 0; 243 | } 244 | 245 | } -------------------------------------------------------------------------------- /src/webd/language/StringDistance.php: -------------------------------------------------------------------------------- 1 | $char) { 65 | $search = strpos($string2, $char, $i <= $allowedDistance ? 0 : min($i - $allowedDistance, $str2_len)); 66 | if ($search !== false && $search <= $i + $allowedDistance + 1) { 67 | $commonCharacters .= $char; 68 | } 69 | } 70 | } 71 | 72 | return $commonCharacters; 73 | } 74 | 75 | protected static function getPrefixLength($string1, $string2, $MINPREFIXLENGTH = 4) { 76 | 77 | $n = min(array($MINPREFIXLENGTH, strlen($string1), strlen($string2))); 78 | 79 | for ($i = 0; $i < $n; $i++) { 80 | if ($string1[$i] != $string2[$i]) { 81 | // return index of first occurrence of different characters 82 | return $i; 83 | } 84 | } 85 | 86 | // first n characters are the same 87 | return $n; 88 | } 89 | 90 | /** 91 | * Returns the minimum number of single-character edits 92 | * (i.e. insertions, deletions or substitutions) required to change one 93 | * word into the other 94 | * @param type $string1 95 | * @param type $string2 96 | * @return type 97 | */ 98 | public static function Levenshtein($string1, $string2) { 99 | return levenshtein($string1, $string2); 100 | } 101 | 102 | /** 103 | * Levenshtein($string1, $string2) 104 | * @param type $string1 105 | * @param type $string2 106 | * @return type 107 | */ 108 | public static function EditDistance($string1, $string2) { 109 | return self::Levenshtein($string1, $string2); 110 | } 111 | 112 | } 113 | 114 | -------------------------------------------------------------------------------- /tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | object = new LCS("BACBAD", "BATBAD"); 21 | } 22 | 23 | /** 24 | * Tears down the fixture, for example, closes a network connection. 25 | * This method is called after a test is executed. 26 | */ 27 | protected function tearDown() { 28 | 29 | } 30 | 31 | /** 32 | * @covers webd\language\LCS::LCS 33 | * @todo Implement testLCS(). 34 | */ 35 | public function testValue() { 36 | $this->assertEquals($this->object->value(), "BABAD"); 37 | } 38 | 39 | /** 40 | * @covers webd\language\LCS::length 41 | * @todo Implement testLength(). 42 | */ 43 | public function testLength() { 44 | $this->assertEquals($this->object->length(), 5); 45 | } 46 | 47 | public function testDistance() { 48 | $this->assertEquals($this->object->distance(), 2); 49 | } 50 | 51 | /** 52 | * @covers webd\language\LCS::__toString 53 | * @todo Implement test__toString(). 54 | */ 55 | public function test__toString() { 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /tests/src/webd/language/PorterStemmerTest.php: -------------------------------------------------------------------------------- 1 | object = new PorterStemmer; 21 | } 22 | 23 | /** 24 | * Tears down the fixture, for example, closes a network connection. 25 | * This method is called after a test is executed. 26 | */ 27 | protected function tearDown() { 28 | 29 | } 30 | 31 | /** 32 | * @covers webd\language\PorterStemmer::Stem 33 | * @todo Implement testStem(). 34 | */ 35 | public function testStem() { 36 | $this->assertEquals(PorterStemmer::Stem("caresses"), "caress"); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /tests/src/webd/language/SpamSumTest.php: -------------------------------------------------------------------------------- 1 | object = new SpamSum; 23 | } 24 | 25 | /** 26 | * Tears down the fixture, for example, closes a network connection. 27 | * This method is called after a test is executed. 28 | */ 29 | protected function tearDown() { 30 | 31 | } 32 | 33 | /** 34 | * @covers webd\language\SpamSum::Hash 35 | * @todo Implement testHash(). 36 | */ 37 | public function testHashString() { 38 | $this->assertEquals( 39 | $this->object->HashString($this->str1), 40 | "6:MZEYWZDrpCGgFLLELGrX+TPdLgN98M6S8HROQ9Svb:M+hpTGgiNiM58LSj"); 41 | } 42 | 43 | /** 44 | * @covers webd\language\SpamSum::SetHashLength 45 | * @todo Implement testSetHashLength(). 46 | */ 47 | public function testSetHashLength() { 48 | $s = new SpamSum; 49 | $s->SetHashLength(10); 50 | $s->HashString($this->str1); 51 | $this->assertEquals( 52 | "M0Gj58Lo", 53 | $s->Left()); 54 | 55 | } 56 | 57 | /** 58 | * @covers webd\language\SpamSum::SetLetters 59 | * @todo Implement testSetLetters(). 60 | */ 61 | public function testSetLetters() { 62 | $s = new SpamSum; 63 | $s->SetLetters(8); 64 | $s->HashString($this->str1); 65 | $this->assertEquals( 66 | "EBEAGBDDBCGAFDDEDGDHGDHFDAFFEECCEHBGAFCHD", 67 | $s->Left()); 68 | } 69 | 70 | public function testSetMinBlocksize() { 71 | $s = new SpamSum; 72 | $s->SetMinBlocksize(1); 73 | $s->HashString($this->str1); 74 | $this->assertEquals( 75 | "4:M1yuN7qZF30RqjKgBDlWdH0eKyXCBMqGUAiDmNA1XEGAnFNuoILPaFAAhNj:MLN7qZvjKgJU0VmC7GmSFL8PaFAAhh", 76 | $s->__toString()); 77 | } 78 | 79 | 80 | /** 81 | * @covers webd\language\SpamSum::__toString 82 | * @todo Implement test__toString(). 83 | */ 84 | public function test__toString() { 85 | 86 | } 87 | 88 | /** 89 | * @covers webd\language\SpamSum::BlockSize 90 | * @todo Implement testBlockSize(). 91 | */ 92 | public function testBlockSize() { 93 | 94 | } 95 | 96 | /** 97 | * @covers webd\language\SpamSum::Left 98 | * @todo Implement testLeft(). 99 | */ 100 | public function testLeft() { 101 | 102 | } 103 | 104 | /** 105 | * @covers webd\language\SpamSum::Right 106 | * @todo Implement testRight(). 107 | */ 108 | public function testRight() { 109 | 110 | } 111 | 112 | } 113 | -------------------------------------------------------------------------------- /tests/src/webd/language/StringDistanceTest.php: -------------------------------------------------------------------------------- 1 | object = new StringDistance; 21 | } 22 | 23 | /** 24 | * Tears down the fixture, for example, closes a network connection. 25 | * This method is called after a test is executed. 26 | */ 27 | protected function tearDown() { 28 | 29 | } 30 | 31 | /** 32 | * @covers webd\language\Distance::Jaro 33 | * @todo Implement testJaro(). 34 | */ 35 | public function testJaro() { 36 | $this->assertEquals(0.944, StringDistance::Jaro("MARTHA", "MARHTA"), "", 0.001); 37 | } 38 | 39 | /** 40 | * @covers webd\language\Distance::JaroWinkler 41 | * @todo Implement testJaroWinkler(). 42 | */ 43 | public function testJaroWinkler() { 44 | $this->assertEquals(0.961, StringDistance::JaroWinkler("MARTHA", "MARHTA", 0.1), "", 0.001); 45 | } 46 | 47 | public function testLevenshtein() { 48 | $this->assertEquals(6, StringDistance::Levenshtein("bordure", "contexte")); 49 | } 50 | 51 | } 52 | --------------------------------------------------------------------------------