├── .gitignore
├── LICENSE
├── README.md
├── composer.json
├── nbproject
├── project.properties
└── project.xml
├── src
└── webd
│ └── language
│ ├── LCS.php
│ ├── PorterStemmer.php
│ ├── SpamSum.php
│ └── StringDistance.php
└── tests
├── bootstrap.php
└── src
└── webd
└── language
├── LCSTest.php
├── PorterStemmerTest.php
├── SpamSumTest.php
└── StringDistanceTest.php
/.gitignore:
--------------------------------------------------------------------------------
1 | /nbproject/private/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Thibault Debatty and others.
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # php-language-processing
2 |
3 | [](https://packagist.org/packages/webd/language) [](https://packagist.org/packages/webd/language)
4 |
5 | A PHP library for language processing. Includes string distance functions
6 | (Levenshtein, Jaro-Winkler, LCS-distance...), stemming, hashing, etc.
7 |
8 | Installation using Composer
9 | ---------------------------
10 |
11 | in composer.json :
12 | ```
13 | "require": {
14 | "webd/language": "dev-master"
15 | }
16 | ```
17 |
18 | Then
19 | ```
20 | composer install
21 | ```
22 |
23 | Usage
24 | -----
25 |
26 | ```php
27 | use webd\language\StringDistance;
28 |
29 | $string1 = "You won 10000$";
30 | $string2 = "You won 15500$";
31 |
32 | echo "Edit distance : " . StringDistance::EditDistance($string1, $string2);
33 | echo "Levenshtein : " . StringDistance::Levenshtein($string1, $string2);
34 | echo "Jaro-Winkler : " . StringDistance::JaroWinkler($string1, $string2);
35 | echo "Jaro-Winkler (prefix scale = 0.2) : " . StringDistance::JaroWinkler($string1, $string2, 0.2);
36 |
37 | use webd\language\PorterStemmer;
38 | echo "analyzing => " . PorterStemmer::Stem("analyzing");
39 | echo "abandoned => " . PorterStemmer::Stem("abandoned");
40 | echo "inclination => " . PorterStemmer::Stem("inclination");
41 |
42 | $lcs = new \webd\language\LCS($str1, $str2);
43 | echo $lcs->value();
44 | echo $lcs->length();
45 | echo $lcs->distance();
46 |
47 | // SpamSum, aka ssdeep, aka Context-Triggered Piecewise Hashing (CTPH):
48 | $s = new \webd\language\SpamSum;
49 | echo $s->HashString(file_get_contents($f));
50 | ```
51 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "webd/language",
3 | "description": "A library for language processing. Includes string distance function (Levenshtein, Jaro-Winkler,...), stemming, etc.",
4 | "autoload": {
5 | "psr-0": {"": "src/"}
6 | }
7 | }
--------------------------------------------------------------------------------
/nbproject/project.properties:
--------------------------------------------------------------------------------
1 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_create_2e_tests=true
2 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_enabled=true
3 | auxiliary.org-netbeans-modules-php-phpunit.bootstrap_2e_path=tests/bootstrap.php
4 | auxiliary.org-netbeans-modules-php-phpunit.configuration_2e_enabled=false
5 | auxiliary.org-netbeans-modules-php-phpunit.configuration_2e_path=
6 | auxiliary.org-netbeans-modules-php-phpunit.customSuite_2e_enabled=false
7 | auxiliary.org-netbeans-modules-php-phpunit.customSuite_2e_path=
8 | auxiliary.org-netbeans-modules-php-phpunit.phpUnit_2e_enabled=false
9 | auxiliary.org-netbeans-modules-php-phpunit.phpUnit_2e_path=
10 | auxiliary.org-netbeans-modules-php-phpunit.test_2e_groups_2e_ask=false
11 | auxiliary.org-netbeans-modules-php-phpunit.test_2e_run_2e_all=false
12 | include.path=${php.global.include.path}
13 | php.version=PHP_53
14 | source.encoding=UTF-8
15 | src.dir=.
16 | tags.asp=false
17 | tags.short=false
18 | test.src.dir=tests
19 | testing.providers=PhpUnit
20 | web.root=.
21 |
--------------------------------------------------------------------------------
/nbproject/project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | org.netbeans.modules.php.project
4 |
5 |
6 | php-language-processing
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/src/webd/language/LCS.php:
--------------------------------------------------------------------------------
1 | X = $str1;
21 | $this->Y = $str2;
22 |
23 | $m = strlen($str1);
24 | $n = strlen($str2);
25 |
26 | $this->C = array();
27 |
28 | for ($i = 0; $i <= $m; $i++) {
29 | $this->C[$i][0] = 0;
30 | }
31 |
32 | for ($j = 0; $j <= $n; $j++) {
33 | $this->C[0][$j] = 0;
34 | }
35 |
36 | for ($i = 1; $i <= $m; $i++) {
37 | for ($j = 1; $j <= $n; $j++) {
38 | if ($str1[$i-1] == $str2[$j-1]) {
39 | $this->C[$i][$j] = $this->C[$i-1][$j-1] + 1;
40 |
41 | } else {
42 | $this->C[$i][$j] = max($this->C[$i][$j-1], $this->C[$i-1][$j]);
43 | }
44 | }
45 | }
46 |
47 | //for i := 1..m
48 | // for j := 1..n
49 | // if X[i] = Y[j]
50 | // C[i,j] := C[i-1,j-1] + 1
51 | // else
52 | // C[i,j] := max(C[i,j-1], C[i-1,j])
53 | }
54 |
/**
 * Length of the longest common subsequence of the two stored strings.
 *
 * @return int Value of the last cell of the table C, i.e. C[strlen(X)][strlen(Y)]
 */
public function length() {
    $lastRow = strlen($this->X);
    $lastCol = strlen($this->Y);

    return $this->C[$lastRow][$lastCol];
}
58 |
/**
 * String form of the object: the longest common subsequence itself.
 *
 * @return string Same result as value()
 */
public function __toString() {
    $subsequence = $this->value();

    return $subsequence;
}
62 |
/**
 * Reconstructs one longest common subsequence of the two stored strings.
 *
 * @return string Subsequence obtained by walking the table back from its last cell
 */
public function value() {
    $rows = strlen($this->X);
    $cols = strlen($this->Y);

    return $this->backtrack($rows, $cols);
}
66 |
/**
 * Edit distance between the two stored strings when only insertion and
 * deletion are allowed (no substitution):
 *
 *   strlen(X) + strlen(Y) - 2 * length()
 *
 * Takes no arguments; it works on the strings held by this object.
 *
 * @return int Number of single-character insertions and deletions
 */
public function distance() {
    $combinedLength = strlen($this->X) + strlen($this->Y);

    return $combinedLength - 2 * $this->length();
}
77 |
78 |
/**
 * Walks the table C backwards from cell ($i, $j) and rebuilds the common
 * subsequence of the first $i characters of X and the first $j characters
 * of Y.
 *
 * Implemented as a loop rather than by recursion so that the call depth
 * does not grow with the length of the strings. The decisions taken at each
 * cell are the classic ones:
 *   - equal characters: keep the character and move diagonally;
 *   - otherwise move left when C[i][j-1] > C[i-1][j], else move up.
 *
 * @param  int    $i Number of leading characters of X to consider
 * @param  int    $j Number of leading characters of Y to consider
 * @return string The reconstructed subsequence ("" when $i or $j is 0)
 */
private function backtrack($i, $j) {
    // Characters are discovered last-to-first, so collect them reversed
    $reversed = "";

    while ($i != 0 && $j != 0) {
        if ($this->X[$i - 1] == $this->Y[$j - 1]) {
            $reversed .= $this->X[$i - 1];
            $i--;
            $j--;
        } elseif ($this->C[$i][$j - 1] > $this->C[$i - 1][$j]) {
            $j--;
        } else {
            $i--;
        }
    }

    return strrev($reversed);
}
105 |
106 | // /**
107 | // * Edit distance when only insertion and deletion is allowed (no
108 | // * substitution)
109 | // * = strlen(str1) + strlen(str2) - 2 * length(LCS(str1, str2))
110 | // * @param type $string1
111 | // * @param type $string2
112 | // */
113 | // public static function distance($str1, $str2) {
114 | // return strlen($str1) + strlen($str2) - 2 * self::length($str1, $str2);
115 | // }
116 | //
117 | // public static function lcs($str1, $str2) {
118 | // $lcs = new LCS($str1, $str2);
119 | // return $lcs->backtrack(strlen($str1), strlen($str2));
120 | // }
121 | // /**
122 | // *
123 | // * @param type $string1
124 | // * @param type $string2
125 | // */
126 | // public static function length($string1, $string2) {
127 | // $lcs = new LCS($str1, $str2);
128 | // return $lcs->C;
129 | //
130 | // }
131 | }
--------------------------------------------------------------------------------
/src/webd/language/PorterStemmer.php:
--------------------------------------------------------------------------------
1 | 1) {
256 | self::replace($word, 'e', '');
257 | } else if (self::m(substr($word, 0, -1)) == 1) {
258 |
259 | if (!self::cvc(substr($word, 0, -1))) {
260 | self::replace($word, 'e', '');
261 | }
262 | }
263 | }
264 |
265 | // Part b
266 | if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') {
267 | $word = substr($word, 0, -1);
268 | }
269 |
270 | return $word;
271 | }
272 |
/**
 * Replaces the suffix $check of $str with $repl.
 *
 * When $m is given, the replacement is only performed if the measure m() of
 * the stem (the part of $str before $check) is strictly greater than $m.
 *
 * @param  string   $str   Word to modify in place (passed by reference)
 * @param  string   $check Ending to look for
 * @param  string   $repl  Replacement ending
 * @param  int|null $m     Optional threshold that m() of the stem must exceed
 * @return bool     Whether $check was found at the end of $str. True does
 *                  not necessarily mean that it was replaced.
 */
private static function replace(&$str, $check, $repl, $m = null) {
    $offset = -strlen($check);

    // Strict comparison: a loose == would treat numeric-looking strings
    // such as "1e2" and "100" as the same ending.
    if (substr($str, $offset) !== $check) {
        return false;
    }

    $stem = substr($str, 0, $offset);
    if ($m === null || self::m($stem) > $m) {
        $str = $stem . $repl;
    }

    return true;
}
299 |
/**
 * Computes the "measure" m of a word or word part.
 *
 * Writing c for a run of consonants and v for a run of vowels, with square
 * brackets marking an optional part, the count is obtained as follows:
 *
 *   [c][v]        gives 0
 *   [c]vc[v]      gives 1
 *   [c]vcvc[v]    gives 2
 *   [c]vcvcvc[v]  gives 3
 *
 * @param  string $str The string to return the m count for
 * @return int    The m count
 */
private static function m($str) {
    $c = self::$regex_consonant;
    $v = self::$regex_vowel;

    // Strip the optional leading consonant run, then the optional trailing
    // vowel run (the patterns are applied in this order).
    $core = preg_replace(array("#^$c+#", "#$v+$#"), '', $str);

    // Every remaining vowel-run + consonant-run pair counts for one
    preg_match_all("#($v+$c+)#", $core, $pairs);

    return count($pairs[1]);
}
326 |
/**
 * Tells whether the string ends with the same consonant written twice.
 *
 * @param  string $str String to check
 * @return bool   True when the last two characters are consonants and are equal
 */
private static function doubleConsonant($str) {
    $c = self::$regex_consonant;

    if (!preg_match("#$c{2}$#", $str, $matches)) {
        return false;
    }

    // Square-bracket offsets: the curly-brace form ($s{0}) is deprecated in
    // PHP 7.4 and no longer parses in PHP 8.
    return $matches[0][0] == $matches[0][1];
}
339 |
/**
 * Checks for an ending consonant-vowel-consonant sequence whose final
 * consonant is not w, x or y.
 *
 * @param  string $str String to check
 * @return bool   Result
 */
private static function cvc($str) {
    $c = self::$regex_consonant;
    $v = self::$regex_vowel;

    if (!preg_match("#($c$v$c)$#", $str, $matches) || strlen($matches[1]) != 3) {
        return false;
    }

    // Square-bracket offset: the curly-brace form ($s{2}) is deprecated in
    // PHP 7.4 and no longer parses in PHP 8.
    $last = $matches[1][2];

    return $last != 'w' && $last != 'x' && $last != 'y';
}
352 |
353 | }
354 |
355 | ?>
--------------------------------------------------------------------------------
/src/webd/language/SpamSum.php:
--------------------------------------------------------------------------------
1 | HashString($string);
23 | return $ss;
24 | }
25 |
// Multiplier used by sum_hash()
const HASH_PRIME = 0x01000193;
// Starting value of both piecewise hashes in HashString()
const HASH_INIT = 0x28021967;
// Alphabet the signature characters are taken from
const B64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Upper bound on the number of characters in the left signature
protected $SPAMSUM_LENGTH = 64;
// Number of characters of B64 actually used (hash value modulo LETTERS)
protected $LETTERS = 64;
// Current block size; computed by HashString() unless set manually
protected $BLOCKSIZE = 0;
// Smallest block size the automatic guess may use
protected $MIN_BLOCKSIZE = 3;
// False once SetBlockSize() has been called
protected $auto_blocksize = true;

// Characters of the signature computed with BLOCKSIZE
protected $left;
// Characters of the signature computed with 2 * BLOCKSIZE
protected $right;
38 |
/**
 * Sets a different hash length.
 * min = 1, default = 64
 *
 * @param  int $l Maximum number of characters of the left signature
 * @throws \InvalidArgumentException When $l is not a number >= 1
 */
public function SetHashLength($l) {
    if (!is_numeric($l) || $l < 1) {
        throw new \InvalidArgumentException("Hash length must be a number >= 1");
    }

    $this->SPAMSUM_LENGTH = $l;
}
48 |
/**
 * Sets the number of letters used to create the hash.
 * min = 2, max = 64, default = 64 (base64)
 *
 * The value is used as a modulus to pick a character of the 64-character
 * alphabet, so anything outside 2..64 cannot produce a valid signature.
 *
 * @param  int $l Number of letters
 * @throws \InvalidArgumentException When $l is not a number between 2 and 64
 */
public function SetLetters($l) {
    if (!is_numeric($l) || $l < 2 || $l > strlen(self::B64)) {
        throw new \InvalidArgumentException("Number of letters must be between 2 and 64");
    }

    $this->LETTERS = $l;
}
57 |
/**
 * Manually sets the minimum block size.
 * min = 1, default = 3
 *
 * @param  int $s Smallest block size the automatic guess may use
 * @throws \InvalidArgumentException When $s is not a number >= 1
 */
public function SetMinBlocksize($s) {
    if (!is_numeric($s) || $s < 1) {
        throw new \InvalidArgumentException("Minimum block size must be a number >= 1");
    }

    $this->MIN_BLOCKSIZE = $s;
}
66 |
/**
 * Sets the block size manually, so that it is no longer computed from the
 * length of the string.
 *
 * The block size is used as a modulus while hashing, hence it must be at
 * least 1.
 *
 * @param  int $s Block size to use
 * @throws \InvalidArgumentException When $s is not a number >= 1
 */
public function SetBlockSize($s) {
    if (!is_numeric($s) || $s < 1) {
        throw new \InvalidArgumentException("Block size must be a number >= 1");
    }

    $this->BLOCKSIZE = $s;
    $this->auto_blocksize = false;
}
76 |
/**
 * Computes the SpamSum signature of a string.
 *
 * Two signatures are built in a single pass: "left" with the current block
 * size and "right" with twice that block size. Unless a block size was
 * forced with SetBlockSize(), the block size is first guessed from the
 * length of the input, then halved (and the pass repeated) for as long as
 * the left signature is shorter than half the configured hash length and
 * the block size is still above the minimum.
 *
 * Side effects: overwrites the stored block size (automatic mode only) and
 * both stored signatures, which can then be read with BlockSize(), Left(),
 * Right() or __toString().
 *
 * @param  string $string Bytes to hash
 * @return \webd\language\SpamSum This object
 */
public function HashString($string) {
    $b64 = self::B64;
    $length = strlen($string);

    // Byte values of the input, indexed from 0 (unpack() numbers them from 1)
    $in = $length > 0 ? array_values(unpack('C*', $string)) : array();

    // Guess a reasonable block size
    if ($this->auto_blocksize) {
        $this->BLOCKSIZE = $this->MIN_BLOCKSIZE;

        while ($this->BLOCKSIZE * $this->SPAMSUM_LENGTH < $length) {
            $this->BLOCKSIZE = $this->BLOCKSIZE * 2;
        }
    }

    do {
        $this->left = array();
        $this->right = array();

        $k = $j = 0;
        $h3 = $h2 = self::HASH_INIT;
        $h = $this->rolling_hash_reset();

        for ($i = 0; $i < $length; $i++) {

            /* at each character we update the rolling hash and the normal
             * hash. When the rolling hash hits the reset value then we emit
             * the normal hash as an element of the signature and reset both
             * hashes
             */
            $h = $this->rolling_hash($in[$i]);
            $h2 = self::sum_hash($in[$i], $h2);
            $h3 = self::sum_hash($in[$i], $h3);

            if ($h % $this->BLOCKSIZE == ($this->BLOCKSIZE - 1)) {

                /* we have hit a reset point. We now emit a hash which is
                 * based on all characters in the piece of the string between
                 * the last reset point and this one
                 */
                $this->left[$j] = $b64[$h2 % $this->LETTERS];
                if ($j < $this->SPAMSUM_LENGTH - 1) {

                    /* we can have a problem with the tail overflowing. The
                     * easiest way to cope with this is to only reset the
                     * second hash if we have room for more characters in our
                     * signature. This has the effect of combining the last
                     * few pieces of the message into a single piece
                     */
                    $h2 = self::HASH_INIT;
                    $j++;
                }
            }

            /* this produces a second signature with a block size of
             * block_size*2. By producing dual signatures in this way the
             * effect of small changes in the string near a block size
             * boundary is greatly reduced.
             */
            if ($h % ($this->BLOCKSIZE * 2) == (($this->BLOCKSIZE * 2) - 1)) {
                $this->right[$k] = $b64[$h3 % $this->LETTERS];
                if ($k < $this->SPAMSUM_LENGTH / 2 - 1) {
                    $h3 = self::HASH_INIT;
                    $k++;
                }
            }
        }

        /* If we have anything left then add it to the end. This ensures that
         * the last part of the string is always considered
         */
        if ($h != 0) {
            $this->left[$j] = $b64[$h2 % $this->LETTERS];
            $this->right[$k] = $b64[$h3 % $this->LETTERS];
        }

        /* Our blocksize guess may have been way off - repeat with half the
         * block size if necessary
         */
        $retry = $this->auto_blocksize
                && $this->BLOCKSIZE > $this->MIN_BLOCKSIZE
                && $j < $this->SPAMSUM_LENGTH / 2;

        if ($retry) {
            $this->BLOCKSIZE = $this->BLOCKSIZE / 2;
        }
    } while ($retry);

    return $this;
}
175 |
/**
 * Full signature, formatted as "<block size>:<left>:<right>".
 *
 * @return string
 */
public function __toString() {
    $parts = array($this->BLOCKSIZE, $this->Left(), $this->Right());

    return implode(":", $parts);
}
180 |
/**
 * Block size currently stored in the object (set manually, or computed by
 * the last call to HashString()).
 *
 * @return int
 */
public function BlockSize() {
    $blockSize = $this->BLOCKSIZE;

    return $blockSize;
}
184 |
/**
 * Signature computed with the block size, as a string.
 *
 * @return string Characters of the left signature glued together
 */
public function Left() {
    $characters = $this->left;

    return implode("", $characters);
}
188 |
/**
 * Signature computed with twice the block size, as a string.
 *
 * @return string Characters of the right signature glued together
 */
public function Right() {
    $characters = $this->right;

    return implode("", $characters);
}
192 |
/**
 * A simple non-rolling hash, based on the FNV hash: multiply the running
 * hash by HASH_PRIME, xor in the new byte, and reduce modulo 2^32 after
 * each of the two steps.
 *
 * @param  int $c Byte value to mix in
 * @param  int $h Running hash
 * @return int Updated hash
 */
protected static function sum_hash($c, $h) {
    $modulus = pow(2, 32);

    $multiplied = ($h * self::HASH_PRIME) % $modulus;

    return ($multiplied ^ $c) % $modulus;
}
200 |
201 |
/* A rolling hash, based on the Adler checksum. Using a rolling hash lets
 * the signature resynchronise by itself after inserts/deletes.
 *   h1 is the sum of the bytes in the window,
 *   h2 is the sum of the bytes times the index,
 *   h3 is a shift/xor based rolling hash, mostly needed to cope with large
 *      blocksize values.
 */
const ROLLING_WINDOW = 7;

// Last ROLLING_WINDOW byte values, used as a circular buffer
protected $rolling_window = array();
// The three components of the rolling hash (see above)
protected $rolling_h1;
protected $rolling_h2;
protected $rolling_h3;
// Number of bytes fed to the rolling hash since the last reset
protected $rolling_n;
215 |
/**
 * Feeds one byte to the rolling hash and returns its new value.
 *
 * @param  int $c Byte value entering the window
 * @return int Sum of the three components h1 + h2 + h3
 */
protected function rolling_hash($c) {
    // Position in the circular buffer of the byte that leaves the window
    $slot = $this->rolling_n % self::ROLLING_WINDOW;

    // h2 is updated first because it needs the previous value of h1
    $this->rolling_h2 = $this->rolling_h2 - $this->rolling_h1 + self::ROLLING_WINDOW * $c;
    $this->rolling_h1 = $this->rolling_h1 + $c - $this->rolling_window[$slot];

    $this->rolling_window[$slot] = $c;
    $this->rolling_n++;

    // Shift/xor component, kept within 32 bits
    $this->rolling_h3 = (($this->rolling_h3 << 5) & 0xFFFFFFFF) ^ $c;

    return $this->rolling_h1 + $this->rolling_h2 + $this->rolling_h3;
}
231 |
/**
 * Puts the rolling hash back in its initial state: a window of zeros, all
 * three components at zero and no byte consumed.
 *
 * @return int Always 0, the value of a freshly reset rolling hash
 */
protected function rolling_hash_reset() {
    $slot = 0;
    while ($slot < self::ROLLING_WINDOW) {
        $this->rolling_window[$slot] = 0;
        $slot++;
    }

    $this->rolling_h1 = $this->rolling_h2 = $this->rolling_h3 = 0;
    $this->rolling_n = 0;

    return 0;
}
244 |
245 | }
--------------------------------------------------------------------------------
/src/webd/language/StringDistance.php:
--------------------------------------------------------------------------------
1 | $char) {
65 | $search = strpos($string2, $char, $i <= $allowedDistance ? 0 : min($i - $allowedDistance, $str2_len));
66 | if ($search !== false && $search <= $i + $allowedDistance + 1) {
67 | $commonCharacters .= $char;
68 | }
69 | }
70 | }
71 |
72 | return $commonCharacters;
73 | }
74 |
/**
 * Length of the common prefix of the two strings, looking at no more than
 * $MINPREFIXLENGTH characters (despite its name, this parameter is the
 * upper bound on the result).
 *
 * @param  string $string1
 * @param  string $string2
 * @param  int    $MINPREFIXLENGTH Maximum number of leading characters compared
 * @return int    Index of the first differing character, or the number of
 *                characters compared when they are all equal
 */
protected static function getPrefixLength($string1, $string2, $MINPREFIXLENGTH = 4) {
    $limit = min($MINPREFIXLENGTH, strlen($string1), strlen($string2));

    $position = 0;
    while ($position < $limit) {
        if ($string1[$position] != $string2[$position]) {
            // first difference found: everything before it is the prefix
            return $position;
        }
        $position++;
    }

    // the first $limit characters are the same
    return $limit;
}
89 |
/**
 * Levenshtein distance: the minimum number of single-character edits
 * (insertions, deletions or substitutions) required to change one word into
 * the other. Delegates to PHP's built-in levenshtein().
 *
 * @param  string $string1
 * @param  string $string2
 * @return int    Result of levenshtein($string1, $string2)
 */
public static function Levenshtein($string1, $string2) {
    $edits = levenshtein($string1, $string2);

    return $edits;
}
101 |
/**
 * Alias of Levenshtein($string1, $string2).
 *
 * @param  string $string1
 * @param  string $string2
 * @return int    Same value as Levenshtein()
 */
public static function EditDistance($string1, $string2) {
    $distance = self::Levenshtein($string1, $string2);

    return $distance;
}
111 |
112 | }
113 |
114 |
--------------------------------------------------------------------------------
/tests/bootstrap.php:
--------------------------------------------------------------------------------
1 | object = new LCS("BACBAD", "BATBAD");
21 | }
22 |
/**
 * Fixture clean-up hook, called after each test is executed.
 * Nothing needs to be released for these tests.
 */
protected function tearDown() {
}
30 |
/**
 * The fixture's longest common subsequence is "BABAD".
 *
 * @covers webd\language\LCS::value
 */
public function testValue() {
    // expected value first, actual value second
    $this->assertEquals("BABAD", $this->object->value());
}
38 |
/**
 * The fixture's longest common subsequence has 5 characters.
 *
 * @covers webd\language\LCS::length
 */
public function testLength() {
    // expected value first, actual value second
    $this->assertEquals(5, $this->object->length());
}
46 |
/**
 * Two 6-character strings sharing a subsequence of 5 characters are at an
 * insert/delete distance of 6 + 6 - 2 * 5 = 2.
 *
 * @covers webd\language\LCS::distance
 */
public function testDistance() {
    // expected value first, actual value second
    $this->assertEquals(2, $this->object->distance());
}
50 |
/**
 * Casting the object to a string yields the same subsequence as value().
 *
 * @covers webd\language\LCS::__toString
 */
public function test__toString() {
    $this->assertEquals("BABAD", (string) $this->object);
}
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/tests/src/webd/language/PorterStemmerTest.php:
--------------------------------------------------------------------------------
1 | object = new PorterStemmer;
21 | }
22 |
/**
 * Fixture clean-up hook, called after each test is executed.
 * Nothing needs to be released for these tests.
 */
protected function tearDown() {
}
30 |
/**
 * "caresses" is stemmed to "caress".
 *
 * @covers webd\language\PorterStemmer::Stem
 */
public function testStem() {
    // expected value first, actual value second
    $this->assertEquals("caress", PorterStemmer::Stem("caresses"));
}
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/tests/src/webd/language/SpamSumTest.php:
--------------------------------------------------------------------------------
1 | object = new SpamSum;
23 | }
24 |
/**
 * Fixture clean-up hook, called after each test is executed.
 * Nothing needs to be released for these tests.
 */
protected function tearDown() {
}
32 |
/**
 * Hashing the reference text with default settings gives the reference
 * signature.
 *
 * @covers webd\language\SpamSum::HashString
 */
public function testHashString() {
    // HashString() returns the SpamSum object; compare its string form,
    // expected value first.
    $this->assertEquals(
            "6:MZEYWZDrpCGgFLLELGrX+TPdLgN98M6S8HROQ9Svb:M+hpTGgiNiM58LSj",
            (string) $this->object->HashString($this->str1));
}
42 |
/**
 * With a hash length of 10 the left signature of the reference text is
 * "M0Gj58Lo".
 *
 * @covers webd\language\SpamSum::SetHashLength
 */
public function testSetHashLength() {
    $hasher = new SpamSum;
    $hasher->SetHashLength(10);
    $hasher->HashString($this->str1);

    $this->assertEquals("M0Gj58Lo", $hasher->Left());
}
56 |
/**
 * With only 8 letters the left signature of the reference text uses the
 * characters A to H.
 *
 * @covers webd\language\SpamSum::SetLetters
 */
public function testSetLetters() {
    $hasher = new SpamSum;
    $hasher->SetLetters(8);
    $hasher->HashString($this->str1);

    $this->assertEquals("EBEAGBDDBCGAFDDEDGDHGDHFDAFFEECCEHBGAFCHD", $hasher->Left());
}
69 |
/**
 * With a minimum block size of 1 the reference text is hashed with a block
 * size of 4.
 *
 * @covers webd\language\SpamSum::SetMinBlocksize
 */
public function testSetMinBlocksize() {
    $hasher = new SpamSum;
    $hasher->SetMinBlocksize(1);
    $hasher->HashString($this->str1);

    $signature = $hasher->__toString();

    $this->assertEquals(
            "4:M1yuN7qZF30RqjKgBDlWdH0eKyXCBMqGUAiDmNA1XEGAnFNuoILPaFAAhNj:MLN7qZvjKgJU0VmC7GmSFL8PaFAAhh",
            $signature);
}
78 |
79 |
/**
 * The string form is "<block size>:<left>:<right>"; for the reference text
 * it is the same signature as the one checked in testHashString().
 *
 * @covers webd\language\SpamSum::__toString
 */
public function test__toString() {
    $this->object->HashString($this->str1);

    $this->assertEquals(
            "6:MZEYWZDrpCGgFLLELGrX+TPdLgN98M6S8HROQ9Svb:M+hpTGgiNiM58LSj",
            $this->object->__toString());
}
87 |
/**
 * The reference signature "6:...:..." means a block size of 6 was chosen
 * for the reference text.
 *
 * @covers webd\language\SpamSum::BlockSize
 */
public function testBlockSize() {
    $this->object->HashString($this->str1);

    $this->assertEquals(6, $this->object->BlockSize());
}
95 |
/**
 * Left() returns the middle part of the reference signature.
 *
 * @covers webd\language\SpamSum::Left
 */
public function testLeft() {
    $this->object->HashString($this->str1);

    $this->assertEquals(
            "MZEYWZDrpCGgFLLELGrX+TPdLgN98M6S8HROQ9Svb",
            $this->object->Left());
}
103 |
/**
 * Right() returns the last part of the reference signature.
 *
 * @covers webd\language\SpamSum::Right
 */
public function testRight() {
    $this->object->HashString($this->str1);

    $this->assertEquals("M+hpTGgiNiM58LSj", $this->object->Right());
}
111 |
112 | }
113 |
--------------------------------------------------------------------------------
/tests/src/webd/language/StringDistanceTest.php:
--------------------------------------------------------------------------------
1 | object = new StringDistance;
21 | }
22 |
/**
 * Fixture clean-up hook, called after each test is executed.
 * Nothing needs to be released for these tests.
 */
protected function tearDown() {
}
30 |
/**
 * Jaro similarity of "MARTHA" and "MARHTA" is 0.944 (within 0.001).
 *
 * @covers webd\language\StringDistance::Jaro
 */
public function testJaro() {
    $similarity = StringDistance::Jaro("MARTHA", "MARHTA");

    $this->assertEquals(0.944, $similarity, "", 0.001);
}
38 |
/**
 * Jaro-Winkler similarity of "MARTHA" and "MARHTA" with a prefix scale of
 * 0.1 is 0.961 (within 0.001).
 *
 * @covers webd\language\StringDistance::JaroWinkler
 */
public function testJaroWinkler() {
    $similarity = StringDistance::JaroWinkler("MARTHA", "MARHTA", 0.1);

    $this->assertEquals(0.961, $similarity, "", 0.001);
}
46 |
/**
 * "bordure" and "contexte" are 6 edits apart.
 *
 * @covers webd\language\StringDistance::Levenshtein
 */
public function testLevenshtein() {
    $distance = StringDistance::Levenshtein("bordure", "contexte");

    $this->assertEquals(6, $distance);
}
50 |
51 | }
52 |
--------------------------------------------------------------------------------