├── .github └── workflows │ ├── cover.yml │ ├── lint.yml │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── porterstemmer.go ├── porterstemmer_contains_vowel_test.go ├── porterstemmer_fixes_test.go ├── porterstemmer_fuzz_test.go ├── porterstemmer_has_repeat_double_consonant_suffix_test.go ├── porterstemmer_has_suffix_test.go ├── porterstemmer_is_consontant_test.go ├── porterstemmer_measure_test.go ├── porterstemmer_stem_string_test.go ├── porterstemmer_stem_without_lower_casing_test.go ├── porterstemmer_step1a_test.go ├── porterstemmer_step1b_test.go ├── porterstemmer_step1c_test.go ├── porterstemmer_step2_test.go ├── porterstemmer_step3_test.go ├── porterstemmer_step4_test.go ├── porterstemmer_step5a_test.go └── porterstemmer_step5b_test.go /.github/workflows/cover.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | name: Coverage 7 | jobs: 8 | coverage: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Install Go 12 | uses: actions/setup-go@v1 13 | with: 14 | go-version: '1.14.x' 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | - name: Test 18 | run: | 19 | go test -coverprofile=profile.cov ./... 20 | - name: Send coverage 21 | uses: shogo82148/actions-goveralls@v1 22 | with: 23 | path-to-profile: profile.cov 24 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | name: Lint 7 | jobs: 8 | lint: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v2 13 | - name: Run golangci-lint 14 | uses: actions-contrib/golangci-lint@v1 15 | with: 16 | args: run -E gofmt -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | pull_request: 6 | name: Tests 7 | jobs: 8 | test: 9 | strategy: 10 | matrix: 11 | go-version: [1.13.x, 1.14.x] 12 | platform: [ubuntu-latest, macos-latest, windows-latest] 13 | runs-on: ${{ matrix.platform }} 14 | steps: 15 | - name: Install Go 16 | uses: actions/setup-go@v1 17 | with: 18 | go-version: ${{ matrix.go-version }} 19 | - name: Checkout code 20 | uses: actions/checkout@v2 21 | - name: Test 22 | run: | 23 | go version 24 | go test -race ./... 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #* 2 | *.sublime-* 3 | *~ 4 | .#* 5 | .project 6 | .settings 7 | .DS_Store 8 | /testdata 9 | **/.idea/ 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Charles Iliya Krempeaux :: http://changelog.ca/ 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This fork... 2 | 3 | I'm maintaining this fork because the original author was not replying to issues or pull requests. For now I plan on maintaining this fork as necessary. 4 | 5 | ## Status 6 | 7 | [![Tests](https://github.com/blevesearch/go-porterstemmer/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/blevesearch/go-porterstemmer/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster) [![Lint](https://github.com/blevesearch/go-porterstemmer/workflows/Lint/badge.svg?branch=master&event=push)](https://github.com/blevesearch/go-porterstemmer/actions?query=workflow%3ALint+event%3Apush+branch%3Amaster) [![Coverage Status](https://coveralls.io/repos/github/blevesearch/go-porterstemmer/badge.svg)](https://coveralls.io/github/blevesearch/go-porterstemmer) 8 | 9 | # Go Porter Stemmer 10 | 11 | A native Go clean room implementation of the Porter Stemming Algorithm. 12 | 13 | This algorithm is of interest to people doing Machine Learning or 14 | Natural Language Processing (NLP). 15 | 16 | This is NOT a port. This is a native Go implementation from the human-readable 17 | description of the algorithm. 18 | 19 | I've tried to make it (more) efficient by NOT internally using string's, but 20 | instead internally using []rune's and using the same (array) buffer used by 21 | the []rune slice (and sub-slices) at all steps of the algorithm. 22 | 23 | For Porter Stemmer algorithm, see: 24 | 25 | http://tartarus.org/martin/PorterStemmer/def.txt (URL #1) 26 | 27 | http://tartarus.org/martin/PorterStemmer/ (URL #2) 28 | 29 | # Departures 30 | 31 | Also, since when I initially implemented it, it failed the tests at... 32 | 33 | http://tartarus.org/martin/PorterStemmer/voc.txt (URL #3) 34 | 35 | http://tartarus.org/martin/PorterStemmer/output.txt (URL #4) 36 | 37 | ... after reading the human-readble text over and over again to try to figure out 38 | what the error I made was (and doing all sorts of things to debug it) I came to the 39 | conclusion that the some of these tests were wrong according to the human-readable 40 | description of the algorithm. 41 | 42 | This led me to wonder if maybe other people's code that was passing these tests had 43 | rules that were not in the human-readable description. Which led me to look at the source 44 | code here... 45 | 46 | http://tartarus.org/martin/PorterStemmer/c.txt (URL #5) 47 | 48 | ... When I looked there I noticed that there are some items marked as a "DEPARTURE", 49 | which differ from the original algorithm. (There are 2 of these.) 50 | 51 | I implemented these departures, and the tests at URL #3 and URL #4 all passed. 52 | 53 | ## Usage 54 | 55 | To use this Golang library, use with something like: 56 | 57 | package main 58 | 59 | import ( 60 | "fmt" 61 | "github.com/reiver/go-porterstemmer" 62 | ) 63 | 64 | func main() { 65 | 66 | word := "Waxes" 67 | 68 | stem := porterstemmer.StemString(word) 69 | 70 | fmt.Printf("The word [%s] has the stem [%s].\n", word, stem) 71 | } 72 | 73 | Alternatively, if you want to be a bit more efficient, use []rune slices instead, with code like: 74 | 75 | package main 76 | 77 | import ( 78 | "fmt" 79 | "github.com/reiver/go-porterstemmer" 80 | ) 81 | 82 | func main() { 83 | 84 | word := []rune("Waxes") 85 | 86 | stem := porterstemmer.Stem(word) 87 | 88 | fmt.Printf("The word [%s] has the stem [%s].\n", string(word), string(stem)) 89 | } 90 | 91 | Although NOTE that the above code may modify original slice (named "word" in the example) as a side 92 | effect, for efficiency reasons. And that the slice named "stem" in the example above may be a 93 | sub-slice of the slice named "word". 94 | 95 | Also alternatively, if you already know that your word is already lowercase (and you don't need 96 | this library to lowercase your word for you) you can instead use code like: 97 | 98 | package main 99 | 100 | import ( 101 | "fmt" 102 | "github.com/reiver/go-porterstemmer" 103 | ) 104 | 105 | func main() { 106 | 107 | word := []rune("waxes") 108 | 109 | stem := porterstemmer.StemWithoutLowerCasing(word) 110 | 111 | fmt.Printf("The word [%s] has the stem [%s].\n", string(word), string(stem)) 112 | } 113 | 114 | Again NOTE (like with the previous example) that the above code may modify original slice (named 115 | "word" in the example) as a side effect, for efficiency reasons. And that the slice named "stem" 116 | in the example above may be a sub-slice of the slice named "word". 117 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/blevesearch/go-porterstemmer 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /porterstemmer.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | // "log" 5 | "unicode" 6 | ) 7 | 8 | func isConsonant(s []rune, i int) bool { 9 | 10 | //DEBUG 11 | //log.Printf("isConsonant: [%+v]", string(s[i])) 12 | 13 | var result bool 14 | 15 | switch s[i] { 16 | case 'a', 'e', 'i', 'o', 'u': 17 | result = false 18 | case 'y': 19 | if 0 == i { 20 | result = true 21 | } else { 22 | result = !isConsonant(s, i-1) 23 | } 24 | default: 25 | result = true 26 | } 27 | 28 | return result 29 | } 30 | 31 | func measure(s []rune) uint { 32 | 33 | // Initialize. 34 | lenS := len(s) 35 | result := uint(0) 36 | i := 0 37 | 38 | // Short Circuit. 39 | if 0 == lenS { 40 | /////////// RETURN 41 | return result 42 | } 43 | 44 | // Ignore (potential) consonant sequence at the beginning of word. 45 | for isConsonant(s, i) { 46 | 47 | //DEBUG 48 | //log.Printf("[measure([%s])] Eat Consonant [%d] -> [%s]", string(s), i, string(s[i])) 49 | 50 | i++ 51 | if i >= lenS { 52 | /////////////// RETURN 53 | return result 54 | } 55 | } 56 | 57 | // For each pair of a vowel sequence followed by a consonant sequence, increment result. 58 | Outer: 59 | for i < lenS { 60 | 61 | for !isConsonant(s, i) { 62 | 63 | //DEBUG 64 | //log.Printf("[measure([%s])] VOWEL [%d] -> [%s]", string(s), i, string(s[i])) 65 | 66 | i++ 67 | if i >= lenS { 68 | /////////// BREAK 69 | break Outer 70 | } 71 | } 72 | for isConsonant(s, i) { 73 | 74 | //DEBUG 75 | //log.Printf("[measure([%s])] CONSONANT [%d] -> [%s]", string(s), i, string(s[i])) 76 | 77 | i++ 78 | if i >= lenS { 79 | result++ 80 | /////////// BREAK 81 | break Outer 82 | } 83 | } 84 | result++ 85 | } 86 | 87 | // Return 88 | return result 89 | } 90 | 91 | func hasSuffix(s, suffix []rune) bool { 92 | 93 | lenSMinusOne := len(s) - 1 94 | lenSuffixMinusOne := len(suffix) - 1 95 | 96 | if lenSMinusOne <= lenSuffixMinusOne { 97 | return false 98 | } else if s[lenSMinusOne] != suffix[lenSuffixMinusOne] { // I suspect checking this first should speed this function up in practice. 99 | /////// RETURN 100 | return false 101 | } else { 102 | 103 | for i := 0; i < lenSuffixMinusOne; i++ { 104 | 105 | if suffix[i] != s[lenSMinusOne-lenSuffixMinusOne+i] { 106 | /////////////// RETURN 107 | return false 108 | } 109 | 110 | } 111 | 112 | } 113 | 114 | return true 115 | } 116 | 117 | func containsVowel(s []rune) bool { 118 | 119 | lenS := len(s) 120 | 121 | for i := 0; i < lenS; i++ { 122 | 123 | if !isConsonant(s, i) { 124 | /////////// RETURN 125 | return true 126 | } 127 | 128 | } 129 | 130 | return false 131 | } 132 | 133 | func hasRepeatDoubleConsonantSuffix(s []rune) bool { 134 | 135 | // Initialize. 136 | lenS := len(s) 137 | 138 | result := false 139 | 140 | // Do it! 141 | if 2 > lenS { 142 | result = false 143 | } else if s[lenS-1] == s[lenS-2] && isConsonant(s, lenS-1) { // Will using isConsonant() cause a problem with "YY"? 144 | result = true 145 | } else { 146 | result = false 147 | } 148 | 149 | // Return, 150 | return result 151 | } 152 | 153 | func hasConsonantVowelConsonantSuffix(s []rune) bool { 154 | 155 | // Initialize. 156 | lenS := len(s) 157 | 158 | result := false 159 | 160 | // Do it! 161 | if 3 > lenS { 162 | result = false 163 | } else if isConsonant(s, lenS-3) && !isConsonant(s, lenS-2) && isConsonant(s, lenS-1) { 164 | result = true 165 | } else { 166 | result = false 167 | } 168 | 169 | // Return 170 | return result 171 | } 172 | 173 | func step1a(s []rune) []rune { 174 | 175 | // Initialize. 176 | var result []rune = s 177 | 178 | lenS := len(s) 179 | 180 | // Do it! 181 | if suffix := []rune("sses"); hasSuffix(s, suffix) { 182 | 183 | lenTrim := 2 184 | 185 | subSlice := s[:lenS-lenTrim] 186 | 187 | result = subSlice 188 | } else if suffix := []rune("ies"); hasSuffix(s, suffix) { 189 | lenTrim := 2 190 | 191 | subSlice := s[:lenS-lenTrim] 192 | 193 | result = subSlice 194 | } else if suffix := []rune("ss"); hasSuffix(s, suffix) { 195 | 196 | result = s 197 | } else if suffix := []rune("s"); hasSuffix(s, suffix) { 198 | 199 | lenSuffix := 1 200 | 201 | subSlice := s[:lenS-lenSuffix] 202 | 203 | result = subSlice 204 | } 205 | 206 | // Return. 207 | return result 208 | } 209 | 210 | func step1b(s []rune) []rune { 211 | 212 | // Initialize. 213 | var result []rune = s 214 | 215 | lenS := len(s) 216 | 217 | // Do it! 218 | if suffix := []rune("eed"); hasSuffix(s, suffix) { 219 | lenSuffix := len(suffix) 220 | 221 | subSlice := s[:lenS-lenSuffix] 222 | 223 | m := measure(subSlice) 224 | 225 | if 0 < m { 226 | lenTrim := 1 227 | 228 | result = s[:lenS-lenTrim] 229 | } 230 | } else if suffix := []rune("ed"); hasSuffix(s, suffix) { 231 | lenSuffix := len(suffix) 232 | 233 | subSlice := s[:lenS-lenSuffix] 234 | 235 | if containsVowel(subSlice) { 236 | 237 | if suffix2 := []rune("at"); hasSuffix(subSlice, suffix2) { 238 | lenTrim := -1 239 | 240 | result = s[:lenS-lenSuffix-lenTrim] 241 | } else if suffix2 := []rune("bl"); hasSuffix(subSlice, suffix2) { 242 | lenTrim := -1 243 | 244 | result = s[:lenS-lenSuffix-lenTrim] 245 | } else if suffix2 := []rune("iz"); hasSuffix(subSlice, suffix2) { 246 | lenTrim := -1 247 | 248 | result = s[:lenS-lenSuffix-lenTrim] 249 | } else if c := subSlice[len(subSlice)-1]; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) { 250 | lenTrim := 1 251 | 252 | lenSubSlice := len(subSlice) 253 | 254 | result = subSlice[:lenSubSlice-lenTrim] 255 | } else if c := subSlice[len(subSlice)-1]; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c { 256 | lenTrim := -1 257 | 258 | result = s[:lenS-lenSuffix-lenTrim] 259 | 260 | result[len(result)-1] = 'e' 261 | } else { 262 | result = subSlice 263 | } 264 | 265 | } 266 | } else if suffix := []rune("ing"); hasSuffix(s, suffix) { 267 | lenSuffix := len(suffix) 268 | 269 | subSlice := s[:lenS-lenSuffix] 270 | 271 | if containsVowel(subSlice) { 272 | 273 | if suffix2 := []rune("at"); hasSuffix(subSlice, suffix2) { 274 | lenTrim := -1 275 | 276 | result = s[:lenS-lenSuffix-lenTrim] 277 | 278 | result[len(result)-1] = 'e' 279 | } else if suffix2 := []rune("bl"); hasSuffix(subSlice, suffix2) { 280 | lenTrim := -1 281 | 282 | result = s[:lenS-lenSuffix-lenTrim] 283 | 284 | result[len(result)-1] = 'e' 285 | } else if suffix2 := []rune("iz"); hasSuffix(subSlice, suffix2) { 286 | lenTrim := -1 287 | 288 | result = s[:lenS-lenSuffix-lenTrim] 289 | 290 | result[len(result)-1] = 'e' 291 | } else if c := subSlice[len(subSlice)-1]; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) { 292 | lenTrim := 1 293 | 294 | lenSubSlice := len(subSlice) 295 | 296 | result = subSlice[:lenSubSlice-lenTrim] 297 | } else if c := subSlice[len(subSlice)-1]; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c { 298 | lenTrim := -1 299 | 300 | result = s[:lenS-lenSuffix-lenTrim] 301 | 302 | result[len(result)-1] = 'e' 303 | } else { 304 | result = subSlice 305 | } 306 | 307 | } 308 | } 309 | 310 | // Return. 311 | return result 312 | } 313 | 314 | func step1c(s []rune) []rune { 315 | 316 | // Initialize. 317 | lenS := len(s) 318 | 319 | result := s 320 | 321 | // Do it! 322 | if 2 > lenS { 323 | /////////// RETURN 324 | return result 325 | } 326 | 327 | if 'y' == s[lenS-1] && containsVowel(s[:lenS-1]) { 328 | 329 | result[lenS-1] = 'i' 330 | 331 | } else if 'Y' == s[lenS-1] && containsVowel(s[:lenS-1]) { 332 | 333 | result[lenS-1] = 'I' 334 | 335 | } 336 | 337 | // Return. 338 | return result 339 | } 340 | 341 | func step2(s []rune) []rune { 342 | 343 | // Initialize. 344 | lenS := len(s) 345 | 346 | result := s 347 | 348 | // Do it! 349 | if suffix := []rune("ational"); hasSuffix(s, suffix) { 350 | if 0 < measure(s[:lenS-len(suffix)]) { 351 | result[lenS-5] = 'e' 352 | result = result[:lenS-4] 353 | } 354 | } else if suffix := []rune("tional"); hasSuffix(s, suffix) { 355 | if 0 < measure(s[:lenS-len(suffix)]) { 356 | result = result[:lenS-2] 357 | } 358 | } else if suffix := []rune("enci"); hasSuffix(s, suffix) { 359 | if 0 < measure(s[:lenS-len(suffix)]) { 360 | result[lenS-1] = 'e' 361 | } 362 | } else if suffix := []rune("anci"); hasSuffix(s, suffix) { 363 | if 0 < measure(s[:lenS-len(suffix)]) { 364 | result[lenS-1] = 'e' 365 | } 366 | } else if suffix := []rune("izer"); hasSuffix(s, suffix) { 367 | if 0 < measure(s[:lenS-len(suffix)]) { 368 | result = s[:lenS-1] 369 | } 370 | } else if suffix := []rune("bli"); hasSuffix(s, suffix) { // --DEPARTURE-- 371 | // } else if suffix := []rune("abli") ; hasSuffix(s, suffix) { 372 | if 0 < measure(s[:lenS-len(suffix)]) { 373 | result[lenS-1] = 'e' 374 | } 375 | } else if suffix := []rune("alli"); hasSuffix(s, suffix) { 376 | if 0 < measure(s[:lenS-len(suffix)]) { 377 | result = s[:lenS-2] 378 | } 379 | } else if suffix := []rune("entli"); hasSuffix(s, suffix) { 380 | if 0 < measure(s[:lenS-len(suffix)]) { 381 | result = s[:lenS-2] 382 | } 383 | } else if suffix := []rune("eli"); hasSuffix(s, suffix) { 384 | if 0 < measure(s[:lenS-len(suffix)]) { 385 | result = s[:lenS-2] 386 | } 387 | } else if suffix := []rune("ousli"); hasSuffix(s, suffix) { 388 | if 0 < measure(s[:lenS-len(suffix)]) { 389 | result = s[:lenS-2] 390 | } 391 | } else if suffix := []rune("ization"); hasSuffix(s, suffix) { 392 | if 0 < measure(s[:lenS-len(suffix)]) { 393 | result[lenS-5] = 'e' 394 | 395 | result = s[:lenS-4] 396 | } 397 | } else if suffix := []rune("ation"); hasSuffix(s, suffix) { 398 | if 0 < measure(s[:lenS-len(suffix)]) { 399 | result[lenS-3] = 'e' 400 | 401 | result = s[:lenS-2] 402 | } 403 | } else if suffix := []rune("ator"); hasSuffix(s, suffix) { 404 | if 0 < measure(s[:lenS-len(suffix)]) { 405 | result[lenS-2] = 'e' 406 | 407 | result = s[:lenS-1] 408 | } 409 | } else if suffix := []rune("alism"); hasSuffix(s, suffix) { 410 | if 0 < measure(s[:lenS-len(suffix)]) { 411 | result = s[:lenS-3] 412 | } 413 | } else if suffix := []rune("iveness"); hasSuffix(s, suffix) { 414 | if 0 < measure(s[:lenS-len(suffix)]) { 415 | result = s[:lenS-4] 416 | } 417 | } else if suffix := []rune("fulness"); hasSuffix(s, suffix) { 418 | if 0 < measure(s[:lenS-len(suffix)]) { 419 | result = s[:lenS-4] 420 | } 421 | } else if suffix := []rune("ousness"); hasSuffix(s, suffix) { 422 | if 0 < measure(s[:lenS-len(suffix)]) { 423 | result = s[:lenS-4] 424 | } 425 | } else if suffix := []rune("aliti"); hasSuffix(s, suffix) { 426 | if 0 < measure(s[:lenS-len(suffix)]) { 427 | result = s[:lenS-3] 428 | } 429 | } else if suffix := []rune("iviti"); hasSuffix(s, suffix) { 430 | if 0 < measure(s[:lenS-len(suffix)]) { 431 | result[lenS-3] = 'e' 432 | 433 | result = result[:lenS-2] 434 | } 435 | } else if suffix := []rune("biliti"); hasSuffix(s, suffix) { 436 | if 0 < measure(s[:lenS-len(suffix)]) { 437 | result[lenS-5] = 'l' 438 | result[lenS-4] = 'e' 439 | 440 | result = result[:lenS-3] 441 | } 442 | } else if suffix := []rune("logi"); hasSuffix(s, suffix) { // --DEPARTURE-- 443 | if 0 < measure(s[:lenS-len(suffix)]) { 444 | lenTrim := 1 445 | 446 | result = s[:lenS-lenTrim] 447 | } 448 | } 449 | 450 | // Return. 451 | return result 452 | } 453 | 454 | func step3(s []rune) []rune { 455 | 456 | // Initialize. 457 | lenS := len(s) 458 | result := s 459 | 460 | // Do it! 461 | if suffix := []rune("icate"); hasSuffix(s, suffix) { 462 | lenSuffix := len(suffix) 463 | 464 | if 0 < measure(s[:lenS-lenSuffix]) { 465 | result = result[:lenS-3] 466 | } 467 | } else if suffix := []rune("ative"); hasSuffix(s, suffix) { 468 | lenSuffix := len(suffix) 469 | 470 | subSlice := s[:lenS-lenSuffix] 471 | 472 | m := measure(subSlice) 473 | 474 | if 0 < m { 475 | result = subSlice 476 | } 477 | } else if suffix := []rune("alize"); hasSuffix(s, suffix) { 478 | lenSuffix := len(suffix) 479 | 480 | if 0 < measure(s[:lenS-lenSuffix]) { 481 | result = result[:lenS-3] 482 | } 483 | } else if suffix := []rune("iciti"); hasSuffix(s, suffix) { 484 | lenSuffix := len(suffix) 485 | 486 | if 0 < measure(s[:lenS-lenSuffix]) { 487 | result = result[:lenS-3] 488 | } 489 | } else if suffix := []rune("ical"); hasSuffix(s, suffix) { 490 | lenSuffix := len(suffix) 491 | 492 | if 0 < measure(s[:lenS-lenSuffix]) { 493 | result = result[:lenS-2] 494 | } 495 | } else if suffix := []rune("ful"); hasSuffix(s, suffix) { 496 | lenSuffix := len(suffix) 497 | 498 | subSlice := s[:lenS-lenSuffix] 499 | 500 | m := measure(subSlice) 501 | 502 | if 0 < m { 503 | result = subSlice 504 | } 505 | } else if suffix := []rune("ness"); hasSuffix(s, suffix) { 506 | lenSuffix := len(suffix) 507 | 508 | subSlice := s[:lenS-lenSuffix] 509 | 510 | m := measure(subSlice) 511 | 512 | if 0 < m { 513 | result = subSlice 514 | } 515 | } 516 | 517 | // Return. 518 | return result 519 | } 520 | 521 | func step4(s []rune) []rune { 522 | 523 | // Initialize. 524 | lenS := len(s) 525 | result := s 526 | 527 | // Do it! 528 | if suffix := []rune("al"); hasSuffix(s, suffix) { 529 | lenSuffix := len(suffix) 530 | 531 | subSlice := s[:lenS-lenSuffix] 532 | 533 | m := measure(subSlice) 534 | 535 | if 1 < m { 536 | result = result[:lenS-lenSuffix] 537 | } 538 | } else if suffix := []rune("ance"); hasSuffix(s, suffix) { 539 | lenSuffix := len(suffix) 540 | 541 | subSlice := s[:lenS-lenSuffix] 542 | 543 | m := measure(subSlice) 544 | 545 | if 1 < m { 546 | result = result[:lenS-lenSuffix] 547 | } 548 | } else if suffix := []rune("ence"); hasSuffix(s, suffix) { 549 | lenSuffix := len(suffix) 550 | 551 | subSlice := s[:lenS-lenSuffix] 552 | 553 | m := measure(subSlice) 554 | 555 | if 1 < m { 556 | result = result[:lenS-lenSuffix] 557 | } 558 | } else if suffix := []rune("er"); hasSuffix(s, suffix) { 559 | lenSuffix := len(suffix) 560 | 561 | subSlice := s[:lenS-lenSuffix] 562 | 563 | m := measure(subSlice) 564 | 565 | if 1 < m { 566 | result = subSlice 567 | } 568 | } else if suffix := []rune("ic"); hasSuffix(s, suffix) { 569 | lenSuffix := len(suffix) 570 | 571 | subSlice := s[:lenS-lenSuffix] 572 | 573 | m := measure(subSlice) 574 | 575 | if 1 < m { 576 | result = subSlice 577 | } 578 | } else if suffix := []rune("able"); hasSuffix(s, suffix) { 579 | lenSuffix := len(suffix) 580 | 581 | subSlice := s[:lenS-lenSuffix] 582 | 583 | m := measure(subSlice) 584 | 585 | if 1 < m { 586 | result = subSlice 587 | } 588 | } else if suffix := []rune("ible"); hasSuffix(s, suffix) { 589 | lenSuffix := len(suffix) 590 | 591 | subSlice := s[:lenS-lenSuffix] 592 | 593 | m := measure(subSlice) 594 | 595 | if 1 < m { 596 | result = subSlice 597 | } 598 | } else if suffix := []rune("ant"); hasSuffix(s, suffix) { 599 | lenSuffix := len(suffix) 600 | 601 | subSlice := s[:lenS-lenSuffix] 602 | 603 | m := measure(subSlice) 604 | 605 | if 1 < m { 606 | result = subSlice 607 | } 608 | } else if suffix := []rune("ement"); hasSuffix(s, suffix) { 609 | lenSuffix := len(suffix) 610 | 611 | subSlice := s[:lenS-lenSuffix] 612 | 613 | m := measure(subSlice) 614 | 615 | if 1 < m { 616 | result = subSlice 617 | } 618 | } else if suffix := []rune("ment"); hasSuffix(s, suffix) { 619 | lenSuffix := len(suffix) 620 | 621 | subSlice := s[:lenS-lenSuffix] 622 | 623 | m := measure(subSlice) 624 | 625 | if 1 < m { 626 | result = subSlice 627 | } 628 | } else if suffix := []rune("ent"); hasSuffix(s, suffix) { 629 | lenSuffix := len(suffix) 630 | 631 | subSlice := s[:lenS-lenSuffix] 632 | 633 | m := measure(subSlice) 634 | 635 | if 1 < m { 636 | result = subSlice 637 | } 638 | } else if suffix := []rune("ion"); hasSuffix(s, suffix) { 639 | lenSuffix := len(suffix) 640 | 641 | subSlice := s[:lenS-lenSuffix] 642 | 643 | m := measure(subSlice) 644 | 645 | c := subSlice[len(subSlice)-1] 646 | 647 | if 1 < m && ('s' == c || 't' == c) { 648 | result = subSlice 649 | } 650 | } else if suffix := []rune("ou"); hasSuffix(s, suffix) { 651 | lenSuffix := len(suffix) 652 | 653 | subSlice := s[:lenS-lenSuffix] 654 | 655 | m := measure(subSlice) 656 | 657 | if 1 < m { 658 | result = subSlice 659 | } 660 | } else if suffix := []rune("ism"); hasSuffix(s, suffix) { 661 | lenSuffix := len(suffix) 662 | 663 | subSlice := s[:lenS-lenSuffix] 664 | 665 | m := measure(subSlice) 666 | 667 | if 1 < m { 668 | result = subSlice 669 | } 670 | } else if suffix := []rune("ate"); hasSuffix(s, suffix) { 671 | lenSuffix := len(suffix) 672 | 673 | subSlice := s[:lenS-lenSuffix] 674 | 675 | m := measure(subSlice) 676 | 677 | if 1 < m { 678 | result = subSlice 679 | } 680 | } else if suffix := []rune("iti"); hasSuffix(s, suffix) { 681 | lenSuffix := len(suffix) 682 | 683 | subSlice := s[:lenS-lenSuffix] 684 | 685 | m := measure(subSlice) 686 | 687 | if 1 < m { 688 | result = subSlice 689 | } 690 | } else if suffix := []rune("ous"); hasSuffix(s, suffix) { 691 | lenSuffix := len(suffix) 692 | 693 | subSlice := s[:lenS-lenSuffix] 694 | 695 | m := measure(subSlice) 696 | 697 | if 1 < m { 698 | result = subSlice 699 | } 700 | } else if suffix := []rune("ive"); hasSuffix(s, suffix) { 701 | lenSuffix := len(suffix) 702 | 703 | subSlice := s[:lenS-lenSuffix] 704 | 705 | m := measure(subSlice) 706 | 707 | if 1 < m { 708 | result = subSlice 709 | } 710 | } else if suffix := []rune("ize"); hasSuffix(s, suffix) { 711 | lenSuffix := len(suffix) 712 | 713 | subSlice := s[:lenS-lenSuffix] 714 | 715 | m := measure(subSlice) 716 | 717 | if 1 < m { 718 | result = subSlice 719 | } 720 | } 721 | 722 | // Return. 723 | return result 724 | } 725 | 726 | func step5a(s []rune) []rune { 727 | 728 | // Initialize. 729 | lenS := len(s) 730 | result := s 731 | 732 | // Do it! 733 | if 'e' == s[lenS-1] { 734 | lenSuffix := 1 735 | 736 | subSlice := s[:lenS-lenSuffix] 737 | 738 | m := measure(subSlice) 739 | 740 | if 1 < m { 741 | result = subSlice 742 | } else if 1 == m { 743 | if c := subSlice[len(subSlice)-1]; !(hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c) { 744 | result = subSlice 745 | } 746 | } 747 | } 748 | 749 | // Return. 750 | return result 751 | } 752 | 753 | func step5b(s []rune) []rune { 754 | 755 | // Initialize. 756 | lenS := len(s) 757 | result := s 758 | 759 | // Do it! 760 | if 2 < lenS && 'l' == s[lenS-2] && 'l' == s[lenS-1] { 761 | 762 | lenSuffix := 1 763 | 764 | subSlice := s[:lenS-lenSuffix] 765 | 766 | m := measure(subSlice) 767 | 768 | if 1 < m { 769 | result = subSlice 770 | } 771 | } 772 | 773 | // Return. 774 | return result 775 | } 776 | 777 | func StemString(s string) string { 778 | 779 | // Convert string to []rune 780 | runeArr := []rune(s) 781 | 782 | // Stem. 783 | runeArr = Stem(runeArr) 784 | 785 | // Convert []rune to string 786 | str := string(runeArr) 787 | 788 | // Return. 789 | return str 790 | } 791 | 792 | func Stem(s []rune) []rune { 793 | 794 | // Initialize. 795 | lenS := len(s) 796 | 797 | // Short circuit. 798 | if 0 == lenS { 799 | /////////// RETURN 800 | return s 801 | } 802 | 803 | // Make all runes lowercase. 804 | for i := 0; i < lenS; i++ { 805 | s[i] = unicode.ToLower(s[i]) 806 | } 807 | 808 | // Stem 809 | result := StemWithoutLowerCasing(s) 810 | 811 | // Return. 812 | return result 813 | } 814 | 815 | func StemWithoutLowerCasing(s []rune) []rune { 816 | 817 | // Initialize. 818 | lenS := len(s) 819 | 820 | // Words that are of length 2 or less is already stemmed. 821 | // Don't do anything. 822 | if 2 >= lenS { 823 | /////////// RETURN 824 | return s 825 | } 826 | 827 | // Stem 828 | s = step1a(s) 829 | s = step1b(s) 830 | s = step1c(s) 831 | s = step2(s) 832 | s = step3(s) 833 | s = step4(s) 834 | s = step5a(s) 835 | s = step5b(s) 836 | 837 | // Return. 838 | return s 839 | } 840 | -------------------------------------------------------------------------------- /porterstemmer_contains_vowel_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestContainsVowel(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected bool 14 | }, 15) 15 | 16 | tests[i].S = []rune("apple") 17 | tests[i].Expected = true 18 | i++ 19 | 20 | tests[i].S = []rune("f") 21 | tests[i].Expected = false 22 | i++ 23 | 24 | tests[i].S = []rune("a") 25 | tests[i].Expected = true 26 | i++ 27 | 28 | tests[i].S = []rune("e") 29 | tests[i].Expected = true 30 | i++ 31 | 32 | tests[i].S = []rune("i") 33 | tests[i].Expected = true 34 | i++ 35 | 36 | tests[i].S = []rune("o") 37 | tests[i].Expected = true 38 | i++ 39 | 40 | tests[i].S = []rune("u") 41 | tests[i].Expected = true 42 | i++ 43 | 44 | tests[i].S = []rune("y") 45 | tests[i].Expected = false 46 | i++ 47 | 48 | tests[i].S = []rune("cy") 49 | tests[i].Expected = true 50 | 51 | for _, datum := range tests { 52 | if actual := containsVowel(datum.S); actual != datum.Expected { 53 | t.Errorf("Did NOT get what was expected for calling containsVowel() on [%s]. Expect [%t] but got [%t]", string(datum.S), datum.Expected, actual) 54 | } 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /porterstemmer_fixes_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | // Test for issue listed here: 8 | // https://github.com/reiver/go-porterstemmer/issues/1 9 | // 10 | // StemString("ion") was causing runtime exception 11 | func TestStemStringIon(t *testing.T) { 12 | 13 | expected := "ion" 14 | 15 | s := "ion" 16 | actual := StemString(s) 17 | if expected != actual { 18 | t.Errorf("Input: [%s] -> Actual: [%s]. Expected: [%s]", s, actual, expected) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /porterstemmer_fuzz_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | const maxFuzzLen = 6 9 | 10 | // Test inputs of English characters less than maxFuzzLen 11 | // Added to help diagnose https://github.com/reiver/go-porterstemmer/issues/4 12 | func TestStemFuzz(t *testing.T) { 13 | 14 | input := []byte{'a'} 15 | for len(input) < maxFuzzLen { 16 | // test input 17 | 18 | panicked := false 19 | func() { 20 | defer func() { panicked = recover() != nil }() 21 | StemString(string(input)) 22 | }() 23 | if panicked { 24 | t.Errorf("StemString panicked for input '%s'", input) 25 | } 26 | 27 | // if all z's extend 28 | if allZs(input) { 29 | input = bytes.Repeat([]byte{'a'}, len(input)+1) 30 | } else { 31 | // increment 32 | input = incrementBytes(input) 33 | } 34 | } 35 | } 36 | 37 | func incrementBytes(in []byte) []byte { 38 | rv := make([]byte, len(in)) 39 | copy(rv, in) 40 | for i := len(rv) - 1; i >= 0; i-- { 41 | if rv[i]+1 == '{' { 42 | rv[i] = 'a' 43 | continue 44 | } 45 | rv[i] = rv[i] + 1 46 | break 47 | 48 | } 49 | return rv 50 | } 51 | 52 | func allZs(in []byte) bool { 53 | for _, b := range in { 54 | if b != 'z' { 55 | return false 56 | } 57 | } 58 | return true 59 | } 60 | -------------------------------------------------------------------------------- /porterstemmer_has_repeat_double_consonant_suffix_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHasDoubleConsonantSuffix(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected bool 14 | }, 12) 15 | 16 | tests[i].S = []rune("apple") 17 | tests[i].Expected = false 18 | i++ 19 | 20 | tests[i].S = []rune("hiss") 21 | tests[i].Expected = true 22 | i++ 23 | 24 | tests[i].S = []rune("fizz") 25 | tests[i].Expected = true 26 | i++ 27 | 28 | tests[i].S = []rune("fill") 29 | tests[i].Expected = true 30 | i++ 31 | 32 | tests[i].S = []rune("ahaa") 33 | tests[i].Expected = false 34 | 35 | for _, datum := range tests { 36 | 37 | if actual := hasRepeatDoubleConsonantSuffix(datum.S); actual != datum.Expected { 38 | t.Errorf("Did NOT get what was expected for calling hasDoubleConsonantSuffix() on [%s]. Expect [%t] but got [%t]", string(datum.S), datum.Expected, actual) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /porterstemmer_has_suffix_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestHasSuffix(t *testing.T) { 8 | 9 | tests := make([]struct { 10 | S []rune 11 | Suffix []rune 12 | Expected bool 13 | }, 82) 14 | 15 | i := 0 16 | 17 | tests[i].S = []rune("ran") 18 | tests[i].Suffix = []rune("er") 19 | tests[i].Expected = false 20 | i++ 21 | 22 | tests[i].S = []rune("runner") 23 | tests[i].Suffix = []rune("er") 24 | tests[i].Expected = true 25 | i++ 26 | 27 | tests[i].S = []rune("runnar") 28 | tests[i].Suffix = []rune("er") 29 | tests[i].Expected = false 30 | i++ 31 | 32 | tests[i].S = []rune("runned") 33 | tests[i].Suffix = []rune("er") 34 | tests[i].Expected = false 35 | i++ 36 | 37 | tests[i].S = []rune("runnre") 38 | tests[i].Suffix = []rune("er") 39 | tests[i].Expected = false 40 | i++ 41 | 42 | // FIXME marty changed Expected 43 | // to false here because it seems 44 | // the contract does not support 45 | // suffix of same length as input 46 | // as this test implied 47 | tests[i].S = []rune("er") 48 | tests[i].Suffix = []rune("er") 49 | tests[i].Expected = false 50 | i++ 51 | 52 | tests[i].S = []rune("re") 53 | tests[i].Suffix = []rune("er") 54 | tests[i].Expected = false 55 | i++ 56 | 57 | tests[i].S = []rune("ran") 58 | tests[i].Suffix = []rune("ER") 59 | tests[i].Expected = false 60 | i++ 61 | 62 | tests[i].S = []rune("runner") 63 | tests[i].Suffix = []rune("ER") 64 | tests[i].Expected = false 65 | i++ 66 | 67 | tests[i].S = []rune("runnar") 68 | tests[i].Suffix = []rune("ER") 69 | tests[i].Expected = false 70 | i++ 71 | 72 | tests[i].S = []rune("runned") 73 | tests[i].Suffix = []rune("ER") 74 | tests[i].Expected = false 75 | i++ 76 | 77 | tests[i].S = []rune("runnre") 78 | tests[i].Suffix = []rune("ER") 79 | tests[i].Expected = false 80 | i++ 81 | 82 | tests[i].S = []rune("er") 83 | tests[i].Suffix = []rune("ER") 84 | tests[i].Expected = false 85 | i++ 86 | 87 | tests[i].S = []rune("re") 88 | tests[i].Suffix = []rune("ER") 89 | tests[i].Expected = false 90 | i++ 91 | 92 | tests[i].S = []rune("") 93 | tests[i].Suffix = []rune("er") 94 | tests[i].Expected = false 95 | i++ 96 | 97 | tests[i].S = []rune("e") 98 | tests[i].Suffix = []rune("er") 99 | tests[i].Expected = false 100 | i++ 101 | 102 | tests[i].S = []rune("caresses") 103 | tests[i].Suffix = []rune("sses") 104 | tests[i].Expected = true 105 | i++ 106 | 107 | tests[i].S = []rune("ponies") 108 | tests[i].Suffix = []rune("ies") 109 | tests[i].Expected = true 110 | i++ 111 | 112 | tests[i].S = []rune("caress") 113 | tests[i].Suffix = []rune("ss") 114 | tests[i].Expected = true 115 | i++ 116 | 117 | tests[i].S = []rune("cats") 118 | tests[i].Suffix = []rune("s") 119 | tests[i].Expected = true 120 | i++ 121 | 122 | tests[i].S = []rune("feed") 123 | tests[i].Suffix = []rune("eed") 124 | tests[i].Expected = true 125 | i++ 126 | 127 | tests[i].S = []rune("agreed") 128 | tests[i].Suffix = []rune("eed") 129 | tests[i].Expected = true 130 | i++ 131 | 132 | tests[i].S = []rune("plastered") 133 | tests[i].Suffix = []rune("ed") 134 | tests[i].Expected = true 135 | i++ 136 | 137 | tests[i].S = []rune("bled") 138 | tests[i].Suffix = []rune("ed") 139 | tests[i].Expected = true 140 | i++ 141 | 142 | tests[i].S = []rune("motoring") 143 | tests[i].Suffix = []rune("ing") 144 | tests[i].Expected = true 145 | i++ 146 | 147 | tests[i].S = []rune("sing") 148 | tests[i].Suffix = []rune("ing") 149 | tests[i].Expected = true 150 | i++ 151 | 152 | tests[i].S = []rune("conflat") 153 | tests[i].Suffix = []rune("at") 154 | tests[i].Expected = true 155 | i++ 156 | 157 | tests[i].S = []rune("troubl") 158 | tests[i].Suffix = []rune("bl") 159 | tests[i].Expected = true 160 | i++ 161 | 162 | tests[i].S = []rune("siz") 163 | tests[i].Suffix = []rune("iz") 164 | tests[i].Expected = true 165 | i++ 166 | 167 | tests[i].S = []rune("happy") 168 | tests[i].Suffix = []rune("y") 169 | tests[i].Expected = true 170 | i++ 171 | 172 | tests[i].S = []rune("sky") 173 | tests[i].Suffix = []rune("y") 174 | tests[i].Expected = true 175 | i++ 176 | 177 | tests[i].S = []rune("relational") 178 | tests[i].Suffix = []rune("ational") 179 | tests[i].Expected = true 180 | i++ 181 | 182 | tests[i].S = []rune("conditional") 183 | tests[i].Suffix = []rune("tional") 184 | tests[i].Expected = true 185 | i++ 186 | 187 | tests[i].S = []rune("rational") 188 | tests[i].Suffix = []rune("tional") 189 | tests[i].Expected = true 190 | i++ 191 | 192 | tests[i].S = []rune("valenci") 193 | tests[i].Suffix = []rune("enci") 194 | tests[i].Expected = true 195 | i++ 196 | 197 | tests[i].S = []rune("hesitanci") 198 | tests[i].Suffix = []rune("anci") 199 | tests[i].Expected = true 200 | i++ 201 | 202 | tests[i].S = []rune("digitizer") 203 | tests[i].Suffix = []rune("izer") 204 | tests[i].Expected = true 205 | i++ 206 | 207 | tests[i].S = []rune("conformabli") 208 | tests[i].Suffix = []rune("abli") 209 | tests[i].Expected = true 210 | i++ 211 | 212 | tests[i].S = []rune("radicalli") 213 | tests[i].Suffix = []rune("alli") 214 | tests[i].Expected = true 215 | i++ 216 | 217 | tests[i].S = []rune("differentli") 218 | tests[i].Suffix = []rune("entli") 219 | tests[i].Expected = true 220 | i++ 221 | 222 | tests[i].S = []rune("vileli") 223 | tests[i].Suffix = []rune("eli") 224 | tests[i].Expected = true 225 | i++ 226 | 227 | tests[i].S = []rune("analogousli") 228 | tests[i].Suffix = []rune("ousli") 229 | tests[i].Expected = true 230 | i++ 231 | 232 | tests[i].S = []rune("vietnamization") 233 | tests[i].Suffix = []rune("ization") 234 | tests[i].Expected = true 235 | i++ 236 | 237 | tests[i].S = []rune("predication") 238 | tests[i].Suffix = []rune("ation") 239 | tests[i].Expected = true 240 | i++ 241 | 242 | tests[i].S = []rune("operator") 243 | tests[i].Suffix = []rune("ator") 244 | tests[i].Expected = true 245 | i++ 246 | 247 | tests[i].S = []rune("feudalism") 248 | tests[i].Suffix = []rune("alism") 249 | tests[i].Expected = true 250 | i++ 251 | 252 | tests[i].S = []rune("decisiveness") 253 | tests[i].Suffix = []rune("iveness") 254 | tests[i].Expected = true 255 | i++ 256 | 257 | tests[i].S = []rune("hopefulness") 258 | tests[i].Suffix = []rune("fulness") 259 | tests[i].Expected = true 260 | i++ 261 | 262 | tests[i].S = []rune("callousness") 263 | tests[i].Suffix = []rune("ousness") 264 | tests[i].Expected = true 265 | i++ 266 | 267 | tests[i].S = []rune("formaliti") 268 | tests[i].Suffix = []rune("aliti") 269 | tests[i].Expected = true 270 | i++ 271 | 272 | tests[i].S = []rune("sensitiviti") 273 | tests[i].Suffix = []rune("iviti") 274 | tests[i].Expected = true 275 | i++ 276 | 277 | tests[i].S = []rune("sensibiliti") 278 | tests[i].Suffix = []rune("biliti") 279 | tests[i].Expected = true 280 | i++ 281 | 282 | tests[i].S = []rune("triplicate") 283 | tests[i].Suffix = []rune("icate") 284 | tests[i].Expected = true 285 | i++ 286 | 287 | tests[i].S = []rune("formative") 288 | tests[i].Suffix = []rune("ative") 289 | tests[i].Expected = true 290 | i++ 291 | 292 | tests[i].S = []rune("formalize") 293 | tests[i].Suffix = []rune("alize") 294 | tests[i].Expected = true 295 | i++ 296 | 297 | tests[i].S = []rune("electriciti") 298 | tests[i].Suffix = []rune("iciti") 299 | tests[i].Expected = true 300 | i++ 301 | 302 | tests[i].S = []rune("electrical") 303 | tests[i].Suffix = []rune("ical") 304 | tests[i].Expected = true 305 | i++ 306 | 307 | tests[i].S = []rune("hopeful") 308 | tests[i].Suffix = []rune("ful") 309 | tests[i].Expected = true 310 | i++ 311 | 312 | tests[i].S = []rune("goodness") 313 | tests[i].Suffix = []rune("ness") 314 | tests[i].Expected = true 315 | i++ 316 | 317 | tests[i].S = []rune("revival") 318 | tests[i].Suffix = []rune("al") 319 | tests[i].Expected = true 320 | i++ 321 | 322 | tests[i].S = []rune("allowance") 323 | tests[i].Suffix = []rune("ance") 324 | tests[i].Expected = true 325 | i++ 326 | 327 | tests[i].S = []rune("inference") 328 | tests[i].Suffix = []rune("ence") 329 | tests[i].Expected = true 330 | i++ 331 | 332 | tests[i].S = []rune("airliner") 333 | tests[i].Suffix = []rune("er") 334 | tests[i].Expected = true 335 | i++ 336 | 337 | tests[i].S = []rune("gyroscopic") 338 | tests[i].Suffix = []rune("ic") 339 | tests[i].Expected = true 340 | i++ 341 | 342 | tests[i].S = []rune("adjustable") 343 | tests[i].Suffix = []rune("able") 344 | tests[i].Expected = true 345 | i++ 346 | 347 | tests[i].S = []rune("defensible") 348 | tests[i].Suffix = []rune("ible") 349 | tests[i].Expected = true 350 | i++ 351 | 352 | tests[i].S = []rune("irritant") 353 | tests[i].Suffix = []rune("ant") 354 | tests[i].Expected = true 355 | i++ 356 | 357 | tests[i].S = []rune("replacement") 358 | tests[i].Suffix = []rune("ement") 359 | tests[i].Expected = true 360 | i++ 361 | 362 | tests[i].S = []rune("adjustment") 363 | tests[i].Suffix = []rune("ment") 364 | tests[i].Expected = true 365 | i++ 366 | 367 | tests[i].S = []rune("dependent") 368 | tests[i].Suffix = []rune("ent") 369 | tests[i].Expected = true 370 | i++ 371 | 372 | tests[i].S = []rune("adoption") 373 | tests[i].Suffix = []rune("ion") 374 | tests[i].Expected = true 375 | i++ 376 | 377 | tests[i].S = []rune("homologou") 378 | tests[i].Suffix = []rune("ou") 379 | tests[i].Expected = true 380 | i++ 381 | 382 | tests[i].S = []rune("communism") 383 | tests[i].Suffix = []rune("ism") 384 | tests[i].Expected = true 385 | i++ 386 | 387 | tests[i].S = []rune("activate") 388 | tests[i].Suffix = []rune("ate") 389 | tests[i].Expected = true 390 | i++ 391 | 392 | tests[i].S = []rune("angulariti") 393 | tests[i].Suffix = []rune("iti") 394 | tests[i].Expected = true 395 | i++ 396 | 397 | tests[i].S = []rune("homologous") 398 | tests[i].Suffix = []rune("ous") 399 | tests[i].Expected = true 400 | i++ 401 | 402 | tests[i].S = []rune("effective") 403 | tests[i].Suffix = []rune("ive") 404 | tests[i].Expected = true 405 | i++ 406 | 407 | tests[i].S = []rune("bowdlerize") 408 | tests[i].Suffix = []rune("ize") 409 | tests[i].Expected = true 410 | i++ 411 | 412 | tests[i].S = []rune("probate") 413 | tests[i].Suffix = []rune("e") 414 | tests[i].Expected = true 415 | i++ 416 | 417 | tests[i].S = []rune("rate") 418 | tests[i].Suffix = []rune("e") 419 | tests[i].Expected = true 420 | i++ 421 | 422 | tests[i].S = []rune("cease") 423 | tests[i].Suffix = []rune("e") 424 | tests[i].Expected = true 425 | 426 | for _, datum := range tests { 427 | if actual := hasSuffix(datum.S, datum.Suffix); actual != datum.Expected { 428 | t.Errorf("Did NOT get what was expected for calling hasSuffix() on [%s] with suffix [%s]. Expect [%t] but got [%t]", string(datum.S), string(datum.Suffix), datum.Expected, actual) 429 | } 430 | } 431 | } 432 | -------------------------------------------------------------------------------- /porterstemmer_is_consontant_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestIsConsontant(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []bool 14 | }, 12) 15 | 16 | tests[i].S = []rune("apple") 17 | tests[i].Expected = []bool{false, true, true, true, false} 18 | i++ 19 | 20 | tests[i].S = []rune("cyan") 21 | tests[i].Expected = []bool{true, false, false, true} 22 | i++ 23 | 24 | tests[i].S = []rune("connects") 25 | tests[i].Expected = []bool{true, false, true, true, false, true, true, true} 26 | i++ 27 | 28 | tests[i].S = []rune("yellow") 29 | tests[i].Expected = []bool{true, false, true, true, false, true} 30 | i++ 31 | 32 | tests[i].S = []rune("excellent") 33 | tests[i].Expected = []bool{false, true, true, false, true, true, false, true, true} 34 | i++ 35 | 36 | tests[i].S = []rune("yuk") 37 | tests[i].Expected = []bool{true, false, true} 38 | i++ 39 | 40 | tests[i].S = []rune("syzygy") 41 | tests[i].Expected = []bool{true, false, true, false, true, false} 42 | i++ 43 | 44 | tests[i].S = []rune("school") 45 | tests[i].Expected = []bool{true, true, true, false, false, true} 46 | i++ 47 | 48 | tests[i].S = []rune("pay") 49 | tests[i].Expected = []bool{true, false, true} 50 | i++ 51 | 52 | tests[i].S = []rune("golang") 53 | tests[i].Expected = []bool{true, false, true, false, true, true} 54 | i++ 55 | 56 | // NOTE: The Porter Stemmer technical should make a mistake on the second "y". 57 | // Really, both the 1st and 2nd "y" are consontants. But 58 | tests[i].S = []rune("sayyid") 59 | tests[i].Expected = []bool{true, false, true, false, false, true} 60 | i++ 61 | 62 | tests[i].S = []rune("ya") 63 | tests[i].Expected = []bool{true, false} 64 | 65 | for _, datum := range tests { 66 | for i = 0; i < len(datum.S); i++ { 67 | 68 | if actual := isConsonant(datum.S, i); actual != datum.Expected[i] { 69 | t.Errorf("Did NOT get what was expected for calling isConsonant() on [%s] at [%d] (i.e., [%s]). Expect [%t] but got [%t]", string(datum.S), i, string(datum.S[i]), datum.Expected[i], actual) 70 | } 71 | } // for 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /porterstemmer_measure_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestMeasure(t *testing.T) { 8 | 9 | tests := make([]struct { 10 | S []rune 11 | Expected uint 12 | }, 27) 13 | 14 | tests[0].S = []rune("ya") 15 | tests[0].Expected = 0 16 | 17 | tests[1].S = []rune("cyan") 18 | tests[1].Expected = 1 19 | 20 | tests[2].S = []rune("connects") 21 | tests[2].Expected = 2 22 | 23 | tests[3].S = []rune("yellow") 24 | tests[3].Expected = 2 25 | 26 | tests[4].S = []rune("excellent") 27 | tests[4].Expected = 3 28 | 29 | tests[5].S = []rune("yuk") 30 | tests[5].Expected = 1 31 | 32 | tests[6].S = []rune("syzygy") 33 | tests[6].Expected = 2 34 | 35 | tests[7].S = []rune("school") 36 | tests[7].Expected = 1 37 | 38 | tests[8].S = []rune("pay") 39 | tests[8].Expected = 1 40 | 41 | tests[9].S = []rune("golang") 42 | tests[9].Expected = 2 43 | 44 | // NOTE: The Porter Stemmer technical should make a mistake on the second "y". 45 | // Really, both the 1st and 2nd "y" are consontants. But 46 | tests[10].S = []rune("sayyid") 47 | tests[10].Expected = 2 48 | 49 | tests[11].S = []rune("ya") 50 | tests[11].Expected = 0 51 | 52 | tests[12].S = []rune("") 53 | tests[12].Expected = 0 54 | 55 | tests[13].S = []rune("tr") 56 | tests[13].Expected = 0 57 | 58 | tests[14].S = []rune("ee") 59 | tests[14].Expected = 0 60 | 61 | tests[15].S = []rune("tree") 62 | tests[15].Expected = 0 63 | 64 | tests[16].S = []rune("t") 65 | tests[16].Expected = 0 66 | 67 | tests[18].S = []rune("by") 68 | tests[18].Expected = 0 69 | 70 | tests[19].S = []rune("trouble") 71 | tests[19].Expected = 1 72 | 73 | tests[20].S = []rune("oats") 74 | tests[20].Expected = 1 75 | 76 | tests[21].S = []rune("trees") 77 | tests[21].Expected = 1 78 | 79 | tests[22].S = []rune("ivy") 80 | tests[22].Expected = 1 81 | 82 | tests[23].S = []rune("troubles") 83 | tests[23].Expected = 2 84 | 85 | tests[24].S = []rune("private") 86 | tests[24].Expected = 2 87 | 88 | tests[25].S = []rune("oaten") 89 | tests[25].Expected = 2 90 | 91 | tests[26].S = []rune("orrery") 92 | tests[26].Expected = 2 93 | 94 | for _, datum := range tests { 95 | if actual := measure(datum.S); actual != datum.Expected { 96 | t.Errorf("Did NOT get what was expected for calling measure() on [%s]. Expect [%d] but got [%d]", string(datum.S), datum.Expected, actual) 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /porterstemmer_stem_string_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "bufio" 5 | "io/ioutil" 6 | "net/http" 7 | "os" 8 | "strings" 9 | "testing" 10 | ) 11 | 12 | func TestStemString(t *testing.T) { 13 | 14 | testDataDirName := "testdata" 15 | 16 | _, err := os.Stat(testDataDirName) 17 | if nil != err { 18 | _ = os.Mkdir(testDataDirName, 0755) 19 | } 20 | _, err = os.Stat(testDataDirName) 21 | if nil != err { 22 | t.Errorf("The test data folder ([%s]) does not exists (and could not create it). Received error: [%v]", testDataDirName, err) 23 | /////// RETURN 24 | return 25 | } 26 | 27 | vocFileName := testDataDirName + "/voc.txt" 28 | _, err = os.Stat(vocFileName) 29 | if nil != err { 30 | 31 | vocHref := "http://tartarus.org/martin/PorterStemmer/voc.txt" 32 | 33 | resp, err := http.Get(vocHref) 34 | if nil != err { 35 | t.Errorf("Could not download test file (from web) from URL: [%s]. Received error: [%v]", vocHref, err) 36 | /////////// RETURN 37 | return 38 | } 39 | 40 | respBody, err := ioutil.ReadAll(resp.Body) 41 | if nil != err { 42 | t.Errorf("Error loading the contents of from URL: [%s]. Received error: [%v].", vocHref, err) 43 | /////////// RETURN 44 | return 45 | } 46 | 47 | _ = ioutil.WriteFile(vocFileName, respBody, 0644) 48 | 49 | } 50 | vocFd, err := os.Open(vocFileName) 51 | if nil != err { 52 | t.Errorf("Could NOT open testdata file: [%s]. Received error: [%v]", vocFileName, err) 53 | /////// RETURN 54 | return 55 | } 56 | defer vocFd.Close() 57 | 58 | voc := bufio.NewReaderSize(vocFd, 1024) 59 | 60 | outFileName := testDataDirName + "/output.txt" 61 | _, err = os.Stat(outFileName) 62 | if nil != err { 63 | 64 | outHref := "http://tartarus.org/martin/PorterStemmer/output.txt" 65 | 66 | resp, err := http.Get(outHref) 67 | if nil != err { 68 | t.Errorf("Could not download test file (from web) from URL: [%s]. Received error: [%v]", outHref, err) 69 | /////////// RETURN 70 | return 71 | } 72 | 73 | respBody, err := ioutil.ReadAll(resp.Body) 74 | if nil != err { 75 | t.Errorf("Error loading the contents of from URL: [%s]. Received error: [%v].", outHref, err) 76 | /////////// RETURN 77 | return 78 | } 79 | 80 | _ = ioutil.WriteFile(outFileName, respBody, 0644) 81 | 82 | } 83 | outFd, err := os.Open(outFileName) 84 | if nil != err { 85 | t.Errorf("Could NOT open testdata file: [%s]. Received error: [%v]", outFileName, err) 86 | /////// RETURN 87 | return 88 | } 89 | defer outFd.Close() 90 | 91 | out := bufio.NewReaderSize(outFd, 1024) 92 | 93 | for { 94 | 95 | vocS, err := voc.ReadString('\n') 96 | if nil != err { 97 | /////// BREAK 98 | break 99 | } 100 | 101 | vocS = strings.Trim(vocS, "\n\r\t ") 102 | 103 | expected, err := out.ReadString('\n') 104 | if nil != err { 105 | t.Errorf("Received unexpected error when trying to read a line from [%s]. Received error: [%v]", outFileName, err) 106 | /////// BREAK 107 | break 108 | 109 | } 110 | 111 | expected = strings.Trim(expected, "\n\r\t ") 112 | 113 | actual := StemString(vocS) 114 | if expected != actual { 115 | t.Errorf("Input: [%s] -> Actual: [%s]. Expected: [%s]", vocS, actual, expected) 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /porterstemmer_stem_without_lower_casing_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStemWithoutLowerCasing(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 3) 15 | 16 | tests[i].S = []rune("controll") 17 | tests[i].Expected = []rune("control") 18 | i++ 19 | 20 | tests[i].S = []rune("roll") 21 | tests[i].Expected = []rune("roll") 22 | 23 | for _, datum := range tests { 24 | 25 | actual := make([]rune, len(datum.S)) 26 | copy(actual, datum.S) 27 | 28 | actual = StemWithoutLowerCasing(actual) 29 | 30 | lenActual := len(actual) 31 | lenExpected := len(datum.Expected) 32 | 33 | equal := true 34 | if 0 == lenActual && 0 == lenExpected { 35 | equal = true 36 | } else if lenActual != lenExpected { 37 | equal = false 38 | } else if actual[0] != datum.Expected[0] { 39 | equal = false 40 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 41 | equal = false 42 | } else { 43 | for j := 0; j < lenActual; j++ { 44 | 45 | if actual[j] != datum.Expected[j] { 46 | equal = false 47 | } 48 | } 49 | } 50 | 51 | if !equal { 52 | t.Errorf("Did NOT get what was expected for calling StemWithoutLowerCasing() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 53 | } 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /porterstemmer_step1a_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep1a(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 12) 15 | 16 | tests[i].S = []rune("caresses") 17 | tests[i].Expected = []rune("caress") 18 | i++ 19 | 20 | tests[i].S = []rune("ponies") 21 | tests[i].Expected = []rune("poni") 22 | i++ 23 | 24 | tests[i].S = []rune("ties") 25 | tests[i].Expected = []rune("ti") 26 | i++ 27 | 28 | tests[i].S = []rune("caress") 29 | tests[i].Expected = []rune("caress") 30 | i++ 31 | 32 | tests[i].S = []rune("cats") 33 | tests[i].Expected = []rune("cat") 34 | 35 | for _, datum := range tests { 36 | for i = 0; i < len(datum.S); i++ { 37 | 38 | actual := make([]rune, len(datum.S)) 39 | copy(actual, datum.S) 40 | 41 | actual = step1a(actual) 42 | 43 | lenActual := len(actual) 44 | lenExpected := len(datum.Expected) 45 | 46 | equal := true 47 | if 0 == lenActual && 0 == lenExpected { 48 | equal = true 49 | } else if lenActual != lenExpected { 50 | equal = false 51 | } else if actual[0] != datum.Expected[0] { 52 | equal = false 53 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 54 | equal = false 55 | } else { 56 | for j := 0; j < lenActual; j++ { 57 | 58 | if actual[j] != datum.Expected[j] { 59 | equal = false 60 | } 61 | } 62 | } 63 | 64 | if !equal { 65 | t.Errorf("Did NOT get what was expected for calling step1a() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 66 | } 67 | } // for 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /porterstemmer_step1b_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep1b(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 17) 15 | 16 | tests[i].S = []rune("feed") 17 | tests[i].Expected = []rune("feed") 18 | i++ 19 | 20 | tests[i].S = []rune("agreed") 21 | tests[i].Expected = []rune("agree") 22 | i++ 23 | 24 | tests[i].S = []rune("plastered") 25 | tests[i].Expected = []rune("plaster") 26 | i++ 27 | 28 | tests[i].S = []rune("bled") 29 | tests[i].Expected = []rune("bled") 30 | i++ 31 | 32 | tests[i].S = []rune("motoring") 33 | tests[i].Expected = []rune("motor") 34 | i++ 35 | 36 | tests[i].S = []rune("sing") 37 | tests[i].Expected = []rune("sing") 38 | i++ 39 | 40 | tests[i].S = []rune("conflated") 41 | tests[i].Expected = []rune("conflate") 42 | i++ 43 | 44 | tests[i].S = []rune("troubled") 45 | tests[i].Expected = []rune("trouble") 46 | i++ 47 | 48 | tests[i].S = []rune("sized") 49 | tests[i].Expected = []rune("size") 50 | i++ 51 | 52 | tests[i].S = []rune("hopping") 53 | tests[i].Expected = []rune("hop") 54 | i++ 55 | 56 | tests[i].S = []rune("tanned") 57 | tests[i].Expected = []rune("tan") 58 | i++ 59 | 60 | tests[i].S = []rune("falling") 61 | tests[i].Expected = []rune("fall") 62 | i++ 63 | 64 | tests[i].S = []rune("hissing") 65 | tests[i].Expected = []rune("hiss") 66 | i++ 67 | 68 | tests[i].S = []rune("fizzed") 69 | tests[i].Expected = []rune("fizz") 70 | i++ 71 | 72 | tests[i].S = []rune("failing") 73 | tests[i].Expected = []rune("fail") 74 | i++ 75 | 76 | tests[i].S = []rune("filing") 77 | tests[i].Expected = []rune("file") 78 | 79 | for _, datum := range tests { 80 | 81 | actual := make([]rune, len(datum.S)) 82 | copy(actual, datum.S) 83 | 84 | actual = step1b(actual) 85 | 86 | lenActual := len(actual) 87 | lenExpected := len(datum.Expected) 88 | 89 | equal := true 90 | if 0 == lenActual && 0 == lenExpected { 91 | equal = true 92 | } else if lenActual != lenExpected { 93 | equal = false 94 | } else if actual[0] != datum.Expected[0] { 95 | equal = false 96 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 97 | equal = false 98 | } else { 99 | for j := 0; j < lenActual; j++ { 100 | 101 | if actual[j] != datum.Expected[j] { 102 | equal = false 103 | } 104 | } 105 | } 106 | 107 | if !equal { 108 | t.Errorf("Did NOT get what was expected for calling step1b() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 109 | } 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /porterstemmer_step1c_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep1c(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 17) 15 | 16 | tests[i].S = []rune("happy") 17 | tests[i].Expected = []rune("happi") 18 | i++ 19 | 20 | tests[i].S = []rune("sky") 21 | tests[i].Expected = []rune("sky") 22 | i++ 23 | 24 | tests[i].S = []rune("apology") 25 | tests[i].Expected = []rune("apologi") 26 | 27 | for _, datum := range tests { 28 | 29 | actual := make([]rune, len(datum.S)) 30 | copy(actual, datum.S) 31 | 32 | actual = step1c(actual) 33 | 34 | lenActual := len(actual) 35 | lenExpected := len(datum.Expected) 36 | 37 | equal := true 38 | if 0 == lenActual && 0 == lenExpected { 39 | equal = true 40 | } else if lenActual != lenExpected { 41 | equal = false 42 | } else if actual[0] != datum.Expected[0] { 43 | equal = false 44 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 45 | equal = false 46 | } else { 47 | for j := 0; j < lenActual; j++ { 48 | 49 | if actual[j] != datum.Expected[j] { 50 | equal = false 51 | } 52 | } 53 | } 54 | 55 | if !equal { 56 | t.Errorf("Did NOT get what was expected for calling step1c() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /porterstemmer_step2_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep2(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 22) 15 | 16 | tests[i].S = []rune("relational") 17 | tests[i].Expected = []rune("relate") 18 | i++ 19 | 20 | tests[i].S = []rune("conditional") 21 | tests[i].Expected = []rune("condition") 22 | i++ 23 | 24 | tests[i].S = []rune("rational") 25 | tests[i].Expected = []rune("rational") 26 | i++ 27 | 28 | tests[i].S = []rune("valenci") 29 | tests[i].Expected = []rune("valence") 30 | i++ 31 | 32 | tests[i].S = []rune("hesitanci") 33 | tests[i].Expected = []rune("hesitance") 34 | i++ 35 | 36 | tests[i].S = []rune("digitizer") 37 | tests[i].Expected = []rune("digitize") 38 | i++ 39 | 40 | tests[i].S = []rune("conformabli") 41 | tests[i].Expected = []rune("conformable") 42 | i++ 43 | 44 | tests[i].S = []rune("radicalli") 45 | tests[i].Expected = []rune("radical") 46 | i++ 47 | 48 | tests[i].S = []rune("differentli") 49 | tests[i].Expected = []rune("different") 50 | i++ 51 | 52 | tests[i].S = []rune("vileli") 53 | tests[i].Expected = []rune("vile") 54 | i++ 55 | 56 | tests[i].S = []rune("analogousli") 57 | tests[i].Expected = []rune("analogous") 58 | i++ 59 | 60 | tests[i].S = []rune("vietnamization") 61 | tests[i].Expected = []rune("vietnamize") 62 | i++ 63 | 64 | tests[i].S = []rune("predication") 65 | tests[i].Expected = []rune("predicate") 66 | i++ 67 | 68 | tests[i].S = []rune("operator") 69 | tests[i].Expected = []rune("operate") 70 | i++ 71 | 72 | tests[i].S = []rune("feudalism") 73 | tests[i].Expected = []rune("feudal") 74 | i++ 75 | 76 | tests[i].S = []rune("decisiveness") 77 | tests[i].Expected = []rune("decisive") 78 | i++ 79 | 80 | tests[i].S = []rune("hopefulness") 81 | tests[i].Expected = []rune("hopeful") 82 | i++ 83 | 84 | tests[i].S = []rune("callousness") 85 | tests[i].Expected = []rune("callous") 86 | i++ 87 | 88 | tests[i].S = []rune("formaliti") 89 | tests[i].Expected = []rune("formal") 90 | i++ 91 | 92 | tests[i].S = []rune("sensitiviti") 93 | tests[i].Expected = []rune("sensitive") 94 | i++ 95 | 96 | tests[i].S = []rune("sensibiliti") 97 | tests[i].Expected = []rune("sensible") 98 | 99 | for _, datum := range tests { 100 | 101 | actual := make([]rune, len(datum.S)) 102 | copy(actual, datum.S) 103 | 104 | actual = step2(actual) 105 | 106 | lenActual := len(actual) 107 | lenExpected := len(datum.Expected) 108 | 109 | equal := true 110 | if 0 == lenActual && 0 == lenExpected { 111 | equal = true 112 | } else if lenActual != lenExpected { 113 | equal = false 114 | } else if actual[0] != datum.Expected[0] { 115 | equal = false 116 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 117 | equal = false 118 | } else { 119 | for j := 0; j < lenActual; j++ { 120 | 121 | if actual[j] != datum.Expected[j] { 122 | equal = false 123 | } 124 | } 125 | } 126 | 127 | if !equal { 128 | t.Errorf("Did NOT get what was expected for calling step2() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /porterstemmer_step3_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep3(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 22) 15 | 16 | tests[i].S = []rune("triplicate") 17 | tests[i].Expected = []rune("triplic") 18 | i++ 19 | 20 | tests[i].S = []rune("formative") 21 | tests[i].Expected = []rune("form") 22 | i++ 23 | 24 | tests[i].S = []rune("formalize") 25 | tests[i].Expected = []rune("formal") 26 | i++ 27 | 28 | tests[i].S = []rune("electriciti") 29 | tests[i].Expected = []rune("electric") 30 | i++ 31 | 32 | tests[i].S = []rune("electrical") 33 | tests[i].Expected = []rune("electric") 34 | i++ 35 | 36 | tests[i].S = []rune("hopeful") 37 | tests[i].Expected = []rune("hope") 38 | i++ 39 | 40 | tests[i].S = []rune("goodness") 41 | tests[i].Expected = []rune("good") 42 | 43 | for _, datum := range tests { 44 | 45 | actual := make([]rune, len(datum.S)) 46 | copy(actual, datum.S) 47 | 48 | actual = step3(actual) 49 | 50 | lenActual := len(actual) 51 | lenExpected := len(datum.Expected) 52 | 53 | equal := true 54 | if 0 == lenActual && 0 == lenExpected { 55 | equal = true 56 | } else if lenActual != lenExpected { 57 | equal = false 58 | } else if actual[0] != datum.Expected[0] { 59 | equal = false 60 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 61 | equal = false 62 | } else { 63 | for j := 0; j < lenActual; j++ { 64 | 65 | if actual[j] != datum.Expected[j] { 66 | equal = false 67 | } 68 | } 69 | } 70 | 71 | if !equal { 72 | t.Errorf("Did NOT get what was expected for calling step3() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /porterstemmer_step4_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep4(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 20) 15 | 16 | tests[i].S = []rune("revival") 17 | tests[i].Expected = []rune("reviv") 18 | i++ 19 | 20 | tests[i].S = []rune("allowance") 21 | tests[i].Expected = []rune("allow") 22 | i++ 23 | 24 | tests[i].S = []rune("inference") 25 | tests[i].Expected = []rune("infer") 26 | i++ 27 | 28 | tests[i].S = []rune("airliner") 29 | tests[i].Expected = []rune("airlin") 30 | i++ 31 | 32 | tests[i].S = []rune("gyroscopic") 33 | tests[i].Expected = []rune("gyroscop") 34 | i++ 35 | 36 | tests[i].S = []rune("adjustable") 37 | tests[i].Expected = []rune("adjust") 38 | i++ 39 | 40 | tests[i].S = []rune("defensible") 41 | tests[i].Expected = []rune("defens") 42 | i++ 43 | 44 | tests[i].S = []rune("irritant") 45 | tests[i].Expected = []rune("irrit") 46 | i++ 47 | 48 | tests[i].S = []rune("replacement") 49 | tests[i].Expected = []rune("replac") 50 | i++ 51 | 52 | tests[i].S = []rune("adjustment") 53 | tests[i].Expected = []rune("adjust") 54 | i++ 55 | 56 | tests[i].S = []rune("dependent") 57 | tests[i].Expected = []rune("depend") 58 | i++ 59 | 60 | tests[i].S = []rune("adoption") 61 | tests[i].Expected = []rune("adopt") 62 | i++ 63 | 64 | tests[i].S = []rune("homologou") 65 | tests[i].Expected = []rune("homolog") 66 | i++ 67 | 68 | tests[i].S = []rune("communism") 69 | tests[i].Expected = []rune("commun") 70 | i++ 71 | 72 | tests[i].S = []rune("activate") 73 | tests[i].Expected = []rune("activ") 74 | i++ 75 | 76 | tests[i].S = []rune("angulariti") 77 | tests[i].Expected = []rune("angular") 78 | i++ 79 | 80 | tests[i].S = []rune("homologous") 81 | tests[i].Expected = []rune("homolog") 82 | i++ 83 | 84 | tests[i].S = []rune("effective") 85 | tests[i].Expected = []rune("effect") 86 | i++ 87 | 88 | tests[i].S = []rune("bowdlerize") 89 | tests[i].Expected = []rune("bowdler") 90 | 91 | for _, datum := range tests { 92 | 93 | actual := make([]rune, len(datum.S)) 94 | copy(actual, datum.S) 95 | 96 | actual = step4(actual) 97 | 98 | lenActual := len(actual) 99 | lenExpected := len(datum.Expected) 100 | 101 | equal := true 102 | if 0 == lenActual && 0 == lenExpected { 103 | equal = true 104 | } else if lenActual != lenExpected { 105 | equal = false 106 | } else if actual[0] != datum.Expected[0] { 107 | equal = false 108 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 109 | equal = false 110 | } else { 111 | for j := 0; j < lenActual; j++ { 112 | 113 | if actual[j] != datum.Expected[j] { 114 | equal = false 115 | } 116 | } 117 | } 118 | 119 | if !equal { 120 | t.Errorf("Did NOT get what was expected for calling step4() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 121 | } 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /porterstemmer_step5a_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep5a(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 3) 15 | 16 | tests[i].S = []rune("probate") 17 | tests[i].Expected = []rune("probat") 18 | i++ 19 | 20 | tests[i].S = []rune("rate") 21 | tests[i].Expected = []rune("rate") 22 | i++ 23 | 24 | tests[i].S = []rune("cease") 25 | tests[i].Expected = []rune("ceas") 26 | 27 | for _, datum := range tests { 28 | 29 | actual := make([]rune, len(datum.S)) 30 | copy(actual, datum.S) 31 | 32 | actual = step5a(actual) 33 | 34 | lenActual := len(actual) 35 | lenExpected := len(datum.Expected) 36 | 37 | equal := true 38 | if 0 == lenActual && 0 == lenExpected { 39 | equal = true 40 | } else if lenActual != lenExpected { 41 | equal = false 42 | } else if actual[0] != datum.Expected[0] { 43 | equal = false 44 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 45 | equal = false 46 | } else { 47 | for j := 0; j < lenActual; j++ { 48 | 49 | if actual[j] != datum.Expected[j] { 50 | equal = false 51 | } 52 | } 53 | } 54 | 55 | if !equal { 56 | t.Errorf("Did NOT get what was expected for calling step5a() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /porterstemmer_step5b_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestStep5b(t *testing.T) { 8 | 9 | i := 0 10 | 11 | tests := make([]struct { 12 | S []rune 13 | Expected []rune 14 | }, 3) 15 | 16 | tests[i].S = []rune("controll") 17 | tests[i].Expected = []rune("control") 18 | i++ 19 | 20 | tests[i].S = []rune("roll") 21 | tests[i].Expected = []rune("roll") 22 | 23 | for _, datum := range tests { 24 | 25 | actual := make([]rune, len(datum.S)) 26 | copy(actual, datum.S) 27 | 28 | actual = step5b(actual) 29 | 30 | lenActual := len(actual) 31 | lenExpected := len(datum.Expected) 32 | 33 | equal := true 34 | if 0 == lenActual && 0 == lenExpected { 35 | equal = true 36 | } else if lenActual != lenExpected { 37 | equal = false 38 | } else if actual[0] != datum.Expected[0] { 39 | equal = false 40 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 41 | equal = false 42 | } else { 43 | for j := 0; j < lenActual; j++ { 44 | 45 | if actual[j] != datum.Expected[j] { 46 | equal = false 47 | } 48 | } 49 | } 50 | 51 | if !equal { 52 | t.Errorf("Did NOT get what was expected for calling step5b() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 53 | } 54 | } 55 | } 56 | --------------------------------------------------------------------------------