├── porterstemmer_fixes_test.go ├── porterstemmer_has_repeat_double_consonant_suffix_test.go ├── LICENSE ├── porterstemmer_contains_vowel_test.go ├── porterstemmer_step5b_test.go ├── porterstemmer_stem_without_lower_casing_test.go ├── porterstemmer_fuzz_test.go ├── porterstemmer_step5a_test.go ├── porterstemmer_step1c_test.go ├── porterstemmer_step1a_test.go ├── porterstemmer_step3_test.go ├── porterstemmer_is_consontant_test.go ├── porterstemmer_measure_test.go ├── porterstemmer_step1b_test.go ├── porterstemmer_step4_test.go ├── porterstemmer_stem_string_test.go ├── porterstemmer_step2_test.go ├── README.md ├── porterstemmer_has_suffix_test.go └── porterstemmer.go /porterstemmer_fixes_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | // Test for issue listed here: 12 | // https://github.com/reiver/go-porterstemmer/issues/1 13 | // 14 | // StemString("ion") was causing runtime exception 15 | func TestStemStringIon(t *testing.T) { 16 | 17 | expected := "ion" 18 | 19 | s := "ion" 20 | actual := StemString(s) 21 | if expected != actual { 22 | t.Errorf("Input: [%s] -> Actual: [%s]. Expected: [%s]", s, actual, expected) 23 | } 24 | } 25 | 26 | 27 | // Test for issue listed here: 28 | // https://github.com/reiver/go-porterstemmer/pull/10 29 | // 30 | // StemString("eeg") was causing runtime exception 31 | func TestStemStringEeg(t *testing.T) { 32 | 33 | expected := "eeg" 34 | 35 | s := "eeg" 36 | actual := StemString(s) 37 | if expected != actual { 38 | t.Errorf("Input: [%s] -> Actual: [%s]. 
Expected: [%s]", s, actual, expected) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /porterstemmer_has_repeat_double_consonant_suffix_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestHasDoubleConsonantSuffix(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected bool 18 | }, 12) 19 | 20 | 21 | tests[i].S = []rune("apple") 22 | tests[i].Expected = false 23 | i++ 24 | 25 | tests[i].S = []rune("hiss") 26 | tests[i].Expected = true 27 | i++ 28 | 29 | tests[i].S = []rune("fizz") 30 | tests[i].Expected = true 31 | i++ 32 | 33 | tests[i].S = []rune("fill") 34 | tests[i].Expected = true 35 | i++ 36 | 37 | tests[i].S = []rune("ahaa") 38 | tests[i].Expected = false 39 | i++ 40 | 41 | 42 | for _,datum := range tests { 43 | 44 | if actual := hasRepeatDoubleConsonantSuffix(datum.S) ; actual != datum.Expected { 45 | t.Errorf("Did NOT get what was expected for calling hasDoubleConsonantSuffix() on [%s]. 
Expect [%t] but got [%t]", string(datum.S), datum.Expected, actual) 46 | } 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Charles Iliya Krempeaux :: http://changelog.ca/ 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /porterstemmer_contains_vowel_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestContainsVowel(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected bool 18 | }, 15) 19 | 20 | 21 | tests[i].S = []rune("apple") 22 | tests[i].Expected = true 23 | i++ 24 | 25 | tests[i].S = []rune("f") 26 | tests[i].Expected = false 27 | i++ 28 | 29 | 30 | 31 | tests[i].S = []rune("a") 32 | tests[i].Expected = true 33 | i++ 34 | 35 | tests[i].S = []rune("e") 36 | tests[i].Expected = true 37 | i++ 38 | 39 | tests[i].S = []rune("i") 40 | tests[i].Expected = true 41 | i++ 42 | 43 | tests[i].S = []rune("o") 44 | tests[i].Expected = true 45 | i++ 46 | 47 | tests[i].S = []rune("u") 48 | tests[i].Expected = true 49 | i++ 50 | 51 | 52 | 53 | tests[i].S = []rune("y") 54 | tests[i].Expected = false 55 | i++ 56 | 57 | 58 | 59 | tests[i].S = []rune("cy") 60 | tests[i].Expected = true 61 | i++ 62 | 63 | 64 | for _,datum := range tests { 65 | if actual := containsVowel(datum.S) ; actual != datum.Expected { 66 | t.Errorf("Did NOT get what was expected for calling containsVowel() on [%s]. 
Expect [%t] but got [%t]", string(datum.S), datum.Expected, actual) 67 | } 68 | } 69 | } 70 | 71 | -------------------------------------------------------------------------------- /porterstemmer_step5b_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep5b(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 3) 19 | 20 | 21 | tests[i].S = []rune("controll") 22 | tests[i].Expected = []rune("control") 23 | i++ 24 | 25 | tests[i].S = []rune("roll") 26 | tests[i].Expected = []rune("roll") 27 | i++ 28 | 29 | 30 | for _,datum := range tests { 31 | 32 | actual := make([]rune, len(datum.S)) 33 | copy(actual, datum.S) 34 | 35 | actual = step5b(actual) 36 | 37 | lenActual := len(actual) 38 | lenExpected := len(datum.Expected) 39 | 40 | equal := true 41 | if 0 == lenActual && 0 == lenExpected { 42 | equal = true 43 | } else if lenActual != lenExpected { 44 | equal = false 45 | } else if actual[0] != datum.Expected[0] { 46 | equal = false 47 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 48 | equal = false 49 | } else { 50 | for j := 0 ; j < lenActual ; j++ { 51 | 52 | if actual[j] != datum.Expected[j] { 53 | equal = false 54 | } 55 | } 56 | } 57 | 58 | if !equal { 59 | t.Errorf("Did NOT get what was expected for calling step5b() on [%s]. 
Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /porterstemmer_stem_without_lower_casing_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStemWithoutLowerCasing(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 3) 19 | 20 | 21 | tests[i].S = []rune("controll") 22 | tests[i].Expected = []rune("control") 23 | i++ 24 | 25 | tests[i].S = []rune("roll") 26 | tests[i].Expected = []rune("roll") 27 | i++ 28 | 29 | 30 | for _,datum := range tests { 31 | 32 | actual := make([]rune, len(datum.S)) 33 | copy(actual, datum.S) 34 | 35 | actual = StemWithoutLowerCasing(actual) 36 | 37 | lenActual := len(actual) 38 | lenExpected := len(datum.Expected) 39 | 40 | equal := true 41 | if 0 == lenActual && 0 == lenExpected { 42 | equal = true 43 | } else if lenActual != lenExpected { 44 | equal = false 45 | } else if actual[0] != datum.Expected[0] { 46 | equal = false 47 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 48 | equal = false 49 | } else { 50 | for j := 0 ; j < lenActual ; j++ { 51 | 52 | if actual[j] != datum.Expected[j] { 53 | equal = false 54 | } 55 | } 56 | } 57 | 58 | if !equal { 59 | t.Errorf("Did NOT get what was expected for calling StemWithoutLowerCasing() on [%s]. 
Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /porterstemmer_fuzz_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | ) 7 | 8 | const maxFuzzLen = 6 9 | 10 | // Test inputs of English characters less than maxFuzzLen 11 | // Added to help diagnose https://github.com/reiver/go-porterstemmer/issues/4 12 | func TestStemFuzz(t *testing.T) { 13 | 14 | input := []byte{'a'} 15 | for len(input) < maxFuzzLen { 16 | // test input 17 | 18 | panicked := false 19 | func() { 20 | defer func() { panicked = recover() != nil }() 21 | StemString(string(input)) 22 | }() 23 | if panicked { 24 | t.Errorf("StemString panicked for input '%s'", input) 25 | } 26 | 27 | // if all z's extend 28 | if allZs(input) { 29 | input = bytes.Repeat([]byte{'a'}, len(input)+1) 30 | } else { 31 | // increment 32 | input = incrementBytes(input) 33 | } 34 | } 35 | } 36 | 37 | func incrementBytes(in []byte) []byte { 38 | rv := make([]byte, len(in)) 39 | copy(rv, in) 40 | for i := len(rv) - 1; i >= 0; i-- { 41 | if rv[i]+1 == '{' { 42 | rv[i] = 'a' 43 | continue 44 | } 45 | rv[i] = rv[i] + 1 46 | break 47 | 48 | } 49 | return rv 50 | } 51 | 52 | func allZs(in []byte) bool { 53 | for _, b := range in { 54 | if b != 'z' { 55 | return false 56 | } 57 | } 58 | return true 59 | } 60 | -------------------------------------------------------------------------------- /porterstemmer_step5a_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep5a(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 3) 19 | 20 | 21 | tests[i].S = []rune("probate") 22 | tests[i].Expected = 
[]rune("probat") 23 | i++ 24 | 25 | tests[i].S = []rune("rate") 26 | tests[i].Expected = []rune("rate") 27 | i++ 28 | 29 | tests[i].S = []rune("cease") 30 | tests[i].Expected = []rune("ceas") 31 | i++ 32 | 33 | 34 | for _,datum := range tests { 35 | 36 | actual := make([]rune, len(datum.S)) 37 | copy(actual, datum.S) 38 | 39 | actual = step5a(actual) 40 | 41 | lenActual := len(actual) 42 | lenExpected := len(datum.Expected) 43 | 44 | equal := true 45 | if 0 == lenActual && 0 == lenExpected { 46 | equal = true 47 | } else if lenActual != lenExpected { 48 | equal = false 49 | } else if actual[0] != datum.Expected[0] { 50 | equal = false 51 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 52 | equal = false 53 | } else { 54 | for j := 0 ; j < lenActual ; j++ { 55 | 56 | if actual[j] != datum.Expected[j] { 57 | equal = false 58 | } 59 | } 60 | } 61 | 62 | if !equal { 63 | t.Errorf("Did NOT get what was expected for calling step5a() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /porterstemmer_step1c_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep1c(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 17) 19 | 20 | 21 | tests[i].S = []rune("happy") 22 | tests[i].Expected = []rune("happi") 23 | i++ 24 | 25 | tests[i].S = []rune("sky") 26 | tests[i].Expected = []rune("sky") 27 | i++ 28 | 29 | 30 | 31 | tests[i].S = []rune("apology") 32 | tests[i].Expected = []rune("apologi") 33 | i++ 34 | 35 | for _,datum := range tests { 36 | 37 | actual := make([]rune, len(datum.S)) 38 | copy(actual, datum.S) 39 | 40 | actual = step1c(actual) 41 | 42 | lenActual := len(actual) 43 | lenExpected := len(datum.Expected) 44 
| 45 | equal := true 46 | if 0 == lenActual && 0 == lenExpected { 47 | equal = true 48 | } else if lenActual != lenExpected { 49 | equal = false 50 | } else if actual[0] != datum.Expected[0] { 51 | equal = false 52 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 53 | equal = false 54 | } else { 55 | for j := 0 ; j < lenActual ; j++ { 56 | 57 | if actual[j] != datum.Expected[j] { 58 | equal = false 59 | } 60 | } 61 | } 62 | 63 | if !equal { 64 | t.Errorf("Did NOT get what was expected for calling step1c() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 65 | } 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /porterstemmer_step1a_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep1a(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 12) 19 | 20 | 21 | tests[i].S = []rune("caresses") 22 | tests[i].Expected = []rune("caress") 23 | i++ 24 | 25 | tests[i].S = []rune("ponies") 26 | tests[i].Expected = []rune("poni") 27 | i++ 28 | 29 | tests[i].S = []rune("ties") 30 | tests[i].Expected = []rune("ti") 31 | i++ 32 | 33 | tests[i].S = []rune("caress") 34 | tests[i].Expected = []rune("caress") 35 | i++ 36 | 37 | tests[i].S = []rune("cats") 38 | tests[i].Expected = []rune("cat") 39 | i++ 40 | 41 | 42 | for _,datum := range tests { 43 | for i = 0 ; i < len(datum.S) ; i++ { 44 | 45 | actual := make([]rune, len(datum.S)) 46 | copy(actual, datum.S) 47 | 48 | actual = step1a(actual) 49 | 50 | lenActual := len(actual) 51 | lenExpected := len(datum.Expected) 52 | 53 | equal := true 54 | if 0 == lenActual && 0 == lenExpected { 55 | equal = true 56 | } else if lenActual != lenExpected { 57 | equal = false 58 | } else if actual[0] != datum.Expected[0] { 59 | equal = false 60 | 
} else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 61 | equal = false 62 | } else { 63 | for j := 0 ; j < lenActual ; j++ { 64 | 65 | if actual[j] != datum.Expected[j] { 66 | equal = false 67 | } 68 | } 69 | } 70 | 71 | if !equal { 72 | t.Errorf("Did NOT get what was expected for calling step1a() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 73 | } 74 | } // for 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /porterstemmer_step3_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep3(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 22) 19 | 20 | 21 | tests[i].S = []rune("triplicate") 22 | tests[i].Expected = []rune("triplic") 23 | i++ 24 | 25 | tests[i].S = []rune("formative") 26 | tests[i].Expected = []rune("form") 27 | i++ 28 | 29 | tests[i].S = []rune("formalize") 30 | tests[i].Expected = []rune("formal") 31 | i++ 32 | 33 | tests[i].S = []rune("electriciti") 34 | tests[i].Expected = []rune("electric") 35 | i++ 36 | 37 | tests[i].S = []rune("electrical") 38 | tests[i].Expected = []rune("electric") 39 | i++ 40 | 41 | tests[i].S = []rune("hopeful") 42 | tests[i].Expected = []rune("hope") 43 | i++ 44 | 45 | tests[i].S = []rune("goodness") 46 | tests[i].Expected = []rune("good") 47 | i++ 48 | 49 | 50 | for _,datum := range tests { 51 | 52 | actual := make([]rune, len(datum.S)) 53 | copy(actual, datum.S) 54 | 55 | actual = step3(actual) 56 | 57 | lenActual := len(actual) 58 | lenExpected := len(datum.Expected) 59 | 60 | equal := true 61 | if 0 == lenActual && 0 == lenExpected { 62 | equal = true 63 | } else if lenActual != lenExpected { 64 | equal = false 65 | } else if actual[0] != datum.Expected[0] { 66 | equal = false 67 | } else if actual[lenActual-1] 
!= datum.Expected[lenExpected-1] { 68 | equal = false 69 | } else { 70 | for j := 0 ; j < lenActual ; j++ { 71 | 72 | if actual[j] != datum.Expected[j] { 73 | equal = false 74 | } 75 | } 76 | } 77 | 78 | if !equal { 79 | t.Errorf("Did NOT get what was expected for calling step3() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /porterstemmer_is_consontant_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestIsConsontant(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []bool 18 | }, 12) 19 | 20 | 21 | tests[i].S = []rune("apple") 22 | tests[i].Expected = []bool{false, true, true, true, false} 23 | i++ 24 | 25 | tests[i].S = []rune("cyan") 26 | tests[i].Expected = []bool{true, false, false, true} 27 | i++ 28 | 29 | tests[i].S = []rune("connects") 30 | tests[i].Expected = []bool{true, false, true, true, false, true, true, true} 31 | i++ 32 | 33 | tests[i].S = []rune("yellow") 34 | tests[i].Expected = []bool{true, false, true, true, false, true} 35 | i++ 36 | 37 | tests[i].S = []rune("excellent") 38 | tests[i].Expected = []bool{false, true, true, false, true, true, false, true, true} 39 | i++ 40 | 41 | tests[i].S = []rune("yuk") 42 | tests[i].Expected = []bool{true, false, true} 43 | i++ 44 | 45 | tests[i].S = []rune("syzygy") 46 | tests[i].Expected = []bool{true, false, true, false, true, false} 47 | i++ 48 | 49 | tests[i].S = []rune("school") 50 | tests[i].Expected = []bool{true, true, true, false, false, true} 51 | i++ 52 | 53 | tests[i].S = []rune("pay") 54 | tests[i].Expected = []bool{true, false, true} 55 | i++ 56 | 57 | tests[i].S = []rune("golang") 58 | tests[i].Expected = []bool{true, false, true, false, true, true} 59 | i++ 60 | 
61 | // NOTE: The Porter Stemmer technical should make a mistake on the second "y". 62 | // Really, both the 1st and 2nd "y" are consontants. But 63 | tests[i].S = []rune("sayyid") 64 | tests[i].Expected = []bool{true, false, true, false, false, true} 65 | i++ 66 | 67 | tests[i].S = []rune("ya") 68 | tests[i].Expected = []bool{true, false} 69 | i++ 70 | 71 | for _,datum := range tests { 72 | for i = 0 ; i < len(datum.S) ; i++ { 73 | 74 | if actual := isConsonant(datum.S, i) ; actual != datum.Expected[i] { 75 | t.Errorf("Did NOT get what was expected for calling isConsonant() on [%s] at [%d] (i.e., [%s]). Expect [%t] but got [%t]", string(datum.S), i, string(datum.S[i]), datum.Expected[i], actual) 76 | } 77 | } // for 78 | } 79 | } 80 | 81 | -------------------------------------------------------------------------------- /porterstemmer_measure_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestMeasure(t *testing.T) { 12 | 13 | tests := make([]struct { 14 | S []rune 15 | Expected uint 16 | }, 27) 17 | 18 | 19 | tests[0].S = []rune("ya") 20 | tests[0].Expected = 0 21 | 22 | tests[1].S = []rune("cyan") 23 | tests[1].Expected = 1 24 | 25 | tests[2].S = []rune("connects") 26 | tests[2].Expected = 2 27 | 28 | tests[3].S = []rune("yellow") 29 | tests[3].Expected = 2 30 | 31 | tests[4].S = []rune("excellent") 32 | tests[4].Expected = 3 33 | 34 | tests[5].S = []rune("yuk") 35 | tests[5].Expected = 1 36 | 37 | tests[6].S = []rune("syzygy") 38 | tests[6].Expected = 2 39 | 40 | tests[7].S = []rune("school") 41 | tests[7].Expected = 1 42 | 43 | tests[8].S = []rune("pay") 44 | tests[8].Expected = 1 45 | 46 | tests[9].S = []rune("golang") 47 | tests[9].Expected = 2 48 | 49 | // NOTE: The Porter Stemmer technical should make a mistake on the second "y". 50 | // Really, both the 1st and 2nd "y" are consontants. 
But 51 | tests[10].S = []rune("sayyid") 52 | tests[10].Expected = 2 53 | 54 | tests[11].S = []rune("ya") 55 | tests[11].Expected = 0 56 | 57 | tests[12].S = []rune("") 58 | tests[12].Expected = 0 59 | 60 | tests[13].S = []rune("tr") 61 | tests[13].Expected = 0 62 | 63 | tests[14].S = []rune("ee") 64 | tests[14].Expected = 0 65 | 66 | tests[15].S = []rune("tree") 67 | tests[15].Expected = 0 68 | 69 | tests[16].S = []rune("t") 70 | tests[16].Expected = 0 71 | 72 | tests[18].S = []rune("by") 73 | tests[18].Expected = 0 74 | 75 | tests[19].S = []rune("trouble") 76 | tests[19].Expected = 1 77 | 78 | tests[20].S = []rune("oats") 79 | tests[20].Expected = 1 80 | 81 | tests[21].S = []rune("trees") 82 | tests[21].Expected = 1 83 | 84 | tests[22].S = []rune("ivy") 85 | tests[22].Expected = 1 86 | 87 | tests[23].S = []rune("troubles") 88 | tests[23].Expected = 2 89 | 90 | tests[24].S = []rune("private") 91 | tests[24].Expected = 2 92 | 93 | tests[25].S = []rune("oaten") 94 | tests[25].Expected = 2 95 | 96 | tests[26].S = []rune("orrery") 97 | tests[26].Expected = 2 98 | 99 | for _,datum := range tests { 100 | if actual := measure(datum.S) ; actual != datum.Expected { 101 | t.Errorf("Did NOT get what was expected for calling measure() on [%s]. 
Expect [%d] but got [%d]", string(datum.S), datum.Expected, actual) 102 | } 103 | } 104 | } 105 | 106 | -------------------------------------------------------------------------------- /porterstemmer_step1b_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep1b(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 17) 19 | 20 | 21 | tests[i].S = []rune("feed") 22 | tests[i].Expected = []rune("feed") 23 | i++ 24 | 25 | tests[i].S = []rune("agreed") 26 | tests[i].Expected = []rune("agree") 27 | i++ 28 | 29 | tests[i].S = []rune("plastered") 30 | tests[i].Expected = []rune("plaster") 31 | i++ 32 | 33 | tests[i].S = []rune("bled") 34 | tests[i].Expected = []rune("bled") 35 | i++ 36 | 37 | tests[i].S = []rune("motoring") 38 | tests[i].Expected = []rune("motor") 39 | i++ 40 | 41 | tests[i].S = []rune("sing") 42 | tests[i].Expected = []rune("sing") 43 | i++ 44 | 45 | 46 | 47 | tests[i].S = []rune("conflated") 48 | tests[i].Expected = []rune("conflate") 49 | i++ 50 | 51 | tests[i].S = []rune("troubled") 52 | tests[i].Expected = []rune("trouble") 53 | i++ 54 | 55 | tests[i].S = []rune("sized") 56 | tests[i].Expected = []rune("size") 57 | i++ 58 | 59 | tests[i].S = []rune("hopping") 60 | tests[i].Expected = []rune("hop") 61 | i++ 62 | 63 | tests[i].S = []rune("tanned") 64 | tests[i].Expected = []rune("tan") 65 | i++ 66 | 67 | tests[i].S = []rune("falling") 68 | tests[i].Expected = []rune("fall") 69 | i++ 70 | 71 | tests[i].S = []rune("hissing") 72 | tests[i].Expected = []rune("hiss") 73 | i++ 74 | 75 | tests[i].S = []rune("fizzed") 76 | tests[i].Expected = []rune("fizz") 77 | i++ 78 | 79 | tests[i].S = []rune("failing") 80 | tests[i].Expected = []rune("fail") 81 | i++ 82 | 83 | tests[i].S = []rune("filing") 84 | tests[i].Expected = []rune("file") 85 | i++ 86 | 87 | for _,datum := 
range tests { 88 | 89 | actual := make([]rune, len(datum.S)) 90 | copy(actual, datum.S) 91 | 92 | actual = step1b(actual) 93 | 94 | lenActual := len(actual) 95 | lenExpected := len(datum.Expected) 96 | 97 | equal := true 98 | if 0 == lenActual && 0 == lenExpected { 99 | equal = true 100 | } else if lenActual != lenExpected { 101 | equal = false 102 | } else if actual[0] != datum.Expected[0] { 103 | equal = false 104 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 105 | equal = false 106 | } else { 107 | for j := 0 ; j < lenActual ; j++ { 108 | 109 | if actual[j] != datum.Expected[j] { 110 | equal = false 111 | } 112 | } 113 | } 114 | 115 | if !equal { 116 | t.Errorf("Did NOT get what was expected for calling step1b() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 117 | } 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /porterstemmer_step4_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep4(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 20) 19 | 20 | 21 | tests[i].S = []rune("revival") 22 | tests[i].Expected = []rune("reviv") 23 | i++ 24 | 25 | tests[i].S = []rune("allowance") 26 | tests[i].Expected = []rune("allow") 27 | i++ 28 | 29 | tests[i].S = []rune("inference") 30 | tests[i].Expected = []rune("infer") 31 | i++ 32 | 33 | tests[i].S = []rune("airliner") 34 | tests[i].Expected = []rune("airlin") 35 | i++ 36 | 37 | tests[i].S = []rune("gyroscopic") 38 | tests[i].Expected = []rune("gyroscop") 39 | i++ 40 | 41 | tests[i].S = []rune("adjustable") 42 | tests[i].Expected = []rune("adjust") 43 | i++ 44 | 45 | tests[i].S = []rune("defensible") 46 | tests[i].Expected = []rune("defens") 47 | i++ 48 | 49 | tests[i].S = []rune("irritant") 50 | 
tests[i].Expected = []rune("irrit") 51 | i++ 52 | 53 | tests[i].S = []rune("replacement") 54 | tests[i].Expected = []rune("replac") 55 | i++ 56 | 57 | tests[i].S = []rune("adjustment") 58 | tests[i].Expected = []rune("adjust") 59 | i++ 60 | 61 | tests[i].S = []rune("dependent") 62 | tests[i].Expected = []rune("depend") 63 | i++ 64 | 65 | tests[i].S = []rune("adoption") 66 | tests[i].Expected = []rune("adopt") 67 | i++ 68 | 69 | tests[i].S = []rune("homologou") 70 | tests[i].Expected = []rune("homolog") 71 | i++ 72 | 73 | tests[i].S = []rune("communism") 74 | tests[i].Expected = []rune("commun") 75 | i++ 76 | 77 | tests[i].S = []rune("activate") 78 | tests[i].Expected = []rune("activ") 79 | i++ 80 | 81 | tests[i].S = []rune("angulariti") 82 | tests[i].Expected = []rune("angular") 83 | i++ 84 | 85 | tests[i].S = []rune("homologous") 86 | tests[i].Expected = []rune("homolog") 87 | i++ 88 | 89 | tests[i].S = []rune("effective") 90 | tests[i].Expected = []rune("effect") 91 | i++ 92 | 93 | tests[i].S = []rune("bowdlerize") 94 | tests[i].Expected = []rune("bowdler") 95 | i++ 96 | 97 | 98 | for _,datum := range tests { 99 | 100 | actual := make([]rune, len(datum.S)) 101 | copy(actual, datum.S) 102 | 103 | actual = step4(actual) 104 | 105 | lenActual := len(actual) 106 | lenExpected := len(datum.Expected) 107 | 108 | equal := true 109 | if 0 == lenActual && 0 == lenExpected { 110 | equal = true 111 | } else if lenActual != lenExpected { 112 | equal = false 113 | } else if actual[0] != datum.Expected[0] { 114 | equal = false 115 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 116 | equal = false 117 | } else { 118 | for j := 0 ; j < lenActual ; j++ { 119 | 120 | if actual[j] != datum.Expected[j] { 121 | equal = false 122 | } 123 | } 124 | } 125 | 126 | if !equal { 127 | t.Errorf("Did NOT get what was expected for calling step4() on [%s]. 
Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /porterstemmer_stem_string_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "bufio" 7 | "io/ioutil" 8 | "net/http" 9 | "os" 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | 15 | 16 | func TestStemString(t *testing.T) { 17 | 18 | testDataDirName := "testdata" 19 | 20 | _, err := os.Stat(testDataDirName) 21 | if nil != err { 22 | _ = os.Mkdir(testDataDirName, 0755) 23 | } 24 | _, err = os.Stat(testDataDirName) 25 | if nil != err { 26 | t.Errorf("The test data folder ([%s]) does not exists (and could not create it). Received error: [%v]", testDataDirName, err) 27 | /////// RETURN 28 | return 29 | } 30 | 31 | 32 | vocFileName := testDataDirName + "/voc.txt" 33 | _, err = os.Stat(vocFileName) 34 | if nil != err { 35 | 36 | vocHref := "http://tartarus.org/martin/PorterStemmer/voc.txt" 37 | 38 | resp, err := http.Get(vocHref) 39 | if nil != err { 40 | t.Errorf("Could not download test file (from web) from URL: [%s]. Received error: [%v]", vocHref, err) 41 | /////////// RETURN 42 | return 43 | } 44 | 45 | respBody, err := ioutil.ReadAll(resp.Body) 46 | if nil != err { 47 | t.Errorf("Error loading the contents of from URL: [%s]. Received error: [%v].", vocHref, err) 48 | /////////// RETURN 49 | return 50 | } 51 | 52 | _ = ioutil.WriteFile(vocFileName, respBody, 0644) 53 | 54 | } 55 | vocFd, err := os.Open(vocFileName) 56 | if nil != err { 57 | t.Errorf("Could NOT open testdata file: [%s]. 
Received error: [%v]", vocFileName, err) 58 | /////// RETURN 59 | return 60 | } 61 | defer vocFd.Close() 62 | 63 | voc := bufio.NewReaderSize(vocFd, 1024) 64 | 65 | 66 | 67 | outFileName := testDataDirName + "/output.txt" 68 | _, err = os.Stat(outFileName) 69 | if nil != err { 70 | 71 | outHref := "http://tartarus.org/martin/PorterStemmer/output.txt" 72 | 73 | resp, err := http.Get(outHref) 74 | if nil != err { 75 | t.Errorf("Could not download test file (from web) from URL: [%s]. Received error: [%v]", outHref, err) 76 | /////////// RETURN 77 | return 78 | } 79 | 80 | respBody, err := ioutil.ReadAll(resp.Body) 81 | if nil != err { 82 | t.Errorf("Error loading the contents of from URL: [%s]. Received error: [%v].", outHref, err) 83 | /////////// RETURN 84 | return 85 | } 86 | 87 | _ = ioutil.WriteFile(outFileName, respBody, 0644) 88 | 89 | } 90 | outFd, err := os.Open(outFileName) 91 | if nil != err { 92 | t.Errorf("Could NOT open testdata file: [%s]. Received error: [%v]", outFileName, err) 93 | /////// RETURN 94 | return 95 | } 96 | defer outFd.Close() 97 | 98 | out := bufio.NewReaderSize(outFd, 1024) 99 | 100 | 101 | 102 | for { 103 | 104 | vocS, err := voc.ReadString('\n') 105 | if nil != err { 106 | /////// BREAK 107 | break 108 | } 109 | 110 | vocS = strings.Trim(vocS, "\n\r\t ") 111 | 112 | 113 | 114 | expected, err := out.ReadString('\n') 115 | if nil != err { 116 | t.Errorf("Received unexpected error when trying to read a line from [%s]. Received error: [%v]", outFileName, err) 117 | /////// BREAK 118 | break 119 | 120 | } 121 | 122 | expected = strings.Trim(expected, "\n\r\t ") 123 | 124 | 125 | 126 | actual := StemString(vocS) 127 | if expected != actual { 128 | t.Errorf("Input: [%s] -> Actual: [%s]. 
Expected: [%s]", vocS, actual, expected) 129 | } 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /porterstemmer_step2_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestStep2(t *testing.T) { 12 | 13 | i := 0 14 | 15 | tests := make([]struct { 16 | S []rune 17 | Expected []rune 18 | }, 22) 19 | 20 | 21 | tests[i].S = []rune("relational") 22 | tests[i].Expected = []rune("relate") 23 | i++ 24 | 25 | tests[i].S = []rune("conditional") 26 | tests[i].Expected = []rune("condition") 27 | i++ 28 | 29 | tests[i].S = []rune("rational") 30 | tests[i].Expected = []rune("rational") 31 | i++ 32 | 33 | tests[i].S = []rune("valenci") 34 | tests[i].Expected = []rune("valence") 35 | i++ 36 | 37 | tests[i].S = []rune("hesitanci") 38 | tests[i].Expected = []rune("hesitance") 39 | i++ 40 | 41 | tests[i].S = []rune("digitizer") 42 | tests[i].Expected = []rune("digitize") 43 | i++ 44 | 45 | tests[i].S = []rune("conformabli") 46 | tests[i].Expected = []rune("conformable") 47 | i++ 48 | 49 | tests[i].S = []rune("radicalli") 50 | tests[i].Expected = []rune("radical") 51 | i++ 52 | 53 | tests[i].S = []rune("differentli") 54 | tests[i].Expected = []rune("different") 55 | i++ 56 | 57 | tests[i].S = []rune("vileli") 58 | tests[i].Expected = []rune("vile") 59 | i++ 60 | 61 | tests[i].S = []rune("analogousli") 62 | tests[i].Expected = []rune("analogous") 63 | i++ 64 | 65 | tests[i].S = []rune("vietnamization") 66 | tests[i].Expected = []rune("vietnamize") 67 | i++ 68 | 69 | tests[i].S = []rune("predication") 70 | tests[i].Expected = []rune("predicate") 71 | i++ 72 | 73 | tests[i].S = []rune("operator") 74 | tests[i].Expected = []rune("operate") 75 | i++ 76 | 77 | tests[i].S = []rune("feudalism") 78 | tests[i].Expected = []rune("feudal") 79 | i++ 80 | 81 | tests[i].S = []rune("decisiveness") 82 | 
tests[i].Expected = []rune("decisive") 83 | i++ 84 | 85 | tests[i].S = []rune("hopefulness") 86 | tests[i].Expected = []rune("hopeful") 87 | i++ 88 | 89 | tests[i].S = []rune("callousness") 90 | tests[i].Expected = []rune("callous") 91 | i++ 92 | 93 | tests[i].S = []rune("formaliti") 94 | tests[i].Expected = []rune("formal") 95 | i++ 96 | 97 | tests[i].S = []rune("sensitiviti") 98 | tests[i].Expected = []rune("sensitive") 99 | i++ 100 | 101 | tests[i].S = []rune("sensibiliti") 102 | tests[i].Expected = []rune("sensible") 103 | i++ 104 | 105 | 106 | for _,datum := range tests { 107 | 108 | actual := make([]rune, len(datum.S)) 109 | copy(actual, datum.S) 110 | 111 | actual = step2(actual) 112 | 113 | lenActual := len(actual) 114 | lenExpected := len(datum.Expected) 115 | 116 | equal := true 117 | if 0 == lenActual && 0 == lenExpected { 118 | equal = true 119 | } else if lenActual != lenExpected { 120 | equal = false 121 | } else if actual[0] != datum.Expected[0] { 122 | equal = false 123 | } else if actual[lenActual-1] != datum.Expected[lenExpected-1] { 124 | equal = false 125 | } else { 126 | for j := 0 ; j < lenActual ; j++ { 127 | 128 | if actual[j] != datum.Expected[j] { 129 | equal = false 130 | } 131 | } 132 | } 133 | 134 | if !equal { 135 | t.Errorf("Did NOT get what was expected for calling step2() on [%s]. Expect [%s] but got [%s]", string(datum.S), string(datum.Expected), string(actual)) 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go Porter Stemmer 2 | 3 | A native Go clean room implementation of the Porter Stemming Algorithm. 4 | 5 | This algorithm is of interest to people doing Machine Learning or 6 | Natural Language Processing (NLP). 7 | 8 | This is NOT a port. This is a native Go implementation from the human-readable 9 | description of the algorithm. 
10 | 11 | I've tried to make it (more) efficient by NOT internally using strings, but 12 | instead internally using []rune slices and using the same (array) buffer used by 13 | the []rune slice (and sub-slices) at all steps of the algorithm. 14 | 15 | For the Porter Stemmer algorithm, see: 16 | 17 | http://tartarus.org/martin/PorterStemmer/def.txt (URL #1) 18 | 19 | http://tartarus.org/martin/PorterStemmer/ (URL #2) 20 | 21 | # Departures 22 | 23 | Also, when I initially implemented it, it failed the tests at... 24 | 25 | http://tartarus.org/martin/PorterStemmer/voc.txt (URL #3) 26 | 27 | http://tartarus.org/martin/PorterStemmer/output.txt (URL #4) 28 | 29 | ... after reading the human-readable text over and over again to try to figure out 30 | what the error I made was (and doing all sorts of things to debug it) I came to the 31 | conclusion that some of these tests were wrong according to the human-readable 32 | description of the algorithm. 33 | 34 | This led me to wonder if maybe other people's code that was passing these tests had 35 | rules that were not in the human-readable description. Which led me to look at the source 36 | code here... 37 | 38 | http://tartarus.org/martin/PorterStemmer/c.txt (URL #5) 39 | 40 | ... When I looked there I noticed that there are some items marked as a "DEPARTURE", 41 | which differ from the original algorithm. (There are 2 of these.) 42 | 43 | I implemented these departures, and the tests at URL #3 and URL #4 all passed.
44 | 45 | ## Usage 46 | 47 | To use this Golang library, use with something like: 48 | 49 | package main 50 | 51 | import ( 52 | "fmt" 53 | "github.com/reiver/go-porterstemmer" 54 | ) 55 | 56 | func main() { 57 | 58 | word := "Waxes" 59 | 60 | stem := porterstemmer.StemString(word) 61 | 62 | fmt.Printf("The word [%s] has the stem [%s].\n", word, stem) 63 | } 64 | 65 | Alternatively, if you want to be a bit more efficient, use []rune slices instead, with code like: 66 | 67 | package main 68 | 69 | import ( 70 | "fmt" 71 | "github.com/reiver/go-porterstemmer" 72 | ) 73 | 74 | func main() { 75 | 76 | word := []rune("Waxes") 77 | 78 | stem := porterstemmer.Stem(word) 79 | 80 | fmt.Printf("The word [%s] has the stem [%s].\n", string(word), string(stem)) 81 | } 82 | 83 | Although NOTE that the above code may modify original slice (named "word" in the example) as a side 84 | effect, for efficiency reasons. And that the slice named "stem" in the example above may be a 85 | sub-slice of the slice named "word". 86 | 87 | Also alternatively, if you already know that your word is already lowercase (and you don't need 88 | this library to lowercase your word for you) you can instead use code like: 89 | 90 | package main 91 | 92 | import ( 93 | "fmt" 94 | "github.com/reiver/go-porterstemmer" 95 | ) 96 | 97 | func main() { 98 | 99 | word := []rune("waxes") 100 | 101 | stem := porterstemmer.StemWithoutLowerCasing(word) 102 | 103 | fmt.Printf("The word [%s] has the stem [%s].\n", string(word), string(stem)) 104 | } 105 | 106 | Again NOTE (like with the previous example) that the above code may modify original slice (named 107 | "word" in the example) as a side effect, for efficiency reasons. And that the slice named "stem" 108 | in the example above may be a sub-slice of the slice named "word". 
109 | -------------------------------------------------------------------------------- /porterstemmer_has_suffix_test.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | "testing" 7 | ) 8 | 9 | 10 | 11 | func TestHasSuffix(t *testing.T) { 12 | 13 | tests := make([]struct { 14 | S []rune 15 | Suffix []rune 16 | Expected bool 17 | }, 82) 18 | 19 | 20 | 21 | i := 0 22 | 23 | 24 | tests[i].S = []rune("ran") 25 | tests[i].Suffix = []rune("er") 26 | tests[i].Expected = false 27 | i++ 28 | 29 | tests[i].S = []rune("runner") 30 | tests[i].Suffix = []rune("er") 31 | tests[i].Expected = true 32 | i++ 33 | 34 | tests[i].S = []rune("runnar") 35 | tests[i].Suffix = []rune("er") 36 | tests[i].Expected = false 37 | i++ 38 | 39 | tests[i].S = []rune("runned") 40 | tests[i].Suffix = []rune("er") 41 | tests[i].Expected = false 42 | i++ 43 | 44 | tests[i].S = []rune("runnre") 45 | tests[i].Suffix = []rune("er") 46 | tests[i].Expected = false 47 | i++ 48 | 49 | tests[i].S = []rune("er") 50 | tests[i].Suffix = []rune("er") 51 | tests[i].Expected = true 52 | i++ 53 | 54 | tests[i].S = []rune("re") 55 | tests[i].Suffix = []rune("er") 56 | tests[i].Expected = false 57 | i++ 58 | 59 | 60 | 61 | tests[i].S = []rune("ran") 62 | tests[i].Suffix = []rune("ER") 63 | tests[i].Expected = false 64 | i++ 65 | 66 | tests[i].S = []rune("runner") 67 | tests[i].Suffix = []rune("ER") 68 | tests[i].Expected = false 69 | i++ 70 | 71 | tests[i].S = []rune("runnar") 72 | tests[i].Suffix = []rune("ER") 73 | tests[i].Expected = false 74 | i++ 75 | 76 | tests[i].S = []rune("runned") 77 | tests[i].Suffix = []rune("ER") 78 | tests[i].Expected = false 79 | i++ 80 | 81 | tests[i].S = []rune("runnre") 82 | tests[i].Suffix = []rune("ER") 83 | tests[i].Expected = false 84 | i++ 85 | 86 | tests[i].S = []rune("er") 87 | tests[i].Suffix = []rune("ER") 88 | tests[i].Expected = false 89 | i++ 90 | 91 | tests[i].S = []rune("re") 92 | 
tests[i].Suffix = []rune("ER") 93 | tests[i].Expected = false 94 | i++ 95 | 96 | 97 | 98 | tests[i].S = []rune("") 99 | tests[i].Suffix = []rune("er") 100 | tests[i].Expected = false 101 | i++ 102 | 103 | tests[i].S = []rune("e") 104 | tests[i].Suffix = []rune("er") 105 | tests[i].Expected = false 106 | i++ 107 | 108 | 109 | 110 | tests[i].S = []rune("caresses") 111 | tests[i].Suffix = []rune("sses") 112 | tests[i].Expected = true 113 | i++ 114 | 115 | tests[i].S = []rune("ponies") 116 | tests[i].Suffix = []rune("ies") 117 | tests[i].Expected = true 118 | i++ 119 | 120 | tests[i].S = []rune("caress") 121 | tests[i].Suffix = []rune("ss") 122 | tests[i].Expected = true 123 | i++ 124 | 125 | tests[i].S = []rune("cats") 126 | tests[i].Suffix = []rune("s") 127 | tests[i].Expected = true 128 | i++ 129 | 130 | 131 | 132 | tests[i].S = []rune("feed") 133 | tests[i].Suffix = []rune("eed") 134 | tests[i].Expected = true 135 | i++ 136 | 137 | tests[i].S = []rune("agreed") 138 | tests[i].Suffix = []rune("eed") 139 | tests[i].Expected = true 140 | i++ 141 | 142 | tests[i].S = []rune("plastered") 143 | tests[i].Suffix = []rune("ed") 144 | tests[i].Expected = true 145 | i++ 146 | 147 | tests[i].S = []rune("bled") 148 | tests[i].Suffix = []rune("ed") 149 | tests[i].Expected = true 150 | i++ 151 | 152 | tests[i].S = []rune("motoring") 153 | tests[i].Suffix = []rune("ing") 154 | tests[i].Expected = true 155 | i++ 156 | 157 | tests[i].S = []rune("sing") 158 | tests[i].Suffix = []rune("ing") 159 | tests[i].Expected = true 160 | i++ 161 | 162 | 163 | 164 | tests[i].S = []rune("conflat") 165 | tests[i].Suffix = []rune("at") 166 | tests[i].Expected = true 167 | i++ 168 | 169 | tests[i].S = []rune("troubl") 170 | tests[i].Suffix = []rune("bl") 171 | tests[i].Expected = true 172 | i++ 173 | 174 | tests[i].S = []rune("siz") 175 | tests[i].Suffix = []rune("iz") 176 | tests[i].Expected = true 177 | i++ 178 | 179 | 180 | 181 | tests[i].S = []rune("happy") 182 | tests[i].Suffix = []rune("y") 
183 | tests[i].Expected = true 184 | i++ 185 | 186 | tests[i].S = []rune("sky") 187 | tests[i].Suffix = []rune("y") 188 | tests[i].Expected = true 189 | i++ 190 | 191 | 192 | 193 | tests[i].S = []rune("relational") 194 | tests[i].Suffix = []rune("ational") 195 | tests[i].Expected = true 196 | i++ 197 | 198 | tests[i].S = []rune("conditional") 199 | tests[i].Suffix = []rune("tional") 200 | tests[i].Expected = true 201 | i++ 202 | 203 | tests[i].S = []rune("rational") 204 | tests[i].Suffix = []rune("tional") 205 | tests[i].Expected = true 206 | i++ 207 | 208 | tests[i].S = []rune("valenci") 209 | tests[i].Suffix = []rune("enci") 210 | tests[i].Expected = true 211 | i++ 212 | 213 | tests[i].S = []rune("hesitanci") 214 | tests[i].Suffix = []rune("anci") 215 | tests[i].Expected = true 216 | i++ 217 | 218 | tests[i].S = []rune("digitizer") 219 | tests[i].Suffix = []rune("izer") 220 | tests[i].Expected = true 221 | i++ 222 | 223 | tests[i].S = []rune("conformabli") 224 | tests[i].Suffix = []rune("abli") 225 | tests[i].Expected = true 226 | i++ 227 | 228 | tests[i].S = []rune("radicalli") 229 | tests[i].Suffix = []rune("alli") 230 | tests[i].Expected = true 231 | i++ 232 | 233 | tests[i].S = []rune("differentli") 234 | tests[i].Suffix = []rune("entli") 235 | tests[i].Expected = true 236 | i++ 237 | 238 | tests[i].S = []rune("vileli") 239 | tests[i].Suffix = []rune("eli") 240 | tests[i].Expected = true 241 | i++ 242 | 243 | tests[i].S = []rune("analogousli") 244 | tests[i].Suffix = []rune("ousli") 245 | tests[i].Expected = true 246 | i++ 247 | 248 | tests[i].S = []rune("vietnamization") 249 | tests[i].Suffix = []rune("ization") 250 | tests[i].Expected = true 251 | i++ 252 | 253 | tests[i].S = []rune("predication") 254 | tests[i].Suffix = []rune("ation") 255 | tests[i].Expected = true 256 | i++ 257 | 258 | tests[i].S = []rune("operator") 259 | tests[i].Suffix = []rune("ator") 260 | tests[i].Expected = true 261 | i++ 262 | 263 | tests[i].S = []rune("feudalism") 264 | 
tests[i].Suffix = []rune("alism") 265 | tests[i].Expected = true 266 | i++ 267 | 268 | tests[i].S = []rune("decisiveness") 269 | tests[i].Suffix = []rune("iveness") 270 | tests[i].Expected = true 271 | i++ 272 | 273 | tests[i].S = []rune("hopefulness") 274 | tests[i].Suffix = []rune("fulness") 275 | tests[i].Expected = true 276 | i++ 277 | 278 | tests[i].S = []rune("callousness") 279 | tests[i].Suffix = []rune("ousness") 280 | tests[i].Expected = true 281 | i++ 282 | 283 | tests[i].S = []rune("formaliti") 284 | tests[i].Suffix = []rune("aliti") 285 | tests[i].Expected = true 286 | i++ 287 | 288 | tests[i].S = []rune("sensitiviti") 289 | tests[i].Suffix = []rune("iviti") 290 | tests[i].Expected = true 291 | i++ 292 | 293 | tests[i].S = []rune("sensibiliti") 294 | tests[i].Suffix = []rune("biliti") 295 | tests[i].Expected = true 296 | i++ 297 | 298 | 299 | 300 | tests[i].S = []rune("triplicate") 301 | tests[i].Suffix = []rune("icate") 302 | tests[i].Expected = true 303 | i++ 304 | 305 | tests[i].S = []rune("formative") 306 | tests[i].Suffix = []rune("ative") 307 | tests[i].Expected = true 308 | i++ 309 | 310 | tests[i].S = []rune("formalize") 311 | tests[i].Suffix = []rune("alize") 312 | tests[i].Expected = true 313 | i++ 314 | 315 | tests[i].S = []rune("electriciti") 316 | tests[i].Suffix = []rune("iciti") 317 | tests[i].Expected = true 318 | i++ 319 | 320 | tests[i].S = []rune("electrical") 321 | tests[i].Suffix = []rune("ical") 322 | tests[i].Expected = true 323 | i++ 324 | 325 | tests[i].S = []rune("hopeful") 326 | tests[i].Suffix = []rune("ful") 327 | tests[i].Expected = true 328 | i++ 329 | 330 | tests[i].S = []rune("goodness") 331 | tests[i].Suffix = []rune("ness") 332 | tests[i].Expected = true 333 | i++ 334 | 335 | 336 | 337 | tests[i].S = []rune("revival") 338 | tests[i].Suffix = []rune("al") 339 | tests[i].Expected = true 340 | i++ 341 | 342 | tests[i].S = []rune("allowance") 343 | tests[i].Suffix = []rune("ance") 344 | tests[i].Expected = true 345 | i++ 
346 | 347 | tests[i].S = []rune("inference") 348 | tests[i].Suffix = []rune("ence") 349 | tests[i].Expected = true 350 | i++ 351 | 352 | tests[i].S = []rune("airliner") 353 | tests[i].Suffix = []rune("er") 354 | tests[i].Expected = true 355 | i++ 356 | 357 | tests[i].S = []rune("gyroscopic") 358 | tests[i].Suffix = []rune("ic") 359 | tests[i].Expected = true 360 | i++ 361 | 362 | tests[i].S = []rune("adjustable") 363 | tests[i].Suffix = []rune("able") 364 | tests[i].Expected = true 365 | i++ 366 | 367 | tests[i].S = []rune("defensible") 368 | tests[i].Suffix = []rune("ible") 369 | tests[i].Expected = true 370 | i++ 371 | 372 | tests[i].S = []rune("irritant") 373 | tests[i].Suffix = []rune("ant") 374 | tests[i].Expected = true 375 | i++ 376 | 377 | tests[i].S = []rune("replacement") 378 | tests[i].Suffix = []rune("ement") 379 | tests[i].Expected = true 380 | i++ 381 | 382 | tests[i].S = []rune("adjustment") 383 | tests[i].Suffix = []rune("ment") 384 | tests[i].Expected = true 385 | i++ 386 | 387 | tests[i].S = []rune("dependent") 388 | tests[i].Suffix = []rune("ent") 389 | tests[i].Expected = true 390 | i++ 391 | 392 | tests[i].S = []rune("adoption") 393 | tests[i].Suffix = []rune("ion") 394 | tests[i].Expected = true 395 | i++ 396 | 397 | tests[i].S = []rune("homologou") 398 | tests[i].Suffix = []rune("ou") 399 | tests[i].Expected = true 400 | i++ 401 | 402 | tests[i].S = []rune("communism") 403 | tests[i].Suffix = []rune("ism") 404 | tests[i].Expected = true 405 | i++ 406 | 407 | tests[i].S = []rune("activate") 408 | tests[i].Suffix = []rune("ate") 409 | tests[i].Expected = true 410 | i++ 411 | 412 | tests[i].S = []rune("angulariti") 413 | tests[i].Suffix = []rune("iti") 414 | tests[i].Expected = true 415 | i++ 416 | 417 | tests[i].S = []rune("homologous") 418 | tests[i].Suffix = []rune("ous") 419 | tests[i].Expected = true 420 | i++ 421 | 422 | tests[i].S = []rune("effective") 423 | tests[i].Suffix = []rune("ive") 424 | tests[i].Expected = true 425 | i++ 426 | 
427 | tests[i].S = []rune("bowdlerize") 428 | tests[i].Suffix = []rune("ize") 429 | tests[i].Expected = true 430 | i++ 431 | 432 | 433 | 434 | tests[i].S = []rune("probate") 435 | tests[i].Suffix = []rune("e") 436 | tests[i].Expected = true 437 | i++ 438 | 439 | tests[i].S = []rune("rate") 440 | tests[i].Suffix = []rune("e") 441 | tests[i].Expected = true 442 | i++ 443 | 444 | tests[i].S = []rune("cease") 445 | tests[i].Suffix = []rune("e") 446 | tests[i].Expected = true 447 | i++ 448 | 449 | for _,datum := range tests { 450 | if actual := hasSuffix(datum.S, datum.Suffix) ; actual != datum.Expected { 451 | t.Errorf("Did NOT get what was expected for calling hasSuffix() on [%s] with suffix [%s]. Expect [%d] but got [%d]", string(datum.S), string(datum.Suffix), datum.Expected, actual) 452 | } 453 | } 454 | } 455 | 456 | -------------------------------------------------------------------------------- /porterstemmer.go: -------------------------------------------------------------------------------- 1 | package porterstemmer 2 | 3 | 4 | 5 | import ( 6 | // "log" 7 | "unicode" 8 | ) 9 | 10 | 11 | 12 | func isConsonant(s []rune, i int) bool { 13 | 14 | //DEBUG 15 | //log.Printf("isConsonant: [%+v]", string(s[i])) 16 | 17 | result := true 18 | 19 | switch ( s[i] ) { 20 | case 'a', 'e', 'i', 'o', 'u': 21 | result = false 22 | case 'y': 23 | if 0 == i { 24 | result = true 25 | } else { 26 | result = !isConsonant(s, i-1) 27 | } 28 | default: 29 | result = true 30 | } 31 | 32 | return result 33 | } 34 | 35 | 36 | 37 | func measure(s []rune) uint { 38 | 39 | // Initialize. 40 | lenS := len(s) 41 | result := uint(0) 42 | i := 0 43 | 44 | 45 | // Short Circuit. 46 | if 0 == lenS { 47 | /////////// RETURN 48 | return result 49 | } 50 | 51 | 52 | // Ignore (potential) consonant sequence at the beginning of word. 
53 | for isConsonant(s, i) { 54 | 55 | //DEBUG 56 | //log.Printf("[measure([%s])] Eat Consonant [%d] -> [%s]", string(s), i, string(s[i])) 57 | 58 | i++ 59 | if i >= lenS { 60 | /////////////// RETURN 61 | return result 62 | } 63 | } 64 | 65 | 66 | // For each pair of a vowel sequence followed by a consonant sequence, increment result. 67 | Outer: 68 | for i < lenS { 69 | 70 | for !isConsonant(s, i) { 71 | 72 | //DEBUG 73 | //log.Printf("[measure([%s])] VOWEL [%d] -> [%s]", string(s), i, string(s[i])) 74 | 75 | i++ 76 | if i >= lenS { 77 | /////////// BREAK 78 | break Outer 79 | } 80 | } 81 | for isConsonant(s, i) { 82 | 83 | //DEBUG 84 | //log.Printf("[measure([%s])] CONSONANT [%d] -> [%s]", string(s), i, string(s[i])) 85 | 86 | i++ 87 | if i >= lenS { 88 | result++ 89 | /////////// BREAK 90 | break Outer 91 | } 92 | } 93 | result++ 94 | } 95 | 96 | 97 | // Return 98 | return result 99 | } 100 | 101 | 102 | 103 | func hasSuffix(s, suffix []rune) bool { 104 | 105 | lenSMinusOne := len(s) - 1 106 | lenSuffixMinusOne := len(suffix) - 1 107 | 108 | if lenSMinusOne <= lenSuffixMinusOne { 109 | return false 110 | } else if s[lenSMinusOne] != suffix[lenSuffixMinusOne] { // I suspect checking this first should speed this function up in practice. 111 | /////// RETURN 112 | return false 113 | } else { 114 | 115 | for i := 0; i < lenSuffixMinusOne ; i++ { 116 | 117 | if suffix[i] != s[lenSMinusOne-lenSuffixMinusOne+i] { 118 | /////////////// RETURN 119 | return false 120 | } 121 | 122 | } 123 | 124 | } 125 | 126 | 127 | return true 128 | } 129 | 130 | 131 | 132 | func containsVowel(s []rune) bool { 133 | 134 | lenS := len(s) 135 | 136 | for i := 0 ; i < lenS ; i++ { 137 | 138 | if !isConsonant(s, i) { 139 | /////////// RETURN 140 | return true 141 | } 142 | 143 | } 144 | 145 | return false 146 | } 147 | 148 | 149 | 150 | func hasRepeatDoubleConsonantSuffix(s []rune) bool { 151 | 152 | // Initialize. 153 | lenS := len(s) 154 | 155 | result := false 156 | 157 | 158 | // Do it! 
159 | if 2 > lenS { 160 | result = false 161 | } else if s[lenS-1] == s[lenS-2] && isConsonant(s, lenS-1) { // Will using isConsonant() cause a problem with "YY"? 162 | result = true 163 | } else { 164 | result = false 165 | } 166 | 167 | 168 | // Return, 169 | return result 170 | } 171 | 172 | 173 | 174 | func hasConsonantVowelConsonantSuffix(s []rune) bool { 175 | 176 | // Initialize. 177 | lenS := len(s) 178 | 179 | result := false 180 | 181 | 182 | // Do it! 183 | if 3 > lenS { 184 | result = false 185 | } else if isConsonant(s, lenS-3) && !isConsonant(s, lenS-2) && isConsonant(s, lenS-1) { 186 | result = true 187 | } else { 188 | result = false 189 | } 190 | 191 | 192 | // Return 193 | return result 194 | } 195 | 196 | 197 | 198 | func step1a(s []rune) []rune { 199 | 200 | // Initialize. 201 | var result []rune = s 202 | 203 | lenS := len(s) 204 | 205 | 206 | // Do it! 207 | if suffix := []rune("sses") ; hasSuffix(s, suffix) { 208 | 209 | lenTrim := 2 210 | 211 | subSlice := s[:lenS-lenTrim] 212 | 213 | result = subSlice 214 | } else if suffix := []rune("ies") ; hasSuffix(s, suffix) { 215 | lenTrim := 2 216 | 217 | subSlice := s[:lenS-lenTrim] 218 | 219 | result = subSlice 220 | } else if suffix := []rune("ss") ; hasSuffix(s, suffix) { 221 | 222 | result = s 223 | } else if suffix := []rune("s") ; hasSuffix(s, suffix) { 224 | 225 | lenSuffix := 1 226 | 227 | subSlice := s[:lenS-lenSuffix] 228 | 229 | result = subSlice 230 | } 231 | 232 | 233 | // Return. 234 | return result 235 | } 236 | 237 | 238 | 239 | func step1b(s []rune) []rune { 240 | 241 | // Initialize. 242 | var result []rune = s 243 | 244 | lenS := len(s) 245 | 246 | 247 | // Do it! 
248 | if suffix := []rune("eed") ; hasSuffix(s, suffix) { 249 | lenSuffix := len(suffix) 250 | 251 | subSlice := s[:lenS-lenSuffix] 252 | 253 | m := measure(subSlice) 254 | 255 | if 0 < m { 256 | lenTrim := 1 257 | 258 | result = s[:lenS-lenTrim] 259 | } 260 | } else if suffix := []rune("ed") ; hasSuffix(s, suffix) { 261 | lenSuffix := len(suffix) 262 | 263 | subSlice := s[:lenS-lenSuffix] 264 | 265 | if containsVowel(subSlice) { 266 | 267 | if suffix2 := []rune("at") ; hasSuffix(subSlice, suffix2) { 268 | lenTrim := -1 269 | 270 | result = s[:lenS-lenSuffix-lenTrim] 271 | } else if suffix2 := []rune("bl") ; hasSuffix(subSlice, suffix2) { 272 | lenTrim := -1 273 | 274 | result = s[:lenS-lenSuffix-lenTrim] 275 | } else if suffix2 := []rune("iz") ; hasSuffix(subSlice, suffix2) { 276 | lenTrim := -1 277 | 278 | result = s[:lenS-lenSuffix-lenTrim] 279 | } else if c := subSlice[len(subSlice)-1] ; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) { 280 | lenTrim := 1 281 | 282 | lenSubSlice := len(subSlice) 283 | 284 | result = subSlice[:lenSubSlice-lenTrim] 285 | } else if c := subSlice[len(subSlice)-1] ; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c { 286 | lenTrim := -1 287 | 288 | result = s[:lenS-lenSuffix-lenTrim] 289 | 290 | result[len(result)-1] = 'e' 291 | } else { 292 | result = subSlice 293 | } 294 | 295 | } 296 | } else if suffix := []rune("ing") ; hasSuffix(s, suffix) { 297 | lenSuffix := len(suffix) 298 | 299 | subSlice := s[:lenS-lenSuffix] 300 | 301 | if containsVowel(subSlice) { 302 | 303 | if suffix2 := []rune("at") ; hasSuffix(subSlice, suffix2) { 304 | lenTrim := -1 305 | 306 | result = s[:lenS-lenSuffix-lenTrim] 307 | 308 | result[len(result)-1] = 'e' 309 | } else if suffix2 := []rune("bl") ; hasSuffix(subSlice, suffix2) { 310 | lenTrim := -1 311 | 312 | result = s[:lenS-lenSuffix-lenTrim] 313 | 314 | result[len(result)-1] = 'e' 315 | } else if suffix2 := 
[]rune("iz") ; hasSuffix(subSlice, suffix2) { 316 | lenTrim := -1 317 | 318 | result = s[:lenS-lenSuffix-lenTrim] 319 | 320 | result[len(result)-1] = 'e' 321 | } else if c := subSlice[len(subSlice)-1] ; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) { 322 | lenTrim := 1 323 | 324 | lenSubSlice := len(subSlice) 325 | 326 | result = subSlice[:lenSubSlice-lenTrim] 327 | } else if c := subSlice[len(subSlice)-1] ; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c { 328 | lenTrim := -1 329 | 330 | result = s[:lenS-lenSuffix-lenTrim] 331 | 332 | result[len(result)-1] = 'e' 333 | } else { 334 | result = subSlice 335 | } 336 | 337 | } 338 | } 339 | 340 | 341 | // Return. 342 | return result 343 | } 344 | 345 | 346 | 347 | func step1c(s []rune) []rune { 348 | 349 | // Initialize. 350 | lenS := len(s) 351 | 352 | result := s 353 | 354 | 355 | // Do it! 356 | if 2 > lenS { 357 | /////////// RETURN 358 | return result 359 | } 360 | 361 | if 'y' == s[lenS-1] && containsVowel(s[:lenS-1]) { 362 | 363 | result[lenS-1] = 'i'; 364 | 365 | } else if 'Y' == s[lenS-1] && containsVowel(s[:lenS-1]) { 366 | 367 | result[lenS-1] = 'I'; 368 | 369 | } 370 | 371 | 372 | // Return. 373 | return result 374 | } 375 | 376 | 377 | 378 | func step2(s []rune) []rune { 379 | 380 | // Initialize. 381 | lenS := len(s) 382 | 383 | result := s 384 | 385 | 386 | // Do it! 
387 | if suffix := []rune("ational") ; hasSuffix(s, suffix) { 388 | if 0 < measure(s[:lenS-len(suffix)]) { 389 | result[lenS-5] = 'e' 390 | result = result[:lenS-4] 391 | } 392 | } else if suffix := []rune("tional") ; hasSuffix(s, suffix) { 393 | if 0 < measure(s[:lenS-len(suffix)]) { 394 | result = result[:lenS-2] 395 | } 396 | } else if suffix := []rune("enci") ; hasSuffix(s, suffix) { 397 | if 0 < measure(s[:lenS-len(suffix)]) { 398 | result[lenS-1] = 'e' 399 | } 400 | } else if suffix := []rune("anci") ; hasSuffix(s, suffix) { 401 | if 0 < measure(s[:lenS-len(suffix)]) { 402 | result[lenS-1] = 'e' 403 | } 404 | } else if suffix := []rune("izer") ; hasSuffix(s, suffix) { 405 | if 0 < measure(s[:lenS-len(suffix)]) { 406 | result = s[:lenS-1] 407 | } 408 | } else if suffix := []rune("bli") ; hasSuffix(s, suffix) { // --DEPARTURE-- 409 | // } else if suffix := []rune("abli") ; hasSuffix(s, suffix) { 410 | if 0 < measure(s[:lenS-len(suffix)]) { 411 | result[lenS-1] = 'e' 412 | } 413 | } else if suffix := []rune("alli") ; hasSuffix(s, suffix) { 414 | if 0 < measure(s[:lenS-len(suffix)]) { 415 | result = s[:lenS-2] 416 | } 417 | } else if suffix := []rune("entli") ; hasSuffix(s, suffix) { 418 | if 0 < measure(s[:lenS-len(suffix)]) { 419 | result = s[:lenS-2] 420 | } 421 | } else if suffix := []rune("eli") ; hasSuffix(s, suffix) { 422 | if 0 < measure(s[:lenS-len(suffix)]) { 423 | result = s[:lenS-2] 424 | } 425 | } else if suffix := []rune("ousli") ; hasSuffix(s, suffix) { 426 | if 0 < measure(s[:lenS-len(suffix)]) { 427 | result = s[:lenS-2] 428 | } 429 | } else if suffix := []rune("ization") ; hasSuffix(s, suffix) { 430 | if 0 < measure(s[:lenS-len(suffix)]) { 431 | result[lenS-5] = 'e' 432 | 433 | result = s[:lenS-4] 434 | } 435 | } else if suffix := []rune("ation") ; hasSuffix(s, suffix) { 436 | if 0 < measure(s[:lenS-len(suffix)]) { 437 | result[lenS-3] = 'e' 438 | 439 | result = s[:lenS-2] 440 | } 441 | } else if suffix := []rune("ator") ; hasSuffix(s, suffix) { 
442 | if 0 < measure(s[:lenS-len(suffix)]) { 443 | result[lenS-2] = 'e' 444 | 445 | result = s[:lenS-1] 446 | } 447 | } else if suffix := []rune("alism") ; hasSuffix(s, suffix) { 448 | if 0 < measure(s[:lenS-len(suffix)]) { 449 | result = s[:lenS-3] 450 | } 451 | } else if suffix := []rune("iveness") ; hasSuffix(s, suffix) { 452 | if 0 < measure(s[:lenS-len(suffix)]) { 453 | result = s[:lenS-4] 454 | } 455 | } else if suffix := []rune("fulness") ; hasSuffix(s, suffix) { 456 | if 0 < measure(s[:lenS-len(suffix)]) { 457 | result = s[:lenS-4] 458 | } 459 | } else if suffix := []rune("ousness") ; hasSuffix(s, suffix) { 460 | if 0 < measure(s[:lenS-len(suffix)]) { 461 | result = s[:lenS-4] 462 | } 463 | } else if suffix := []rune("aliti") ; hasSuffix(s, suffix) { 464 | if 0 < measure(s[:lenS-len(suffix)]) { 465 | result = s[:lenS-3] 466 | } 467 | } else if suffix := []rune("iviti") ; hasSuffix(s, suffix) { 468 | if 0 < measure(s[:lenS-len(suffix)]) { 469 | result[lenS-3] = 'e' 470 | 471 | result = result[:lenS-2] 472 | } 473 | } else if suffix := []rune("biliti") ; hasSuffix(s, suffix) { 474 | if 0 < measure(s[:lenS-len(suffix)]) { 475 | result[lenS-5] = 'l' 476 | result[lenS-4] = 'e' 477 | 478 | result = result[:lenS-3] 479 | } 480 | } else if suffix := []rune("logi") ; hasSuffix(s, suffix) { // --DEPARTURE-- 481 | if 0 < measure(s[:lenS-len(suffix)]) { 482 | lenTrim := 1 483 | 484 | result = s[:lenS-lenTrim] 485 | } 486 | } 487 | 488 | 489 | // Return. 490 | return result 491 | } 492 | 493 | 494 | 495 | func step3(s []rune) []rune { 496 | 497 | // Initialize. 498 | lenS := len(s) 499 | result := s 500 | 501 | 502 | // Do it! 
503 | if suffix := []rune("icate") ; hasSuffix(s, suffix) { 504 | lenSuffix := len(suffix) 505 | 506 | if 0 < measure(s[:lenS-lenSuffix]) { 507 | result = result[:lenS-3] 508 | } 509 | } else if suffix := []rune("ative") ; hasSuffix(s, suffix) { 510 | lenSuffix := len(suffix) 511 | 512 | subSlice := s[:lenS-lenSuffix] 513 | 514 | m := measure(subSlice) 515 | 516 | if 0 < m { 517 | result = subSlice 518 | } 519 | } else if suffix := []rune("alize") ; hasSuffix(s, suffix) { 520 | lenSuffix := len(suffix) 521 | 522 | if 0 < measure(s[:lenS-lenSuffix]) { 523 | result = result[:lenS-3] 524 | } 525 | } else if suffix := []rune("iciti") ; hasSuffix(s, suffix) { 526 | lenSuffix := len(suffix) 527 | 528 | if 0 < measure(s[:lenS-lenSuffix]) { 529 | result = result[:lenS-3] 530 | } 531 | } else if suffix := []rune("ical") ; hasSuffix(s, suffix) { 532 | lenSuffix := len(suffix) 533 | 534 | if 0 < measure(s[:lenS-lenSuffix]) { 535 | result = result[:lenS-2] 536 | } 537 | } else if suffix := []rune("ful") ; hasSuffix(s, suffix) { 538 | lenSuffix := len(suffix) 539 | 540 | subSlice := s[:lenS-lenSuffix] 541 | 542 | m := measure(subSlice) 543 | 544 | if 0 < m { 545 | result = subSlice 546 | } 547 | } else if suffix := []rune("ness") ; hasSuffix(s, suffix) { 548 | lenSuffix := len(suffix) 549 | 550 | subSlice := s[:lenS-lenSuffix] 551 | 552 | m := measure(subSlice) 553 | 554 | if 0 < m { 555 | result = subSlice 556 | } 557 | } 558 | 559 | 560 | // Return. 561 | return result 562 | } 563 | 564 | 565 | 566 | func step4(s []rune) []rune { 567 | 568 | // Initialize. 569 | lenS := len(s) 570 | result := s 571 | 572 | 573 | // Do it! 
574 | if suffix := []rune("al") ; hasSuffix(s, suffix) { 575 | lenSuffix := len(suffix) 576 | 577 | subSlice := s[:lenS-lenSuffix] 578 | 579 | m := measure(subSlice) 580 | 581 | if 1 < m { 582 | result = result[:lenS-lenSuffix] 583 | } 584 | } else if suffix := []rune("ance") ; hasSuffix(s, suffix) { 585 | lenSuffix := len(suffix) 586 | 587 | subSlice := s[:lenS-lenSuffix] 588 | 589 | m := measure(subSlice) 590 | 591 | if 1 < m { 592 | result = result[:lenS-lenSuffix] 593 | } 594 | } else if suffix := []rune("ence") ; hasSuffix(s, suffix) { 595 | lenSuffix := len(suffix) 596 | 597 | subSlice := s[:lenS-lenSuffix] 598 | 599 | m := measure(subSlice) 600 | 601 | if 1 < m { 602 | result = result[:lenS-lenSuffix] 603 | } 604 | } else if suffix := []rune("er") ; hasSuffix(s, suffix) { 605 | lenSuffix := len(suffix) 606 | 607 | subSlice := s[:lenS-lenSuffix] 608 | 609 | m := measure(subSlice) 610 | 611 | if 1 < m { 612 | result = subSlice 613 | } 614 | } else if suffix := []rune("ic") ; hasSuffix(s, suffix) { 615 | lenSuffix := len(suffix) 616 | 617 | subSlice := s[:lenS-lenSuffix] 618 | 619 | m := measure(subSlice) 620 | 621 | if 1 < m { 622 | result = subSlice 623 | } 624 | } else if suffix := []rune("able") ; hasSuffix(s, suffix) { 625 | lenSuffix := len(suffix) 626 | 627 | subSlice := s[:lenS-lenSuffix] 628 | 629 | m := measure(subSlice) 630 | 631 | if 1 < m { 632 | result = subSlice 633 | } 634 | } else if suffix := []rune("ible") ; hasSuffix(s, suffix) { 635 | lenSuffix := len(suffix) 636 | 637 | subSlice := s[:lenS-lenSuffix] 638 | 639 | m := measure(subSlice) 640 | 641 | if 1 < m { 642 | result = subSlice 643 | } 644 | } else if suffix := []rune("ant") ; hasSuffix(s, suffix) { 645 | lenSuffix := len(suffix) 646 | 647 | subSlice := s[:lenS-lenSuffix] 648 | 649 | m := measure(subSlice) 650 | 651 | if 1 < m { 652 | result = subSlice 653 | } 654 | } else if suffix := []rune("ement") ; hasSuffix(s, suffix) { 655 | lenSuffix := len(suffix) 656 | 657 | subSlice := 
s[:lenS-lenSuffix] 658 | 659 | m := measure(subSlice) 660 | 661 | if 1 < m { 662 | result = subSlice 663 | } 664 | } else if suffix := []rune("ment") ; hasSuffix(s, suffix) { 665 | lenSuffix := len(suffix) 666 | 667 | subSlice := s[:lenS-lenSuffix] 668 | 669 | m := measure(subSlice) 670 | 671 | if 1 < m { 672 | result = subSlice 673 | } 674 | } else if suffix := []rune("ent") ; hasSuffix(s, suffix) { 675 | lenSuffix := len(suffix) 676 | 677 | subSlice := s[:lenS-lenSuffix] 678 | 679 | m := measure(subSlice) 680 | 681 | if 1 < m { 682 | result = subSlice 683 | } 684 | } else if suffix := []rune("ion") ; hasSuffix(s, suffix) { 685 | lenSuffix := len(suffix) 686 | 687 | subSlice := s[:lenS-lenSuffix] 688 | 689 | m := measure(subSlice) 690 | 691 | c := subSlice[len(subSlice)-1] 692 | 693 | if 1 < m && ('s' == c || 't' == c) { 694 | result = subSlice 695 | } 696 | } else if suffix := []rune("ou") ; hasSuffix(s, suffix) { 697 | lenSuffix := len(suffix) 698 | 699 | subSlice := s[:lenS-lenSuffix] 700 | 701 | m := measure(subSlice) 702 | 703 | if 1 < m { 704 | result = subSlice 705 | } 706 | } else if suffix := []rune("ism") ; hasSuffix(s, suffix) { 707 | lenSuffix := len(suffix) 708 | 709 | subSlice := s[:lenS-lenSuffix] 710 | 711 | m := measure(subSlice) 712 | 713 | if 1 < m { 714 | result = subSlice 715 | } 716 | } else if suffix := []rune("ate") ; hasSuffix(s, suffix) { 717 | lenSuffix := len(suffix) 718 | 719 | subSlice := s[:lenS-lenSuffix] 720 | 721 | m := measure(subSlice) 722 | 723 | if 1 < m { 724 | result = subSlice 725 | } 726 | } else if suffix := []rune("iti") ; hasSuffix(s, suffix) { 727 | lenSuffix := len(suffix) 728 | 729 | subSlice := s[:lenS-lenSuffix] 730 | 731 | m := measure(subSlice) 732 | 733 | if 1 < m { 734 | result = subSlice 735 | } 736 | } else if suffix := []rune("ous") ; hasSuffix(s, suffix) { 737 | lenSuffix := len(suffix) 738 | 739 | subSlice := s[:lenS-lenSuffix] 740 | 741 | m := measure(subSlice) 742 | 743 | if 1 < m { 744 | result = 
subSlice
		}
	} else if suffix := []rune("ive"); hasSuffix(s, suffix) {
		lenSuffix := len(suffix)

		subSlice := s[:lenS-lenSuffix]

		m := measure(subSlice)

		if 1 < m {
			result = subSlice
		}
	} else if suffix := []rune("ize"); hasSuffix(s, suffix) {
		lenSuffix := len(suffix)

		subSlice := s[:lenS-lenSuffix]

		m := measure(subSlice)

		if 1 < m {
			result = subSlice
		}
	}

	// Return.
	return result
}

// step5a applies step 5a of the stemmer: it may drop a trailing 'e'.
//
// With stem being s minus its final 'e', the 'e' is removed when
// measure(stem) is greater than 1, or when measure(stem) is exactly 1 and it
// is NOT the case that hasConsonantVowelConsonantSuffix(stem) holds while the
// last rune of stem is none of 'w', 'x' or 'y'. In every other case s is
// returned unchanged. A shortened result is a sub-slice of s.
func step5a(s []rune) []rune {
	lenS := len(s)

	// An empty word has no last rune to inspect; without this guard the
	// index expression below would panic.
	if lenS == 0 {
		return s
	}

	if s[lenS-1] != 'e' {
		return s
	}

	stem := s[:lenS-1]
	if len(stem) == 0 {
		// The whole word is the single rune 'e'; leave it untouched.
		return s
	}

	m := measure(stem)
	switch {
	case 1 < m:
		return stem
	case 1 == m:
		c := stem[len(stem)-1]
		if !(hasConsonantVowelConsonantSuffix(stem) && 'w' != c && 'x' != c && 'y' != c) {
			return stem
		}
	}

	return s
}

// step5b applies step 5b of the stemmer: a word of more than two runes that
// ends in "ll" loses its final 'l' when the measure of the shortened word is
// greater than 1. Otherwise s is returned unchanged. A shortened result is a
// sub-slice of s.
func step5b(s []rune) []rune {
	lenS := len(s)

	// The length check also keeps the two index expressions in range.
	if 2 < lenS && 'l' == s[lenS-2] && 'l' == s[lenS-1] {
		shortened := s[:lenS-1]
		if 1 < measure(shortened) {
			return shortened
		}
	}

	return s
}

// StemString returns the stem of s.
//
// It converts s to a slice of runes, runs that slice through Stem (which
// lower-cases it), and converts the result back to a string.
func StemString(s string) string {
	return string(Stem([]rune(s)))
}

// Stem lower-cases every rune of s and returns the result of calling
// StemWithoutLowerCasing on it. An empty s is returned as is.
//
// The lower-casing is done in place, so the caller's slice is modified.
func Stem(s []rune) []rune {
	// Short circuit: nothing to do for an empty word.
	if len(s) == 0 {
		return s
	}

	// Make all runes lowercase, overwriting the input.
	for i, r := range s {
		s[i] = unicode.ToLower(r)
	}

	return StemWithoutLowerCasing(s)
}

// StemWithoutLowerCasing runs s through step1a, step1b, step1c, step2, step3,
// step4, step5a and step5b, in that order, and returns the result. It performs
// no case conversion itself; use Stem for input that may contain upper-case
// runes.
//
// Words of two runes or fewer are returned unchanged.
func StemWithoutLowerCasing(s []rune) []rune {
	// Words of length 2 or less are already stemmed.
	// Don't do anything.
	if len(s) <= 2 {
		return s
	}

	// Each step consumes the output of the previous one.
	s = step1a(s)
	s = step1b(s)
	s = step1c(s)
	s = step2(s)
	s = step3(s)
	s = step4(s)
	s = step5a(s)
	s = step5b(s)

	return s
}