├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── CONTRIBUTORS ├── LICENSE ├── README.md ├── img └── gwizo.png ├── input.txt ├── porter.go ├── stem.go ├── step1a.go ├── step1a_test.go ├── step1b.go ├── step1b_test.go ├── step1c.go ├── step1c_test.go ├── step2.go ├── step2_test.go ├── step3.go ├── step3_test.go ├── step4.go ├── step4_test.go ├── step5a.go ├── step5a_test.go ├── step5b.go ├── step5b_test.go └── test.sh /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of gwizo authors for copyright purposes. 2 | # This file is distinct from the CONTRIBUTORS file. 3 | # See the latter for an explanation. 4 | # [Name]=> ---------------------------- 5 | # [Email]=> --------------------------- 6 | # [Github]=> -------------------------- 7 | # [Social]=> -------------------------- 8 | 9 | [Name]=> kampsy kampamba chanda 10 | [Email]=> kampambachanda@gmail.com 11 | [Github]=> https://github.com/kampsy 12 | [Social]=> google.com/+kampambachanda 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing to Gwizo 2 | 3 | Gwizo is an open source project. 4 | 5 | ## Contributing code 6 | Please make sure you clearly explain why your patches or features should be pulled. 7 | 8 | **We accept GitHub pull requests**. 9 | All contributions are accepted through GitHub pull requests. 10 | 11 | 12 | General questions should go to [kampambachanda@gmail.com] 13 | 14 | Unless otherwise noted, Gwizo source files are distributed under 15 | the BSD-style license found in the LICENSE file. 
16 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | # This is the official list of people who can contribute 2 | # (and typically have contributed) code to the Cognilog repository. 3 | # The AUTHORS file lists the copyright holders; This file 4 | # lists people who have contributed to cognilog. 5 | # 6 | # People are automatically added to the list by the Author after a successful 7 | # **GitHub pull requests** for more information please see the CONTRIBUTING.md file. 8 | # 9 | # [Name]=> ---------------------------- 10 | # [Email]=> --------------------------- 11 | # [Github]=> -------------------------- 12 | # [Social]=> -------------------------- 13 | 14 | [Name]=> kampsy kampamba chanda 15 | [Email]=> kampambachanda@gmail.com 16 | [Github]=> https://github.com/kampsy 17 | [Social]=> google.com/+kampambachanda 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 kampamba chanda. All rights reserved. 2 | 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are 6 | met: 7 | * Redistribution, modification and use in source code and binary forms are 8 | permitted provided that the above copyright notice, this list of conditions 9 | and the following disclaimer is duplicated in all such forms. 10 | * All advertising materials mentioning features or use of this software must 11 | include a link to the AUTHOR and CONTRIBUTORS files. 12 | * Neither the names of the Developer nor the names of its 13 | contributors may be used to endorse or promote products 14 | derived from this software without specific prior written permission. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gwizo 2 | 3 | ![home](https://github.com/kampsy/gwizo/blob/master/img/gwizo.png) 4 | 5 | [![Gwizo version](https://img.shields.io/badge/gwizo-2.0-green.svg)](https://github.com/kampsy/gwizo) 6 | [![GoDoc](https://godoc.org/github.com/kampsy/gwizo?status.svg)](https://godoc.org/github.com/kampsy/gwizo) 7 | [![License](https://img.shields.io/badge/license-BSD%20Style-blue.svg)](https://github.com/kampsy/gwizo/blob/master/LICENSE) 8 | [![Twitter](https://img.shields.io/twitter/url/http/shields.io.svg?style=social)](https://twitter.com/kampsy) 9 | 10 | Package gwizo implements the Porter stemming algorithm described in Porter, M. "An algorithm for suffix stripping." 11 | Program 14.3 (1980): 130-137. 12 | Martin Porter, the algorithm's inventor, maintains a web page about the 13 | algorithm at http://www.tartarus.org/~martin/PorterStemmer/ 14 | 15 | ## Installation 16 | 17 | To install, simply run in a terminal: 18 | 19 | go get github.com/kampsy/gwizo 20 | 21 | 22 | ## Stem 23 | 24 | Stem returns the stem of a word. 
25 | ```go 26 | package main 27 | 28 | import ( 29 | "fmt" 30 | "github.com/kampsy/gwizo" 31 | ) 32 | 33 | func main() { 34 | stem := gwizo.Stem("abilities") 35 | fmt.Printf("Stem: %s\n", stem) 36 | } 37 | ``` 38 | ```shell 39 | $ go run main.go 40 | 41 | Stem: able 42 | ``` 43 | 44 | ## Vowels, Consonants and Measure 45 | 46 | gwizo.Parse returns a Token, which has two fields: VowCon, the vowel/consonant pattern of the word, 47 | and Measure, the value m in the form [C](VC){m}[V]. 48 | ```go 49 | package main 50 | 51 | import ( 52 | "fmt" 53 | "github.com/kampsy/gwizo" 54 | "strings" 55 | ) 56 | 57 | func main() { 58 | word := "abilities" 59 | token := gwizo.Parse(word) 60 | 61 | // VowCon 62 | fmt.Printf("%s has Pattern %s \n", word, token.VowCon) 63 | 64 | // Measure value m in [C](VC){m}[V] 65 | fmt.Printf("%s has Measure value %d \n", word, token.Measure) 66 | 67 | // Number of Vowels 68 | v := strings.Count(token.VowCon, "v") 69 | fmt.Printf("%s Has %d Vowels \n", word, v) 70 | 71 | // Number of Consonants 72 | c := strings.Count(token.VowCon, "c") 73 | fmt.Printf("%s Has %d Consonants\n", word, c) 74 | } 75 | ``` 76 | 77 | ```bash 78 | $ go run main.go 79 | 80 | abilities has Pattern vcvcvcvvc 81 | abilities has Measure value 4 82 | abilities Has 5 Vowels 83 | abilities Has 4 Consonants 84 | ``` 85 | 86 | ## File Stem Performance. 
87 | 88 | ```go 89 | package main 90 | 91 | import ( 92 | "fmt" 93 | "github.com/kampsy/gwizo" 94 | "bufio" 95 | "io/ioutil" 96 | "strings" 97 | "os" 98 | "time" 99 | ) 100 | 101 | func main() { 102 | curr := time.Now() 103 | writeOut() 104 | elaps := time.Since(curr) 105 | fmt.Println("============================") 106 | fmt.Println("Done After:", elaps) 107 | fmt.Println("============================") 108 | } 109 | 110 | func writeOut() { 111 | re, err := ioutil.ReadFile("input.txt") 112 | if err != nil { 113 | fmt.Println(err) 114 | } 115 | 116 | file := strings.NewReader(fmt.Sprintf("%s", re)) 117 | scanner := bufio.NewScanner(file) 118 | out, err := os.Create("stem.txt") 119 | if err != nil { 120 | fmt.Println(err) 121 | } 122 | defer out.Close() 123 | for scanner.Scan() { 124 | txt := scanner.Text() 125 | stem := gwizo.Stem(txt) 126 | out.WriteString(fmt.Sprintf("%s\n", stem)) 127 | fmt.Println(txt, "--->", stem) 128 | } 129 | if err := scanner.Err(); err != nil { 130 | fmt.Println(err) 131 | } 132 | } 133 | ``` 134 | ```shell 135 | $ go run main.go 136 | 137 | ``` 138 | -------------------------------------------------------------------------------- /img/gwizo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kampsy/gwizo/4e6d03aa986f0a5146aef02518b475ebb5e525ff/img/gwizo.png -------------------------------------------------------------------------------- /porter.go: -------------------------------------------------------------------------------- 1 | /*Package gwizo implements the Porter stemming algorithm described in Porter, M. "An algorithm for suffix stripping." 2 | Program 14.3 (1980): 130-137. 3 | Martin Porter, the algorithm's inventor, maintains a web page about the 4 | algorithm at http://www.tartarus.org/~martin/PorterStemmer/ 5 | */ 6 | package gwizo 7 | 8 | import ( 9 | "fmt" 10 | "strings" 11 | ) 12 | 13 | // list of vowels and y. 
const (
	letterY = "y"
	letterA = "a"
	letterE = "e"
	letterI = "i"
	letterO = "o"
	letterU = "u"
)

/*Token describes the vowel/consonant structure of a word to be stemmed.

A consonant is defined in the paper as follows:

	A consonant in a word is a letter other than A, E, I, O or
	U, and other than Y preceded by a consonant. (The fact that
	the term `consonant' is defined to some extent in terms of
	itself does not make it ambiguous.) So in TOY the consonants
	are T and Y, and in SYZYGY they are S, Z and G. If a letter
	is not a consonant it is a vowel.

From the paper:

	A consonant will be denoted by c, a vowel by v. A list
	ccc... of length greater than 0 will be denoted by C, and a
	list vvv... of length greater than 0 will be denoted by V.
	Any word, or part of a word, therefore has one of the four
	forms:

	    CVCV ... C
	    CVCV ... V
	    VCVC ... C
	    VCVC ... V

	These may all be represented by the single form

	    [C]VCVC ... [V]

	where the square brackets denote arbitrary presence of their
	contents. Using (VC){m} to denote VC repeated m times, this
	may again be written as

	    [C](VC){m}[V].

	m will be called the measure of any word or word part when
	represented in this form. The case m = 0 covers the null
	word. Here are some examples:

	    m=0    TR,  EE,  TREE,  Y,  BY.
	    m=1    TROUBLE,  OATS,  TREES,  IVY.
	    m=2    TROUBLES,  PRIVATE,  OATEN,  ORRERY.
*/
type Token struct {
	VowCon  string // One 'v' or 'c' per byte of the lowercased word, e.g. "vcvcvc".
	Measure int    // m in [C](VC){m}[V]: the number of vowel-to-consonant transitions.
}

// Parse classifies every byte of w (after lowercasing) as a vowel or a
// consonant and returns the resulting pattern together with its measure.
//
// A, E, I, O and U are vowels. Y is a vowel only when the letter before it is a
// consonant; a leading Y therefore counts as a consonant, as the paper's
// definition requires. Every other byte, including non-letters, is classified
// as a consonant. The empty string yields an empty pattern and measure 0.
func Parse(w string) Token {
	lower := strings.ToLower(w)
	pattern := make([]byte, len(lower))
	for i := 0; i < len(lower); i++ {
		class := byte('c')
		switch lower[i] {
		case 'a', 'e', 'i', 'o', 'u':
			class = 'v'
		case 'y':
			// Y is a vowel only when it follows a consonant.
			if i > 0 && pattern[i-1] == 'c' {
				class = 'v'
			}
		}
		pattern[i] = class
	}
	vowCon := string(pattern)
	// Each "vc" pair marks the end of one V run followed by a C run, so the
	// number of pairs is exactly m.
	return Token{VowCon: vowCon, Measure: strings.Count(vowCon, "vc")}
}

// String implements fmt.Stringer and renders the token as "<pattern> <measure>".
// It uses a value receiver so that both Token and *Token satisfy the interface.
func (t Token) String() string {
	return fmt.Sprintf("%s %d", t.VowCon, t.Measure)
}

// HasVowel reports whether word contains a vowel, the (*v*) condition.
func HasVowel(word string) bool {
	return strings.Contains(Parse(word).VowCon, "v")
}

// HasConsonant reports whether word contains a consonant, the (*c*) condition.
func HasConsonant(word string) bool {
	return strings.Contains(Parse(word).VowCon, "c")
}

// MeasureNum returns the measure m of word.
func MeasureNum(word string) int {
	return Parse(word).Measure
}

// MeasureGreaterThan0 reports whether the measure of word is greater than 0.
func MeasureGreaterThan0(word string) bool {
	return Parse(word).Measure > 0
}

// MeasureEqualTo1 reports whether the measure of word is exactly 1.
func MeasureEqualTo1(word string) bool {
	return Parse(word).Measure == 1
}

// MeasureGreaterThan1 reports whether the measure of word is greater than 1.
func MeasureGreaterThan1(word string) bool {
	return Parse(word).Measure > 1
}

// HasEndst reports whether word ends in S or T.
func HasEndst(word string) bool {
	return strings.HasSuffix(word, "s") || strings.HasSuffix(word, "t")
}

// HasEndl reports whether word ends in L.
func HasEndl(word string) bool {
	return strings.HasSuffix(word, "l")
}

// HascvcEndLastNotwxy reports whether word ends consonant-vowel-consonant with
// the final consonant not being W, X or Y, the (*o) condition. Words shorter
// than three letters, including the empty string, never match.
func HascvcEndLastNotwxy(word string) bool {
	if !strings.HasSuffix(Parse(word).VowCon, "cvc") {
		return false
	}
	// Parse lowercases before classifying, so inspect the same lowercased
	// form. The "cvc" suffix guarantees at least three bytes are present.
	lower := strings.ToLower(word)
	switch lower[len(lower)-1] {
	case 'w', 'x', 'y':
		return false
	}
	return true
}
// HasSameDoubleConsonant checks if the word's suffix has a 198 | // double consonant "cc" 199 | func HasSameDoubleConsonant(word string) bool { 200 | token := Parse(word) 201 | cc := strings.HasSuffix(token.VowCon, "cc") 202 | if cc == true { 203 | wordLen := (len(word) - 1) 204 | letter := string(word[wordLen]) 205 | letter2 := string(word[(wordLen - 1)]) 206 | if letter == letter2 { 207 | return true 208 | } 209 | } 210 | return false 211 | } 212 | -------------------------------------------------------------------------------- /stem.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "strings" 5 | "unicode" 6 | ) 7 | 8 | /*Stem from "An algorithm for suffix stripping". 9 | */ 10 | func Stem(word string) string { 11 | if len(word) <= 2 { 12 | return word 13 | } 14 | var str string 15 | for _, char := range word { 16 | if unicode.IsLetter(rune(char)) { 17 | str = str + string(char) 18 | } 19 | } 20 | if strings.TrimSpace(str) == "" { 21 | return word 22 | } 23 | word = Step1a(str) 24 | word = Step1b(word) 25 | word = Step1c(word) 26 | word = Step2(word) 27 | word = Step3(word) 28 | word = Step4(word) 29 | word = Step5a(word) 30 | word = Step5b(word) 31 | return word 32 | } 33 | -------------------------------------------------------------------------------- /step1a.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step1a from "An algorithm for suffix stripping". 6 | Deals with plurals and past participles. The subsequent steps 7 | are much more straightforward. 8 | 9 | From the paper: 10 | 11 | SSES -> SS caresses -> caress 12 | IES -> I ponies -> poni 13 | ties -> ti 14 | SS -> SS caress -> caress 15 | S -> cats -> cat 16 | */ 17 | func Step1a(word string) string { 18 | // For SSES suffix. 
SSES -> SS 19 | sses := strings.HasSuffix(word, "sses") 20 | if sses { 21 | pre := strings.TrimSuffix(word, "sses") 22 | word = pre + "ss" 23 | return word 24 | } 25 | 26 | // For IES suffix. IES -> I 27 | ies := strings.HasSuffix(word, "ies") 28 | if ies { 29 | pre := strings.TrimSuffix(word, "ies") 30 | word = pre + letterI 31 | return word 32 | } 33 | 34 | // For SS suffix. SS -> SS 35 | ss := strings.HasSuffix(word, "ss") 36 | if ss { 37 | pre := strings.TrimSuffix(word, "ss") 38 | word = pre + "ss" 39 | return word 40 | } 41 | 42 | // For S suffix. S -> 43 | s := strings.HasSuffix(word, "s") 44 | if s { 45 | pre := strings.TrimSuffix(word, "s") 46 | word = pre 47 | return word 48 | } 49 | return word 50 | } 51 | -------------------------------------------------------------------------------- /step1a_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep1a(t *testing.T) { 11 | input := []string{ 12 | "caresses", "ponies", "ties", "caress", "cats", 13 | } 14 | 15 | stem := []string{ 16 | "caress", "poni", "ti", "caress", "cat", 17 | } 18 | 19 | for i := 0; i < len(input); i++ { 20 | token := gwizo.Step1a(input[i]) 21 | if token != stem[i] { 22 | t.Errorf(fmt.Sprintf("Test For %s FAIL [%s != %s]", input[i], token, stem[i])) 23 | } else { 24 | t.Log(fmt.Sprintf("Test For %s -PASS- [%s == %s]", input[i], token, stem[i])) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /step1b.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step1b from "An algorithm for suffix stripping". 6 | Deals with plurals and past participles. The subsequent steps 7 | are much more straightforward. 
8 | 9 | From the paper: 10 | 11 | (m>0) EED -> EE feed -> feed 12 | agreed -> agree 13 | (*v*) ED -> plastered -> plaster 14 | bled -> bled 15 | (*v*) ING -> motoring -> motor 16 | sing -> sing 17 | 18 | If the second or third of the rules in Step 1b is successful, 19 | the following is done: 20 | AT -> ATE conflat(ed) -> conflate 21 | BL -> BLE troubl(ed) -> trouble 22 | IZ -> IZE siz(ed) -> size 23 | (*d and not (*L or *S or *Z)) 24 | -> single letter 25 | hopp(ing) -> hop 26 | tann(ed) -> tan 27 | fall(ing) -> fall 28 | hiss(ing) -> hiss 29 | fizz(ed) -> fizz 30 | (m=1 and *o) -> E fail(ing) -> fail 31 | fil(ing) -> file 32 | The rule to map to a single letter causes the removal of one of 33 | the double letter pair. The -E is put back on -AT, -BL and -IZ, 34 | so that the suffixes -ATE, -BLE and -IZE can be recognised 35 | later. This E may be removed in step 4. 36 | */ 37 | func Step1b(word string) string { 38 | // Word Measure (m > 0) and EED suffix. EED -> EE 39 | eed := strings.HasSuffix(word, "eed") 40 | if eed { 41 | pre := strings.TrimSuffix(word, "eed") 42 | if MeasureNum(pre) > 0 { 43 | str := pre + "ee" 44 | return str 45 | } 46 | return word 47 | } 48 | 49 | // Word has Vowel and ED suffix. ED -> 50 | ed := strings.HasSuffix(word, "ed") 51 | if ed { 52 | pre := strings.TrimSuffix(word, "ed") 53 | if !HasVowel(pre) { 54 | return word 55 | } 56 | word = pre 57 | } 58 | 59 | // Word has Vowel and ING suffix. ING -> 60 | ing := strings.HasSuffix(word, "ing") 61 | if ing { 62 | pre := strings.TrimSuffix(word, "ing") 63 | if !HasVowel(pre) { 64 | return word 65 | } 66 | word = pre 67 | } 68 | 69 | /*If the second or third of the rules in Step 1b is successful, 70 | the following is done 71 | */ 72 | if ed || ing { 73 | // Word has AT suffix. AT -> ATE 74 | at := strings.HasSuffix(word, "at") 75 | if at { 76 | pre := strings.TrimSuffix(word, "at") 77 | word = pre + "ate" 78 | } 79 | 80 | // Word has BL suffix. 
BL -> BLE 81 | bl := strings.HasSuffix(word, "bl") 82 | if bl { 83 | pre := strings.TrimSuffix(word, "bl") 84 | word = pre + "ble" 85 | } 86 | 87 | // Word has IZ suffix. IZ -> IZE 88 | iz := strings.HasSuffix(word, "iz") 89 | if iz { 90 | pre := strings.TrimSuffix(word, "iz") 91 | word = pre + "ize" 92 | } 93 | 94 | // (*d and not (*L or *S or *Z)) -> single letter 95 | if HasSameDoubleConsonant(word) { 96 | ll := strings.HasSuffix(word, "ll") 97 | ss := strings.HasSuffix(word, "ss") 98 | zz := strings.HasSuffix(word, "zz") 99 | if ll || ss || zz { 100 | return word 101 | } 102 | wordLen := len(word) 103 | lastLetter := word[(wordLen - 1):] 104 | pre := strings.TrimSuffix(word, lastLetter) 105 | return pre 106 | } 107 | // (m=1 and *o) -> E 108 | if MeasureEqualTo1(word) && HascvcEndLastNotwxy(word) { 109 | word = word + letterE 110 | } 111 | } 112 | return word 113 | } 114 | -------------------------------------------------------------------------------- /step1b_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep1b(t *testing.T) { 11 | input := []string{ 12 | "feed", "agreed", "plastered", "bled", "motoring", "sing", "conflated", 13 | "troubled", "sized", "hopping", "tanned", "falling", "hissing", "fizzed", 14 | "failing", "filing", 15 | } 16 | 17 | stem := []string{ 18 | "feed", "agree", "plaster", "bled", "motor", "sing", "conflate", "trouble", 19 | "size", "hop", "tan", "fall", "hiss", "fizz", "fail", "file", 20 | } 21 | 22 | for i := 0; i < len(input); i++ { 23 | token := gwizo.Step1b(input[i]) 24 | if token != stem[i] { 25 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 26 | } else { 27 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 28 | } 29 | } 30 | } 31 | 
-------------------------------------------------------------------------------- /step1c.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | /*Step1c from "An algorithm for suffix stripping". 8 | Deals with plurals and past participles. The subsequent steps 9 | are much more straightforward. 10 | 11 | From the paper: 12 | 13 | Step 1c 14 | (*v*) Y -> I happy -> happi 15 | sky -> 16 | */ 17 | func Step1c(word string) string { 18 | // (*v*) Y -> I 19 | // Word has Vowel and Y suffix. Y -> I 20 | y := strings.HasSuffix(word, letterY) 21 | if y { 22 | pre := strings.TrimSuffix(word, letterY) 23 | if HasVowel(pre) { 24 | return pre + letterI 25 | } 26 | } 27 | return word 28 | } 29 | -------------------------------------------------------------------------------- /step1c_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep1c(t *testing.T) { 11 | input := []string{ 12 | "happy", "sky", 13 | } 14 | 15 | stem := []string{ 16 | "happi", "sky", 17 | } 18 | 19 | for i := 0; i < len(input); i++ { 20 | token := gwizo.Step1c(input[i]) 21 | if token != stem[i] { 22 | t.Errorf(fmt.Sprintf("Test For %s -FAiL- [%s != %s]", input[i], token, stem[i])) 23 | } else { 24 | t.Log(fmt.Sprintf("Test For %s **PASS* [%s == %s]", input[i], token, stem[i])) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /step2.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step2 from "An algorithm for suffix stripping" 6 | 7 | From the paper: 8 | 9 | Step 2 10 | (m>0) ATIONAL -> ATE relational -> relate 11 | (m>0) TIONAL -> TION conditional -> condition 12 | rational -> rational 13 | (m>0) ENCI -> ENCE valenci -> 
valence 14 | (m>0) ANCI -> ANCE hesitanci -> hesitance 15 | (m>0) IZER -> IZE digitizer -> digitize 16 | (m>0) ABLI -> ABLE conformabli -> conformable 17 | (m>0) ALLI -> AL radicalli -> radical 18 | (m>0) ENTLI -> ENT differentli -> different 19 | (m>0) ELI -> E vileli - > vile 20 | (m>0) OUSLI -> OUS analogousli -> analogous 21 | (m>0) IZATION -> IZE vietnamization -> vietnamize 22 | (m>0) ATION -> ATE predication -> predicate 23 | (m>0) ATOR -> ATE operator -> operate 24 | (m>0) ALISM -> AL feudalism -> feudal 25 | (m>0) IVENESS -> IVE decisiveness -> decisive 26 | (m>0) FULNESS -> FUL hopefulness -> hopeful 27 | (m>0) OUSNESS -> OUS callousness -> callous 28 | (m>0) ALITI -> AL formaliti -> formal 29 | (m>0) IVITI -> IVE sensitiviti -> sensitive 30 | (m>0) BILITI -> BLE sensibiliti -> sensible 31 | */ 32 | func Step2(word string) string { 33 | // For ATIONAL suffix. ATIONAL -> ATE 34 | ational := strings.HasSuffix(word, "ational") 35 | if ational { 36 | pre := strings.TrimSuffix(word, "ational") 37 | if MeasureGreaterThan0(pre) { 38 | word = pre + "ate" 39 | return word 40 | } 41 | 42 | } 43 | 44 | // For TIONAL suffix. TIONAL -> TION 45 | tional := strings.HasSuffix(word, "tional") 46 | if tional { 47 | pre := strings.TrimSuffix(word, "tional") 48 | if MeasureGreaterThan0(pre) { 49 | word = pre + "tion" 50 | return word 51 | } 52 | 53 | } 54 | 55 | // For ENCI suffix. ENCI -> ENCE 56 | enci := strings.HasSuffix(word, "enci") 57 | if enci { 58 | pre := strings.TrimSuffix(word, "enci") 59 | if MeasureGreaterThan0(pre) { 60 | word = pre + "ence" 61 | return word 62 | } 63 | 64 | } 65 | 66 | // For ANCI suffix. ANCI -> ANCE 67 | anci := strings.HasSuffix(word, "anci") 68 | if anci { 69 | pre := strings.TrimSuffix(word, "anci") 70 | if MeasureGreaterThan0(pre) { 71 | word = pre + "ance" 72 | return word 73 | } 74 | 75 | } 76 | 77 | // For IZER suffix. 
IZER -> IZE 78 | izer := strings.HasSuffix(word, "izer") 79 | if izer { 80 | pre := strings.TrimSuffix(word, "izer") 81 | if MeasureGreaterThan0(pre) { 82 | word = pre + "ize" 83 | return word 84 | } 85 | 86 | } 87 | 88 | // For ABLI suffix. ABLI -> ABLE 89 | abli := strings.HasSuffix(word, "abli") 90 | if abli { 91 | pre := strings.TrimSuffix(word, "abli") 92 | if MeasureGreaterThan0(pre) { 93 | word = pre + "able" 94 | return word 95 | } 96 | 97 | } 98 | 99 | // For ALLI suffix. ALLI -> AL 100 | alli := strings.HasSuffix(word, "alli") 101 | if alli { 102 | pre := strings.TrimSuffix(word, "alli") 103 | if MeasureGreaterThan0(pre) { 104 | word = pre + "al" 105 | return word 106 | } 107 | 108 | } 109 | 110 | // For ENTLI suffix. ENTLI -> ENT 111 | entli := strings.HasSuffix(word, "entli") 112 | if entli { 113 | pre := strings.TrimSuffix(word, "entli") 114 | if MeasureGreaterThan0(pre) { 115 | word = pre + "ent" 116 | return word 117 | } 118 | 119 | } 120 | 121 | // For ELI suffix. ELI -> E 122 | eli := strings.HasSuffix(word, "eli") 123 | if eli { 124 | pre := strings.TrimSuffix(word, "eli") 125 | if MeasureGreaterThan0(pre) { 126 | word = pre + "e" 127 | return word 128 | } 129 | 130 | } 131 | 132 | // For OUSLI suffix. OUSLI -> OUS 133 | ousli := strings.HasSuffix(word, "ousli") 134 | if ousli { 135 | pre := strings.TrimSuffix(word, "ousli") 136 | if MeasureGreaterThan0(pre) { 137 | word = pre + "ous" 138 | return word 139 | } 140 | 141 | } 142 | 143 | // For IZATION suffix. IZATION -> IZE 144 | ization := strings.HasSuffix(word, "ization") 145 | if ization { 146 | pre := strings.TrimSuffix(word, "ization") 147 | if MeasureGreaterThan0(pre) { 148 | word = pre + "ize" 149 | return word 150 | } 151 | 152 | } 153 | 154 | // For ATION suffix. 
ATION -> ATE 155 | ation := strings.HasSuffix(word, "ation") 156 | if ation { 157 | pre := strings.TrimSuffix(word, "ation") 158 | if MeasureGreaterThan0(pre) { 159 | word = pre + "ate" 160 | return word 161 | } 162 | 163 | } 164 | 165 | // For ATOR suffix. ATOR -> ATE 166 | ator := strings.HasSuffix(word, "ator") 167 | if ator { 168 | pre := strings.TrimSuffix(word, "ator") 169 | if MeasureGreaterThan0(pre) { 170 | word = pre + "ate" 171 | return word 172 | } 173 | 174 | } 175 | 176 | // For ALISM suffix. ALISM -> AL 177 | alism := strings.HasSuffix(word, "alism") 178 | if alism { 179 | pre := strings.TrimSuffix(word, "alism") 180 | if MeasureGreaterThan0(pre) { 181 | word = pre + "al" 182 | return word 183 | } 184 | 185 | } 186 | 187 | // For IVENESS suffix. IVENESS -> IVE 188 | iveness := strings.HasSuffix(word, "iveness") 189 | if iveness { 190 | pre := strings.TrimSuffix(word, "iveness") 191 | if MeasureGreaterThan0(pre) { 192 | word = pre + "ive" 193 | return word 194 | } 195 | 196 | } 197 | 198 | // For FULNESS suffix. FULNESS -> FUL 199 | fulness := strings.HasSuffix(word, "fulness") 200 | if fulness { 201 | pre := strings.TrimSuffix(word, "fulness") 202 | if MeasureGreaterThan0(pre) { 203 | word = pre + "ful" 204 | return word 205 | } 206 | 207 | } 208 | 209 | // For OUSNESS suffix. OUSNESS -> OUS 210 | ousness := strings.HasSuffix(word, "ousness") 211 | if ousness { 212 | pre := strings.TrimSuffix(word, "ousness") 213 | if MeasureGreaterThan0(pre) { 214 | word = pre + "ous" 215 | return word 216 | } 217 | 218 | } 219 | 220 | // For ALITI suffix. ALITI -> AL 221 | aliti := strings.HasSuffix(word, "aliti") 222 | if aliti { 223 | pre := strings.TrimSuffix(word, "aliti") 224 | if MeasureGreaterThan0(pre) { 225 | word = pre + "al" 226 | return word 227 | } 228 | 229 | } 230 | 231 | // For IVITI suffix. 
IVITI -> IVE 232 | iviti := strings.HasSuffix(word, "iviti") 233 | if iviti { 234 | pre := strings.TrimSuffix(word, "iviti") 235 | if MeasureGreaterThan0(pre) { 236 | word = pre + "ive" 237 | return word 238 | } 239 | 240 | } 241 | 242 | // For BILITI suffix. BILITI -> BLE 243 | biliti := strings.HasSuffix(word, "biliti") 244 | if biliti { 245 | pre := strings.TrimSuffix(word, "biliti") 246 | if MeasureGreaterThan0(pre) { 247 | word = pre + "ble" 248 | return word 249 | } 250 | 251 | } 252 | 253 | return word 254 | } 255 | -------------------------------------------------------------------------------- /step2_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep2(t *testing.T) { 11 | input := []string{ 12 | "relational", "conditional", "rational", "valenci", "hesitanci", "digitizer", 13 | "conformabli", "radicalli", "differentli", "vileli", "analogousli", 14 | "vietnamization", "predication", "operator", "feudalism", "decisiveness", 15 | "hopefulness", "callousness", "formaliti", "sensitiviti", "sensibiliti", 16 | } 17 | 18 | stem := []string{ 19 | "relate", "condition", "rational", "valence", "hesitance", "digitize", 20 | "conformable", "radical", "different", "vile", "analogous", "vietnamize", 21 | "predicate", "operate", "feudal", "decisive", "hopeful", "callous", 22 | "formal", "sensitive", "sensible", 23 | } 24 | 25 | for i := 0; i < len(input); i++ { 26 | token := gwizo.Step2(input[i]) 27 | if token != stem[i] { 28 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 29 | } else { 30 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 31 | } 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /step3.go: -------------------------------------------------------------------------------- 1 | package gwizo 
2 | 3 | import "strings" 4 | 5 | /*Step3 from "An algorithm for suffix stripping". 6 | 7 | From the paper: 8 | 9 | Step 3 10 | (m>0) ICATE -> IC triplicate -> triplic 11 | (m>0) ATIVE -> formative -> form 12 | (m>0) ALIZE -> AL formalize -> formal 13 | (m>0) ICITI -> IC electriciti -> electric 14 | (m>0) ICAL -> IC electrical -> electric 15 | (m>0) FUL -> hopeful -> hope 16 | (m>0) NESS -> goodness -> good 17 | */ 18 | func Step3(word string) string { 19 | // For ICATE suffix. ICATE -> IC 20 | icate := strings.HasSuffix(word, "icate") 21 | if icate { 22 | pre := strings.TrimSuffix(word, "icate") 23 | if MeasureGreaterThan0(pre) { 24 | word = pre + "ic" 25 | return word 26 | } 27 | } 28 | 29 | // For ATIVE suffix. ATIVE -> 30 | ative := strings.HasSuffix(word, "ative") 31 | if ative { 32 | pre := strings.TrimSuffix(word, "ative") 33 | if MeasureGreaterThan0(pre) { 34 | word = pre 35 | return word 36 | } 37 | } 38 | 39 | // For ALIZE suffix. ALIZE -> AL 40 | alize := strings.HasSuffix(word, "alize") 41 | if alize { 42 | pre := strings.TrimSuffix(word, "alize") 43 | if MeasureGreaterThan0(pre) { 44 | word = pre + "al" 45 | return word 46 | } 47 | } 48 | 49 | // For ICITI suffix. ICITI -> IC 50 | iciti := strings.HasSuffix(word, "iciti") 51 | if iciti { 52 | pre := strings.TrimSuffix(word, "iciti") 53 | if MeasureGreaterThan0(pre) { 54 | word = pre + "ic" 55 | return word 56 | } 57 | } 58 | 59 | // For ICAL suffix. ICAL -> IC 60 | ical := strings.HasSuffix(word, "ical") 61 | if ical { 62 | pre := strings.TrimSuffix(word, "ical") 63 | if MeasureGreaterThan0(pre) { 64 | word = pre + "ic" 65 | return word 66 | } 67 | } 68 | 69 | // For FUL suffix. FUL -> 70 | ful := strings.HasSuffix(word, "ful") 71 | if ful { 72 | pre := strings.TrimSuffix(word, "ful") 73 | if MeasureGreaterThan0(pre) { 74 | word = pre 75 | return word 76 | } 77 | } 78 | 79 | // For NESS suffix. 
NESS -> 80 | ness := strings.HasSuffix(word, "ness") 81 | if ness { 82 | pre := strings.TrimSuffix(word, "ness") 83 | if MeasureGreaterThan0(pre) { 84 | word = pre 85 | return word 86 | } 87 | } 88 | return word 89 | } 90 | -------------------------------------------------------------------------------- /step3_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep3(t *testing.T) { 11 | input := []string{ 12 | "triplicate", "formative", "formalize", "electriciti", "electrical", 13 | "hopeful", "goodness", 14 | } 15 | 16 | stem := []string{ 17 | "triplic", "form", "formal", "electric", "electric", "hope", "good", 18 | } 19 | 20 | for i := 0; i < len(input); i++ { 21 | token := gwizo.Step3(input[i]) 22 | if token != stem[i] { 23 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 24 | } else { 25 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /step4.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step4 from "An algorithm for suffix stripping". 
6 | From the paper: 7 | 8 | Step 4 9 | (m>1) AL -> revival -> reviv 10 | (m>1) ANCE -> allowance -> allow 11 | (m>1) ENCE -> inference -> infer 12 | (m>1) ER -> airliner -> airlin 13 | (m>1) IC -> gyroscopic -> gyroscop 14 | (m>1) ABLE -> adjustable -> adjust 15 | (m>1) IBLE -> defensible -> defens 16 | (m>1) ANT -> irritant -> irrit 17 | (m>1) EMENT -> replacement -> replac 18 | (m>1) MENT -> adjustment -> adjust 19 | (m>1) ENT -> dependent -> depend 20 | (m>1 and (*S or *T)) ION -> adoption -> adopt 21 | (m>1) OU -> homologou -> homolog 22 | (m>1) ISM -> communism -> commun 23 | (m>1) ATE -> activate -> activ 24 | (m>1) ITI -> angulariti -> angular 25 | (m>1) OUS -> homologous -> homolog 26 | (m>1) IVE -> effective -> effect 27 | (m>1) IZE -> bowdlerize -> bowdler 28 | The suffixes are now removed. All that remains is a little 29 | tidying up. 30 | */ 31 | func Step4(word string) string { 32 | // For AL suffix. AL -> 33 | al := strings.HasSuffix(word, "al") 34 | if al { 35 | pre := strings.TrimSuffix(word, "al") 36 | if MeasureGreaterThan1(pre) { 37 | return pre 38 | } 39 | } 40 | 41 | // For ANCE suffix. ANCE -> 42 | ance := strings.HasSuffix(word, "ance") 43 | if ance { 44 | pre := strings.TrimSuffix(word, "ance") 45 | if MeasureGreaterThan1(pre) { 46 | return pre 47 | } 48 | } 49 | 50 | // For ENCE suffix. ENCE -> 51 | ence := strings.HasSuffix(word, "ence") 52 | if ence { 53 | pre := strings.TrimSuffix(word, "ence") 54 | if MeasureGreaterThan1(pre) { 55 | return pre 56 | } 57 | } 58 | 59 | // For ER suffix. ER -> 60 | er := strings.HasSuffix(word, "er") 61 | if er { 62 | pre := strings.TrimSuffix(word, "er") 63 | if MeasureGreaterThan1(pre) { 64 | return pre 65 | } 66 | } 67 | 68 | // For IC suffix. IC -> 69 | ic := strings.HasSuffix(word, "ic") 70 | if ic { 71 | pre := strings.TrimSuffix(word, "ic") 72 | if MeasureGreaterThan1(pre) { 73 | return pre 74 | } 75 | } 76 | 77 | // For ABLE suffix. 
ABLE -> 78 | able := strings.HasSuffix(word, "able") 79 | if able { 80 | pre := strings.TrimSuffix(word, "able") 81 | if MeasureGreaterThan1(pre) { 82 | return pre 83 | } 84 | } 85 | 86 | // For IBLE suffix. IBLE -> 87 | ible := strings.HasSuffix(word, "ible") 88 | if ible { 89 | pre := strings.TrimSuffix(word, "ible") 90 | if MeasureGreaterThan1(pre) { 91 | return pre 92 | } 93 | } 94 | 95 | // For ANT suffix. ANT -> 96 | ant := strings.HasSuffix(word, "ant") 97 | if ant { 98 | pre := strings.TrimSuffix(word, "ant") 99 | if MeasureGreaterThan1(pre) { 100 | return pre 101 | } 102 | } 103 | 104 | // For EMENT suffix. EMENT -> 105 | ement := strings.HasSuffix(word, "ement") 106 | if ement { 107 | pre := strings.TrimSuffix(word, "ement") 108 | if MeasureGreaterThan1(pre) { 109 | return pre 110 | } 111 | } 112 | 113 | // For MENT suffix. MENT -> 114 | ment := strings.HasSuffix(word, "ment") 115 | if ment { 116 | pre := strings.TrimSuffix(word, "ment") 117 | if MeasureGreaterThan1(pre) { 118 | return pre 119 | } 120 | } 121 | 122 | // For ENT suffix. ENT -> 123 | ent := strings.HasSuffix(word, "ent") 124 | if ent { 125 | pre := strings.TrimSuffix(word, "ent") 126 | if MeasureGreaterThan1(pre) { 127 | return pre 128 | } 129 | } 130 | 131 | // (m>1 and (*S or *T)) ION -> 132 | ion := strings.HasSuffix(word, "ion") 133 | if ion { 134 | pre := strings.TrimSuffix(word, "ion") 135 | if MeasureGreaterThan1(pre) && HasEndst(pre) { 136 | return pre 137 | } 138 | } 139 | 140 | // For OU suffix. OU -> 141 | ou := strings.HasSuffix(word, "ou") 142 | if ou { 143 | pre := strings.TrimSuffix(word, "ou") 144 | if MeasureGreaterThan1(pre) { 145 | return pre 146 | } 147 | } 148 | 149 | // For ISM suffix. ISM -> 150 | ism := strings.HasSuffix(word, "ism") 151 | if ism { 152 | pre := strings.TrimSuffix(word, "ism") 153 | if MeasureGreaterThan1(pre) { 154 | return pre 155 | } 156 | } 157 | 158 | // For ATE suffix. 
ATE -> 159 | ate := strings.HasSuffix(word, "ate") 160 | if ate { 161 | pre := strings.TrimSuffix(word, "ate") 162 | if MeasureGreaterThan1(pre) { 163 | return pre 164 | } 165 | } 166 | 167 | // For ITI suffix. ITI -> 168 | iti := strings.HasSuffix(word, "iti") 169 | if iti { 170 | pre := strings.TrimSuffix(word, "iti") 171 | if MeasureGreaterThan1(pre) { 172 | return pre 173 | } 174 | } 175 | 176 | // For OUS suffix. OUS -> 177 | ous := strings.HasSuffix(word, "ous") 178 | if ous { 179 | pre := strings.TrimSuffix(word, "ous") 180 | if MeasureGreaterThan1(pre) { 181 | return pre 182 | } 183 | } 184 | 185 | // For IVE suffix. IVE -> 186 | ive := strings.HasSuffix(word, "ive") 187 | if ive { 188 | pre := strings.TrimSuffix(word, "ive") 189 | if MeasureGreaterThan1(pre) { 190 | return pre 191 | } 192 | } 193 | 194 | // For IZE suffix. IZE -> 195 | ize := strings.HasSuffix(word, "ize") 196 | if ize { 197 | pre := strings.TrimSuffix(word, "ize") 198 | if MeasureGreaterThan1(pre) { 199 | return pre 200 | } 201 | } 202 | return word 203 | } 204 | -------------------------------------------------------------------------------- /step4_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep4(t *testing.T) { 11 | input := []string{ 12 | "revival", "allowance", "inference", "airliner", "gyroscopic", "adjustable", 13 | "defensible", "irritant", "replacement", "adjustment", "dependent", 14 | "adoption", "homologou", "communism", "activate", "angulariti", "homologous", 15 | "effective", "bowdlerize", 16 | } 17 | 18 | stem := []string{ 19 | "reviv", "allow", "infer", "airlin", "gyroscop", "adjust", "defens", 20 | "irrit", "replac", "adjust", "depend", "adopt", "homolog", "commun", "activ", 21 | "angular", "homolog", "effect", "bowdler", 22 | } 23 | 24 | for i := 0; i < len(input); i++ { 25 | token := gwizo.Step4(input[i]) 26 | 
if token != stem[i] { 27 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 28 | } else { 29 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /step5a.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step5a from "An algorithm for suffix stripping". 6 | 7 | From the paper: 8 | 9 | Step 5a 10 | (m>1) E -> probate -> probat 11 | rate -> rate 12 | (m=1 and not *o) E -> cease -> ceas 13 | */ 14 | func Step5a(word string) string { 15 | // E suffix. E -> 16 | e := strings.HasSuffix(word, letterE) 17 | if e { 18 | pre := strings.TrimSuffix(word, letterE) 19 | if MeasureGreaterThan1(pre) { 20 | return pre 21 | } 22 | } 23 | 24 | // (m=1 and not *o) E -> 25 | pre := strings.TrimSuffix(word, letterE) 26 | if MeasureEqualTo1(pre) && !HascvcEndLastNotwxy(pre) { 27 | return pre 28 | } 29 | return word 30 | } 31 | -------------------------------------------------------------------------------- /step5a_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep5a(t *testing.T) { 11 | input := []string{ 12 | "probate", "rate", "cease", 13 | } 14 | 15 | stem := []string{ 16 | "probat", "rate", "ceas", 17 | } 18 | 19 | for i := 0; i < len(input); i++ { 20 | token := gwizo.Step5a(input[i]) 21 | if token != stem[i] { 22 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 23 | } else { 24 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /step5b.go: 
-------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import "strings" 4 | 5 | /*Step5b from "An algorithm for suffix stripping". 6 | 7 | From the paper: 8 | 9 | Step 5b 10 | (m > 1 and *d and *L) -> single letter 11 | controll -> control 12 | roll -> roll 13 | */ 14 | func Step5b(word string) string { 15 | // (m > 1 and *d and *L) -> single letter 16 | if MeasureGreaterThan1(word) && HasSameDoubleConsonant(word) && HasEndl(word) { 17 | wordLen := len(word) 18 | lastLetter := word[(wordLen - 1):] 19 | pre := strings.TrimSuffix(word, lastLetter) 20 | return pre 21 | } 22 | return word 23 | } 24 | -------------------------------------------------------------------------------- /step5b_test.go: -------------------------------------------------------------------------------- 1 | package gwizo 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/kampsy/gwizo" 8 | ) 9 | 10 | func TestStep5b(t *testing.T) { 11 | input := []string{ 12 | "controll", "roll", 13 | } 14 | 15 | stem := []string{ 16 | "control", "roll", 17 | } 18 | 19 | for i := 0; i < len(input); i++ { 20 | token := gwizo.Step5b(input[i]) 21 | if token != stem[i] { 22 | t.Errorf(fmt.Sprintf("Test For %s -FAIL- [%s != %s]", input[i], token, stem[i])) 23 | } else { 24 | t.Log(fmt.Sprintf("Test For %s *PASS* [%s == %s]", input[i], token, stem[i])) 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | #Package gwizo implement Porter, M. "An algorithm for suffix stripping." 4 | #Program 14.3 (1980): 130-137. 5 | #Martin Porter, the algorithm's inventor, maintains a web page about the 6 | #algorithm at http://www.tartarus.org/~martin/PorterStemmer/ 7 | 8 | #------------------------ 9 | # Test all steps 10 | #------------------------ 11 | cd .. 
# Run each step's test file on its own, in pipeline order.
for step in step1a step1b step1c step2 step3 step4 step5a step5b; do
	go test -v "gwizo/${step}_test.go"
done