├── .github └── workflows │ └── test.yml ├── ChangeLog ├── LICENSE.txt ├── README.md ├── _example ├── song.txt └── stem.go ├── api.c ├── api.h ├── example_test.go ├── go.mod ├── go.sum ├── header.h ├── lib-version.txt ├── libstemmer.c ├── libstemmer.h ├── modules.h ├── modules_utf8.h ├── snowball.go ├── snowball_test.go ├── stem_ISO_8859_1_basque.c ├── stem_ISO_8859_1_basque.h ├── stem_ISO_8859_1_catalan.c ├── stem_ISO_8859_1_catalan.h ├── stem_ISO_8859_1_danish.c ├── stem_ISO_8859_1_danish.h ├── stem_ISO_8859_1_dutch.c ├── stem_ISO_8859_1_dutch.h ├── stem_ISO_8859_1_english.c ├── stem_ISO_8859_1_english.h ├── stem_ISO_8859_1_finnish.c ├── stem_ISO_8859_1_finnish.h ├── stem_ISO_8859_1_french.c ├── stem_ISO_8859_1_french.h ├── stem_ISO_8859_1_german.c ├── stem_ISO_8859_1_german.h ├── stem_ISO_8859_1_indonesian.c ├── stem_ISO_8859_1_indonesian.h ├── stem_ISO_8859_1_irish.c ├── stem_ISO_8859_1_irish.h ├── stem_ISO_8859_1_italian.c ├── stem_ISO_8859_1_italian.h ├── stem_ISO_8859_1_norwegian.c ├── stem_ISO_8859_1_norwegian.h ├── stem_ISO_8859_1_porter.c ├── stem_ISO_8859_1_porter.h ├── stem_ISO_8859_1_portuguese.c ├── stem_ISO_8859_1_portuguese.h ├── stem_ISO_8859_1_spanish.c ├── stem_ISO_8859_1_spanish.h ├── stem_ISO_8859_1_swedish.c ├── stem_ISO_8859_1_swedish.h ├── stem_ISO_8859_2_hungarian.c ├── stem_ISO_8859_2_hungarian.h ├── stem_ISO_8859_2_romanian.c ├── stem_ISO_8859_2_romanian.h ├── stem_KOI8_R_russian.c ├── stem_KOI8_R_russian.h ├── stem_UTF_8_arabic.c ├── stem_UTF_8_arabic.h ├── stem_UTF_8_armenian.c ├── stem_UTF_8_armenian.h ├── stem_UTF_8_basque.c ├── stem_UTF_8_basque.h ├── stem_UTF_8_catalan.c ├── stem_UTF_8_catalan.h ├── stem_UTF_8_danish.c ├── stem_UTF_8_danish.h ├── stem_UTF_8_dutch.c ├── stem_UTF_8_dutch.h ├── stem_UTF_8_english.c ├── stem_UTF_8_english.h ├── stem_UTF_8_finnish.c ├── stem_UTF_8_finnish.h ├── stem_UTF_8_french.c ├── stem_UTF_8_french.h ├── stem_UTF_8_german.c ├── stem_UTF_8_german.h ├── stem_UTF_8_greek.c ├── stem_UTF_8_greek.h ├── 
stem_UTF_8_hindi.c ├── stem_UTF_8_hindi.h ├── stem_UTF_8_hungarian.c ├── stem_UTF_8_hungarian.h ├── stem_UTF_8_indonesian.c ├── stem_UTF_8_indonesian.h ├── stem_UTF_8_irish.c ├── stem_UTF_8_irish.h ├── stem_UTF_8_italian.c ├── stem_UTF_8_italian.h ├── stem_UTF_8_lithuanian.c ├── stem_UTF_8_lithuanian.h ├── stem_UTF_8_nepali.c ├── stem_UTF_8_nepali.h ├── stem_UTF_8_norwegian.c ├── stem_UTF_8_norwegian.h ├── stem_UTF_8_porter.c ├── stem_UTF_8_porter.h ├── stem_UTF_8_portuguese.c ├── stem_UTF_8_portuguese.h ├── stem_UTF_8_romanian.c ├── stem_UTF_8_romanian.h ├── stem_UTF_8_russian.c ├── stem_UTF_8_russian.h ├── stem_UTF_8_serbian.c ├── stem_UTF_8_serbian.h ├── stem_UTF_8_spanish.c ├── stem_UTF_8_spanish.h ├── stem_UTF_8_swedish.c ├── stem_UTF_8_swedish.h ├── stem_UTF_8_tamil.c ├── stem_UTF_8_tamil.h ├── stem_UTF_8_turkish.c ├── stem_UTF_8_turkish.h ├── stem_UTF_8_yiddish.c ├── stem_UTF_8_yiddish.h ├── update-c.sh └── utilities.c /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | - push 3 | - pull_request 4 | name: Test 5 | jobs: 6 | test: 7 | strategy: 8 | matrix: 9 | go-version: 10 | - 1.19.x 11 | - 1.20.x 12 | - 1.21.x 13 | platform: [ubuntu-latest, macos-latest] 14 | runs-on: ${{ matrix.platform }} 15 | steps: 16 | - name: Install Go 17 | uses: actions/setup-go@v1 18 | with: 19 | go-version: ${{ matrix.go-version }} 20 | - name: Checkout code 21 | uses: actions/checkout@v2 22 | - name: Test 23 | run: go test -v ./... 
24 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 2022-11-14 version 0.7.0 2 | * Stemmer.Close 3 | * Use testify for testing 4 | 5 | 2022-11-14 version 0.6.0 6 | * Bump C library to 2.2.0 7 | * Added SnowballVersion 8 | 9 | 2019-04-17 version 0.3.0 10 | * Bump C lib version 11 | * example_test.go 12 | 13 | 2013-04-05 version 0.2.0 14 | * Fix possible memory leak 15 | * List -> LangList (and done once in init) 16 | 17 | 2012-12-06 version 0.1.2 18 | * Free memory 19 | 20 | 2012-12-03 version 0.1.1 21 | * Minor improvements 22 | 23 | 2012-12-03 version 0.1.0 24 | * Initial release 25 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Miki Tebeka 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [Snowball](http://snowball.tartarus.org/) Stemmer for Go 2 | 3 | [![Go Reference](https://pkg.go.dev/badge/github.com/tebeka/snowball.svg)](https://pkg.go.dev/github.com/tebeka/snowball) 4 | [![Test](https://github.com/tebeka/snowball/workflows/Test/badge.svg)](https://github.com/tebeka/snowball/actions?query=workflow%3ATest) 5 | 6 | ## Usage 7 | 8 | ```go 9 | package snowball_test 10 | 11 | import ( 12 | "fmt" 13 | 14 | "github.com/tebeka/snowball" 15 | ) 16 | 17 | func Example() { 18 | stemmer, err := snowball.New("english") 19 | if err != nil { 20 | fmt.Println("error", err) 21 | return 22 | } 23 | defer stemmer.Close() 24 | 25 | fmt.Println(stemmer.Stem("worked")) 26 | fmt.Println(stemmer.Stem("working")) 27 | fmt.Println(stemmer.Stem("works")) 28 | // Output: 29 | // work 30 | // work 31 | // work 32 | } 33 | ``` 34 | 35 | This project was mostly a learning exercise for me, I don't consider it production quality. 36 | 37 | ## Development 38 | 39 | If you want to update the underlying C library, run `update-c.sh`. Make sure to run the tests after. 40 | 41 | -------------------------------------------------------------------------------- /_example/song.txt: -------------------------------------------------------------------------------- 1 | The Road goes ever on and on 2 | Down from the door where it began. 3 | Now far ahead the Road has gone, 4 | And I must follow, if I can, 5 | Pursuing it with eager feet, 6 | Until it joins some larger way 7 | Where many paths and errands meet. 8 | And whither then? 
I cannot say. 9 | -------------------------------------------------------------------------------- /_example/stem.go: -------------------------------------------------------------------------------- 1 | //go:build ignore 2 | 3 | package main 4 | 5 | /* 6 | Example on using Snowball stemmer 7 | 8 | This program will read a file, then print "word -> stem(word)" for every word 9 | in file 10 | */ 11 | 12 | import ( 13 | "bytes" 14 | "flag" 15 | "fmt" 16 | "io/ioutil" 17 | "os" 18 | "regexp" 19 | 20 | "github.com/tebeka/snowball" 21 | ) 22 | 23 | func main() { 24 | flag.Usage = func() { 25 | fmt.Fprintf(os.Stderr, "usage: %s FILENAME\n", os.Args[0]) 26 | flag.PrintDefaults() 27 | } 28 | lang := flag.String("lang", "english", "stemmer language") 29 | flag.Parse() 30 | 31 | if flag.NArg() != 1 { 32 | fmt.Fprintf(os.Stderr, "error: wrong number of arguments\n") 33 | os.Exit(1) 34 | } 35 | 36 | fmt.Println("Using snowball version", snowball.Version) 37 | 38 | stmr, err := snowball.New(*lang) 39 | if err != nil { 40 | fmt.Fprintf(os.Stderr, "error: %s\n", err) 41 | os.Exit(1) 42 | } 43 | defer stmr.Close() // release the C stemmer when main returns 44 | 45 | data, err := ioutil.ReadFile(flag.Arg(0)) 46 | if err != nil { 47 | fmt.Fprintf(os.Stderr, "error: can't open %s - %s\n", flag.Arg(0), err) 48 | os.Exit(1) 49 | } 50 | 51 | re := regexp.MustCompile("[a-zA-Z]+") 52 | 53 | for _, field := range re.FindAll(data, -1) { 54 | word := string(bytes.ToLower(field)) 55 | fmt.Printf("%s -> %s\n", word, stmr.Stem(word)) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /api.c: -------------------------------------------------------------------------------- 1 | 2 | #include <stdlib.h> /* for calloc, free */ 3 | #include "header.h" 4 | 5 | extern struct SN_env * SN_create_env(int S_size, int I_size) 6 | { 7 | struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); 8 | if (z == NULL) return NULL; 9 | z->p = create_s(); 10 | if (z->p == NULL) goto error; 11 | if (S_size) 12 
| { 13 | int i; 14 | z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); 15 | if (z->S == NULL) goto error; 16 | 17 | for (i = 0; i < S_size; i++) 18 | { 19 | z->S[i] = create_s(); 20 | if (z->S[i] == NULL) goto error; 21 | } 22 | } 23 | 24 | if (I_size) 25 | { 26 | z->I = (int *) calloc(I_size, sizeof(int)); 27 | if (z->I == NULL) goto error; 28 | } 29 | 30 | return z; 31 | error: 32 | SN_close_env(z, S_size); 33 | return NULL; 34 | } 35 | 36 | extern void SN_close_env(struct SN_env * z, int S_size) 37 | { 38 | if (z == NULL) return; 39 | if (S_size) 40 | { 41 | int i; 42 | for (i = 0; i < S_size; i++) 43 | { 44 | lose_s(z->S[i]); 45 | } 46 | free(z->S); 47 | } 48 | free(z->I); 49 | if (z->p) lose_s(z->p); 50 | free(z); 51 | } 52 | 53 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s) 54 | { 55 | int err = replace_s(z, 0, z->l, size, s, NULL); 56 | z->c = 0; 57 | return err; 58 | } 59 | -------------------------------------------------------------------------------- /api.h: -------------------------------------------------------------------------------- 1 | 2 | typedef unsigned char symbol; 3 | 4 | /* Or replace 'char' above with 'short' for 16 bit characters. 5 | 6 | More precisely, replace 'char' with whatever type guarantees the 7 | character width you need. Note however that sizeof(symbol) should divide 8 | HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise 9 | there is an alignment problem. In the unlikely event of a problem here, 10 | consult Martin Porter. 
11 | 12 | */ 13 | 14 | struct SN_env { 15 | symbol * p; 16 | int c; int l; int lb; int bra; int ket; 17 | symbol * * S; 18 | int * I; 19 | }; 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | extern struct SN_env * SN_create_env(int S_size, int I_size); 26 | extern void SN_close_env(struct SN_env * z, int S_size); 27 | 28 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s); 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | package snowball_test 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/tebeka/snowball" 7 | ) 8 | 9 | func Example() { 10 | stemmer, err := snowball.New("english") 11 | if err != nil { 12 | fmt.Println("error", err) 13 | return 14 | } 15 | defer stemmer.Close() 16 | 17 | fmt.Println(stemmer.Stem("working")) 18 | fmt.Println(stemmer.Stem("works")) 19 | fmt.Println(stemmer.Stem("worked")) 20 | // Output: 21 | // work 22 | // work 23 | // work 24 | } 25 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/tebeka/snowball 2 | 3 | go 1.19 4 | 5 | require github.com/stretchr/testify v1.8.1 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 10 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 11 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 12 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /header.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "api.h" 5 | 6 | #define MAXINT INT_MAX 7 | #define MININT INT_MIN 8 | 9 | #define HEAD 2*sizeof(int) 10 | 11 | #define SIZE(p) ((int *)(p))[-1] 12 | #define SET_SIZE(p, n) ((int *)(p))[-1] = n 13 | #define CAPACITY(p) ((int *)(p))[-2] 14 | 15 | struct among 16 | { int s_size; /* number of chars in string */ 17 | const symbol * s; /* search string */ 18 | int substring_i;/* index to longest matching substring */ 19 
| int result; /* result of the lookup */ 20 | int (* function)(struct SN_env *); 21 | }; 22 | 23 | extern symbol * create_s(void); 24 | extern void lose_s(symbol * p); 25 | 26 | extern int skip_utf8(const symbol * p, int c, int limit, int n); 27 | 28 | extern int skip_b_utf8(const symbol * p, int c, int limit, int n); 29 | 30 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 31 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 32 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 33 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 34 | 35 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 36 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 37 | extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 38 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 39 | 40 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s); 41 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); 42 | extern int eq_v(struct SN_env * z, const symbol * p); 43 | extern int eq_v_b(struct SN_env * z, const symbol * p); 44 | 45 | extern int find_among(struct SN_env * z, const struct among * v, int v_size); 46 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); 47 | 48 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); 49 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); 50 | extern int slice_from_v(struct SN_env * z, const symbol * p); 51 | extern int slice_del(struct SN_env * z); 52 | 53 | extern int insert_s(struct SN_env * z, int bra, int ket, int 
s_size, const symbol * s); 54 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); 55 | 56 | extern symbol * slice_to(struct SN_env * z, symbol * p); 57 | extern symbol * assign_to(struct SN_env * z, symbol * p); 58 | 59 | extern int len_utf8(const symbol * p); 60 | 61 | extern void debug(struct SN_env * z, int number, int line_count); 62 | -------------------------------------------------------------------------------- /lib-version.txt: -------------------------------------------------------------------------------- 1 | 2.2.0 2 | -------------------------------------------------------------------------------- /libstemmer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "libstemmer.h" 5 | #include "api.h" 6 | #include "modules.h" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct SN_env *); 11 | int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | const struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | const struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer; 40 | 41 | enc = sb_getenc(charenc); 42 | if (enc == ENC_UNKNOWN) return NULL; 43 | 44 | for (module = modules; module->name != 0; module++) { 45 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 46 | } 47 | if (module->name == NULL) return NULL; 48 | 49 
| stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 50 | if (stemmer == NULL) return NULL; 51 | 52 | stemmer->create = module->create; 53 | stemmer->close = module->close; 54 | stemmer->stem = module->stem; 55 | 56 | stemmer->env = stemmer->create(); 57 | if (stemmer->env == NULL) 58 | { 59 | sb_stemmer_delete(stemmer); 60 | return NULL; 61 | } 62 | 63 | return stemmer; 64 | } 65 | 66 | void 67 | sb_stemmer_delete(struct sb_stemmer * stemmer) 68 | { 69 | if (stemmer == 0) return; 70 | if (stemmer->close) { 71 | stemmer->close(stemmer->env); 72 | stemmer->close = 0; 73 | } 74 | free(stemmer); 75 | } 76 | 77 | const sb_symbol * 78 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 79 | { 80 | int ret; 81 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 82 | { 83 | stemmer->env->l = 0; 84 | return NULL; 85 | } 86 | ret = stemmer->stem(stemmer->env); 87 | if (ret < 0) return NULL; 88 | stemmer->env->p[stemmer->env->l] = 0; 89 | return (const sb_symbol *)(stemmer->env->p); 90 | } 91 | 92 | int 93 | sb_stemmer_length(struct sb_stemmer * stemmer) 94 | { 95 | return stemmer->env->l; 96 | } 97 | -------------------------------------------------------------------------------- /libstemmer.h: -------------------------------------------------------------------------------- 1 | 2 | /* Make header file work when included from C++ */ 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | struct sb_stemmer; 8 | typedef unsigned char sb_symbol; 9 | 10 | /* FIXME - should be able to get a version number for each stemming 11 | * algorithm (which will be incremented each time the output changes). */ 12 | 13 | /** Returns an array of the names of the available stemming algorithms. 14 | * Note that these are the canonical names - aliases (ie, other names for 15 | * the same algorithm) will not be included in the list. 16 | * The list is terminated with a null pointer. 
17 | * 18 | * The list must not be modified in any way. 19 | */ 20 | const char ** sb_stemmer_list(void); 21 | 22 | /** Create a new stemmer object, using the specified algorithm, for the 23 | * specified character encoding. 24 | * 25 | * All algorithms will usually be available in UTF-8, but may also be 26 | * available in other character encodings. 27 | * 28 | * @param algorithm The algorithm name. This is either the english 29 | * name of the algorithm, or the 2 or 3 letter ISO 639 codes for the 30 | * language. Note that case is significant in this parameter - the 31 | * value should be supplied in lower case. 32 | * 33 | * @param charenc The character encoding. NULL may be passed as 34 | * this value, in which case UTF-8 encoding will be assumed. Otherwise, 35 | * the argument may be one of "UTF_8", "ISO_8859_1" (i.e. Latin 1), 36 | * "ISO_8859_2" (i.e. Latin 2) or "KOI8_R" (Russian). Note that case is 37 | * significant in this parameter. 38 | * 39 | * @return NULL if the specified algorithm is not recognised, or the 40 | * algorithm is not available for the requested encoding. Otherwise, 41 | * returns a pointer to a newly created stemmer for the requested algorithm. 42 | * The returned pointer must be deleted by calling sb_stemmer_delete(). 43 | * 44 | * @note NULL will also be returned if an out of memory error occurs. 45 | */ 46 | struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); 47 | 48 | /** Delete a stemmer object. 49 | * 50 | * This frees all resources allocated for the stemmer. After calling 51 | * this function, the supplied stemmer may no longer be used in any way. 52 | * 53 | * It is safe to pass a null pointer to this function - this will have 54 | * no effect. 55 | */ 56 | void sb_stemmer_delete(struct sb_stemmer * stemmer); 57 | 58 | /** Stem a word. 
59 | * 60 | * The return value is owned by the stemmer - it must not be freed or 61 | * modified, and it will become invalid when the stemmer is called again, 62 | * or if the stemmer is freed. 63 | * 64 | * The length of the return value can be obtained using sb_stemmer_length(). 65 | * 66 | * If an out-of-memory error occurs, this will return NULL. 67 | */ 68 | const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, 69 | const sb_symbol * word, int size); 70 | 71 | /** Get the length of the result of the last stemmed word. 72 | * This should not be called before sb_stemmer_stem() has been called. 73 | */ 74 | int sb_stemmer_length(struct sb_stemmer * stemmer); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | -------------------------------------------------------------------------------- /modules_utf8.h: -------------------------------------------------------------------------------- 1 | /* libstemmer/modules_utf8.h: List of stemming modules. 2 | * 3 | * This file is generated by mkmodules.pl from a list of module names. 4 | * Do not edit manually. 
5 | * 6 | * Modules included by this file are: arabic, armenian, basque, catalan, 7 | * danish, dutch, english, finnish, french, german, greek, hindi, hungarian, 8 | * indonesian, irish, italian, lithuanian, nepali, norwegian, porter, 9 | * portuguese, romanian, russian, serbian, spanish, swedish, tamil, turkish, 10 | * yiddish 11 | */ 12 | 13 | #include "stem_UTF_8_arabic.h" 14 | #include "stem_UTF_8_armenian.h" 15 | #include "stem_UTF_8_basque.h" 16 | #include "stem_UTF_8_catalan.h" 17 | #include "stem_UTF_8_danish.h" 18 | #include "stem_UTF_8_dutch.h" 19 | #include "stem_UTF_8_english.h" 20 | #include "stem_UTF_8_finnish.h" 21 | #include "stem_UTF_8_french.h" 22 | #include "stem_UTF_8_german.h" 23 | #include "stem_UTF_8_greek.h" 24 | #include "stem_UTF_8_hindi.h" 25 | #include "stem_UTF_8_hungarian.h" 26 | #include "stem_UTF_8_indonesian.h" 27 | #include "stem_UTF_8_irish.h" 28 | #include "stem_UTF_8_italian.h" 29 | #include "stem_UTF_8_lithuanian.h" 30 | #include "stem_UTF_8_nepali.h" 31 | #include "stem_UTF_8_norwegian.h" 32 | #include "stem_UTF_8_porter.h" 33 | #include "stem_UTF_8_portuguese.h" 34 | #include "stem_UTF_8_romanian.h" 35 | #include "stem_UTF_8_russian.h" 36 | #include "stem_UTF_8_serbian.h" 37 | #include "stem_UTF_8_spanish.h" 38 | #include "stem_UTF_8_swedish.h" 39 | #include "stem_UTF_8_tamil.h" 40 | #include "stem_UTF_8_turkish.h" 41 | #include "stem_UTF_8_yiddish.h" 42 | 43 | typedef enum { 44 | ENC_UNKNOWN=0, 45 | ENC_UTF_8 46 | } stemmer_encoding_t; 47 | 48 | struct stemmer_encoding { 49 | const char * name; 50 | stemmer_encoding_t enc; 51 | }; 52 | static const struct stemmer_encoding encodings[] = { 53 | {"UTF_8", ENC_UTF_8}, 54 | {0,ENC_UNKNOWN} 55 | }; 56 | 57 | struct stemmer_modules { 58 | const char * name; 59 | stemmer_encoding_t enc; 60 | struct SN_env * (*create)(void); 61 | void (*close)(struct SN_env *); 62 | int (*stem)(struct SN_env *); 63 | }; 64 | static const struct stemmer_modules modules[] = { 65 | {"ar", ENC_UTF_8, 
arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem}, 66 | {"ara", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem}, 67 | {"arabic", ENC_UTF_8, arabic_UTF_8_create_env, arabic_UTF_8_close_env, arabic_UTF_8_stem}, 68 | {"arm", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem}, 69 | {"armenian", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem}, 70 | {"baq", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem}, 71 | {"basque", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem}, 72 | {"ca", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem}, 73 | {"cat", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem}, 74 | {"catalan", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem}, 75 | {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 76 | {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 77 | {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 78 | {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 79 | {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 80 | {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 81 | {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 82 | {"el", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem}, 83 | {"ell", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem}, 84 | {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 85 | {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 86 | {"english", ENC_UTF_8, 
english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 87 | {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 88 | {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 89 | {"eu", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem}, 90 | {"eus", ENC_UTF_8, basque_UTF_8_create_env, basque_UTF_8_close_env, basque_UTF_8_stem}, 91 | {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 92 | {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 93 | {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 94 | {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 95 | {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 96 | {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 97 | {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 98 | {"ga", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem}, 99 | {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 100 | {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 101 | {"gle", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem}, 102 | {"gre", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem}, 103 | {"greek", ENC_UTF_8, greek_UTF_8_create_env, greek_UTF_8_close_env, greek_UTF_8_stem}, 104 | {"hi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem}, 105 | {"hin", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem}, 106 | {"hindi", ENC_UTF_8, hindi_UTF_8_create_env, hindi_UTF_8_close_env, hindi_UTF_8_stem}, 107 | {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, 
hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 108 | {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 109 | {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 110 | {"hy", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem}, 111 | {"hye", ENC_UTF_8, armenian_UTF_8_create_env, armenian_UTF_8_close_env, armenian_UTF_8_stem}, 112 | {"id", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem}, 113 | {"ind", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem}, 114 | {"indonesian", ENC_UTF_8, indonesian_UTF_8_create_env, indonesian_UTF_8_close_env, indonesian_UTF_8_stem}, 115 | {"irish", ENC_UTF_8, irish_UTF_8_create_env, irish_UTF_8_close_env, irish_UTF_8_stem}, 116 | {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 117 | {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 118 | {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 119 | {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 120 | {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 121 | {"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 122 | {"ne", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem}, 123 | {"nep", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem}, 124 | {"nepali", ENC_UTF_8, nepali_UTF_8_create_env, nepali_UTF_8_close_env, nepali_UTF_8_stem}, 125 | {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 126 | {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 127 | {"no", ENC_UTF_8, 
norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 128 | {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 129 | {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 130 | {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 131 | {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, 132 | {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 133 | {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 134 | {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 135 | {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 136 | {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 137 | {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 138 | {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 139 | {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 140 | {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 141 | {"serbian", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem}, 142 | {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 143 | {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 144 | {"sr", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem}, 145 | {"srp", ENC_UTF_8, serbian_UTF_8_create_env, serbian_UTF_8_close_env, serbian_UTF_8_stem}, 146 | {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 
147 | {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 148 | {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 149 | {"ta", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem}, 150 | {"tam", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem}, 151 | {"tamil", ENC_UTF_8, tamil_UTF_8_create_env, tamil_UTF_8_close_env, tamil_UTF_8_stem}, 152 | {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 153 | {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 154 | {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 155 | {"yi", ENC_UTF_8, yiddish_UTF_8_create_env, yiddish_UTF_8_close_env, yiddish_UTF_8_stem}, 156 | {"yid", ENC_UTF_8, yiddish_UTF_8_create_env, yiddish_UTF_8_close_env, yiddish_UTF_8_stem}, 157 | {"yiddish", ENC_UTF_8, yiddish_UTF_8_create_env, yiddish_UTF_8_close_env, yiddish_UTF_8_stem}, 158 | {0,ENC_UNKNOWN,0,0,0} 159 | }; 160 | static const char * algorithm_names[] = { 161 | "arabic", 162 | "armenian", 163 | "basque", 164 | "catalan", 165 | "danish", 166 | "dutch", 167 | "english", 168 | "finnish", 169 | "french", 170 | "german", 171 | "greek", 172 | "hindi", 173 | "hungarian", 174 | "indonesian", 175 | "irish", 176 | "italian", 177 | "lithuanian", 178 | "nepali", 179 | "norwegian", 180 | "porter", 181 | "portuguese", 182 | "romanian", 183 | "russian", 184 | "serbian", 185 | "spanish", 186 | "swedish", 187 | "tamil", 188 | "turkish", 189 | "yiddish", 190 | 0 191 | }; 192 | -------------------------------------------------------------------------------- /snowball.go: -------------------------------------------------------------------------------- 1 | // Package snowball implements a stemmer 2 | package snowball 3 | 4 | import ( 5 | _ "embed" 6 | "fmt" 7 | "unsafe" 8 | ) 9 | 10 | /* 11 | #include 12 | 
#include "libstemmer.h" 13 | */ 14 | import "C" 15 | 16 | const ( 17 | // Version is the library version 18 | Version = "0.7.0" 19 | ) 20 | 21 | var ( 22 | //go:embed "lib-version.txt" 23 | SnowballVersion string 24 | ) 25 | 26 | // Stemmer structure. 27 | // Warning: Stemmers are not goroutine safe, create a stemmer per goroutine or 28 | // use a pool (such as sync.Pool) to ensure safety. 29 | type Stemmer struct { 30 | lang string 31 | stmr *C.struct_sb_stemmer 32 | } 33 | 34 | // New creates a new stemmer for lang. 35 | func New(lang string) (*Stemmer, error) { 36 | clang := C.CString(lang) 37 | 38 | stmr := &Stemmer{ 39 | lang, 40 | C.sb_stemmer_new(clang, nil), 41 | } 42 | 43 | if stmr.stmr == nil { 44 | return nil, fmt.Errorf("can't create stemmer for lang %s", lang) 45 | } 46 | 47 | return stmr, nil 48 | } 49 | 50 | // Close closes the stemmer and frees the underlying C memory. 51 | func (stmr *Stemmer) Close() error { 52 | if stmr.stmr != nil { 53 | C.sb_stemmer_delete(stmr.stmr) 54 | stmr.stmr = nil 55 | } 56 | 57 | return nil 58 | } 59 | 60 | // Lang return the stemmer language. 61 | func (stmr *Stemmer) Lang() string { 62 | return stmr.lang 63 | } 64 | 65 | // Stem returns them stem of word (e.g. running -> run). 66 | func (stmr *Stemmer) Stem(word string) string { 67 | ptr := unsafe.Pointer(C.CString(word)) 68 | defer C.free(ptr) 69 | 70 | w := (*C.sb_symbol)(ptr) 71 | res := unsafe.Pointer(C.sb_stemmer_stem(stmr.stmr, w, C.int(len(word)))) 72 | if res == nil { 73 | return word // TODO: Is this what we want? 74 | } 75 | // We don't free res, snowball's documentation says: 76 | // The return value is owned by the stemmer - it must not be freed 77 | 78 | size := C.sb_stemmer_length(stmr.stmr) 79 | buf := C.GoBytes(res, size) 80 | return string(buf) 81 | } 82 | 83 | // LangList returns the list of languages supported by snowball. 
84 | // DEPRECATED: Use Languages 85 | func LangList() []string { 86 | return Languages() 87 | } 88 | 89 | // Languages returns the list of languages supported by snowball. 90 | func Languages() []string { 91 | return langList 92 | } 93 | 94 | var langList []string 95 | 96 | func init() { 97 | // We don't need to free since sb_stemmer_list return pointer to static variable 98 | cp := uintptr(unsafe.Pointer(C.sb_stemmer_list())) 99 | size := unsafe.Sizeof(uintptr(0)) 100 | 101 | for { 102 | name := C.GoString(*(**C.char)(unsafe.Pointer(cp))) 103 | if len(name) == 0 { 104 | break 105 | } 106 | langList = append(langList, name) 107 | cp += size 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /snowball_test.go: -------------------------------------------------------------------------------- 1 | package snowball 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestList(t *testing.T) { 11 | require.Greater(t, len(Languages()), 0, "no langs") 12 | } 13 | 14 | func TestListEnglish(t *testing.T) { 15 | found := false 16 | for _, v := range Languages() { 17 | if v == "english" { 18 | found = true 19 | break 20 | } 21 | } 22 | 23 | require.True(t, found, "english not found") 24 | } 25 | 26 | func TestNew(t *testing.T) { 27 | lang := "english" 28 | stmr, err := New(lang) 29 | require.NoError(t, err, "create english stemmer") 30 | require.Equal(t, lang, stmr.Lang()) 31 | } 32 | 33 | func TestClose(t *testing.T) { 34 | stmr, err := New("english") 35 | require.NoError(t, err, "create english stemmer") 36 | stmr.Close() 37 | require.Nil(t, stmr.stmr, "didn't free C stemmer") 38 | } 39 | 40 | func TestNewNoLang(t *testing.T) { 41 | lang := "klingon" 42 | _, err := New(lang) 43 | require.Error(t, err, "dude, we have a klingon stemmer!") 44 | } 45 | 46 | func TestStem(t *testing.T) { 47 | var testCases = []struct { 48 | lang string 49 | word string 50 | stem string 51 | }{ 
52 | {"english", "running", "run"}, 53 | {"german", "käuflich", "kauflich"}, 54 | } 55 | 56 | for _, tc := range testCases { 57 | name := fmt.Sprintf("%s:%s", tc.lang, tc.word) 58 | t.Run(name, func(t *testing.T) { 59 | stmr, err := New(tc.lang) 60 | require.NoErrorf(t, err, "can't create stemmer") 61 | s := stmr.Stem(tc.word) 62 | require.Equal(t, tc.stem, s) 63 | }) 64 | } 65 | } 66 | 67 | func TestSnowballVersion(t *testing.T) { 68 | require.Regexp(t, `\d+\.\d+(\.\d+)`, SnowballVersion) 69 | } 70 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_basque.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * basque_ISO_8859_1_create_env(void); 8 | extern void basque_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int basque_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_catalan.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * catalan_ISO_8859_1_create_env(void); 8 | extern void catalan_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int catalan_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_danish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int 
danish_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_undouble(struct SN_env * z); 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 23 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 30 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 32 | static const symbol s_0_3[1] = { 'e' }; 33 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 34 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; 35 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 37 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 38 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 39 | static const symbol s_0_10[2] = { 'e', 'n' }; 40 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 41 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 42 | static const symbol s_0_13[2] = { 'e', 'r' }; 43 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 44 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 45 | static const symbol s_0_16[1] = { 's' }; 46 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 47 | static const symbol s_0_18[2] = { 'e', 's' }; 48 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; 51 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 52 | static const symbol s_0_23[4] = { 'e', 
'r', 'e', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 54 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 55 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 57 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 58 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 59 | static const symbol s_0_30[2] = { 'e', 't' }; 60 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; 61 | 62 | static const struct among a_0[32] = 63 | { 64 | { 3, s_0_0, -1, 1, 0}, 65 | { 5, s_0_1, 0, 1, 0}, 66 | { 4, s_0_2, -1, 1, 0}, 67 | { 1, s_0_3, -1, 1, 0}, 68 | { 5, s_0_4, 3, 1, 0}, 69 | { 4, s_0_5, 3, 1, 0}, 70 | { 6, s_0_6, 5, 1, 0}, 71 | { 3, s_0_7, 3, 1, 0}, 72 | { 4, s_0_8, 3, 1, 0}, 73 | { 3, s_0_9, 3, 1, 0}, 74 | { 2, s_0_10, -1, 1, 0}, 75 | { 5, s_0_11, 10, 1, 0}, 76 | { 4, s_0_12, 10, 1, 0}, 77 | { 2, s_0_13, -1, 1, 0}, 78 | { 5, s_0_14, 13, 1, 0}, 79 | { 4, s_0_15, 13, 1, 0}, 80 | { 1, s_0_16, -1, 2, 0}, 81 | { 4, s_0_17, 16, 1, 0}, 82 | { 2, s_0_18, 16, 1, 0}, 83 | { 5, s_0_19, 18, 1, 0}, 84 | { 7, s_0_20, 19, 1, 0}, 85 | { 4, s_0_21, 18, 1, 0}, 86 | { 5, s_0_22, 18, 1, 0}, 87 | { 4, s_0_23, 18, 1, 0}, 88 | { 3, s_0_24, 16, 1, 0}, 89 | { 6, s_0_25, 24, 1, 0}, 90 | { 5, s_0_26, 24, 1, 0}, 91 | { 3, s_0_27, 16, 1, 0}, 92 | { 3, s_0_28, 16, 1, 0}, 93 | { 5, s_0_29, 28, 1, 0}, 94 | { 2, s_0_30, -1, 1, 0}, 95 | { 4, s_0_31, 30, 1, 0} 96 | }; 97 | 98 | static const symbol s_1_0[2] = { 'g', 'd' }; 99 | static const symbol s_1_1[2] = { 'd', 't' }; 100 | static const symbol s_1_2[2] = { 'g', 't' }; 101 | static const symbol s_1_3[2] = { 'k', 't' }; 102 | 103 | static const struct among a_1[4] = 104 | { 105 | { 2, s_1_0, -1, -1, 0}, 106 | { 2, s_1_1, -1, -1, 0}, 107 | { 2, s_1_2, -1, -1, 0}, 108 | { 2, s_1_3, -1, -1, 0} 109 | }; 110 | 111 | static const symbol s_2_0[2] = { 'i', 'g' }; 112 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 113 | static const symbol 
s_2_2[4] = { 'e', 'l', 'i', 'g' }; 114 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 115 | static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; 116 | 117 | static const struct among a_2[5] = 118 | { 119 | { 2, s_2_0, -1, 1, 0}, 120 | { 3, s_2_1, 0, 1, 0}, 121 | { 4, s_2_2, 1, 1, 0}, 122 | { 3, s_2_3, -1, 1, 0}, 123 | { 4, s_2_4, -1, 2, 0} 124 | }; 125 | 126 | static const unsigned char g_c[] = { 119, 223, 119, 1 }; 127 | 128 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | 130 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 131 | 132 | static const symbol s_0[] = { 's', 't' }; 133 | static const symbol s_1[] = { 'i', 'g' }; 134 | static const symbol s_2[] = { 'l', 0xF8, 's' }; 135 | 136 | static int r_mark_regions(struct SN_env * z) { 137 | z->I[1] = z->l; 138 | { int c_test1 = z->c; 139 | z->c = z->c + 3; 140 | if (z->c > z->l) return 0; 141 | z->I[0] = z->c; 142 | z->c = c_test1; 143 | } 144 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; 145 | { 146 | int ret = in_grouping(z, g_v, 97, 248, 1); 147 | if (ret < 0) return 0; 148 | z->c += ret; 149 | } 150 | z->I[1] = z->c; 151 | 152 | if (!(z->I[1] < z->I[0])) goto lab0; 153 | z->I[1] = z->I[0]; 154 | lab0: 155 | return 1; 156 | } 157 | 158 | static int r_main_suffix(struct SN_env * z) { 159 | int among_var; 160 | 161 | { int mlimit1; 162 | if (z->c < z->I[1]) return 0; 163 | mlimit1 = z->lb; z->lb = z->I[1]; 164 | z->ket = z->c; 165 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 166 | among_var = find_among_b(z, a_0, 32); 167 | if (!(among_var)) { z->lb = mlimit1; return 0; } 168 | z->bra = z->c; 169 | z->lb = mlimit1; 170 | } 171 | switch (among_var) { 172 | case 1: 173 | { int ret = slice_del(z); 174 | if (ret < 0) return ret; 175 | } 176 | break; 177 | case 2: 178 | if (in_grouping_b(z, g_s_ending, 97, 
229, 0)) return 0; 179 | { int ret = slice_del(z); 180 | if (ret < 0) return ret; 181 | } 182 | break; 183 | } 184 | return 1; 185 | } 186 | 187 | static int r_consonant_pair(struct SN_env * z) { 188 | { int m_test1 = z->l - z->c; 189 | 190 | { int mlimit2; 191 | if (z->c < z->I[1]) return 0; 192 | mlimit2 = z->lb; z->lb = z->I[1]; 193 | z->ket = z->c; 194 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } 195 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; } 196 | z->bra = z->c; 197 | z->lb = mlimit2; 198 | } 199 | z->c = z->l - m_test1; 200 | } 201 | if (z->c <= z->lb) return 0; 202 | z->c--; 203 | z->bra = z->c; 204 | { int ret = slice_del(z); 205 | if (ret < 0) return ret; 206 | } 207 | return 1; 208 | } 209 | 210 | static int r_other_suffix(struct SN_env * z) { 211 | int among_var; 212 | { int m1 = z->l - z->c; (void)m1; 213 | z->ket = z->c; 214 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 215 | z->bra = z->c; 216 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 217 | { int ret = slice_del(z); 218 | if (ret < 0) return ret; 219 | } 220 | lab0: 221 | z->c = z->l - m1; 222 | } 223 | 224 | { int mlimit2; 225 | if (z->c < z->I[1]) return 0; 226 | mlimit2 = z->lb; z->lb = z->I[1]; 227 | z->ket = z->c; 228 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } 229 | among_var = find_among_b(z, a_2, 5); 230 | if (!(among_var)) { z->lb = mlimit2; return 0; } 231 | z->bra = z->c; 232 | z->lb = mlimit2; 233 | } 234 | switch (among_var) { 235 | case 1: 236 | { int ret = slice_del(z); 237 | if (ret < 0) return ret; 238 | } 239 | { int m3 = z->l - z->c; (void)m3; 240 | { int ret = r_consonant_pair(z); 241 | if (ret < 0) return ret; 242 | } 243 | z->c = z->l - m3; 244 | } 245 | break; 246 | case 2: 247 | { int ret = slice_from_s(z, 3, s_2); 248 | if (ret < 0) return ret; 249 | } 250 | break; 251 | } 252 | return 1; 253 | } 254 | 255 | static 
int r_undouble(struct SN_env * z) { 256 | 257 | { int mlimit1; 258 | if (z->c < z->I[1]) return 0; 259 | mlimit1 = z->lb; z->lb = z->I[1]; 260 | z->ket = z->c; 261 | if (in_grouping_b(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } 262 | z->bra = z->c; 263 | z->S[0] = slice_to(z, z->S[0]); 264 | if (z->S[0] == 0) return -1; 265 | z->lb = mlimit1; 266 | } 267 | if (!(eq_v_b(z, z->S[0]))) return 0; 268 | { int ret = slice_del(z); 269 | if (ret < 0) return ret; 270 | } 271 | return 1; 272 | } 273 | 274 | extern int danish_ISO_8859_1_stem(struct SN_env * z) { 275 | { int c1 = z->c; 276 | { int ret = r_mark_regions(z); 277 | if (ret < 0) return ret; 278 | } 279 | z->c = c1; 280 | } 281 | z->lb = z->c; z->c = z->l; 282 | 283 | { int m2 = z->l - z->c; (void)m2; 284 | { int ret = r_main_suffix(z); 285 | if (ret < 0) return ret; 286 | } 287 | z->c = z->l - m2; 288 | } 289 | { int m3 = z->l - z->c; (void)m3; 290 | { int ret = r_consonant_pair(z); 291 | if (ret < 0) return ret; 292 | } 293 | z->c = z->l - m3; 294 | } 295 | { int m4 = z->l - z->c; (void)m4; 296 | { int ret = r_other_suffix(z); 297 | if (ret < 0) return ret; 298 | } 299 | z->c = z->l - m4; 300 | } 301 | { int m5 = z->l - z->c; (void)m5; 302 | { int ret = r_undouble(z); 303 | if (ret < 0) return ret; 304 | } 305 | z->c = z->l - m5; 306 | } 307 | z->c = z->lb; 308 | return 1; 309 | } 310 | 311 | extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2); } 312 | 313 | extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } 314 | 315 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_danish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 8 | extern void danish_ISO_8859_1_close_env(struct 
SN_env * z); 9 | 10 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_dutch.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * dutch_ISO_8859_1_create_env(void); 8 | extern void dutch_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int dutch_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_english.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * english_ISO_8859_1_create_env(void); 8 | extern void english_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int english_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_finnish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * finnish_ISO_8859_1_create_env(void); 8 | extern void finnish_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int finnish_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_french.h: 
-------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * french_ISO_8859_1_create_env(void); 8 | extern void french_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int french_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_german.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int german_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_standard_suffix(struct SN_env * z); 13 | static int r_R2(struct SN_env * z); 14 | static int r_R1(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | static int r_postlude(struct SN_env * z); 17 | static int r_prelude(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * german_ISO_8859_1_create_env(void); 24 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | static const symbol s_0_1[1] = { 'U' }; 31 | static const symbol s_0_2[1] = { 'Y' }; 32 | static const symbol s_0_3[1] = { 0xE4 }; 33 | static const symbol s_0_4[1] = { 0xF6 }; 34 | static const symbol s_0_5[1] = { 0xFC }; 35 | 36 | static const struct among a_0[6] = 37 | { 38 | { 0, 0, -1, 5, 0}, 39 | { 1, s_0_1, 0, 2, 0}, 40 | { 1, s_0_2, 0, 1, 0}, 41 | { 1, s_0_3, 0, 3, 0}, 42 | { 1, s_0_4, 0, 4, 0}, 43 | { 1, s_0_5, 0, 2, 0} 44 | }; 45 | 46 | static const symbol s_1_0[1] = { 'e' }; 47 | static const symbol s_1_1[2] = { 'e', 'm' }; 48 | static const symbol s_1_2[2] = 
{ 'e', 'n' }; 49 | static const symbol s_1_3[3] = { 'e', 'r', 'n' }; 50 | static const symbol s_1_4[2] = { 'e', 'r' }; 51 | static const symbol s_1_5[1] = { 's' }; 52 | static const symbol s_1_6[2] = { 'e', 's' }; 53 | 54 | static const struct among a_1[7] = 55 | { 56 | { 1, s_1_0, -1, 2, 0}, 57 | { 2, s_1_1, -1, 1, 0}, 58 | { 2, s_1_2, -1, 2, 0}, 59 | { 3, s_1_3, -1, 1, 0}, 60 | { 2, s_1_4, -1, 1, 0}, 61 | { 1, s_1_5, -1, 3, 0}, 62 | { 2, s_1_6, 5, 2, 0} 63 | }; 64 | 65 | static const symbol s_2_0[2] = { 'e', 'n' }; 66 | static const symbol s_2_1[2] = { 'e', 'r' }; 67 | static const symbol s_2_2[2] = { 's', 't' }; 68 | static const symbol s_2_3[3] = { 'e', 's', 't' }; 69 | 70 | static const struct among a_2[4] = 71 | { 72 | { 2, s_2_0, -1, 1, 0}, 73 | { 2, s_2_1, -1, 1, 0}, 74 | { 2, s_2_2, -1, 2, 0}, 75 | { 3, s_2_3, 2, 1, 0} 76 | }; 77 | 78 | static const symbol s_3_0[2] = { 'i', 'g' }; 79 | static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; 80 | 81 | static const struct among a_3[2] = 82 | { 83 | { 2, s_3_0, -1, 1, 0}, 84 | { 4, s_3_1, -1, 1, 0} 85 | }; 86 | 87 | static const symbol s_4_0[3] = { 'e', 'n', 'd' }; 88 | static const symbol s_4_1[2] = { 'i', 'g' }; 89 | static const symbol s_4_2[3] = { 'u', 'n', 'g' }; 90 | static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; 91 | static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; 92 | static const symbol s_4_5[2] = { 'i', 'k' }; 93 | static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; 94 | static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; 95 | 96 | static const struct among a_4[8] = 97 | { 98 | { 3, s_4_0, -1, 1, 0}, 99 | { 2, s_4_1, -1, 2, 0}, 100 | { 3, s_4_2, -1, 1, 0}, 101 | { 4, s_4_3, -1, 3, 0}, 102 | { 4, s_4_4, -1, 2, 0}, 103 | { 2, s_4_5, -1, 2, 0}, 104 | { 4, s_4_6, -1, 3, 0}, 105 | { 4, s_4_7, -1, 4, 0} 106 | }; 107 | 108 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; 109 | 110 | static const unsigned char g_s_ending[] = { 117, 
30, 5 }; 111 | 112 | static const unsigned char g_st_ending[] = { 117, 30, 4 }; 113 | 114 | static const symbol s_0[] = { 's', 's' }; 115 | static const symbol s_1[] = { 'U' }; 116 | static const symbol s_2[] = { 'Y' }; 117 | static const symbol s_3[] = { 'y' }; 118 | static const symbol s_4[] = { 'u' }; 119 | static const symbol s_5[] = { 'a' }; 120 | static const symbol s_6[] = { 'o' }; 121 | static const symbol s_7[] = { 'n', 'i', 's' }; 122 | static const symbol s_8[] = { 'i', 'g' }; 123 | static const symbol s_9[] = { 'e', 'r' }; 124 | static const symbol s_10[] = { 'e', 'n' }; 125 | 126 | static int r_prelude(struct SN_env * z) { 127 | { int c_test1 = z->c; 128 | while(1) { 129 | int c2 = z->c; 130 | { int c3 = z->c; 131 | z->bra = z->c; 132 | if (z->c == z->l || z->p[z->c] != 0xDF) goto lab2; 133 | z->c++; 134 | z->ket = z->c; 135 | { int ret = slice_from_s(z, 2, s_0); 136 | if (ret < 0) return ret; 137 | } 138 | goto lab1; 139 | lab2: 140 | z->c = c3; 141 | if (z->c >= z->l) goto lab0; 142 | z->c++; 143 | } 144 | lab1: 145 | continue; 146 | lab0: 147 | z->c = c2; 148 | break; 149 | } 150 | z->c = c_test1; 151 | } 152 | while(1) { 153 | int c4 = z->c; 154 | while(1) { 155 | int c5 = z->c; 156 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; 157 | z->bra = z->c; 158 | { int c6 = z->c; 159 | if (z->c == z->l || z->p[z->c] != 'u') goto lab6; 160 | z->c++; 161 | z->ket = z->c; 162 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab6; 163 | { int ret = slice_from_s(z, 1, s_1); 164 | if (ret < 0) return ret; 165 | } 166 | goto lab5; 167 | lab6: 168 | z->c = c6; 169 | if (z->c == z->l || z->p[z->c] != 'y') goto lab4; 170 | z->c++; 171 | z->ket = z->c; 172 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; 173 | { int ret = slice_from_s(z, 1, s_2); 174 | if (ret < 0) return ret; 175 | } 176 | } 177 | lab5: 178 | z->c = c5; 179 | break; 180 | lab4: 181 | z->c = c5; 182 | if (z->c >= z->l) goto lab3; 183 | z->c++; 184 | } 185 | continue; 186 | lab3: 187 | z->c = c4; 188 | 
break; 189 | } 190 | return 1; 191 | } 192 | 193 | static int r_mark_regions(struct SN_env * z) { 194 | z->I[2] = z->l; 195 | z->I[1] = z->l; 196 | { int c_test1 = z->c; 197 | z->c = z->c + 3; 198 | if (z->c > z->l) return 0; 199 | z->I[0] = z->c; 200 | z->c = c_test1; 201 | } 202 | { 203 | int ret = out_grouping(z, g_v, 97, 252, 1); 204 | if (ret < 0) return 0; 205 | z->c += ret; 206 | } 207 | { 208 | int ret = in_grouping(z, g_v, 97, 252, 1); 209 | if (ret < 0) return 0; 210 | z->c += ret; 211 | } 212 | z->I[2] = z->c; 213 | 214 | if (!(z->I[2] < z->I[0])) goto lab0; 215 | z->I[2] = z->I[0]; 216 | lab0: 217 | { 218 | int ret = out_grouping(z, g_v, 97, 252, 1); 219 | if (ret < 0) return 0; 220 | z->c += ret; 221 | } 222 | { 223 | int ret = in_grouping(z, g_v, 97, 252, 1); 224 | if (ret < 0) return 0; 225 | z->c += ret; 226 | } 227 | z->I[1] = z->c; 228 | return 1; 229 | } 230 | 231 | static int r_postlude(struct SN_env * z) { 232 | int among_var; 233 | while(1) { 234 | int c1 = z->c; 235 | z->bra = z->c; 236 | among_var = find_among(z, a_0, 6); 237 | if (!(among_var)) goto lab0; 238 | z->ket = z->c; 239 | switch (among_var) { 240 | case 1: 241 | { int ret = slice_from_s(z, 1, s_3); 242 | if (ret < 0) return ret; 243 | } 244 | break; 245 | case 2: 246 | { int ret = slice_from_s(z, 1, s_4); 247 | if (ret < 0) return ret; 248 | } 249 | break; 250 | case 3: 251 | { int ret = slice_from_s(z, 1, s_5); 252 | if (ret < 0) return ret; 253 | } 254 | break; 255 | case 4: 256 | { int ret = slice_from_s(z, 1, s_6); 257 | if (ret < 0) return ret; 258 | } 259 | break; 260 | case 5: 261 | if (z->c >= z->l) goto lab0; 262 | z->c++; 263 | break; 264 | } 265 | continue; 266 | lab0: 267 | z->c = c1; 268 | break; 269 | } 270 | return 1; 271 | } 272 | 273 | static int r_R1(struct SN_env * z) { 274 | if (!(z->I[2] <= z->c)) return 0; 275 | return 1; 276 | } 277 | 278 | static int r_R2(struct SN_env * z) { 279 | if (!(z->I[1] <= z->c)) return 0; 280 | return 1; 281 | } 282 | 283 | static 
int r_standard_suffix(struct SN_env * z) { 284 | int among_var; 285 | { int m1 = z->l - z->c; (void)m1; 286 | z->ket = z->c; 287 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; 288 | among_var = find_among_b(z, a_1, 7); 289 | if (!(among_var)) goto lab0; 290 | z->bra = z->c; 291 | { int ret = r_R1(z); 292 | if (ret == 0) goto lab0; 293 | if (ret < 0) return ret; 294 | } 295 | switch (among_var) { 296 | case 1: 297 | { int ret = slice_del(z); 298 | if (ret < 0) return ret; 299 | } 300 | break; 301 | case 2: 302 | { int ret = slice_del(z); 303 | if (ret < 0) return ret; 304 | } 305 | { int m2 = z->l - z->c; (void)m2; 306 | z->ket = z->c; 307 | if (z->c <= z->lb || z->p[z->c - 1] != 's') { z->c = z->l - m2; goto lab1; } 308 | z->c--; 309 | z->bra = z->c; 310 | if (!(eq_s_b(z, 3, s_7))) { z->c = z->l - m2; goto lab1; } 311 | { int ret = slice_del(z); 312 | if (ret < 0) return ret; 313 | } 314 | lab1: 315 | ; 316 | } 317 | break; 318 | case 3: 319 | if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0; 320 | { int ret = slice_del(z); 321 | if (ret < 0) return ret; 322 | } 323 | break; 324 | } 325 | lab0: 326 | z->c = z->l - m1; 327 | } 328 | { int m3 = z->l - z->c; (void)m3; 329 | z->ket = z->c; 330 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; 331 | among_var = find_among_b(z, a_2, 4); 332 | if (!(among_var)) goto lab2; 333 | z->bra = z->c; 334 | { int ret = r_R1(z); 335 | if (ret == 0) goto lab2; 336 | if (ret < 0) return ret; 337 | } 338 | switch (among_var) { 339 | case 1: 340 | { int ret = slice_del(z); 341 | if (ret < 0) return ret; 342 | } 343 | break; 344 | case 2: 345 | if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab2; 346 | z->c = z->c - 3; 347 | if (z->c < z->lb) goto lab2; 348 | { int ret = slice_del(z); 349 | if (ret < 0) return ret; 350 | } 351 | break; 352 | } 353 | lab2: 354 | z->c = z->l - m3; 355 | } 356 | { int m4 = 
z->l - z->c; (void)m4; 357 | z->ket = z->c; 358 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; 359 | among_var = find_among_b(z, a_4, 8); 360 | if (!(among_var)) goto lab3; 361 | z->bra = z->c; 362 | { int ret = r_R2(z); 363 | if (ret == 0) goto lab3; 364 | if (ret < 0) return ret; 365 | } 366 | switch (among_var) { 367 | case 1: 368 | { int ret = slice_del(z); 369 | if (ret < 0) return ret; 370 | } 371 | { int m5 = z->l - z->c; (void)m5; 372 | z->ket = z->c; 373 | if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m5; goto lab4; } 374 | z->bra = z->c; 375 | { int m6 = z->l - z->c; (void)m6; 376 | if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab5; 377 | z->c--; 378 | { z->c = z->l - m5; goto lab4; } 379 | lab5: 380 | z->c = z->l - m6; 381 | } 382 | { int ret = r_R2(z); 383 | if (ret == 0) { z->c = z->l - m5; goto lab4; } 384 | if (ret < 0) return ret; 385 | } 386 | { int ret = slice_del(z); 387 | if (ret < 0) return ret; 388 | } 389 | lab4: 390 | ; 391 | } 392 | break; 393 | case 2: 394 | { int m7 = z->l - z->c; (void)m7; 395 | if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab6; 396 | z->c--; 397 | goto lab3; 398 | lab6: 399 | z->c = z->l - m7; 400 | } 401 | { int ret = slice_del(z); 402 | if (ret < 0) return ret; 403 | } 404 | break; 405 | case 3: 406 | { int ret = slice_del(z); 407 | if (ret < 0) return ret; 408 | } 409 | { int m8 = z->l - z->c; (void)m8; 410 | z->ket = z->c; 411 | { int m9 = z->l - z->c; (void)m9; 412 | if (!(eq_s_b(z, 2, s_9))) goto lab9; 413 | goto lab8; 414 | lab9: 415 | z->c = z->l - m9; 416 | if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m8; goto lab7; } 417 | } 418 | lab8: 419 | z->bra = z->c; 420 | { int ret = r_R1(z); 421 | if (ret == 0) { z->c = z->l - m8; goto lab7; } 422 | if (ret < 0) return ret; 423 | } 424 | { int ret = slice_del(z); 425 | if (ret < 0) return ret; 426 | } 427 | lab7: 428 | ; 429 | } 430 | break; 431 | case 4: 432 | { int ret = slice_del(z); 433 | if (ret < 
0) return ret; 434 | } 435 | { int m10 = z->l - z->c; (void)m10; 436 | z->ket = z->c; 437 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m10; goto lab10; } 438 | if (!(find_among_b(z, a_3, 2))) { z->c = z->l - m10; goto lab10; } 439 | z->bra = z->c; 440 | { int ret = r_R2(z); 441 | if (ret == 0) { z->c = z->l - m10; goto lab10; } 442 | if (ret < 0) return ret; 443 | } 444 | { int ret = slice_del(z); 445 | if (ret < 0) return ret; 446 | } 447 | lab10: 448 | ; 449 | } 450 | break; 451 | } 452 | lab3: 453 | z->c = z->l - m4; 454 | } 455 | return 1; 456 | } 457 | 458 | extern int german_ISO_8859_1_stem(struct SN_env * z) { 459 | { int c1 = z->c; 460 | { int ret = r_prelude(z); 461 | if (ret < 0) return ret; 462 | } 463 | z->c = c1; 464 | } 465 | { int c2 = z->c; 466 | { int ret = r_mark_regions(z); 467 | if (ret < 0) return ret; 468 | } 469 | z->c = c2; 470 | } 471 | z->lb = z->c; z->c = z->l; 472 | 473 | 474 | { int ret = r_standard_suffix(z); 475 | if (ret < 0) return ret; 476 | } 477 | z->c = z->lb; 478 | { int c3 = z->c; 479 | { int ret = r_postlude(z); 480 | if (ret < 0) return ret; 481 | } 482 | z->c = c3; 483 | } 484 | return 1; 485 | } 486 | 487 | extern struct SN_env * german_ISO_8859_1_create_env(void) { return SN_create_env(0, 3); } 488 | 489 | extern void german_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 490 | 491 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_german.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * german_ISO_8859_1_create_env(void); 8 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int german_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | 
-------------------------------------------------------------------------------- /stem_ISO_8859_1_indonesian.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int indonesian_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_VOWEL(struct SN_env * z); 13 | static int r_SUFFIX_I_OK(struct SN_env * z); 14 | static int r_SUFFIX_AN_OK(struct SN_env * z); 15 | static int r_SUFFIX_KAN_OK(struct SN_env * z); 16 | static int r_KER(struct SN_env * z); 17 | static int r_remove_suffix(struct SN_env * z); 18 | static int r_remove_second_order_prefix(struct SN_env * z); 19 | static int r_remove_first_order_prefix(struct SN_env * z); 20 | static int r_remove_possessive_pronoun(struct SN_env * z); 21 | static int r_remove_particle(struct SN_env * z); 22 | #ifdef __cplusplus 23 | extern "C" { 24 | #endif 25 | 26 | 27 | extern struct SN_env * indonesian_ISO_8859_1_create_env(void); 28 | extern void indonesian_ISO_8859_1_close_env(struct SN_env * z); 29 | 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | static const symbol s_0_0[3] = { 'k', 'a', 'h' }; 35 | static const symbol s_0_1[3] = { 'l', 'a', 'h' }; 36 | static const symbol s_0_2[3] = { 'p', 'u', 'n' }; 37 | 38 | static const struct among a_0[3] = 39 | { 40 | { 3, s_0_0, -1, 1, 0}, 41 | { 3, s_0_1, -1, 1, 0}, 42 | { 3, s_0_2, -1, 1, 0} 43 | }; 44 | 45 | static const symbol s_1_0[3] = { 'n', 'y', 'a' }; 46 | static const symbol s_1_1[2] = { 'k', 'u' }; 47 | static const symbol s_1_2[2] = { 'm', 'u' }; 48 | 49 | static const struct among a_1[3] = 50 | { 51 | { 3, s_1_0, -1, 1, 0}, 52 | { 2, s_1_1, -1, 1, 0}, 53 | { 2, s_1_2, -1, 1, 0} 54 | }; 55 | 56 | static const symbol s_2_0[1] = { 'i' }; 57 | static const symbol s_2_1[2] = { 'a', 'n' }; 58 | static const symbol s_2_2[3] = { 'k', 'a', 
'n' }; 59 | 60 | static const struct among a_2[3] = 61 | { 62 | { 1, s_2_0, -1, 1, r_SUFFIX_I_OK}, 63 | { 2, s_2_1, -1, 1, r_SUFFIX_AN_OK}, 64 | { 3, s_2_2, 1, 1, r_SUFFIX_KAN_OK} 65 | }; 66 | 67 | static const symbol s_3_0[2] = { 'd', 'i' }; 68 | static const symbol s_3_1[2] = { 'k', 'e' }; 69 | static const symbol s_3_2[2] = { 'm', 'e' }; 70 | static const symbol s_3_3[3] = { 'm', 'e', 'm' }; 71 | static const symbol s_3_4[3] = { 'm', 'e', 'n' }; 72 | static const symbol s_3_5[4] = { 'm', 'e', 'n', 'g' }; 73 | static const symbol s_3_6[4] = { 'm', 'e', 'n', 'y' }; 74 | static const symbol s_3_7[3] = { 'p', 'e', 'm' }; 75 | static const symbol s_3_8[3] = { 'p', 'e', 'n' }; 76 | static const symbol s_3_9[4] = { 'p', 'e', 'n', 'g' }; 77 | static const symbol s_3_10[4] = { 'p', 'e', 'n', 'y' }; 78 | static const symbol s_3_11[3] = { 't', 'e', 'r' }; 79 | 80 | static const struct among a_3[12] = 81 | { 82 | { 2, s_3_0, -1, 1, 0}, 83 | { 2, s_3_1, -1, 2, 0}, 84 | { 2, s_3_2, -1, 1, 0}, 85 | { 3, s_3_3, 2, 5, 0}, 86 | { 3, s_3_4, 2, 1, 0}, 87 | { 4, s_3_5, 4, 1, 0}, 88 | { 4, s_3_6, 4, 3, r_VOWEL}, 89 | { 3, s_3_7, -1, 6, 0}, 90 | { 3, s_3_8, -1, 2, 0}, 91 | { 4, s_3_9, 8, 2, 0}, 92 | { 4, s_3_10, 8, 4, r_VOWEL}, 93 | { 3, s_3_11, -1, 1, 0} 94 | }; 95 | 96 | static const symbol s_4_0[2] = { 'b', 'e' }; 97 | static const symbol s_4_1[7] = { 'b', 'e', 'l', 'a', 'j', 'a', 'r' }; 98 | static const symbol s_4_2[3] = { 'b', 'e', 'r' }; 99 | static const symbol s_4_3[2] = { 'p', 'e' }; 100 | static const symbol s_4_4[7] = { 'p', 'e', 'l', 'a', 'j', 'a', 'r' }; 101 | static const symbol s_4_5[3] = { 'p', 'e', 'r' }; 102 | 103 | static const struct among a_4[6] = 104 | { 105 | { 2, s_4_0, -1, 3, r_KER}, 106 | { 7, s_4_1, 0, 4, 0}, 107 | { 3, s_4_2, 0, 3, 0}, 108 | { 2, s_4_3, -1, 1, 0}, 109 | { 7, s_4_4, 3, 2, 0}, 110 | { 3, s_4_5, 3, 1, 0} 111 | }; 112 | 113 | static const unsigned char g_vowel[] = { 17, 65, 16 }; 114 | 115 | static const symbol s_0[] = { 'e', 'r' }; 116 | 
/* s_1..s_6: replacement strings handed to slice_from_s by the two prefix routines below. */ static const symbol s_1[] = { 's' }; 117 | static const symbol s_2[] = { 's' }; 118 | static const symbol s_3[] = { 'p' }; 119 | static const symbol s_4[] = { 'p' }; 120 | static const symbol s_5[] = { 'a', 'j', 'a', 'r' }; 121 | static const symbol s_6[] = { 'a', 'j', 'a', 'r' }; 122 | 123 | /* Backward step: if find_among_b finds an a_0 entry ("kah", "lah", "pun") ending at the cursor, delete it and decrement z->I[1]. Returns 1 after a removal, 0 when nothing matched, or the negative result of slice_del. */ static int r_remove_particle(struct SN_env * z) { 124 | z->ket = z->c; 125 | if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 104 && z->p[z->c - 1] != 110)) return 0; 126 | if (!(find_among_b(z, a_0, 3))) return 0; 127 | z->bra = z->c; 128 | { int ret = slice_del(z); 129 | if (ret < 0) return ret; 130 | } 131 | z->I[1] -= 1; 132 | return 1; 133 | } 134 | 135 | /* Backward step: same shape as r_remove_particle, but searching a_1 ("nya", "ku", "mu"). The byte test before find_among_b is an early exit on the final character (97 or 117). */ static int r_remove_possessive_pronoun(struct SN_env * z) { 136 | z->ket = z->c; 137 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 117)) return 0; 138 | if (!(find_among_b(z, a_1, 3))) return 0; 139 | z->bra = z->c; 140 | { int ret = slice_del(z); 141 | if (ret < 0) return ret; 142 | } 143 | z->I[1] -= 1; 144 | return 1; 145 | } 146 | 147 | /* Guard attached to the "kan" row of a_2: returns 1 unless z->I[0] is 3 or 2. */ static int r_SUFFIX_KAN_OK(struct SN_env * z) { 148 | 149 | if (!(z->I[0] != 3)) return 0; 150 | if (!(z->I[0] != 2)) return 0; 151 | return 1; 152 | } 153 | 154 | /* Guard attached to the "an" row of a_2: returns 1 unless z->I[0] is 1. */ static int r_SUFFIX_AN_OK(struct SN_env * z) { 155 | if (!(z->I[0] != 1)) return 0; 156 | return 1; 157 | } 158 | 159 | /* Guard attached to the "i" row of a_2: requires z->I[0] <= 2 and returns 0 when the symbol just before the cursor is "s"; otherwise the cursor is put back and 1 is returned. */ static int r_SUFFIX_I_OK(struct SN_env * z) { 160 | if (!(z->I[0] <= 2)) return 0; 161 | { int m1 = z->l - z->c; (void)m1; 162 | if (z->c <= z->lb || z->p[z->c - 1] != 's') goto lab0; 163 | z->c--; 164 | return 0; 165 | lab0: 166 | z->c = z->l - m1; 167 | } 168 | return 1; 169 | } 170 | 171 | /* Backward step: delete an a_2 entry ("i", "an", "kan") ending at the cursor, then decrement z->I[1]. Returns 1 after a removal, 0 when find_among_b reports no match, or the negative result of slice_del. */ static int r_remove_suffix(struct SN_env * z) { 172 | z->ket = z->c; 173 | if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 110)) return 0; 174 | if (!(find_among_b(z, a_2, 3))) return 0; 175 | z->bra = z->c; 176 | { int ret = slice_del(z); 177 | if (ret < 0) return ret; 178 | } 179 | z->I[1] -= 1; 180 | return 1; 181 | } 182 | 183 | /* Guard attached to the "meny" and "peny" rows of a_3: returns 1 exactly when in_grouping(g_vowel) returns 0, which presumably means the next symbol is a vowel (TODO confirm the return convention in utilities.c). */ static int r_VOWEL(struct SN_env * z) { 184 | if 
(in_grouping(z, g_vowel, 97, 117, 0)) return 0; 185 | return 1; 186 | } 187 | 188 | /* Guard attached to the "be" row of a_4: requires out_grouping(g_vowel) to return 0 and the next two symbols to equal s_0 ("er"). */ static int r_KER(struct SN_env * z) { 189 | if (out_grouping(z, g_vowel, 97, 117, 0)) return 0; 190 | if (!(eq_s(z, 2, s_0))) return 0; 191 | return 1; 192 | } 193 | 194 | /* Forward step: search a_3 at the cursor and act on the resulting case number. Cases 1 and 2 delete the match; cases 3 and 4 replace it with "s"; cases 5 and 6 replace it with "p" when in_grouping(g_vowel) returns 0 and delete it otherwise. Every case stores 1 or 3 in z->I[0] and decrements z->I[1]. Returns 1 on a match, 0 on no match, negative on a slice error. */ static int r_remove_first_order_prefix(struct SN_env * z) { 195 | int among_var; 196 | z->bra = z->c; 197 | if (z->c + 1 >= z->l || (z->p[z->c + 1] != 105 && z->p[z->c + 1] != 101)) return 0; 198 | among_var = find_among(z, a_3, 12); 199 | if (!(among_var)) return 0; 200 | z->ket = z->c; 201 | switch (among_var) { 202 | case 1: 203 | { int ret = slice_del(z); 204 | if (ret < 0) return ret; 205 | } 206 | z->I[0] = 1; 207 | z->I[1] -= 1; 208 | break; 209 | case 2: 210 | { int ret = slice_del(z); 211 | if (ret < 0) return ret; 212 | } 213 | z->I[0] = 3; 214 | z->I[1] -= 1; 215 | break; 216 | case 3: 217 | z->I[0] = 1; 218 | { int ret = slice_from_s(z, 1, s_1); 219 | if (ret < 0) return ret; 220 | } 221 | z->I[1] -= 1; 222 | break; 223 | case 4: 224 | z->I[0] = 3; 225 | { int ret = slice_from_s(z, 1, s_2); 226 | if (ret < 0) return ret; 227 | } 228 | z->I[1] -= 1; 229 | break; 230 | case 5: 231 | z->I[0] = 1; 232 | z->I[1] -= 1; 233 | { int c1 = z->c; 234 | { int c2 = z->c; 235 | if (in_grouping(z, g_vowel, 97, 117, 0)) goto lab1; 236 | z->c = c2; 237 | { int ret = slice_from_s(z, 1, s_3); 238 | if (ret < 0) return ret; 239 | } 240 | } 241 | goto lab0; 242 | lab1: 243 | z->c = c1; 244 | { int ret = slice_del(z); 245 | if (ret < 0) return ret; 246 | } 247 | } 248 | lab0: 249 | break; 250 | case 6: 251 | z->I[0] = 3; 252 | z->I[1] -= 1; 253 | { int c3 = z->c; 254 | { int c4 = z->c; 255 | if (in_grouping(z, g_vowel, 97, 117, 0)) goto lab3; 256 | z->c = c4; 257 | { int ret = slice_from_s(z, 1, s_4); 258 | if (ret < 0) return ret; 259 | } 260 | } 261 | goto lab2; 262 | lab3: 263 | z->c = c3; 264 | { int ret = slice_del(z); 265 | if (ret < 0) return ret; 266 | } 267 | } 268 | lab2: 269 | break; 270 | } 271 | return 1; 272 | 
} 273 | 274 | /* Forward step: search a_4 at the cursor. Case 1 deletes the match and stores 2 in z->I[0]; case 2 replaces it with "ajar" and leaves z->I[0] alone; case 3 deletes it and stores 4; case 4 replaces it with "ajar" and stores 4. Every case decrements z->I[1]. Returns 1 on a match, 0 on no match, negative on a slice error. */ static int r_remove_second_order_prefix(struct SN_env * z) { 275 | int among_var; 276 | z->bra = z->c; 277 | if (z->c + 1 >= z->l || z->p[z->c + 1] != 101) return 0; 278 | among_var = find_among(z, a_4, 6); 279 | if (!(among_var)) return 0; 280 | z->ket = z->c; 281 | switch (among_var) { 282 | case 1: 283 | { int ret = slice_del(z); 284 | if (ret < 0) return ret; 285 | } 286 | z->I[0] = 2; 287 | z->I[1] -= 1; 288 | break; 289 | case 2: 290 | { int ret = slice_from_s(z, 4, s_5); 291 | if (ret < 0) return ret; 292 | } 293 | z->I[1] -= 1; 294 | break; 295 | case 3: 296 | { int ret = slice_del(z); 297 | if (ret < 0) return ret; 298 | } 299 | z->I[0] = 4; 300 | z->I[1] -= 1; 301 | break; 302 | case 4: 303 | { int ret = slice_from_s(z, 4, s_6); 304 | if (ret < 0) return ret; 305 | } 306 | z->I[0] = 4; 307 | z->I[1] -= 1; 308 | break; 309 | } 310 | return 1; 311 | } 312 | 313 | /* Entry point. First counts into z->I[1] how many times out_grouping(g_vowel, 97, 117, 1) succeeds from the cursor (apparently a vowel count; TODO confirm), then returns 0 whenever that count is not above 2. Otherwise it works backward to strip a particle and a possessive pronoun, then forward: first-order prefix, followed by suffix and second-order prefix; if no first-order prefix matched it falls back to second-order prefix followed by suffix. Negative results from the helpers are propagated; otherwise returns 1. */ extern int indonesian_ISO_8859_1_stem(struct SN_env * z) { 314 | z->I[1] = 0; 315 | { int c1 = z->c; 316 | while(1) { 317 | int c2 = z->c; 318 | { 319 | int ret = out_grouping(z, g_vowel, 97, 117, 1); 320 | if (ret < 0) goto lab1; 321 | z->c += ret; 322 | } 323 | z->I[1] += 1; 324 | continue; 325 | lab1: 326 | z->c = c2; 327 | break; 328 | } 329 | z->c = c1; 330 | } 331 | if (!(z->I[1] > 2)) return 0; 332 | z->I[0] = 0; 333 | z->lb = z->c; z->c = z->l; 334 | 335 | { int m3 = z->l - z->c; (void)m3; 336 | { int ret = r_remove_particle(z); 337 | if (ret < 0) return ret; 338 | } 339 | z->c = z->l - m3; 340 | } 341 | if (!(z->I[1] > 2)) return 0; 342 | { int m4 = z->l - z->c; (void)m4; 343 | { int ret = r_remove_possessive_pronoun(z); 344 | if (ret < 0) return ret; 345 | } 346 | z->c = z->l - m4; 347 | } 348 | z->c = z->lb; 349 | if (!(z->I[1] > 2)) return 0; 350 | { int c5 = z->c; 351 | { int c_test6 = z->c; 352 | { int ret = r_remove_first_order_prefix(z); 353 | if (ret == 0) goto lab3; 354 | if (ret < 0) return ret; 355 | } 356 | { int c7 = z->c; 357 | { int c_test8 = z->c; 358 | if 
(!(z->I[1] > 2)) goto lab4; 359 | z->lb = z->c; z->c = z->l; 360 | 361 | { int ret = r_remove_suffix(z); 362 | if (ret == 0) goto lab4; 363 | if (ret < 0) return ret; 364 | } 365 | z->c = z->lb; 366 | z->c = c_test8; 367 | } 368 | if (!(z->I[1] > 2)) goto lab4; 369 | { int ret = r_remove_second_order_prefix(z); 370 | if (ret == 0) goto lab4; 371 | if (ret < 0) return ret; 372 | } 373 | lab4: 374 | z->c = c7; 375 | } 376 | z->c = c_test6; 377 | } 378 | goto lab2; 379 | lab3: 380 | z->c = c5; 381 | { int c9 = z->c; 382 | { int ret = r_remove_second_order_prefix(z); 383 | if (ret < 0) return ret; 384 | } 385 | z->c = c9; 386 | } 387 | { int c10 = z->c; 388 | if (!(z->I[1] > 2)) goto lab5; 389 | z->lb = z->c; z->c = z->l; 390 | 391 | { int ret = r_remove_suffix(z); 392 | if (ret == 0) goto lab5; 393 | if (ret < 0) return ret; 394 | } 395 | z->c = z->lb; 396 | lab5: 397 | z->c = c10; 398 | } 399 | } 400 | lab2: 401 | return 1; 402 | } 403 | 404 | /* Creates a stemmer environment through SN_create_env(0, 2); the 2 presumably sizes the integer array used above as z->I[0] and z->I[1] (TODO confirm against api.c). */ extern struct SN_env * indonesian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2); } 405 | 406 | /* Releases an environment through SN_close_env(z, 0). */ extern void indonesian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 407 | 408 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_indonesian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * indonesian_ISO_8859_1_create_env(void); 8 | extern void indonesian_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int indonesian_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_irish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 
| #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int irish_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* Forward declarations of the file-local routines defined further down. */ static int r_verb_sfx(struct SN_env * z); 13 | static int r_deriv(struct SN_env * z); 14 | static int r_noun_sfx(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | static int r_initial_morph(struct SN_env * z); 17 | static int r_RV(struct SN_env * z); 18 | static int r_R2(struct SN_env * z); 19 | static int r_R1(struct SN_env * z); 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | 25 | extern struct SN_env * irish_ISO_8859_1_create_env(void); 26 | extern void irish_ISO_8859_1_close_env(struct SN_env * z); 27 | 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | /* a_0: word-initial sequences searched by r_initial_morph; the fourth field of each row appears to be the case number that routine switches on (TODO confirm against find_among in utilities.c). */ static const symbol s_0_0[2] = { 'b', '\'' }; 33 | static const symbol s_0_1[2] = { 'b', 'h' }; 34 | static const symbol s_0_2[3] = { 'b', 'h', 'f' }; 35 | static const symbol s_0_3[2] = { 'b', 'p' }; 36 | static const symbol s_0_4[2] = { 'c', 'h' }; 37 | static const symbol s_0_5[2] = { 'd', '\'' }; 38 | static const symbol s_0_6[4] = { 'd', '\'', 'f', 'h' }; 39 | static const symbol s_0_7[2] = { 'd', 'h' }; 40 | static const symbol s_0_8[2] = { 'd', 't' }; 41 | static const symbol s_0_9[2] = { 'f', 'h' }; 42 | static const symbol s_0_10[2] = { 'g', 'c' }; 43 | static const symbol s_0_11[2] = { 'g', 'h' }; 44 | static const symbol s_0_12[2] = { 'h', '-' }; 45 | static const symbol s_0_13[2] = { 'm', '\'' }; 46 | static const symbol s_0_14[2] = { 'm', 'b' }; 47 | static const symbol s_0_15[2] = { 'm', 'h' }; 48 | static const symbol s_0_16[2] = { 'n', '-' }; 49 | static const symbol s_0_17[2] = { 'n', 'd' }; 50 | static const symbol s_0_18[2] = { 'n', 'g' }; 51 | static const symbol s_0_19[2] = { 'p', 'h' }; 52 | static const symbol s_0_20[2] = { 's', 'h' }; 53 | static const symbol s_0_21[2] = { 't', '-' }; 54 | static const symbol s_0_22[2] = { 't', 'h' }; 55 | static const symbol 
s_0_23[2] = { 't', 's' }; 56 | 57 | static const struct among a_0[24] = 58 | { 59 | { 2, s_0_0, -1, 1, 0}, 60 | { 2, s_0_1, -1, 4, 0}, 61 | { 3, s_0_2, 1, 2, 0}, 62 | { 2, s_0_3, -1, 8, 0}, 63 | { 2, s_0_4, -1, 5, 0}, 64 | { 2, s_0_5, -1, 1, 0}, 65 | { 4, s_0_6, 5, 2, 0}, 66 | { 2, s_0_7, -1, 6, 0}, 67 | { 2, s_0_8, -1, 9, 0}, 68 | { 2, s_0_9, -1, 2, 0}, 69 | { 2, s_0_10, -1, 5, 0}, 70 | { 2, s_0_11, -1, 7, 0}, 71 | { 2, s_0_12, -1, 1, 0}, 72 | { 2, s_0_13, -1, 1, 0}, 73 | { 2, s_0_14, -1, 4, 0}, 74 | { 2, s_0_15, -1, 10, 0}, 75 | { 2, s_0_16, -1, 1, 0}, 76 | { 2, s_0_17, -1, 6, 0}, 77 | { 2, s_0_18, -1, 7, 0}, 78 | { 2, s_0_19, -1, 8, 0}, 79 | { 2, s_0_20, -1, 3, 0}, 80 | { 2, s_0_21, -1, 1, 0}, 81 | { 2, s_0_22, -1, 9, 0}, 82 | { 2, s_0_23, -1, 3, 0} 83 | }; 84 | 85 | /* a_1: endings searched by r_noun_sfx. */ static const symbol s_1_0[6] = { 0xED, 'o', 'c', 'h', 't', 'a' }; 86 | static const symbol s_1_1[7] = { 'a', 0xED, 'o', 'c', 'h', 't', 'a' }; 87 | static const symbol s_1_2[3] = { 'i', 'r', 'e' }; 88 | static const symbol s_1_3[4] = { 'a', 'i', 'r', 'e' }; 89 | static const symbol s_1_4[3] = { 'a', 'b', 'h' }; 90 | static const symbol s_1_5[4] = { 'e', 'a', 'b', 'h' }; 91 | static const symbol s_1_6[3] = { 'i', 'b', 'h' }; 92 | static const symbol s_1_7[4] = { 'a', 'i', 'b', 'h' }; 93 | static const symbol s_1_8[3] = { 'a', 'm', 'h' }; 94 | static const symbol s_1_9[4] = { 'e', 'a', 'm', 'h' }; 95 | static const symbol s_1_10[3] = { 'i', 'm', 'h' }; 96 | static const symbol s_1_11[4] = { 'a', 'i', 'm', 'h' }; 97 | static const symbol s_1_12[5] = { 0xED, 'o', 'c', 'h', 't' }; 98 | static const symbol s_1_13[6] = { 'a', 0xED, 'o', 'c', 'h', 't' }; 99 | static const symbol s_1_14[3] = { 'i', 'r', 0xED }; 100 | static const symbol s_1_15[4] = { 'a', 'i', 'r', 0xED }; 101 | 102 | static const struct among a_1[16] = 103 | { 104 | { 6, s_1_0, -1, 1, 0}, 105 | { 7, s_1_1, 0, 1, 0}, 106 | { 3, s_1_2, -1, 2, 0}, 107 | { 4, s_1_3, 2, 2, 0}, 108 | { 3, s_1_4, -1, 1, 0}, 109 | { 4, s_1_5, 4, 1, 0}, 110 | { 3, 
s_1_6, -1, 1, 0}, 111 | { 4, s_1_7, 6, 1, 0}, 112 | { 3, s_1_8, -1, 1, 0}, 113 | { 4, s_1_9, 8, 1, 0}, 114 | { 3, s_1_10, -1, 1, 0}, 115 | { 4, s_1_11, 10, 1, 0}, 116 | { 5, s_1_12, -1, 1, 0}, 117 | { 6, s_1_13, 12, 1, 0}, 118 | { 3, s_1_14, -1, 2, 0}, 119 | { 4, s_1_15, 14, 2, 0} 120 | }; 121 | 122 | /* a_2: endings searched by r_deriv. */ static const symbol s_2_0[8] = { 0xF3, 'i', 'd', 'e', 'a', 'c', 'h', 'a' }; 123 | static const symbol s_2_1[7] = { 'p', 'a', 't', 'a', 'c', 'h', 'a' }; 124 | static const symbol s_2_2[5] = { 'a', 'c', 'h', 't', 'a' }; 125 | static const symbol s_2_3[8] = { 'a', 'r', 'c', 'a', 'c', 'h', 't', 'a' }; 126 | static const symbol s_2_4[6] = { 'e', 'a', 'c', 'h', 't', 'a' }; 127 | static const symbol s_2_5[11] = { 'g', 'r', 'a', 'f', 'a', 0xED, 'o', 'c', 'h', 't', 'a' }; 128 | static const symbol s_2_6[5] = { 'p', 'a', 'i', 't', 'e' }; 129 | static const symbol s_2_7[3] = { 'a', 'c', 'h' }; 130 | static const symbol s_2_8[4] = { 'e', 'a', 'c', 'h' }; 131 | static const symbol s_2_9[7] = { 0xF3, 'i', 'd', 'e', 'a', 'c', 'h' }; 132 | static const symbol s_2_10[7] = { 'g', 'i', 'n', 'e', 'a', 'c', 'h' }; 133 | static const symbol s_2_11[6] = { 'p', 'a', 't', 'a', 'c', 'h' }; 134 | static const symbol s_2_12[9] = { 'g', 'r', 'a', 'f', 'a', 0xED, 'o', 'c', 'h' }; 135 | static const symbol s_2_13[7] = { 'p', 'a', 't', 'a', 'i', 'g', 'h' }; 136 | static const symbol s_2_14[6] = { 0xF3, 'i', 'd', 'i', 'g', 'h' }; 137 | static const symbol s_2_15[7] = { 'a', 'c', 'h', 't', 0xFA, 'i', 'l' }; 138 | static const symbol s_2_16[8] = { 'e', 'a', 'c', 'h', 't', 0xFA, 'i', 'l' }; 139 | static const symbol s_2_17[6] = { 'g', 'i', 'n', 'e', 'a', 's' }; 140 | static const symbol s_2_18[5] = { 'g', 'i', 'n', 'i', 's' }; 141 | static const symbol s_2_19[4] = { 'a', 'c', 'h', 't' }; 142 | static const symbol s_2_20[7] = { 'a', 'r', 'c', 'a', 'c', 'h', 't' }; 143 | static const symbol s_2_21[5] = { 'e', 'a', 'c', 'h', 't' }; 144 | static const symbol s_2_22[10] = { 'g', 'r', 'a', 'f', 'a', 0xED, 
'o', 'c', 'h', 't' }; 145 | static const symbol s_2_23[9] = { 'a', 'r', 'c', 'a', 'c', 'h', 't', 'a', 0xED }; 146 | static const symbol s_2_24[12] = { 'g', 'r', 'a', 'f', 'a', 0xED, 'o', 'c', 'h', 't', 'a', 0xED }; 147 | 148 | static const struct among a_2[25] = 149 | { 150 | { 8, s_2_0, -1, 6, 0}, 151 | { 7, s_2_1, -1, 5, 0}, 152 | { 5, s_2_2, -1, 1, 0}, 153 | { 8, s_2_3, 2, 2, 0}, 154 | { 6, s_2_4, 2, 1, 0}, 155 | { 11, s_2_5, -1, 4, 0}, 156 | { 5, s_2_6, -1, 5, 0}, 157 | { 3, s_2_7, -1, 1, 0}, 158 | { 4, s_2_8, 7, 1, 0}, 159 | { 7, s_2_9, 8, 6, 0}, 160 | { 7, s_2_10, 8, 3, 0}, 161 | { 6, s_2_11, 7, 5, 0}, 162 | { 9, s_2_12, -1, 4, 0}, 163 | { 7, s_2_13, -1, 5, 0}, 164 | { 6, s_2_14, -1, 6, 0}, 165 | { 7, s_2_15, -1, 1, 0}, 166 | { 8, s_2_16, 15, 1, 0}, 167 | { 6, s_2_17, -1, 3, 0}, 168 | { 5, s_2_18, -1, 3, 0}, 169 | { 4, s_2_19, -1, 1, 0}, 170 | { 7, s_2_20, 19, 2, 0}, 171 | { 5, s_2_21, 19, 1, 0}, 172 | { 10, s_2_22, -1, 4, 0}, 173 | { 9, s_2_23, -1, 2, 0}, 174 | { 12, s_2_24, -1, 4, 0} 175 | }; 176 | 177 | /* a_3: endings searched by r_verb_sfx. */ static const symbol s_3_0[4] = { 'i', 'm', 'i', 'd' }; 178 | static const symbol s_3_1[5] = { 'a', 'i', 'm', 'i', 'd' }; 179 | static const symbol s_3_2[4] = { 0xED, 'm', 'i', 'd' }; 180 | static const symbol s_3_3[5] = { 'a', 0xED, 'm', 'i', 'd' }; 181 | static const symbol s_3_4[3] = { 'a', 'd', 'h' }; 182 | static const symbol s_3_5[4] = { 'e', 'a', 'd', 'h' }; 183 | static const symbol s_3_6[5] = { 'f', 'a', 'i', 'd', 'h' }; 184 | static const symbol s_3_7[4] = { 'f', 'i', 'd', 'h' }; 185 | static const symbol s_3_8[3] = { 0xE1, 'i', 'l' }; 186 | static const symbol s_3_9[3] = { 'a', 'i', 'n' }; 187 | static const symbol s_3_10[4] = { 't', 'e', 'a', 'r' }; 188 | static const symbol s_3_11[3] = { 't', 'a', 'r' }; 189 | 190 | static const struct among a_3[12] = 191 | { 192 | { 4, s_3_0, -1, 1, 0}, 193 | { 5, s_3_1, 0, 1, 0}, 194 | { 4, s_3_2, -1, 1, 0}, 195 | { 5, s_3_3, 2, 1, 0}, 196 | { 3, s_3_4, -1, 2, 0}, 197 | { 4, s_3_5, 4, 2, 0}, 198 | { 5, s_3_6, 
-1, 1, 0}, 199 | { 4, s_3_7, -1, 1, 0}, 200 | { 3, s_3_8, -1, 2, 0}, 201 | { 3, s_3_9, -1, 2, 0}, 202 | { 4, s_3_10, -1, 2, 0}, 203 | { 3, s_3_11, -1, 2, 0} 204 | }; 205 | 206 | /* g_v: grouping bitmap that this file always passes together with the range 97..250; presumably the vowels including accented ones (TODO confirm against the grouping helpers in utilities.c). */ static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 }; 207 | 208 | /* s_0..s_8 are the single-letter replacements used by r_initial_morph; s_9..s_13 are the replacements used by r_deriv. */ static const symbol s_0[] = { 'f' }; 209 | static const symbol s_1[] = { 's' }; 210 | static const symbol s_2[] = { 'b' }; 211 | static const symbol s_3[] = { 'c' }; 212 | static const symbol s_4[] = { 'd' }; 213 | static const symbol s_5[] = { 'g' }; 214 | static const symbol s_6[] = { 'p' }; 215 | static const symbol s_7[] = { 't' }; 216 | static const symbol s_8[] = { 'm' }; 217 | static const symbol s_9[] = { 'a', 'r', 'c' }; 218 | static const symbol s_10[] = { 'g', 'i', 'n' }; 219 | static const symbol s_11[] = { 'g', 'r', 'a', 'f' }; 220 | static const symbol s_12[] = { 'p', 'a', 'i', 't', 'e' }; 221 | static const symbol s_13[] = { 0xF3, 'i', 'd' }; 222 | 223 | /* Sets the three region markers: z->I[2] (tested by r_RV), z->I[1] (tested by r_R1) and z->I[0] (tested by r_R2). Each starts at z->l and is overwritten with the cursor position reached after successive out_grouping / in_grouping scans over g_v; a failed scan leaves the remaining markers at z->l. The cursor is restored and 1 is always returned. */ static int r_mark_regions(struct SN_env * z) { 224 | z->I[2] = z->l; 225 | z->I[1] = z->l; 226 | z->I[0] = z->l; 227 | { int c1 = z->c; 228 | { 229 | int ret = out_grouping(z, g_v, 97, 250, 1); 230 | if (ret < 0) goto lab0; 231 | z->c += ret; 232 | } 233 | z->I[2] = z->c; 234 | { 235 | int ret = in_grouping(z, g_v, 97, 250, 1); 236 | if (ret < 0) goto lab0; 237 | z->c += ret; 238 | } 239 | z->I[1] = z->c; 240 | { 241 | int ret = out_grouping(z, g_v, 97, 250, 1); 242 | if (ret < 0) goto lab0; 243 | z->c += ret; 244 | } 245 | { 246 | int ret = in_grouping(z, g_v, 97, 250, 1); 247 | if (ret < 0) goto lab0; 248 | z->c += ret; 249 | } 250 | z->I[0] = z->c; 251 | lab0: 252 | z->c = c1; 253 | } 254 | return 1; 255 | } 256 | 257 | /* Forward step: search a_0 at the cursor; case 1 deletes the match, cases 2..10 replace it with the single letter s_0..s_8 respectively. Returns 1 on a match, 0 on no match, negative on a slice error. */ static int r_initial_morph(struct SN_env * z) { 258 | int among_var; 259 | z->bra = z->c; 260 | among_var = find_among(z, a_0, 24); 261 | if (!(among_var)) return 0; 262 | z->ket = z->c; 263 | switch (among_var) { 264 | case 1: 265 | { int ret = slice_del(z); 266 | if (ret < 0) return ret; 267 
| } 268 | break; 269 | case 2: 270 | { int ret = slice_from_s(z, 1, s_0); 271 | if (ret < 0) return ret; 272 | } 273 | break; 274 | case 3: 275 | { int ret = slice_from_s(z, 1, s_1); 276 | if (ret < 0) return ret; 277 | } 278 | break; 279 | case 4: 280 | { int ret = slice_from_s(z, 1, s_2); 281 | if (ret < 0) return ret; 282 | } 283 | break; 284 | case 5: 285 | { int ret = slice_from_s(z, 1, s_3); 286 | if (ret < 0) return ret; 287 | } 288 | break; 289 | case 6: 290 | { int ret = slice_from_s(z, 1, s_4); 291 | if (ret < 0) return ret; 292 | } 293 | break; 294 | case 7: 295 | { int ret = slice_from_s(z, 1, s_5); 296 | if (ret < 0) return ret; 297 | } 298 | break; 299 | case 8: 300 | { int ret = slice_from_s(z, 1, s_6); 301 | if (ret < 0) return ret; 302 | } 303 | break; 304 | case 9: 305 | { int ret = slice_from_s(z, 1, s_7); 306 | if (ret < 0) return ret; 307 | } 308 | break; 309 | case 10: 310 | { int ret = slice_from_s(z, 1, s_8); 311 | if (ret < 0) return ret; 312 | } 313 | break; 314 | } 315 | return 1; 316 | } 317 | 318 | /* Returns 1 when the cursor is at or beyond z->I[2], else 0. */ static int r_RV(struct SN_env * z) { 319 | if (!(z->I[2] <= z->c)) return 0; 320 | return 1; 321 | } 322 | 323 | /* Returns 1 when the cursor is at or beyond z->I[1], else 0. */ static int r_R1(struct SN_env * z) { 324 | if (!(z->I[1] <= z->c)) return 0; 325 | return 1; 326 | } 327 | 328 | /* Returns 1 when the cursor is at or beyond z->I[0], else 0. */ static int r_R2(struct SN_env * z) { 329 | if (!(z->I[0] <= z->c)) return 0; 330 | return 1; 331 | } 332 | 333 | /* Backward step: search a_1 ending at the cursor and delete the match, but only if r_R1 (case 1) or r_R2 (case 2) accepts the position; a rejected position returns 0 without deleting. */ static int r_noun_sfx(struct SN_env * z) { 334 | int among_var; 335 | z->ket = z->c; 336 | among_var = find_among_b(z, a_1, 16); 337 | if (!(among_var)) return 0; 338 | z->bra = z->c; 339 | switch (among_var) { 340 | case 1: 341 | { int ret = r_R1(z); 342 | if (ret <= 0) return ret; 343 | } 344 | { int ret = slice_del(z); 345 | if (ret < 0) return ret; 346 | } 347 | break; 348 | case 2: 349 | { int ret = r_R2(z); 350 | if (ret <= 0) return ret; 351 | } 352 | { int ret = slice_del(z); 353 | if (ret < 0) return ret; 354 | } 355 | break; 356 | } 357 | return 1; 358 | } 359 | 360 | /* Backward step: search a_2 ending at the cursor. Case 1 deletes the match when r_R2 accepts the position; cases 2..6 replace the match with s_9..s_13 ("arc", "gin", "graf", "paite", and the three symbols 0xF3 i d) with no region test. */ static int r_deriv(struct SN_env 
* z) { 361 | int among_var; 362 | z->ket = z->c; 363 | among_var = find_among_b(z, a_2, 25); 364 | if (!(among_var)) return 0; 365 | z->bra = z->c; 366 | switch (among_var) { 367 | case 1: 368 | { int ret = r_R2(z); 369 | if (ret <= 0) return ret; 370 | } 371 | { int ret = slice_del(z); 372 | if (ret < 0) return ret; 373 | } 374 | break; 375 | case 2: 376 | { int ret = slice_from_s(z, 3, s_9); 377 | if (ret < 0) return ret; 378 | } 379 | break; 380 | case 3: 381 | { int ret = slice_from_s(z, 3, s_10); 382 | if (ret < 0) return ret; 383 | } 384 | break; 385 | case 4: 386 | { int ret = slice_from_s(z, 4, s_11); 387 | if (ret < 0) return ret; 388 | } 389 | break; 390 | case 5: 391 | { int ret = slice_from_s(z, 5, s_12); 392 | if (ret < 0) return ret; 393 | } 394 | break; 395 | case 6: 396 | { int ret = slice_from_s(z, 3, s_13); 397 | if (ret < 0) return ret; 398 | } 399 | break; 400 | } 401 | return 1; 402 | } 403 | 404 | /* Backward step: search a_3 ending at the cursor and delete the match when r_RV (case 1) or r_R1 (case 2) accepts the position. The bitmask test on the final byte is an early exit taken before find_among_b is called. */ static int r_verb_sfx(struct SN_env * z) { 405 | int among_var; 406 | z->ket = z->c; 407 | if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((282896 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; 408 | among_var = find_among_b(z, a_3, 12); 409 | if (!(among_var)) return 0; 410 | z->bra = z->c; 411 | switch (among_var) { 412 | case 1: 413 | { int ret = r_RV(z); 414 | if (ret <= 0) return ret; 415 | } 416 | { int ret = slice_del(z); 417 | if (ret < 0) return ret; 418 | } 419 | break; 420 | case 2: 421 | { int ret = r_R1(z); 422 | if (ret <= 0) return ret; 423 | } 424 | { int ret = slice_del(z); 425 | if (ret < 0) return ret; 426 | } 427 | break; 428 | } 429 | return 1; 430 | } 431 | 432 | /* Entry point: runs r_initial_morph and r_mark_regions forward, then switches to backward mode (z->lb = z->c; z->c = z->l) and runs r_noun_sfx, r_deriv and r_verb_sfx in that order, restoring the cursor after each. A step that returns 0 is simply skipped; only negative results are propagated. Returns 1 otherwise. */ extern int irish_ISO_8859_1_stem(struct SN_env * z) { 433 | { int c1 = z->c; 434 | { int ret = r_initial_morph(z); 435 | if (ret < 0) return ret; 436 | } 437 | z->c = c1; 438 | } 439 | 440 | { int ret = r_mark_regions(z); 441 | if (ret < 0) return ret; 442 | } 443 | z->lb = z->c; z->c = z->l; 444 | 445 | { int m2 = z->l - z->c; (void)m2; 446 | { int ret = 
r_noun_sfx(z); 447 | if (ret < 0) return ret; 448 | } 449 | z->c = z->l - m2; 450 | } 451 | { int m3 = z->l - z->c; (void)m3; 452 | { int ret = r_deriv(z); 453 | if (ret < 0) return ret; 454 | } 455 | z->c = z->l - m3; 456 | } 457 | { int m4 = z->l - z->c; (void)m4; 458 | { int ret = r_verb_sfx(z); 459 | if (ret < 0) return ret; 460 | } 461 | z->c = z->l - m4; 462 | } 463 | z->c = z->lb; 464 | return 1; 465 | } 466 | 467 | /* Creates a stemmer environment through SN_create_env(0, 3); the 3 presumably sizes the integer array used above as z->I[0]..z->I[2] (TODO confirm against api.c). */ extern struct SN_env * irish_ISO_8859_1_create_env(void) { return SN_create_env(0, 3); } 468 | 469 | /* Releases an environment through SN_close_env(z, 0). */ extern void irish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 470 | 471 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_irish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * irish_ISO_8859_1_create_env(void); 8 | extern void irish_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int irish_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_italian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * italian_ISO_8859_1_create_env(void); 8 | extern void italian_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int italian_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_norwegian.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - 
https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* Forward declarations of the file-local routines defined further down. */ static int r_other_suffix(struct SN_env * z); 13 | static int r_consonant_pair(struct SN_env * z); 14 | static int r_main_suffix(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | 21 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 22 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 23 | 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | /* a_0: endings searched by r_main_suffix; the fourth field of each row appears to be the case number that routine switches on (TODO confirm against find_among_b in utilities.c). */ static const symbol s_0_0[1] = { 'a' }; 29 | static const symbol s_0_1[1] = { 'e' }; 30 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 31 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; 32 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 33 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 34 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 35 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 36 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 37 | static const symbol s_0_9[2] = { 'e', 'n' }; 38 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 39 | static const symbol s_0_11[2] = { 'a', 'r' }; 40 | static const symbol s_0_12[2] = { 'e', 'r' }; 41 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 42 | static const symbol s_0_14[1] = { 's' }; 43 | static const symbol s_0_15[2] = { 'a', 's' }; 44 | static const symbol s_0_16[2] = { 'e', 's' }; 45 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 46 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 47 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 48 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 49 | static const symbol s_0_21[3] = { 'e', 'n', 's' }; 50 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 
51 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 52 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 53 | static const symbol s_0_25[2] = { 'e', 't' }; 54 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 55 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 56 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 57 | 58 | static const struct among a_0[29] = 59 | { 60 | { 1, s_0_0, -1, 1, 0}, 61 | { 1, s_0_1, -1, 1, 0}, 62 | { 3, s_0_2, 1, 1, 0}, 63 | { 4, s_0_3, 1, 1, 0}, 64 | { 4, s_0_4, 1, 1, 0}, 65 | { 3, s_0_5, 1, 1, 0}, 66 | { 3, s_0_6, 1, 1, 0}, 67 | { 6, s_0_7, 6, 1, 0}, 68 | { 4, s_0_8, 1, 3, 0}, 69 | { 2, s_0_9, -1, 1, 0}, 70 | { 5, s_0_10, 9, 1, 0}, 71 | { 2, s_0_11, -1, 1, 0}, 72 | { 2, s_0_12, -1, 1, 0}, 73 | { 5, s_0_13, 12, 1, 0}, 74 | { 1, s_0_14, -1, 2, 0}, 75 | { 2, s_0_15, 14, 1, 0}, 76 | { 2, s_0_16, 14, 1, 0}, 77 | { 4, s_0_17, 16, 1, 0}, 78 | { 5, s_0_18, 16, 1, 0}, 79 | { 4, s_0_19, 16, 1, 0}, 80 | { 7, s_0_20, 19, 1, 0}, 81 | { 3, s_0_21, 14, 1, 0}, 82 | { 6, s_0_22, 21, 1, 0}, 83 | { 3, s_0_23, 14, 1, 0}, 84 | { 3, s_0_24, 14, 1, 0}, 85 | { 2, s_0_25, -1, 1, 0}, 86 | { 3, s_0_26, 25, 1, 0}, 87 | { 3, s_0_27, -1, 3, 0}, 88 | { 3, s_0_28, -1, 1, 0} 89 | }; 90 | 91 | /* a_1: the pairs "dt" and "vt" tested by r_consonant_pair. */ static const symbol s_1_0[2] = { 'd', 't' }; 92 | static const symbol s_1_1[2] = { 'v', 't' }; 93 | 94 | static const struct among a_1[2] = 95 | { 96 | { 2, s_1_0, -1, -1, 0}, 97 | { 2, s_1_1, -1, -1, 0} 98 | }; 99 | 100 | /* a_2: endings deleted by r_other_suffix. */ static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 101 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 102 | static const symbol s_2_2[2] = { 'i', 'g' }; 103 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 104 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 105 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 106 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 107 | static const symbol s_2_7[3] = { 'l', 'o', 'v' }; 108 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 109 | static const symbol s_2_9[4] = { 's', 
'l', 'o', 'v' }; 110 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 111 | 112 | static const struct among a_2[11] = 113 | { 114 | { 3, s_2_0, -1, 1, 0}, 115 | { 4, s_2_1, 0, 1, 0}, 116 | { 2, s_2_2, -1, 1, 0}, 117 | { 3, s_2_3, 2, 1, 0}, 118 | { 3, s_2_4, 2, 1, 0}, 119 | { 4, s_2_5, 4, 1, 0}, 120 | { 3, s_2_6, -1, 1, 0}, 121 | { 3, s_2_7, -1, 1, 0}, 122 | { 4, s_2_8, 7, 1, 0}, 123 | { 4, s_2_9, 7, 1, 0}, 124 | { 7, s_2_10, 9, 1, 0} 125 | }; 126 | 127 | /* g_v: grouping bitmap that this file always passes together with the range 97..248; presumably the vowels (TODO confirm against the grouping helpers in utilities.c). */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 128 | 129 | /* g_s_ending: grouping bitmap passed with the range 98..122 in r_main_suffix to decide whether a final "s" may be dropped. */ static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 130 | 131 | /* s_0: the replacement "er" used by case 3 of r_main_suffix. */ static const symbol s_0[] = { 'e', 'r' }; 132 | 133 | /* Computes z->I[1], which the suffix routines install as their backward limit. z->I[0] is set to the cursor plus 3 (returning 0 if that lies beyond z->l); z->I[1] becomes the cursor position after the out_grouping / in_grouping scans over g_v, raised to z->I[0] when it would be smaller. Returns 0 if either scan fails, else 1. */ static int r_mark_regions(struct SN_env * z) { 134 | z->I[1] = z->l; 135 | { int c_test1 = z->c; 136 | z->c = z->c + 3; 137 | if (z->c > z->l) return 0; 138 | z->I[0] = z->c; 139 | z->c = c_test1; 140 | } 141 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; 142 | { 143 | int ret = in_grouping(z, g_v, 97, 248, 1); 144 | if (ret < 0) return 0; 145 | z->c += ret; 146 | } 147 | z->I[1] = z->c; 148 | 149 | if (!(z->I[1] < z->I[0])) goto lab0; 150 | z->I[1] = z->I[0]; 151 | lab0: 152 | return 1; 153 | } 154 | 155 | /* Backward step with z->lb temporarily raised to z->I[1]: search a_0 ending at the cursor. Case 1 deletes the match. Case 2 deletes it only if in_grouping_b(g_s_ending) returns 0, or else if the preceding symbol is "k" and out_grouping_b(g_v) then returns 0. Case 3 replaces the match with "er". The original z->lb is restored on every path out of the limited section. */ static int r_main_suffix(struct SN_env * z) { 156 | int among_var; 157 | 158 | { int mlimit1; 159 | if (z->c < z->I[1]) return 0; 160 | mlimit1 = z->lb; z->lb = z->I[1]; 161 | z->ket = z->c; 162 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 163 | among_var = find_among_b(z, a_0, 29); 164 | if (!(among_var)) { z->lb = mlimit1; return 0; } 165 | z->bra = z->c; 166 | z->lb = mlimit1; 167 | } 168 | switch (among_var) { 169 | case 1: 170 | { int ret = slice_del(z); 171 | if (ret < 0) return ret; 172 | } 173 | break; 174 | case 2: 175 | { int m2 = z->l - z->c; (void)m2; 176 | if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; 177 | goto lab0; 178 | lab1: 179 | 
z->c = z->l - m2; 180 | if (z->c <= z->lb || z->p[z->c - 1] != 'k') return 0; 181 | z->c--; 182 | if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; 183 | } 184 | lab0: 185 | { int ret = slice_del(z); 186 | if (ret < 0) return ret; 187 | } 188 | break; 189 | case 3: 190 | { int ret = slice_from_s(z, 2, s_0); 191 | if (ret < 0) return ret; 192 | } 193 | break; 194 | } 195 | return 1; 196 | } 197 | 198 | /* Backward step: tests, with z->lb temporarily raised to z->I[1], that an a_1 entry ("dt" or "vt") ends at the cursor; the cursor is then restored and only the single final symbol is deleted (z->bra is set one position before z->ket). */ static int r_consonant_pair(struct SN_env * z) { 199 | { int m_test1 = z->l - z->c; 200 | 201 | { int mlimit2; 202 | if (z->c < z->I[1]) return 0; 203 | mlimit2 = z->lb; z->lb = z->I[1]; 204 | z->ket = z->c; 205 | if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit2; return 0; } 206 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit2; return 0; } 207 | z->bra = z->c; 208 | z->lb = mlimit2; 209 | } 210 | z->c = z->l - m_test1; 211 | } 212 | if (z->c <= z->lb) return 0; 213 | z->c--; 214 | z->bra = z->c; 215 | { int ret = slice_del(z); 216 | if (ret < 0) return ret; 217 | } 218 | return 1; 219 | } 220 | 221 | /* Backward step with z->lb temporarily raised to z->I[1]: deletes an a_2 entry ending at the cursor. Returns 1 after a deletion, 0 when nothing matched, negative on a slice error. */ static int r_other_suffix(struct SN_env * z) { 222 | 223 | { int mlimit1; 224 | if (z->c < z->I[1]) return 0; 225 | mlimit1 = z->lb; z->lb = z->I[1]; 226 | z->ket = z->c; 227 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 228 | if (!(find_among_b(z, a_2, 11))) { z->lb = mlimit1; return 0; } 229 | z->bra = z->c; 230 | z->lb = mlimit1; 231 | } 232 | { int ret = slice_del(z); 233 | if (ret < 0) return ret; 234 | } 235 | return 1; 236 | } 237 | 238 | /* Entry point: runs r_mark_regions forward, then switches to backward mode (z->lb = z->c; z->c = z->l) and runs r_main_suffix, r_consonant_pair and r_other_suffix in that order, restoring the cursor after each. A step that returns 0 is simply skipped; only negative results are propagated. Returns 1 otherwise. */ extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { 239 | { int c1 = z->c; 240 | { int ret = r_mark_regions(z); 241 | if (ret < 0) return ret; 242 | } 243 | z->c = c1; 244 | } 245 | z->lb = z->c; z->c = z->l; 246 | 247 | { int m2 = z->l - z->c; (void)m2; 248 | { int ret = r_main_suffix(z); 249 | if (ret < 0) return ret; 250 | } 251 | z->c = z->l - m2; 252 | } 253 | { int m3 = z->l - z->c; (void)m3; 254 | { int ret = 
r_consonant_pair(z); 255 | if (ret < 0) return ret; 256 | } 257 | z->c = z->l - m3; 258 | } 259 | { int m4 = z->l - z->c; (void)m4; 260 | { int ret = r_other_suffix(z); 261 | if (ret < 0) return ret; 262 | } 263 | z->c = z->l - m4; 264 | } 265 | z->c = z->lb; 266 | return 1; 267 | } 268 | 269 | /* Creates a stemmer environment through SN_create_env(0, 2); the 2 presumably sizes the integer array used above as z->I[0] and z->I[1] (TODO confirm against api.c). */ extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2); } 270 | 271 | /* Releases an environment through SN_close_env(z, 0). */ extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 272 | 273 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_norwegian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 8 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_porter.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * porter_ISO_8859_1_create_env(void); 8 | extern void porter_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int porter_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_portuguese.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * 
portuguese_ISO_8859_1_create_env(void); 8 | extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int portuguese_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_spanish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * spanish_ISO_8859_1_create_env(void); 8 | extern void spanish_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int spanish_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_swedish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_other_suffix(struct SN_env * z); 13 | static int r_consonant_pair(struct SN_env * z); 14 | static int r_main_suffix(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | 21 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 22 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 23 | 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | static const symbol s_0_0[1] = { 'a' }; 29 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 30 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 31 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 33 | 
static const symbol s_0_5[2] = { 'a', 'd' }; 34 | static const symbol s_0_6[1] = { 'e' }; 35 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 36 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 37 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 38 | static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 39 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 40 | static const symbol s_0_12[2] = { 'e', 'n' }; 41 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 42 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; 43 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 44 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 45 | static const symbol s_0_17[2] = { 'a', 'r' }; 46 | static const symbol s_0_18[2] = { 'e', 'r' }; 47 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 48 | static const symbol s_0_20[2] = { 'o', 'r' }; 49 | static const symbol s_0_21[1] = { 's' }; 50 | static const symbol s_0_22[2] = { 'a', 's' }; 51 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; 52 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 53 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_26[2] = { 'e', 's' }; 55 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 56 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 57 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 58 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 59 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 60 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 61 | static const symbol s_0_33[2] = { 'a', 't' }; 62 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 63 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 64 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 65 | 66 | static const struct among a_0[37] = 67 | { 68 | { 1, s_0_0, -1, 1, 0}, 69 | { 4, s_0_1, 0, 1, 0}, 70 | { 4, s_0_2, 0, 1, 0}, 71 | { 
7, s_0_3, 2, 1, 0}, 72 | { 4, s_0_4, 0, 1, 0}, 73 | { 2, s_0_5, -1, 1, 0}, 74 | { 1, s_0_6, -1, 1, 0}, 75 | { 3, s_0_7, 6, 1, 0}, 76 | { 4, s_0_8, 6, 1, 0}, 77 | { 4, s_0_9, 6, 1, 0}, 78 | { 3, s_0_10, 6, 1, 0}, 79 | { 4, s_0_11, 6, 1, 0}, 80 | { 2, s_0_12, -1, 1, 0}, 81 | { 5, s_0_13, 12, 1, 0}, 82 | { 4, s_0_14, 12, 1, 0}, 83 | { 5, s_0_15, 12, 1, 0}, 84 | { 3, s_0_16, -1, 1, 0}, 85 | { 2, s_0_17, -1, 1, 0}, 86 | { 2, s_0_18, -1, 1, 0}, 87 | { 5, s_0_19, 18, 1, 0}, 88 | { 2, s_0_20, -1, 1, 0}, 89 | { 1, s_0_21, -1, 2, 0}, 90 | { 2, s_0_22, 21, 1, 0}, 91 | { 5, s_0_23, 22, 1, 0}, 92 | { 5, s_0_24, 22, 1, 0}, 93 | { 5, s_0_25, 22, 1, 0}, 94 | { 2, s_0_26, 21, 1, 0}, 95 | { 4, s_0_27, 26, 1, 0}, 96 | { 5, s_0_28, 26, 1, 0}, 97 | { 3, s_0_29, 21, 1, 0}, 98 | { 5, s_0_30, 29, 1, 0}, 99 | { 6, s_0_31, 29, 1, 0}, 100 | { 4, s_0_32, 21, 1, 0}, 101 | { 2, s_0_33, -1, 1, 0}, 102 | { 5, s_0_34, -1, 1, 0}, 103 | { 3, s_0_35, -1, 1, 0}, 104 | { 3, s_0_36, -1, 1, 0} 105 | }; 106 | 107 | static const symbol s_1_0[2] = { 'd', 'd' }; 108 | static const symbol s_1_1[2] = { 'g', 'd' }; 109 | static const symbol s_1_2[2] = { 'n', 'n' }; 110 | static const symbol s_1_3[2] = { 'd', 't' }; 111 | static const symbol s_1_4[2] = { 'g', 't' }; 112 | static const symbol s_1_5[2] = { 'k', 't' }; 113 | static const symbol s_1_6[2] = { 't', 't' }; 114 | 115 | static const struct among a_1[7] = 116 | { 117 | { 2, s_1_0, -1, -1, 0}, 118 | { 2, s_1_1, -1, -1, 0}, 119 | { 2, s_1_2, -1, -1, 0}, 120 | { 2, s_1_3, -1, -1, 0}, 121 | { 2, s_1_4, -1, -1, 0}, 122 | { 2, s_1_5, -1, -1, 0}, 123 | { 2, s_1_6, -1, -1, 0} 124 | }; 125 | 126 | static const symbol s_2_0[2] = { 'i', 'g' }; 127 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 128 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 129 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; 130 | static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; 131 | 132 | static const struct among a_2[5] = 133 | { 134 | { 2, s_2_0, -1, 1, 0}, 135 
| { 3, s_2_1, 0, 1, 0}, 136 | { 3, s_2_2, -1, 1, 0}, 137 | { 5, s_2_3, -1, 3, 0}, 138 | { 4, s_2_4, -1, 2, 0} 139 | }; 140 | 141 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; 142 | 143 | static const unsigned char g_s_ending[] = { 119, 127, 149 }; 144 | 145 | static const symbol s_0[] = { 'l', 0xF6, 's' }; 146 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; 147 | 148 | static int r_mark_regions(struct SN_env * z) { 149 | z->I[1] = z->l; 150 | { int c_test1 = z->c; 151 | z->c = z->c + 3; 152 | if (z->c > z->l) return 0; 153 | z->I[0] = z->c; 154 | z->c = c_test1; 155 | } 156 | if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; 157 | { 158 | int ret = in_grouping(z, g_v, 97, 246, 1); 159 | if (ret < 0) return 0; 160 | z->c += ret; 161 | } 162 | z->I[1] = z->c; 163 | 164 | if (!(z->I[1] < z->I[0])) goto lab0; 165 | z->I[1] = z->I[0]; 166 | lab0: 167 | return 1; 168 | } 169 | 170 | static int r_main_suffix(struct SN_env * z) { 171 | int among_var; 172 | 173 | { int mlimit1; 174 | if (z->c < z->I[1]) return 0; 175 | mlimit1 = z->lb; z->lb = z->I[1]; 176 | z->ket = z->c; 177 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 178 | among_var = find_among_b(z, a_0, 37); 179 | if (!(among_var)) { z->lb = mlimit1; return 0; } 180 | z->bra = z->c; 181 | z->lb = mlimit1; 182 | } 183 | switch (among_var) { 184 | case 1: 185 | { int ret = slice_del(z); 186 | if (ret < 0) return ret; 187 | } 188 | break; 189 | case 2: 190 | if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; 191 | { int ret = slice_del(z); 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | } 196 | return 1; 197 | } 198 | 199 | static int r_consonant_pair(struct SN_env * z) { 200 | 201 | { int mlimit1; 202 | if (z->c < z->I[1]) return 0; 203 | mlimit1 = z->lb; z->lb = z->I[1]; 204 | { int m2 = z->l - z->c; (void)m2; 205 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 
!= 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 206 | if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit1; return 0; } 207 | z->c = z->l - m2; 208 | z->ket = z->c; 209 | if (z->c <= z->lb) { z->lb = mlimit1; return 0; } 210 | z->c--; 211 | z->bra = z->c; 212 | { int ret = slice_del(z); 213 | if (ret < 0) return ret; 214 | } 215 | } 216 | z->lb = mlimit1; 217 | } 218 | return 1; 219 | } 220 | 221 | static int r_other_suffix(struct SN_env * z) { 222 | int among_var; 223 | 224 | { int mlimit1; 225 | if (z->c < z->I[1]) return 0; 226 | mlimit1 = z->lb; z->lb = z->I[1]; 227 | z->ket = z->c; 228 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 229 | among_var = find_among_b(z, a_2, 5); 230 | if (!(among_var)) { z->lb = mlimit1; return 0; } 231 | z->bra = z->c; 232 | switch (among_var) { 233 | case 1: 234 | { int ret = slice_del(z); 235 | if (ret < 0) return ret; 236 | } 237 | break; 238 | case 2: 239 | { int ret = slice_from_s(z, 3, s_0); 240 | if (ret < 0) return ret; 241 | } 242 | break; 243 | case 3: 244 | { int ret = slice_from_s(z, 4, s_1); 245 | if (ret < 0) return ret; 246 | } 247 | break; 248 | } 249 | z->lb = mlimit1; 250 | } 251 | return 1; 252 | } 253 | 254 | extern int swedish_ISO_8859_1_stem(struct SN_env * z) { 255 | { int c1 = z->c; 256 | { int ret = r_mark_regions(z); 257 | if (ret < 0) return ret; 258 | } 259 | z->c = c1; 260 | } 261 | z->lb = z->c; z->c = z->l; 262 | 263 | { int m2 = z->l - z->c; (void)m2; 264 | { int ret = r_main_suffix(z); 265 | if (ret < 0) return ret; 266 | } 267 | z->c = z->l - m2; 268 | } 269 | { int m3 = z->l - z->c; (void)m3; 270 | { int ret = r_consonant_pair(z); 271 | if (ret < 0) return ret; 272 | } 273 | z->c = z->l - m3; 274 | } 275 | { int m4 = z->l - z->c; (void)m4; 276 | { int ret = r_other_suffix(z); 277 | if (ret < 0) return ret; 278 | } 279 | z->c = z->l - m4; 280 | } 281 | z->c = z->lb; 282 | return 1; 
283 | } 284 | 285 | extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2); } 286 | 287 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 288 | 289 | -------------------------------------------------------------------------------- /stem_ISO_8859_1_swedish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 8 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 9 | 10 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_2_hungarian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * hungarian_ISO_8859_2_create_env(void); 8 | extern void hungarian_ISO_8859_2_close_env(struct SN_env * z); 9 | 10 | extern int hungarian_ISO_8859_2_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_ISO_8859_2_romanian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * romanian_ISO_8859_2_create_env(void); 8 | extern void romanian_ISO_8859_2_close_env(struct SN_env * z); 9 | 10 | extern int romanian_ISO_8859_2_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | 
-------------------------------------------------------------------------------- /stem_KOI8_R_russian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * russian_KOI8_R_create_env(void); 8 | extern void russian_KOI8_R_close_env(struct SN_env * z); 9 | 10 | extern int russian_KOI8_R_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_arabic.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * arabic_UTF_8_create_env(void); 8 | extern void arabic_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int arabic_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_armenian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * armenian_UTF_8_create_env(void); 8 | extern void armenian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int armenian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_basque.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * 
basque_UTF_8_create_env(void); 8 | extern void basque_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int basque_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_catalan.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * catalan_UTF_8_create_env(void); 8 | extern void catalan_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int catalan_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_danish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int danish_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_undouble(struct SN_env * z); 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * danish_UTF_8_create_env(void); 23 | extern void danish_UTF_8_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 30 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 32 | static const symbol s_0_3[1] = { 'e' }; 33 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 34 | static const symbol s_0_5[4] = { 
'e', 'n', 'd', 'e' }; 35 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 37 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 38 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 39 | static const symbol s_0_10[2] = { 'e', 'n' }; 40 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 41 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 42 | static const symbol s_0_13[2] = { 'e', 'r' }; 43 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 44 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 45 | static const symbol s_0_16[1] = { 's' }; 46 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 47 | static const symbol s_0_18[2] = { 'e', 's' }; 48 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; 51 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 52 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 54 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 55 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 57 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 58 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 59 | static const symbol s_0_30[2] = { 'e', 't' }; 60 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; 61 | 62 | static const struct among a_0[32] = 63 | { 64 | { 3, s_0_0, -1, 1, 0}, 65 | { 5, s_0_1, 0, 1, 0}, 66 | { 4, s_0_2, -1, 1, 0}, 67 | { 1, s_0_3, -1, 1, 0}, 68 | { 5, s_0_4, 3, 1, 0}, 69 | { 4, s_0_5, 3, 1, 0}, 70 | { 6, s_0_6, 5, 1, 0}, 71 | { 3, s_0_7, 3, 1, 0}, 72 | { 4, s_0_8, 3, 1, 0}, 73 | { 3, s_0_9, 3, 1, 0}, 74 | { 2, s_0_10, -1, 1, 0}, 75 | { 5, s_0_11, 10, 1, 0}, 76 | { 4, s_0_12, 10, 1, 
0}, 77 | { 2, s_0_13, -1, 1, 0}, 78 | { 5, s_0_14, 13, 1, 0}, 79 | { 4, s_0_15, 13, 1, 0}, 80 | { 1, s_0_16, -1, 2, 0}, 81 | { 4, s_0_17, 16, 1, 0}, 82 | { 2, s_0_18, 16, 1, 0}, 83 | { 5, s_0_19, 18, 1, 0}, 84 | { 7, s_0_20, 19, 1, 0}, 85 | { 4, s_0_21, 18, 1, 0}, 86 | { 5, s_0_22, 18, 1, 0}, 87 | { 4, s_0_23, 18, 1, 0}, 88 | { 3, s_0_24, 16, 1, 0}, 89 | { 6, s_0_25, 24, 1, 0}, 90 | { 5, s_0_26, 24, 1, 0}, 91 | { 3, s_0_27, 16, 1, 0}, 92 | { 3, s_0_28, 16, 1, 0}, 93 | { 5, s_0_29, 28, 1, 0}, 94 | { 2, s_0_30, -1, 1, 0}, 95 | { 4, s_0_31, 30, 1, 0} 96 | }; 97 | 98 | static const symbol s_1_0[2] = { 'g', 'd' }; 99 | static const symbol s_1_1[2] = { 'd', 't' }; 100 | static const symbol s_1_2[2] = { 'g', 't' }; 101 | static const symbol s_1_3[2] = { 'k', 't' }; 102 | 103 | static const struct among a_1[4] = 104 | { 105 | { 2, s_1_0, -1, -1, 0}, 106 | { 2, s_1_1, -1, -1, 0}, 107 | { 2, s_1_2, -1, -1, 0}, 108 | { 2, s_1_3, -1, -1, 0} 109 | }; 110 | 111 | static const symbol s_2_0[2] = { 'i', 'g' }; 112 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 113 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; 114 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 115 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; 116 | 117 | static const struct among a_2[5] = 118 | { 119 | { 2, s_2_0, -1, 1, 0}, 120 | { 3, s_2_1, 0, 1, 0}, 121 | { 4, s_2_2, 1, 1, 0}, 122 | { 3, s_2_3, -1, 1, 0}, 123 | { 5, s_2_4, -1, 2, 0} 124 | }; 125 | 126 | static const unsigned char g_c[] = { 119, 223, 119, 1 }; 127 | 128 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | 130 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 131 | 132 | static const symbol s_0[] = { 's', 't' }; 133 | static const symbol s_1[] = { 'i', 'g' }; 134 | static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; 135 | 136 | static int r_mark_regions(struct SN_env * z) { 137 | z->I[1] = z->l; 
138 | { int c_test1 = z->c; 139 | { int ret = skip_utf8(z->p, z->c, z->l, 3); 140 | if (ret < 0) return 0; 141 | z->c = ret; 142 | } 143 | z->I[0] = z->c; 144 | z->c = c_test1; 145 | } 146 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; 147 | { 148 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 149 | if (ret < 0) return 0; 150 | z->c += ret; 151 | } 152 | z->I[1] = z->c; 153 | 154 | if (!(z->I[1] < z->I[0])) goto lab0; 155 | z->I[1] = z->I[0]; 156 | lab0: 157 | return 1; 158 | } 159 | 160 | static int r_main_suffix(struct SN_env * z) { 161 | int among_var; 162 | 163 | { int mlimit1; 164 | if (z->c < z->I[1]) return 0; 165 | mlimit1 = z->lb; z->lb = z->I[1]; 166 | z->ket = z->c; 167 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 168 | among_var = find_among_b(z, a_0, 32); 169 | if (!(among_var)) { z->lb = mlimit1; return 0; } 170 | z->bra = z->c; 171 | z->lb = mlimit1; 172 | } 173 | switch (among_var) { 174 | case 1: 175 | { int ret = slice_del(z); 176 | if (ret < 0) return ret; 177 | } 178 | break; 179 | case 2: 180 | if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; 181 | { int ret = slice_del(z); 182 | if (ret < 0) return ret; 183 | } 184 | break; 185 | } 186 | return 1; 187 | } 188 | 189 | static int r_consonant_pair(struct SN_env * z) { 190 | { int m_test1 = z->l - z->c; 191 | 192 | { int mlimit2; 193 | if (z->c < z->I[1]) return 0; 194 | mlimit2 = z->lb; z->lb = z->I[1]; 195 | z->ket = z->c; 196 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit2; return 0; } 197 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit2; return 0; } 198 | z->bra = z->c; 199 | z->lb = mlimit2; 200 | } 201 | z->c = z->l - m_test1; 202 | } 203 | { int ret = skip_b_utf8(z->p, z->c, z->lb, 1); 204 | if (ret < 0) return 0; 205 | z->c = ret; 206 | } 207 | z->bra = z->c; 208 | { int ret = slice_del(z); 209 | if (ret < 0) return ret; 210 | } 211 | return 
1; 212 | } 213 | 214 | static int r_other_suffix(struct SN_env * z) { 215 | int among_var; 216 | { int m1 = z->l - z->c; (void)m1; 217 | z->ket = z->c; 218 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 219 | z->bra = z->c; 220 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 221 | { int ret = slice_del(z); 222 | if (ret < 0) return ret; 223 | } 224 | lab0: 225 | z->c = z->l - m1; 226 | } 227 | 228 | { int mlimit2; 229 | if (z->c < z->I[1]) return 0; 230 | mlimit2 = z->lb; z->lb = z->I[1]; 231 | z->ket = z->c; 232 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit2; return 0; } 233 | among_var = find_among_b(z, a_2, 5); 234 | if (!(among_var)) { z->lb = mlimit2; return 0; } 235 | z->bra = z->c; 236 | z->lb = mlimit2; 237 | } 238 | switch (among_var) { 239 | case 1: 240 | { int ret = slice_del(z); 241 | if (ret < 0) return ret; 242 | } 243 | { int m3 = z->l - z->c; (void)m3; 244 | { int ret = r_consonant_pair(z); 245 | if (ret < 0) return ret; 246 | } 247 | z->c = z->l - m3; 248 | } 249 | break; 250 | case 2: 251 | { int ret = slice_from_s(z, 4, s_2); 252 | if (ret < 0) return ret; 253 | } 254 | break; 255 | } 256 | return 1; 257 | } 258 | 259 | static int r_undouble(struct SN_env * z) { 260 | 261 | { int mlimit1; 262 | if (z->c < z->I[1]) return 0; 263 | mlimit1 = z->lb; z->lb = z->I[1]; 264 | z->ket = z->c; 265 | if (in_grouping_b_U(z, g_c, 98, 122, 0)) { z->lb = mlimit1; return 0; } 266 | z->bra = z->c; 267 | z->S[0] = slice_to(z, z->S[0]); 268 | if (z->S[0] == 0) return -1; 269 | z->lb = mlimit1; 270 | } 271 | if (!(eq_v_b(z, z->S[0]))) return 0; 272 | { int ret = slice_del(z); 273 | if (ret < 0) return ret; 274 | } 275 | return 1; 276 | } 277 | 278 | extern int danish_UTF_8_stem(struct SN_env * z) { 279 | { int c1 = z->c; 280 | { int ret = r_mark_regions(z); 281 | if (ret < 0) return ret; 282 | } 283 | z->c = c1; 284 | } 285 | z->lb = z->c; z->c = z->l; 286 | 287 | { int m2 = z->l - z->c; (void)m2; 288 | { int ret 
= r_main_suffix(z); 289 | if (ret < 0) return ret; 290 | } 291 | z->c = z->l - m2; 292 | } 293 | { int m3 = z->l - z->c; (void)m3; 294 | { int ret = r_consonant_pair(z); 295 | if (ret < 0) return ret; 296 | } 297 | z->c = z->l - m3; 298 | } 299 | { int m4 = z->l - z->c; (void)m4; 300 | { int ret = r_other_suffix(z); 301 | if (ret < 0) return ret; 302 | } 303 | z->c = z->l - m4; 304 | } 305 | { int m5 = z->l - z->c; (void)m5; 306 | { int ret = r_undouble(z); 307 | if (ret < 0) return ret; 308 | } 309 | z->c = z->l - m5; 310 | } 311 | z->c = z->lb; 312 | return 1; 313 | } 314 | 315 | extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2); } 316 | 317 | extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } 318 | 319 | -------------------------------------------------------------------------------- /stem_UTF_8_danish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * danish_UTF_8_create_env(void); 8 | extern void danish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int danish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_dutch.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * dutch_UTF_8_create_env(void); 8 | extern void dutch_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int dutch_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_english.h: 
-------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * english_UTF_8_create_env(void); 8 | extern void english_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int english_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_finnish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * finnish_UTF_8_create_env(void); 8 | extern void finnish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int finnish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_french.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * french_UTF_8_create_env(void); 8 | extern void french_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int french_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_german.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int german_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_standard_suffix(struct 
SN_env * z); 13 | static int r_R2(struct SN_env * z); 14 | static int r_R1(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | static int r_postlude(struct SN_env * z); 17 | static int r_prelude(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * german_UTF_8_create_env(void); 24 | extern void german_UTF_8_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | static const symbol s_0_1[1] = { 'U' }; 31 | static const symbol s_0_2[1] = { 'Y' }; 32 | static const symbol s_0_3[2] = { 0xC3, 0xA4 }; 33 | static const symbol s_0_4[2] = { 0xC3, 0xB6 }; 34 | static const symbol s_0_5[2] = { 0xC3, 0xBC }; 35 | 36 | static const struct among a_0[6] = 37 | { 38 | { 0, 0, -1, 5, 0}, 39 | { 1, s_0_1, 0, 2, 0}, 40 | { 1, s_0_2, 0, 1, 0}, 41 | { 2, s_0_3, 0, 3, 0}, 42 | { 2, s_0_4, 0, 4, 0}, 43 | { 2, s_0_5, 0, 2, 0} 44 | }; 45 | 46 | static const symbol s_1_0[1] = { 'e' }; 47 | static const symbol s_1_1[2] = { 'e', 'm' }; 48 | static const symbol s_1_2[2] = { 'e', 'n' }; 49 | static const symbol s_1_3[3] = { 'e', 'r', 'n' }; 50 | static const symbol s_1_4[2] = { 'e', 'r' }; 51 | static const symbol s_1_5[1] = { 's' }; 52 | static const symbol s_1_6[2] = { 'e', 's' }; 53 | 54 | static const struct among a_1[7] = 55 | { 56 | { 1, s_1_0, -1, 2, 0}, 57 | { 2, s_1_1, -1, 1, 0}, 58 | { 2, s_1_2, -1, 2, 0}, 59 | { 3, s_1_3, -1, 1, 0}, 60 | { 2, s_1_4, -1, 1, 0}, 61 | { 1, s_1_5, -1, 3, 0}, 62 | { 2, s_1_6, 5, 2, 0} 63 | }; 64 | 65 | static const symbol s_2_0[2] = { 'e', 'n' }; 66 | static const symbol s_2_1[2] = { 'e', 'r' }; 67 | static const symbol s_2_2[2] = { 's', 't' }; 68 | static const symbol s_2_3[3] = { 'e', 's', 't' }; 69 | 70 | static const struct among a_2[4] = 71 | { 72 | { 2, s_2_0, -1, 1, 0}, 73 | { 2, s_2_1, -1, 1, 0}, 74 | { 2, s_2_2, -1, 2, 0}, 75 | { 3, s_2_3, 2, 1, 0} 76 | }; 77 | 78 | static const symbol s_3_0[2] = { 'i', 'g' }; 79 | static const 
symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; 80 | 81 | static const struct among a_3[2] = 82 | { 83 | { 2, s_3_0, -1, 1, 0}, 84 | { 4, s_3_1, -1, 1, 0} 85 | }; 86 | 87 | static const symbol s_4_0[3] = { 'e', 'n', 'd' }; 88 | static const symbol s_4_1[2] = { 'i', 'g' }; 89 | static const symbol s_4_2[3] = { 'u', 'n', 'g' }; 90 | static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; 91 | static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; 92 | static const symbol s_4_5[2] = { 'i', 'k' }; 93 | static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; 94 | static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; 95 | 96 | static const struct among a_4[8] = 97 | { 98 | { 3, s_4_0, -1, 1, 0}, 99 | { 2, s_4_1, -1, 2, 0}, 100 | { 3, s_4_2, -1, 1, 0}, 101 | { 4, s_4_3, -1, 3, 0}, 102 | { 4, s_4_4, -1, 2, 0}, 103 | { 2, s_4_5, -1, 2, 0}, 104 | { 4, s_4_6, -1, 3, 0}, 105 | { 4, s_4_7, -1, 4, 0} 106 | }; 107 | 108 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; 109 | 110 | static const unsigned char g_s_ending[] = { 117, 30, 5 }; 111 | 112 | static const unsigned char g_st_ending[] = { 117, 30, 4 }; 113 | 114 | static const symbol s_0[] = { 0xC3, 0x9F }; 115 | static const symbol s_1[] = { 's', 's' }; 116 | static const symbol s_2[] = { 'U' }; 117 | static const symbol s_3[] = { 'Y' }; 118 | static const symbol s_4[] = { 'y' }; 119 | static const symbol s_5[] = { 'u' }; 120 | static const symbol s_6[] = { 'a' }; 121 | static const symbol s_7[] = { 'o' }; 122 | static const symbol s_8[] = { 'n', 'i', 's' }; 123 | static const symbol s_9[] = { 'i', 'g' }; 124 | static const symbol s_10[] = { 'e', 'r' }; 125 | static const symbol s_11[] = { 'e', 'n' }; 126 | 127 | static int r_prelude(struct SN_env * z) { 128 | { int c_test1 = z->c; 129 | while(1) { 130 | int c2 = z->c; 131 | { int c3 = z->c; 132 | z->bra = z->c; 133 | if (!(eq_s(z, 2, s_0))) goto lab2; 134 | z->ket = z->c; 135 | { int ret = slice_from_s(z, 2, s_1); 136 | if 
(ret < 0) return ret; 137 | } 138 | goto lab1; 139 | lab2: 140 | z->c = c3; 141 | { int ret = skip_utf8(z->p, z->c, z->l, 1); 142 | if (ret < 0) goto lab0; 143 | z->c = ret; 144 | } 145 | } 146 | lab1: 147 | continue; 148 | lab0: 149 | z->c = c2; 150 | break; 151 | } 152 | z->c = c_test1; 153 | } 154 | while(1) { 155 | int c4 = z->c; 156 | while(1) { 157 | int c5 = z->c; 158 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; 159 | z->bra = z->c; 160 | { int c6 = z->c; 161 | if (z->c == z->l || z->p[z->c] != 'u') goto lab6; 162 | z->c++; 163 | z->ket = z->c; 164 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6; 165 | { int ret = slice_from_s(z, 1, s_2); 166 | if (ret < 0) return ret; 167 | } 168 | goto lab5; 169 | lab6: 170 | z->c = c6; 171 | if (z->c == z->l || z->p[z->c] != 'y') goto lab4; 172 | z->c++; 173 | z->ket = z->c; 174 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; 175 | { int ret = slice_from_s(z, 1, s_3); 176 | if (ret < 0) return ret; 177 | } 178 | } 179 | lab5: 180 | z->c = c5; 181 | break; 182 | lab4: 183 | z->c = c5; 184 | { int ret = skip_utf8(z->p, z->c, z->l, 1); 185 | if (ret < 0) goto lab3; 186 | z->c = ret; 187 | } 188 | } 189 | continue; 190 | lab3: 191 | z->c = c4; 192 | break; 193 | } 194 | return 1; 195 | } 196 | 197 | static int r_mark_regions(struct SN_env * z) { 198 | z->I[2] = z->l; 199 | z->I[1] = z->l; 200 | { int c_test1 = z->c; 201 | { int ret = skip_utf8(z->p, z->c, z->l, 3); 202 | if (ret < 0) return 0; 203 | z->c = ret; 204 | } 205 | z->I[0] = z->c; 206 | z->c = c_test1; 207 | } 208 | { 209 | int ret = out_grouping_U(z, g_v, 97, 252, 1); 210 | if (ret < 0) return 0; 211 | z->c += ret; 212 | } 213 | { 214 | int ret = in_grouping_U(z, g_v, 97, 252, 1); 215 | if (ret < 0) return 0; 216 | z->c += ret; 217 | } 218 | z->I[2] = z->c; 219 | 220 | if (!(z->I[2] < z->I[0])) goto lab0; 221 | z->I[2] = z->I[0]; 222 | lab0: 223 | { 224 | int ret = out_grouping_U(z, g_v, 97, 252, 1); 225 | if (ret < 0) return 0; 226 | z->c += ret; 227 
| } 228 | { 229 | int ret = in_grouping_U(z, g_v, 97, 252, 1); 230 | if (ret < 0) return 0; 231 | z->c += ret; 232 | } 233 | z->I[1] = z->c; 234 | return 1; 235 | } 236 | 237 | static int r_postlude(struct SN_env * z) { 238 | int among_var; 239 | while(1) { 240 | int c1 = z->c; 241 | z->bra = z->c; 242 | among_var = find_among(z, a_0, 6); 243 | if (!(among_var)) goto lab0; 244 | z->ket = z->c; 245 | switch (among_var) { 246 | case 1: 247 | { int ret = slice_from_s(z, 1, s_4); 248 | if (ret < 0) return ret; 249 | } 250 | break; 251 | case 2: 252 | { int ret = slice_from_s(z, 1, s_5); 253 | if (ret < 0) return ret; 254 | } 255 | break; 256 | case 3: 257 | { int ret = slice_from_s(z, 1, s_6); 258 | if (ret < 0) return ret; 259 | } 260 | break; 261 | case 4: 262 | { int ret = slice_from_s(z, 1, s_7); 263 | if (ret < 0) return ret; 264 | } 265 | break; 266 | case 5: 267 | { int ret = skip_utf8(z->p, z->c, z->l, 1); 268 | if (ret < 0) goto lab0; 269 | z->c = ret; 270 | } 271 | break; 272 | } 273 | continue; 274 | lab0: 275 | z->c = c1; 276 | break; 277 | } 278 | return 1; 279 | } 280 | 281 | static int r_R1(struct SN_env * z) { 282 | if (!(z->I[2] <= z->c)) return 0; 283 | return 1; 284 | } 285 | 286 | static int r_R2(struct SN_env * z) { 287 | if (!(z->I[1] <= z->c)) return 0; 288 | return 1; 289 | } 290 | 291 | static int r_standard_suffix(struct SN_env * z) { 292 | int among_var; 293 | { int m1 = z->l - z->c; (void)m1; 294 | z->ket = z->c; 295 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; 296 | among_var = find_among_b(z, a_1, 7); 297 | if (!(among_var)) goto lab0; 298 | z->bra = z->c; 299 | { int ret = r_R1(z); 300 | if (ret == 0) goto lab0; 301 | if (ret < 0) return ret; 302 | } 303 | switch (among_var) { 304 | case 1: 305 | { int ret = slice_del(z); 306 | if (ret < 0) return ret; 307 | } 308 | break; 309 | case 2: 310 | { int ret = slice_del(z); 311 | if (ret < 0) return ret; 312 | } 313 | { int m2 = z->l 
- z->c; (void)m2; 314 | z->ket = z->c; 315 | if (z->c <= z->lb || z->p[z->c - 1] != 's') { z->c = z->l - m2; goto lab1; } 316 | z->c--; 317 | z->bra = z->c; 318 | if (!(eq_s_b(z, 3, s_8))) { z->c = z->l - m2; goto lab1; } 319 | { int ret = slice_del(z); 320 | if (ret < 0) return ret; 321 | } 322 | lab1: 323 | ; 324 | } 325 | break; /* case 3 below: the suffix is deleted only when in_grouping_b_U returns 0 for g_s_ending; a nonzero result abandons the pass */ 326 | case 3: 327 | if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0; 328 | { int ret = slice_del(z); 329 | if (ret < 0) return ret; 330 | } 331 | break; 332 | } 333 | lab0: 334 | z->c = z->l - m1; /* end of the a_1 pass: cursor restored from m1 */ 335 | } /* second pass: a_2 suffixes, accepted only when r_R1 holds at the start of the match */ 336 | { int m3 = z->l - z->c; (void)m3; 337 | z->ket = z->c; 338 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; 339 | among_var = find_among_b(z, a_2, 4); 340 | if (!(among_var)) goto lab2; 341 | z->bra = z->c; 342 | { int ret = r_R1(z); 343 | if (ret == 0) goto lab2; 344 | if (ret < 0) return ret; 345 | } 346 | switch (among_var) { 347 | case 1: 348 | { int ret = slice_del(z); 349 | if (ret < 0) return ret; 350 | } 351 | break; /* case 2 below: needs in_grouping_b_U to return 0 for g_st_ending and skip_b_utf8 to step back 3 characters before the deletion is performed */ 352 | case 2: 353 | if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab2; 354 | { int ret = skip_b_utf8(z->p, z->c, z->lb, 3); 355 | if (ret < 0) goto lab2; 356 | z->c = ret; 357 | } 358 | { int ret = slice_del(z); 359 | if (ret < 0) return ret; 360 | } 361 | break; 362 | } 363 | lab2: 364 | z->c = z->l - m3; 365 | } /* third pass: a_4 suffixes, accepted only when r_R2 holds at the start of the match */ 366 | { int m4 = z->l - z->c; (void)m4; 367 | z->ket = z->c; 368 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; 369 | among_var = find_among_b(z, a_4, 8); 370 | if (!(among_var)) goto lab3; 371 | z->bra = z->c; 372 | { int ret = r_R2(z); 373 | if (ret == 0) goto lab3; 374 | if (ret < 0) return ret; 375 | } 376 | switch (among_var) { 377 | case 1: 378 | { int ret = slice_del(z); 379 | if (ret < 0) return ret; 380 | } /* optional follow-up: also delete a preceding s_9 (two symbols) unless it is itself preceded by the letter e, and only when r_R2 holds */ 381 | { int m5 = z->l - z->c; (void)m5; 382 | z->ket = z->c; 383 | if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m5; goto lab4; } 384 | z->bra =
z->c; 385 | { int m6 = z->l - z->c; (void)m6; 386 | if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab5; 387 | z->c--; 388 | { z->c = z->l - m5; goto lab4; } 389 | lab5: 390 | z->c = z->l - m6; 391 | } 392 | { int ret = r_R2(z); 393 | if (ret == 0) { z->c = z->l - m5; goto lab4; } 394 | if (ret < 0) return ret; 395 | } 396 | { int ret = slice_del(z); 397 | if (ret < 0) return ret; 398 | } 399 | lab4: 400 | ; 401 | } 402 | break; /* case 2 below: the pass is abandoned (goto lab3) when the suffix is preceded by the letter e; otherwise the suffix is deleted */ 403 | case 2: 404 | { int m7 = z->l - z->c; (void)m7; 405 | if (z->c <= z->lb || z->p[z->c - 1] != 'e') goto lab6; 406 | z->c--; 407 | goto lab3; 408 | lab6: 409 | z->c = z->l - m7; 410 | } 411 | { int ret = slice_del(z); 412 | if (ret < 0) return ret; 413 | } 414 | break; /* case 3 below: delete the suffix, then also delete a preceding s_10 or s_11 (two symbols each) when r_R1 holds */ 415 | case 3: 416 | { int ret = slice_del(z); 417 | if (ret < 0) return ret; 418 | } 419 | { int m8 = z->l - z->c; (void)m8; 420 | z->ket = z->c; 421 | { int m9 = z->l - z->c; (void)m9; 422 | if (!(eq_s_b(z, 2, s_10))) goto lab9; 423 | goto lab8; 424 | lab9: 425 | z->c = z->l - m9; 426 | if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m8; goto lab7; } 427 | } 428 | lab8: 429 | z->bra = z->c; 430 | { int ret = r_R1(z); 431 | if (ret == 0) { z->c = z->l - m8; goto lab7; } 432 | if (ret < 0) return ret; 433 | } 434 | { int ret = slice_del(z); 435 | if (ret < 0) return ret; 436 | } 437 | lab7: 438 | ; 439 | } 440 | break; /* case 4 below: delete the suffix, then also delete a preceding a_3 entry when r_R2 holds */ 441 | case 4: 442 | { int ret = slice_del(z); 443 | if (ret < 0) return ret; 444 | } 445 | { int m10 = z->l - z->c; (void)m10; 446 | z->ket = z->c; 447 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m10; goto lab10; } 448 | if (!(find_among_b(z, a_3, 2))) { z->c = z->l - m10; goto lab10; } 449 | z->bra = z->c; 450 | { int ret = r_R2(z); 451 | if (ret == 0) { z->c = z->l - m10; goto lab10; } 452 | if (ret < 0) return ret; 453 | } 454 | { int ret = slice_del(z); 455 | if (ret < 0) return ret; 456 | } 457 | lab10: 458 | ; 459 | } 460 | break; 461 | } 462 | lab3: 463 | z->c = z->l - m4; /* end of the a_4 pass: cursor restored from m4 */ 464 | } 465 | return 1; 466 | }
467 | /* german_UTF_8_stem: entry point of the German stemmer. Runs r_prelude and r_mark_regions with the cursor restored after each, then sets lb to the cursor and moves the cursor to the limit l so that r_standard_suffix works backwards, and finally resets the cursor to lb and runs r_postlude. Returns 1, or a negative value propagated from a sub-routine. */ 468 | extern int german_UTF_8_stem(struct SN_env * z) { 469 | { int c1 = z->c; 470 | { int ret = r_prelude(z); 471 | if (ret < 0) return ret; 472 | } 473 | z->c = c1; 474 | } 475 | { int c2 = z->c; 476 | { int ret = r_mark_regions(z); 477 | if (ret < 0) return ret; 478 | } 479 | z->c = c2; 480 | } 481 | z->lb = z->c; z->c = z->l; 482 | 483 | 484 | { int ret = r_standard_suffix(z); 485 | if (ret < 0) return ret; 486 | } 487 | z->c = z->lb; 488 | { int c3 = z->c; 489 | { int ret = r_postlude(z); 490 | if (ret < 0) return ret; 491 | } 492 | z->c = c3; 493 | } 494 | return 1; 495 | } 496 | /* Creates the environment through SN_create_env(0, 3); NOTE(review): the 3 presumably sizes the I[] array used by the region marks - confirm in api.c. */ 497 | extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3); } 498 | /* Releases the environment through SN_close_env(z, 0). */ 499 | extern void german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 500 | 501 | -------------------------------------------------------------------------------- /stem_UTF_8_german.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | /* Public interface of the German UTF-8 stemmer: create an environment, release it, and run the stemmer on it. */ 7 | extern struct SN_env * german_UTF_8_create_env(void); 8 | extern void german_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int german_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_greek.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * greek_UTF_8_create_env(void); 8 | extern void greek_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int greek_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_hindi.h:
-------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * hindi_UTF_8_create_env(void); 8 | extern void hindi_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int hindi_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_hungarian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * hungarian_UTF_8_create_env(void); 8 | extern void hungarian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int hungarian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_indonesian.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int indonesian_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* Forward declarations of the file-local routines; several of them are referenced from the among tables before their definitions appear. */ static int r_VOWEL(struct SN_env * z); 13 | static int r_SUFFIX_I_OK(struct SN_env * z); 14 | static int r_SUFFIX_AN_OK(struct SN_env * z); 15 | static int r_SUFFIX_KAN_OK(struct SN_env * z); 16 | static int r_KER(struct SN_env * z); 17 | static int r_remove_suffix(struct SN_env * z); 18 | static int r_remove_second_order_prefix(struct SN_env * z); 19 | static int r_remove_first_order_prefix(struct SN_env * z); 20 | static int r_remove_possessive_pronoun(struct SN_env * z); 21 | static int r_remove_particle(struct SN_env * z); 22 | #ifdef __cplusplus 23 | extern
"C" { 24 | #endif 25 | 26 | 27 | extern struct SN_env * indonesian_UTF_8_create_env(void); 28 | extern void indonesian_UTF_8_close_env(struct SN_env * z); 29 | 30 | 31 | #ifdef __cplusplus 32 | } 33 | #endif 34 | /* a_0: the suffixes kah, lah and pun, searched backwards by r_remove_particle. */ static const symbol s_0_0[3] = { 'k', 'a', 'h' }; 35 | static const symbol s_0_1[3] = { 'l', 'a', 'h' }; 36 | static const symbol s_0_2[3] = { 'p', 'u', 'n' }; 37 | 38 | static const struct among a_0[3] = 39 | { 40 | { 3, s_0_0, -1, 1, 0}, 41 | { 3, s_0_1, -1, 1, 0}, 42 | { 3, s_0_2, -1, 1, 0} 43 | }; 44 | /* a_1: the suffixes nya, ku and mu, searched backwards by r_remove_possessive_pronoun. */ 45 | static const symbol s_1_0[3] = { 'n', 'y', 'a' }; 46 | static const symbol s_1_1[2] = { 'k', 'u' }; 47 | static const symbol s_1_2[2] = { 'm', 'u' }; 48 | 49 | static const struct among a_1[3] = 50 | { 51 | { 3, s_1_0, -1, 1, 0}, 52 | { 2, s_1_1, -1, 1, 0}, 53 | { 2, s_1_2, -1, 1, 0} 54 | }; 55 | /* a_2: the suffixes i, an and kan, searched backwards by r_remove_suffix. The last field of each entry names a routine; presumably find_among_b calls it to accept or reject the match - confirm in utilities.c. */ 56 | static const symbol s_2_0[1] = { 'i' }; 57 | static const symbol s_2_1[2] = { 'a', 'n' }; 58 | static const symbol s_2_2[3] = { 'k', 'a', 'n' }; 59 | 60 | static const struct among a_2[3] = 61 | { 62 | { 1, s_2_0, -1, 1, r_SUFFIX_I_OK}, 63 | { 2, s_2_1, -1, 1, r_SUFFIX_AN_OK}, 64 | { 3, s_2_2, 1, 1, r_SUFFIX_KAN_OK} 65 | }; 66 | /* a_3: prefixes searched forwards by r_remove_first_order_prefix (di, ke, me, mem, men, meng, meny, pem, pen, peng, peny, ter). */ 67 | static const symbol s_3_0[2] = { 'd', 'i' }; 68 | static const symbol s_3_1[2] = { 'k', 'e' }; 69 | static const symbol s_3_2[2] = { 'm', 'e' }; 70 | static const symbol s_3_3[3] = { 'm', 'e', 'm' }; 71 | static const symbol s_3_4[3] = { 'm', 'e', 'n' }; 72 | static const symbol s_3_5[4] = { 'm', 'e', 'n', 'g' }; 73 | static const symbol s_3_6[4] = { 'm', 'e', 'n', 'y' }; 74 | static const symbol s_3_7[3] = { 'p', 'e', 'm' }; 75 | static const symbol s_3_8[3] = { 'p', 'e', 'n' }; 76 | static const symbol s_3_9[4] = { 'p', 'e', 'n', 'g' }; 77 | static const symbol s_3_10[4] = { 'p', 'e', 'n', 'y' }; 78 | static const symbol s_3_11[3] = { 't', 'e', 'r' }; 79 | 80 | static const struct among a_3[12] = 81 | { 82 | { 2, s_3_0, -1, 1, 0}, 83 | { 2, s_3_1, -1, 2, 0}, 84 | { 2, s_3_2, -1, 1, 0}, 85 | { 3, s_3_3, 2, 5, 0}, 86 | {
3, s_3_4, 2, 1, 0}, 87 | { 4, s_3_5, 4, 1, 0}, 88 | { 4, s_3_6, 4, 3, r_VOWEL}, 89 | { 3, s_3_7, -1, 6, 0}, 90 | { 3, s_3_8, -1, 2, 0}, 91 | { 4, s_3_9, 8, 2, 0}, 92 | { 4, s_3_10, 8, 4, r_VOWEL}, 93 | { 3, s_3_11, -1, 1, 0} 94 | }; 95 | /* a_4: prefixes searched forwards by r_remove_second_order_prefix (be, belajar, ber, pe, pelajar, per). */ 96 | static const symbol s_4_0[2] = { 'b', 'e' }; 97 | static const symbol s_4_1[7] = { 'b', 'e', 'l', 'a', 'j', 'a', 'r' }; 98 | static const symbol s_4_2[3] = { 'b', 'e', 'r' }; 99 | static const symbol s_4_3[2] = { 'p', 'e' }; 100 | static const symbol s_4_4[7] = { 'p', 'e', 'l', 'a', 'j', 'a', 'r' }; 101 | static const symbol s_4_5[3] = { 'p', 'e', 'r' }; 102 | 103 | static const struct among a_4[6] = 104 | { 105 | { 2, s_4_0, -1, 3, r_KER}, 106 | { 7, s_4_1, 0, 4, 0}, 107 | { 3, s_4_2, 0, 3, 0}, 108 | { 2, s_4_3, -1, 1, 0}, 109 | { 7, s_4_4, 3, 2, 0}, 110 | { 3, s_4_5, 3, 1, 0} 111 | }; 112 | /* g_vowel: grouping bitmap handed to in_grouping_U and out_grouping_U together with the code point range 97..117. */ 113 | static const unsigned char g_vowel[] = { 17, 65, 16 }; 114 | /* s_0 is compared by r_KER; s_1..s_6 are replacement strings passed to slice_from_s by the prefix routines. */ 115 | static const symbol s_0[] = { 'e', 'r' }; 116 | static const symbol s_1[] = { 's' }; 117 | static const symbol s_2[] = { 's' }; 118 | static const symbol s_3[] = { 'p' }; 119 | static const symbol s_4[] = { 'p' }; 120 | static const symbol s_5[] = { 'a', 'j', 'a', 'r' }; 121 | static const symbol s_6[] = { 'a', 'j', 'a', 'r' }; 122 | /* r_remove_particle: working backwards, deletes a trailing a_0 entry and decrements the counter I[1]. Returns 0 when nothing matches, 1 after a deletion, or a negative value from slice_del. */ 123 | static int r_remove_particle(struct SN_env * z) { 124 | z->ket = z->c; 125 | if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 104 && z->p[z->c - 1] != 110)) return 0; 126 | if (!(find_among_b(z, a_0, 3))) return 0; 127 | z->bra = z->c; 128 | { int ret = slice_del(z); 129 | if (ret < 0) return ret; 130 | } 131 | z->I[1] -= 1; 132 | return 1; 133 | } 134 | /* r_remove_possessive_pronoun: working backwards, deletes a trailing a_1 entry and decrements the counter I[1]. Returns 0 when nothing matches, 1 after a deletion, or a negative value from slice_del. */ 135 | static int r_remove_possessive_pronoun(struct SN_env * z) { 136 | z->ket = z->c; 137 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 117)) return 0; 138 | if (!(find_among_b(z, a_1, 3))) return 0; 139 | z->bra = z->c; 140 | { int ret = slice_del(z); 141 | if (ret < 0) return ret; 142 | } 143 | z->I[1] -= 1; 144 | return 1; 145 | } 146 | 147 | static int
r_SUFFIX_KAN_OK(struct SN_env * z) { 148 | /* r_SUFFIX_KAN_OK: returns 1 only when I[0], the code stored by the prefix routines, is neither 3 nor 2. */ 149 | if (!(z->I[0] != 3)) return 0; 150 | if (!(z->I[0] != 2)) return 0; 151 | return 1; 152 | } 153 | /* r_SUFFIX_AN_OK: returns 1 unless I[0] equals 1. */ 154 | static int r_SUFFIX_AN_OK(struct SN_env * z) { 155 | if (!(z->I[0] != 1)) return 0; 156 | return 1; 157 | } 158 | /* r_SUFFIX_I_OK: returns 1 when I[0] is at most 2 and the character before the cursor is not the letter s; the cursor is restored on the accepting path. */ 159 | static int r_SUFFIX_I_OK(struct SN_env * z) { 160 | if (!(z->I[0] <= 2)) return 0; 161 | { int m1 = z->l - z->c; (void)m1; 162 | if (z->c <= z->lb || z->p[z->c - 1] != 's') goto lab0; 163 | z->c--; 164 | return 0; 165 | lab0: 166 | z->c = z->l - m1; 167 | } 168 | return 1; 169 | } 170 | /* r_remove_suffix: working backwards, deletes a trailing a_2 entry and decrements the counter I[1]. Returns 0 when nothing matches, 1 after a deletion, or a negative value from slice_del. */ 171 | static int r_remove_suffix(struct SN_env * z) { 172 | z->ket = z->c; 173 | if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 110)) return 0; 174 | if (!(find_among_b(z, a_2, 3))) return 0; 175 | z->bra = z->c; 176 | { int ret = slice_del(z); 177 | if (ret < 0) return ret; 178 | } 179 | z->I[1] -= 1; 180 | return 1; 181 | } 182 | /* r_VOWEL: returns 1 when in_grouping_U returns 0 for g_vowel at the cursor, otherwise 0. */ 183 | static int r_VOWEL(struct SN_env * z) { 184 | if (in_grouping_U(z, g_vowel, 97, 117, 0)) return 0; 185 | return 1; 186 | } 187 | /* r_KER: returns 1 when out_grouping_U returns 0 for g_vowel and the two symbols of s_0 follow, otherwise 0. */ 188 | static int r_KER(struct SN_env * z) { 189 | if (out_grouping_U(z, g_vowel, 97, 117, 0)) return 0; 190 | if (!(eq_s(z, 2, s_0))) return 0; 191 | return 1; 192 | } 193 | /* r_remove_first_order_prefix: matches an a_3 prefix at the cursor, deletes or rewrites it, stores 1 or 3 in I[0] and decrements the counter I[1]. Returns 0 when nothing matches, 1 on success, or a negative value from a slice operation. */ 194 | static int r_remove_first_order_prefix(struct SN_env * z) { 195 | int among_var; 196 | z->bra = z->c; 197 | if (z->c + 1 >= z->l || (z->p[z->c + 1] != 105 && z->p[z->c + 1] != 101)) return 0; 198 | among_var = find_among(z, a_3, 12); 199 | if (!(among_var)) return 0; 200 | z->ket = z->c; 201 | switch (among_var) { 202 | case 1: 203 | { int ret = slice_del(z); 204 | if (ret < 0) return ret; 205 | } 206 | z->I[0] = 1; 207 | z->I[1] -= 1; 208 | break; 209 | case 2: 210 | { int ret = slice_del(z); 211 | if (ret < 0) return ret; 212 | } 213 | z->I[0] = 3; 214 | z->I[1] -= 1; 215 | break; 216 | case 3: 217 | z->I[0] = 1; 218 | { int ret = slice_from_s(z, 1, s_1); 219 | if (ret < 0) return ret; 220 | } 221 | z->I[1] -= 1; 222 | break; 223 | case 4: 224 | z->I[0] = 3;
225 | { int ret = slice_from_s(z, 1, s_2); 226 | if (ret < 0) return ret; 227 | } 228 | z->I[1] -= 1; 229 | break; /* cases 5 and 6 below: the matched prefix is rewritten through slice_from_s (s_3 or s_4) when in_grouping_U returns 0 for g_vowel at the cursor, and deleted otherwise */ 230 | case 5: 231 | z->I[0] = 1; 232 | z->I[1] -= 1; 233 | { int c1 = z->c; 234 | { int c2 = z->c; 235 | if (in_grouping_U(z, g_vowel, 97, 117, 0)) goto lab1; 236 | z->c = c2; 237 | { int ret = slice_from_s(z, 1, s_3); 238 | if (ret < 0) return ret; 239 | } 240 | } 241 | goto lab0; 242 | lab1: 243 | z->c = c1; 244 | { int ret = slice_del(z); 245 | if (ret < 0) return ret; 246 | } 247 | } 248 | lab0: 249 | break; 250 | case 6: 251 | z->I[0] = 3; 252 | z->I[1] -= 1; 253 | { int c3 = z->c; 254 | { int c4 = z->c; 255 | if (in_grouping_U(z, g_vowel, 97, 117, 0)) goto lab3; 256 | z->c = c4; 257 | { int ret = slice_from_s(z, 1, s_4); 258 | if (ret < 0) return ret; 259 | } 260 | } 261 | goto lab2; 262 | lab3: 263 | z->c = c3; 264 | { int ret = slice_del(z); 265 | if (ret < 0) return ret; 266 | } 267 | } 268 | lab2: 269 | break; 270 | } 271 | return 1; 272 | } 273 | /* r_remove_second_order_prefix: matches an a_4 prefix at the cursor, deletes it or rewrites it through slice_from_s (s_5 or s_6), stores 2 or 4 in I[0] (case 2 leaves I[0] untouched) and decrements the counter I[1]. Returns 0 when nothing matches, 1 on success, or a negative value from a slice operation. */ 274 | static int r_remove_second_order_prefix(struct SN_env * z) { 275 | int among_var; 276 | z->bra = z->c; 277 | if (z->c + 1 >= z->l || z->p[z->c + 1] != 101) return 0; 278 | among_var = find_among(z, a_4, 6); 279 | if (!(among_var)) return 0; 280 | z->ket = z->c; 281 | switch (among_var) { 282 | case 1: 283 | { int ret = slice_del(z); 284 | if (ret < 0) return ret; 285 | } 286 | z->I[0] = 2; 287 | z->I[1] -= 1; 288 | break; 289 | case 2: 290 | { int ret = slice_from_s(z, 4, s_5); 291 | if (ret < 0) return ret; 292 | } 293 | z->I[1] -= 1; 294 | break; 295 | case 3: 296 | { int ret = slice_del(z); 297 | if (ret < 0) return ret; 298 | } 299 | z->I[0] = 4; 300 | z->I[1] -= 1; 301 | break; 302 | case 4: 303 | { int ret = slice_from_s(z, 4, s_6); 304 | if (ret < 0) return ret; 305 | } 306 | z->I[0] = 4; 307 | z->I[1] -= 1; 308 | break; 309 | } 310 | return 1; 311 | } 312 | /* indonesian_UTF_8_stem: entry point of the Indonesian stemmer. It first counts in I[1] how many times out_grouping_U succeeds over the word (presumably a vowel or syllable count - confirm against the Snowball source) and returns 0 whenever that counter is not greater than 2. */ 313 | extern int indonesian_UTF_8_stem(struct SN_env * z) { 314 | z->I[1] = 0; 315 | { int c1 = z->c; 316 | while(1) { 317 | int
c2 = z->c; 318 | { 319 | int ret = out_grouping_U(z, g_vowel, 97, 117, 1); 320 | if (ret < 0) goto lab1; 321 | z->c += ret; 322 | } 323 | z->I[1] += 1; 324 | continue; 325 | lab1: 326 | z->c = c2; 327 | break; 328 | } 329 | z->c = c1; 330 | } /* the counter I[1] must stay above 2 for processing to continue; the removal routines decrement it */ 331 | if (!(z->I[1] > 2)) return 0; 332 | z->I[0] = 0; 333 | z->lb = z->c; z->c = z->l; 334 | /* backward phase: particle first, then possessive pronoun, each with the cursor restored afterwards */ 335 | { int m3 = z->l - z->c; (void)m3; 336 | { int ret = r_remove_particle(z); 337 | if (ret < 0) return ret; 338 | } 339 | z->c = z->l - m3; 340 | } 341 | if (!(z->I[1] > 2)) return 0; 342 | { int m4 = z->l - z->c; (void)m4; 343 | { int ret = r_remove_possessive_pronoun(z); 344 | if (ret < 0) return ret; 345 | } 346 | z->c = z->l - m4; 347 | } 348 | z->c = z->lb; 349 | if (!(z->I[1] > 2)) return 0; /* first alternative: first-order prefix, then (while I[1] stays above 2) suffix followed by second-order prefix; when no first-order prefix matches, lab3 tries second-order prefix and then suffix */ 350 | { int c5 = z->c; 351 | { int c_test6 = z->c; 352 | { int ret = r_remove_first_order_prefix(z); 353 | if (ret == 0) goto lab3; 354 | if (ret < 0) return ret; 355 | } 356 | { int c7 = z->c; 357 | { int c_test8 = z->c; 358 | if (!(z->I[1] > 2)) goto lab4; 359 | z->lb = z->c; z->c = z->l; 360 | 361 | { int ret = r_remove_suffix(z); 362 | if (ret == 0) goto lab4; 363 | if (ret < 0) return ret; 364 | } 365 | z->c = z->lb; 366 | z->c = c_test8; 367 | } 368 | if (!(z->I[1] > 2)) goto lab4; 369 | { int ret = r_remove_second_order_prefix(z); 370 | if (ret == 0) goto lab4; 371 | if (ret < 0) return ret; 372 | } 373 | lab4: 374 | z->c = c7; 375 | } 376 | z->c = c_test6; 377 | } 378 | goto lab2; 379 | lab3: 380 | z->c = c5; 381 | { int c9 = z->c; 382 | { int ret = r_remove_second_order_prefix(z); 383 | if (ret < 0) return ret; 384 | } 385 | z->c = c9; 386 | } 387 | { int c10 = z->c; 388 | if (!(z->I[1] > 2)) goto lab5; 389 | z->lb = z->c; z->c = z->l; 390 | 391 | { int ret = r_remove_suffix(z); 392 | if (ret == 0) goto lab5; 393 | if (ret < 0) return ret; 394 | } 395 | z->c = z->lb; 396 | lab5: 397 | z->c = c10; 398 | } 399 | } 400 | lab2: 401 | return 1; 402 | } 403 | 404 | extern struct SN_env * indonesian_UTF_8_create_env(void) { return
SN_create_env(0, 2); } 405 | /* Releases the environment through SN_close_env(z, 0). */ 406 | extern void indonesian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 407 | 408 | -------------------------------------------------------------------------------- /stem_UTF_8_indonesian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | /* Public interface of the Indonesian UTF-8 stemmer: create an environment, release it, and run the stemmer on it. */ 7 | extern struct SN_env * indonesian_UTF_8_create_env(void); 8 | extern void indonesian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int indonesian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_irish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int irish_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | /* Forward declarations of the file-local routines of the Irish stemmer. */ static int r_verb_sfx(struct SN_env * z); 13 | static int r_deriv(struct SN_env * z); 14 | static int r_noun_sfx(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | static int r_initial_morph(struct SN_env * z); 17 | static int r_RV(struct SN_env * z); 18 | static int r_R2(struct SN_env * z); 19 | static int r_R1(struct SN_env * z); 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | 24 | 25 | extern struct SN_env * irish_UTF_8_create_env(void); 26 | extern void irish_UTF_8_close_env(struct SN_env * z); 27 | 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | /* s_0_*: word-initial strings of the table a_0, which r_initial_morph searches forwards with find_among. */ static const symbol s_0_0[2] = { 'b', '\'' }; 33 | static const symbol s_0_1[2] = { 'b', 'h' }; 34 | static const symbol s_0_2[3] = { 'b', 'h', 'f' }; 35 | static const symbol s_0_3[2] = { 'b', 'p' }; 36 | static const symbol s_0_4[2] = { 'c', 'h' }; 37 | static const symbol
s_0_5[2] = { 'd', '\'' }; 38 | static const symbol s_0_6[4] = { 'd', '\'', 'f', 'h' }; 39 | static const symbol s_0_7[2] = { 'd', 'h' }; 40 | static const symbol s_0_8[2] = { 'd', 't' }; 41 | static const symbol s_0_9[2] = { 'f', 'h' }; 42 | static const symbol s_0_10[2] = { 'g', 'c' }; 43 | static const symbol s_0_11[2] = { 'g', 'h' }; 44 | static const symbol s_0_12[2] = { 'h', '-' }; 45 | static const symbol s_0_13[2] = { 'm', '\'' }; 46 | static const symbol s_0_14[2] = { 'm', 'b' }; 47 | static const symbol s_0_15[2] = { 'm', 'h' }; 48 | static const symbol s_0_16[2] = { 'n', '-' }; 49 | static const symbol s_0_17[2] = { 'n', 'd' }; 50 | static const symbol s_0_18[2] = { 'n', 'g' }; 51 | static const symbol s_0_19[2] = { 'p', 'h' }; 52 | static const symbol s_0_20[2] = { 's', 'h' }; 53 | static const symbol s_0_21[2] = { 't', '-' }; 54 | static const symbol s_0_22[2] = { 't', 'h' }; 55 | static const symbol s_0_23[2] = { 't', 's' }; 56 | /* a_0: the 24 word-initial patterns searched by r_initial_morph. The fourth field of each entry takes the values 1..10, matching the case labels of the switch in r_initial_morph; presumably it is the value find_among returns - confirm against struct among in header.h. */ 57 | static const struct among a_0[24] = 58 | { 59 | { 2, s_0_0, -1, 1, 0}, 60 | { 2, s_0_1, -1, 4, 0}, 61 | { 3, s_0_2, 1, 2, 0}, 62 | { 2, s_0_3, -1, 8, 0}, 63 | { 2, s_0_4, -1, 5, 0}, 64 | { 2, s_0_5, -1, 1, 0}, 65 | { 4, s_0_6, 5, 2, 0}, 66 | { 2, s_0_7, -1, 6, 0}, 67 | { 2, s_0_8, -1, 9, 0}, 68 | { 2, s_0_9, -1, 2, 0}, 69 | { 2, s_0_10, -1, 5, 0}, 70 | { 2, s_0_11, -1, 7, 0}, 71 | { 2, s_0_12, -1, 1, 0}, 72 | { 2, s_0_13, -1, 1, 0}, 73 | { 2, s_0_14, -1, 4, 0}, 74 | { 2, s_0_15, -1, 10, 0}, 75 | { 2, s_0_16, -1, 1, 0}, 76 | { 2, s_0_17, -1, 6, 0}, 77 | { 2, s_0_18, -1, 7, 0}, 78 | { 2, s_0_19, -1, 8, 0}, 79 | { 2, s_0_20, -1, 3, 0}, 80 | { 2, s_0_21, -1, 1, 0}, 81 | { 2, s_0_22, -1, 9, 0}, 82 | { 2, s_0_23, -1, 3, 0} 83 | }; 84 | /* s_1_*: suffix strings of the table a_1, which r_noun_sfx searches backwards; the byte pair 0xC3 0xAD is a two-byte UTF-8 sequence. */ 85 | static const symbol s_1_0[7] = { 0xC3, 0xAD, 'o', 'c', 'h', 't', 'a' }; 86 | static const symbol s_1_1[8] = { 'a', 0xC3, 0xAD, 'o', 'c', 'h', 't', 'a' }; 87 | static const symbol s_1_2[3] = { 'i', 'r', 'e' }; 88 | static const symbol s_1_3[4] = { 'a', 'i', 'r', 'e' }; 89 | static const
symbol s_1_4[3] = { 'a', 'b', 'h' }; 90 | static const symbol s_1_5[4] = { 'e', 'a', 'b', 'h' }; 91 | static const symbol s_1_6[3] = { 'i', 'b', 'h' }; 92 | static const symbol s_1_7[4] = { 'a', 'i', 'b', 'h' }; 93 | static const symbol s_1_8[3] = { 'a', 'm', 'h' }; 94 | static const symbol s_1_9[4] = { 'e', 'a', 'm', 'h' }; 95 | static const symbol s_1_10[3] = { 'i', 'm', 'h' }; 96 | static const symbol s_1_11[4] = { 'a', 'i', 'm', 'h' }; 97 | static const symbol s_1_12[6] = { 0xC3, 0xAD, 'o', 'c', 'h', 't' }; 98 | static const symbol s_1_13[7] = { 'a', 0xC3, 0xAD, 'o', 'c', 'h', 't' }; 99 | static const symbol s_1_14[4] = { 'i', 'r', 0xC3, 0xAD }; 100 | static const symbol s_1_15[5] = { 'a', 'i', 'r', 0xC3, 0xAD }; 101 | /* a_1: the 16 suffixes searched backwards by r_noun_sfx. The fourth field takes the values 1 and 2, matching the two case labels in r_noun_sfx. */ 102 | static const struct among a_1[16] = 103 | { 104 | { 7, s_1_0, -1, 1, 0}, 105 | { 8, s_1_1, 0, 1, 0}, 106 | { 3, s_1_2, -1, 2, 0}, 107 | { 4, s_1_3, 2, 2, 0}, 108 | { 3, s_1_4, -1, 1, 0}, 109 | { 4, s_1_5, 4, 1, 0}, 110 | { 3, s_1_6, -1, 1, 0}, 111 | { 4, s_1_7, 6, 1, 0}, 112 | { 3, s_1_8, -1, 1, 0}, 113 | { 4, s_1_9, 8, 1, 0}, 114 | { 3, s_1_10, -1, 1, 0}, 115 | { 4, s_1_11, 10, 1, 0}, 116 | { 6, s_1_12, -1, 1, 0}, 117 | { 7, s_1_13, 12, 1, 0}, 118 | { 4, s_1_14, -1, 2, 0}, 119 | { 5, s_1_15, 14, 2, 0} 120 | }; 121 | /* s_2_*: suffix strings of the table a_2, which r_deriv searches backwards. */ 122 | static const symbol s_2_0[9] = { 0xC3, 0xB3, 'i', 'd', 'e', 'a', 'c', 'h', 'a' }; 123 | static const symbol s_2_1[7] = { 'p', 'a', 't', 'a', 'c', 'h', 'a' }; 124 | static const symbol s_2_2[5] = { 'a', 'c', 'h', 't', 'a' }; 125 | static const symbol s_2_3[8] = { 'a', 'r', 'c', 'a', 'c', 'h', 't', 'a' }; 126 | static const symbol s_2_4[6] = { 'e', 'a', 'c', 'h', 't', 'a' }; 127 | static const symbol s_2_5[12] = { 'g', 'r', 'a', 'f', 'a', 0xC3, 0xAD, 'o', 'c', 'h', 't', 'a' }; 128 | static const symbol s_2_6[5] = { 'p', 'a', 'i', 't', 'e' }; 129 | static const symbol s_2_7[3] = { 'a', 'c', 'h' }; 130 | static const symbol s_2_8[4] = { 'e', 'a', 'c', 'h' }; 131 | static const symbol s_2_9[8] = { 0xC3, 0xB3, 'i', 'd', 'e',
'a', 'c', 'h' }; 132 | static const symbol s_2_10[7] = { 'g', 'i', 'n', 'e', 'a', 'c', 'h' }; 133 | static const symbol s_2_11[6] = { 'p', 'a', 't', 'a', 'c', 'h' }; 134 | static const symbol s_2_12[10] = { 'g', 'r', 'a', 'f', 'a', 0xC3, 0xAD, 'o', 'c', 'h' }; 135 | static const symbol s_2_13[7] = { 'p', 'a', 't', 'a', 'i', 'g', 'h' }; 136 | static const symbol s_2_14[7] = { 0xC3, 0xB3, 'i', 'd', 'i', 'g', 'h' }; 137 | static const symbol s_2_15[8] = { 'a', 'c', 'h', 't', 0xC3, 0xBA, 'i', 'l' }; 138 | static const symbol s_2_16[9] = { 'e', 'a', 'c', 'h', 't', 0xC3, 0xBA, 'i', 'l' }; 139 | static const symbol s_2_17[6] = { 'g', 'i', 'n', 'e', 'a', 's' }; 140 | static const symbol s_2_18[5] = { 'g', 'i', 'n', 'i', 's' }; 141 | static const symbol s_2_19[4] = { 'a', 'c', 'h', 't' }; 142 | static const symbol s_2_20[7] = { 'a', 'r', 'c', 'a', 'c', 'h', 't' }; 143 | static const symbol s_2_21[5] = { 'e', 'a', 'c', 'h', 't' }; 144 | static const symbol s_2_22[11] = { 'g', 'r', 'a', 'f', 'a', 0xC3, 0xAD, 'o', 'c', 'h', 't' }; 145 | static const symbol s_2_23[10] = { 'a', 'r', 'c', 'a', 'c', 'h', 't', 'a', 0xC3, 0xAD }; 146 | static const symbol s_2_24[14] = { 'g', 'r', 'a', 'f', 'a', 0xC3, 0xAD, 'o', 'c', 'h', 't', 'a', 0xC3, 0xAD }; 147 | /* a_2: the 25 suffixes searched backwards by r_deriv. The fourth field takes the values 1..6, matching the case labels of the switch in r_deriv. */ 148 | static const struct among a_2[25] = 149 | { 150 | { 9, s_2_0, -1, 6, 0}, 151 | { 7, s_2_1, -1, 5, 0}, 152 | { 5, s_2_2, -1, 1, 0}, 153 | { 8, s_2_3, 2, 2, 0}, 154 | { 6, s_2_4, 2, 1, 0}, 155 | { 12, s_2_5, -1, 4, 0}, 156 | { 5, s_2_6, -1, 5, 0}, 157 | { 3, s_2_7, -1, 1, 0}, 158 | { 4, s_2_8, 7, 1, 0}, 159 | { 8, s_2_9, 8, 6, 0}, 160 | { 7, s_2_10, 8, 3, 0}, 161 | { 6, s_2_11, 7, 5, 0}, 162 | { 10, s_2_12, -1, 4, 0}, 163 | { 7, s_2_13, -1, 5, 0}, 164 | { 7, s_2_14, -1, 6, 0}, 165 | { 8, s_2_15, -1, 1, 0}, 166 | { 9, s_2_16, 15, 1, 0}, 167 | { 6, s_2_17, -1, 3, 0}, 168 | { 5, s_2_18, -1, 3, 0}, 169 | { 4, s_2_19, -1, 1, 0}, 170 | { 7, s_2_20, 19, 2, 0}, 171 | { 5, s_2_21, 19, 1, 0}, 172 | { 11, s_2_22, -1, 4, 0}, 173 | { 10, s_2_23,
-1, 2, 0}, 174 | { 14, s_2_24, -1, 4, 0} 175 | }; 176 | /* a_3: the 12 suffixes searched backwards by r_verb_sfx. The fourth field takes the values 1 and 2, matching the two case labels in r_verb_sfx. */ 177 | static const symbol s_3_0[4] = { 'i', 'm', 'i', 'd' }; 178 | static const symbol s_3_1[5] = { 'a', 'i', 'm', 'i', 'd' }; 179 | static const symbol s_3_2[5] = { 0xC3, 0xAD, 'm', 'i', 'd' }; 180 | static const symbol s_3_3[6] = { 'a', 0xC3, 0xAD, 'm', 'i', 'd' }; 181 | static const symbol s_3_4[3] = { 'a', 'd', 'h' }; 182 | static const symbol s_3_5[4] = { 'e', 'a', 'd', 'h' }; 183 | static const symbol s_3_6[5] = { 'f', 'a', 'i', 'd', 'h' }; 184 | static const symbol s_3_7[4] = { 'f', 'i', 'd', 'h' }; 185 | static const symbol s_3_8[4] = { 0xC3, 0xA1, 'i', 'l' }; 186 | static const symbol s_3_9[3] = { 'a', 'i', 'n' }; 187 | static const symbol s_3_10[4] = { 't', 'e', 'a', 'r' }; 188 | static const symbol s_3_11[3] = { 't', 'a', 'r' }; 189 | 190 | static const struct among a_3[12] = 191 | { 192 | { 4, s_3_0, -1, 1, 0}, 193 | { 5, s_3_1, 0, 1, 0}, 194 | { 5, s_3_2, -1, 1, 0}, 195 | { 6, s_3_3, 2, 1, 0}, 196 | { 3, s_3_4, -1, 2, 0}, 197 | { 4, s_3_5, 4, 2, 0}, 198 | { 5, s_3_6, -1, 1, 0}, 199 | { 4, s_3_7, -1, 1, 0}, 200 | { 4, s_3_8, -1, 2, 0}, 201 | { 3, s_3_9, -1, 2, 0}, 202 | { 4, s_3_10, -1, 2, 0}, 203 | { 3, s_3_11, -1, 2, 0} 204 | }; 205 | /* g_v: grouping bitmap handed to in_grouping_U and out_grouping_U by r_mark_regions together with the code point range 97..250. */ 206 | static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 2 }; 207 | /* Replacement strings passed to slice_from_s: s_0..s_8 by r_initial_morph, s_9..s_13 by r_deriv. */ 208 | static const symbol s_0[] = { 'f' }; 209 | static const symbol s_1[] = { 's' }; 210 | static const symbol s_2[] = { 'b' }; 211 | static const symbol s_3[] = { 'c' }; 212 | static const symbol s_4[] = { 'd' }; 213 | static const symbol s_5[] = { 'g' }; 214 | static const symbol s_6[] = { 'p' }; 215 | static const symbol s_7[] = { 't' }; 216 | static const symbol s_8[] = { 'm' }; 217 | static const symbol s_9[] = { 'a', 'r', 'c' }; 218 | static const symbol s_10[] = { 'g', 'i', 'n' }; 219 | static const symbol s_11[] = { 'g', 'r', 'a', 'f' }; 220 | static const symbol s_12[] = { 'p', 'a', 'i', 't', 'e' }; 221 | static const symbol s_13[] = { 0xC3,
0xB3, 'i', 'd' }; 222 | /* r_mark_regions: sets I[2], I[1] and I[0] to the limit l, then records the cursor in I[2], I[1] and I[0] after successive out_grouping_U and in_grouping_U scans over g_v. A failed scan jumps to lab0 and leaves the remaining marks at l. The cursor is restored before returning 1. */ 223 | static int r_mark_regions(struct SN_env * z) { 224 | z->I[2] = z->l; 225 | z->I[1] = z->l; 226 | z->I[0] = z->l; 227 | { int c1 = z->c; 228 | { 229 | int ret = out_grouping_U(z, g_v, 97, 250, 1); 230 | if (ret < 0) goto lab0; 231 | z->c += ret; 232 | } 233 | z->I[2] = z->c; /* the mark tested by r_RV */ 234 | { 235 | int ret = in_grouping_U(z, g_v, 97, 250, 1); 236 | if (ret < 0) goto lab0; 237 | z->c += ret; 238 | } 239 | z->I[1] = z->c; /* the mark tested by r_R1 */ 240 | { 241 | int ret = out_grouping_U(z, g_v, 97, 250, 1); 242 | if (ret < 0) goto lab0; 243 | z->c += ret; 244 | } 245 | { 246 | int ret = in_grouping_U(z, g_v, 97, 250, 1); 247 | if (ret < 0) goto lab0; 248 | z->c += ret; 249 | } 250 | z->I[0] = z->c; /* the mark tested by r_R2 */ 251 | lab0: 252 | z->c = c1; 253 | } 254 | return 1; 255 | } 256 | /* r_initial_morph: matches one of the a_0 patterns at the cursor; case 1 deletes it and cases 2..10 rewrite it through slice_from_s with length 1 (strings s_0..s_8). Returns 0 when nothing matches, 1 on success, or a negative value from a slice operation. */ 257 | static int r_initial_morph(struct SN_env * z) { 258 | int among_var; 259 | z->bra = z->c; 260 | among_var = find_among(z, a_0, 24); 261 | if (!(among_var)) return 0; 262 | z->ket = z->c; 263 | switch (among_var) { 264 | case 1: 265 | { int ret = slice_del(z); 266 | if (ret < 0) return ret; 267 | } 268 | break; 269 | case 2: 270 | { int ret = slice_from_s(z, 1, s_0); 271 | if (ret < 0) return ret; 272 | } 273 | break; 274 | case 3: 275 | { int ret = slice_from_s(z, 1, s_1); 276 | if (ret < 0) return ret; 277 | } 278 | break; 279 | case 4: 280 | { int ret = slice_from_s(z, 1, s_2); 281 | if (ret < 0) return ret; 282 | } 283 | break; 284 | case 5: 285 | { int ret = slice_from_s(z, 1, s_3); 286 | if (ret < 0) return ret; 287 | } 288 | break; 289 | case 6: 290 | { int ret = slice_from_s(z, 1, s_4); 291 | if (ret < 0) return ret; 292 | } 293 | break; 294 | case 7: 295 | { int ret = slice_from_s(z, 1, s_5); 296 | if (ret < 0) return ret; 297 | } 298 | break; 299 | case 8: 300 | { int ret = slice_from_s(z, 1, s_6); 301 | if (ret < 0) return ret; 302 | } 303 | break; 304 | case 9: 305 | { int ret = slice_from_s(z, 1, s_7); 306 | if (ret < 0) return ret; 307 | } 308 | break; 309 | case 10: 310 | { int ret =
slice_from_s(z, 1, s_8); 311 | if (ret < 0) return ret; 312 | } 313 | break; 314 | } 315 | return 1; 316 | } 317 | /* r_RV: returns 1 when the cursor is at or beyond the offset stored in I[2], otherwise 0. */ 318 | static int r_RV(struct SN_env * z) { 319 | if (!(z->I[2] <= z->c)) return 0; 320 | return 1; 321 | } 322 | /* r_R1: returns 1 when the cursor is at or beyond the offset stored in I[1], otherwise 0. */ 323 | static int r_R1(struct SN_env * z) { 324 | if (!(z->I[1] <= z->c)) return 0; 325 | return 1; 326 | } 327 | /* r_R2: returns 1 when the cursor is at or beyond the offset stored in I[0], otherwise 0. */ 328 | static int r_R2(struct SN_env * z) { 329 | if (!(z->I[0] <= z->c)) return 0; 330 | return 1; 331 | } 332 | /* r_noun_sfx: working backwards, matches an a_1 suffix and deletes it, provided r_R1 (case 1) or r_R2 (case 2) holds at the start of the match. Returns 0 when nothing matches or the region test fails, 1 after a deletion, or a negative value from slice_del. */ 333 | static int r_noun_sfx(struct SN_env * z) { 334 | int among_var; 335 | z->ket = z->c; 336 | among_var = find_among_b(z, a_1, 16); 337 | if (!(among_var)) return 0; 338 | z->bra = z->c; 339 | switch (among_var) { 340 | case 1: 341 | { int ret = r_R1(z); 342 | if (ret <= 0) return ret; 343 | } 344 | { int ret = slice_del(z); 345 | if (ret < 0) return ret; 346 | } 347 | break; 348 | case 2: 349 | { int ret = r_R2(z); 350 | if (ret <= 0) return ret; 351 | } 352 | { int ret = slice_del(z); 353 | if (ret < 0) return ret; 354 | } 355 | break; 356 | } 357 | return 1; 358 | } 359 | /* r_deriv: working backwards, matches an a_2 suffix. Case 1 deletes it when r_R2 holds; cases 2..6 rewrite it through slice_from_s with s_9..s_13 and apply no region test. Returns 0 when nothing matches or the region test fails, 1 on success, or a negative value from a slice operation. */ 360 | static int r_deriv(struct SN_env * z) { 361 | int among_var; 362 | z->ket = z->c; 363 | among_var = find_among_b(z, a_2, 25); 364 | if (!(among_var)) return 0; 365 | z->bra = z->c; 366 | switch (among_var) { 367 | case 1: 368 | { int ret = r_R2(z); 369 | if (ret <= 0) return ret; 370 | } 371 | { int ret = slice_del(z); 372 | if (ret < 0) return ret; 373 | } 374 | break; 375 | case 2: 376 | { int ret = slice_from_s(z, 3, s_9); 377 | if (ret < 0) return ret; 378 | } 379 | break; 380 | case 3: 381 | { int ret = slice_from_s(z, 3, s_10); 382 | if (ret < 0) return ret; 383 | } 384 | break; 385 | case 4: 386 | { int ret = slice_from_s(z, 4, s_11); 387 | if (ret < 0) return ret; 388 | } 389 | break; 390 | case 5: 391 | { int ret = slice_from_s(z, 5, s_12); 392 | if (ret < 0) return ret; 393 | } 394 | break; 395 | case 6: 396 | { int ret = slice_from_s(z, 4, s_13); 397 | if (ret < 0) return ret; 398 | } 399 | break; 400 | } 401 | return 1; 402 | }
403 | /* r_verb_sfx: working backwards, matches an a_3 suffix and deletes it, provided r_RV (case 1) or r_R1 (case 2) holds at the start of the match. Returns 0 when nothing matches or the region test fails, 1 after a deletion, or a negative value from slice_del. */ 404 | static int r_verb_sfx(struct SN_env * z) { 405 | int among_var; 406 | z->ket = z->c; 407 | if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((282896 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; /* NOTE(review): looks like a cheap last-byte pre-filter ahead of the table search - confirm against the Snowball generator */ 408 | among_var = find_among_b(z, a_3, 12); 409 | if (!(among_var)) return 0; 410 | z->bra = z->c; 411 | switch (among_var) { 412 | case 1: 413 | { int ret = r_RV(z); 414 | if (ret <= 0) return ret; 415 | } 416 | { int ret = slice_del(z); 417 | if (ret < 0) return ret; 418 | } 419 | break; 420 | case 2: 421 | { int ret = r_R1(z); 422 | if (ret <= 0) return ret; 423 | } 424 | { int ret = slice_del(z); 425 | if (ret < 0) return ret; 426 | } 427 | break; 428 | } 429 | return 1; 430 | } 431 | /* irish_UTF_8_stem: entry point of the Irish stemmer. Runs r_initial_morph (cursor restored afterwards) and r_mark_regions, then sets lb to the cursor and moves the cursor to the limit l so that r_noun_sfx, r_deriv and r_verb_sfx work backwards, each with the cursor restored afterwards. Finally resets the cursor to lb. Returns 1, or a negative value propagated from a sub-routine. */ 432 | extern int irish_UTF_8_stem(struct SN_env * z) { 433 | { int c1 = z->c; 434 | { int ret = r_initial_morph(z); 435 | if (ret < 0) return ret; 436 | } 437 | z->c = c1; 438 | } 439 | 440 | { int ret = r_mark_regions(z); 441 | if (ret < 0) return ret; 442 | } 443 | z->lb = z->c; z->c = z->l; 444 | 445 | { int m2 = z->l - z->c; (void)m2; 446 | { int ret = r_noun_sfx(z); 447 | if (ret < 0) return ret; 448 | } 449 | z->c = z->l - m2; 450 | } 451 | { int m3 = z->l - z->c; (void)m3; 452 | { int ret = r_deriv(z); 453 | if (ret < 0) return ret; 454 | } 455 | z->c = z->l - m3; 456 | } 457 | { int m4 = z->l - z->c; (void)m4; 458 | { int ret = r_verb_sfx(z); 459 | if (ret < 0) return ret; 460 | } 461 | z->c = z->l - m4; 462 | } 463 | z->c = z->lb; 464 | return 1; 465 | } 466 | /* Creates the environment through SN_create_env(0, 3); NOTE(review): the 3 presumably sizes the I[] array used by the three region marks - confirm in api.c. */ 467 | extern struct SN_env * irish_UTF_8_create_env(void) { return SN_create_env(0, 3); } 468 | /* Releases the environment through SN_close_env(z, 0). */ 469 | extern void irish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 470 | 471 | -------------------------------------------------------------------------------- /stem_UTF_8_irish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern
struct SN_env * irish_UTF_8_create_env(void); 8 | extern void irish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int irish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_italian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * italian_UTF_8_create_env(void); 8 | extern void italian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int italian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_lithuanian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * lithuanian_UTF_8_create_env(void); 8 | extern void lithuanian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int lithuanian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_nepali.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * nepali_UTF_8_create_env(void); 8 | extern void nepali_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int nepali_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_norwegian.c: 
-------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int norwegian_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_other_suffix(struct SN_env * z); 13 | static int r_consonant_pair(struct SN_env * z); 14 | static int r_main_suffix(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | 21 | extern struct SN_env * norwegian_UTF_8_create_env(void); 22 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 23 | 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | static const symbol s_0_0[1] = { 'a' }; 29 | static const symbol s_0_1[1] = { 'e' }; 30 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 31 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; 32 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 33 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 34 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 35 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 36 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 37 | static const symbol s_0_9[2] = { 'e', 'n' }; 38 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 39 | static const symbol s_0_11[2] = { 'a', 'r' }; 40 | static const symbol s_0_12[2] = { 'e', 'r' }; 41 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 42 | static const symbol s_0_14[1] = { 's' }; 43 | static const symbol s_0_15[2] = { 'a', 's' }; 44 | static const symbol s_0_16[2] = { 'e', 's' }; 45 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 46 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 47 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 48 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 49 | static const 
symbol s_0_21[3] = { 'e', 'n', 's' }; 50 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 51 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 52 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 53 | static const symbol s_0_25[2] = { 'e', 't' }; 54 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 55 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 56 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 57 | 58 | static const struct among a_0[29] = 59 | { 60 | { 1, s_0_0, -1, 1, 0}, 61 | { 1, s_0_1, -1, 1, 0}, 62 | { 3, s_0_2, 1, 1, 0}, 63 | { 4, s_0_3, 1, 1, 0}, 64 | { 4, s_0_4, 1, 1, 0}, 65 | { 3, s_0_5, 1, 1, 0}, 66 | { 3, s_0_6, 1, 1, 0}, 67 | { 6, s_0_7, 6, 1, 0}, 68 | { 4, s_0_8, 1, 3, 0}, 69 | { 2, s_0_9, -1, 1, 0}, 70 | { 5, s_0_10, 9, 1, 0}, 71 | { 2, s_0_11, -1, 1, 0}, 72 | { 2, s_0_12, -1, 1, 0}, 73 | { 5, s_0_13, 12, 1, 0}, 74 | { 1, s_0_14, -1, 2, 0}, 75 | { 2, s_0_15, 14, 1, 0}, 76 | { 2, s_0_16, 14, 1, 0}, 77 | { 4, s_0_17, 16, 1, 0}, 78 | { 5, s_0_18, 16, 1, 0}, 79 | { 4, s_0_19, 16, 1, 0}, 80 | { 7, s_0_20, 19, 1, 0}, 81 | { 3, s_0_21, 14, 1, 0}, 82 | { 6, s_0_22, 21, 1, 0}, 83 | { 3, s_0_23, 14, 1, 0}, 84 | { 3, s_0_24, 14, 1, 0}, 85 | { 2, s_0_25, -1, 1, 0}, 86 | { 3, s_0_26, 25, 1, 0}, 87 | { 3, s_0_27, -1, 3, 0}, 88 | { 3, s_0_28, -1, 1, 0} 89 | }; 90 | 91 | static const symbol s_1_0[2] = { 'd', 't' }; 92 | static const symbol s_1_1[2] = { 'v', 't' }; 93 | 94 | static const struct among a_1[2] = 95 | { 96 | { 2, s_1_0, -1, -1, 0}, 97 | { 2, s_1_1, -1, -1, 0} 98 | }; 99 | 100 | static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 101 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 102 | static const symbol s_2_2[2] = { 'i', 'g' }; 103 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 104 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 105 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 106 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 107 | static const symbol s_2_7[3] = { 'l', 'o', 'v' 
}; 108 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 109 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 110 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 111 | 112 | static const struct among a_2[11] = 113 | { 114 | { 3, s_2_0, -1, 1, 0}, 115 | { 4, s_2_1, 0, 1, 0}, 116 | { 2, s_2_2, -1, 1, 0}, 117 | { 3, s_2_3, 2, 1, 0}, 118 | { 3, s_2_4, 2, 1, 0}, 119 | { 4, s_2_5, 4, 1, 0}, 120 | { 3, s_2_6, -1, 1, 0}, 121 | { 3, s_2_7, -1, 1, 0}, 122 | { 4, s_2_8, 7, 1, 0}, 123 | { 4, s_2_9, 7, 1, 0}, 124 | { 7, s_2_10, 9, 1, 0} 125 | }; 126 | 127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 128 | 129 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 130 | 131 | static const symbol s_0[] = { 'e', 'r' }; 132 | 133 | static int r_mark_regions(struct SN_env * z) { 134 | z->I[1] = z->l; 135 | { int c_test1 = z->c; 136 | { int ret = skip_utf8(z->p, z->c, z->l, 3); 137 | if (ret < 0) return 0; 138 | z->c = ret; 139 | } 140 | z->I[0] = z->c; 141 | z->c = c_test1; 142 | } 143 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; 144 | { 145 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 146 | if (ret < 0) return 0; 147 | z->c += ret; 148 | } 149 | z->I[1] = z->c; 150 | 151 | if (!(z->I[1] < z->I[0])) goto lab0; 152 | z->I[1] = z->I[0]; 153 | lab0: 154 | return 1; 155 | } 156 | 157 | static int r_main_suffix(struct SN_env * z) { 158 | int among_var; 159 | 160 | { int mlimit1; 161 | if (z->c < z->I[1]) return 0; 162 | mlimit1 = z->lb; z->lb = z->I[1]; 163 | z->ket = z->c; 164 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 165 | among_var = find_among_b(z, a_0, 29); 166 | if (!(among_var)) { z->lb = mlimit1; return 0; } 167 | z->bra = z->c; 168 | z->lb = mlimit1; 169 | } 170 | switch (among_var) { 171 | case 1: 172 | { int ret = slice_del(z); 173 | if (ret < 0) return ret; 174 | } 175 | 
break; 176 | case 2: 177 | { int m2 = z->l - z->c; (void)m2; 178 | if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; 179 | goto lab0; 180 | lab1: 181 | z->c = z->l - m2; 182 | if (z->c <= z->lb || z->p[z->c - 1] != 'k') return 0; 183 | z->c--; 184 | if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; 185 | } 186 | lab0: 187 | { int ret = slice_del(z); 188 | if (ret < 0) return ret; 189 | } 190 | break; 191 | case 3: 192 | { int ret = slice_from_s(z, 2, s_0); 193 | if (ret < 0) return ret; 194 | } 195 | break; 196 | } 197 | return 1; 198 | } 199 | 200 | static int r_consonant_pair(struct SN_env * z) { 201 | { int m_test1 = z->l - z->c; 202 | 203 | { int mlimit2; 204 | if (z->c < z->I[1]) return 0; 205 | mlimit2 = z->lb; z->lb = z->I[1]; 206 | z->ket = z->c; 207 | if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit2; return 0; } 208 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit2; return 0; } 209 | z->bra = z->c; 210 | z->lb = mlimit2; 211 | } 212 | z->c = z->l - m_test1; 213 | } 214 | { int ret = skip_b_utf8(z->p, z->c, z->lb, 1); 215 | if (ret < 0) return 0; 216 | z->c = ret; 217 | } 218 | z->bra = z->c; 219 | { int ret = slice_del(z); 220 | if (ret < 0) return ret; 221 | } 222 | return 1; 223 | } 224 | 225 | static int r_other_suffix(struct SN_env * z) { 226 | 227 | { int mlimit1; 228 | if (z->c < z->I[1]) return 0; 229 | mlimit1 = z->lb; z->lb = z->I[1]; 230 | z->ket = z->c; 231 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 232 | if (!(find_among_b(z, a_2, 11))) { z->lb = mlimit1; return 0; } 233 | z->bra = z->c; 234 | z->lb = mlimit1; 235 | } 236 | { int ret = slice_del(z); 237 | if (ret < 0) return ret; 238 | } 239 | return 1; 240 | } 241 | 242 | extern int norwegian_UTF_8_stem(struct SN_env * z) { 243 | { int c1 = z->c; 244 | { int ret = r_mark_regions(z); 245 | if (ret < 0) return ret; 246 | } 247 | z->c = c1; 248 | } 249 | z->lb = z->c; z->c = z->l; 
250 | 251 | { int m2 = z->l - z->c; (void)m2; 252 | { int ret = r_main_suffix(z); 253 | if (ret < 0) return ret; 254 | } 255 | z->c = z->l - m2; 256 | } 257 | { int m3 = z->l - z->c; (void)m3; 258 | { int ret = r_consonant_pair(z); 259 | if (ret < 0) return ret; 260 | } 261 | z->c = z->l - m3; 262 | } 263 | { int m4 = z->l - z->c; (void)m4; 264 | { int ret = r_other_suffix(z); 265 | if (ret < 0) return ret; 266 | } 267 | z->c = z->l - m4; 268 | } 269 | z->c = z->lb; 270 | return 1; 271 | } 272 | 273 | extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2); } 274 | 275 | extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 276 | 277 | -------------------------------------------------------------------------------- /stem_UTF_8_norwegian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * norwegian_UTF_8_create_env(void); 8 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int norwegian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_porter.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * porter_UTF_8_create_env(void); 8 | extern void porter_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int porter_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_portuguese.h: -------------------------------------------------------------------------------- 1 | /* 
Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * portuguese_UTF_8_create_env(void); 8 | extern void portuguese_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int portuguese_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_romanian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * romanian_UTF_8_create_env(void); 8 | extern void romanian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int romanian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_russian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * russian_UTF_8_create_env(void); 8 | extern void russian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int russian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_serbian.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * serbian_UTF_8_create_env(void); 8 | extern void serbian_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int serbian_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 
16 | -------------------------------------------------------------------------------- /stem_UTF_8_spanish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * spanish_UTF_8_create_env(void); 8 | extern void spanish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int spanish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_swedish.c: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #include "header.h" 4 | 5 | #ifdef __cplusplus 6 | extern "C" { 7 | #endif 8 | extern int swedish_UTF_8_stem(struct SN_env * z); 9 | #ifdef __cplusplus 10 | } 11 | #endif 12 | static int r_other_suffix(struct SN_env * z); 13 | static int r_consonant_pair(struct SN_env * z); 14 | static int r_main_suffix(struct SN_env * z); 15 | static int r_mark_regions(struct SN_env * z); 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | 21 | extern struct SN_env * swedish_UTF_8_create_env(void); 22 | extern void swedish_UTF_8_close_env(struct SN_env * z); 23 | 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | static const symbol s_0_0[1] = { 'a' }; 29 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 30 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 31 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 33 | static const symbol s_0_5[2] = { 'a', 'd' }; 34 | static const symbol s_0_6[1] = { 'e' }; 35 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 36 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 37 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 38 | 
static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 39 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 40 | static const symbol s_0_12[2] = { 'e', 'n' }; 41 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 42 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; 43 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 44 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 45 | static const symbol s_0_17[2] = { 'a', 'r' }; 46 | static const symbol s_0_18[2] = { 'e', 'r' }; 47 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 48 | static const symbol s_0_20[2] = { 'o', 'r' }; 49 | static const symbol s_0_21[1] = { 's' }; 50 | static const symbol s_0_22[2] = { 'a', 's' }; 51 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; 52 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 53 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_26[2] = { 'e', 's' }; 55 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 56 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 57 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 58 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 59 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 60 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 61 | static const symbol s_0_33[2] = { 'a', 't' }; 62 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 63 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 64 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 65 | 66 | static const struct among a_0[37] = 67 | { 68 | { 1, s_0_0, -1, 1, 0}, 69 | { 4, s_0_1, 0, 1, 0}, 70 | { 4, s_0_2, 0, 1, 0}, 71 | { 7, s_0_3, 2, 1, 0}, 72 | { 4, s_0_4, 0, 1, 0}, 73 | { 2, s_0_5, -1, 1, 0}, 74 | { 1, s_0_6, -1, 1, 0}, 75 | { 3, s_0_7, 6, 1, 0}, 76 | { 4, s_0_8, 6, 1, 0}, 77 | { 4, s_0_9, 6, 1, 0}, 78 | { 3, s_0_10, 6, 1, 0}, 79 | { 4, s_0_11, 6, 1, 0}, 80 | { 2, s_0_12, -1, 1, 0}, 
81 | { 5, s_0_13, 12, 1, 0}, 82 | { 4, s_0_14, 12, 1, 0}, 83 | { 5, s_0_15, 12, 1, 0}, 84 | { 3, s_0_16, -1, 1, 0}, 85 | { 2, s_0_17, -1, 1, 0}, 86 | { 2, s_0_18, -1, 1, 0}, 87 | { 5, s_0_19, 18, 1, 0}, 88 | { 2, s_0_20, -1, 1, 0}, 89 | { 1, s_0_21, -1, 2, 0}, 90 | { 2, s_0_22, 21, 1, 0}, 91 | { 5, s_0_23, 22, 1, 0}, 92 | { 5, s_0_24, 22, 1, 0}, 93 | { 5, s_0_25, 22, 1, 0}, 94 | { 2, s_0_26, 21, 1, 0}, 95 | { 4, s_0_27, 26, 1, 0}, 96 | { 5, s_0_28, 26, 1, 0}, 97 | { 3, s_0_29, 21, 1, 0}, 98 | { 5, s_0_30, 29, 1, 0}, 99 | { 6, s_0_31, 29, 1, 0}, 100 | { 4, s_0_32, 21, 1, 0}, 101 | { 2, s_0_33, -1, 1, 0}, 102 | { 5, s_0_34, -1, 1, 0}, 103 | { 3, s_0_35, -1, 1, 0}, 104 | { 3, s_0_36, -1, 1, 0} 105 | }; 106 | 107 | static const symbol s_1_0[2] = { 'd', 'd' }; 108 | static const symbol s_1_1[2] = { 'g', 'd' }; 109 | static const symbol s_1_2[2] = { 'n', 'n' }; 110 | static const symbol s_1_3[2] = { 'd', 't' }; 111 | static const symbol s_1_4[2] = { 'g', 't' }; 112 | static const symbol s_1_5[2] = { 'k', 't' }; 113 | static const symbol s_1_6[2] = { 't', 't' }; 114 | 115 | static const struct among a_1[7] = 116 | { 117 | { 2, s_1_0, -1, -1, 0}, 118 | { 2, s_1_1, -1, -1, 0}, 119 | { 2, s_1_2, -1, -1, 0}, 120 | { 2, s_1_3, -1, -1, 0}, 121 | { 2, s_1_4, -1, -1, 0}, 122 | { 2, s_1_5, -1, -1, 0}, 123 | { 2, s_1_6, -1, -1, 0} 124 | }; 125 | 126 | static const symbol s_2_0[2] = { 'i', 'g' }; 127 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 128 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 129 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; 130 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; 131 | 132 | static const struct among a_2[5] = 133 | { 134 | { 2, s_2_0, -1, 1, 0}, 135 | { 3, s_2_1, 0, 1, 0}, 136 | { 3, s_2_2, -1, 1, 0}, 137 | { 5, s_2_3, -1, 3, 0}, 138 | { 5, s_2_4, -1, 2, 0} 139 | }; 140 | 141 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; 142 | 143 | static const 
unsigned char g_s_ending[] = { 119, 127, 149 }; 144 | 145 | static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; 146 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; 147 | 148 | static int r_mark_regions(struct SN_env * z) { 149 | z->I[1] = z->l; 150 | { int c_test1 = z->c; 151 | { int ret = skip_utf8(z->p, z->c, z->l, 3); 152 | if (ret < 0) return 0; 153 | z->c = ret; 154 | } 155 | z->I[0] = z->c; 156 | z->c = c_test1; 157 | } 158 | if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; 159 | { 160 | int ret = in_grouping_U(z, g_v, 97, 246, 1); 161 | if (ret < 0) return 0; 162 | z->c += ret; 163 | } 164 | z->I[1] = z->c; 165 | 166 | if (!(z->I[1] < z->I[0])) goto lab0; 167 | z->I[1] = z->I[0]; 168 | lab0: 169 | return 1; 170 | } 171 | 172 | static int r_main_suffix(struct SN_env * z) { 173 | int among_var; 174 | 175 | { int mlimit1; 176 | if (z->c < z->I[1]) return 0; 177 | mlimit1 = z->lb; z->lb = z->I[1]; 178 | z->ket = z->c; 179 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 180 | among_var = find_among_b(z, a_0, 37); 181 | if (!(among_var)) { z->lb = mlimit1; return 0; } 182 | z->bra = z->c; 183 | z->lb = mlimit1; 184 | } 185 | switch (among_var) { 186 | case 1: 187 | { int ret = slice_del(z); 188 | if (ret < 0) return ret; 189 | } 190 | break; 191 | case 2: 192 | if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; 193 | { int ret = slice_del(z); 194 | if (ret < 0) return ret; 195 | } 196 | break; 197 | } 198 | return 1; 199 | } 200 | 201 | static int r_consonant_pair(struct SN_env * z) { 202 | 203 | { int mlimit1; 204 | if (z->c < z->I[1]) return 0; 205 | mlimit1 = z->lb; z->lb = z->I[1]; 206 | { int m2 = z->l - z->c; (void)m2; 207 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 208 | if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit1; return 0; } 209 | z->c = z->l - m2; 210 | z->ket = 
z->c; 211 | { int ret = skip_b_utf8(z->p, z->c, z->lb, 1); 212 | if (ret < 0) { z->lb = mlimit1; return 0; } 213 | z->c = ret; 214 | } 215 | z->bra = z->c; 216 | { int ret = slice_del(z); 217 | if (ret < 0) return ret; 218 | } 219 | } 220 | z->lb = mlimit1; 221 | } 222 | return 1; 223 | } 224 | 225 | static int r_other_suffix(struct SN_env * z) { 226 | int among_var; 227 | 228 | { int mlimit1; 229 | if (z->c < z->I[1]) return 0; 230 | mlimit1 = z->lb; z->lb = z->I[1]; 231 | z->ket = z->c; 232 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit1; return 0; } 233 | among_var = find_among_b(z, a_2, 5); 234 | if (!(among_var)) { z->lb = mlimit1; return 0; } 235 | z->bra = z->c; 236 | switch (among_var) { 237 | case 1: 238 | { int ret = slice_del(z); 239 | if (ret < 0) return ret; 240 | } 241 | break; 242 | case 2: 243 | { int ret = slice_from_s(z, 4, s_0); 244 | if (ret < 0) return ret; 245 | } 246 | break; 247 | case 3: 248 | { int ret = slice_from_s(z, 4, s_1); 249 | if (ret < 0) return ret; 250 | } 251 | break; 252 | } 253 | z->lb = mlimit1; 254 | } 255 | return 1; 256 | } 257 | 258 | extern int swedish_UTF_8_stem(struct SN_env * z) { 259 | { int c1 = z->c; 260 | { int ret = r_mark_regions(z); 261 | if (ret < 0) return ret; 262 | } 263 | z->c = c1; 264 | } 265 | z->lb = z->c; z->c = z->l; 266 | 267 | { int m2 = z->l - z->c; (void)m2; 268 | { int ret = r_main_suffix(z); 269 | if (ret < 0) return ret; 270 | } 271 | z->c = z->l - m2; 272 | } 273 | { int m3 = z->l - z->c; (void)m3; 274 | { int ret = r_consonant_pair(z); 275 | if (ret < 0) return ret; 276 | } 277 | z->c = z->l - m3; 278 | } 279 | { int m4 = z->l - z->c; (void)m4; 280 | { int ret = r_other_suffix(z); 281 | if (ret < 0) return ret; 282 | } 283 | z->c = z->l - m4; 284 | } 285 | z->c = z->lb; 286 | return 1; 287 | } 288 | 289 | extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2); } 290 | 291 | extern void 
swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 292 | 293 | -------------------------------------------------------------------------------- /stem_UTF_8_swedish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * swedish_UTF_8_create_env(void); 8 | extern void swedish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int swedish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_tamil.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * tamil_UTF_8_create_env(void); 8 | extern void tamil_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int tamil_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_turkish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | extern struct SN_env * turkish_UTF_8_create_env(void); 8 | extern void turkish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int turkish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /stem_UTF_8_yiddish.h: -------------------------------------------------------------------------------- 1 | /* Generated by Snowball 2.2.0 - https://snowballstem.org/ */ 2 | 3 | #ifdef __cplusplus 4 | extern 
"C" { 5 | #endif 6 | 7 | extern struct SN_env * yiddish_UTF_8_create_env(void); 8 | extern void yiddish_UTF_8_close_env(struct SN_env * z); 9 | 10 | extern int yiddish_UTF_8_stem(struct SN_env * z); 11 | 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /update-c.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Fetch snowball sources 3 | 4 | case $1 in 5 | -h | --help ) echo "usage: $(basename $0) VERSION"; exit;; 6 | esac 7 | 8 | if [ $# -ne 1 ]; then 9 | echo "error: wrong number of arguments" 1>&2 10 | # Stop here: with no VERSION the steps below would still run `rm *.c *.h` 11 | # and then have nothing valid to download in their place. 12 | exit 1 13 | fi 14 | 15 | set -e 16 | 17 | version=$1 18 | echo "upgrading to version ${version}" 19 | lib=libstemmer_c-${version} 20 | tarball=${lib}.tar.gz 21 | 22 | rm *.c *.h 23 | curl -LO https://snowballstem.org/dist/${tarball} 24 | tar xzf ${tarball} 25 | find ${lib} -name '*.[ch]' -exec cp -v {} . \; 26 | rm stemwords.c libstemmer_utf8.c # example and duplicate 27 | rm -rf ${lib} ${tarball} 28 | sed -i 's|include "../[a-z_]\+/|include "|' *.{c,h} 29 | echo ${version} > lib-version.txt 30 | -------------------------------------------------------------------------------- /utilities.c: -------------------------------------------------------------------------------- 1 | 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | #include <string.h> 5 | 6 | #include "header.h" 7 | 8 | #define CREATE_SIZE 1 9 | /* create_s: allocate an empty symbol buffer of HEAD bytes followed by room for CREATE_SIZE + 1 symbols. Returns a pointer to the symbol area (HEAD bytes past the start of the allocation) with CAPACITY set to CREATE_SIZE and the size set to 0, or NULL if malloc fails. */ 10 | extern symbol * create_s(void) { 11 | symbol * p; 12 | void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); 13 | if (mem == NULL) return NULL; 14 | p = (symbol *) (HEAD + (char *) mem); 15 | CAPACITY(p) = CREATE_SIZE; 16 | SET_SIZE(p, 0); 17 | return p; 18 | } 19 | /* lose_s: release a buffer laid out as in create_s() by stepping back HEAD bytes to the address the allocator returned. A NULL p is ignored. */ 20 | extern void lose_s(symbol * p) { 21 | if (p == NULL) return; 22 | free((char *) p - HEAD); 23 | } 24 | 25 | /* 26 | new_p = skip_utf8(p, c, l, n); skips n characters forwards from p + c. 27 | new_p is the new position, or -1 on failure.
28 | 29 | -- used to implement hop and next in the utf8 case. 30 | */ 31 | 32 | extern int skip_utf8(const symbol * p, int c, int limit, int n) { 33 | int b; 34 | if (n < 0) return -1; 35 | for (; n > 0; n--) { 36 | if (c >= limit) return -1; 37 | b = p[c++]; 38 | if (b >= 0xC0) { /* 1100 0000 */ 39 | while (c < limit) { 40 | b = p[c]; 41 | if (b >= 0xC0 || b < 0x80) break; 42 | /* break unless b is 10------ */ 43 | c++; 44 | } 45 | } 46 | } 47 | return c; 48 | } 49 | 50 | /* 51 | new_p = skip_b_utf8(p, c, lb, n); skips n characters backwards from p + c - 1 52 | new_p is the new position, or -1 on failure. 53 | 54 | -- used to implement hop and next in the utf8 case. 55 | */ 56 | 57 | extern int skip_b_utf8(const symbol * p, int c, int limit, int n) { 58 | int b; 59 | if (n < 0) return -1; 60 | for (; n > 0; n--) { 61 | if (c <= limit) return -1; 62 | b = p[--c]; 63 | if (b >= 0x80) { /* 1000 0000 */ 64 | while (c > limit) { 65 | b = p[c]; 66 | if (b >= 0xC0) break; /* 1100 0000 */ 67 | c--; 68 | } 69 | } 70 | } 71 | return c; 72 | } 73 | 74 | /* Code for character groupings: utf8 cases */ 75 | /* get_utf8: decode the UTF-8 character that starts at p[c] into *slot, never reading at or beyond p[l]. Returns the number of bytes the character occupies (1 to 4), or 0 when c >= l. A sequence cut short by l is decoded from the bytes that are available. */ 76 | static int get_utf8(const symbol * p, int c, int l, int * slot) { 77 | int b0, b1, b2; 78 | if (c >= l) return 0; 79 | b0 = p[c++]; 80 | if (b0 < 0xC0 || c == l) { /* 1100 0000 */ 81 | *slot = b0; 82 | return 1; 83 | } 84 | b1 = p[c++] & 0x3F; 85 | if (b0 < 0xE0 || c == l) { /* 1110 0000 */ 86 | *slot = (b0 & 0x1F) << 6 | b1; 87 | return 2; 88 | } 89 | b2 = p[c++] & 0x3F; 90 | if (b0 < 0xF0 || c == l) { /* 1111 0000 */ 91 | *slot = (b0 & 0xF) << 12 | b1 << 6 | b2; 92 | return 3; 93 | } 94 | *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F); 95 | return 4; 96 | } 97 | /* get_b_utf8: backward counterpart of get_utf8 - decode the character that ends just before p[c] into *slot, never reading below p[lb]. Returns its width in bytes (1 to 4), or 0 when c <= lb. */ 98 | static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { 99 | int a, b; 100 | if (c <= lb) return 0; 101 | b = p[--c]; 102 | if (b < 0x80 || c == lb) { /* 1000 0000 */ 103 | *slot = b; 104 | return 1; 105 | } 106 | a = b & 0x3F; 107 | b = p[--c]; 108 | if (b >= 0xC0 || c == lb)
{ /* 1100 0000 */ 109 | *slot = (b & 0x1F) << 6 | a; 110 | return 2; 111 | } 112 | a |= (b & 0x3F) << 6; 113 | b = p[--c]; 114 | if (b >= 0xE0 || c == lb) { /* 1110 0000 */ 115 | *slot = (b & 0xF) << 12 | a; 116 | return 3; 117 | } 118 | *slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a; 119 | return 4; 120 | } 121 | /* Grouping tests. A grouping s is a bitmap: character ch is a member when min <= ch <= max and bit ((ch - min) & 7) of s[(ch - min) >> 3] is set. in_grouping_U returns -1 if there is no character at the cursor before z->l; returns that character's width, leaving the cursor where it is, if the character is not a member; otherwise it moves the cursor past the character. With repeat non-zero it keeps going over following members, so 0 is returned only when repeat is zero and one member was consumed. */ 122 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 123 | do { 124 | int ch; 125 | int w = get_utf8(z->p, z->c, z->l, & ch); 126 | if (!w) return -1; 127 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 128 | return w; 129 | z->c += w; 130 | } while (repeat); 131 | return 0; 132 | } 133 | /* in_grouping_b_U: as in_grouping_U, but examines the character before the cursor (bounded by z->lb) and moves the cursor backwards. */ 134 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 135 | do { 136 | int ch; 137 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 138 | if (!w) return -1; 139 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 140 | return w; 141 | z->c -= w; 142 | } while (repeat); 143 | return 0; 144 | } 145 | /* out_grouping_U: the complementary test - moves the cursor past characters that are NOT members and returns the width of the first member met without consuming it; -1 when z->l is reached. */ 146 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 147 | do { 148 | int ch; 149 | int w = get_utf8(z->p, z->c, z->l, & ch); 150 | if (!w) return -1; 151 | if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 152 | return w; 153 | z->c += w; 154 | } while (repeat); 155 | return 0; 156 | } 157 | /* out_grouping_b_U: backward form of out_grouping_U, bounded by z->lb. */ 158 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 159 | do { 160 | int ch; 161 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 162 | if (!w) return -1; 163 | if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 164 | return w; 165 | z->c -= w; 166 | } while (repeat); 167 | return 0; 168 | } 169 | 170 | /* Code for character groupings: non-utf8 cases */ 171 | 172 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int
repeat) { 173 | do { 174 | int ch; 175 | if (z->c >= z->l) return -1; 176 | ch = z->p[z->c]; 177 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 178 | return 1; 179 | z->c++; 180 | } while (repeat); 181 | return 0; 182 | } 183 | /* in_grouping_b: single-symbol backward form - tests z->p[z->c - 1], bounded by z->lb; the width reported for a non-member is always 1. */ 184 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 185 | do { 186 | int ch; 187 | if (z->c <= z->lb) return -1; 188 | ch = z->p[z->c - 1]; 189 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 190 | return 1; 191 | z->c--; 192 | } while (repeat); 193 | return 0; 194 | } 195 | /* out_grouping: single-symbol forward form of the complementary test - steps over non-members, returns 1 on meeting a member and -1 at z->l. */ 196 | extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 197 | do { 198 | int ch; 199 | if (z->c >= z->l) return -1; 200 | ch = z->p[z->c]; 201 | if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 202 | return 1; 203 | z->c++; 204 | } while (repeat); 205 | return 0; 206 | } 207 | /* out_grouping_b: single-symbol backward form of out_grouping, bounded by z->lb. */ 208 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 209 | do { 210 | int ch; 211 | if (z->c <= z->lb) return -1; 212 | ch = z->p[z->c - 1]; 213 | if (!(ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0)) 214 | return 1; 215 | z->c--; 216 | } while (repeat); 217 | return 0; 218 | } 219 | /* eq_s: if at least s_size symbols remain before z->l and they equal s, move the cursor past them and return 1; otherwise return 0 and leave the cursor unchanged. */ 220 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { 221 | if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; 222 | z->c += s_size; return 1; 223 | } 224 | /* eq_s_b: backward form of eq_s - compares the s_size symbols that end at the cursor (bounded by z->lb) and moves the cursor back over them on a match. */ 225 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { 226 | if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; 227 | z->c -= s_size; return 1; 228 | } 229 | /* eq_v and eq_v_b: as eq_s and eq_s_b, with the length taken from SIZE(p). */ 230 | extern int eq_v(struct SN_env * z, const symbol * p) { 231 | return eq_s(z, SIZE(p), p); 232 | } 233 | 234 | extern int eq_v_b(struct SN_env * z, const symbol * p) { 235 | return eq_s_b(z, SIZE(p), p);
236 | } 237 | /* find_among: match the text at the cursor against the v_size entries of v. The first loop is a binary search on the entries' strings, reusing the count of symbols already known to match on each side (common_i, common_j). NOTE(review): this presumes v is ordered by string - confirm against the table generator. The second loop starts at entry i and follows substring_i links until it finds an entry that is fully matched and either has no function or whose function returns non-zero; the cursor is then left w->s_size symbols past its starting point and w->result is returned. Returns 0 once a substring_i below 0 ends the chain. */ 238 | extern int find_among(struct SN_env * z, const struct among * v, int v_size) { 239 | 240 | int i = 0; 241 | int j = v_size; 242 | 243 | int c = z->c; int l = z->l; 244 | const symbol * q = z->p + c; 245 | 246 | const struct among * w; 247 | 248 | int common_i = 0; 249 | int common_j = 0; 250 | 251 | int first_key_inspected = 0; 252 | 253 | while (1) { 254 | int k = i + ((j - i) >> 1); 255 | int diff = 0; 256 | int common = common_i < common_j ? common_i : common_j; /* smaller */ 257 | w = v + k; 258 | { 259 | int i2; for (i2 = common; i2 < w->s_size; i2++) { 260 | if (c + common == l) { diff = -1; break; } 261 | diff = q[common] - w->s[i2]; 262 | if (diff != 0) break; 263 | common++; 264 | } 265 | } 266 | if (diff < 0) { 267 | j = k; 268 | common_j = common; 269 | } else { 270 | i = k; 271 | common_i = common; 272 | } 273 | if (j - i <= 1) { 274 | if (i > 0) break; /* v->s has been inspected */ 275 | if (j == i) break; /* only one item in v */ 276 | 277 | /* - but now we need to go round once more to get 278 | v->s inspected. This looks messy, but is actually 279 | the optimal approach. */ 280 | 281 | if (first_key_inspected) break; 282 | first_key_inspected = 1; 283 | } 284 | } 285 | while (1) { 286 | w = v + i; 287 | if (common_i >= w->s_size) { 288 | z->c = c + w->s_size; 289 | if (w->function == 0) return w->result; 290 | { 291 | int res = w->function(z); 292 | z->c = c + w->s_size; 293 | if (res) return w->result; 294 | } 295 | } 296 | i = w->substring_i; 297 | if (i < 0) return 0; 298 | } 299 | } 300 | 301 | /* find_among_b is for backwards processing.
Same comments apply */ 302 | 303 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { 304 | 305 | int i = 0; 306 | int j = v_size; 307 | 308 | int c = z->c; int lb = z->lb; 309 | const symbol * q = z->p + c - 1; 310 | 311 | const struct among * w; 312 | 313 | int common_i = 0; 314 | int common_j = 0; 315 | 316 | int first_key_inspected = 0; 317 | 318 | while (1) { 319 | int k = i + ((j - i) >> 1); 320 | int diff = 0; 321 | int common = common_i < common_j ? common_i : common_j; 322 | w = v + k; 323 | { 324 | int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { 325 | if (c - common == lb) { diff = -1; break; } 326 | diff = q[- common] - w->s[i2]; 327 | if (diff != 0) break; 328 | common++; 329 | } 330 | } 331 | if (diff < 0) { j = k; common_j = common; } 332 | else { i = k; common_i = common; } 333 | if (j - i <= 1) { 334 | if (i > 0) break; 335 | if (j == i) break; 336 | if (first_key_inspected) break; 337 | first_key_inspected = 1; 338 | } 339 | } 340 | while (1) { 341 | w = v + i; 342 | if (common_i >= w->s_size) { 343 | z->c = c - w->s_size; 344 | if (w->function == 0) return w->result; 345 | { 346 | int res = w->function(z); 347 | z->c = c - w->s_size; 348 | if (res) return w->result; 349 | } 350 | } 351 | i = w->substring_i; 352 | if (i < 0) return 0; 353 | } 354 | } 355 | 356 | 357 | /* Increase the size of the buffer pointed to by p to at least n symbols. 358 | * If insufficient memory, returns NULL and frees the old buffer. 359 | */ 360 | static symbol * increase_size(symbol * p, int n) { 361 | symbol * q; 362 | int new_size = n + 20; 363 | void * mem = realloc((char *) p - HEAD, 364 | HEAD + (new_size + 1) * sizeof(symbol)); 365 | if (mem == NULL) { 366 | lose_s(p); 367 | return NULL; 368 | } 369 | q = (symbol *) (HEAD + (char *)mem); 370 | CAPACITY(q) = new_size; 371 | return q; 372 | } 373 | 374 | /* to replace symbols between c_bra and c_ket in z->p by the 375 | s_size symbols at s. 
376 | Returns 0 on success, -1 on error. 377 | Also, frees z->p (and sets it to NULL) on error. 378 | */ 379 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) 380 | { 381 | int adjustment; 382 | int len; 383 | if (z->p == NULL) { 384 | z->p = create_s(); 385 | if (z->p == NULL) return -1; 386 | } 387 | adjustment = s_size - (c_ket - c_bra); 388 | len = SIZE(z->p); 389 | if (adjustment != 0) { 390 | if (adjustment + len > CAPACITY(z->p)) { 391 | z->p = increase_size(z->p, adjustment + len); 392 | if (z->p == NULL) return -1; 393 | } 394 | memmove(z->p + c_ket + adjustment, 395 | z->p + c_ket, 396 | (len - c_ket) * sizeof(symbol)); 397 | SET_SIZE(z->p, adjustment + len); 398 | z->l += adjustment; 399 | if (z->c >= c_ket) 400 | z->c += adjustment; 401 | else if (z->c > c_bra) 402 | z->c = c_bra; 403 | } 404 | if (s_size) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); 405 | if (adjptr != NULL) 406 | *adjptr = adjustment; 407 | return 0; 408 | } 409 | 410 | static int slice_check(struct SN_env * z) { 411 | 412 | if (z->bra < 0 || 413 | z->bra > z->ket || 414 | z->ket > z->l || 415 | z->p == NULL || 416 | z->l > SIZE(z->p)) /* this line could be removed */ 417 | { 418 | #if 0 419 | fprintf(stderr, "faulty slice operation:\n"); 420 | debug(z, -1, 0); 421 | #endif 422 | return -1; 423 | } 424 | return 0; 425 | } 426 | 427 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { 428 | if (slice_check(z)) return -1; 429 | return replace_s(z, z->bra, z->ket, s_size, s, NULL); 430 | } 431 | 432 | extern int slice_from_v(struct SN_env * z, const symbol * p) { 433 | return slice_from_s(z, SIZE(p), p); 434 | } 435 | 436 | extern int slice_del(struct SN_env * z) { 437 | return slice_from_s(z, 0, 0); 438 | } 439 | 440 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { 441 | int adjustment; 442 | if (replace_s(z, bra, ket, s_size, s, &adjustment)) 443 | return 
-1; 444 | if (bra <= z->bra) z->bra += adjustment; 445 | if (bra <= z->ket) z->ket += adjustment; 446 | return 0; 447 | } 448 | 449 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { 450 | return insert_s(z, bra, ket, SIZE(p), p); 451 | } 452 | 453 | extern symbol * slice_to(struct SN_env * z, symbol * p) { 454 | if (slice_check(z)) { 455 | lose_s(p); 456 | return NULL; 457 | } 458 | { 459 | int len = z->ket - z->bra; 460 | if (CAPACITY(p) < len) { 461 | p = increase_size(p, len); 462 | if (p == NULL) 463 | return NULL; 464 | } 465 | memmove(p, z->p + z->bra, len * sizeof(symbol)); 466 | SET_SIZE(p, len); 467 | } 468 | return p; 469 | } 470 | 471 | extern symbol * assign_to(struct SN_env * z, symbol * p) { 472 | int len = z->l; 473 | if (CAPACITY(p) < len) { 474 | p = increase_size(p, len); 475 | if (p == NULL) 476 | return NULL; 477 | } 478 | memmove(p, z->p, len * sizeof(symbol)); 479 | SET_SIZE(p, len); 480 | return p; 481 | } 482 | 483 | extern int len_utf8(const symbol * p) { 484 | int size = SIZE(p); 485 | int len = 0; 486 | while (size--) { 487 | symbol b = *p++; 488 | if (b >= 0xC0 || b < 0x80) ++len; 489 | } 490 | return len; 491 | } 492 | 493 | #if 0 494 | extern void debug(struct SN_env * z, int number, int line_count) { 495 | int i; 496 | int limit = SIZE(z->p); 497 | /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ 498 | if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); 499 | for (i = 0; i <= limit; i++) { 500 | if (z->lb == i) printf("{"); 501 | if (z->bra == i) printf("["); 502 | if (z->c == i) printf("|"); 503 | if (z->ket == i) printf("]"); 504 | if (z->l == i) printf("}"); 505 | if (i < limit) 506 | { int ch = z->p[i]; 507 | if (ch == 0) ch = '#'; 508 | printf("%c", ch); 509 | } 510 | } 511 | printf("'\n"); 512 | } 513 | #endif 514 | --------------------------------------------------------------------------------