├── .gitignore ├── License ├── README.md ├── config.m4 ├── libstemmer_c ├── MANIFEST ├── Makefile ├── README ├── examples │ └── stemwords.c ├── include │ └── libstemmer.h ├── libstemmer │ ├── libstemmer.c │ ├── libstemmer_c.in │ ├── libstemmer_utf8.c │ ├── modules.h │ ├── modules.txt │ ├── modules_utf8.h │ └── modules_utf8.txt ├── mkinc.mak ├── mkinc_utf8.mak ├── runtime │ ├── api.c │ ├── api.h │ ├── header.h │ └── utilities.c └── src_c │ ├── stem_ISO_8859_1_danish.c │ ├── stem_ISO_8859_1_danish.h │ ├── stem_ISO_8859_1_dutch.c │ ├── stem_ISO_8859_1_dutch.h │ ├── stem_ISO_8859_1_english.c │ ├── stem_ISO_8859_1_english.h │ ├── stem_ISO_8859_1_finnish.c │ ├── stem_ISO_8859_1_finnish.h │ ├── stem_ISO_8859_1_french.c │ ├── stem_ISO_8859_1_french.h │ ├── stem_ISO_8859_1_german.c │ ├── stem_ISO_8859_1_german.h │ ├── stem_ISO_8859_1_hungarian.c │ ├── stem_ISO_8859_1_hungarian.h │ ├── stem_ISO_8859_1_italian.c │ ├── stem_ISO_8859_1_italian.h │ ├── stem_ISO_8859_1_norwegian.c │ ├── stem_ISO_8859_1_norwegian.h │ ├── stem_ISO_8859_1_porter.c │ ├── stem_ISO_8859_1_porter.h │ ├── stem_ISO_8859_1_portuguese.c │ ├── stem_ISO_8859_1_portuguese.h │ ├── stem_ISO_8859_1_spanish.c │ ├── stem_ISO_8859_1_spanish.h │ ├── stem_ISO_8859_1_swedish.c │ ├── stem_ISO_8859_1_swedish.h │ ├── stem_ISO_8859_2_romanian.c │ ├── stem_ISO_8859_2_romanian.h │ ├── stem_KOI8_R_russian.c │ ├── stem_KOI8_R_russian.h │ ├── stem_UTF_8_danish.c │ ├── stem_UTF_8_danish.h │ ├── stem_UTF_8_dutch.c │ ├── stem_UTF_8_dutch.h │ ├── stem_UTF_8_dutch_bad.c │ ├── stem_UTF_8_english.c │ ├── stem_UTF_8_english.h │ ├── stem_UTF_8_finnish.c │ ├── stem_UTF_8_finnish.h │ ├── stem_UTF_8_french.c │ ├── stem_UTF_8_french.h │ ├── stem_UTF_8_german.c │ ├── stem_UTF_8_german.h │ ├── stem_UTF_8_hungarian.c │ ├── stem_UTF_8_hungarian.h │ ├── stem_UTF_8_italian.c │ ├── stem_UTF_8_italian.h │ ├── stem_UTF_8_norwegian.c │ ├── stem_UTF_8_norwegian.h │ ├── stem_UTF_8_porter.c │ ├── stem_UTF_8_porter.h │ ├── stem_UTF_8_portuguese.c │ ├── 
stem_UTF_8_portuguese.h │ ├── stem_UTF_8_romanian.c │ ├── stem_UTF_8_romanian.h │ ├── stem_UTF_8_russian.c │ ├── stem_UTF_8_russian.h │ ├── stem_UTF_8_spanish.c │ ├── stem_UTF_8_spanish.h │ ├── stem_UTF_8_swedish.c │ ├── stem_UTF_8_swedish.h │ ├── stem_UTF_8_turkish.c │ └── stem_UTF_8_turkish.h ├── php_stemmer.h ├── stemmer.c └── tests └── stemword_function_001.phpt /.gitignore: -------------------------------------------------------------------------------- 1 | # useless files 2 | .project 3 | .cache 4 | .settings 5 | *.DS_Store 6 | *~ 7 | *.svn 8 | ._* 9 | *Thumbs.db 10 | \#Untitled-*# 11 | *.marks 12 | \#*# 13 | 14 | # generated files 15 | *.a 16 | *.o 17 | *.lo 18 | *.la 19 | .deps 20 | .libs 21 | build/ 22 | modules/ 23 | Makefile 24 | Makefile.global 25 | Makefile.objects 26 | Makefile.fragments 27 | acinclude.m4 28 | aclocal.m4 29 | autom4te.cache/output.0 30 | autom4te.cache/requests 31 | autom4te.cache/traces.0 32 | build/libtool.m4 33 | build/mkdep.awk 34 | build/shtool 35 | config.guess 36 | config.h 37 | config.h.in 38 | config.log 39 | config.status 40 | config.sub 41 | configure 42 | configure.in 43 | libstemmer_c/stemwords 44 | libtool 45 | ltmain.sh 46 | run-tests.php 47 | config.nice 48 | install-sh 49 | missing 50 | mkinstalldirs 51 | 52 | -------------------------------------------------------------------------------- /License: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008, Javeline B.V. 2 | All rights reserved. 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 
8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the University of California, Berkeley nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY 16 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | php-stemmer 2 | =========== 3 | 4 | This stem extension for PHP provides stemming capability for a variety of 5 | languages using Dr. M.F. Porter's Snowball API. 6 | 7 | It has a much simpler API than the stem extension found in pecl. 8 | 9 | Usage Example 10 | -------------------- 11 | 12 | 16 | 17 | Install 18 | -------------------- 19 | 20 | The stemmer PHP extension can be installed following the instructions about 21 | building PHP extensions using phpize as described in the [PHP manual](http://www.php.net/manual/en/install.pecl.phpize.php). 
22 | 23 | To build this extension, you need to have the PHP development tools installed. 24 | On Ubuntu/Debian you can install them with apt-get install php5-dev. 25 | 26 | The phpize command is used to prepare the build environment for a PHP extension. 27 | 28 | In the following sample, the sources for the extension are in a directory named php-stemmer: 29 | 30 | # git clone https://github.com/hthetiot/php-stemmer.git 31 | # cd php-stemmer 32 | # phpize 33 | # ./configure 34 | # make -C libstemmer_c 35 | # make 36 | # [sudo] make install 37 | 38 | Edit your php.ini file and add the line extension=stemmer.so 39 | 40 | About libstemmer_c 41 | -------------------- 42 | 43 | The stemmer PHP extension uses a modified version of libstemmer_c. 44 | 45 | It has replaced the default Dutch stemming algorithm with the much better Kraaij-Pohlmann Dutch stemming algorithm. 46 | The modified version of this lib can be downloaded from [mysqludf.org](http://www.mysqludf.org/lib_mysqludf_stem/). 47 | 48 | Original Source 49 | -------------------- 50 | 51 | This version is a fork of php-stemmer hosted on Google Code, originally made by 52 | Javeline B.V. and available here: http://code.google.com/p/php-stemmer/ 53 | 54 | Licence 55 | -------------------- 56 | [New BSD License](http://opensource.org/licenses/BSD-3-Clause) 57 | 58 | See License file for details 59 | -------------------------------------------------------------------------------- /config.m4: -------------------------------------------------------------------------------- 1 | PHP_ARG_ENABLE(stemmer, whether to enable stemmer, 2 | [ --enable-stemmer Enable stemmer support]) 3 | 4 | if test "$PHP_STEMMER" = "yes"; then 5 | AC_DEFINE(HAVE_STEMMER, 1, [Whether you have stemmer]) 6 | PHP_ADD_LIBRARY_WITH_PATH(stemmer, libstemmer_c, STEMMER_SHARED_LIBADD) 7 | PHP_NEW_EXTENSION(stemmer, stemmer.c, $ext_shared) 8 | PHP_SUBST(STEMMER_SHARED_LIBADD) 9 | fi -------------------------------------------------------------------------------- 
/libstemmer_c/MANIFEST: -------------------------------------------------------------------------------- 1 | README 2 | src_c/stem_ISO_8859_1_danish.c 3 | src_c/stem_ISO_8859_1_danish.h 4 | src_c/stem_ISO_8859_1_dutch.c 5 | src_c/stem_ISO_8859_1_dutch.h 6 | src_c/stem_ISO_8859_1_english.c 7 | src_c/stem_ISO_8859_1_english.h 8 | src_c/stem_ISO_8859_1_finnish.c 9 | src_c/stem_ISO_8859_1_finnish.h 10 | src_c/stem_ISO_8859_1_french.c 11 | src_c/stem_ISO_8859_1_french.h 12 | src_c/stem_ISO_8859_1_german.c 13 | src_c/stem_ISO_8859_1_german.h 14 | src_c/stem_ISO_8859_1_hungarian.c 15 | src_c/stem_ISO_8859_1_hungarian.h 16 | src_c/stem_ISO_8859_1_italian.c 17 | src_c/stem_ISO_8859_1_italian.h 18 | src_c/stem_ISO_8859_1_norwegian.c 19 | src_c/stem_ISO_8859_1_norwegian.h 20 | src_c/stem_ISO_8859_1_porter.c 21 | src_c/stem_ISO_8859_1_porter.h 22 | src_c/stem_ISO_8859_1_portuguese.c 23 | src_c/stem_ISO_8859_1_portuguese.h 24 | src_c/stem_ISO_8859_1_spanish.c 25 | src_c/stem_ISO_8859_1_spanish.h 26 | src_c/stem_ISO_8859_1_swedish.c 27 | src_c/stem_ISO_8859_1_swedish.h 28 | src_c/stem_ISO_8859_2_romanian.c 29 | src_c/stem_ISO_8859_2_romanian.h 30 | src_c/stem_KOI8_R_russian.c 31 | src_c/stem_KOI8_R_russian.h 32 | src_c/stem_UTF_8_danish.c 33 | src_c/stem_UTF_8_danish.h 34 | src_c/stem_UTF_8_dutch.c 35 | src_c/stem_UTF_8_dutch.h 36 | src_c/stem_UTF_8_english.c 37 | src_c/stem_UTF_8_english.h 38 | src_c/stem_UTF_8_finnish.c 39 | src_c/stem_UTF_8_finnish.h 40 | src_c/stem_UTF_8_french.c 41 | src_c/stem_UTF_8_french.h 42 | src_c/stem_UTF_8_german.c 43 | src_c/stem_UTF_8_german.h 44 | src_c/stem_UTF_8_hungarian.c 45 | src_c/stem_UTF_8_hungarian.h 46 | src_c/stem_UTF_8_italian.c 47 | src_c/stem_UTF_8_italian.h 48 | src_c/stem_UTF_8_norwegian.c 49 | src_c/stem_UTF_8_norwegian.h 50 | src_c/stem_UTF_8_porter.c 51 | src_c/stem_UTF_8_porter.h 52 | src_c/stem_UTF_8_portuguese.c 53 | src_c/stem_UTF_8_portuguese.h 54 | src_c/stem_UTF_8_romanian.c 55 | src_c/stem_UTF_8_romanian.h 56 | 
src_c/stem_UTF_8_russian.c 57 | src_c/stem_UTF_8_russian.h 58 | src_c/stem_UTF_8_spanish.c 59 | src_c/stem_UTF_8_spanish.h 60 | src_c/stem_UTF_8_swedish.c 61 | src_c/stem_UTF_8_swedish.h 62 | src_c/stem_UTF_8_turkish.c 63 | src_c/stem_UTF_8_turkish.h 64 | runtime/api.c 65 | runtime/api.h 66 | runtime/header.h 67 | runtime/utilities.c 68 | libstemmer/libstemmer.c 69 | libstemmer/libstemmer_utf8.c 70 | libstemmer/modules.h 71 | libstemmer/modules_utf8.h 72 | include/libstemmer.h 73 | -------------------------------------------------------------------------------- /libstemmer_c/Makefile: -------------------------------------------------------------------------------- 1 | include mkinc.mak 2 | CFLAGS=-Iinclude -fPIC 3 | all: libstemmer.o stemwords 4 | libstemmer.o: $(snowball_sources:.c=.o) 5 | $(AR) -cru $@ $^ 6 | cp libstemmer.o libstemmer.a 7 | stemwords: examples/stemwords.o libstemmer.o 8 | $(CC) -o $@ $^ 9 | clean: 10 | rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o *.a 11 | -------------------------------------------------------------------------------- /libstemmer_c/README: -------------------------------------------------------------------------------- 1 | libstemmer_c 2 | ============ 3 | 4 | This document pertains to the C version of the libstemmer distribution, 5 | available for download from: 6 | 7 | http://snowball.tartarus.org/dist/libstemmer_c.tgz 8 | 9 | 10 | Compiling the library 11 | ===================== 12 | 13 | A simple makefile is provided for Unix style systems. On such systems, it 14 | should be possible simply to run "make", and the file "libstemmer.o" 15 | and the example program "stemwords" will be generated. 16 | 17 | If this doesn't work on your system, you need to write your own build 18 | system (or call the compiler directly). The files to compile are 19 | all contained in the "libstemmer", "runtime" and "src_c" directories, 20 | and the public header file is contained in the "include" directory. 
21 | 22 | The library comes in two flavours; UTF-8 only, and UTF-8 plus other character 23 | sets. To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of 24 | "libstemmer.c". 25 | 26 | For convenience "mkinc.mak" is a makefile fragment listing the source files and 27 | header files used to compile the standard version of the library. 28 | "mkinc_utf8.mak" is a comparable makefile fragment listing just the source 29 | files for the UTF-8 only version of the library. 30 | 31 | 32 | Using the library 33 | ================= 34 | 35 | The library provides a simple C API. Essentially, a new stemmer can 36 | be obtained by using "sb_stemmer_new". "sb_stemmer_stem" is then 37 | used to stem a word, "sb_stemmer_length" returns the stemmed 38 | length of the last word processed, and "sb_stemmer_delete" is 39 | used to delete a stemmer. 40 | 41 | Creating a stemmer is a relatively expensive operation - the expected 42 | usage pattern is that a new stemmer is created when needed, used 43 | to stem many words, and deleted after some time. 44 | 45 | Stemmers are re-entrant, but not threadsafe. In other words, if 46 | you wish to access the same stemmer object from multiple threads, 47 | you must ensure that all access is protected by a mutex or similar 48 | device. 49 | 50 | libstemmer does not currently incorporate any mechanism for caching the results 51 | of stemming operations. Such caching can greatly increase the performance of a 52 | stemmer under certain situations, so suitable patches will be considered for 53 | inclusion. 54 | 55 | The standard libstemmer sources contain an algorithm for each of the supported 56 | languages. The algorithm may be selected using the english name of the 57 | language, or using the 2 or 3 letter ISO 639 language codes. 
In addition, 58 | the traditional "Porter" stemming algorithm for english is included for 59 | backwards compatibility purposes, but we recommend use of the "English" 60 | stemmer in preference for new projects. 61 | 62 | (Some minor algorithms which are included only as curiosities in the snowball 63 | website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not 64 | included in the standard libstemmer sources. These are not really supported by 65 | the snowball project, but it would be possible to compile a modified libstemmer 66 | library containing these if desired.) 67 | 68 | 69 | The stemwords example 70 | ===================== 71 | 72 | The stemwords example program allows you to run any of the stemmers 73 | compiled into the libstemmer library on a sample vocabulary. For 74 | details on how to use it, run it with the "-h" command line option. 75 | 76 | 77 | Using the library in a larger system 78 | ==================================== 79 | 80 | If you are incorporating the library into the build system of a larger 81 | program, I recommend copying the unpacked tarball without modification into 82 | a subdirectory of the sources of your program. Future versions of the 83 | library are intended to keep the same structure, so this will keep the 84 | work required to move to a new version of the library to a minimum. 85 | 86 | As an additional convenience, the list of source and header files used 87 | in the library is detailed in mkinc.mak - a file which is in a suitable 88 | format for inclusion by a Makefile. By including this file in your build 89 | system, you can link the snowball system into your program with a few 90 | extra rules. 
91 | 92 | Using the library in a system using GNU autotools 93 | ================================================= 94 | 95 | The libstemmer_c library can be integrated into a larger system which uses the 96 | GNU autotools framework (and in particular, automake and autoconf) as follows: 97 | 98 | 1) Unpack libstemmer_c.tgz in the top level project directory so that there is 99 | a libstemmer_c subdirectory of the top level directory of the project. 100 | 101 | 2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing: 102 | 103 | noinst_LTLIBRARIES = libstemmer.la 104 | include $(srcdir)/mkinc.mak 105 | noinst_HEADERS = $(snowball_headers) 106 | libstemmer_la_SOURCES = $(snowball_sources) 107 | 108 | (You may also need to add other lines to this, for example, if you are using 109 | compiler options which are not compatible with compiling the libstemmer 110 | library.) 111 | 112 | 3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's 113 | configure.ac file. 114 | 115 | 4) Add to the top level makefile the following lines (or modify existing 116 | assignments to these variables appropriately): 117 | 118 | AUTOMAKE_OPTIONS = subdir-objects 119 | AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include 120 | SUBDIRS=libstemmer_c 121 | <name>_LIBADD = libstemmer_c/libstemmer.la 122 | 123 | (Where <name> is the name of the library or executable which links against 124 | libstemmer.) 125 | 126 | -------------------------------------------------------------------------------- /libstemmer_c/examples/stemwords.c: -------------------------------------------------------------------------------- 1 | /* This is a simple program which uses libstemmer to provide a command 2 | * line interface for stemming using any of the algorithms provided. 
3 | */ 4 | 5 | #include 6 | #include /* for malloc, free */ 7 | #include /* for memmove */ 8 | #include /* for isupper, tolower */ 9 | 10 | #include "libstemmer.h" 11 | 12 | const char * progname; 13 | static int pretty = 1; 14 | 15 | static void 16 | stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out) 17 | { 18 | #define INC 10 19 | int lim = INC; 20 | sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol)); 21 | 22 | while(1) { 23 | int ch = getc(f_in); 24 | if (ch == EOF) { 25 | free(b); return; 26 | } 27 | { 28 | int i = 0; 29 | int inlen = 0; 30 | while(1) { 31 | if (ch == '\n' || ch == EOF) break; 32 | if (i == lim) { 33 | sb_symbol * newb; 34 | newb = (sb_symbol *) 35 | realloc(b, (lim + INC) * sizeof(sb_symbol)); 36 | if (newb == 0) goto error; 37 | b = newb; 38 | lim = lim + INC; 39 | } 40 | /* Update count of utf-8 characters. */ 41 | if (ch < 0x80 || ch > 0xBF) inlen += 1; 42 | /* force lower case: */ 43 | if (isupper(ch)) ch = tolower(ch); 44 | 45 | b[i] = ch; 46 | i++; 47 | ch = getc(f_in); 48 | } 49 | 50 | { 51 | const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i); 52 | if (stemmed == NULL) 53 | { 54 | fprintf(stderr, "Out of memory"); 55 | exit(1); 56 | } 57 | else 58 | { 59 | if (pretty == 1) { 60 | fwrite(b, i, 1, f_out); 61 | fputs(" -> ", f_out); 62 | } else if (pretty == 2) { 63 | fwrite(b, i, 1, f_out); 64 | if (sb_stemmer_length(stemmer) > 0) { 65 | int j; 66 | if (inlen < 30) { 67 | for (j = 30 - inlen; j > 0; j--) 68 | fputs(" ", f_out); 69 | } else { 70 | fputs("\n", f_out); 71 | for (j = 30; j > 0; j--) 72 | fputs(" ", f_out); 73 | } 74 | } 75 | } 76 | 77 | fputs((char *)stemmed, f_out); 78 | putc('\n', f_out); 79 | } 80 | } 81 | } 82 | } 83 | error: 84 | if (b != 0) free(b); 85 | return; 86 | } 87 | 88 | /** Display the command line syntax, and then exit. 89 | * @param n The value to exit with. 
90 | */ 91 | static void 92 | usage(int n) 93 | { 94 | printf("usage: %s [-l ] [-i ] [-o ] [-c ] [-p[2]] [-h]\n" 95 | "\n" 96 | "The input file consists of a list of words to be stemmed, one per\n" 97 | "line. Words should be in lower case, but (for English) A-Z letters\n" 98 | "are mapped to their a-z equivalents anyway. If omitted, stdin is\n" 99 | "used.\n" 100 | "\n" 101 | "If -c is given, the argument is the character encoding of the input\n" 102 | "and output files. If it is omitted, the UTF-8 encoding is used.\n" 103 | "\n" 104 | "If -p is given the output file consists of each word of the input\n" 105 | "file followed by \"->\" followed by its stemmed equivalent.\n" 106 | "If -p2 is given the output file is a two column layout containing\n" 107 | "the input words in the first column and the stemmed eqivalents in\n" 108 | "the second column.\n" 109 | "Otherwise, the output file consists of the stemmed words, one per\n" 110 | "line.\n" 111 | "\n" 112 | "-h displays this help\n", 113 | progname); 114 | exit(n); 115 | } 116 | 117 | int 118 | main(int argc, char * argv[]) 119 | { 120 | char * in = 0; 121 | char * out = 0; 122 | FILE * f_in; 123 | FILE * f_out; 124 | struct sb_stemmer * stemmer; 125 | 126 | char * language = "english"; 127 | char * charenc = NULL; 128 | 129 | char * s; 130 | int i = 1; 131 | pretty = 0; 132 | 133 | progname = argv[0]; 134 | 135 | while(i < argc) { 136 | s = argv[i++]; 137 | if (s[0] == '-') { 138 | if (strcmp(s, "-o") == 0) { 139 | if (i >= argc) { 140 | fprintf(stderr, "%s requires an argument\n", s); 141 | exit(1); 142 | } 143 | out = argv[i++]; 144 | } else if (strcmp(s, "-i") == 0) { 145 | if (i >= argc) { 146 | fprintf(stderr, "%s requires an argument\n", s); 147 | exit(1); 148 | } 149 | in = argv[i++]; 150 | } else if (strcmp(s, "-l") == 0) { 151 | if (i >= argc) { 152 | fprintf(stderr, "%s requires an argument\n", s); 153 | exit(1); 154 | } 155 | language = argv[i++]; 156 | } else if (strcmp(s, "-c") == 0) { 157 | if (i >= 
argc) { 158 | fprintf(stderr, "%s requires an argument\n", s); 159 | exit(1); 160 | } 161 | charenc = argv[i++]; 162 | } else if (strcmp(s, "-p2") == 0) { 163 | pretty = 2; 164 | } else if (strcmp(s, "-p") == 0) { 165 | pretty = 1; 166 | } else if (strcmp(s, "-h") == 0) { 167 | usage(0); 168 | } else { 169 | fprintf(stderr, "option %s unknown\n", s); 170 | usage(1); 171 | } 172 | } else { 173 | fprintf(stderr, "unexpected parameter %s\n", s); 174 | usage(1); 175 | } 176 | } 177 | 178 | /* prepare the files */ 179 | f_in = (in == 0) ? stdin : fopen(in, "r"); 180 | if (f_in == 0) { 181 | fprintf(stderr, "file %s not found\n", in); 182 | exit(1); 183 | } 184 | f_out = (out == 0) ? stdout : fopen(out, "w"); 185 | if (f_out == 0) { 186 | fprintf(stderr, "file %s cannot be opened\n", out); 187 | exit(1); 188 | } 189 | 190 | /* do the stemming process: */ 191 | stemmer = sb_stemmer_new(language, charenc); 192 | if (stemmer == 0) { 193 | if (charenc == NULL) { 194 | fprintf(stderr, "language `%s' not available for stemming\n", language); 195 | exit(1); 196 | } else { 197 | fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc); 198 | exit(1); 199 | } 200 | } 201 | stem_file(stemmer, f_in, f_out); 202 | sb_stemmer_delete(stemmer); 203 | 204 | if (in != 0) (void) fclose(f_in); 205 | if (out != 0) (void) fclose(f_out); 206 | 207 | return 0; 208 | } 209 | 210 | -------------------------------------------------------------------------------- /libstemmer_c/include/libstemmer.h: -------------------------------------------------------------------------------- 1 | 2 | /* Make header file work when included from C++ */ 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | struct sb_stemmer; 8 | typedef unsigned char sb_symbol; 9 | 10 | /* FIXME - should be able to get a version number for each stemming 11 | * algorithm (which will be incremented each time the output changes). 
*/ 12 | 13 | /** Returns an array of the names of the available stemming algorithms. 14 | * Note that these are the canonical names - aliases (ie, other names for 15 | * the same algorithm) will not be included in the list. 16 | * The list is terminated with a null pointer. 17 | * 18 | * The list must not be modified in any way. 19 | */ 20 | const char ** sb_stemmer_list(void); 21 | 22 | /** Create a new stemmer object, using the specified algorithm, for the 23 | * specified character encoding. 24 | * 25 | * All algorithms will usually be available in UTF-8, but may also be 26 | * available in other character encodings. 27 | * 28 | * @param algorithm The algorithm name. This is either the english 29 | * name of the algorithm, or the 2 or 3 letter ISO 639 codes for the 30 | * language. Note that case is significant in this parameter - the 31 | * value should be supplied in lower case. 32 | * 33 | * @param charenc The character encoding. NULL may be passed as 34 | * this value, in which case UTF-8 encoding will be assumed. Otherwise, 35 | * the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1), 36 | * "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian). Note that 37 | * case is significant in this parameter. 38 | * 39 | * @return NULL if the specified algorithm is not recognised, or the 40 | * algorithm is not available for the requested encoding. Otherwise, 41 | * returns a pointer to a newly created stemmer for the requested algorithm. 42 | * The returned pointer must be deleted by calling sb_stemmer_delete(). 43 | * 44 | * @note NULL will also be returned if an out of memory error occurs. 45 | */ 46 | struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); 47 | 48 | /** Delete a stemmer object. 49 | * 50 | * This frees all resources allocated for the stemmer. After calling 51 | * this function, the supplied stemmer may no longer be used in any way. 
52 | * 53 | * It is safe to pass a null pointer to this function - this will have 54 | * no effect. 55 | */ 56 | void sb_stemmer_delete(struct sb_stemmer * stemmer); 57 | 58 | /** Stem a word. 59 | * 60 | * The return value is owned by the stemmer - it must not be freed or 61 | * modified, and it will become invalid when the stemmer is called again, 62 | * or if the stemmer is freed. 63 | * 64 | * The length of the return value can be obtained using sb_stemmer_length(). 65 | * 66 | * If an out-of-memory error occurs, this will return NULL. 67 | */ 68 | const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, 69 | const sb_symbol * word, int size); 70 | 71 | /** Get the length of the result of the last stemmed word. 72 | * This should not be called before sb_stemmer_stem() has been called. 73 | */ 74 | int sb_stemmer_length(struct sb_stemmer * stemmer); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/libstemmer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "../include/libstemmer.h" 5 | #include "../runtime/api.h" 6 | #include "modules.h" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct SN_env *); 11 | int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const 
char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer; 40 | 41 | enc = sb_getenc(charenc); 42 | if (enc == ENC_UNKNOWN) return NULL; 43 | 44 | for (module = modules; module->name != 0; module++) { 45 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 46 | } 47 | if (module->name == NULL) return NULL; 48 | 49 | stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 50 | if (stemmer == NULL) return NULL; 51 | 52 | stemmer->create = module->create; 53 | stemmer->close = module->close; 54 | stemmer->stem = module->stem; 55 | 56 | stemmer->env = stemmer->create(); 57 | if (stemmer->env == NULL) 58 | { 59 | sb_stemmer_delete(stemmer); 60 | return NULL; 61 | } 62 | 63 | return stemmer; 64 | } 65 | 66 | void 67 | sb_stemmer_delete(struct sb_stemmer * stemmer) 68 | { 69 | if (stemmer == 0) return; 70 | if (stemmer->close == 0) return; 71 | stemmer->close(stemmer->env); 72 | stemmer->close = 0; 73 | free(stemmer); 74 | } 75 | 76 | const sb_symbol * 77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 78 | { 79 | int ret; 80 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 81 | { 82 | stemmer->env->l = 0; 83 | return NULL; 84 | } 85 | ret = stemmer->stem(stemmer->env); 86 | if (ret < 0) return NULL; 87 | stemmer->env->p[stemmer->env->l] = 0; 88 | return (const sb_symbol *)(stemmer->env->p); 89 | } 90 | 91 | int 92 | sb_stemmer_length(struct sb_stemmer * stemmer) 93 | { 94 | return stemmer->env->l; 95 | } 96 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/libstemmer_c.in: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "../include/libstemmer.h" 5 | #include "../runtime/api.h" 6 | #include "@MODULES_H@" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct 
SN_env *); 11 | int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer; 40 | 41 | enc = sb_getenc(charenc); 42 | if (enc == ENC_UNKNOWN) return NULL; 43 | 44 | for (module = modules; module->name != 0; module++) { 45 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 46 | } 47 | if (module->name == NULL) return NULL; 48 | 49 | stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 50 | if (stemmer == NULL) return NULL; 51 | 52 | stemmer->create = module->create; 53 | stemmer->close = module->close; 54 | stemmer->stem = module->stem; 55 | 56 | stemmer->env = stemmer->create(); 57 | if (stemmer->env == NULL) 58 | { 59 | sb_stemmer_delete(stemmer); 60 | return NULL; 61 | } 62 | 63 | return stemmer; 64 | } 65 | 66 | void 67 | sb_stemmer_delete(struct sb_stemmer * stemmer) 68 | { 69 | if (stemmer == 0) return; 70 | if (stemmer->close == 0) return; 71 | stemmer->close(stemmer->env); 72 | stemmer->close = 0; 73 | free(stemmer); 74 | } 75 | 76 | const sb_symbol * 77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 78 | { 79 | int ret; 80 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 81 | { 82 | stemmer->env->l = 0; 83 | return NULL; 84 | } 85 | ret = stemmer->stem(stemmer->env); 86 | if (ret < 0) 
return NULL; 87 | stemmer->env->p[stemmer->env->l] = 0; 88 | return (const sb_symbol *)(stemmer->env->p); 89 | } 90 | 91 | int 92 | sb_stemmer_length(struct sb_stemmer * stemmer) 93 | { 94 | return stemmer->env->l; 95 | } 96 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/libstemmer_utf8.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "../include/libstemmer.h" 5 | #include "../runtime/api.h" 6 | #include "modules_utf8.h" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct SN_env *); 11 | int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer; 40 | 41 | enc = sb_getenc(charenc); 42 | if (enc == ENC_UNKNOWN) return NULL; 43 | 44 | for (module = modules; module->name != 0; module++) { 45 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 46 | } 47 | if (module->name == NULL) return NULL; 48 | 49 | stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 50 | if (stemmer == NULL) return NULL; 51 | 52 | stemmer->create = module->create; 53 | stemmer->close = module->close; 54 | stemmer->stem = module->stem; 55 | 56 | stemmer->env = stemmer->create(); 57 | if (stemmer->env 
== NULL) 58 | { 59 | sb_stemmer_delete(stemmer); 60 | return NULL; 61 | } 62 | 63 | return stemmer; 64 | } 65 | 66 | void 67 | sb_stemmer_delete(struct sb_stemmer * stemmer) 68 | { 69 | if (stemmer == 0) return; 70 | if (stemmer->close == 0) return; 71 | stemmer->close(stemmer->env); 72 | stemmer->close = 0; 73 | free(stemmer); 74 | } 75 | 76 | const sb_symbol * 77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 78 | { 79 | int ret; 80 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 81 | { 82 | stemmer->env->l = 0; 83 | return NULL; 84 | } 85 | ret = stemmer->stem(stemmer->env); 86 | if (ret < 0) return NULL; 87 | stemmer->env->p[stemmer->env->l] = 0; 88 | return (const sb_symbol *)(stemmer->env->p); 89 | } 90 | 91 | int 92 | sb_stemmer_length(struct sb_stemmer * stemmer) 93 | { 94 | return stemmer->env->l; 95 | } 96 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules.h: -------------------------------------------------------------------------------- 1 | /* libstemmer/modules.h: List of stemming modules. 2 | * 3 | * This file is generated by mkmodules.pl from a list of module names. 4 | * Do not edit manually. 
5 | * 6 | * Modules included by this file are: danish, dutch, english, finnish, french, 7 | * german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | * russian, spanish, swedish, turkish 9 | */ 10 | 11 | #include "../src_c/stem_ISO_8859_1_danish.h" 12 | #include "../src_c/stem_UTF_8_danish.h" 13 | #include "../src_c/stem_ISO_8859_1_dutch.h" 14 | #include "../src_c/stem_UTF_8_dutch.h" 15 | #include "../src_c/stem_ISO_8859_1_english.h" 16 | #include "../src_c/stem_UTF_8_english.h" 17 | #include "../src_c/stem_ISO_8859_1_finnish.h" 18 | #include "../src_c/stem_UTF_8_finnish.h" 19 | #include "../src_c/stem_ISO_8859_1_french.h" 20 | #include "../src_c/stem_UTF_8_french.h" 21 | #include "../src_c/stem_ISO_8859_1_german.h" 22 | #include "../src_c/stem_UTF_8_german.h" 23 | #include "../src_c/stem_ISO_8859_1_hungarian.h" 24 | #include "../src_c/stem_UTF_8_hungarian.h" 25 | #include "../src_c/stem_ISO_8859_1_italian.h" 26 | #include "../src_c/stem_UTF_8_italian.h" 27 | #include "../src_c/stem_ISO_8859_1_norwegian.h" 28 | #include "../src_c/stem_UTF_8_norwegian.h" 29 | #include "../src_c/stem_ISO_8859_1_porter.h" 30 | #include "../src_c/stem_UTF_8_porter.h" 31 | #include "../src_c/stem_ISO_8859_1_portuguese.h" 32 | #include "../src_c/stem_UTF_8_portuguese.h" 33 | #include "../src_c/stem_ISO_8859_2_romanian.h" 34 | #include "../src_c/stem_UTF_8_romanian.h" 35 | #include "../src_c/stem_KOI8_R_russian.h" 36 | #include "../src_c/stem_UTF_8_russian.h" 37 | #include "../src_c/stem_ISO_8859_1_spanish.h" 38 | #include "../src_c/stem_UTF_8_spanish.h" 39 | #include "../src_c/stem_ISO_8859_1_swedish.h" 40 | #include "../src_c/stem_UTF_8_swedish.h" 41 | #include "../src_c/stem_UTF_8_turkish.h" 42 | 43 | typedef enum { 44 | ENC_UNKNOWN=0, 45 | ENC_ISO_8859_1, 46 | ENC_ISO_8859_2, 47 | ENC_KOI8_R, 48 | ENC_UTF_8 49 | } stemmer_encoding_t; 50 | 51 | struct stemmer_encoding { 52 | const char * name; 53 | stemmer_encoding_t enc; 54 | }; 55 | static struct stemmer_encoding 
encodings[] = { 56 | {"ISO_8859_1", ENC_ISO_8859_1}, 57 | {"ISO_8859_2", ENC_ISO_8859_2}, 58 | {"KOI8_R", ENC_KOI8_R}, 59 | {"UTF_8", ENC_UTF_8}, 60 | {0,ENC_UNKNOWN} 61 | }; 62 | 63 | struct stemmer_modules { 64 | const char * name; 65 | stemmer_encoding_t enc; 66 | struct SN_env * (*create)(void); 67 | void (*close)(struct SN_env *); 68 | int (*stem)(struct SN_env *); 69 | }; 70 | static struct stemmer_modules modules[] = { 71 | {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 72 | {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 73 | {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 74 | {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 75 | {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 76 | {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 77 | {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 78 | {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 79 | {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 80 | {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 81 | {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 82 | {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 83 | {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 84 | {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 85 | {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 86 | {"en", 
ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 87 | {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 88 | {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 89 | {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 90 | {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 91 | {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 92 | {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 93 | {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 94 | {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 95 | {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, 96 | {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 97 | {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, 98 | {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 99 | {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, 100 | {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 101 | {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 102 | {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 103 | {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 104 | {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 105 | 
{"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 106 | {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 107 | {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 108 | {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 109 | {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 110 | {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 111 | {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 112 | {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 113 | {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 114 | {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 115 | {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 116 | {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 117 | {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 118 | {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 119 | {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 120 | {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 121 | {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 122 | {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 123 | {"italian", ENC_ISO_8859_1, 
italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 124 | {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 125 | {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 126 | {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 127 | {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 128 | {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 129 | {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 130 | {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 131 | {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 132 | {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 133 | {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 134 | {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 135 | {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 136 | {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 137 | {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, 138 | {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, 139 | {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 140 | {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 141 | {"pt", ENC_ISO_8859_1, 
portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 142 | {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 143 | {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 144 | {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 145 | {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 146 | {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 147 | {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 148 | {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 149 | {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 150 | {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 151 | {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 152 | {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 153 | {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 154 | {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 155 | {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 156 | {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 157 | {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 158 | {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 159 | {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, 
spanish_ISO_8859_1_stem}, 160 | {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 161 | {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 162 | {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 163 | {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 164 | {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 165 | {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 166 | {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 167 | {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 168 | {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 169 | {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 170 | {0,ENC_UNKNOWN,0,0,0} 171 | }; 172 | static const char * algorithm_names[] = { 173 | "danish", 174 | "dutch", 175 | "english", 176 | "finnish", 177 | "french", 178 | "german", 179 | "hungarian", 180 | "italian", 181 | "norwegian", 182 | "porter", 183 | "portuguese", 184 | "romanian", 185 | "russian", 186 | "spanish", 187 | "swedish", 188 | "turkish", 189 | 0 190 | }; 191 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules.txt: -------------------------------------------------------------------------------- 1 | # This file contains a list of stemmers to include in the distribution. 2 | # The format is a set of space separated lines - on each line: 3 | # First item is name of stemmer. 4 | # Second item is comma separated list of character sets. 5 | # Third item is comma separated list of names to refer to the stemmer by. 
6 | # 7 | # Lines starting with a #, or blank lines, are ignored. 8 | 9 | # List all the main algorithms for each language, in UTF-8, and also with 10 | # the most commonly used encoding. 11 | 12 | danish UTF_8,ISO_8859_1 danish,da,dan 13 | dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld 14 | english UTF_8,ISO_8859_1 english,en,eng 15 | finnish UTF_8,ISO_8859_1 finnish,fi,fin 16 | french UTF_8,ISO_8859_1 french,fr,fre,fra 17 | german UTF_8,ISO_8859_1 german,de,ger,deu 18 | hungarian UTF_8,ISO_8859_1 hungarian,hu,hun 19 | italian UTF_8,ISO_8859_1 italian,it,ita 20 | norwegian UTF_8,ISO_8859_1 norwegian,no,nor 21 | portuguese UTF_8,ISO_8859_1 portuguese,pt,por 22 | romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron 23 | russian UTF_8,KOI8_R russian,ru,rus 24 | spanish UTF_8,ISO_8859_1 spanish,es,esl,spa 25 | swedish UTF_8,ISO_8859_1 swedish,sv,swe 26 | turkish UTF_8 turkish,tr,tur 27 | 28 | # Also include the traditional porter algorithm for english. 29 | # The porter algorithm is included in the libstemmer distribution to assist 30 | # with backwards compatibility, but for new systems the english algorithm 31 | # should be used in preference. 32 | porter UTF_8,ISO_8859_1 porter 33 | 34 | # Some other stemmers in the snowball project are not included in the standard 35 | # distribution. To compile a libstemmer with them in, add them to this list, 36 | # and regenerate the distribution. (You will need a full source checkout for 37 | # this.) They are included in the snowball website as curiosities, but are not 38 | # intended for general use, and use of them is not fully supported. These 39 | # algorithms are: 40 | # 41 | # german2 - This is a slight modification of the german stemmer. 42 | #german2 UTF_8,ISO_8859_1 german2 43 | # 44 | # kraaij_pohlmann - This is a different dutch stemmer. 
45 | #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann 46 | # 47 | # lovins - This is an english stemmer, but fairly outdated, and 48 | # only really applicable to a restricted type of input text 49 | # (keywords in academic publications). 50 | #lovins UTF_8,ISO_8859_1 lovins 51 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules_utf8.h: -------------------------------------------------------------------------------- 1 | /* libstemmer/modules_utf8.h: List of stemming modules. 2 | * 3 | * This file is generated by mkmodules.pl from a list of module names. 4 | * Do not edit manually. 5 | * 6 | * Modules included by this file are: danish, dutch, english, finnish, french, 7 | * german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | * russian, spanish, swedish, turkish 9 | */ 10 | 11 | #include "../src_c/stem_UTF_8_danish.h" 12 | #include "../src_c/stem_UTF_8_dutch.h" 13 | #include "../src_c/stem_UTF_8_english.h" 14 | #include "../src_c/stem_UTF_8_finnish.h" 15 | #include "../src_c/stem_UTF_8_french.h" 16 | #include "../src_c/stem_UTF_8_german.h" 17 | #include "../src_c/stem_UTF_8_hungarian.h" 18 | #include "../src_c/stem_UTF_8_italian.h" 19 | #include "../src_c/stem_UTF_8_norwegian.h" 20 | #include "../src_c/stem_UTF_8_porter.h" 21 | #include "../src_c/stem_UTF_8_portuguese.h" 22 | #include "../src_c/stem_UTF_8_romanian.h" 23 | #include "../src_c/stem_UTF_8_russian.h" 24 | #include "../src_c/stem_UTF_8_spanish.h" 25 | #include "../src_c/stem_UTF_8_swedish.h" 26 | #include "../src_c/stem_UTF_8_turkish.h" 27 | 28 | typedef enum { 29 | ENC_UNKNOWN=0, 30 | ENC_UTF_8 31 | } stemmer_encoding_t; 32 | 33 | struct stemmer_encoding { 34 | const char * name; 35 | stemmer_encoding_t enc; 36 | }; 37 | static struct stemmer_encoding encodings[] = { 38 | {"UTF_8", ENC_UTF_8}, 39 | {0,ENC_UNKNOWN} 40 | }; 41 | 42 | struct stemmer_modules { 43 | const char * name; 44 | stemmer_encoding_t enc; 45 | struct 
SN_env * (*create)(void); 46 | void (*close)(struct SN_env *); 47 | int (*stem)(struct SN_env *); 48 | }; 49 | static struct stemmer_modules modules[] = { 50 | {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 51 | {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 52 | {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 53 | {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 54 | {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 55 | {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 56 | {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 57 | {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 58 | {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 59 | {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 60 | {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 61 | {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 62 | {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 63 | {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 64 | {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 65 | {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 66 | {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 67 | {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 68 | {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 69 | {"ger", ENC_UTF_8, german_UTF_8_create_env, 
german_UTF_8_close_env, german_UTF_8_stem}, 70 | {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 71 | {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 72 | {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 73 | {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 74 | {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 75 | {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 76 | {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 77 | {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 78 | {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 79 | {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 80 | {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 81 | {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 82 | {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 83 | {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, 84 | {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 85 | {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 86 | {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 87 | {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 88 | {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 89 | {"ru", ENC_UTF_8, russian_UTF_8_create_env, 
russian_UTF_8_close_env, russian_UTF_8_stem}, 90 | {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 91 | {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 92 | {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 93 | {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 94 | {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 95 | {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 96 | {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 97 | {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 98 | {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 99 | {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 100 | {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 101 | {0,ENC_UNKNOWN,0,0,0} 102 | }; 103 | static const char * algorithm_names[] = { 104 | "danish", 105 | "dutch", 106 | "english", 107 | "finnish", 108 | "french", 109 | "german", 110 | "hungarian", 111 | "italian", 112 | "norwegian", 113 | "porter", 114 | "portuguese", 115 | "romanian", 116 | "russian", 117 | "spanish", 118 | "swedish", 119 | "turkish", 120 | 0 121 | }; 122 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules_utf8.txt: -------------------------------------------------------------------------------- 1 | # This file contains a list of stemmers to include in the distribution. 2 | # The format is a set of space separated lines - on each line: 3 | # First item is name of stemmer. 4 | # Second item is comma separated list of character sets. 
5 | # Third item is comma separated list of names to refer to the stemmer by. 6 | # 7 | # Lines starting with a #, or blank lines, are ignored. 8 | 9 | # List all the main algorithms for each language, in UTF-8. 10 | 11 | danish UTF_8 danish,da,dan 12 | dutch UTF_8 dutch,nl,dut,nld 13 | english UTF_8 english,en,eng 14 | finnish UTF_8 finnish,fi,fin 15 | french UTF_8 french,fr,fre,fra 16 | german UTF_8 german,de,ger,deu 17 | hungarian UTF_8 hungarian,hu,hun 18 | italian UTF_8 italian,it,ita 19 | norwegian UTF_8 norwegian,no,nor 20 | portuguese UTF_8 portuguese,pt,por 21 | romanian UTF_8 romanian,ro,rum,ron 22 | russian UTF_8 russian,ru,rus 23 | spanish UTF_8 spanish,es,esl,spa 24 | swedish UTF_8 swedish,sv,swe 25 | turkish UTF_8 turkish,tr,tur 26 | 27 | # Also include the traditional porter algorithm for english. 28 | # The porter algorithm is included in the libstemmer distribution to assist 29 | # with backwards compatibility, but for new systems the english algorithm 30 | # should be used in preference. 31 | porter UTF_8 porter 32 | 33 | # Some other stemmers in the snowball project are not included in the standard 34 | # distribution. To compile a libstemmer with them in, add them to this list, 35 | # and regenerate the distribution. (You will need a full source checkout for 36 | # this.) They are included in the snowball website as curiosities, but are not 37 | # intended for general use, and use of them is not fully supported. These 38 | # algorithms are: 39 | # 40 | # german2 - This is a slight modification of the german stemmer. 41 | #german2 UTF_8 german2 42 | # 43 | # kraaij_pohlmann - This is a different dutch stemmer. 44 | #kraaij_pohlmann UTF_8 kraaij_pohlmann 45 | # 46 | # lovins - This is an english stemmer, but fairly outdated, and 47 | # only really applicable to a restricted type of input text 48 | # (keywords in academic publications). 
49 | #lovins UTF_8 lovins 50 | -------------------------------------------------------------------------------- /libstemmer_c/mkinc.mak: -------------------------------------------------------------------------------- 1 | # libstemmer/mkinc.mak: List of stemming module source files 2 | # 3 | # This file is generated by mkmodules.pl from a list of module names. 4 | # Do not edit manually. 5 | # 6 | # Modules included by this file are: danish, dutch, english, finnish, french, 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | # russian, spanish, swedish, turkish 9 | 10 | snowball_sources= \ 11 | src_c/stem_ISO_8859_1_danish.c \ 12 | src_c/stem_UTF_8_danish.c \ 13 | src_c/stem_ISO_8859_1_dutch.c \ 14 | src_c/stem_UTF_8_dutch.c \ 15 | src_c/stem_ISO_8859_1_english.c \ 16 | src_c/stem_UTF_8_english.c \ 17 | src_c/stem_ISO_8859_1_finnish.c \ 18 | src_c/stem_UTF_8_finnish.c \ 19 | src_c/stem_ISO_8859_1_french.c \ 20 | src_c/stem_UTF_8_french.c \ 21 | src_c/stem_ISO_8859_1_german.c \ 22 | src_c/stem_UTF_8_german.c \ 23 | src_c/stem_ISO_8859_1_hungarian.c \ 24 | src_c/stem_UTF_8_hungarian.c \ 25 | src_c/stem_ISO_8859_1_italian.c \ 26 | src_c/stem_UTF_8_italian.c \ 27 | src_c/stem_ISO_8859_1_norwegian.c \ 28 | src_c/stem_UTF_8_norwegian.c \ 29 | src_c/stem_ISO_8859_1_porter.c \ 30 | src_c/stem_UTF_8_porter.c \ 31 | src_c/stem_ISO_8859_1_portuguese.c \ 32 | src_c/stem_UTF_8_portuguese.c \ 33 | src_c/stem_ISO_8859_2_romanian.c \ 34 | src_c/stem_UTF_8_romanian.c \ 35 | src_c/stem_KOI8_R_russian.c \ 36 | src_c/stem_UTF_8_russian.c \ 37 | src_c/stem_ISO_8859_1_spanish.c \ 38 | src_c/stem_UTF_8_spanish.c \ 39 | src_c/stem_ISO_8859_1_swedish.c \ 40 | src_c/stem_UTF_8_swedish.c \ 41 | src_c/stem_UTF_8_turkish.c \ 42 | runtime/api.c \ 43 | runtime/utilities.c \ 44 | libstemmer/libstemmer.c 45 | 46 | snowball_headers= \ 47 | src_c/stem_ISO_8859_1_danish.h \ 48 | src_c/stem_UTF_8_danish.h \ 49 | src_c/stem_ISO_8859_1_dutch.h \ 50 | src_c/stem_UTF_8_dutch.h \ 
51 | src_c/stem_ISO_8859_1_english.h \ 52 | src_c/stem_UTF_8_english.h \ 53 | src_c/stem_ISO_8859_1_finnish.h \ 54 | src_c/stem_UTF_8_finnish.h \ 55 | src_c/stem_ISO_8859_1_french.h \ 56 | src_c/stem_UTF_8_french.h \ 57 | src_c/stem_ISO_8859_1_german.h \ 58 | src_c/stem_UTF_8_german.h \ 59 | src_c/stem_ISO_8859_1_hungarian.h \ 60 | src_c/stem_UTF_8_hungarian.h \ 61 | src_c/stem_ISO_8859_1_italian.h \ 62 | src_c/stem_UTF_8_italian.h \ 63 | src_c/stem_ISO_8859_1_norwegian.h \ 64 | src_c/stem_UTF_8_norwegian.h \ 65 | src_c/stem_ISO_8859_1_porter.h \ 66 | src_c/stem_UTF_8_porter.h \ 67 | src_c/stem_ISO_8859_1_portuguese.h \ 68 | src_c/stem_UTF_8_portuguese.h \ 69 | src_c/stem_ISO_8859_2_romanian.h \ 70 | src_c/stem_UTF_8_romanian.h \ 71 | src_c/stem_KOI8_R_russian.h \ 72 | src_c/stem_UTF_8_russian.h \ 73 | src_c/stem_ISO_8859_1_spanish.h \ 74 | src_c/stem_UTF_8_spanish.h \ 75 | src_c/stem_ISO_8859_1_swedish.h \ 76 | src_c/stem_UTF_8_swedish.h \ 77 | src_c/stem_UTF_8_turkish.h \ 78 | include/libstemmer.h \ 79 | libstemmer/modules.h \ 80 | runtime/api.h \ 81 | runtime/header.h 82 | 83 | -------------------------------------------------------------------------------- /libstemmer_c/mkinc_utf8.mak: -------------------------------------------------------------------------------- 1 | # libstemmer/mkinc_utf8.mak: List of stemming module source files 2 | # 3 | # This file is generated by mkmodules.pl from a list of module names. 4 | # Do not edit manually. 
5 | # 6 | # Modules included by this file are: danish, dutch, english, finnish, french, 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | # russian, spanish, swedish, turkish 9 | 10 | snowball_sources= \ 11 | src_c/stem_UTF_8_danish.c \ 12 | src_c/stem_UTF_8_dutch.c \ 13 | src_c/stem_UTF_8_english.c \ 14 | src_c/stem_UTF_8_finnish.c \ 15 | src_c/stem_UTF_8_french.c \ 16 | src_c/stem_UTF_8_german.c \ 17 | src_c/stem_UTF_8_hungarian.c \ 18 | src_c/stem_UTF_8_italian.c \ 19 | src_c/stem_UTF_8_norwegian.c \ 20 | src_c/stem_UTF_8_porter.c \ 21 | src_c/stem_UTF_8_portuguese.c \ 22 | src_c/stem_UTF_8_romanian.c \ 23 | src_c/stem_UTF_8_russian.c \ 24 | src_c/stem_UTF_8_spanish.c \ 25 | src_c/stem_UTF_8_swedish.c \ 26 | src_c/stem_UTF_8_turkish.c \ 27 | runtime/api.c \ 28 | runtime/utilities.c \ 29 | libstemmer/libstemmer_utf8.c 30 | 31 | snowball_headers= \ 32 | src_c/stem_UTF_8_danish.h \ 33 | src_c/stem_UTF_8_dutch.h \ 34 | src_c/stem_UTF_8_english.h \ 35 | src_c/stem_UTF_8_finnish.h \ 36 | src_c/stem_UTF_8_french.h \ 37 | src_c/stem_UTF_8_german.h \ 38 | src_c/stem_UTF_8_hungarian.h \ 39 | src_c/stem_UTF_8_italian.h \ 40 | src_c/stem_UTF_8_norwegian.h \ 41 | src_c/stem_UTF_8_porter.h \ 42 | src_c/stem_UTF_8_portuguese.h \ 43 | src_c/stem_UTF_8_romanian.h \ 44 | src_c/stem_UTF_8_russian.h \ 45 | src_c/stem_UTF_8_spanish.h \ 46 | src_c/stem_UTF_8_swedish.h \ 47 | src_c/stem_UTF_8_turkish.h \ 48 | include/libstemmer.h \ 49 | libstemmer/modules_utf8.h \ 50 | runtime/api.h \ 51 | runtime/header.h 52 | 53 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/api.c: -------------------------------------------------------------------------------- 1 | 2 | #include /* for calloc, free */ 3 | #include "header.h" 4 | 5 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) 6 | { 7 | struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); 8 | if (z == NULL) return 
NULL; 9 | z->p = create_s(); 10 | if (z->p == NULL) goto error; 11 | if (S_size) 12 | { 13 | int i; 14 | z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); 15 | if (z->S == NULL) goto error; 16 | 17 | for (i = 0; i < S_size; i++) 18 | { 19 | z->S[i] = create_s(); 20 | if (z->S[i] == NULL) goto error; 21 | } 22 | } 23 | 24 | if (I_size) 25 | { 26 | z->I = (int *) calloc(I_size, sizeof(int)); 27 | if (z->I == NULL) goto error; 28 | } 29 | 30 | if (B_size) 31 | { 32 | z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); 33 | if (z->B == NULL) goto error; 34 | } 35 | 36 | return z; 37 | error: 38 | SN_close_env(z, S_size); 39 | return NULL; 40 | } 41 | 42 | extern void SN_close_env(struct SN_env * z, int S_size) 43 | { 44 | if (z == NULL) return; 45 | if (S_size) 46 | { 47 | int i; 48 | for (i = 0; i < S_size; i++) 49 | { 50 | lose_s(z->S[i]); 51 | } 52 | free(z->S); 53 | } 54 | free(z->I); 55 | free(z->B); 56 | if (z->p) lose_s(z->p); 57 | free(z); 58 | } 59 | 60 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s) 61 | { 62 | int err = replace_s(z, 0, z->l, size, s, NULL); 63 | z->c = 0; 64 | return err; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/api.h: -------------------------------------------------------------------------------- 1 | 2 | typedef unsigned char symbol; 3 | 4 | /* Or replace 'char' above with 'short' for 16 bit characters. 5 | 6 | More precisely, replace 'char' with whatever type guarantees the 7 | character width you need. Note however that sizeof(symbol) should divide 8 | HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise 9 | there is an alignment problem. In the unlikely event of a problem here, 10 | consult Martin Porter. 
11 | 12 | */ 13 | 14 | struct SN_env { 15 | symbol * p; 16 | int c; int l; int lb; int bra; int ket; 17 | symbol * * S; 18 | int * I; 19 | unsigned char * B; 20 | }; 21 | 22 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); 23 | extern void SN_close_env(struct SN_env * z, int S_size); 24 | 25 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s); 26 | 27 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/header.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "api.h" 5 | 6 | #define MAXINT INT_MAX 7 | #define MININT INT_MIN 8 | 9 | #define HEAD 2*sizeof(int) 10 | 11 | #define SIZE(p) ((int *)(p))[-1] 12 | #define SET_SIZE(p, n) ((int *)(p))[-1] = n 13 | #define CAPACITY(p) ((int *)(p))[-2] 14 | 15 | struct among 16 | { int s_size; /* number of chars in string */ 17 | const symbol * s; /* search string */ 18 | int substring_i;/* index to longest matching substring */ 19 | int result; /* result of the lookup */ 20 | int (* function)(struct SN_env *); 21 | }; 22 | 23 | extern symbol * create_s(void); 24 | extern void lose_s(symbol * p); 25 | 26 | extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); 27 | 28 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 29 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 30 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 31 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 32 | 33 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 34 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 35 | extern int out_grouping(struct SN_env * z, const 
unsigned char * s, int min, int max, int repeat); 36 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 37 | 38 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s); 39 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); 40 | extern int eq_v(struct SN_env * z, const symbol * p); 41 | extern int eq_v_b(struct SN_env * z, const symbol * p); 42 | 43 | extern int find_among(struct SN_env * z, const struct among * v, int v_size); 44 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); 45 | 46 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); 47 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); 48 | extern int slice_from_v(struct SN_env * z, const symbol * p); 49 | extern int slice_del(struct SN_env * z); 50 | 51 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s); 52 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); 53 | 54 | extern symbol * slice_to(struct SN_env * z, symbol * p); 55 | extern symbol * assign_to(struct SN_env * z, symbol * p); 56 | 57 | extern void debug(struct SN_env * z, int number, int line_count); 58 | 59 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/utilities.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "header.h" 7 | 8 | #define unless(C) if(!(C)) 9 | 10 | #define CREATE_SIZE 1 11 | 12 | extern symbol * create_s(void) { 13 | symbol * p; 14 | void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); 15 | if (mem == NULL) return NULL; 16 | p = (symbol *) (HEAD + (char *) mem); 17 | CAPACITY(p) = CREATE_SIZE; 18 | SET_SIZE(p, CREATE_SIZE); 19 | return p; 20 | } 21 | 22 | extern void lose_s(symbol * p) { 23 | if (p == NULL) 
return; 24 | free((char *) p - HEAD); 25 | } 26 | 27 | /* 28 | new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c 29 | if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new 30 | position, or -1 on failure. 31 | 32 | -- used to implement hop and next in the utf8 case. 33 | */ 34 | 35 | extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { 36 | int b; 37 | if (n >= 0) { 38 | for (; n > 0; n--) { 39 | if (c >= l) return -1; 40 | b = p[c++]; 41 | if (b >= 0xC0) { /* 1100 0000 */ 42 | while (c < l) { 43 | b = p[c]; 44 | if (b >= 0xC0 || b < 0x80) break; 45 | /* break unless b is 10------ */ 46 | c++; 47 | } 48 | } 49 | } 50 | } else { 51 | for (; n < 0; n++) { 52 | if (c <= lb) return -1; 53 | b = p[--c]; 54 | if (b >= 0x80) { /* 1000 0000 */ 55 | while (c > lb) { 56 | b = p[c]; 57 | if (b >= 0xC0) break; /* 1100 0000 */ 58 | c--; 59 | } 60 | } 61 | } 62 | } 63 | return c; 64 | } 65 | 66 | /* Code for character groupings: utf8 cases */ 67 | 68 | static int get_utf8(const symbol * p, int c, int l, int * slot) { 69 | int b0, b1; 70 | if (c >= l) return 0; 71 | b0 = p[c++]; 72 | if (b0 < 0xC0 || c == l) { /* 1100 0000 */ 73 | * slot = b0; return 1; 74 | } 75 | b1 = p[c++]; 76 | if (b0 < 0xE0 || c == l) { /* 1110 0000 */ 77 | * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; 78 | } 79 | * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; 80 | } 81 | 82 | static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { 83 | int b0, b1; 84 | if (c <= lb) return 0; 85 | b0 = p[--c]; 86 | if (b0 < 0x80 || c == lb) { /* 1000 0000 */ 87 | * slot = b0; return 1; 88 | } 89 | b1 = p[--c]; 90 | if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ 91 | * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; 92 | } 93 | * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; 94 | } 95 | 96 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 97 | 
do { 98 | int ch; 99 | int w = get_utf8(z->p, z->c, z->l, & ch); 100 | unless (w) return -1; 101 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 102 | return w; 103 | z->c += w; 104 | } while (repeat); 105 | return 0; 106 | } 107 | 108 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 109 | do { 110 | int ch; 111 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 112 | unless (w) return -1; 113 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 114 | return w; 115 | z->c -= w; 116 | } while (repeat); 117 | return 0; 118 | } 119 | 120 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 121 | do { 122 | int ch; 123 | int w = get_utf8(z->p, z->c, z->l, & ch); 124 | unless (w) return -1; 125 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 126 | return w; 127 | z->c += w; 128 | } while (repeat); 129 | return 0; 130 | } 131 | 132 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 133 | do { 134 | int ch; 135 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 136 | unless (w) return -1; 137 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 138 | return w; 139 | z->c -= w; 140 | } while (repeat); 141 | return 0; 142 | } 143 | 144 | /* Code for character groupings: non-utf8 cases */ 145 | 146 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 147 | do { 148 | int ch; 149 | if (z->c >= z->l) return -1; 150 | ch = z->p[z->c]; 151 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 152 | return 1; 153 | z->c++; 154 | } while (repeat); 155 | return 0; 156 | } 157 | 158 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 159 | do { 160 | int ch; 161 | if (z->c <= z->lb) return -1; 
162 | ch = z->p[z->c - 1]; 163 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 164 | return 1; 165 | z->c--; 166 | } while (repeat); 167 | return 0; 168 | } 169 | 170 | extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 171 | do { 172 | int ch; 173 | if (z->c >= z->l) return -1; 174 | ch = z->p[z->c]; 175 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 176 | return 1; 177 | z->c++; 178 | } while (repeat); 179 | return 0; 180 | } 181 | 182 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 183 | do { 184 | int ch; 185 | if (z->c <= z->lb) return -1; 186 | ch = z->p[z->c - 1]; 187 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 188 | return 1; 189 | z->c--; 190 | } while (repeat); 191 | return 0; 192 | } 193 | 194 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { 195 | if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; 196 | z->c += s_size; return 1; 197 | } 198 | 199 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { 200 | if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; 201 | z->c -= s_size; return 1; 202 | } 203 | 204 | extern int eq_v(struct SN_env * z, const symbol * p) { 205 | return eq_s(z, SIZE(p), p); 206 | } 207 | 208 | extern int eq_v_b(struct SN_env * z, const symbol * p) { 209 | return eq_s_b(z, SIZE(p), p); 210 | } 211 | 212 | extern int find_among(struct SN_env * z, const struct among * v, int v_size) { 213 | 214 | int i = 0; 215 | int j = v_size; 216 | 217 | int c = z->c; int l = z->l; 218 | symbol * q = z->p + c; 219 | 220 | const struct among * w; 221 | 222 | int common_i = 0; 223 | int common_j = 0; 224 | 225 | int first_key_inspected = 0; 226 | 227 | while(1) { 228 | int k = i + ((j - i) >> 1); 229 | int diff = 0; 230 | 
int common = common_i < common_j ? common_i : common_j; /* smaller */ 231 | w = v + k; 232 | { 233 | int i2; for (i2 = common; i2 < w->s_size; i2++) { 234 | if (c + common == l) { diff = -1; break; } 235 | diff = q[common] - w->s[i2]; 236 | if (diff != 0) break; 237 | common++; 238 | } 239 | } 240 | if (diff < 0) { j = k; common_j = common; } 241 | else { i = k; common_i = common; } 242 | if (j - i <= 1) { 243 | if (i > 0) break; /* v->s has been inspected */ 244 | if (j == i) break; /* only one item in v */ 245 | 246 | /* - but now we need to go round once more to get 247 | v->s inspected. This looks messy, but is actually 248 | the optimal approach. */ 249 | 250 | if (first_key_inspected) break; 251 | first_key_inspected = 1; 252 | } 253 | } 254 | while(1) { 255 | w = v + i; 256 | if (common_i >= w->s_size) { 257 | z->c = c + w->s_size; 258 | if (w->function == 0) return w->result; 259 | { 260 | int res = w->function(z); 261 | z->c = c + w->s_size; 262 | if (res) return w->result; 263 | } 264 | } 265 | i = w->substring_i; 266 | if (i < 0) return 0; 267 | } 268 | } 269 | 270 | /* find_among_b is for backwards processing. Same comments apply */ 271 | 272 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { 273 | 274 | int i = 0; 275 | int j = v_size; 276 | 277 | int c = z->c; int lb = z->lb; 278 | symbol * q = z->p + c - 1; 279 | 280 | const struct among * w; 281 | 282 | int common_i = 0; 283 | int common_j = 0; 284 | 285 | int first_key_inspected = 0; 286 | 287 | while(1) { 288 | int k = i + ((j - i) >> 1); 289 | int diff = 0; 290 | int common = common_i < common_j ? 
common_i : common_j; 291 | w = v + k; 292 | { 293 | int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { 294 | if (c - common == lb) { diff = -1; break; } 295 | diff = q[- common] - w->s[i2]; 296 | if (diff != 0) break; 297 | common++; 298 | } 299 | } 300 | if (diff < 0) { j = k; common_j = common; } 301 | else { i = k; common_i = common; } 302 | if (j - i <= 1) { 303 | if (i > 0) break; 304 | if (j == i) break; 305 | if (first_key_inspected) break; 306 | first_key_inspected = 1; 307 | } 308 | } 309 | while(1) { 310 | w = v + i; 311 | if (common_i >= w->s_size) { 312 | z->c = c - w->s_size; 313 | if (w->function == 0) return w->result; 314 | { 315 | int res = w->function(z); 316 | z->c = c - w->s_size; 317 | if (res) return w->result; 318 | } 319 | } 320 | i = w->substring_i; 321 | if (i < 0) return 0; 322 | } 323 | } 324 | 325 | 326 | /* Increase the size of the buffer pointed to by p to at least n symbols. 327 | * If insufficient memory, returns NULL and frees the old buffer. 328 | */ 329 | static symbol * increase_size(symbol * p, int n) { 330 | symbol * q; 331 | int new_size = n + 20; 332 | void * mem = realloc((char *) p - HEAD, 333 | HEAD + (new_size + 1) * sizeof(symbol)); 334 | if (mem == NULL) { 335 | lose_s(p); 336 | return NULL; 337 | } 338 | q = (symbol *) (HEAD + (char *)mem); 339 | CAPACITY(q) = new_size; 340 | return q; 341 | } 342 | 343 | /* to replace symbols between c_bra and c_ket in z->p by the 344 | s_size symbols at s. 345 | Returns 0 on success, -1 on error. 346 | Also, frees z->p (and sets it to NULL) on error. 
347 | */ 348 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) 349 | { 350 | int adjustment; 351 | int len; 352 | if (z->p == NULL) { 353 | z->p = create_s(); 354 | if (z->p == NULL) return -1; 355 | } 356 | adjustment = s_size - (c_ket - c_bra); 357 | len = SIZE(z->p); 358 | if (adjustment != 0) { 359 | if (adjustment + len > CAPACITY(z->p)) { 360 | z->p = increase_size(z->p, adjustment + len); 361 | if (z->p == NULL) return -1; 362 | } 363 | memmove(z->p + c_ket + adjustment, 364 | z->p + c_ket, 365 | (len - c_ket) * sizeof(symbol)); 366 | SET_SIZE(z->p, adjustment + len); 367 | z->l += adjustment; 368 | if (z->c >= c_ket) 369 | z->c += adjustment; 370 | else 371 | if (z->c > c_bra) 372 | z->c = c_bra; 373 | } 374 | unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); 375 | if (adjptr != NULL) 376 | *adjptr = adjustment; 377 | return 0; 378 | } 379 | 380 | static int slice_check(struct SN_env * z) { 381 | 382 | if (z->bra < 0 || 383 | z->bra > z->ket || 384 | z->ket > z->l || 385 | z->p == NULL || 386 | z->l > SIZE(z->p)) /* this line could be removed */ 387 | { 388 | #if 0 389 | fprintf(stderr, "faulty slice operation:\n"); 390 | debug(z, -1, 0); 391 | #endif 392 | return -1; 393 | } 394 | return 0; 395 | } 396 | 397 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { 398 | if (slice_check(z)) return -1; 399 | return replace_s(z, z->bra, z->ket, s_size, s, NULL); 400 | } 401 | 402 | extern int slice_from_v(struct SN_env * z, const symbol * p) { 403 | return slice_from_s(z, SIZE(p), p); 404 | } 405 | 406 | extern int slice_del(struct SN_env * z) { 407 | return slice_from_s(z, 0, 0); 408 | } 409 | 410 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { 411 | int adjustment; 412 | if (replace_s(z, bra, ket, s_size, s, &adjustment)) 413 | return -1; 414 | if (bra <= z->bra) z->bra += adjustment; 415 | if (bra <= z->ket) z->ket 
+= adjustment; 416 | return 0; 417 | } 418 | 419 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { 420 | int adjustment; 421 | if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) 422 | return -1; 423 | if (bra <= z->bra) z->bra += adjustment; 424 | if (bra <= z->ket) z->ket += adjustment; 425 | return 0; 426 | } 427 | 428 | extern symbol * slice_to(struct SN_env * z, symbol * p) { 429 | if (slice_check(z)) { 430 | lose_s(p); 431 | return NULL; 432 | } 433 | { 434 | int len = z->ket - z->bra; 435 | if (CAPACITY(p) < len) { 436 | p = increase_size(p, len); 437 | if (p == NULL) 438 | return NULL; 439 | } 440 | memmove(p, z->p + z->bra, len * sizeof(symbol)); 441 | SET_SIZE(p, len); 442 | } 443 | return p; 444 | } 445 | 446 | extern symbol * assign_to(struct SN_env * z, symbol * p) { 447 | int len = z->l; 448 | if (CAPACITY(p) < len) { 449 | p = increase_size(p, len); 450 | if (p == NULL) 451 | return NULL; 452 | } 453 | memmove(p, z->p, len * sizeof(symbol)); 454 | SET_SIZE(p, len); 455 | return p; 456 | } 457 | 458 | #if 0 459 | extern void debug(struct SN_env * z, int number, int line_count) { 460 | int i; 461 | int limit = SIZE(z->p); 462 | /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ 463 | if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); 464 | for (i = 0; i <= limit; i++) { 465 | if (z->lb == i) printf("{"); 466 | if (z->bra == i) printf("["); 467 | if (z->c == i) printf("|"); 468 | if (z->ket == i) printf("]"); 469 | if (z->l == i) printf("}"); 470 | if (i < limit) 471 | { int ch = z->p[i]; 472 | if (ch == 0) ch = '#'; 473 | printf("%c", ch); 474 | } 475 | } 476 | printf("'\n"); 477 | } 478 | #endif 479 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_danish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C 
compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_undouble(struct SN_env * z); 14 | static int r_other_suffix(struct SN_env * z); 15 | static int r_consonant_pair(struct SN_env * z); 16 | static int r_main_suffix(struct SN_env * z); 17 | static int r_mark_regions(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 24 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 33 | static const symbol s_0_3[1] = { 'e' }; 34 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 40 | static const symbol s_0_10[2] = { 'e', 'n' }; 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 43 | static const symbol s_0_13[2] = { 'e', 'r' }; 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 46 | static const symbol s_0_16[1] = { 's' }; 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 48 | static const symbol s_0_18[2] = { 'e', 's' }; 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 51 | static const symbol s_0_21[4] = { 'e', 'n', 
'e', 's' }; 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 58 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 60 | static const symbol s_0_30[2] = { 'e', 't' }; 61 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; 62 | 63 | static const struct among a_0[32] = 64 | { 65 | /* 0 */ { 3, s_0_0, -1, 1, 0}, 66 | /* 1 */ { 5, s_0_1, 0, 1, 0}, 67 | /* 2 */ { 4, s_0_2, -1, 1, 0}, 68 | /* 3 */ { 1, s_0_3, -1, 1, 0}, 69 | /* 4 */ { 5, s_0_4, 3, 1, 0}, 70 | /* 5 */ { 4, s_0_5, 3, 1, 0}, 71 | /* 6 */ { 6, s_0_6, 5, 1, 0}, 72 | /* 7 */ { 3, s_0_7, 3, 1, 0}, 73 | /* 8 */ { 4, s_0_8, 3, 1, 0}, 74 | /* 9 */ { 3, s_0_9, 3, 1, 0}, 75 | /* 10 */ { 2, s_0_10, -1, 1, 0}, 76 | /* 11 */ { 5, s_0_11, 10, 1, 0}, 77 | /* 12 */ { 4, s_0_12, 10, 1, 0}, 78 | /* 13 */ { 2, s_0_13, -1, 1, 0}, 79 | /* 14 */ { 5, s_0_14, 13, 1, 0}, 80 | /* 15 */ { 4, s_0_15, 13, 1, 0}, 81 | /* 16 */ { 1, s_0_16, -1, 2, 0}, 82 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 83 | /* 18 */ { 2, s_0_18, 16, 1, 0}, 84 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 85 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 86 | /* 21 */ { 4, s_0_21, 18, 1, 0}, 87 | /* 22 */ { 5, s_0_22, 18, 1, 0}, 88 | /* 23 */ { 4, s_0_23, 18, 1, 0}, 89 | /* 24 */ { 3, s_0_24, 16, 1, 0}, 90 | /* 25 */ { 6, s_0_25, 24, 1, 0}, 91 | /* 26 */ { 5, s_0_26, 24, 1, 0}, 92 | /* 27 */ { 3, s_0_27, 16, 1, 0}, 93 | /* 28 */ { 3, s_0_28, 16, 1, 0}, 94 | /* 29 */ { 5, s_0_29, 28, 1, 0}, 95 | /* 30 */ { 2, s_0_30, -1, 1, 0}, 96 | /* 31 */ { 4, s_0_31, 30, 1, 0} 97 | }; 98 | 99 | static const symbol s_1_0[2] = { 'g', 'd' }; 100 | static const symbol s_1_1[2] = { 'd', 't' }; 101 | static const 
symbol s_1_2[2] = { 'g', 't' }; 102 | static const symbol s_1_3[2] = { 'k', 't' }; 103 | 104 | static const struct among a_1[4] = 105 | { 106 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 107 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 108 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 109 | /* 3 */ { 2, s_1_3, -1, -1, 0} 110 | }; 111 | 112 | static const symbol s_2_0[2] = { 'i', 'g' }; 113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; 115 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 116 | static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; 117 | 118 | static const struct among a_2[5] = 119 | { 120 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 121 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 122 | /* 2 */ { 4, s_2_2, 1, 1, 0}, 123 | /* 3 */ { 3, s_2_3, -1, 1, 0}, 124 | /* 4 */ { 4, s_2_4, -1, 2, 0} 125 | }; 126 | 127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 128 | 129 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 130 | 131 | static const symbol s_0[] = { 's', 't' }; 132 | static const symbol s_1[] = { 'i', 'g' }; 133 | static const symbol s_2[] = { 'l', 0xF8, 's' }; 134 | 135 | static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 33 */ 138 | { int ret = z->c + 3; 139 | if (0 > ret || ret > z->l) return 0; 140 | z->c = ret; /* hop, line 33 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 33 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ 146 | { /* gopast */ /* non v, line 34 */ 147 | int ret = in_grouping(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 34 */ 152 | /* try, line 35 */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | 159 | static int 
r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 41 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 41 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 41 */ 168 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 41 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 48 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; 183 | { int ret = slice_del(z); /* delete, line 50 */ 184 | if (ret < 0) return ret; 185 | } 186 | break; 187 | } 188 | return 1; 189 | } 190 | 191 | static int r_consonant_pair(struct SN_env * z) { 192 | { int m_test = z->l - z->c; /* test, line 55 */ 193 | { int mlimit; /* setlimit, line 56 */ 194 | int m1 = z->l - z->c; (void)m1; 195 | if (z->c < z->I[0]) return 0; 196 | z->c = z->I[0]; /* tomark, line 56 */ 197 | mlimit = z->lb; z->lb = z->c; 198 | z->c = z->l - m1; 199 | z->ket = z->c; /* [, line 56 */ 200 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } 201 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ 202 | z->bra = z->c; /* ], line 56 */ 203 | z->lb = mlimit; 204 | } 205 | z->c = z->l - m_test; 206 | } 207 | if (z->c <= z->lb) return 0; 208 | z->c--; /* next, line 62 */ 209 | z->bra = z->c; /* ], line 62 */ 210 | { int ret = slice_del(z); /* delete, line 62 */ 211 | if (ret < 0) return ret; 212 | } 213 | return 1; 214 | } 215 | 216 | static int r_other_suffix(struct SN_env * z) { 217 | int 
among_var; 218 | { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ 219 | z->ket = z->c; /* [, line 66 */ 220 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 221 | z->bra = z->c; /* ], line 66 */ 222 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 223 | { int ret = slice_del(z); /* delete, line 66 */ 224 | if (ret < 0) return ret; 225 | } 226 | lab0: 227 | z->c = z->l - m1; 228 | } 229 | { int mlimit; /* setlimit, line 67 */ 230 | int m2 = z->l - z->c; (void)m2; 231 | if (z->c < z->I[0]) return 0; 232 | z->c = z->I[0]; /* tomark, line 67 */ 233 | mlimit = z->lb; z->lb = z->c; 234 | z->c = z->l - m2; 235 | z->ket = z->c; /* [, line 67 */ 236 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 237 | among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ 238 | if (!(among_var)) { z->lb = mlimit; return 0; } 239 | z->bra = z->c; /* ], line 67 */ 240 | z->lb = mlimit; 241 | } 242 | switch(among_var) { 243 | case 0: return 0; 244 | case 1: 245 | { int ret = slice_del(z); /* delete, line 70 */ 246 | if (ret < 0) return ret; 247 | } 248 | { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ 249 | { int ret = r_consonant_pair(z); 250 | if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ 251 | if (ret < 0) return ret; 252 | } 253 | lab1: 254 | z->c = z->l - m3; 255 | } 256 | break; 257 | case 2: 258 | { int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */ 259 | if (ret < 0) return ret; 260 | } 261 | break; 262 | } 263 | return 1; 264 | } 265 | 266 | static int r_undouble(struct SN_env * z) { 267 | { int mlimit; /* setlimit, line 76 */ 268 | int m1 = z->l - z->c; (void)m1; 269 | if (z->c < z->I[0]) return 0; 270 | z->c = z->I[0]; /* tomark, line 76 */ 271 | mlimit = z->lb; z->lb = z->c; 272 | z->c = z->l - m1; 273 | z->ket = z->c; /* [, line 76 */ 274 | if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } 275 | z->bra = z->c; /* ], line 76 */ 276 | z->S[0] = slice_to(z, z->S[0]); /* -> 
ch, line 76 */ 277 | if (z->S[0] == 0) return -1; /* -> ch, line 76 */ 278 | z->lb = mlimit; 279 | } 280 | if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ 281 | { int ret = slice_del(z); /* delete, line 78 */ 282 | if (ret < 0) return ret; 283 | } 284 | return 1; 285 | } 286 | 287 | extern int danish_ISO_8859_1_stem(struct SN_env * z) { 288 | { int c1 = z->c; /* do, line 84 */ 289 | { int ret = r_mark_regions(z); 290 | if (ret == 0) goto lab0; /* call mark_regions, line 84 */ 291 | if (ret < 0) return ret; 292 | } 293 | lab0: 294 | z->c = c1; 295 | } 296 | z->lb = z->c; z->c = z->l; /* backwards, line 85 */ 297 | 298 | { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ 299 | { int ret = r_main_suffix(z); 300 | if (ret == 0) goto lab1; /* call main_suffix, line 86 */ 301 | if (ret < 0) return ret; 302 | } 303 | lab1: 304 | z->c = z->l - m2; 305 | } 306 | { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ 307 | { int ret = r_consonant_pair(z); 308 | if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ 309 | if (ret < 0) return ret; 310 | } 311 | lab2: 312 | z->c = z->l - m3; 313 | } 314 | { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ 315 | { int ret = r_other_suffix(z); 316 | if (ret == 0) goto lab3; /* call other_suffix, line 88 */ 317 | if (ret < 0) return ret; 318 | } 319 | lab3: 320 | z->c = z->l - m4; 321 | } 322 | { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ 323 | { int ret = r_undouble(z); 324 | if (ret == 0) goto lab4; /* call undouble, line 89 */ 325 | if (ret < 0) return ret; 326 | } 327 | lab4: 328 | z->c = z->l - m5; 329 | } 330 | z->c = z->lb; 331 | return 1; 332 | } 333 | 334 | extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } 335 | 336 | extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } 337 | 338 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_danish.h: 
-------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 9 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_dutch.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_ISO_8859_1_create_env(void); 9 | extern void dutch_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int dutch_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_english.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_ISO_8859_1_create_env(void); 9 | extern void english_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int english_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_finnish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef 
__cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_ISO_8859_1_create_env(void); 9 | extern void finnish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int finnish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_french.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * french_ISO_8859_1_create_env(void); 9 | extern void french_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int french_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_german.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int german_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_standard_suffix(struct SN_env * z); 14 | static int r_R2(struct SN_env * z); 15 | static int r_R1(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | static int r_postlude(struct SN_env * z); 18 | static int r_prelude(struct SN_env * z); 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | 24 | extern struct SN_env * german_ISO_8859_1_create_env(void); 25 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 26 | 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | static const symbol s_0_1[1] = { 'U' }; 32 | static const symbol s_0_2[1] = { 
'Y' }; 33 | static const symbol s_0_3[1] = { 0xE4 }; 34 | static const symbol s_0_4[1] = { 0xF6 }; 35 | static const symbol s_0_5[1] = { 0xFC }; 36 | 37 | static const struct among a_0[6] = 38 | { 39 | /* 0 */ { 0, 0, -1, 6, 0}, 40 | /* 1 */ { 1, s_0_1, 0, 2, 0}, 41 | /* 2 */ { 1, s_0_2, 0, 1, 0}, 42 | /* 3 */ { 1, s_0_3, 0, 3, 0}, 43 | /* 4 */ { 1, s_0_4, 0, 4, 0}, 44 | /* 5 */ { 1, s_0_5, 0, 5, 0} 45 | }; 46 | 47 | static const symbol s_1_0[1] = { 'e' }; 48 | static const symbol s_1_1[2] = { 'e', 'm' }; 49 | static const symbol s_1_2[2] = { 'e', 'n' }; 50 | static const symbol s_1_3[3] = { 'e', 'r', 'n' }; 51 | static const symbol s_1_4[2] = { 'e', 'r' }; 52 | static const symbol s_1_5[1] = { 's' }; 53 | static const symbol s_1_6[2] = { 'e', 's' }; 54 | 55 | static const struct among a_1[7] = 56 | { 57 | /* 0 */ { 1, s_1_0, -1, 2, 0}, 58 | /* 1 */ { 2, s_1_1, -1, 1, 0}, 59 | /* 2 */ { 2, s_1_2, -1, 2, 0}, 60 | /* 3 */ { 3, s_1_3, -1, 1, 0}, 61 | /* 4 */ { 2, s_1_4, -1, 1, 0}, 62 | /* 5 */ { 1, s_1_5, -1, 3, 0}, 63 | /* 6 */ { 2, s_1_6, 5, 2, 0} 64 | }; 65 | 66 | static const symbol s_2_0[2] = { 'e', 'n' }; 67 | static const symbol s_2_1[2] = { 'e', 'r' }; 68 | static const symbol s_2_2[2] = { 's', 't' }; 69 | static const symbol s_2_3[3] = { 'e', 's', 't' }; 70 | 71 | static const struct among a_2[4] = 72 | { 73 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 74 | /* 1 */ { 2, s_2_1, -1, 1, 0}, 75 | /* 2 */ { 2, s_2_2, -1, 2, 0}, 76 | /* 3 */ { 3, s_2_3, 2, 1, 0} 77 | }; 78 | 79 | static const symbol s_3_0[2] = { 'i', 'g' }; 80 | static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; 81 | 82 | static const struct among a_3[2] = 83 | { 84 | /* 0 */ { 2, s_3_0, -1, 1, 0}, 85 | /* 1 */ { 4, s_3_1, -1, 1, 0} 86 | }; 87 | 88 | static const symbol s_4_0[3] = { 'e', 'n', 'd' }; 89 | static const symbol s_4_1[2] = { 'i', 'g' }; 90 | static const symbol s_4_2[3] = { 'u', 'n', 'g' }; 91 | static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; 92 | static const symbol s_4_4[4] = { 'i', 
's', 'c', 'h' }; 93 | static const symbol s_4_5[2] = { 'i', 'k' }; 94 | static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; 95 | static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; 96 | 97 | static const struct among a_4[8] = 98 | { 99 | /* 0 */ { 3, s_4_0, -1, 1, 0}, 100 | /* 1 */ { 2, s_4_1, -1, 2, 0}, 101 | /* 2 */ { 3, s_4_2, -1, 1, 0}, 102 | /* 3 */ { 4, s_4_3, -1, 3, 0}, 103 | /* 4 */ { 4, s_4_4, -1, 2, 0}, 104 | /* 5 */ { 2, s_4_5, -1, 2, 0}, 105 | /* 6 */ { 4, s_4_6, -1, 3, 0}, 106 | /* 7 */ { 4, s_4_7, -1, 4, 0} 107 | }; 108 | 109 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; 110 | 111 | static const unsigned char g_s_ending[] = { 117, 30, 5 }; 112 | 113 | static const unsigned char g_st_ending[] = { 117, 30, 4 }; 114 | 115 | static const symbol s_0[] = { 0xDF }; 116 | static const symbol s_1[] = { 's', 's' }; 117 | static const symbol s_2[] = { 'u' }; 118 | static const symbol s_3[] = { 'U' }; 119 | static const symbol s_4[] = { 'y' }; 120 | static const symbol s_5[] = { 'Y' }; 121 | static const symbol s_6[] = { 'y' }; 122 | static const symbol s_7[] = { 'u' }; 123 | static const symbol s_8[] = { 'a' }; 124 | static const symbol s_9[] = { 'o' }; 125 | static const symbol s_10[] = { 'u' }; 126 | static const symbol s_11[] = { 's' }; 127 | static const symbol s_12[] = { 'n', 'i', 's' }; 128 | static const symbol s_13[] = { 'i', 'g' }; 129 | static const symbol s_14[] = { 'e' }; 130 | static const symbol s_15[] = { 'e' }; 131 | static const symbol s_16[] = { 'e', 'r' }; 132 | static const symbol s_17[] = { 'e', 'n' }; 133 | 134 | static int r_prelude(struct SN_env * z) { 135 | { int c_test = z->c; /* test, line 35 */ 136 | while(1) { /* repeat, line 35 */ 137 | int c1 = z->c; 138 | { int c2 = z->c; /* or, line 38 */ 139 | z->bra = z->c; /* [, line 37 */ 140 | if (!(eq_s(z, 1, s_0))) goto lab2; 141 | z->ket = z->c; /* ], line 37 */ 142 | { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ 
143 | if (ret < 0) return ret; 144 | } 145 | goto lab1; 146 | lab2: 147 | z->c = c2; 148 | if (z->c >= z->l) goto lab0; 149 | z->c++; /* next, line 38 */ 150 | } 151 | lab1: 152 | continue; 153 | lab0: 154 | z->c = c1; 155 | break; 156 | } 157 | z->c = c_test; 158 | } 159 | while(1) { /* repeat, line 41 */ 160 | int c3 = z->c; 161 | while(1) { /* goto, line 41 */ 162 | int c4 = z->c; 163 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; 164 | z->bra = z->c; /* [, line 42 */ 165 | { int c5 = z->c; /* or, line 42 */ 166 | if (!(eq_s(z, 1, s_2))) goto lab6; 167 | z->ket = z->c; /* ], line 42 */ 168 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab6; 169 | { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ 170 | if (ret < 0) return ret; 171 | } 172 | goto lab5; 173 | lab6: 174 | z->c = c5; 175 | if (!(eq_s(z, 1, s_4))) goto lab4; 176 | z->ket = z->c; /* ], line 43 */ 177 | if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; 178 | { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ 179 | if (ret < 0) return ret; 180 | } 181 | } 182 | lab5: 183 | z->c = c4; 184 | break; 185 | lab4: 186 | z->c = c4; 187 | if (z->c >= z->l) goto lab3; 188 | z->c++; /* goto, line 41 */ 189 | } 190 | continue; 191 | lab3: 192 | z->c = c3; 193 | break; 194 | } 195 | return 1; 196 | } 197 | 198 | static int r_mark_regions(struct SN_env * z) { 199 | z->I[0] = z->l; 200 | z->I[1] = z->l; 201 | { int c_test = z->c; /* test, line 52 */ 202 | { int ret = z->c + 3; 203 | if (0 > ret || ret > z->l) return 0; 204 | z->c = ret; /* hop, line 52 */ 205 | } 206 | z->I[2] = z->c; /* setmark x, line 52 */ 207 | z->c = c_test; 208 | } 209 | { /* gopast */ /* grouping v, line 54 */ 210 | int ret = out_grouping(z, g_v, 97, 252, 1); 211 | if (ret < 0) return 0; 212 | z->c += ret; 213 | } 214 | { /* gopast */ /* non v, line 54 */ 215 | int ret = in_grouping(z, g_v, 97, 252, 1); 216 | if (ret < 0) return 0; 217 | z->c += ret; 218 | } 219 | z->I[0] = z->c; /* setmark p1, line 54 */ 220 | /* try, line 55 */ 221 | 
if (!(z->I[0] < z->I[2])) goto lab0; 222 | z->I[0] = z->I[2]; 223 | lab0: 224 | { /* gopast */ /* grouping v, line 56 */ 225 | int ret = out_grouping(z, g_v, 97, 252, 1); 226 | if (ret < 0) return 0; 227 | z->c += ret; 228 | } 229 | { /* gopast */ /* non v, line 56 */ 230 | int ret = in_grouping(z, g_v, 97, 252, 1); 231 | if (ret < 0) return 0; 232 | z->c += ret; 233 | } 234 | z->I[1] = z->c; /* setmark p2, line 56 */ 235 | return 1; 236 | } 237 | 238 | static int r_postlude(struct SN_env * z) { 239 | int among_var; 240 | while(1) { /* repeat, line 60 */ 241 | int c1 = z->c; 242 | z->bra = z->c; /* [, line 62 */ 243 | among_var = find_among(z, a_0, 6); /* substring, line 62 */ 244 | if (!(among_var)) goto lab0; 245 | z->ket = z->c; /* ], line 62 */ 246 | switch(among_var) { 247 | case 0: goto lab0; 248 | case 1: 249 | { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ 250 | if (ret < 0) return ret; 251 | } 252 | break; 253 | case 2: 254 | { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ 255 | if (ret < 0) return ret; 256 | } 257 | break; 258 | case 3: 259 | { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ 260 | if (ret < 0) return ret; 261 | } 262 | break; 263 | case 4: 264 | { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ 265 | if (ret < 0) return ret; 266 | } 267 | break; 268 | case 5: 269 | { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ 270 | if (ret < 0) return ret; 271 | } 272 | break; 273 | case 6: 274 | if (z->c >= z->l) goto lab0; 275 | z->c++; /* next, line 68 */ 276 | break; 277 | } 278 | continue; 279 | lab0: 280 | z->c = c1; 281 | break; 282 | } 283 | return 1; 284 | } 285 | 286 | static int r_R1(struct SN_env * z) { 287 | if (!(z->I[0] <= z->c)) return 0; 288 | return 1; 289 | } 290 | 291 | static int r_R2(struct SN_env * z) { 292 | if (!(z->I[1] <= z->c)) return 0; 293 | return 1; 294 | } 295 | 296 | static int r_standard_suffix(struct SN_env * z) { 297 | int among_var; 298 | { int m1 = z->l - z->c; (void)m1; /* do, 
line 79 */ 299 | z->ket = z->c; /* [, line 80 */ 300 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; 301 | among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ 302 | if (!(among_var)) goto lab0; 303 | z->bra = z->c; /* ], line 80 */ 304 | { int ret = r_R1(z); 305 | if (ret == 0) goto lab0; /* call R1, line 80 */ 306 | if (ret < 0) return ret; 307 | } 308 | switch(among_var) { 309 | case 0: goto lab0; 310 | case 1: 311 | { int ret = slice_del(z); /* delete, line 82 */ 312 | if (ret < 0) return ret; 313 | } 314 | break; 315 | case 2: 316 | { int ret = slice_del(z); /* delete, line 85 */ 317 | if (ret < 0) return ret; 318 | } 319 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ 320 | z->ket = z->c; /* [, line 86 */ 321 | if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } 322 | z->bra = z->c; /* ], line 86 */ 323 | if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } 324 | { int ret = slice_del(z); /* delete, line 86 */ 325 | if (ret < 0) return ret; 326 | } 327 | lab1: 328 | ; 329 | } 330 | break; 331 | case 3: 332 | if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0; 333 | { int ret = slice_del(z); /* delete, line 89 */ 334 | if (ret < 0) return ret; 335 | } 336 | break; 337 | } 338 | lab0: 339 | z->c = z->l - m1; 340 | } 341 | { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ 342 | z->ket = z->c; /* [, line 94 */ 343 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; 344 | among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ 345 | if (!(among_var)) goto lab2; 346 | z->bra = z->c; /* ], line 94 */ 347 | { int ret = r_R1(z); 348 | if (ret == 0) goto lab2; /* call R1, line 94 */ 349 | if (ret < 0) return ret; 350 | } 351 | switch(among_var) { 352 | case 0: goto lab2; 353 | case 1: 354 | { int ret = slice_del(z); /* delete, line 96 */ 355 | if (ret < 0) return ret; 356 | } 357 | break; 
358 | case 2: 359 | if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab2; 360 | { int ret = z->c - 3; 361 | if (z->lb > ret || ret > z->l) goto lab2; 362 | z->c = ret; /* hop, line 99 */ 363 | } 364 | { int ret = slice_del(z); /* delete, line 99 */ 365 | if (ret < 0) return ret; 366 | } 367 | break; 368 | } 369 | lab2: 370 | z->c = z->l - m2; 371 | } 372 | { int m3 = z->l - z->c; (void)m3; /* do, line 103 */ 373 | z->ket = z->c; /* [, line 104 */ 374 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; 375 | among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ 376 | if (!(among_var)) goto lab3; 377 | z->bra = z->c; /* ], line 104 */ 378 | { int ret = r_R2(z); 379 | if (ret == 0) goto lab3; /* call R2, line 104 */ 380 | if (ret < 0) return ret; 381 | } 382 | switch(among_var) { 383 | case 0: goto lab3; 384 | case 1: 385 | { int ret = slice_del(z); /* delete, line 106 */ 386 | if (ret < 0) return ret; 387 | } 388 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ 389 | z->ket = z->c; /* [, line 107 */ 390 | if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } 391 | z->bra = z->c; /* ], line 107 */ 392 | { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ 393 | if (!(eq_s_b(z, 1, s_14))) goto lab5; 394 | { z->c = z->l - m_keep; goto lab4; } 395 | lab5: 396 | z->c = z->l - m4; 397 | } 398 | { int ret = r_R2(z); 399 | if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ 400 | if (ret < 0) return ret; 401 | } 402 | { int ret = slice_del(z); /* delete, line 107 */ 403 | if (ret < 0) return ret; 404 | } 405 | lab4: 406 | ; 407 | } 408 | break; 409 | case 2: 410 | { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ 411 | if (!(eq_s_b(z, 1, s_15))) goto lab6; 412 | goto lab3; 413 | lab6: 414 | z->c = z->l - m5; 415 | } 416 | { int ret = slice_del(z); /* delete, line 110 */ 417 | if (ret < 0) return ret; 418 | } 419 | break; 420 | case 3: 421 | { int 
ret = slice_del(z); /* delete, line 113 */ 422 | if (ret < 0) return ret; 423 | } 424 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ 425 | z->ket = z->c; /* [, line 115 */ 426 | { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ 427 | if (!(eq_s_b(z, 2, s_16))) goto lab9; 428 | goto lab8; 429 | lab9: 430 | z->c = z->l - m6; 431 | if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; } 432 | } 433 | lab8: 434 | z->bra = z->c; /* ], line 115 */ 435 | { int ret = r_R1(z); 436 | if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ 437 | if (ret < 0) return ret; 438 | } 439 | { int ret = slice_del(z); /* delete, line 115 */ 440 | if (ret < 0) return ret; 441 | } 442 | lab7: 443 | ; 444 | } 445 | break; 446 | case 4: 447 | { int ret = slice_del(z); /* delete, line 119 */ 448 | if (ret < 0) return ret; 449 | } 450 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ 451 | z->ket = z->c; /* [, line 121 */ 452 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } 453 | among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ 454 | if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } 455 | z->bra = z->c; /* ], line 121 */ 456 | { int ret = r_R2(z); 457 | if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ 458 | if (ret < 0) return ret; 459 | } 460 | switch(among_var) { 461 | case 0: { z->c = z->l - m_keep; goto lab10; } 462 | case 1: 463 | { int ret = slice_del(z); /* delete, line 123 */ 464 | if (ret < 0) return ret; 465 | } 466 | break; 467 | } 468 | lab10: 469 | ; 470 | } 471 | break; 472 | } 473 | lab3: 474 | z->c = z->l - m3; 475 | } 476 | return 1; 477 | } 478 | 479 | extern int german_ISO_8859_1_stem(struct SN_env * z) { 480 | { int c1 = z->c; /* do, line 134 */ 481 | { int ret = r_prelude(z); 482 | if (ret == 0) goto lab0; /* call prelude, line 134 */ 483 | if (ret < 0) return ret; 484 | } 485 | lab0: 486 
| z->c = c1; 487 | } 488 | { int c2 = z->c; /* do, line 135 */ 489 | { int ret = r_mark_regions(z); 490 | if (ret == 0) goto lab1; /* call mark_regions, line 135 */ 491 | if (ret < 0) return ret; 492 | } 493 | lab1: 494 | z->c = c2; 495 | } 496 | z->lb = z->c; z->c = z->l; /* backwards, line 136 */ 497 | 498 | { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ 499 | { int ret = r_standard_suffix(z); 500 | if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ 501 | if (ret < 0) return ret; 502 | } 503 | lab2: 504 | z->c = z->l - m3; 505 | } 506 | z->c = z->lb; 507 | { int c4 = z->c; /* do, line 138 */ 508 | { int ret = r_postlude(z); 509 | if (ret == 0) goto lab3; /* call postlude, line 138 */ 510 | if (ret < 0) return ret; 511 | } 512 | lab3: 513 | z->c = c4; 514 | } 515 | return 1; 516 | } 517 | 518 | extern struct SN_env * german_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } 519 | 520 | extern void german_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 521 | 522 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_german.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * german_ISO_8859_1_create_env(void); 9 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int german_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern 
struct SN_env * hungarian_ISO_8859_1_create_env(void); 9 | extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int hungarian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_italian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_ISO_8859_1_create_env(void); 9 | extern void italian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int italian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 23 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[1] = { 'e' }; 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 32 | static const symbol s_0_3[4] = { 'a', 'n', 
'd', 'e' }; 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 37 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 38 | static const symbol s_0_9[2] = { 'e', 'n' }; 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 40 | static const symbol s_0_11[2] = { 'a', 'r' }; 41 | static const symbol s_0_12[2] = { 'e', 'r' }; 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 43 | static const symbol s_0_14[1] = { 's' }; 44 | static const symbol s_0_15[2] = { 'a', 's' }; 45 | static const symbol s_0_16[2] = { 'e', 's' }; 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 48 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' }; 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 54 | static const symbol s_0_25[2] = { 'e', 't' }; 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 57 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 58 | 59 | static const struct among a_0[29] = 60 | { 61 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 62 | /* 1 */ { 1, s_0_1, -1, 1, 0}, 63 | /* 2 */ { 3, s_0_2, 1, 1, 0}, 64 | /* 3 */ { 4, s_0_3, 1, 1, 0}, 65 | /* 4 */ { 4, s_0_4, 1, 1, 0}, 66 | /* 5 */ { 3, s_0_5, 1, 1, 0}, 67 | /* 6 */ { 3, s_0_6, 1, 1, 0}, 68 | /* 7 */ { 6, s_0_7, 6, 1, 0}, 69 | /* 8 */ { 4, s_0_8, 1, 3, 0}, 70 | /* 9 */ { 2, s_0_9, -1, 1, 0}, 71 | /* 10 */ { 5, s_0_10, 9, 1, 0}, 72 | /* 11 */ { 2, s_0_11, -1, 1, 0}, 73 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 74 | /* 13 */ 
{ 5, s_0_13, 12, 1, 0}, 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, 76 | /* 15 */ { 2, s_0_15, 14, 1, 0}, 77 | /* 16 */ { 2, s_0_16, 14, 1, 0}, 78 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 79 | /* 18 */ { 5, s_0_18, 16, 1, 0}, 80 | /* 19 */ { 4, s_0_19, 16, 1, 0}, 81 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 82 | /* 21 */ { 3, s_0_21, 14, 1, 0}, 83 | /* 22 */ { 6, s_0_22, 21, 1, 0}, 84 | /* 23 */ { 3, s_0_23, 14, 1, 0}, 85 | /* 24 */ { 3, s_0_24, 14, 1, 0}, 86 | /* 25 */ { 2, s_0_25, -1, 1, 0}, 87 | /* 26 */ { 3, s_0_26, 25, 1, 0}, 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, 89 | /* 28 */ { 3, s_0_28, -1, 1, 0} 90 | }; 91 | 92 | static const symbol s_1_0[2] = { 'd', 't' }; 93 | static const symbol s_1_1[2] = { 'v', 't' }; 94 | 95 | static const struct among a_1[2] = 96 | { 97 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 98 | /* 1 */ { 2, s_1_1, -1, -1, 0} 99 | }; 100 | 101 | static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 103 | static const symbol s_2_2[2] = { 'i', 'g' }; 104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 107 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' }; 109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 112 | 113 | static const struct among a_2[11] = 114 | { 115 | /* 0 */ { 3, s_2_0, -1, 1, 0}, 116 | /* 1 */ { 4, s_2_1, 0, 1, 0}, 117 | /* 2 */ { 2, s_2_2, -1, 1, 0}, 118 | /* 3 */ { 3, s_2_3, 2, 1, 0}, 119 | /* 4 */ { 3, s_2_4, 2, 1, 0}, 120 | /* 5 */ { 4, s_2_5, 4, 1, 0}, 121 | /* 6 */ { 3, s_2_6, -1, 1, 0}, 122 | /* 7 */ { 3, s_2_7, -1, 1, 0}, 123 | /* 8 */ { 4, s_2_8, 7, 1, 0}, 124 | /* 9 */ { 4, s_2_9, 7, 1, 0}, 125 | /* 10 */ { 7, s_2_10, 9, 1, 0} 126 | }; 127 | 128 | static const unsigned 
char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | /* g_v and g_s_ending are the grouping tables handed to the in/out grouping helpers below. NOTE(review): presumably bitmaps of member characters, offset from the min code given at each call site - confirm against runtime/utilities.c */ 130 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 131 | /* literal strings used by r_main_suffix: s_0 is "k", s_1 is "er" */ 132 | static const symbol s_0[] = { 'k' }; 133 | static const symbol s_1[] = { 'e', 'r' }; 134 | /* r_mark_regions: sets the two integer marks used by the suffix routines. I[1] ("x") is the cursor after hopping 3 symbols forward; I[0] ("p1") is where the goto/gopast scan over g_v stops, raised to x when it would fall before x. Returns 0 if fewer than 3 symbols remain or a scan fails (I[0] then stays at l), 1 otherwise. The cursor is left wherever the scan stopped; the caller restores it. */ 135 | static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 30: cursor is saved here and restored after the hop */ 138 | { int ret = z->c + 3; 139 | if (0 > ret || ret > z->l) return 0; 140 | z->c = ret; /* hop, line 30 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 30 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ 146 | { /* gopast */ /* non v, line 31 */ 147 | int ret = in_grouping(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 31 */ 152 | /* try, line 32: if p1 < x then p1 = x */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | /* r_main_suffix: working backwards with lb temporarily moved to p1 (I[0]), matches an ending from a_0 via find_among_b and acts on its result code. 1: delete the ending. 2: delete it when in_grouping_b on g_s_ending returns 0, or otherwise when the preceding text is "k" and out_grouping_b on g_v then returns 0. 3: replace it with "er". Returns 1 after an edit, 0 when nothing applies, or the negative value from a failed slice operation. */ 159 | static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 38: mlimit keeps the old lb, which is restored on every exit path of this block */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 38 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 38 */ 168 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject before the table search: the last byte must lie in 0x60-0x7f and be selected by the bitmask */ 169 | among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 38 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 44 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | { int m2 = z->l - z->c; (void)m2; /* or, line 46: first alternative is the g_s_ending test, second is "k" plus the g_v test */ 183 | if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; 184 | goto lab0; 185 | lab1: 186 | z->c =
z->l - m2; 187 | if (!(eq_s_b(z, 1, s_0))) return 0; 188 | if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; 189 | } 190 | lab0: 191 | { int ret = slice_del(z); /* delete, line 46 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 3: 196 | { int ret = slice_from_s(z, 2, s_1); /* <-, line 48: the slice is replaced by the 2 symbols of s_1 */ 197 | if (ret < 0) return ret; 198 | } 199 | break; 200 | } 201 | return 1; 202 | } 203 | /* r_consonant_pair: as a test (cursor restored afterwards), requires the text before the cursor, limited to the region from p1 (I[0]), to end in 't' (116) and to match an a_1 entry ("dt" or "vt"). ket is set at the cursor inside the test; the routine then steps back one symbol, sets bra there and deletes the slice, i.e. the final symbol. Returns 1 after the deletion, 0 when the test fails, negative on slice failure. */ 204 | static int r_consonant_pair(struct SN_env * z) { 205 | { int m_test = z->l - z->c; /* test, line 53 */ 206 | { int mlimit; /* setlimit, line 54: lb is moved to p1 and restored from mlimit on every exit path */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 54 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | z->ket = z->c; /* [, line 54 */ 213 | if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } 214 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ 215 | z->bra = z->c; /* ], line 54 */ 216 | z->lb = mlimit; 217 | } 218 | z->c = z->l - m_test; 219 | } 220 | if (z->c <= z->lb) return 0; 221 | z->c--; /* next, line 59 */ 222 | z->bra = z->c; /* ], line 59 */ 223 | { int ret = slice_del(z); /* delete, line 59 */ 224 | if (ret < 0) return ret; 225 | } 226 | return 1; 227 | } 228 | /* r_other_suffix: within the region from p1 (I[0]), matches an ending from a_2 via find_among_b; every a_2 entry carries result code 1, which deletes the ending. Returns 1 after the deletion, 0 when nothing matches, negative on slice failure. */ 229 | static int r_other_suffix(struct SN_env * z) { 230 | int among_var; 231 | { int mlimit; /* setlimit, line 63: lb is moved to p1 and restored from mlimit on every exit path */ 232 | int m1 = z->l - z->c; (void)m1; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 63 */ 235 | mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m1; 237 | z->ket = z->c; /* [, line 63 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject on the last byte, same scheme as in r_main_suffix */ 239 | among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0; } 241 | z->bra = z->c; /* ], line 63 */ 242 | z->lb = mlimit; 243 | } 244 | switch(among_var) { 245 | case 0: return 0; 246 | case
1: 247 | { int ret = slice_del(z); /* delete, line 67 */ 248 | if (ret < 0) return ret; 249 | } 250 | break; 251 | } 252 | return 1; 253 | } 254 | /* norwegian_ISO_8859_1_stem: entry point. Runs r_mark_regions forwards and restores the cursor, then switches to backward processing (lb = cursor, cursor = l) and runs r_main_suffix, r_consonant_pair and r_other_suffix in that order, restoring the cursor after each. A 0 result from a step is ignored; a negative result is returned immediately. Finally moves the cursor to lb and returns 1. */ 255 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { 256 | { int c1 = z->c; /* do, line 74 */ 257 | { int ret = r_mark_regions(z); 258 | if (ret == 0) goto lab0; /* call mark_regions, line 74 */ 259 | if (ret < 0) return ret; 260 | } 261 | lab0: 262 | z->c = c1; 263 | } 264 | z->lb = z->c; z->c = z->l; /* backwards, line 75 */ 265 | 266 | { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ 267 | { int ret = r_main_suffix(z); 268 | if (ret == 0) goto lab1; /* call main_suffix, line 76 */ 269 | if (ret < 0) return ret; 270 | } 271 | lab1: 272 | z->c = z->l - m2; 273 | } 274 | { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ 275 | { int ret = r_consonant_pair(z); 276 | if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ 277 | if (ret < 0) return ret; 278 | } 279 | lab2: 280 | z->c = z->l - m3; 281 | } 282 | { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ 283 | { int ret = r_other_suffix(z); 284 | if (ret == 0) goto lab3; /* call other_suffix, line 78 */ 285 | if (ret < 0) return ret; 286 | } 287 | lab3: 288 | z->c = z->l - m4; 289 | } 290 | z->c = z->lb; 291 | return 1; 292 | } 293 | /* Environment constructor. NOTE(review): the arguments presumably request 0 string slots, 2 integer slots (I[0] and I[1] are the only ones used above) and 0 boolean slots - confirm against runtime/api.c */ 294 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } 295 | /* Environment destructor; the second argument matches the first argument given to SN_create_env above. */ 296 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 297 | 298 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 9 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int
norwegian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_porter.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * porter_ISO_8859_1_create_env(void); 9 | extern void porter_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int porter_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_ISO_8859_1_create_env(void); 9 | extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_spanish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * spanish_ISO_8859_1_create_env(void); 9 | extern void spanish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int spanish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | 
-------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_swedish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 23 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 34 | static const symbol s_0_5[2] = { 'a', 'd' }; 35 | static const symbol s_0_6[1] = { 'e' }; 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 37 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 41 | static const symbol s_0_12[2] = { 'e', 'n' }; 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 43 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 46 | static const symbol s_0_17[2] = { 'a', 'r' }; 47 | static 
const symbol s_0_18[2] = { 'e', 'r' }; 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 49 | static const symbol s_0_20[2] = { 'o', 'r' }; 50 | static const symbol s_0_21[1] = { 's' }; 51 | static const symbol s_0_22[2] = { 'a', 's' }; 52 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 55 | static const symbol s_0_26[2] = { 'e', 's' }; 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 62 | static const symbol s_0_33[2] = { 'a', 't' }; 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 65 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 66 | 67 | static const struct among a_0[37] = 68 | { 69 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 70 | /* 1 */ { 4, s_0_1, 0, 1, 0}, 71 | /* 2 */ { 4, s_0_2, 0, 1, 0}, 72 | /* 3 */ { 7, s_0_3, 2, 1, 0}, 73 | /* 4 */ { 4, s_0_4, 0, 1, 0}, 74 | /* 5 */ { 2, s_0_5, -1, 1, 0}, 75 | /* 6 */ { 1, s_0_6, -1, 1, 0}, 76 | /* 7 */ { 3, s_0_7, 6, 1, 0}, 77 | /* 8 */ { 4, s_0_8, 6, 1, 0}, 78 | /* 9 */ { 4, s_0_9, 6, 1, 0}, 79 | /* 10 */ { 3, s_0_10, 6, 1, 0}, 80 | /* 11 */ { 4, s_0_11, 6, 1, 0}, 81 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 82 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 83 | /* 14 */ { 4, s_0_14, 12, 1, 0}, 84 | /* 15 */ { 5, s_0_15, 12, 1, 0}, 85 | /* 16 */ { 3, s_0_16, -1, 1, 0}, 86 | /* 17 */ { 2, s_0_17, -1, 1, 0}, 87 | /* 18 */ { 2, s_0_18, -1, 1, 0}, 88 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 89 | /* 20 */ { 2, s_0_20, -1, 1, 0}, 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, 91 | /* 22 */ { 2, s_0_22, 
21, 1, 0}, 92 | /* 23 */ { 5, s_0_23, 22, 1, 0}, 93 | /* 24 */ { 5, s_0_24, 22, 1, 0}, 94 | /* 25 */ { 5, s_0_25, 22, 1, 0}, 95 | /* 26 */ { 2, s_0_26, 21, 1, 0}, 96 | /* 27 */ { 4, s_0_27, 26, 1, 0}, 97 | /* 28 */ { 5, s_0_28, 26, 1, 0}, 98 | /* 29 */ { 3, s_0_29, 21, 1, 0}, 99 | /* 30 */ { 5, s_0_30, 29, 1, 0}, 100 | /* 31 */ { 6, s_0_31, 29, 1, 0}, 101 | /* 32 */ { 4, s_0_32, 21, 1, 0}, 102 | /* 33 */ { 2, s_0_33, -1, 1, 0}, 103 | /* 34 */ { 5, s_0_34, -1, 1, 0}, 104 | /* 35 */ { 3, s_0_35, -1, 1, 0}, 105 | /* 36 */ { 3, s_0_36, -1, 1, 0} 106 | }; 107 | 108 | static const symbol s_1_0[2] = { 'd', 'd' }; 109 | static const symbol s_1_1[2] = { 'g', 'd' }; 110 | static const symbol s_1_2[2] = { 'n', 'n' }; 111 | static const symbol s_1_3[2] = { 'd', 't' }; 112 | static const symbol s_1_4[2] = { 'g', 't' }; 113 | static const symbol s_1_5[2] = { 'k', 't' }; 114 | static const symbol s_1_6[2] = { 't', 't' }; 115 | 116 | static const struct among a_1[7] = 117 | { 118 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 119 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 120 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 121 | /* 3 */ { 2, s_1_3, -1, -1, 0}, 122 | /* 4 */ { 2, s_1_4, -1, -1, 0}, 123 | /* 5 */ { 2, s_1_5, -1, -1, 0}, 124 | /* 6 */ { 2, s_1_6, -1, -1, 0} 125 | }; 126 | 127 | static const symbol s_2_0[2] = { 'i', 'g' }; 128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 129 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 130 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; 131 | static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; 132 | 133 | static const struct among a_2[5] = 134 | { 135 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 136 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 137 | /* 2 */ { 3, s_2_2, -1, 1, 0}, 138 | /* 3 */ { 5, s_2_3, -1, 3, 0}, 139 | /* 4 */ { 4, s_2_4, -1, 2, 0} 140 | }; 141 | 142 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; 143 | 144 | static const unsigned char g_s_ending[] = { 119, 127, 149 }; 145 | 146 | static 
const symbol s_0[] = { 'l', 0xF6, 's' }; 147 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; 148 | /* r_mark_regions: sets the two integer marks used by the suffix routines. I[1] ("x") is the cursor after hopping 3 symbols forward; I[0] ("p1") is where the goto/gopast scan over g_v stops, raised to x when it would fall before x. Returns 0 if fewer than 3 symbols remain or a scan fails (I[0] then stays at l), 1 otherwise. The cursor is left wherever the scan stopped; the caller restores it. */ 149 | static int r_mark_regions(struct SN_env * z) { 150 | z->I[0] = z->l; 151 | { int c_test = z->c; /* test, line 29: cursor is saved here and restored after the hop */ 152 | { int ret = z->c + 3; 153 | if (0 > ret || ret > z->l) return 0; 154 | z->c = ret; /* hop, line 29 */ 155 | } 156 | z->I[1] = z->c; /* setmark x, line 29 */ 157 | z->c = c_test; 158 | } 159 | if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ 160 | { /* gopast */ /* non v, line 30 */ 161 | int ret = in_grouping(z, g_v, 97, 246, 1); 162 | if (ret < 0) return 0; 163 | z->c += ret; 164 | } 165 | z->I[0] = z->c; /* setmark p1, line 30 */ 166 | /* try, line 31: if p1 < x then p1 = x */ 167 | if (!(z->I[0] < z->I[1])) goto lab0; 168 | z->I[0] = z->I[1]; 169 | lab0: 170 | return 1; 171 | } 172 | /* r_main_suffix: working backwards with lb temporarily moved to p1 (I[0]), matches an ending from a_0 via find_among_b and acts on its result code. 1: delete the ending. 2: delete it only when in_grouping_b on g_s_ending returns 0, otherwise return 0. Returns 1 after a deletion, 0 when nothing applies, or the negative value from a failed slice operation. */ 173 | static int r_main_suffix(struct SN_env * z) { 174 | int among_var; 175 | { int mlimit; /* setlimit, line 37: mlimit keeps the old lb, which is restored on every exit path of this block */ 176 | int m1 = z->l - z->c; (void)m1; 177 | if (z->c < z->I[0]) return 0; 178 | z->c = z->I[0]; /* tomark, line 37 */ 179 | mlimit = z->lb; z->lb = z->c; 180 | z->c = z->l - m1; 181 | z->ket = z->c; /* [, line 37 */ 182 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject before the table search: the last byte must lie in 0x60-0x7f and be selected by the bitmask */ 183 | among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ 184 | if (!(among_var)) { z->lb = mlimit; return 0; } 185 | z->bra = z->c; /* ], line 37 */ 186 | z->lb = mlimit; 187 | } 188 | switch(among_var) { 189 | case 0: return 0; 190 | case 1: 191 | { int ret = slice_del(z); /* delete, line 44 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 2: 196 | if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; 197 | { int ret = slice_del(z); /* delete, line 46 */ 198 | if (ret < 0) return ret; 199 | } 200 | break; 201 | } 202 | return 1; 203 | } 204 | /* r_consonant_pair: within the region from p1 (I[0]), requires the text before the cursor to match one of the seven two-symbol entries of a_1; the cursor is then restored, ket is set there, the cursor steps back one symbol, bra is set and the slice - the final symbol - is deleted. Returns 1 after the deletion, 0 when the match fails, negative on slice failure (lb is not restored on that error path). */ 205 | static int r_consonant_pair(struct SN_env * z) { 206 | { int mlimit; /* setlimit, line 50: lb is moved to p1 and restored from mlimit on the normal exit paths */ 207 | int m1
= z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 50 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | { int m2 = z->l - z->c; (void)m2; /* and, line 52: both the among match and the delete start from the same cursor position */ 213 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject on the last byte before the table search */ 214 | if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ 215 | z->c = z->l - m2; 216 | z->ket = z->c; /* [, line 52 */ 217 | if (z->c <= z->lb) { z->lb = mlimit; return 0; } 218 | z->c--; /* next, line 52 */ 219 | z->bra = z->c; /* ], line 52 */ 220 | { int ret = slice_del(z); /* delete, line 52 */ 221 | if (ret < 0) return ret; 222 | } 223 | } 224 | z->lb = mlimit; 225 | } 226 | return 1; 227 | } 228 | /* r_other_suffix: within the region from p1 (I[0]), matches an ending from a_2 via find_among_b and acts on its result code. 1: delete the ending. 2: replace it with the 3 symbols of s_0 ('l', 0xF6, 's'). 3: replace it with the 4 symbols of s_1 ("full"). Returns 1 after an edit, 0 when nothing matches, negative on slice failure. */ 229 | static int r_other_suffix(struct SN_env * z) { 230 | int among_var; 231 | { int mlimit; /* setlimit, line 55: unlike r_main_suffix, the switch below runs while lb is still moved to p1 */ 232 | int m1 = z->l - z->c; (void)m1; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 55 */ 235 | mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m1; 237 | z->ket = z->c; /* [, line 56 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject on the last byte before the table search */ 239 | among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0; } 241 | z->bra = z->c; /* ], line 56 */ 242 | switch(among_var) { 243 | case 0: { z->lb = mlimit; return 0; } 244 | case 1: 245 | { int ret = slice_del(z); /* delete, line 57 */ 246 | if (ret < 0) return ret; 247 | } 248 | break; 249 | case 2: 250 | { int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */ 251 | if (ret < 0) return ret; 252 | } 253 | break; 254 | case 3: 255 | { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ 256 | if (ret < 0) return ret; 257 | } 258 | break; 259 | } 260 | z->lb = mlimit; 261 | } 262 | return 1; 263 | } 264 | /* swedish_ISO_8859_1_stem: entry point. Runs r_mark_regions forwards and restores the cursor, then switches to backward processing (lb = cursor, cursor = l) and runs r_main_suffix, r_consonant_pair and r_other_suffix in that order, restoring the cursor after each. A 0 result from a step is ignored; a negative result is returned immediately. Finally moves the cursor to lb and returns 1. */ 265 | extern int swedish_ISO_8859_1_stem(struct SN_env * z) {
266 | { int c1 = z->c; /* do, line 66 */ 267 | { int ret = r_mark_regions(z); 268 | if (ret == 0) goto lab0; /* call mark_regions, line 66 */ 269 | if (ret < 0) return ret; 270 | } 271 | lab0: 272 | z->c = c1; 273 | } 274 | z->lb = z->c; z->c = z->l; /* backwards, line 67 */ 275 | 276 | { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ 277 | { int ret = r_main_suffix(z); 278 | if (ret == 0) goto lab1; /* call main_suffix, line 68 */ 279 | if (ret < 0) return ret; 280 | } 281 | lab1: 282 | z->c = z->l - m2; 283 | } 284 | { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ 285 | { int ret = r_consonant_pair(z); 286 | if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ 287 | if (ret < 0) return ret; 288 | } 289 | lab2: 290 | z->c = z->l - m3; 291 | } 292 | { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ 293 | { int ret = r_other_suffix(z); 294 | if (ret == 0) goto lab3; /* call other_suffix, line 70 */ 295 | if (ret < 0) return ret; 296 | } 297 | lab3: 298 | z->c = z->l - m4; 299 | } 300 | z->c = z->lb; 301 | return 1; 302 | } 303 | /* Environment constructor. NOTE(review): the arguments presumably request 0 string slots, 2 integer slots (I[0] and I[1] are the only ones used above) and 0 boolean slots - confirm against runtime/api.c */ 304 | extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } 305 | /* Environment destructor; the second argument matches the first argument given to SN_create_env above. */ 306 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 307 | 308 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_swedish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 9 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | --------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * romanian_ISO_8859_2_create_env(void); 9 | extern void romanian_ISO_8859_2_close_env(struct SN_env * z); 10 | 11 | extern int romanian_ISO_8859_2_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_KOI8_R_russian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_KOI8_R_create_env(void); 9 | extern void russian_KOI8_R_close_env(struct SN_env * z); 10 | 11 | extern int russian_KOI8_R_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_danish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int danish_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_undouble(struct SN_env * z); 14 | static int r_other_suffix(struct SN_env * z); 15 | static int r_consonant_pair(struct SN_env * z); 16 | static int r_main_suffix(struct SN_env * z); 17 | static int r_mark_regions(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * danish_UTF_8_create_env(void); 24 | extern void 
danish_UTF_8_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 33 | static const symbol s_0_3[1] = { 'e' }; 34 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 40 | static const symbol s_0_10[2] = { 'e', 'n' }; 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 43 | static const symbol s_0_13[2] = { 'e', 'r' }; 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 46 | static const symbol s_0_16[1] = { 's' }; 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 48 | static const symbol s_0_18[2] = { 'e', 's' }; 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 51 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 58 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 60 | static const symbol s_0_30[2] = { 'e', 't' }; 61 | static const symbol s_0_31[4] = { 'e', 
'r', 'e', 't' }; 62 | 63 | static const struct among a_0[32] = 64 | { 65 | /* 0 */ { 3, s_0_0, -1, 1, 0}, 66 | /* 1 */ { 5, s_0_1, 0, 1, 0}, 67 | /* 2 */ { 4, s_0_2, -1, 1, 0}, 68 | /* 3 */ { 1, s_0_3, -1, 1, 0}, 69 | /* 4 */ { 5, s_0_4, 3, 1, 0}, 70 | /* 5 */ { 4, s_0_5, 3, 1, 0}, 71 | /* 6 */ { 6, s_0_6, 5, 1, 0}, 72 | /* 7 */ { 3, s_0_7, 3, 1, 0}, 73 | /* 8 */ { 4, s_0_8, 3, 1, 0}, 74 | /* 9 */ { 3, s_0_9, 3, 1, 0}, 75 | /* 10 */ { 2, s_0_10, -1, 1, 0}, 76 | /* 11 */ { 5, s_0_11, 10, 1, 0}, 77 | /* 12 */ { 4, s_0_12, 10, 1, 0}, 78 | /* 13 */ { 2, s_0_13, -1, 1, 0}, 79 | /* 14 */ { 5, s_0_14, 13, 1, 0}, 80 | /* 15 */ { 4, s_0_15, 13, 1, 0}, 81 | /* 16 */ { 1, s_0_16, -1, 2, 0}, 82 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 83 | /* 18 */ { 2, s_0_18, 16, 1, 0}, 84 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 85 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 86 | /* 21 */ { 4, s_0_21, 18, 1, 0}, 87 | /* 22 */ { 5, s_0_22, 18, 1, 0}, 88 | /* 23 */ { 4, s_0_23, 18, 1, 0}, 89 | /* 24 */ { 3, s_0_24, 16, 1, 0}, 90 | /* 25 */ { 6, s_0_25, 24, 1, 0}, 91 | /* 26 */ { 5, s_0_26, 24, 1, 0}, 92 | /* 27 */ { 3, s_0_27, 16, 1, 0}, 93 | /* 28 */ { 3, s_0_28, 16, 1, 0}, 94 | /* 29 */ { 5, s_0_29, 28, 1, 0}, 95 | /* 30 */ { 2, s_0_30, -1, 1, 0}, 96 | /* 31 */ { 4, s_0_31, 30, 1, 0} 97 | }; 98 | 99 | static const symbol s_1_0[2] = { 'g', 'd' }; 100 | static const symbol s_1_1[2] = { 'd', 't' }; 101 | static const symbol s_1_2[2] = { 'g', 't' }; 102 | static const symbol s_1_3[2] = { 'k', 't' }; 103 | 104 | static const struct among a_1[4] = 105 | { 106 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 107 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 108 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 109 | /* 3 */ { 2, s_1_3, -1, -1, 0} 110 | }; 111 | 112 | static const symbol s_2_0[2] = { 'i', 'g' }; 113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; 115 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 116 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; 117 | 118 | 
static const struct among a_2[5] = 119 | { 120 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 121 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 122 | /* 2 */ { 4, s_2_2, 1, 1, 0}, 123 | /* 3 */ { 3, s_2_3, -1, 1, 0}, 124 | /* 4 */ { 5, s_2_4, -1, 2, 0} 125 | }; 126 | /* g_v and g_s_ending are the grouping tables handed to the in/out grouping helpers below. NOTE(review): presumably bitmaps of member characters, offset from the min code given at each call site - confirm against runtime/utilities.c */ 127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 128 | 129 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 130 | /* literal strings used by r_other_suffix: s_0 is "st", s_1 is "ig", s_2 is 'l', 0xC3, 0xB8, 's' (4 bytes) */ 131 | static const symbol s_0[] = { 's', 't' }; 132 | static const symbol s_1[] = { 'i', 'g' }; 133 | static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; 134 | /* r_mark_regions: sets the two integer marks used by the suffix routines. I[1] ("x") is the cursor after skip_utf8 advances it by 3; I[0] ("p1") is where the goto/gopast scan over g_v stops, raised to x when it would fall before x. Returns 0 if skip_utf8 or a scan fails (I[0] then stays at l), 1 otherwise. The cursor is left wherever the scan stopped; the caller restores it. */ 135 | static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 33: cursor is saved here and restored after the hop */ 138 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 139 | if (ret < 0) return 0; 140 | z->c = ret; /* hop, line 33 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 33 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ 146 | { /* gopast */ /* non v, line 34 */ 147 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 34 */ 152 | /* try, line 35: if p1 < x then p1 = x */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | /* r_main_suffix: working backwards with lb temporarily moved to p1 (I[0]), matches an ending from a_0 via find_among_b and acts on its result code. 1: delete the ending. 2: delete it only when in_grouping_b_U on g_s_ending returns 0, otherwise return 0. Returns 1 after a deletion, 0 when nothing applies, or the negative value from a failed slice operation. */ 159 | static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 41: mlimit keeps the old lb, which is restored on every exit path of this block */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 41 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 41 */ 168 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject before the table search: the last byte must lie in 0x60-0x7f and be selected by the bitmask */ 169 | among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ],
line 41 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 48 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; 183 | { int ret = slice_del(z); /* delete, line 50 */ 184 | if (ret < 0) return ret; 185 | } 186 | break; 187 | } 188 | return 1; 189 | } 190 | /* r_consonant_pair: as a test (cursor restored afterwards), requires the text before the cursor, limited to the region from p1 (I[0]), to end in 'd' (100) or 't' (116) and to match one of the four a_1 entries. ket is set at the cursor inside the test; the routine then steps back one character with skip_utf8, sets bra there and deletes the slice, i.e. the final character. Returns 1 after the deletion, 0 when the test or the step fails, negative on slice failure. */ 191 | static int r_consonant_pair(struct SN_env * z) { 192 | { int m_test = z->l - z->c; /* test, line 55 */ 193 | { int mlimit; /* setlimit, line 56: lb is moved to p1 and restored from mlimit on every exit path */ 194 | int m1 = z->l - z->c; (void)m1; 195 | if (z->c < z->I[0]) return 0; 196 | z->c = z->I[0]; /* tomark, line 56 */ 197 | mlimit = z->lb; z->lb = z->c; 198 | z->c = z->l - m1; 199 | z->ket = z->c; /* [, line 56 */ 200 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } 201 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ 202 | z->bra = z->c; /* ], line 56 */ 203 | z->lb = mlimit; 204 | } 205 | z->c = z->l - m_test; 206 | } 207 | { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); 208 | if (ret < 0) return 0; 209 | z->c = ret; /* next, line 62 */ 210 | } 211 | z->bra = z->c; /* ], line 62 */ 212 | { int ret = slice_del(z); /* delete, line 62 */ 213 | if (ret < 0) return ret; 214 | } 215 | return 1; 216 | } 217 | /* r_other_suffix: two steps. First (failure ignored, cursor restored): if the text ends in "st" and "ig" precedes it, the "st" slice is deleted. Second: within the region from p1 (I[0]), matches an ending from a_2 via find_among_b; result 1 deletes the ending and then runs r_consonant_pair (its 0 result is ignored), result 2 replaces the ending with the 4 bytes of s_2. Returns 1 after the second step edits, 0 when it matches nothing, negative on slice failure. */ 218 | static int r_other_suffix(struct SN_env * z) { 219 | int among_var; 220 | { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ 221 | z->ket = z->c; /* [, line 66 */ 222 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 223 | z->bra = z->c; /* ], line 66: the slice now covers only the text matched against s_0 */ 224 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 225 | { int ret = slice_del(z); /* delete, line 66 */ 226 | if (ret < 0) return ret; 227 | } 228 | lab0: 229 | z->c = z->l - m1; 230 | } 231 | { int mlimit; /* setlimit, line 67: lb is moved to p1 and restored from mlimit on every exit path */ 232 | int m2 = z->l - z->c; (void)m2; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 67 */ 235 |
mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m2; 237 | z->ket = z->c; /* [, line 67 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject on the last byte before the table search */ 239 | among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0; } 241 | z->bra = z->c; /* ], line 67 */ 242 | z->lb = mlimit; 243 | } 244 | switch(among_var) { 245 | case 0: return 0; 246 | case 1: 247 | { int ret = slice_del(z); /* delete, line 70 */ 248 | if (ret < 0) return ret; 249 | } 250 | { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ 251 | { int ret = r_consonant_pair(z); 252 | if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ 253 | if (ret < 0) return ret; 254 | } 255 | lab1: 256 | z->c = z->l - m3; 257 | } 258 | break; 259 | case 2: 260 | { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */ 261 | if (ret < 0) return ret; 262 | } 263 | break; 264 | } 265 | return 1; 266 | } 267 | /* r_undouble: within the region from p1 (I[0]), requires out_grouping_b_U on g_v to return 0 for the character before the cursor, copies that slice into S[0] ("ch"), then requires the text before it to equal S[0] (eq_v_b) and deletes the slice. Returns 1 after the deletion, 0 when a check fails, -1 when slice_to returns a null pointer, or the negative value from slice_del. */ 268 | static int r_undouble(struct SN_env * z) { 269 | { int mlimit; /* setlimit, line 76: lb is moved to p1 and restored from mlimit except on the -1 error path */ 270 | int m1 = z->l - z->c; (void)m1; 271 | if (z->c < z->I[0]) return 0; 272 | z->c = z->I[0]; /* tomark, line 76 */ 273 | mlimit = z->lb; z->lb = z->c; 274 | z->c = z->l - m1; 275 | z->ket = z->c; /* [, line 76 */ 276 | if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } 277 | z->bra = z->c; /* ], line 76 */ 278 | z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ 279 | if (z->S[0] == 0) return -1; /* -> ch, line 76 */ 280 | z->lb = mlimit; 281 | } 282 | if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ 283 | { int ret = slice_del(z); /* delete, line 78 */ 284 | if (ret < 0) return ret; 285 | } 286 | return 1; 287 | } 288 | /* danish_UTF_8_stem: entry point. Runs r_mark_regions forwards and restores the cursor, then switches to backward processing (lb = cursor, cursor = l) and runs r_main_suffix, r_consonant_pair, r_other_suffix and r_undouble in that order, restoring the cursor after each. A 0 result from a step is ignored; a negative result is returned immediately. Finally moves the cursor to lb and returns 1. */ 289 | extern int danish_UTF_8_stem(struct SN_env * z) { 290 | { int c1 = z->c; /* do, line 84 */ 291 | { int ret = r_mark_regions(z); 292 | if (ret == 0) goto lab0; /* call mark_regions, line 84 */ 293 | if (ret < 0) return ret; 294 | } 295 |
lab0: 296 | z->c = c1; 297 | } 298 | z->lb = z->c; z->c = z->l; /* backwards, line 85 */ 299 | 300 | { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ 301 | { int ret = r_main_suffix(z); 302 | if (ret == 0) goto lab1; /* call main_suffix, line 86 */ 303 | if (ret < 0) return ret; 304 | } 305 | lab1: 306 | z->c = z->l - m2; 307 | } 308 | { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ 309 | { int ret = r_consonant_pair(z); 310 | if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ 311 | if (ret < 0) return ret; 312 | } 313 | lab2: 314 | z->c = z->l - m3; 315 | } 316 | { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ 317 | { int ret = r_other_suffix(z); 318 | if (ret == 0) goto lab3; /* call other_suffix, line 88 */ 319 | if (ret < 0) return ret; 320 | } 321 | lab3: 322 | z->c = z->l - m4; 323 | } 324 | { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ 325 | { int ret = r_undouble(z); 326 | if (ret == 0) goto lab4; /* call undouble, line 89 */ 327 | if (ret < 0) return ret; 328 | } 329 | lab4: 330 | z->c = z->l - m5; 331 | } 332 | z->c = z->lb; 333 | return 1; 334 | } 335 | /* Environment constructor. NOTE(review): the arguments presumably request 1 string slot (S[0], used by r_undouble), 2 integer slots (I[0] and I[1]) and 0 boolean slots - confirm against runtime/api.c */ 336 | extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); } 337 | /* Environment destructor; the second argument matches the first argument given to SN_create_env above. */ 338 | extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } 339 | 340 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_danish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_UTF_8_create_env(void); 9 | extern void danish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int danish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | --------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_dutch.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_UTF_8_create_env(void); 9 | extern void dutch_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int dutch_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_english.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_UTF_8_create_env(void); 9 | extern void english_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int english_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_finnish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_UTF_8_create_env(void); 9 | extern void finnish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int finnish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_french.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern 
"C" { 6 | #endif 7 | 8 | extern struct SN_env * french_UTF_8_create_env(void); 9 | extern void french_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int french_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_german.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int german_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_standard_suffix(struct SN_env * z); 14 | static int r_R2(struct SN_env * z); 15 | static int r_R1(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | static int r_postlude(struct SN_env * z); 18 | static int r_prelude(struct SN_env * z); 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | 24 | extern struct SN_env * german_UTF_8_create_env(void); 25 | extern void german_UTF_8_close_env(struct SN_env * z); 26 | 27 | 28 | #ifdef __cplusplus 29 | } 30 | #endif 31 | static const symbol s_0_1[1] = { 'U' }; 32 | static const symbol s_0_2[1] = { 'Y' }; 33 | static const symbol s_0_3[2] = { 0xC3, 0xA4 }; 34 | static const symbol s_0_4[2] = { 0xC3, 0xB6 }; 35 | static const symbol s_0_5[2] = { 0xC3, 0xBC }; 36 | 37 | static const struct among a_0[6] = 38 | { 39 | /* 0 */ { 0, 0, -1, 6, 0}, 40 | /* 1 */ { 1, s_0_1, 0, 2, 0}, 41 | /* 2 */ { 1, s_0_2, 0, 1, 0}, 42 | /* 3 */ { 2, s_0_3, 0, 3, 0}, 43 | /* 4 */ { 2, s_0_4, 0, 4, 0}, 44 | /* 5 */ { 2, s_0_5, 0, 5, 0} 45 | }; 46 | 47 | static const symbol s_1_0[1] = { 'e' }; 48 | static const symbol s_1_1[2] = { 'e', 'm' }; 49 | static const symbol s_1_2[2] = { 'e', 'n' }; 50 | static const symbol s_1_3[3] = { 'e', 'r', 'n' }; 51 | static const symbol 
s_1_4[2] = { 'e', 'r' }; 52 | static const symbol s_1_5[1] = { 's' }; 53 | static const symbol s_1_6[2] = { 'e', 's' }; 54 | 55 | static const struct among a_1[7] = 56 | { 57 | /* 0 */ { 1, s_1_0, -1, 2, 0}, 58 | /* 1 */ { 2, s_1_1, -1, 1, 0}, 59 | /* 2 */ { 2, s_1_2, -1, 2, 0}, 60 | /* 3 */ { 3, s_1_3, -1, 1, 0}, 61 | /* 4 */ { 2, s_1_4, -1, 1, 0}, 62 | /* 5 */ { 1, s_1_5, -1, 3, 0}, 63 | /* 6 */ { 2, s_1_6, 5, 2, 0} 64 | }; 65 | 66 | static const symbol s_2_0[2] = { 'e', 'n' }; 67 | static const symbol s_2_1[2] = { 'e', 'r' }; 68 | static const symbol s_2_2[2] = { 's', 't' }; 69 | static const symbol s_2_3[3] = { 'e', 's', 't' }; 70 | 71 | static const struct among a_2[4] = 72 | { 73 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 74 | /* 1 */ { 2, s_2_1, -1, 1, 0}, 75 | /* 2 */ { 2, s_2_2, -1, 2, 0}, 76 | /* 3 */ { 3, s_2_3, 2, 1, 0} 77 | }; 78 | 79 | static const symbol s_3_0[2] = { 'i', 'g' }; 80 | static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; 81 | 82 | static const struct among a_3[2] = 83 | { 84 | /* 0 */ { 2, s_3_0, -1, 1, 0}, 85 | /* 1 */ { 4, s_3_1, -1, 1, 0} 86 | }; 87 | 88 | static const symbol s_4_0[3] = { 'e', 'n', 'd' }; 89 | static const symbol s_4_1[2] = { 'i', 'g' }; 90 | static const symbol s_4_2[3] = { 'u', 'n', 'g' }; 91 | static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; 92 | static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; 93 | static const symbol s_4_5[2] = { 'i', 'k' }; 94 | static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; 95 | static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; 96 | 97 | static const struct among a_4[8] = 98 | { 99 | /* 0 */ { 3, s_4_0, -1, 1, 0}, 100 | /* 1 */ { 2, s_4_1, -1, 2, 0}, 101 | /* 2 */ { 3, s_4_2, -1, 1, 0}, 102 | /* 3 */ { 4, s_4_3, -1, 3, 0}, 103 | /* 4 */ { 4, s_4_4, -1, 2, 0}, 104 | /* 5 */ { 2, s_4_5, -1, 2, 0}, 105 | /* 6 */ { 4, s_4_6, -1, 3, 0}, 106 | /* 7 */ { 4, s_4_7, -1, 4, 0} 107 | }; 108 | 109 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
8, 0, 32, 8 }; 110 | 111 | /* g_s_ending and g_st_ending are the character-group tables that r_standard_suffix passes to in_grouping_b_U with the code range 98..116. */ static const unsigned char g_s_ending[] = { 117, 30, 5 }; 112 | 113 | static const unsigned char g_st_ending[] = { 117, 30, 4 }; 114 | 115 | /* Literal operands for the eq_s, eq_s_b and slice_from_s calls in the routines below; s_0 holds the two bytes 0xC3 0x9F, the UTF-8 encoding of U+00DF. */ static const symbol s_0[] = { 0xC3, 0x9F }; 116 | static const symbol s_1[] = { 's', 's' }; 117 | static const symbol s_2[] = { 'u' }; 118 | static const symbol s_3[] = { 'U' }; 119 | static const symbol s_4[] = { 'y' }; 120 | static const symbol s_5[] = { 'Y' }; 121 | static const symbol s_6[] = { 'y' }; 122 | static const symbol s_7[] = { 'u' }; 123 | static const symbol s_8[] = { 'a' }; 124 | static const symbol s_9[] = { 'o' }; 125 | static const symbol s_10[] = { 'u' }; 126 | static const symbol s_11[] = { 's' }; 127 | static const symbol s_12[] = { 'n', 'i', 's' }; 128 | static const symbol s_13[] = { 'i', 'g' }; 129 | static const symbol s_14[] = { 'e' }; 130 | static const symbol s_15[] = { 'e' }; 131 | static const symbol s_16[] = { 'e', 'r' }; 132 | static const symbol s_17[] = { 'e', 'n' }; 133 | 134 | /* r_prelude: forward pass run before the regions are marked. Loop 1 walks the word and replaces each occurrence of s_0 by s_1 (ss), otherwise stepping one UTF-8 character, then puts the cursor back where it started. Loop 2 searches for u or y (s_2, s_4) with an in_grouping_U(g_v) test before and after it and rewrites it as U or Y (s_3, s_5). Returns 1 unless slice_from_s reports a negative error, which is propagated. NOTE(review): presumably the two g_v tests mean the letter stands between vowels - confirm the in_grouping_U return convention in the runtime. */ static int r_prelude(struct SN_env * z) { 135 | { int c_test = z->c; /* test, line 35 */ 136 | while(1) { /* repeat, line 35 */ 137 | int c1 = z->c; 138 | { int c2 = z->c; /* or, line 38 */ 139 | z->bra = z->c; /* [, line 37 */ 140 | if (!(eq_s(z, 2, s_0))) goto lab2; 141 | z->ket = z->c; /* ], line 37 */ 142 | { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ 143 | if (ret < 0) return ret; 144 | } 145 | goto lab1; 146 | lab2: 147 | z->c = c2; 148 | /* s_0 is not at this position: move on by one UTF-8 character, leaving the loop when that fails */ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); 149 | if (ret < 0) goto lab0; 150 | z->c = ret; /* next, line 38 */ 151 | } 152 | } 153 | lab1: 154 | continue; 155 | lab0: 156 | z->c = c1; 157 | break; 158 | } 159 | /* the whole first loop ran as a test: rewind to the starting cursor */ z->c = c_test; 160 | } 161 | while(1) { /* repeat, line 41 */ 162 | int c3 = z->c; 163 | while(1) { /* goto, line 41 */ 164 | int c4 = z->c; 165 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; 166 | z->bra = z->c; /* [, line 42 */ 167 | { int c5 = z->c; /* or, line 42 */ 168 | if (!(eq_s(z, 1, s_2))) goto lab6; 169 | z->ket = 
z->c; /* ], line 42 */ 170 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6; 171 | { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ 172 | if (ret < 0) return ret; 173 | } 174 | goto lab5; 175 | lab6: 176 | z->c = c5; 177 | if (!(eq_s(z, 1, s_4))) goto lab4; 178 | z->ket = z->c; /* ], line 43 */ 179 | if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; 180 | { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ 181 | if (ret < 0) return ret; 182 | } 183 | } 184 | lab5: 185 | z->c = c4; 186 | break; 187 | lab4: 188 | z->c = c4; 189 | /* no rewrite at this position: advance one UTF-8 character and try again, leaving the outer loop when that fails */ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); 190 | if (ret < 0) goto lab3; 191 | z->c = ret; /* goto, line 41 */ 192 | } 193 | } 194 | continue; 195 | lab3: 196 | z->c = c3; 197 | break; 198 | } 199 | return 1; 200 | } 201 | 202 | /* r_mark_regions: computes the marks tested by r_R1 and r_R2. z->I[0] and z->I[1] default to z->l. z->I[2] is the position three UTF-8 characters past the starting cursor (the routine returns 0 if that hop fails). z->I[0] becomes the cursor after one out_grouping_U / in_grouping_U scan pair, raised to z->I[2] when it would be smaller; z->I[1] is the cursor after a second scan pair. Returns 0 as soon as a scan reports a negative value, leaving the marks not yet assigned at their defaults; returns 1 otherwise. */ static int r_mark_regions(struct SN_env * z) { 203 | z->I[0] = z->l; 204 | z->I[1] = z->l; 205 | { int c_test = z->c; /* test, line 52 */ 206 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 207 | if (ret < 0) return 0; 208 | z->c = ret; /* hop, line 52 */ 209 | } 210 | z->I[2] = z->c; /* setmark x, line 52 */ 211 | z->c = c_test; 212 | } 213 | { /* gopast */ /* grouping v, line 54 */ 214 | int ret = out_grouping_U(z, g_v, 97, 252, 1); 215 | if (ret < 0) return 0; 216 | z->c += ret; 217 | } 218 | { /* gopast */ /* non v, line 54 */ 219 | int ret = in_grouping_U(z, g_v, 97, 252, 1); 220 | if (ret < 0) return 0; 221 | z->c += ret; 222 | } 223 | z->I[0] = z->c; /* setmark p1, line 54 */ 224 | /* try, line 55 */ 225 | /* keep z->I[0] no smaller than z->I[2] */ if (!(z->I[0] < z->I[2])) goto lab0; 226 | z->I[0] = z->I[2]; 227 | lab0: 228 | { /* gopast */ /* grouping v, line 56 */ 229 | int ret = out_grouping_U(z, g_v, 97, 252, 1); 230 | if (ret < 0) return 0; 231 | z->c += ret; 232 | } 233 | { /* gopast */ /* non v, line 56 */ 234 | int ret = in_grouping_U(z, g_v, 97, 252, 1); 235 | if (ret < 0) return 0; 236 | z->c += ret; 237 | } 238 | z->I[1] = z->c; /* setmark p2, line 56 */ 239 | return 1; 240 | } 241 | 242 | /* r_postlude: forward clean-up pass run after suffix removal. Repeats a find_among lookup in a_0 at the cursor: results 1 to 5 overwrite the match with s_6..s_10 (y, u, a, o, u) and result 6 steps over one UTF-8 character. The loop ends, with the cursor put back to the start of the failed iteration, when nothing matches or skip_utf8 fails. Returns 1, or a negative slice_from_s error. */ static int r_postlude(struct 
SN_env * z) { 243 | int among_var; 244 | while(1) { /* repeat, line 60 */ 245 | int c1 = z->c; 246 | z->bra = z->c; /* [, line 62 */ 247 | among_var = find_among(z, a_0, 6); /* substring, line 62 */ 248 | if (!(among_var)) goto lab0; 249 | z->ket = z->c; /* ], line 62 */ 250 | /* cases 1..5 replace the bracketed match with a single letter taken from s_6..s_10 */ switch(among_var) { 251 | case 0: goto lab0; 252 | case 1: 253 | { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ 254 | if (ret < 0) return ret; 255 | } 256 | break; 257 | case 2: 258 | { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ 259 | if (ret < 0) return ret; 260 | } 261 | break; 262 | case 3: 263 | { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ 264 | if (ret < 0) return ret; 265 | } 266 | break; 267 | case 4: 268 | { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ 269 | if (ret < 0) return ret; 270 | } 271 | break; 272 | case 5: 273 | { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ 274 | if (ret < 0) return ret; 275 | } 276 | break; 277 | case 6: 278 | /* nothing to replace here: step over one UTF-8 character, ending the loop when that fails */ { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); 279 | if (ret < 0) goto lab0; 280 | z->c = ret; /* next, line 68 */ 281 | } 282 | break; 283 | } 284 | continue; 285 | lab0: 286 | z->c = c1; 287 | break; 288 | } 289 | return 1; 290 | } 291 | 292 | /* r_R1: returns 1 when the cursor is at or beyond the mark z->I[0], otherwise 0. */ static int r_R1(struct SN_env * z) { 293 | if (!(z->I[0] <= z->c)) return 0; 294 | return 1; 295 | } 296 | 297 | /* r_R2: returns 1 when the cursor is at or beyond the mark z->I[1], otherwise 0. */ static int r_R2(struct SN_env * z) { 298 | if (!(z->I[1] <= z->c)) return 0; 299 | return 1; 300 | } 301 | 302 | /* r_standard_suffix: three backward suffix steps, each wrapped so that the cursor is put back to its starting offset from z->l afterwards. Step 1 searches a_1 and requires r_R1; step 2 searches a_2 and requires r_R1; step 3 searches a_4 and requires r_R2. A step that does not apply is skipped silently. Returns 1, or the negative result of a failed slice_del. */ static int r_standard_suffix(struct SN_env * z) { 303 | int among_var; 304 | { int m1 = z->l - z->c; (void)m1; /* do, line 79 */ 305 | z->ket = z->c; /* [, line 80 */ 306 | /* quick test on the byte before the cursor; the table search is skipped when it fails */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; 307 | among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ 308 | if (!(among_var)) goto lab0; 309 | z->bra = z->c; /* ], line 80 */ 310 | { int ret = r_R1(z); 311 | if (ret == 0) goto lab0; /* call R1, line 80 */ 312 | if (ret < 0) return ret; 313 | } 314 | 
switch(among_var) { 315 | case 0: goto lab0; 316 | case 1: 317 | { int ret = slice_del(z); /* delete, line 82 */ 318 | if (ret < 0) return ret; 319 | } 320 | break; 321 | case 2: 322 | /* delete the suffix; then, as an optional extra, if an s (s_11) preceded by nis (s_12) now stands before the cursor, delete that s as well */ { int ret = slice_del(z); /* delete, line 85 */ 323 | if (ret < 0) return ret; 324 | } 325 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ 326 | z->ket = z->c; /* [, line 86 */ 327 | if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } 328 | z->bra = z->c; /* ], line 86 */ 329 | if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } 330 | { int ret = slice_del(z); /* delete, line 86 */ 331 | if (ret < 0) return ret; 332 | } 333 | lab1: 334 | ; 335 | } 336 | break; 337 | case 3: 338 | /* the match is deleted only when in_grouping_b_U(g_s_ending) returns 0 */ if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0; 339 | { int ret = slice_del(z); /* delete, line 89 */ 340 | if (ret < 0) return ret; 341 | } 342 | break; 343 | } 344 | lab0: 345 | z->c = z->l - m1; 346 | } 347 | /* second step: a_2 search, again guarded by r_R1 */ { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ 348 | z->ket = z->c; /* [, line 94 */ 349 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; 350 | among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ 351 | if (!(among_var)) goto lab2; 352 | z->bra = z->c; /* ], line 94 */ 353 | { int ret = r_R1(z); 354 | if (ret == 0) goto lab2; /* call R1, line 94 */ 355 | if (ret < 0) return ret; 356 | } 357 | switch(among_var) { 358 | case 0: goto lab2; 359 | case 1: 360 | { int ret = slice_del(z); /* delete, line 96 */ 361 | if (ret < 0) return ret; 362 | } 363 | break; 364 | case 2: 365 | /* the g_st_ending test must return 0 and a hop of three UTF-8 characters backwards must succeed before the bracketed slice is deleted */ if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab2; 366 | { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3); 367 | if (ret < 0) goto lab2; 368 | z->c = ret; /* hop, line 99 */ 369 | } 370 | { int ret = slice_del(z); /* delete, line 99 */ 371 | if (ret < 0) return ret; 372 | } 373 | break; 374 | } 375 | lab2: 376 | z->c = z->l - m2; 377 | } 378 | { int m3 = z->l - z->c; (void)m3; /* do, line 
103 */ 379 | z->ket = z->c; /* [, line 104 */ 380 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; 381 | among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ 382 | if (!(among_var)) goto lab3; 383 | z->bra = z->c; /* ], line 104 */ 384 | /* every a_4 suffix must lie at or after the mark z->I[1] */ { int ret = r_R2(z); 385 | if (ret == 0) goto lab3; /* call R2, line 104 */ 386 | if (ret < 0) return ret; 387 | } 388 | switch(among_var) { 389 | case 0: goto lab3; 390 | case 1: 391 | /* delete; then try to delete a preceding ig (s_13) too, provided it is not itself preceded by e (s_14) and r_R2 holds for it */ { int ret = slice_del(z); /* delete, line 106 */ 392 | if (ret < 0) return ret; 393 | } 394 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ 395 | z->ket = z->c; /* [, line 107 */ 396 | if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } 397 | z->bra = z->c; /* ], line 107 */ 398 | { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ 399 | if (!(eq_s_b(z, 1, s_14))) goto lab5; 400 | { z->c = z->l - m_keep; goto lab4; } 401 | lab5: 402 | z->c = z->l - m4; 403 | } 404 | { int ret = r_R2(z); 405 | if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ 406 | if (ret < 0) return ret; 407 | } 408 | { int ret = slice_del(z); /* delete, line 107 */ 409 | if (ret < 0) return ret; 410 | } 411 | lab4: 412 | ; 413 | } 414 | break; 415 | case 2: 416 | /* delete unless the match is preceded by e (s_15), in which case the whole step is abandoned */ { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ 417 | if (!(eq_s_b(z, 1, s_15))) goto lab6; 418 | goto lab3; 419 | lab6: 420 | z->c = z->l - m5; 421 | } 422 | { int ret = slice_del(z); /* delete, line 110 */ 423 | if (ret < 0) return ret; 424 | } 425 | break; 426 | case 3: 427 | /* delete; then try to delete a preceding er or en (s_16, s_17) when r_R1 holds for it */ { int ret = slice_del(z); /* delete, line 113 */ 428 | if (ret < 0) return ret; 429 | } 430 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ 431 | z->ket = z->c; /* [, line 115 */ 432 | { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ 433 | if (!(eq_s_b(z, 2, s_16))) goto lab9; 434 | goto lab8; 435 | lab9: 436 | z->c = z->l - m6; 437 | if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; 
goto lab7; } 438 | } 439 | lab8: 440 | z->bra = z->c; /* ], line 115 */ 441 | { int ret = r_R1(z); 442 | if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ 443 | if (ret < 0) return ret; 444 | } 445 | { int ret = slice_del(z); /* delete, line 115 */ 446 | if (ret < 0) return ret; 447 | } 448 | lab7: 449 | ; 450 | } 451 | break; 452 | case 4: 453 | /* delete; then try to delete a preceding a_3 entry (ig or lich) when r_R2 holds for it */ { int ret = slice_del(z); /* delete, line 119 */ 454 | if (ret < 0) return ret; 455 | } 456 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ 457 | z->ket = z->c; /* [, line 121 */ 458 | /* 103 and 104 are the codes of g and h, the final letters of the two a_3 entries */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } 459 | among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ 460 | if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } 461 | z->bra = z->c; /* ], line 121 */ 462 | { int ret = r_R2(z); 463 | if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ 464 | if (ret < 0) return ret; 465 | } 466 | switch(among_var) { 467 | case 0: { z->c = z->l - m_keep; goto lab10; } 468 | case 1: 469 | { int ret = slice_del(z); /* delete, line 123 */ 470 | if (ret < 0) return ret; 471 | } 472 | break; 473 | } 474 | lab10: 475 | ; 476 | } 477 | break; 478 | } 479 | lab3: 480 | z->c = z->l - m3; 481 | } 482 | return 1; 483 | } 484 | 485 | /* german_UTF_8_stem: stemmer entry point. Runs r_prelude and r_mark_regions forwards (cursor restored after each), switches to backward processing for r_standard_suffix, then moves the cursor to z->lb and runs r_postlude forwards. A 0 result from any step is ignored; a negative result is returned immediately. Returns 1 otherwise. */ extern int german_UTF_8_stem(struct SN_env * z) { 486 | { int c1 = z->c; /* do, line 134 */ 487 | { int ret = r_prelude(z); 488 | if (ret == 0) goto lab0; /* call prelude, line 134 */ 489 | if (ret < 0) return ret; 490 | } 491 | lab0: 492 | z->c = c1; 493 | } 494 | { int c2 = z->c; /* do, line 135 */ 495 | { int ret = r_mark_regions(z); 496 | if (ret == 0) goto lab1; /* call mark_regions, line 135 */ 497 | if (ret < 0) return ret; 498 | } 499 | lab1: 500 | z->c = c2; 501 | } 502 | /* the current cursor becomes the backward limit and the cursor jumps to the end of the word */ z->lb = z->c; z->c = z->l; /* backwards, line 136 */ 503 | 504 | { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ 505 | { int ret = r_standard_suffix(z); 506 | 
if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ 507 | if (ret < 0) return ret; 508 | } 509 | lab2: 510 | z->c = z->l - m3; 511 | } 512 | /* move the cursor to z->lb before the forward-running r_postlude */ z->c = z->lb; 513 | { int c4 = z->c; /* do, line 138 */ 514 | { int ret = r_postlude(z); 515 | if (ret == 0) goto lab3; /* call postlude, line 138 */ 516 | if (ret < 0) return ret; 517 | } 518 | lab3: 519 | z->c = c4; 520 | } 521 | return 1; 522 | } 523 | 524 | /* Creates the stemmer environment through SN_create_env(0, 3, 0). NOTE(review): the arguments are presumably the numbers of string, integer and boolean slots (this stemmer uses z->I[0]..z->I[2] and no z->S) - confirm in runtime/api.c. */ extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } 525 | 526 | /* Releases an environment through SN_close_env(z, 0); the second argument repeats the first argument given to SN_create_env above. */ extern void german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 527 | 528 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_german.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * german_UTF_8_create_env(void); 9 | extern void german_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int german_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_hungarian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * hungarian_UTF_8_create_env(void); 9 | extern void hungarian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int hungarian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_italian.h: -------------------------------------------------------------------------------- 1 | 2 | /* 
This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_UTF_8_create_env(void); 9 | extern void italian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int italian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_norwegian.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int norwegian_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * norwegian_UTF_8_create_env(void); 23 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[1] = { 'e' }; 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 32 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 37 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 38 | static const symbol s_0_9[2] = { 'e', 'n' }; 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 40 | static const symbol s_0_11[2] = { 'a', 'r' }; 41 | static const 
symbol s_0_12[2] = { 'e', 'r' }; 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 43 | static const symbol s_0_14[1] = { 's' }; 44 | static const symbol s_0_15[2] = { 'a', 's' }; 45 | static const symbol s_0_16[2] = { 'e', 's' }; 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 48 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' }; 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 54 | static const symbol s_0_25[2] = { 'e', 't' }; 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 57 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 58 | 59 | static const struct among a_0[29] = 60 | { 61 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 62 | /* 1 */ { 1, s_0_1, -1, 1, 0}, 63 | /* 2 */ { 3, s_0_2, 1, 1, 0}, 64 | /* 3 */ { 4, s_0_3, 1, 1, 0}, 65 | /* 4 */ { 4, s_0_4, 1, 1, 0}, 66 | /* 5 */ { 3, s_0_5, 1, 1, 0}, 67 | /* 6 */ { 3, s_0_6, 1, 1, 0}, 68 | /* 7 */ { 6, s_0_7, 6, 1, 0}, 69 | /* 8 */ { 4, s_0_8, 1, 3, 0}, 70 | /* 9 */ { 2, s_0_9, -1, 1, 0}, 71 | /* 10 */ { 5, s_0_10, 9, 1, 0}, 72 | /* 11 */ { 2, s_0_11, -1, 1, 0}, 73 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 74 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, 76 | /* 15 */ { 2, s_0_15, 14, 1, 0}, 77 | /* 16 */ { 2, s_0_16, 14, 1, 0}, 78 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 79 | /* 18 */ { 5, s_0_18, 16, 1, 0}, 80 | /* 19 */ { 4, s_0_19, 16, 1, 0}, 81 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 82 | /* 21 */ { 3, s_0_21, 14, 1, 0}, 83 | /* 22 */ { 6, s_0_22, 21, 1, 0}, 84 | /* 23 */ { 3, s_0_23, 14, 1, 0}, 85 | /* 24 */ { 3, s_0_24, 14, 1, 0}, 86 | /* 25 */ { 2, s_0_25, -1, 1, 0}, 87 | /* 26 */ { 
3, s_0_26, 25, 1, 0}, 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, 89 | /* 28 */ { 3, s_0_28, -1, 1, 0} 90 | }; 91 | 92 | /* a_1 (dt, vt) is the table that r_consonant_pair searches with find_among_b(z, a_1, 2). */ static const symbol s_1_0[2] = { 'd', 't' }; 93 | static const symbol s_1_1[2] = { 'v', 't' }; 94 | 95 | static const struct among a_1[2] = 96 | { 97 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 98 | /* 1 */ { 2, s_1_1, -1, -1, 0} 99 | }; 100 | 101 | /* a_2 is the table that r_other_suffix searches with find_among_b(z, a_2, 11); every entry has 1 in its fourth field, matching the single case 1 handled by that routine. */ static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 103 | static const symbol s_2_2[2] = { 'i', 'g' }; 104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 107 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' }; 109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 112 | 113 | static const struct among a_2[11] = 114 | { 115 | /* 0 */ { 3, s_2_0, -1, 1, 0}, 116 | /* 1 */ { 4, s_2_1, 0, 1, 0}, 117 | /* 2 */ { 2, s_2_2, -1, 1, 0}, 118 | /* 3 */ { 3, s_2_3, 2, 1, 0}, 119 | /* 4 */ { 3, s_2_4, 2, 1, 0}, 120 | /* 5 */ { 4, s_2_5, 4, 1, 0}, 121 | /* 6 */ { 3, s_2_6, -1, 1, 0}, 122 | /* 7 */ { 3, s_2_7, -1, 1, 0}, 123 | /* 8 */ { 4, s_2_8, 7, 1, 0}, 124 | /* 9 */ { 4, s_2_9, 7, 1, 0}, 125 | /* 10 */ { 7, s_2_10, 9, 1, 0} 126 | }; 127 | 128 | /* g_v and g_s_ending are the character-group tables handed to the grouping helpers: g_v with the code range 97..248, g_s_ending with 98..122. */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | 130 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 131 | 132 | static const symbol s_0[] = { 'k' }; 133 | static const symbol s_1[] = { 'e', 'r' }; 134 | 135 | /* r_mark_regions: z->I[0] defaults to z->l; z->I[1] is the position three UTF-8 characters past the starting cursor (the routine returns 0 if that hop fails). After an out_grouping_U check (0 is returned when it reports a negative value) and an in_grouping_U scan that advances the cursor, z->I[0] is set to the cursor and raised to z->I[1] when it would be smaller. Returns 1 on success. */ static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 30 */ 138 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 139 | if (ret < 0) return 0; 140 | z->c = ret; /* hop, line 30 
*/ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 30 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ 146 | { /* gopast */ /* non v, line 31 */ 147 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 31 */ 152 | /* try, line 32 */ 153 | /* keep z->I[0] no smaller than z->I[1] */ if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | 159 | /* r_main_suffix: searches a_0 backwards with z->lb temporarily set to the mark z->I[0] (z->lb is put back from mlimit on every exit taken after it was changed). Result 1 deletes the match. Result 2 deletes it if in_grouping_b_U(g_s_ending) returns 0, or failing that if k (s_0) precedes and out_grouping_b_U(g_v) returns 0. Result 3 replaces the match with er (s_1). Returns 0 when the cursor is before z->I[0], nothing matches, or the result-2 conditions fail; 1 on success; otherwise the negative slice error. */ static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 38 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 38 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 38 */ 168 | /* quick test on the byte before the cursor; the table search is skipped when it fails */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 38 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 44 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | /* two alternatives: the g_s_ending test, or k followed by the g_v test; the routine returns 0 if both fail */ { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ 183 | if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; 184 | goto lab0; 185 | lab1: 186 | z->c = z->l - m2; 187 | if (!(eq_s_b(z, 1, s_0))) return 0; 188 | if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; 189 | } 190 | lab0: 191 | { int ret = slice_del(z); /* delete, line 46 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 3: 196 | { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ 197 | if (ret < 0) return ret; 198 | } 199 | break; 200 | } 201 | return 1; 202 | } 203 | 204 | /* r_consonant_pair: inside a test (cursor restored afterwards) and with z->lb temporarily set to z->I[0], requires the byte before the cursor to be 116 (t) and a_1 to match backwards. On success it steps back one UTF-8 character, sets z->bra there and calls slice_del. Returns 0 if any check fails, 1 after the deletion, or the negative slice_del result. */ static int r_consonant_pair(struct SN_env * z) { 205 | { int m_test = 
z->l - z->c; /* test, line 53 */ 206 | { int mlimit; /* setlimit, line 54 */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 54 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | z->ket = z->c; /* [, line 54 */ 213 | /* 116 is the code of t, the last letter of both a_1 entries */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } 214 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ 215 | z->bra = z->c; /* ], line 54 */ 216 | z->lb = mlimit; 217 | } 218 | z->c = z->l - m_test; 219 | } 220 | /* step back one UTF-8 character; the fourth argument is the literal 0 here. NOTE(review): presumably that argument is not consulted for a negative step - confirm in runtime/utilities.c. */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); 221 | if (ret < 0) return 0; 222 | z->c = ret; /* next, line 59 */ 223 | } 224 | z->bra = z->c; /* ], line 59 */ 225 | { int ret = slice_del(z); /* delete, line 59 */ 226 | if (ret < 0) return ret; 227 | } 228 | return 1; 229 | } 230 | 231 | /* r_other_suffix: with z->lb temporarily set to the mark z->I[0], searches a_2 backwards and deletes the match. Returns 0 when the cursor is before z->I[0] or nothing matches, 1 after the deletion, or the negative slice_del result. */ static int r_other_suffix(struct SN_env * z) { 232 | int among_var; 233 | { int mlimit; /* setlimit, line 63 */ 234 | int m1 = z->l - z->c; (void)m1; 235 | if (z->c < z->I[0]) return 0; 236 | z->c = z->I[0]; /* tomark, line 63 */ 237 | mlimit = z->lb; z->lb = z->c; 238 | z->c = z->l - m1; 239 | z->ket = z->c; /* [, line 63 */ 240 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 241 | among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ 242 | if (!(among_var)) { z->lb = mlimit; return 0; } 243 | z->bra = z->c; /* ], line 63 */ 244 | z->lb = mlimit; 245 | } 246 | switch(among_var) { 247 | case 0: return 0; 248 | case 1: 249 | { int ret = slice_del(z); /* delete, line 67 */ 250 | if (ret < 0) return ret; 251 | } 252 | break; 253 | } 254 | return 1; 255 | } 256 | 257 | /* norwegian_UTF_8_stem: stemmer entry point. Runs r_mark_regions forwards, then in backward processing r_main_suffix, r_consonant_pair and r_other_suffix, restoring the cursor after each. A 0 result from a step is ignored; a negative result is returned immediately. Returns 1 otherwise. */ extern int norwegian_UTF_8_stem(struct SN_env * z) { 258 | { int c1 = z->c; /* do, line 74 */ 259 | { int ret = r_mark_regions(z); 260 | if (ret == 0) goto lab0; /* call mark_regions, line 74 */ 261 | if (ret < 0) return ret; 262 | } 263 | lab0: 264 | z->c = 
c1; 265 | } 266 | /* the current cursor becomes the backward limit and the cursor jumps to the end of the word */ z->lb = z->c; z->c = z->l; /* backwards, line 75 */ 267 | 268 | { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ 269 | { int ret = r_main_suffix(z); 270 | if (ret == 0) goto lab1; /* call main_suffix, line 76 */ 271 | if (ret < 0) return ret; 272 | } 273 | lab1: 274 | z->c = z->l - m2; 275 | } 276 | { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ 277 | { int ret = r_consonant_pair(z); 278 | if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ 279 | if (ret < 0) return ret; 280 | } 281 | lab2: 282 | z->c = z->l - m3; 283 | } 284 | { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ 285 | { int ret = r_other_suffix(z); 286 | if (ret == 0) goto lab3; /* call other_suffix, line 78 */ 287 | if (ret < 0) return ret; 288 | } 289 | lab3: 290 | z->c = z->l - m4; 291 | } 292 | /* finish with the cursor at the backward limit and report success */ z->c = z->lb; 293 | return 1; 294 | } 295 | 296 | /* Creates the stemmer environment through SN_create_env(0, 2, 0). NOTE(review): the arguments are presumably the numbers of string, integer and boolean slots (this stemmer uses z->I[0] and z->I[1] and no z->S) - confirm in runtime/api.c. */ extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } 297 | 298 | /* Releases an environment through SN_close_env(z, 0); the second argument repeats the first argument given to SN_create_env above. */ extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 299 | 300 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_norwegian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_UTF_8_create_env(void); 9 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int norwegian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_porter.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 
8 | extern struct SN_env * porter_UTF_8_create_env(void); 9 | extern void porter_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int porter_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_portuguese.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_UTF_8_create_env(void); 9 | extern void portuguese_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_romanian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * romanian_UTF_8_create_env(void); 9 | extern void romanian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int romanian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_russian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_UTF_8_create_env(void); 9 | extern void russian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int russian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | 
#endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_spanish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * spanish_UTF_8_create_env(void); 9 | extern void spanish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int spanish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_swedish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int swedish_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * swedish_UTF_8_create_env(void); 23 | extern void swedish_UTF_8_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 34 | static const symbol s_0_5[2] = { 'a', 'd' }; 35 | static const symbol s_0_6[1] = { 'e' }; 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 37 | static const 
symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 41 | static const symbol s_0_12[2] = { 'e', 'n' }; 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 43 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 46 | static const symbol s_0_17[2] = { 'a', 'r' }; 47 | static const symbol s_0_18[2] = { 'e', 'r' }; 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 49 | static const symbol s_0_20[2] = { 'o', 'r' }; 50 | static const symbol s_0_21[1] = { 's' }; 51 | static const symbol s_0_22[2] = { 'a', 's' }; 52 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 55 | static const symbol s_0_26[2] = { 'e', 's' }; 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 62 | static const symbol s_0_33[2] = { 'a', 't' }; 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 65 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 66 | 67 | static const struct among a_0[37] = 68 | { 69 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 70 | /* 1 */ { 4, s_0_1, 0, 1, 0}, 71 | /* 2 */ { 4, s_0_2, 0, 1, 0}, 72 | /* 3 */ { 7, s_0_3, 2, 1, 0}, 73 | /* 4 */ { 4, s_0_4, 0, 1, 0}, 74 | /* 5 */ { 2, s_0_5, -1, 1, 0}, 75 | /* 6 */ { 1, s_0_6, -1, 1, 0}, 76 | 
/* 7 */ { 3, s_0_7, 6, 1, 0}, 77 | /* 8 */ { 4, s_0_8, 6, 1, 0}, 78 | /* 9 */ { 4, s_0_9, 6, 1, 0}, 79 | /* 10 */ { 3, s_0_10, 6, 1, 0}, 80 | /* 11 */ { 4, s_0_11, 6, 1, 0}, 81 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 82 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 83 | /* 14 */ { 4, s_0_14, 12, 1, 0}, 84 | /* 15 */ { 5, s_0_15, 12, 1, 0}, 85 | /* 16 */ { 3, s_0_16, -1, 1, 0}, 86 | /* 17 */ { 2, s_0_17, -1, 1, 0}, 87 | /* 18 */ { 2, s_0_18, -1, 1, 0}, 88 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 89 | /* 20 */ { 2, s_0_20, -1, 1, 0}, 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, 91 | /* 22 */ { 2, s_0_22, 21, 1, 0}, 92 | /* 23 */ { 5, s_0_23, 22, 1, 0}, 93 | /* 24 */ { 5, s_0_24, 22, 1, 0}, 94 | /* 25 */ { 5, s_0_25, 22, 1, 0}, 95 | /* 26 */ { 2, s_0_26, 21, 1, 0}, 96 | /* 27 */ { 4, s_0_27, 26, 1, 0}, 97 | /* 28 */ { 5, s_0_28, 26, 1, 0}, 98 | /* 29 */ { 3, s_0_29, 21, 1, 0}, 99 | /* 30 */ { 5, s_0_30, 29, 1, 0}, 100 | /* 31 */ { 6, s_0_31, 29, 1, 0}, 101 | /* 32 */ { 4, s_0_32, 21, 1, 0}, 102 | /* 33 */ { 2, s_0_33, -1, 1, 0}, 103 | /* 34 */ { 5, s_0_34, -1, 1, 0}, 104 | /* 35 */ { 3, s_0_35, -1, 1, 0}, 105 | /* 36 */ { 3, s_0_36, -1, 1, 0} 106 | }; 107 | 108 | static const symbol s_1_0[2] = { 'd', 'd' }; 109 | static const symbol s_1_1[2] = { 'g', 'd' }; 110 | static const symbol s_1_2[2] = { 'n', 'n' }; 111 | static const symbol s_1_3[2] = { 'd', 't' }; 112 | static const symbol s_1_4[2] = { 'g', 't' }; 113 | static const symbol s_1_5[2] = { 'k', 't' }; 114 | static const symbol s_1_6[2] = { 't', 't' }; 115 | 116 | static const struct among a_1[7] = 117 | { 118 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 119 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 120 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 121 | /* 3 */ { 2, s_1_3, -1, -1, 0}, 122 | /* 4 */ { 2, s_1_4, -1, -1, 0}, 123 | /* 5 */ { 2, s_1_5, -1, -1, 0}, 124 | /* 6 */ { 2, s_1_6, -1, -1, 0} 125 | }; 126 | 127 | static const symbol s_2_0[2] = { 'i', 'g' }; 128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 129 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 
/*
 * Swedish (UTF-8) stemmer: remaining suffix tables and the stemming routines.
 * Generated by the Snowball compiler; the code below is unchanged, only
 * commentary has been added. The "line NN" notes in the generated comments
 * refer to the Snowball source script, not to this file.
 */

/* Tail of the suffix strings for a_2: "fullt" and "löst" (0xC3 0xB6 is the
 * UTF-8 encoding of 'ö'). */
static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' };

/*
 * Suffix table searched by r_other_suffix().
 * Each entry is { byte length, string, link, result, 0 }. The fourth field is
 * the value r_other_suffix() switches on. The third field looks like the index
 * of a shorter entry that is a suffix of this one ("lig" -> entry 0, "ig"), or
 * -1 when there is none -- NOTE(review): confirm against struct among in
 * runtime/header.h.
 */
static const struct among a_2[5] =
{
/*  0 */ { 2, s_2_0, -1, 1, 0},
/*  1 */ { 3, s_2_1, 0, 1, 0},
/*  2 */ { 3, s_2_2, -1, 1, 0},
/*  3 */ { 5, s_2_3, -1, 3, 0},
/*  4 */ { 5, s_2_4, -1, 2, 0}
};

/* Character-class bitmap passed to in/out_grouping_U() together with the code
 * point range 97..246; the generated comments call this grouping "v"
 * (presumably the vowels -- confirm against the Snowball script). */
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };

/* Character-class bitmap passed to in_grouping_b_U() with range 98..121; it
 * decides whether a bare trailing "s" may be removed in r_main_suffix(). */
static const unsigned char g_s_ending[] = { 119, 127, 149 };

/* Replacement strings used by r_other_suffix(): "lös" and "full". */
static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' };
static const symbol s_1[] = { 'f', 'u', 'l', 'l' };

/*
 * Compute the region mark stored in z->I[0] (called "p1" in the generated
 * comments); z->I[1] is a scratch mark ("x").
 *
 * Returns 1 when the mark was computed, 0 when one of the scanning steps
 * failed. On a 0 return z->I[0] keeps the value z->l assigned on entry.
 */
static int r_mark_regions(struct SN_env * z) {
    z->I[0] = z->l;
    {   int c_test = z->c; /* test, line 29 */
        /* Look 3 characters ahead and remember that position in I[1]; the
         * cursor itself is put back afterwards. */
        {   int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
            if (ret < 0) return 0;
            z->c = ret; /* hop, line 29 */
        }
        z->I[1] = z->c; /* setmark x, line 29 */
        z->c = c_test;
    }
    if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */
    {    /* gopast */ /* non v, line 30 */
        int ret = in_grouping_U(z, g_v, 97, 246, 1);
        if (ret < 0) return 0;
        z->c += ret;
    }
    z->I[0] = z->c; /* setmark p1, line 30 */
     /* try, line 31 */
    /* Never let p1 fall before the 3-character mark x. */
    if (!(z->I[0] < z->I[1])) goto lab0;
    z->I[0] = z->I[1];
lab0:
    return 1;
}

/*
 * Remove one suffix listed in table a_0, matching backwards from the cursor
 * and not reaching before the mark z->I[0].
 *
 * Returns 1 after a deletion, 0 when nothing applies, or the negative value
 * reported by slice_del().
 */
static int r_main_suffix(struct SN_env * z) {
    int among_var;
    {   int mlimit; /* setlimit, line 37 */
        int m1 = z->l - z->c; (void)m1;
        if (z->c < z->I[0]) return 0;
        /* Temporarily use I[0] as z->lb for the search; the previous value is
         * kept in mlimit and written back on every exit from this block. */
        z->c = z->I[0]; /* tomark, line 37 */
        mlimit = z->lb; z->lb = z->c;
        z->c = z->l - m1;
        z->ket = z->c; /* [, line 37 */
        /* Cheap pre-filter: give up unless the byte before the cursor is in
         * 0x60..0x7F and its low five bits select a set bit of 1851442, i.e.
         * it is one of a, d, e, n, r, s, t -- the final letters of the a_0
         * entries. */
        if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
        among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
        if (!(among_var)) { z->lb = mlimit; return 0; }
        z->bra = z->c; /* ], line 37 */
        z->lb = mlimit;
    }
    switch(among_var) {
        case 0: return 0;
        case 1:
            /* Any multi-letter suffix of a_0: delete it unconditionally. */
            {   int ret = slice_del(z); /* delete, line 44 */
                if (ret < 0) return ret;
            }
            break;
        case 2:
            /* The bare "s" entry of a_0: delete only when the g_s_ending
             * grouping test on the preceding character returns 0. */
            if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0;
            {   int ret = slice_del(z); /* delete, line 46 */
                if (ret < 0) return ret;
            }
            break;
    }
    return 1;
}

/*
 * If the text before the cursor ends in one of the letter pairs of table a_1
 * (dd, gd, nn, dt, gt, kt, tt) without reaching before z->I[0], delete the
 * last character of the pair.
 *
 * Returns 1 after a deletion, 0 when nothing applies, or the negative value
 * reported by slice_del().
 */
static int r_consonant_pair(struct SN_env * z) {
    {   int mlimit; /* setlimit, line 50 */
        int m1 = z->l - z->c; (void)m1;
        if (z->c < z->I[0]) return 0;
        z->c = z->I[0]; /* tomark, line 50 */
        mlimit = z->lb; z->lb = z->c;
        z->c = z->l - m1;
        {   int m2 = z->l - z->c; (void)m2; /* and, line 52 */
            /* Pre-filter: the byte before the cursor must be d, n or t (the
             * bits set in 1064976), and two bytes must be available. */
            if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
            if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */
            /* The table match was only a test: return to where the cursor
             * was, then mark exactly one character for deletion. */
            z->c = z->l - m2;
            z->ket = z->c; /* [, line 52 */
            {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
                if (ret < 0) { z->lb = mlimit; return 0; }
                z->c = ret; /* next, line 52 */
            }
            z->bra = z->c; /* ], line 52 */
            /* NOTE(review): on a negative result this returns without writing
             * mlimit back to z->lb; generated code, left as is. */
            {   int ret = slice_del(z); /* delete, line 52 */
                if (ret < 0) return ret;
            }
        }
        z->lb = mlimit;
    }
    return 1;
}

/*
 * Handle the suffixes of table a_2, not reaching before z->I[0]:
 * "ig", "lig" and "els" are deleted, "löst" becomes "lös" and "fullt"
 * becomes "full".
 *
 * Returns 1 after a change, 0 when nothing applies, or the negative value
 * reported by slice_del() / slice_from_s().
 */
static int r_other_suffix(struct SN_env * z) {
    int among_var;
    {   int mlimit; /* setlimit, line 55 */
        int m1 = z->l - z->c; (void)m1;
        if (z->c < z->I[0]) return 0;
        z->c = z->I[0]; /* tomark, line 55 */
        mlimit = z->lb; z->lb = z->c;
        z->c = z->l - m1;
        z->ket = z->c; /* [, line 56 */
        /* Pre-filter: the byte before the cursor must be g, s or t (the bits
         * set in 1572992). */
        if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; }
        among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
        if (!(among_var)) { z->lb = mlimit; return 0; }
        z->bra = z->c; /* ], line 56 */
        switch(among_var) {
            case 0: { z->lb = mlimit; return 0; }
            case 1:
                {   int ret = slice_del(z); /* delete, line 57 */
                    if (ret < 0) return ret;
                }
                break;
            case 2:
                /* "löst" -> "lös" (4 bytes because 'ö' takes two). */
                {   int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */
                    if (ret < 0) return ret;
                }
                break;
            case 3:
                /* "fullt" -> "full" */
                {   int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
                    if (ret < 0) return ret;
                }
                break;
        }
        z->lb = mlimit;
    }
    return 1;
}

/*
 * Entry point: stem the word held in z.
 *
 * Runs r_mark_regions() forwards, then switches to backward processing
 * (z->lb takes the cursor, the cursor moves to z->l) and applies
 * r_main_suffix(), r_consonant_pair() and r_other_suffix() in that order.
 * Each step is optional: a 0 result is ignored and the cursor is put back
 * before the next step. A negative result from any step is returned to the
 * caller immediately; otherwise the function returns 1.
 */
extern int swedish_UTF_8_stem(struct SN_env * z) {
    {   int c1 = z->c; /* do, line 66 */
        {   int ret = r_mark_regions(z);
            if (ret == 0) goto lab0; /* call mark_regions, line 66 */
            if (ret < 0) return ret;
        }
    lab0:
        z->c = c1;
    }
    z->lb = z->c; z->c = z->l; /* backwards, line 67 */

    {   int m2 = z->l - z->c; (void)m2; /* do, line 68 */
        {   int ret = r_main_suffix(z);
            if (ret == 0) goto lab1; /* call main_suffix, line 68 */
            if (ret < 0) return ret;
        }
    lab1:
        z->c = z->l - m2;
    }
    {   int m3 = z->l - z->c; (void)m3; /* do, line 69 */
        {   int ret = r_consonant_pair(z);
            if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
            if (ret < 0) return ret;
        }
    lab2:
        z->c = z->l - m3;
    }
    {   int m4 = z->l - z->c; (void)m4; /* do, line 70 */
        {   int ret = r_other_suffix(z);
            if (ret == 0) goto lab3; /* call other_suffix, line 70 */
            if (ret < 0) return ret;
        }
    lab3:
        z->c = z->l - m4;
    }
    /* Leave backward mode: the cursor goes back to where it started. */
    z->c = z->lb;
    return 1;
}

/* Allocate the environment for this stemmer. The routines above use z->I[0]
 * and z->I[1]; the middle argument 2 is presumably that integer-slot count --
 * confirm against SN_create_env() in runtime/api.c. */
extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); }

/* Release an environment obtained from swedish_UTF_8_create_env(). */
extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); }
-------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_swedish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_UTF_8_create_env(void); 9 | extern void swedish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int swedish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_turkish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * turkish_UTF_8_create_env(void); 9 | extern void turkish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int turkish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /php_stemmer.h: -------------------------------------------------------------------------------- 1 | #ifndef PHP_STEMMER_PHP_H 2 | #define PHP_STEMMER_PHP_H 1 3 | 4 | #define PHP_STEMMER_VERSION "1.0" 5 | #define PHP_STEMMER_EXTNAME "stemmer" 6 | 7 | PHP_FUNCTION(stemword); 8 | 9 | extern zend_module_entry stemmer_module_entry; 10 | #define phpext_stemmer_ptr &stemmer_module_entry 11 | 12 | #endif 13 | 14 | #ifndef TSRMLS_CC 15 | #define TSRMLS_CC 16 | #endif 17 | -------------------------------------------------------------------------------- /stemmer.c: -------------------------------------------------------------------------------- 1 | #ifdef HAVE_CONFIG_H 2 | #include "config.h" 3 | #endif 4 | 5 | #include "php.h" 6 | #include 
"php_stemmer.h" 7 | #include "libstemmer_c/include/libstemmer.h" 8 | 9 | ZEND_BEGIN_ARG_INFO_EX(arginfo_void, 0, 0, 0) 10 | ZEND_END_ARG_INFO() 11 | 12 | static zend_function_entry stemmer_functions[] = { 13 | PHP_FE(stemword, arginfo_void) 14 | {NULL, NULL, NULL} 15 | }; 16 | 17 | zend_module_entry stemmer_module_entry = { 18 | #if ZEND_MODULE_API_NO >= 20010901 19 | STANDARD_MODULE_HEADER, 20 | #endif 21 | PHP_STEMMER_EXTNAME, 22 | stemmer_functions, 23 | NULL, 24 | NULL, 25 | NULL, 26 | NULL, 27 | NULL, 28 | #if ZEND_MODULE_API_NO >= 20010901 29 | PHP_STEMMER_VERSION, 30 | #endif 31 | STANDARD_MODULE_PROPERTIES 32 | }; 33 | 34 | #ifdef COMPILE_DL_STEMMER 35 | ZEND_GET_MODULE(stemmer) 36 | #endif 37 | 38 | #if PHP_MAJOR_VERSION < 7 39 | PHP_FUNCTION(stemword) 40 | { 41 | zval *lang, *enc, *arg; 42 | 43 | if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &arg,&lang,&enc) == FAILURE)RETURN_NULL(); 44 | 45 | convert_to_string(lang); 46 | convert_to_string(enc); 47 | 48 | struct sb_stemmer * stemmer; 49 | 50 | //char * language = "kraaij_pohlmann"; 51 | //char * charenc = "UTF_8"; 52 | 53 | stemmer = sb_stemmer_new(Z_STRVAL_P(lang),Z_STRVAL_P(enc)); 54 | if(!stemmer) RETURN_NULL(); 55 | 56 | if(Z_TYPE_P(arg) == IS_ARRAY) 57 | { 58 | array_init(return_value); 59 | HashTable *arr_hash; 60 | HashPosition pointer; 61 | int array_count; 62 | arr_hash = Z_ARRVAL_P(arg); 63 | array_count = zend_hash_num_elements(arr_hash); 64 | zval **data; 65 | for( zend_hash_internal_pointer_reset_ex(arr_hash,&pointer); 66 | zend_hash_get_current_data_ex(arr_hash,(void **)&data, &pointer)==SUCCESS; 67 | zend_hash_move_forward_ex(arr_hash,&pointer) ){ 68 | 69 | const sb_symbol *stemmed = ""; 70 | if(Z_TYPE_PP(data) == IS_STRING){ 71 | stemmed = sb_stemmer_stem(stemmer, Z_STRVAL_PP(data), Z_STRLEN_PP(data)); 72 | } 73 | add_next_index_string(return_value,stemmed, 1); 74 | } 75 | }else{ 76 | convert_to_string(arg); 77 | const sb_symbol *stemmed = sb_stemmer_stem(stemmer, 
Z_STRVAL_P(arg), Z_STRLEN_P(arg)); 78 | if(stemmed)ZVAL_STRING( return_value, stemmed, 1); 79 | } 80 | sb_stemmer_delete(stemmer); 81 | 82 | // RETURN_STRING(stemmed, 1); 83 | //return 1; 84 | } 85 | #else 86 | PHP_FUNCTION(stemword) 87 | { 88 | zval *lang, *enc, *arg; 89 | 90 | if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &arg,&lang,&enc) == FAILURE)RETURN_NULL(); 91 | 92 | convert_to_string(lang); 93 | convert_to_string(enc); 94 | 95 | struct sb_stemmer * stemmer; 96 | 97 | //char * language = "kraaij_pohlmann"; 98 | //char * charenc = "UTF_8"; 99 | 100 | stemmer = sb_stemmer_new(Z_STRVAL_P(lang),Z_STRVAL_P(enc)); 101 | if(!stemmer) RETURN_NULL(); 102 | 103 | if(Z_TYPE_P(arg) == IS_ARRAY) 104 | { 105 | array_init(return_value); 106 | HashTable *arr_hash; 107 | HashPosition pointer; 108 | int array_count; 109 | arr_hash = Z_ARRVAL_P(arg); 110 | array_count = zend_hash_num_elements(arr_hash); 111 | zval *data; 112 | for( zend_hash_internal_pointer_reset_ex(arr_hash,&pointer); 113 | zend_hash_get_current_data_ex(arr_hash,(void *)&data)==SUCCESS; 114 | zend_hash_move_forward_ex(arr_hash,&pointer) ){ 115 | 116 | const sb_symbol *stemmed = ""; 117 | if(Z_TYPE_P(data) == IS_STRING){ 118 | stemmed = sb_stemmer_stem(stemmer, Z_STRVAL_P(data), Z_STRLEN_P(data)); 119 | } 120 | add_next_index_string(return_value,stemmed); 121 | } 122 | }else{ 123 | convert_to_string(arg); 124 | const sb_symbol *stemmed = sb_stemmer_stem(stemmer, Z_STRVAL_P(arg), Z_STRLEN_P(arg)); 125 | if(stemmed)ZVAL_STRING( return_value, stemmed); 126 | } 127 | sb_stemmer_delete(stemmer); 128 | 129 | // RETURN_STRING(stemmed, 1); 130 | //return 1; 131 | } 132 | #endif 133 | 134 | -------------------------------------------------------------------------------- /tests/stemword_function_001.phpt: -------------------------------------------------------------------------------- 1 | --TEST-- 2 | stemword() function - basic test for stemword() 3 | --FILE-- 4 | 8 | --EXPECT-- 9 | string(16) 
"dakpann, balletj" --------------------------------------------------------------------------------