├── .gitignore
├── libstemmer-ffi.swig
├── libstemmer_c
    ├── Makefile
    ├── src_c
    │   ├── stem_UTF_8_dutch.h
    │   ├── stem_UTF_8_danish.h
    │   ├── stem_UTF_8_english.h
    │   ├── stem_UTF_8_finnish.h
    │   ├── stem_UTF_8_french.h
    │   ├── stem_UTF_8_german.h
    │   ├── stem_UTF_8_italian.h
    │   ├── stem_UTF_8_porter.h
    │   ├── stem_UTF_8_russian.h
    │   ├── stem_UTF_8_spanish.h
    │   ├── stem_UTF_8_swedish.h
    │   ├── stem_UTF_8_turkish.h
    │   ├── stem_KOI8_R_russian.h
    │   ├── stem_UTF_8_romanian.h
    │   ├── stem_UTF_8_hungarian.h
    │   ├── stem_UTF_8_norwegian.h
    │   ├── stem_ISO_8859_1_dutch.h
    │   ├── stem_UTF_8_portuguese.h
    │   ├── stem_ISO_8859_1_danish.h
    │   ├── stem_ISO_8859_1_english.h
    │   ├── stem_ISO_8859_1_finnish.h
    │   ├── stem_ISO_8859_1_french.h
    │   ├── stem_ISO_8859_1_german.h
    │   ├── stem_ISO_8859_1_italian.h
    │   ├── stem_ISO_8859_1_porter.h
    │   ├── stem_ISO_8859_1_spanish.h
    │   ├── stem_ISO_8859_1_swedish.h
    │   ├── stem_ISO_8859_2_romanian.h
    │   ├── stem_ISO_8859_1_hungarian.h
    │   ├── stem_ISO_8859_1_norwegian.h
    │   ├── stem_ISO_8859_1_portuguese.h
    │   ├── stem_ISO_8859_1_norwegian.c
    │   ├── stem_UTF_8_norwegian.c
    │   ├── stem_ISO_8859_1_swedish.c
    │   ├── stem_UTF_8_swedish.c
    │   ├── stem_ISO_8859_1_danish.c
    │   └── stem_UTF_8_danish.c
    ├── runtime
    │   ├── api.h
    │   ├── api.c
    │   └── header.h
    ├── mkinc_utf8.mak
    ├── MANIFEST
    ├── libstemmer
    │   ├── libstemmer.c
    │   ├── libstemmer_c.in
    │   ├── libstemmer_utf8.c
    │   ├── modules_utf8.txt
    │   ├── modules.txt
    │   ├── modules_utf8.h
    │   └── modules.h
    ├── mkinc.mak
    ├── include
    │   └── libstemmer.h
    ├── README
    └── examples
    │   └── stemwords.c
├── package.lisp
├── cl-libstemmer.asd
├── README.md
├── stopwords.lisp
├── stopwords
    ├── hu.txt
    ├── fi.txt
    ├── da.txt
    ├── sv.txt
    ├── fr.txt
    ├── nl.txt
    ├── nb.txt
    ├── pt.txt
    ├── de.txt
    ├── it.txt
    ├── en.txt
    ├── es.txt
    └── ru.txt
├── libstemmer-ffi.lisp
└── cl-libstemmer.lisp


/.gitignore:
--------------------------------------------------------------------------------
1 | *.so
2 | *.o
3 | stemwords
4 | 


--------------------------------------------------------------------------------
/libstemmer-ffi.swig:
--------------------------------------------------------------------------------
1 | %module "libstemmer-ffi"
2 | %include "libstemmer_c/include/libstemmer.h"
3 | 


--------------------------------------------------------------------------------
/libstemmer_c/Makefile:
--------------------------------------------------------------------------------
 1 | # Build rules for the bundled libstemmer C sources.
 2 | # mkinc.mak is expected to define $(snowball_sources), the C files to compile.
 3 | include mkinc.mak
 4 | 
 5 | # Build position-independent objects so they can be linked into libstemmer.so.
 6 | CFLAGS=-fPIC
 7 | 
 8 | # Command names, not files: a stray file called "all" or "clean" must not
 9 | # make these targets look up to date.
10 | .PHONY: all clean
11 | 
12 | # Delete a half-written target when its recipe fails.
13 | .DELETE_ON_ERROR:
14 | 
15 | all: libstemmer.o stemwords
16 | 
17 | # Despite the .o suffix this is an ar archive of every compiled source.
18 | libstemmer.o: $(snowball_sources:.c=.o)
19 | 	$(AR) -cru $@ $^
20 | 
21 | # Shared library built from the same objects.
22 | libstemmer.so: $(snowball_sources:.c=.o)
23 | 	$(CC) -shared -o $@ $^
24 | 
25 | # Example command-line program linked against the archive.
26 | stemwords: examples/stemwords.o libstemmer.o
27 | 	$(CC) -o $@ $^
28 | 
29 | # examples/*.o is listed so the object built for stemwords is removed too.
30 | clean:
31 | 	rm -f stemwords *.so *.o src_c/*.o runtime/*.o libstemmer/*.o examples/*.o
32 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_dutch.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * dutch_UTF_8_create_env(void);
 9 | extern void dutch_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int dutch_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_danish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * danish_UTF_8_create_env(void);
 9 | extern void danish_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int danish_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_english.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * english_UTF_8_create_env(void);
 9 | extern void english_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int english_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_finnish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * finnish_UTF_8_create_env(void);
 9 | extern void finnish_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int finnish_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_french.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * french_UTF_8_create_env(void);
 9 | extern void french_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int french_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_german.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * german_UTF_8_create_env(void);
 9 | extern void german_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int german_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_italian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * italian_UTF_8_create_env(void);
 9 | extern void italian_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int italian_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_porter.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * porter_UTF_8_create_env(void);
 9 | extern void porter_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int porter_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_russian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * russian_UTF_8_create_env(void);
 9 | extern void russian_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int russian_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_spanish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * spanish_UTF_8_create_env(void);
 9 | extern void spanish_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int spanish_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_swedish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * swedish_UTF_8_create_env(void);
 9 | extern void swedish_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int swedish_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_turkish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * turkish_UTF_8_create_env(void);
 9 | extern void turkish_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int turkish_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_KOI8_R_russian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * russian_KOI8_R_create_env(void);
 9 | extern void russian_KOI8_R_close_env(struct SN_env * z);
10 | 
11 | extern int russian_KOI8_R_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_romanian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * romanian_UTF_8_create_env(void);
 9 | extern void romanian_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int romanian_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_hungarian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * hungarian_UTF_8_create_env(void);
 9 | extern void hungarian_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int hungarian_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_norwegian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * norwegian_UTF_8_create_env(void);
 9 | extern void norwegian_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int norwegian_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * dutch_ISO_8859_1_create_env(void);
 9 | extern void dutch_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int dutch_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_portuguese.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * portuguese_UTF_8_create_env(void);
 9 | extern void portuguese_UTF_8_close_env(struct SN_env * z);
10 | 
11 | extern int portuguese_UTF_8_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_danish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * danish_ISO_8859_1_create_env(void);
 9 | extern void danish_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int danish_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_english.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * english_ISO_8859_1_create_env(void);
 9 | extern void english_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int english_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * finnish_ISO_8859_1_create_env(void);
 9 | extern void finnish_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int finnish_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_french.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * french_ISO_8859_1_create_env(void);
 9 | extern void french_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int french_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_german.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * german_ISO_8859_1_create_env(void);
 9 | extern void german_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int german_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_italian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * italian_ISO_8859_1_create_env(void);
 9 | extern void italian_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int italian_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_porter.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * porter_ISO_8859_1_create_env(void);
 9 | extern void porter_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int porter_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * spanish_ISO_8859_1_create_env(void);
 9 | extern void spanish_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int spanish_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * swedish_ISO_8859_1_create_env(void);
 9 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int swedish_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * romanian_ISO_8859_2_create_env(void);
 9 | extern void romanian_ISO_8859_2_close_env(struct SN_env * z);
10 | 
11 | extern int romanian_ISO_8859_2_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * hungarian_ISO_8859_1_create_env(void);
 9 | extern void hungarian_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int hungarian_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void);
 9 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | extern struct SN_env * portuguese_ISO_8859_1_create_env(void);
 9 | extern void portuguese_ISO_8859_1_close_env(struct SN_env * z);
10 | 
11 | extern int portuguese_ISO_8859_1_stem(struct SN_env * z);
12 | 
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 | 
17 | 


--------------------------------------------------------------------------------
/package.lisp:
--------------------------------------------------------------------------------
 1 | ;;;; package.lisp
 2 | 
 3 | ;;; Public package of the system; LIBSTEMMER is a shorter nickname for it.
 4 | (defpackage #:cl-libstemmer
 5 |   (:nicknames #:libstemmer)
 6 |   (:use #:cl #:alexandria #:serapeum)
 7 |   (:export
 8 |    ;; Configuration
 9 |    :*default-encoding*
10 |    ;; Stemmer objects and their accessors
11 |    :stemmer
12 |    :stemmer-language
13 |    :stemmer-encoding
14 |    ;; Stemming entry points
15 |    :stem-all
16 |    :with-stemmer
17 |    :stem
18 |    :load-stemmer
19 |    :close-stemmer
20 |    ;; Conditions
21 |    :no-such-stemmer
22 |    :no-such-stemmer-language
23 |    :no-such-stemmer-encoding
24 |    ;; Stop words
25 |    :stop-word-p
26 |    :list-stop-words))
15 | 


--------------------------------------------------------------------------------
/cl-libstemmer.asd:
--------------------------------------------------------------------------------
 1 | ;;;; cl-libstemmer.asd
 2 | 
 3 | (defpackage #:cl-libstemmer.asdf
 4 |   (:use #:cl #:asdf))
 5 | 
 6 | (in-package #:cl-libstemmer.asdf)
 7 | 
 8 | (defun wrap-package (fn)
 9 |   "Call FN with *PACKAGE* bound to the CL-LIBSTEMMER package and return its result.
10 | Used as the :around-compile hook of the libstemmer-ffi component."
11 |   (let ((*package* (find-package :cl-libstemmer))) (funcall fn)))
11 | 
12 | ;;; System definition: metadata first, then dependencies, then the
13 | ;;; components, which are loaded in the order listed (:serial t).
14 | (defsystem #:cl-libstemmer
15 |   :description "Snowball stemming algorithms (FFI)"
16 |   :author "Paul M. Rodriguez <pmr@ruricolist.com>"
17 |   :license "MIT"
18 |   :depends-on (#:alexandria #:serapeum #:trivial-garbage
19 |                #:cffi #:bordeaux-threads #:uiop)
20 |   :serial t
21 |   :components
22 |   ((:file "package")
23 |    ;; Compiled with *package* bound to CL-LIBSTEMMER by WRAP-PACKAGE.
24 |    (:file "libstemmer-ffi" :around-compile wrap-package)
25 |    (:file "stopwords")
26 |    (:file "cl-libstemmer")))
28 | 


--------------------------------------------------------------------------------
/libstemmer_c/runtime/api.h:
--------------------------------------------------------------------------------
 1 | 
 2 | typedef unsigned char symbol;
 3 | 
 4 | /* Or replace 'char' above with 'short' for 16 bit characters.
 5 | 
 6 |    More precisely, replace 'char' with whatever type guarantees the
 7 |    character width you need. Note however that sizeof(symbol) should divide
 8 |    HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
 9 |    there is an alignment problem. In the unlikely event of a problem here,
10 |    consult Martin Porter.
11 | 
12 | */
13 | 
14 | /* Per-stemmer working state, allocated by SN_create_env() and released by
15 |  * SN_close_env(). */
16 | struct SN_env {
17 |     symbol * p;         /* current string; obtained from create_s() in api.c */
18 |     /* Integer positions relating to p. SN_set_current() resets c to 0 and
19 |      * replaces the range 0..l; the exact meaning of lb, bra and ket is not
20 |      * shown in this header -- TODO confirm against the runtime sources. */
21 |     int c;
22 |     int l;
23 |     int lb;
24 |     int bra;
25 |     int ket;
26 |     symbol * * S;       /* array of S_size strings, or NULL when S_size is 0 */
27 |     int * I;            /* array of I_size ints, or NULL when I_size is 0 */
28 |     unsigned char * B;  /* array of B_size bytes, or NULL when B_size is 0 */
29 | };
21 | 
22 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);
23 | extern void SN_close_env(struct SN_env * z, int S_size);
24 | 
25 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s);
26 | 
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | The [Snowball][snowball] project has defined stemming algorithms for
 2 | 17 languages. Libstemmer provides these algorithms as a C library.
 3 | 
 4 | CL-LIBSTEMMER includes the full source of libstemmer, and will attempt
 5 | to build and load `libstemmer.so` when it is first loaded. This
 6 | only works on a system with `make` and a C compiler.
 7 | 
 8 | The preferred way to use CL-LIBSTEMMER is with `stem-all`:
 9 | 
10 |     (libstemmer:stem-all '("visible" "irradiate" "vainglorious" "habitat")
11 |                                   :en)
12 |     => '("visibl" "irradi" "vainglori" "habitat"), T
13 | 
14 | `stem-all` takes a list of words, a language (as a two- or three-letter
15 | abbreviation) and, optionally, an encoding. If such a stemmer exists,
16 | `stem-all` returns the stemmed words; otherwise, it returns the list of
17 | words unchanged. The second value is T if stemming was actually done.
18 | 
19 | You can also stem incrementally, using `with-stemmer` and `stem`:
20 | 
21 |     (libstemmer:with-stemmer (stemmer :en)
22 |       (libstemmer:stem stemmer "resplendent"))
23 |     => "resplend"
24 | 
25 | There are also unbalanced `load-stemmer` and `close-stemmer`
26 | functions. Bear in mind that loading a stemmer is relatively
27 | expensive: for best results, stem in large batches.
28 | 
29 | Besides libstemmer itself, CL-LIBSTEMMER also includes the lists of
30 | stop words compiled by the Snowball project.
31 | 
32 |      (libstemmer:stop-word-p "is" :es) => T
33 | 
34 | [snowball]: http://snowball.tartarus.org/index.php
35 | 


--------------------------------------------------------------------------------
/libstemmer_c/runtime/api.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdlib.h> /* for calloc, free */
 3 | #include "header.h"
 4 | 
 5 | /* Allocate a zero-initialised stemmer environment.
 6 |  *
 7 |  * S_size: number of symbol strings to create in env->S (each via create_s()).
 8 |  * I_size: number of ints to allocate in env->I.
 9 |  * B_size: number of unsigned chars to allocate in env->B.
10 |  *
11 |  * Returns the new environment, or NULL if any allocation fails; in that case
12 |  * whatever was already allocated is handed to SN_close_env(). */
13 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
14 | {
15 |     struct SN_env * env = (struct SN_env *) calloc(1, sizeof(struct SN_env));
16 |     if (env == NULL) return NULL;
17 | 
18 |     env->p = create_s();
19 |     if (env->p == NULL) goto error;
20 | 
21 |     if (S_size)
22 |     {
23 |         int slot = 0;
24 |         env->S = (symbol * *) calloc(S_size, sizeof(symbol *));
25 |         if (env->S == NULL) goto error;
26 | 
27 |         /* Fill every slot; stop at the first create_s() failure. */
28 |         while (slot < S_size)
29 |         {
30 |             env->S[slot] = create_s();
31 |             if (env->S[slot] == NULL) goto error;
32 |             slot++;
33 |         }
34 |     }
35 | 
36 |     if (I_size)
37 |     {
38 |         env->I = (int *) calloc(I_size, sizeof(int));
39 |         if (env->I == NULL) goto error;
40 |     }
41 | 
42 |     if (B_size)
43 |     {
44 |         env->B = (unsigned char *) calloc(B_size, sizeof(unsigned char));
45 |         if (env->B == NULL) goto error;
46 |     }
47 | 
48 |     return env;
49 | 
50 | error:
51 |     SN_close_env(env, S_size);
52 |     return NULL;
53 | }
41 | 
42 | /* Release an environment created by SN_create_env().
43 |  *
44 |  * z:      environment to free; NULL is accepted and ignored.
45 |  * S_size: number of entries in z->S (the S_size given to SN_create_env()).
46 |  *
47 |  * Also safe on a partially built environment: SN_create_env() calls this on
48 |  * its error path, where z->S may still be NULL (its calloc failed) or only
49 |  * partly filled (a later create_s() failed). */
50 | extern void SN_close_env(struct SN_env * z, int S_size)
51 | {
52 |     if (z == NULL) return;
53 |     /* Test the pointer rather than S_size: a non-zero S_size does not
54 |      * guarantee that the array was actually allocated. */
55 |     if (z->S != NULL)
56 |     {
57 |         int i;
58 |         for (i = 0; i < S_size; i++)
59 |         {
60 |             /* Unfilled slots are NULL because the array came from calloc. */
61 |             if (z->S[i] != NULL) lose_s(z->S[i]);
62 |         }
63 |         free(z->S);
64 |     }
65 |     free(z->I);
66 |     free(z->B);
67 |     if (z->p) lose_s(z->p);
68 |     free(z);
69 | }
59 | 
60 | /* Replace the range 0..z->l of the environment's string with the `size`
61 |  * symbols at `s`, then reset z->c to 0.
62 |  * Returns whatever replace_s() returned. */
63 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
64 | {
65 |     const int status = replace_s(z, 0, z->l, size, s, NULL);
66 | 
67 |     /* Reset only after the replacement has been attempted. */
68 |     z->c = 0;
69 |     return status;
70 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/libstemmer_c/mkinc_utf8.mak:
--------------------------------------------------------------------------------
 1 | # libstemmer/mkinc_utf8.mak: List of stemming module source files
 2 | #
 3 | # This file is generated by mkmodules.pl from a list of module names.
 4 | # Do not edit manually.
 5 | #
 6 | # Modules included by this file are: danish, dutch, english, finnish, french,
 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian,
 8 | # russian, spanish, swedish, turkish
 9 | 
10 | snowball_sources= \
11 |   src_c/stem_UTF_8_danish.c \
12 |   src_c/stem_UTF_8_dutch.c \
13 |   src_c/stem_UTF_8_english.c \
14 |   src_c/stem_UTF_8_finnish.c \
15 |   src_c/stem_UTF_8_french.c \
16 |   src_c/stem_UTF_8_german.c \
17 |   src_c/stem_UTF_8_hungarian.c \
18 |   src_c/stem_UTF_8_italian.c \
19 |   src_c/stem_UTF_8_norwegian.c \
20 |   src_c/stem_UTF_8_porter.c \
21 |   src_c/stem_UTF_8_portuguese.c \
22 |   src_c/stem_UTF_8_romanian.c \
23 |   src_c/stem_UTF_8_russian.c \
24 |   src_c/stem_UTF_8_spanish.c \
25 |   src_c/stem_UTF_8_swedish.c \
26 |   src_c/stem_UTF_8_turkish.c \
27 |   runtime/api.c \
28 |   runtime/utilities.c \
29 |   libstemmer/libstemmer_utf8.c
30 | 
31 | snowball_headers= \
32 |   src_c/stem_UTF_8_danish.h \
33 |   src_c/stem_UTF_8_dutch.h \
34 |   src_c/stem_UTF_8_english.h \
35 |   src_c/stem_UTF_8_finnish.h \
36 |   src_c/stem_UTF_8_french.h \
37 |   src_c/stem_UTF_8_german.h \
38 |   src_c/stem_UTF_8_hungarian.h \
39 |   src_c/stem_UTF_8_italian.h \
40 |   src_c/stem_UTF_8_norwegian.h \
41 |   src_c/stem_UTF_8_porter.h \
42 |   src_c/stem_UTF_8_portuguese.h \
43 |   src_c/stem_UTF_8_romanian.h \
44 |   src_c/stem_UTF_8_russian.h \
45 |   src_c/stem_UTF_8_spanish.h \
46 |   src_c/stem_UTF_8_swedish.h \
47 |   src_c/stem_UTF_8_turkish.h \
48 |   include/libstemmer.h \
49 |   libstemmer/modules_utf8.h \
50 |   runtime/api.h \
51 |   runtime/header.h
52 | 
53 | 


--------------------------------------------------------------------------------
/stopwords.lisp:
--------------------------------------------------------------------------------
 1 | (in-package #:libstemmer)
 2 | 
 3 | ;;; NB All of the included stopwords files have been re-encoded as
 4 | ;;; UTF-8.
 5 | 
 6 | ;; TODO Use tries.
 7 | 
 8 | (defun snarf-stopwords-file (file)
 9 |   "Return the list of stop words found in FILE.
10 | Each line contributes the text before its first `|' (the whole line if there
11 | is none), trimmed of surrounding whitespace; lines left empty are dropped."
12 |   ;; The bundled files are UTF-8 (see the note at the top of this file), so
13 |   ;; read them as such instead of relying on the implementation default.
14 |   (let ((text (read-file-into-string file :external-format :utf-8)))
15 |     (loop for line in (lines text)
16 |           for word = (trim-whitespace (subseq line 0 (position #\| line)))
17 |           unless (emptyp word)
18 |             collect word)))
17 | 
18 | (defun snarf-stopwords (lang)
19 |   "Return the stop words read from stopwords/LANG.txt, located relative to the
20 | CL-LIBSTEMMER system."
21 |   (snarf-stopwords-file
22 |    (merge-pathnames (make-pathname :name lang :type "txt")
23 |                     (asdf:system-relative-pathname :cl-libstemmer
24 |                                                    "stopwords/"))))
24 | 
25 | (defun stopwords ()
26 |   "Return an alist with one (LANG . TABLE) entry per .txt file in the system's
27 | stopwords/ directory. LANG is the file's name without its type; TABLE is an
28 | EQUAL hash table built from that file's stop words."
29 |   (let ((pattern (merge-pathnames
30 |                   "*.txt"
31 |                   (asdf:system-relative-pathname :cl-libstemmer "stopwords/"))))
32 |     (mapcar (lambda (path)
33 |               (let ((lang (pathname-name path)))
34 |                 (cons lang
35 |                       (set-hash-table (snarf-stopwords lang)
36 |                                       :strict nil
37 |                                       :test 'equal))))
38 |             (directory pattern))))
34 | ;; EQUAL hash table from language name to that language's table, built from (stopwords).
35 | (defparameter *stopwords*
36 |   (load-time-value
37 |    (alist-hash-table
38 |     (stopwords)
39 |     :test 'equal)
40 |    t))
41 | ;; Look up down-cased LANG in TABLE; the fallback is an empty dict created at read time.
42 | (defun stop-words (lang &optional (table *stopwords*))
43 |   (let ((key (string-downcase lang))) (gethash key table #.(dict))))
44 | ;; Return the stored entry when WORD (a string) is in LANG's table, otherwise NIL.
45 | (defun stop-word-p (word lang &key (table *stopwords*))
46 |   (check-type word string)
47 |   (let ((words (stop-words lang table))) (nth-value 0 (gethash word words))))
48 | ;; Return the values of LANG's stop-word table as a list.
49 | (defun list-stop-words (lang)
50 |   (let ((table (stop-words lang)))
51 |     ;; NIL when the lookup yields no table.
52 |     (and table (hash-table-values table))))
53 | 


--------------------------------------------------------------------------------
/libstemmer_c/MANIFEST:
--------------------------------------------------------------------------------
 1 | README
 2 | src_c/stem_ISO_8859_1_danish.c
 3 | src_c/stem_ISO_8859_1_danish.h
 4 | src_c/stem_ISO_8859_1_dutch.c
 5 | src_c/stem_ISO_8859_1_dutch.h
 6 | src_c/stem_ISO_8859_1_english.c
 7 | src_c/stem_ISO_8859_1_english.h
 8 | src_c/stem_ISO_8859_1_finnish.c
 9 | src_c/stem_ISO_8859_1_finnish.h
10 | src_c/stem_ISO_8859_1_french.c
11 | src_c/stem_ISO_8859_1_french.h
12 | src_c/stem_ISO_8859_1_german.c
13 | src_c/stem_ISO_8859_1_german.h
14 | src_c/stem_ISO_8859_1_hungarian.c
15 | src_c/stem_ISO_8859_1_hungarian.h
16 | src_c/stem_ISO_8859_1_italian.c
17 | src_c/stem_ISO_8859_1_italian.h
18 | src_c/stem_ISO_8859_1_norwegian.c
19 | src_c/stem_ISO_8859_1_norwegian.h
20 | src_c/stem_ISO_8859_1_porter.c
21 | src_c/stem_ISO_8859_1_porter.h
22 | src_c/stem_ISO_8859_1_portuguese.c
23 | src_c/stem_ISO_8859_1_portuguese.h
24 | src_c/stem_ISO_8859_1_spanish.c
25 | src_c/stem_ISO_8859_1_spanish.h
26 | src_c/stem_ISO_8859_1_swedish.c
27 | src_c/stem_ISO_8859_1_swedish.h
28 | src_c/stem_ISO_8859_2_romanian.c
29 | src_c/stem_ISO_8859_2_romanian.h
30 | src_c/stem_KOI8_R_russian.c
31 | src_c/stem_KOI8_R_russian.h
32 | src_c/stem_UTF_8_danish.c
33 | src_c/stem_UTF_8_danish.h
34 | src_c/stem_UTF_8_dutch.c
35 | src_c/stem_UTF_8_dutch.h
36 | src_c/stem_UTF_8_english.c
37 | src_c/stem_UTF_8_english.h
38 | src_c/stem_UTF_8_finnish.c
39 | src_c/stem_UTF_8_finnish.h
40 | src_c/stem_UTF_8_french.c
41 | src_c/stem_UTF_8_french.h
42 | src_c/stem_UTF_8_german.c
43 | src_c/stem_UTF_8_german.h
44 | src_c/stem_UTF_8_hungarian.c
45 | src_c/stem_UTF_8_hungarian.h
46 | src_c/stem_UTF_8_italian.c
47 | src_c/stem_UTF_8_italian.h
48 | src_c/stem_UTF_8_norwegian.c
49 | src_c/stem_UTF_8_norwegian.h
50 | src_c/stem_UTF_8_porter.c
51 | src_c/stem_UTF_8_porter.h
52 | src_c/stem_UTF_8_portuguese.c
53 | src_c/stem_UTF_8_portuguese.h
54 | src_c/stem_UTF_8_romanian.c
55 | src_c/stem_UTF_8_romanian.h
56 | src_c/stem_UTF_8_russian.c
57 | src_c/stem_UTF_8_russian.h
58 | src_c/stem_UTF_8_spanish.c
59 | src_c/stem_UTF_8_spanish.h
60 | src_c/stem_UTF_8_swedish.c
61 | src_c/stem_UTF_8_swedish.h
62 | src_c/stem_UTF_8_turkish.c
63 | src_c/stem_UTF_8_turkish.h
64 | runtime/api.c
65 | runtime/api.h
66 | runtime/header.h
67 | runtime/utilities.c
68 | libstemmer/libstemmer.c
69 | libstemmer/libstemmer_utf8.c
70 | libstemmer/modules.h
71 | libstemmer/modules_utf8.h
72 | include/libstemmer.h
73 | 


--------------------------------------------------------------------------------
/stopwords/hu.txt:
--------------------------------------------------------------------------------
  1 | | Hungarian stop word list
  2 | | prepared by Anna Tordai
  3 | 
  4 | a
  5 | ahogy
  6 | ahol
  7 | aki
  8 | akik
  9 | akkor
 10 | alatt
 11 | által
 12 | általában
 13 | amely
 14 | amelyek
 15 | amelyekben
 16 | amelyeket
 17 | amelyet
 18 | amelynek
 19 | ami
 20 | amit
 21 | amolyan
 22 | amíg
 23 | amikor
 24 | át
 25 | abban
 26 | ahhoz
 27 | annak
 28 | arra
 29 | arról
 30 | az
 31 | azok
 32 | azon
 33 | azt
 34 | azzal
 35 | azért
 36 | aztán
 37 | azután
 38 | azonban
 39 | bár
 40 | be
 41 | belül
 42 | benne
 43 | cikk
 44 | cikkek
 45 | cikkeket
 46 | csak
 47 | de
 48 | e
 49 | eddig
 50 | egész
 51 | egy
 52 | egyes
 53 | egyetlen
 54 | egyéb
 55 | egyik
 56 | egyre
 57 | ekkor
 58 | el
 59 | elég
 60 | ellen
 61 | elő
 62 | először
 63 | előtt
 64 | első
 65 | én
 66 | éppen
 67 | ebben
 68 | ehhez
 69 | emilyen
 70 | ennek
 71 | erre
 72 | ez
 73 | ezt
 74 | ezek
 75 | ezen
 76 | ezzel
 77 | ezért
 78 | és
 79 | fel
 80 | felé
 81 | hanem
 82 | hiszen
 83 | hogy
 84 | hogyan
 85 | igen
 86 | így
 87 | illetve
 88 | ill.
 89 | ill
 90 | ilyen
 91 | ilyenkor
 92 | ison
 93 | ismét
 94 | itt
 95 | jó
 96 | jól
 97 | jobban
 98 | kell
 99 | kellett
100 | keresztül
101 | keressünk
102 | ki
103 | kívül
104 | között
105 | közül
106 | legalább
107 | lehet
108 | lehetett
109 | legyen
110 | lenne
111 | lenni
112 | lesz
113 | lett
114 | maga
115 | magát
116 | majd
117 | majd
118 | már
119 | más
120 | másik
121 | meg
122 | még
123 | mellett
124 | mert
125 | mely
126 | melyek
127 | mi
128 | mit
129 | míg
130 | miért
131 | milyen
132 | mikor
133 | minden
134 | mindent
135 | mindenki
136 | mindig
137 | mint
138 | mintha
139 | mivel
140 | most
141 | nagy
142 | nagyobb
143 | nagyon
144 | ne
145 | néha
146 | nekem
147 | neki
148 | nem
149 | néhány
150 | nélkül
151 | nincs
152 | olyan
153 | ott
154 | össze
155 | ő
156 | ők
157 | őket
158 | pedig
159 | persze
160 | rá
161 | s
162 | saját
163 | sem
164 | semmi
165 | sok
166 | sokat
167 | sokkal
168 | számára
169 | szemben
170 | szerint
171 | szinte
172 | talán
173 | tehát
174 | teljes
175 | tovább
176 | továbbá
177 | több
178 | úgy
179 | ugyanis
180 | új
181 | újabb
182 | újra
183 | után
184 | utána
185 | utolsó
186 | vagy
187 | vagyis
188 | valaki
189 | valami
190 | valamint
191 | való
192 | vagyok
193 | van
194 | vannak
195 | volt
196 | voltam
197 | voltak
198 | voltunk
199 | vissza
200 | vele
201 | viszont
202 | volna
203 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/libstemmer.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include "../include/libstemmer.h"
 5 | #include "../runtime/api.h"
 6 | #include "modules.h"
 7 | /* State behind an sb_stemmer handle: the selected module's callbacks plus its environment. */
 8 | struct sb_stemmer {
 9 |     struct SN_env * (*create)(void);   /* copied from the module; called once in sb_stemmer_new */
10 |     void (*close)(struct SN_env *);    /* copied from the module; called in sb_stemmer_delete */
11 |     int (*stem)(struct SN_env *);      /* copied from the module; called in sb_stemmer_stem */
12 | 
13 |     struct SN_env * env;               /* result of create(); passed to stem and close */
14 | };
15 | /* Return the algorithm_names array as-is; it is not defined in this file. */
16 | extern const char **
17 | sb_stemmer_list(void)
18 | {
19 |     return algorithm_names;
20 | }
21 | /* Translate an encoding name into a stemmer_encoding_t value. */
22 | static stemmer_encoding_t
23 | sb_getenc(const char * charenc)
24 | {
25 |     struct stemmer_encoding * encoding;
26 |     if (charenc == NULL) return ENC_UTF_8;   /* no name given: default to UTF-8 */
27 |     for (encoding = encodings; encoding->name != 0; encoding++) {   /* table ends at an entry with a null name */
28 | 	if (strcmp(encoding->name, charenc) == 0) break;   /* exact, case-sensitive match */
29 |     }
30 |     if (encoding->name == NULL) return ENC_UNKNOWN;   /* reached the terminator: no match */
31 |     return encoding->enc;
32 | }
33 | /* Create a stemmer for the given algorithm name and encoding; returns NULL on any failure. */
34 | extern struct sb_stemmer *
35 | sb_stemmer_new(const char * algorithm, const char * charenc)
36 | {
37 |     stemmer_encoding_t enc;
38 |     struct stemmer_modules * module;
39 |     struct sb_stemmer * stemmer;
40 | 
41 |     enc = sb_getenc(charenc);
42 |     if (enc == ENC_UNKNOWN) return NULL;   /* unrecognised encoding name */
43 | 
44 |     for (module = modules; module->name != 0; module++) {   /* both the name and the encoding must match */
45 | 	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;   /* algorithm goes to strcmp unchecked, so it must not be NULL */
46 |     }
47 |     if (module->name == NULL) return NULL;   /* no module for this name/encoding pair */
48 |     
49 |     stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
50 |     if (stemmer == NULL) return NULL;
51 | 
52 |     stemmer->create = module->create;
53 |     stemmer->close = module->close;
54 |     stemmer->stem = module->stem;
55 | 
56 |     stemmer->env = stemmer->create();
57 |     if (stemmer->env == NULL)
58 |     {
59 |         sb_stemmer_delete(stemmer);   /* close (if set) is called with the NULL env, then the handle is freed */
60 |         return NULL;
61 |     }
62 | 
63 |     return stemmer;
64 | }
65 | /* Close the stemmer's environment and free the handle; a NULL stemmer is ignored. */
66 | void
67 | sb_stemmer_delete(struct sb_stemmer * stemmer)
68 | {
69 |     if (stemmer == 0) return;
70 |     if (stemmer->close == 0) return;   /* NOTE(review): this path returns without free(stemmer) */
71 |     stemmer->close(stemmer->env);
72 |     stemmer->close = 0;
73 |     free(stemmer);
74 | }
75 | /* Stem word (size symbols long) and return the environment's buffer, or NULL on error. */
76 | const sb_symbol *
77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
78 | {
79 |     int ret;
80 |     if (SN_set_current(stemmer->env, size, (const symbol *)(word)))   /* non-zero result is treated as failure */
81 |     {
82 |         stemmer->env->l = 0;   /* so sb_stemmer_length reports 0 after this failure */
83 |         return NULL;
84 |     }
85 |     ret = stemmer->stem(stemmer->env);
86 |     if (ret < 0) return NULL;   /* negative result from the module's stem callback */
87 |     stemmer->env->p[stemmer->env->l] = 0;   /* zero-terminate the result at index l */
88 |     return (const sb_symbol *)(stemmer->env->p);   /* points into env, not a fresh allocation */
89 | }
90 | /* Return env->l, which sb_stemmer_stem uses as the index of the result's terminator. */
91 | int
92 | sb_stemmer_length(struct sb_stemmer * stemmer)
93 | {
94 |     return stemmer->env->l;
95 | }
96 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/libstemmer_c.in:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include "../include/libstemmer.h"
 5 | #include "../runtime/api.h"
 6 | #include "@MODULES_H@"
 7 | /* State behind an sb_stemmer handle: the selected module's callbacks plus its environment. */
 8 | struct sb_stemmer {
 9 |     struct SN_env * (*create)(void);   /* copied from the module; called once in sb_stemmer_new */
10 |     void (*close)(struct SN_env *);    /* copied from the module; called in sb_stemmer_delete */
11 |     int (*stem)(struct SN_env *);      /* copied from the module; called in sb_stemmer_stem */
12 | 
13 |     struct SN_env * env;               /* result of create(); passed to stem and close */
14 | };
15 | /* Return the algorithm_names array as-is; it is not defined in this file. */
16 | extern const char **
17 | sb_stemmer_list(void)
18 | {
19 |     return algorithm_names;
20 | }
21 | /* Translate an encoding name into a stemmer_encoding_t value. */
22 | static stemmer_encoding_t
23 | sb_getenc(const char * charenc)
24 | {
25 |     struct stemmer_encoding * encoding;
26 |     if (charenc == NULL) return ENC_UTF_8;   /* no name given: default to UTF-8 */
27 |     for (encoding = encodings; encoding->name != 0; encoding++) {   /* table ends at an entry with a null name */
28 | 	if (strcmp(encoding->name, charenc) == 0) break;   /* exact, case-sensitive match */
29 |     }
30 |     if (encoding->name == NULL) return ENC_UNKNOWN;   /* reached the terminator: no match */
31 |     return encoding->enc;
32 | }
33 | /* Create a stemmer for the given algorithm name and encoding; returns NULL on any failure. */
34 | extern struct sb_stemmer *
35 | sb_stemmer_new(const char * algorithm, const char * charenc)
36 | {
37 |     stemmer_encoding_t enc;
38 |     struct stemmer_modules * module;
39 |     struct sb_stemmer * stemmer;
40 | 
41 |     enc = sb_getenc(charenc);
42 |     if (enc == ENC_UNKNOWN) return NULL;   /* unrecognised encoding name */
43 | 
44 |     for (module = modules; module->name != 0; module++) {   /* both the name and the encoding must match */
45 | 	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;   /* algorithm goes to strcmp unchecked, so it must not be NULL */
46 |     }
47 |     if (module->name == NULL) return NULL;   /* no module for this name/encoding pair */
48 |     
49 |     stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
50 |     if (stemmer == NULL) return NULL;
51 | 
52 |     stemmer->create = module->create;
53 |     stemmer->close = module->close;
54 |     stemmer->stem = module->stem;
55 | 
56 |     stemmer->env = stemmer->create();
57 |     if (stemmer->env == NULL)
58 |     {
59 |         sb_stemmer_delete(stemmer);   /* close (if set) is called with the NULL env, then the handle is freed */
60 |         return NULL;
61 |     }
62 | 
63 |     return stemmer;
64 | }
65 | /* Close the stemmer's environment and free the handle; a NULL stemmer is ignored. */
66 | void
67 | sb_stemmer_delete(struct sb_stemmer * stemmer)
68 | {
69 |     if (stemmer == 0) return;
70 |     if (stemmer->close == 0) return;   /* NOTE(review): this path returns without free(stemmer) */
71 |     stemmer->close(stemmer->env);
72 |     stemmer->close = 0;
73 |     free(stemmer);
74 | }
75 | /* Stem word (size symbols long) and return the environment's buffer, or NULL on error. */
76 | const sb_symbol *
77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
78 | {
79 |     int ret;
80 |     if (SN_set_current(stemmer->env, size, (const symbol *)(word)))   /* non-zero result is treated as failure */
81 |     {
82 |         stemmer->env->l = 0;   /* so sb_stemmer_length reports 0 after this failure */
83 |         return NULL;
84 |     }
85 |     ret = stemmer->stem(stemmer->env);
86 |     if (ret < 0) return NULL;   /* negative result from the module's stem callback */
87 |     stemmer->env->p[stemmer->env->l] = 0;   /* zero-terminate the result at index l */
88 |     return (const sb_symbol *)(stemmer->env->p);   /* points into env, not a fresh allocation */
89 | }
90 | /* Return env->l, which sb_stemmer_stem uses as the index of the result's terminator. */
91 | int
92 | sb_stemmer_length(struct sb_stemmer * stemmer)
93 | {
94 |     return stemmer->env->l;
95 | }
96 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/libstemmer_utf8.c:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | #include "../include/libstemmer.h"
 5 | #include "../runtime/api.h"
 6 | #include "modules_utf8.h"
 7 | /* State behind an sb_stemmer handle: the selected module's callbacks plus its environment. */
 8 | struct sb_stemmer {
 9 |     struct SN_env * (*create)(void);   /* copied from the module; called once in sb_stemmer_new */
10 |     void (*close)(struct SN_env *);    /* copied from the module; called in sb_stemmer_delete */
11 |     int (*stem)(struct SN_env *);      /* copied from the module; called in sb_stemmer_stem */
12 | 
13 |     struct SN_env * env;               /* result of create(); passed to stem and close */
14 | };
15 | /* Return the algorithm_names array as-is; it is not defined in this file. */
16 | extern const char **
17 | sb_stemmer_list(void)
18 | {
19 |     return algorithm_names;
20 | }
21 | /* Translate an encoding name into a stemmer_encoding_t value. */
22 | static stemmer_encoding_t
23 | sb_getenc(const char * charenc)
24 | {
25 |     struct stemmer_encoding * encoding;
26 |     if (charenc == NULL) return ENC_UTF_8;   /* no name given: default to UTF-8 */
27 |     for (encoding = encodings; encoding->name != 0; encoding++) {   /* table ends at an entry with a null name */
28 | 	if (strcmp(encoding->name, charenc) == 0) break;   /* exact, case-sensitive match */
29 |     }
30 |     if (encoding->name == NULL) return ENC_UNKNOWN;   /* reached the terminator: no match */
31 |     return encoding->enc;
32 | }
33 | /* Create a stemmer for the given algorithm name and encoding; returns NULL on any failure. */
34 | extern struct sb_stemmer *
35 | sb_stemmer_new(const char * algorithm, const char * charenc)
36 | {
37 |     stemmer_encoding_t enc;
38 |     struct stemmer_modules * module;
39 |     struct sb_stemmer * stemmer;
40 | 
41 |     enc = sb_getenc(charenc);
42 |     if (enc == ENC_UNKNOWN) return NULL;   /* unrecognised encoding name */
43 | 
44 |     for (module = modules; module->name != 0; module++) {   /* both the name and the encoding must match */
45 | 	if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break;   /* algorithm goes to strcmp unchecked, so it must not be NULL */
46 |     }
47 |     if (module->name == NULL) return NULL;   /* no module for this name/encoding pair */
48 |     
49 |     stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer));
50 |     if (stemmer == NULL) return NULL;
51 | 
52 |     stemmer->create = module->create;
53 |     stemmer->close = module->close;
54 |     stemmer->stem = module->stem;
55 | 
56 |     stemmer->env = stemmer->create();
57 |     if (stemmer->env == NULL)
58 |     {
59 |         sb_stemmer_delete(stemmer);   /* close (if set) is called with the NULL env, then the handle is freed */
60 |         return NULL;
61 |     }
62 | 
63 |     return stemmer;
64 | }
65 | /* Close the stemmer's environment and free the handle; a NULL stemmer is ignored. */
66 | void
67 | sb_stemmer_delete(struct sb_stemmer * stemmer)
68 | {
69 |     if (stemmer == 0) return;
70 |     if (stemmer->close == 0) return;   /* NOTE(review): this path returns without free(stemmer) */
71 |     stemmer->close(stemmer->env);
72 |     stemmer->close = 0;
73 |     free(stemmer);
74 | }
75 | /* Stem word (size symbols long) and return the environment's buffer, or NULL on error. */
76 | const sb_symbol *
77 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size)
78 | {
79 |     int ret;
80 |     if (SN_set_current(stemmer->env, size, (const symbol *)(word)))   /* non-zero result is treated as failure */
81 |     {
82 |         stemmer->env->l = 0;   /* so sb_stemmer_length reports 0 after this failure */
83 |         return NULL;
84 |     }
85 |     ret = stemmer->stem(stemmer->env);
86 |     if (ret < 0) return NULL;   /* negative result from the module's stem callback */
87 |     stemmer->env->p[stemmer->env->l] = 0;   /* zero-terminate the result at index l */
88 |     return (const sb_symbol *)(stemmer->env->p);   /* points into env, not a fresh allocation */
89 | }
90 | /* Return env->l, which sb_stemmer_stem uses as the index of the result's terminator. */
91 | int
92 | sb_stemmer_length(struct sb_stemmer * stemmer)
93 | {
94 |     return stemmer->env->l;
95 | }
96 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/modules_utf8.txt:
--------------------------------------------------------------------------------
 1 | # This file contains a list of stemmers to include in the distribution.
 2 | # The format is a set of space separated lines - on each line:
 3 | #  First item is name of stemmer.
 4 | #  Second item is comma separated list of character sets.
 5 | #  Third item is comma separated list of names to refer to the stemmer by.
 6 | #
 7 | # Lines starting with a #, or blank lines, are ignored.
 8 | 
 9 | # List all the main algorithms for each language, in UTF-8.
10 | 
11 | danish          UTF_8                   danish,da,dan
12 | dutch           UTF_8                   dutch,nl,dut,nld
13 | english         UTF_8                   english,en,eng
14 | finnish         UTF_8                   finnish,fi,fin
15 | french          UTF_8                   french,fr,fre,fra
16 | german          UTF_8                   german,de,ger,deu
17 | hungarian       UTF_8                   hungarian,hu,hun
18 | italian         UTF_8                   italian,it,ita
19 | norwegian       UTF_8                   norwegian,no,nor
20 | portuguese      UTF_8                   portuguese,pt,por
21 | romanian        UTF_8                   romanian,ro,rum,ron
22 | russian         UTF_8                   russian,ru,rus
23 | spanish         UTF_8                   spanish,es,esl,spa
24 | swedish         UTF_8                   swedish,sv,swe
25 | turkish         UTF_8                   turkish,tr,tur
26 | 
27 | # Also include the traditional porter algorithm for english.
28 | # The porter algorithm is included in the libstemmer distribution to assist
29 | # with backwards compatibility, but for new systems the english algorithm
30 | # should be used in preference.
31 | porter          UTF_8                   porter
32 | 
33 | # Some other stemmers in the snowball project are not included in the standard
34 | # distribution. To compile a libstemmer with them in, add them to this list,
35 | # and regenerate the distribution. (You will need a full source checkout for
36 | # this.) They are included in the snowball website as curiosities, but are not
37 | # intended for general use, and use of them is not fully supported.  These
38 | # algorithms are:
39 | #
40 | # german2          - This is a slight modification of the german stemmer.
41 | #german2          UTF_8                   german2
42 | #
43 | # kraaij_pohlmann  - This is a different dutch stemmer.
44 | #kraaij_pohlmann  UTF_8                   kraaij_pohlmann
45 | #
46 | # lovins           - This is an english stemmer, but fairly outdated, and
47 | #                    only really applicable to a restricted type of input text
48 | #                    (keywords in academic publications).
49 | #lovins           UTF_8                   lovins
50 | 


--------------------------------------------------------------------------------
/libstemmer_c/runtime/header.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <limits.h>
 3 | 
 4 | #include "api.h"
 5 | /* Short aliases for the <limits.h> integer limits. */
 6 | #define MAXINT INT_MAX
 7 | #define MININT INT_MIN
 8 | /* Byte size of two ints (presumably the header read by SIZE/CAPACITY); parenthesised so it is safe inside larger expressions. */
 9 | #define HEAD (2*sizeof(int))
10 | /* p points just past two ints: [-1] is the size, [-2] the capacity.  SET_SIZE parenthesises n and the whole assignment. */
11 | #define SIZE(p)        ((int *)(p))[-1]
12 | #define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
13 | #define CAPACITY(p)    ((int *)(p))[-2]
14 | /* One table entry; find_among and find_among_b take arrays of these. */
15 | struct among
16 | {   int s_size;     /* number of chars in string */
17 |     const symbol * s;       /* search string */
18 |     int substring_i;/* index to longest matching substring */
19 |     int result;     /* result of the lookup */
20 |     int (* function)(struct SN_env *);   /* per-entry callback taking the environment; its role is not visible here (TODO confirm in utilities.c) */
21 | };
22 | 
23 | extern symbol * create_s(void);
24 | extern void lose_s(symbol * p);
25 | 
26 | extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
27 | 
28 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
29 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
30 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
31 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
32 | 
33 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
34 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
35 | extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
36 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
37 | 
38 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
39 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
40 | extern int eq_v(struct SN_env * z, const symbol * p);
41 | extern int eq_v_b(struct SN_env * z, const symbol * p);
42 | 
43 | extern int find_among(struct SN_env * z, const struct among * v, int v_size);
44 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
45 | 
46 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
47 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
48 | extern int slice_from_v(struct SN_env * z, const symbol * p);
49 | extern int slice_del(struct SN_env * z);
50 | 
51 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
52 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
53 | 
54 | extern symbol * slice_to(struct SN_env * z, symbol * p);
55 | extern symbol * assign_to(struct SN_env * z, symbol * p);
56 | 
57 | extern void debug(struct SN_env * z, int number, int line_count);
58 | 
59 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/modules.txt:
--------------------------------------------------------------------------------
 1 | # This file contains a list of stemmers to include in the distribution.
 2 | # The format is a set of space separated lines - on each line:
 3 | #  First item is name of stemmer.
 4 | #  Second item is comma separated list of character sets.
 5 | #  Third item is comma separated list of names to refer to the stemmer by.
 6 | #
 7 | # Lines starting with a #, or blank lines, are ignored.
 8 | 
 9 | # List all the main algorithms for each language, in UTF-8, and also with
10 | # the most commonly used encoding.
11 | 
12 | danish          UTF_8,ISO_8859_1        danish,da,dan
13 | dutch           UTF_8,ISO_8859_1        dutch,nl,dut,nld
14 | english         UTF_8,ISO_8859_1        english,en,eng
15 | finnish         UTF_8,ISO_8859_1        finnish,fi,fin
16 | french          UTF_8,ISO_8859_1        french,fr,fre,fra
17 | german          UTF_8,ISO_8859_1        german,de,ger,deu
18 | hungarian       UTF_8,ISO_8859_1        hungarian,hu,hun
19 | italian         UTF_8,ISO_8859_1        italian,it,ita
20 | norwegian       UTF_8,ISO_8859_1        norwegian,no,nor
21 | portuguese      UTF_8,ISO_8859_1        portuguese,pt,por
22 | romanian        UTF_8,ISO_8859_2        romanian,ro,rum,ron
23 | russian         UTF_8,KOI8_R            russian,ru,rus
24 | spanish         UTF_8,ISO_8859_1        spanish,es,esl,spa
25 | swedish         UTF_8,ISO_8859_1        swedish,sv,swe
26 | turkish         UTF_8                   turkish,tr,tur
27 | 
28 | # Also include the traditional porter algorithm for english.
29 | # The porter algorithm is included in the libstemmer distribution to assist
30 | # with backwards compatibility, but for new systems the english algorithm
31 | # should be used in preference.
32 | porter          UTF_8,ISO_8859_1        porter
33 | 
34 | # Some other stemmers in the snowball project are not included in the standard
35 | # distribution. To compile a libstemmer with them in, add them to this list,
36 | # and regenerate the distribution. (You will need a full source checkout for
37 | # this.) They are included in the snowball website as curiosities, but are not
38 | # intended for general use, and use of them is not fully supported.  These
39 | # algorithms are:
40 | #
41 | # german2          - This is a slight modification of the german stemmer.
42 | #german2          UTF_8,ISO_8859_1        german2
43 | #
44 | # kraaij_pohlmann  - This is a different dutch stemmer.
45 | #kraaij_pohlmann  UTF_8,ISO_8859_1        kraaij_pohlmann
46 | #
47 | # lovins           - This is an english stemmer, but fairly outdated, and
48 | #                    only really applicable to a restricted type of input text
49 | #                    (keywords in academic publications).
50 | #lovins           UTF_8,ISO_8859_1        lovins
51 | 


--------------------------------------------------------------------------------
/stopwords/fi.txt:
--------------------------------------------------------------------------------
 1 | | forms of BE
 2 | 
 3 | olla
 4 | olen
 5 | olet
 6 | on
 7 | olemme
 8 | olette
 9 | ovat
10 | ole        | negative form
11 | 
12 | oli
13 | olisi
14 | olisit
15 | olisin
16 | olisimme
17 | olisitte
18 | olisivat
19 | olit
20 | olin
21 | olimme
22 | olitte
23 | olivat
24 | ollut
25 | olleet
26 | 
27 | en         | negation
28 | et
29 | ei
30 | emme
31 | ette
32 | eivät
33 | 
34 | |Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
35 | minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
36 | sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
37 | hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
38 | me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
39 | te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
40 | he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
41 | 
42 | tämä   tämän         tätä   tässä   tästä   tähän  tällä   tältä   tälle   tänä   täksi  | this
43 | tuo    tuon          tuota  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
44 | se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
45 | nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
46 | nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
47 | ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
48 | 
49 | kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
50 | ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
51 | mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
52 | mitkä                                                                                    | (pl)
53 | 
54 | joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
55 | jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
56 | 
57 | | conjunctions
58 | 
59 | että   | that
60 | ja     | and
61 | jos    | if
62 | koska  | because
63 | kuin   | than
64 | mutta  | but
65 | niin   | so
66 | sekä   | and
67 | sillä  | for
68 | tai    | or
69 | vaan   | but
70 | vai    | or
71 | vaikka | although
72 | 
73 | 
74 | | prepositions
75 | 
76 | kanssa  | with
77 | mukaan  | according to
78 | noin    | about
79 | poikki  | across
80 | yli     | over, across
81 | 
82 | | other
83 | 
84 | kun    | when
85 | niin   | so
86 | nyt    | now
87 | itse   | self
88 | 


--------------------------------------------------------------------------------
/libstemmer_c/mkinc.mak:
--------------------------------------------------------------------------------
 1 | # libstemmer/mkinc.mak: List of stemming module source files
 2 | #
 3 | # This file is generated by mkmodules.pl from a list of module names.
 4 | # Do not edit manually.
 5 | #
 6 | # Modules included by this file are: danish, dutch, english, finnish, french,
 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian,
 8 | # russian, spanish, swedish, turkish
 9 | # C sources of the library: one stemmer per module/encoding pair, then the runtime and the libstemmer front end.
10 | snowball_sources= \
11 |   src_c/stem_ISO_8859_1_danish.c \
12 |   src_c/stem_UTF_8_danish.c \
13 |   src_c/stem_ISO_8859_1_dutch.c \
14 |   src_c/stem_UTF_8_dutch.c \
15 |   src_c/stem_ISO_8859_1_english.c \
16 |   src_c/stem_UTF_8_english.c \
17 |   src_c/stem_ISO_8859_1_finnish.c \
18 |   src_c/stem_UTF_8_finnish.c \
19 |   src_c/stem_ISO_8859_1_french.c \
20 |   src_c/stem_UTF_8_french.c \
21 |   src_c/stem_ISO_8859_1_german.c \
22 |   src_c/stem_UTF_8_german.c \
23 |   src_c/stem_ISO_8859_1_hungarian.c \
24 |   src_c/stem_UTF_8_hungarian.c \
25 |   src_c/stem_ISO_8859_1_italian.c \
26 |   src_c/stem_UTF_8_italian.c \
27 |   src_c/stem_ISO_8859_1_norwegian.c \
28 |   src_c/stem_UTF_8_norwegian.c \
29 |   src_c/stem_ISO_8859_1_porter.c \
30 |   src_c/stem_UTF_8_porter.c \
31 |   src_c/stem_ISO_8859_1_portuguese.c \
32 |   src_c/stem_UTF_8_portuguese.c \
33 |   src_c/stem_ISO_8859_2_romanian.c \
34 |   src_c/stem_UTF_8_romanian.c \
35 |   src_c/stem_KOI8_R_russian.c \
36 |   src_c/stem_UTF_8_russian.c \
37 |   src_c/stem_ISO_8859_1_spanish.c \
38 |   src_c/stem_UTF_8_spanish.c \
39 |   src_c/stem_ISO_8859_1_swedish.c \
40 |   src_c/stem_UTF_8_swedish.c \
41 |   src_c/stem_UTF_8_turkish.c \
42 |   runtime/api.c \
43 |   runtime/utilities.c \
44 |   libstemmer/libstemmer.c
45 | # Headers matching snowball_sources: one per stemmer, plus the public, module-table and runtime headers.
46 | snowball_headers= \
47 |   src_c/stem_ISO_8859_1_danish.h \
48 |   src_c/stem_UTF_8_danish.h \
49 |   src_c/stem_ISO_8859_1_dutch.h \
50 |   src_c/stem_UTF_8_dutch.h \
51 |   src_c/stem_ISO_8859_1_english.h \
52 |   src_c/stem_UTF_8_english.h \
53 |   src_c/stem_ISO_8859_1_finnish.h \
54 |   src_c/stem_UTF_8_finnish.h \
55 |   src_c/stem_ISO_8859_1_french.h \
56 |   src_c/stem_UTF_8_french.h \
57 |   src_c/stem_ISO_8859_1_german.h \
58 |   src_c/stem_UTF_8_german.h \
59 |   src_c/stem_ISO_8859_1_hungarian.h \
60 |   src_c/stem_UTF_8_hungarian.h \
61 |   src_c/stem_ISO_8859_1_italian.h \
62 |   src_c/stem_UTF_8_italian.h \
63 |   src_c/stem_ISO_8859_1_norwegian.h \
64 |   src_c/stem_UTF_8_norwegian.h \
65 |   src_c/stem_ISO_8859_1_porter.h \
66 |   src_c/stem_UTF_8_porter.h \
67 |   src_c/stem_ISO_8859_1_portuguese.h \
68 |   src_c/stem_UTF_8_portuguese.h \
69 |   src_c/stem_ISO_8859_2_romanian.h \
70 |   src_c/stem_UTF_8_romanian.h \
71 |   src_c/stem_KOI8_R_russian.h \
72 |   src_c/stem_UTF_8_russian.h \
73 |   src_c/stem_ISO_8859_1_spanish.h \
74 |   src_c/stem_UTF_8_spanish.h \
75 |   src_c/stem_ISO_8859_1_swedish.h \
76 |   src_c/stem_UTF_8_swedish.h \
77 |   src_c/stem_UTF_8_turkish.h \
78 |   include/libstemmer.h \
79 |   libstemmer/modules.h \
80 |   runtime/api.h \
81 |   runtime/header.h
82 | 
83 | 


--------------------------------------------------------------------------------
/libstemmer-ffi.lisp:
--------------------------------------------------------------------------------
 1 | ;;; This file was automatically generated by SWIG (http://www.swig.org).
 2 | ;;; Version 2.0.4
 3 | ;;;
 4 | ;;; Do not make changes to this file unless you know what you are doing--modify
 5 | ;;; the SWIG interface file instead.
 6 | 
 7 | 
 8 | ;;;SWIG wrapper code starts here
 9 | ;; Each enum is a symbol or a (SYMBOL VALUE) list; a bare symbol gets the previous index plus one, starting at 0.
10 | (cl:defmacro defanonenum (&body enums)
11 |    "Converts anonymous enums to defconstants."
12 |   `(cl:progn ,@(cl:loop for value in enums
13 |                         for index = 0 then (cl:1+ index)
14 |                         when (cl:listp value) do (cl:setf index (cl:second value)
15 |                                                           value (cl:first value))
16 |                         collect `(cl:defconstant ,value ,index))))
17 | ;; Define swig-lispify unless it is already fbound: turn the C identifier string NAME into an upper-case, hyphenated symbol interned in PACKAGE.
18 | (cl:eval-when (:compile-toplevel :load-toplevel)
19 |   (cl:unless (cl:fboundp 'swig-lispify)
20 |     (cl:defun swig-lispify (name flag cl:&optional (package cl:*package*))
21 |       (cl:labels ((helper (lst last rest cl:&aux (c (cl:car lst))) ; LAST is the class of the previous char; REST is the output so far, reversed
22 |                     (cl:cond
23 |                       ((cl:null lst)
24 |                        rest)
25 |                       ((cl:upper-case-p c)
26 |                        (helper (cl:cdr lst) 'upper
27 |                                (cl:case last
28 |                                  ((lower digit) (cl:list* c #\- rest)) ; lower/digit then upper: a hyphen ends up before C
29 |                                  (cl:t (cl:cons c rest)))))
30 |                       ((cl:lower-case-p c)
31 |                        (helper (cl:cdr lst) 'lower (cl:cons (cl:char-upcase c) rest)))
32 |                       ((cl:digit-char-p c)
33 |                        (helper (cl:cdr lst) 'digit
34 |                                (cl:case last
35 |                                  ((upper lower) (cl:list* c #\- rest)) ; letter then digit: a hyphen ends up before the digit
36 |                                  (cl:t (cl:cons c rest)))))
37 |                       ((cl:char-equal c #\_)
38 |                        (helper (cl:cdr lst) '_ (cl:cons #\- rest))) ; underscore becomes a hyphen
39 |                       (cl:t
40 |                        (cl:error "Invalid character: ~A" c)))))
41 |         (cl:let ((fix (cl:case flag ; wrapped around the name: + for constants and enum values, * for variables
42 |                         ((constant enumvalue) "+")
43 |                         (variable "*")
44 |                         (cl:t ""))))
45 |           (cl:intern
46 |            (cl:concatenate
47 |             'cl:string
48 |             fix
49 |             (cl:nreverse (helper (cl:concatenate 'cl:list name) cl:nil cl:nil))
50 |             fix)
51 |            package))))))
52 | 
53 | ;;;SWIG wrapper code ends here
54 | 
55 | 
56 | (cffi:defcfun ("sb_stemmer_list" sb_stemmer_list) :pointer) ; C: const char **; null-terminated array of canonical algorithm names, must not be modified
57 | 
58 | (cffi:defcfun ("sb_stemmer_new" sb_stemmer_new) :pointer ; C: struct sb_stemmer *; NULL if algorithm/encoding is unavailable or out of memory
59 |   (algorithm :string) ; algorithm name, case-sensitive; libstemmer.h asks for lower case
60 |   (charenc :string)) ; encoding name such as "UTF_8"; libstemmer.h says a C NULL selects UTF-8
61 | 
62 | (cffi:defcfun ("sb_stemmer_delete" sb_stemmer_delete) :void ; frees the stemmer; libstemmer.h documents a null pointer as a no-op
63 |   (stemmer :pointer))
64 | 
65 | (cffi:defcfun ("sb_stemmer_stem" sb_stemmer_stem) :pointer ; C: const sb_symbol *; owned by the stemmer, invalid after the next stem call or delete; NULL on out of memory
66 |   (stemmer :pointer)
67 |   (word :pointer) ; C: const sb_symbol * (sb_symbol is unsigned char)
68 |   (size :int)) ; presumably the length of WORD in sb_symbol units -- TODO confirm against libstemmer.c
69 | 
70 | (cffi:defcfun ("sb_stemmer_length" sb_stemmer_length) :int ; length of the last sb_stemmer_stem result; do not call before stemming
71 |   (stemmer :pointer))
72 | 


--------------------------------------------------------------------------------
/libstemmer_c/include/libstemmer.h:
--------------------------------------------------------------------------------
 1 | 
 2 | /* Make header file work when included from C++ */
 3 | #ifdef __cplusplus
 4 | extern "C" {
 5 | #endif
 6 | 
 7 | struct sb_stemmer; /* opaque here: this header only ever passes it by pointer */
 8 | typedef unsigned char sb_symbol; /* element type of the word and stem buffers */
 9 | 
10 | /* FIXME - should be able to get a version number for each stemming
11 |  * algorithm (which will be incremented each time the output changes). */
12 | 
13 | /** Returns an array of the names of the available stemming algorithms.
14 |  *  Note that these are the canonical names - aliases (ie, other names for
15 |  *  the same algorithm) will not be included in the list.
16 |  *  The list is terminated with a null pointer.
17 |  *
18 |  *  The list must not be modified in any way.
19 |  */
20 | const char ** sb_stemmer_list(void);
21 | 
22 | /** Create a new stemmer object, using the specified algorithm, for the
23 |  *  specified character encoding.
24 |  *
25 |  *  All algorithms will usually be available in UTF-8, but may also be
26 |  *  available in other character encodings.
27 |  *
28 |  *  @param algorithm The algorithm name.  This is either the English
29 |  *  name of the algorithm, or one of the 2 or 3 letter ISO 639 codes for
30 |  *  the language.  Note that case is significant in this parameter - the
31 |  *  value should be supplied in lower case.
32 |  *
33 |  *  @param charenc The character encoding.  NULL may be passed as
34 |  *  this value, in which case UTF-8 encoding will be assumed. Otherwise,
35 |  *  the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1),
36 |  *  "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian).  Note that
37 |  *  case is significant in this parameter.
38 |  *
39 |  *  @return NULL if the specified algorithm is not recognised, or the
40 |  *  algorithm is not available for the requested encoding.  Otherwise,
41 |  *  returns a pointer to a newly created stemmer for the requested algorithm.
42 |  *  The returned pointer must be deleted by calling sb_stemmer_delete().
43 |  *
44 |  *  @note NULL will also be returned if an out of memory error occurs.
45 |  */
46 | struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc);
47 | 
48 | /** Delete a stemmer object.
49 |  *
50 |  *  This frees all resources allocated for the stemmer.  After calling
51 |  *  this function, the supplied stemmer may no longer be used in any way.
52 |  *
53 |  *  It is safe to pass a null pointer to this function - this will have
54 |  *  no effect.
55 |  */
56 | void                sb_stemmer_delete(struct sb_stemmer * stemmer);
57 | 
58 | /** Stem a word.
59 |  *
60 |  *  The return value is owned by the stemmer - it must not be freed or
61 |  *  modified, and it will become invalid when the stemmer is called again,
62 |  *  or if the stemmer is freed.
63 |  *
64 |  *  The length of the return value can be obtained using sb_stemmer_length().
65 |  *
66 |  *  If an out-of-memory error occurs, this will return NULL.
67 |  */
68 | const sb_symbol *   sb_stemmer_stem(struct sb_stemmer * stemmer,
69 | 				    const sb_symbol * word, int size); /* size: presumably the length of word in sb_symbol units - TODO confirm */
70 | 
71 | /** Get the length of the result of the last stemmed word.
72 |  *  This should not be called before sb_stemmer_stem() has been called.
73 |  */
74 | int                 sb_stemmer_length(struct sb_stemmer * stemmer);
75 | 
76 | #ifdef __cplusplus
77 | }
78 | #endif
79 | 
80 | 


--------------------------------------------------------------------------------
/stopwords/da.txt:
--------------------------------------------------------------------------------
  1 |  | A Danish stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | This is a ranked list (commonest to rarest) of stopwords derived from
  5 |  | a large text sample.
  6 | 
  7 | 
  8 | og           | and
  9 | i            | in
 10 | jeg          | I
 11 | det          | that (dem. pronoun)/it (pers. pronoun)
 12 | at           | that (in front of a sentence)/to (with infinitive)
 13 | en           | a/an
 14 | den          | it (pers. pronoun)/that (dem. pronoun)
 15 | til          | to/at/for/until/against/by/of/into, more
 16 | er           | present tense of "to be"
 17 | som          | who, as
 18 | på           | on/upon/in/on/at/to/after/of/with/for, on
 19 | de           | they
 20 | med          | with/by/in, along
 21 | han          | he
 22 | af           | of/by/from/off/for/in/with/on, off
 23 | for          | at/for/to/from/by/of/ago, in front/before, because
 24 | ikke         | not
 25 | der          | who/which, there/those
 26 | var          | past tense of "to be"
 27 | mig          | me/myself
 28 | sig          | oneself/himself/herself/itself/themselves
 29 | men          | but
 30 | et           | a/an/one, one (number), someone/somebody/one
 31 | har          | present tense of "to have"
 32 | om           | round/about/for/in/a, about/around/down, if
 33 | vi           | we
 34 | min          | my
 35 | havde        | past tense of "to have"
 36 | ham          | him
 37 | hun          | she
 38 | nu           | now
 39 | over         | over/above/across/by/beyond/past/on/about, over/past
 40 | da           | then, when/as/since
 41 | fra          | from/off/since, off, since
 42 | du           | you
 43 | ud           | out
 44 | sin          | his/her/its/one's
 45 | dem          | them
 46 | os           | us/ourselves
 47 | op           | up
 48 | man          | you/one
 49 | hans         | his
 50 | hvor         | where
 51 | eller        | or
 52 | hvad         | what
 53 | skal         | must/shall etc.
 54 | selv         | myself/yourself/herself/ourselves etc., even
 55 | her          | here
 56 | alle         | all/everyone/everybody etc.
 57 | vil          | will (verb)
 58 | blev         | past tense of "to stay/to remain/to get/to become"
 59 | kunne        | could
 60 | ind          | in
 61 | når          | when
 62 | være         | infinitive of "to be"
 63 | dog          | however/yet/after all
 64 | noget        | something
 65 | ville        | would
 66 | jo           | you know/you see (adv), yes
 67 | deres        | their/theirs
 68 | efter        | after/behind/according to/for/by/from, later/afterwards
 69 | ned          | down
 70 | skulle       | should
 71 | denne        | this
 72 | end          | than
 73 | dette        | this
 74 | mit          | my/mine
 75 | også         | also
 76 | under        | under/beneath/below/during, below/underneath
 77 | have         | have
 78 | dig          | you
 79 | anden        | other
 80 | hende        | her
 81 | mine         | my
 82 | alt          | everything
 83 | meget        | much/very, plenty of
 84 | sit          | his, her, its, one's
 85 | sine         | his, her, its, one's
 86 | vor          | our
 87 | mod          | against
 88 | disse        | these
 89 | hvis         | if
 90 | din          | your/yours
 91 | nogle        | some
 92 | hos          | by/at
 93 | blive        | be/become
 94 | mange        | many
 95 | ad           | by/through
 96 | bliver       | present tense of "to be/to become"
 97 | hendes       | her/hers
 98 | været        | been (past participle of "to be")
 99 | thi          | for (conj)
100 | jer          | you
101 | sådan        | such, like this/like that
102 | 


--------------------------------------------------------------------------------
/stopwords/sv.txt:
--------------------------------------------------------------------------------
  1 |  | A Swedish stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | This is a ranked list (commonest to rarest) of stopwords derived from
  5 |  | a large text sample.
  6 | 
  7 |  | Swedish stop words occasionally exhibit homonym clashes. For example
  8 |  |  så = so, but also seed. These are indicated clearly below.
  9 | 
 10 | och            | and
 11 | det            | it, this/that
 12 | att            | to (with infinitive)
 13 | i              | in, at
 14 | en             | a
 15 | jag            | I
 16 | hon            | she
 17 | som            | who, that
 18 | han            | he
 19 | på             | on
 20 | den            | it, this/that
 21 | med            | with
 22 | var            | where, each
 23 | sig            | him(self) etc
 24 | för            | for
 25 | så             | so (also: seed)
 26 | till           | to
 27 | är             | is
 28 | men            | but
 29 | ett            | a
 30 | om             | if; around, about
 31 | hade           | had
 32 | de             | they, these/those
 33 | av             | of
 34 | icke           | not, no
 35 | mig            | me
 36 | du             | you
 37 | henne          | her
 38 | då             | then, when
 39 | sin            | his
 40 | nu             | now
 41 | har            | have
 42 | inte           | not (e.g. inte någon = no one)
 43 | hans           | his
 44 | honom          | him
 45 | skulle         | should, would
 46 | hennes         | her
 47 | där            | there
 48 | min            | my
 49 | man            | one (pronoun)
 50 | ej             | nor
 51 | vid            | at, by, on (also: vast)
 52 | kunde          | could
 53 | något          | some etc
 54 | från           | from, off
 55 | ut             | out
 56 | när            | when
 57 | efter          | after, behind
 58 | upp            | up
 59 | vi             | we
 60 | dem            | them
 61 | vara           | be
 62 | vad            | what
 63 | över           | over
 64 | än             | than
 65 | dig            | you
 66 | kan            | can
 67 | sina           | his
 68 | här            | here
 69 | ha             | have
 70 | mot            | towards
 71 | alla           | all
 72 | under          | under (also: wonder)
 73 | någon          | some etc
 74 | eller          | or (else)
 75 | allt           | all
 76 | mycket         | much
 77 | sedan          | since
 78 | ju             | why
 79 | denna          | this/that
 80 | själv          | myself, yourself etc
 81 | detta          | this/that
 82 | åt             | to
 83 | utan           | without
 84 | varit          | was
 85 | hur            | how
 86 | ingen          | no
 87 | mitt           | my
 88 | ni             | you
 89 | bli            | to be, become
 90 | blev           | from bli
 91 | oss            | us
 92 | din            | thy
 93 | dessa          | these/those
 94 | några          | some etc
 95 | deras          | their
 96 | blir           | from bli
 97 | mina           | my
 98 | samma          | (the) same
 99 | vilken         | who, that
100 | er             | you, your
101 | sådan          | such a
102 | vår            | our
103 | blivit         | from bli
104 | dess           | its
105 | inom           | within
106 | mellan         | between
107 | sådant         | such a
108 | varför         | why
109 | varje          | each
110 | vilka          | who, that
111 | ditt           | thy
112 | vem            | who
113 | vilket         | who, that
114 | sitta          | his
115 | sådana         | such a
116 | vart           | each
117 | dina           | thy
118 | vars           | whose
119 | vårt           | our
120 | våra           | our
121 | ert            | your
122 | era            | your
123 | vilkas         | whose
124 | 


--------------------------------------------------------------------------------
/stopwords/fr.txt:
--------------------------------------------------------------------------------
  1 |  | A French stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 | au             |  à + le
  5 | aux            |  à + les
  6 | avec           |  with
  7 | ce             |  this
  8 | ces            |  these
  9 | dans           |  in
 10 | de             |  of
 11 | des            |  de + les
 12 | du             |  de + le
 13 | elle           |  she
 14 | en             |  `of them' etc
 15 | et             |  and
 16 | eux            |  them
 17 | il             |  he
 18 | je             |  I
 19 | la             |  the
 20 | le             |  the
 21 | leur           |  their
 22 | lui            |  him
 23 | ma             |  my (fem)
 24 | mais           |  but
 25 | me             |  me
 26 | même           |  same; as in moi-même (myself) etc
 27 | mes            |  my (pl)
 28 | moi            |  me
 29 | mon            |  my (masc)
 30 | ne             |  not
 31 | nos            |  our (pl)
 32 | notre          |  our
 33 | nous           |  we
 34 | on             |  one
 35 | ou             |  or (not to be confused with où = where)
 36 | par            |  by
 37 | pas            |  not
 38 | pour           |  for
 39 | qu             |  que before vowel
 40 | que            |  that
 41 | qui            |  who
 42 | sa             |  his, her (fem)
 43 | se             |  oneself
 44 | ses            |  his (pl)
 45 | son            |  his, her (masc)
 46 | sur            |  on
 47 | ta             |  thy (fem)
 48 | te             |  thee
 49 | tes            |  thy (pl)
 50 | toi            |  thee
 51 | ton            |  thy (masc)
 52 | tu             |  thou
 53 | un             |  a
 54 | une            |  a
 55 | vos            |  your (pl)
 56 | votre          |  your
 57 | vous           |  you
 58 | 
 59 |                |  single letter forms
 60 | 
 61 | c              |  c'
 62 | d              |  d'
 63 | j              |  j'
 64 | l              |  l'
 65 | à              |  to, at
 66 | m              |  m'
 67 | n              |  n'
 68 | s              |  s'
 69 | t              |  t'
 70 | y              |  there
 71 | 
 72 |                | forms of être (not including the infinitive):
 73 | été
 74 | étée
 75 | étées
 76 | étés
 77 | étant
 78 | suis
 79 | es
 80 | est
 81 | sommes
 82 | êtes
 83 | sont
 84 | serai
 85 | seras
 86 | sera
 87 | serons
 88 | serez
 89 | seront
 90 | serais
 91 | serait
 92 | serions
 93 | seriez
 94 | seraient
 95 | étais
 96 | était
 97 | étions
 98 | étiez
 99 | étaient
100 | fus
101 | fut
102 | fûmes
103 | fûtes
104 | furent
105 | sois
106 | soit
107 | soyons
108 | soyez
109 | soient
110 | fusse
111 | fusses
112 | fût
113 | fussions
114 | fussiez
115 | fussent
116 | 
117 |                | forms of avoir (not including the infinitive):
118 | ayant
119 | eu
120 | eue
121 | eues
122 | eus
123 | ai
124 | as
125 | avons
126 | avez
127 | ont
128 | aurai
129 | auras
130 | aura
131 | aurons
132 | aurez
133 | auront
134 | aurais
135 | aurait
136 | aurions
137 | auriez
138 | auraient
139 | avais
140 | avait
141 | avions
142 | aviez
143 | avaient
144 | eut
145 | eûmes
146 | eûtes
147 | eurent
148 | aie
149 | aies
150 | ait
151 | ayons
152 | ayez
153 | aient
154 | eusse
155 | eusses
156 | eût
157 | eussions
158 | eussiez
159 | eussent
160 | 
161 |                | Later additions (from Jean-Christophe Deschamps)
162 | ceci           |  this
163 | cela           |  that (added 11 Apr 2012. Omission reported by Adrien Grand)
164 | celà           |  that (incorrect, though common)
165 | cet            |  this
166 | cette          |  this
167 | ici            |  here
168 | ils            |  they
169 | les            |  the (pl)
170 | leurs          |  their (pl)
171 | quel           |  which
172 | quels          |  which
173 | quelle         |  which
174 | quelles        |  which
175 | sans           |  without
176 | soi            |  oneself
177 | 


--------------------------------------------------------------------------------
/stopwords/nl.txt:
--------------------------------------------------------------------------------
  1 |  | A Dutch stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | This is a ranked list (commonest to rarest) of stopwords derived from
  5 |  | a large sample of Dutch text.
  6 | 
  7 |  | Dutch stop words frequently exhibit homonym clashes. These are indicated
  8 |  | clearly below.
  9 | 
 10 | de             |  the
 11 | en             |  and
 12 | van            |  of, from
 13 | ik             |  I, the ego
 14 | te             |  (1) chez, at etc, (2) to, (3) too
 15 | dat            |  that, which
 16 | die            |  that, those, who, which
 17 | in             |  in, inside
 18 | een            |  a, an, one
 19 | hij            |  he
 20 | het            |  the, it
 21 | niet           |  not, nothing, naught
 22 | zijn           |  (1) to be, being, (2) his, one's, its
 23 | is             |  is
 24 | was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
 25 | op             |  on, upon, at, in, up, used up
 26 | aan            |  on, upon, to (as dative)
 27 | met            |  with, by
 28 | als            |  like, such as, when
 29 | voor           |  (1) before, in front of, (2) furrow
 30 | had            |  had, past tense all persons sing. of 'hebben' (have)
 31 | er             |  there
 32 | maar           |  but, only
 33 | om             |  round, about, for etc
 34 | hem            |  him
 35 | dan            |  then
 36 | zou            |  should/would, past tense all persons sing. of 'zullen'
 37 | of             |  or, whether, if
 38 | wat            |  what, something, anything
 39 | mijn           |  possessive and noun 'mine'
 40 | men            |  people, 'one'
 41 | dit            |  this
 42 | zo             |  so, thus, in this way
 43 | door           |  through, by
 44 | over           |  over, across
 45 | ze             |  she, her, they, them
 46 | zich           |  oneself
 47 | bij            |  (1) a bee, (2) by, near, at
 48 | ook            |  also, too
 49 | tot            |  till, until
 50 | je             |  you
 51 | mij            |  me
 52 | uit            |  out of, from
 53 | der            |  Old Dutch form of 'van der' still found in surnames
 54 | daar           |  (1) there, (2) because
 55 | haar           |  (1) her, their, them, (2) hair
 56 | naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
 57 | heb            |  present first person sing. of 'to have'
 58 | hoe            |  how, why
 59 | heeft          |  present third person sing. of 'to have'
 60 | hebben         |  'to have' and various parts thereof
 61 | deze           |  this
 62 | u              |  you
 63 | want           |  (1) for, (2) mitten, (3) rigging
 64 | nog            |  yet, still
 65 | zal            |  'shall', first and third person sing. of verb 'zullen' (will)
 66 | me             |  me
 67 | zij            |  she, they
 68 | nu             |  now
 69 | ge             |  'thou', still used in Belgium and south Netherlands
 70 | geen           |  none
 71 | omdat          |  because
 72 | iets           |  something, somewhat
 73 | worden         |  to become, grow, get
 74 | toch           |  yet, still
 75 | al             |  all, every, each
 76 | waren          |  (1) 'were', (2) to wander, (3) wares
 77 | veel           |  much, many
 78 | meer           |  (1) more, (2) lake
 79 | doen           |  to do, to make
 80 | toen           |  then, when
 81 | moet           |  noun 'spot/mote' and present form of 'to must'
 82 | ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
 83 | zonder         |  without
 84 | kan            |  noun 'can' and present form of 'to be able'
 85 | hun            |  their, them
 86 | dus            |  so, consequently
 87 | alles          |  all, everything, anything
 88 | onder          |  under, beneath
 89 | ja             |  yes, of course
 90 | eens           |  once, one day
 91 | hier           |  here
 92 | wie            |  who
 93 | werd           |  imperfect third person sing. of 'become'
 94 | altijd         |  always
 95 | doch           |  yet, but etc
 96 | wordt          |  present third person sing. of 'become'
 97 | wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
 98 | kunnen         |  to be able
 99 | ons            |  us/our
100 | zelf           |  self
101 | tegen          |  against, towards, at
102 | na             |  after, near
103 | reeds          |  already
104 | wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
105 | kon            |  could; past tense of 'to be able'
106 | niets          |  nothing
107 | uw             |  your
108 | iemand         |  somebody
109 | geweest        |  been; past participle of 'be'
110 | andere         |  other
111 | 


--------------------------------------------------------------------------------
/stopwords/nb.txt:
--------------------------------------------------------------------------------
  1 |  | A Norwegian stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | This stop word list is for the dominant bokmål dialect. Words unique
  5 |  | to nynorsk are marked *.
  6 | 
  7 |  | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
  8 | 
  9 | og             | and
 10 | i              | in
 11 | jeg            | I
 12 | det            | it/this/that
 13 | at             | to (w. inf.)
 14 | en             | a/an
 15 | et             | a/an
 16 | den            | it/this/that
 17 | til            | to
 18 | er             | is/am/are
 19 | som            | who/that
 20 | på             | on
 21 | de             | they / you(formal)
 22 | med            | with
 23 | han            | he
 24 | av             | of
 25 | ikke           | not
 26 | ikkje          | not *
 27 | der            | there
 28 | så             | so
 29 | var            | was/were
 30 | meg            | me
 31 | seg            | oneself/himself/herself/themselves
 32 | men            | but
 33 | ett            | one
 34 | har            | have
 35 | om             | about
 36 | vi             | we
 37 | min            | my
 38 | mitt           | my
 39 | ha             | have
 40 | hadde          | had
 41 | hun            | she
 42 | nå             | now
 43 | over           | over
 44 | da             | when/as
 45 | ved            | by/know
 46 | fra            | from
 47 | du             | you
 48 | ut             | out
 49 | sin            | his/her/its/their (reflexive)
 50 | dem            | them
 51 | oss            | us
 52 | opp            | up
 53 | man            | you/one
 54 | kan            | can
 55 | hans           | his
 56 | hvor           | where
 57 | eller          | or
 58 | hva            | what
 59 | skal           | shall/must
 60 | selv           | self (reflective)
 61 | sjøl           | self (reflective)
 62 | her            | here
 63 | alle           | all
 64 | vil            | will
 65 | bli            | become
 66 | ble            | became
 67 | blei           | became *
 68 | blitt          | have become
 69 | kunne          | could
 70 | inn            | in
 71 | når            | when
 72 | være           | be
 73 | kom            | come
 74 | noen           | some
 75 | noe            | some
 76 | ville          | would
 77 | dere           | you
 78 | som            | who/which/that
 79 | deres          | their/theirs
 80 | kun            | only/just
 81 | ja             | yes
 82 | etter          | after
 83 | ned            | down
 84 | skulle         | should
 85 | denne          | this
 86 | for            | for/because
 87 | deg            | you
 88 | si             | hers/his
 89 | sine           | hers/his
 90 | sitt           | hers/his
 91 | mot            | against
 92 | å              | to
 93 | meget          | much
 94 | hvorfor        | why
 95 | dette          | this
 96 | disse          | these/those
 97 | uten           | without
 98 | hvordan        | how
 99 | ingen          | none
100 | din            | your
101 | ditt           | your
102 | blir           | become
103 | samme          | same
104 | hvilken        | which
105 | hvilke         | which (plural)
106 | sånn           | such a
107 | inni           | inside/within
108 | mellom         | between
109 | vår            | our
110 | hver           | each
111 | hvem           | who
112 | vors           | us/ours
113 | hvis           | whose
114 | både           | both
115 | bare           | only/just
116 | enn            | than
117 | fordi          | as/because
118 | før            | before
119 | mange          | many
120 | også           | also
121 | slik           | just
122 | vært           | been
123 | være           | to be
124 | båe            | both *
125 | begge          | both
126 | siden          | since
127 | dykk           | your *
128 | dykkar         | yours *
129 | dei            | they *
130 | deira          | them *
131 | deires         | theirs *
132 | deim           | them *
133 | di             | your (fem.) *
134 | då             | as/when *
135 | eg             | I *
136 | ein            | a/an *
137 | eit            | a/an *
138 | eitt           | a/an *
139 | elles          | or *
140 | honom          | him *
141 | hjå            | at *
142 | ho             | she *
143 | hoe            | she *
144 | henne          | her
145 | hennar         | her/hers
146 | hennes         | hers
147 | hoss           | how *
148 | hossen         | how *
149 | ikkje          | not *
150 | ingi           | noone *
151 | inkje          | noone *
152 | korleis        | how *
153 | korso          | how *
154 | kva            | what/which *
155 | kvar           | where *
156 | kvarhelst      | where *
157 | kven           | who/whom *
158 | kvi            | why *
159 | kvifor         | why *
160 | me             | we *
161 | medan          | while *
162 | mi             | my *
163 | mine           | my *
164 | mykje          | much *
165 | no             | now *
166 | nokon          | some (masc./neut.) *
167 | noka           | some (fem.) *
168 | nokor          | some *
169 | noko           | some *
170 | nokre          | some *
171 | si             | his/hers *
172 | sia            | since *
173 | sidan          | since *
174 | so             | so *
175 | somt           | some *
176 | somme          | some *
177 | um             | about *
178 | upp            | up *
179 | vere           | be *
180 | vore           | was *
181 | verte          | become *
182 | vort           | become *
183 | varte          | became *
184 | vart           | became *
185 | 


--------------------------------------------------------------------------------
/stopwords/pt.txt:
--------------------------------------------------------------------------------
  1 |  | A Portuguese stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 | 
  5 |  | The following is a ranked list (commonest to rarest) of stopwords
  6 |  | deriving from a large sample of text.
  7 | 
  8 |  | Extra words have been added at the end.
  9 | 
 10 | de             |  of, from
 11 | a              |  the; to, at; her
 12 | o              |  the; him
 13 | que            |  who, that
 14 | e              |  and
 15 | do             |  de + o
 16 | da             |  de + a
 17 | em             |  in
 18 | um             |  a
 19 | para           |  for
 20 |   | é          from SER
 21 | com            |  with
 22 | não            |  not, no
 23 | uma            |  a
 24 | os             |  the; them
 25 | no             |  em + o
 26 | se             |  himself etc
 27 | na             |  em + a
 28 | por            |  for
 29 | mais           |  more
 30 | as             |  the; them
 31 | dos            |  de + os
 32 | como           |  as, like
 33 | mas            |  but
 34 |   | foi        from SER
 35 | ao             |  a + o
 36 | ele            |  he
 37 | das            |  de + as
 38 |   | tem        from TER
 39 | à              |  a + a
 40 | seu            |  his
 41 | sua            |  her
 42 | ou             |  or
 43 |   | ser        from SER
 44 | quando         |  when
 45 | muito          |  much
 46 |   | há         from HAV
 47 | nos            |  em + os; us
 48 | já             |  already, now
 49 |   | está       from EST
 50 | eu             |  I
 51 | também         |  also
 52 | só             |  only, just
 53 | pelo           |  per + o
 54 | pela           |  per + a
 55 | até            |  up to
 56 | isso           |  that
 57 | ela            |  she
 58 | entre          |  between
 59 |   | era        from SER
 60 | depois         |  after
 61 | sem            |  without
 62 | mesmo          |  same
 63 | aos            |  a + os
 64 |   | ter        from TER
 65 | seus           |  his
 66 | quem           |  whom
 67 | nas            |  em + as
 68 | me             |  me
 69 | esse           |  that
 70 | eles           |  they
 71 |   | estão      from EST
 72 | você           |  you
 73 |   | tinha      from TER
 74 |   | foram      from SER
 75 | essa           |  that
 76 | num            |  em + um
 77 | nem            |  nor
 78 | suas           |  her
 79 | meu            |  my
 80 | às             |  a + as
 81 | minha          |  my
 82 |   | têm        from TER
 83 | numa           |  em + uma
 84 | pelos          |  per + os
 85 | elas           |  they
 86 |   | havia      from HAV
 87 |   | seja       from SER
 88 | qual           |  which
 89 |   | será       from SER
 90 | nós            |  we
 91 |   | tenho      from TER
 92 | lhe            |  to him, her
 93 | deles          |  of them
 94 | essas          |  those
 95 | esses          |  those
 96 | pelas          |  per + as
 97 | este           |  this
 98 |   | fosse      from SER
 99 | dele           |  of him
100 | 
101 |  | other words. There are many contractions such as naquele = em+aquele,
102 |  | mo = me+o, but they are rare.
103 |  | Indefinite article plural forms are also rare.
104 | 
105 | tu             |  thou
106 | te             |  thee
107 | vocês          |  you (plural)
108 | vos            |  you
109 | lhes           |  to them
110 | meus           |  my
111 | minhas
112 | teu            |  thy
113 | tua
114 | teus
115 | tuas
116 | nosso          | our
117 | nossa
118 | nossos
119 | nossas
120 | 
121 | dela           |  of her
122 | delas          |  of them
123 | 
124 | esta           |  this
125 | estes          |  these
126 | estas          |  these
127 | aquele         |  that
128 | aquela         |  that
129 | aqueles        |  those
130 | aquelas        |  those
131 | isto           |  this
132 | aquilo         |  that
133 | 
134 |                | forms of estar, to be (not including the infinitive):
135 | estou
136 | está
137 | estamos
138 | estão
139 | estive
140 | esteve
141 | estivemos
142 | estiveram
143 | estava
144 | estávamos
145 | estavam
146 | estivera
147 | estivéramos
148 | esteja
149 | estejamos
150 | estejam
151 | estivesse
152 | estivéssemos
153 | estivessem
154 | estiver
155 | estivermos
156 | estiverem
157 | 
158 |                | forms of haver, to have (not including the infinitive):
159 | hei
160 | há
161 | havemos
162 | hão
163 | houve
164 | houvemos
165 | houveram
166 | houvera
167 | houvéramos
168 | haja
169 | hajamos
170 | hajam
171 | houvesse
172 | houvéssemos
173 | houvessem
174 | houver
175 | houvermos
176 | houverem
177 | houverei
178 | houverá
179 | houveremos
180 | houverão
181 | houveria
182 | houveríamos
183 | houveriam
184 | 
185 |                | forms of ser, to be (not including the infinitive):
186 | sou
187 | somos
188 | são
189 | era
190 | éramos
191 | eram
192 | fui
193 | foi
194 | fomos
195 | foram
196 | fora
197 | fôramos
198 | seja
199 | sejamos
200 | sejam
201 | fosse
202 | fôssemos
203 | fossem
204 | for
205 | formos
206 | forem
207 | serei
208 | será
209 | seremos
210 | serão
211 | seria
212 | seríamos
213 | seriam
214 | 
215 |                | forms of ter, to have (not including the infinitive):
216 | tenho
217 | tem
218 | temos
219 | têm
220 | tinha
221 | tínhamos
222 | tinham
223 | tive
224 | teve
225 | tivemos
226 | tiveram
227 | tivera
228 | tivéramos
229 | tenha
230 | tenhamos
231 | tenham
232 | tivesse
233 | tivéssemos
234 | tivessem
235 | tiver
236 | tivermos
237 | tiverem
238 | terei
239 | terá
240 | teremos
241 | terão
242 | teria
243 | teríamos
244 | teriam
245 | 


--------------------------------------------------------------------------------
/libstemmer_c/README:
--------------------------------------------------------------------------------
  1 | libstemmer_c
  2 | ============
  3 | 
  4 | This document pertains to the C version of the libstemmer distribution,
  5 | available for download from:
  6 | 
  7 | http://snowball.tartarus.org/dist/libstemmer_c.tgz
  8 | 
  9 | 
 10 | Compiling the library
 11 | =====================
 12 | 
 13 | A simple makefile is provided for Unix style systems.  On such systems, it
 14 | should be possible simply to run "make", and the file "libstemmer.o"
 15 | and the example program "stemwords" will be generated.
 16 | 
 17 | If this doesn't work on your system, you need to write your own build
 18 | system (or call the compiler directly).  The files to compile are
 19 | all contained in the "libstemmer", "runtime" and "src_c" directories,
 20 | and the public header file is contained in the "include" directory.
 21 | 
 22 | The library comes in two flavours; UTF-8 only, and UTF-8 plus other character
 23 | sets.  To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of
 24 | "libstemmer.c".
 25 | 
 26 | For convenience "mkinc.mak" is a makefile fragment listing the source files and
 27 | header files used to compile the standard version of the library.
 28 | "mkinc_utf8.mak" is a comparable makefile fragment listing just the source
 29 | files for the UTF-8 only version of the library.
 30 | 
 31 | 
 32 | Using the library
 33 | =================
 34 | 
 35 | The library provides a simple C API.  Essentially, a new stemmer can
 36 | be obtained by using "sb_stemmer_new".  "sb_stemmer_stem" is then
 37 | used to stem a word, "sb_stemmer_length" returns the stemmed
 38 | length of the last word processed, and "sb_stemmer_delete" is
 39 | used to delete a stemmer.
 40 | 
 41 | Creating a stemmer is a relatively expensive operation - the expected
 42 | usage pattern is that a new stemmer is created when needed, used
 43 | to stem many words, and deleted after some time.
 44 | 
 45 | Stemmers are re-entrant, but not threadsafe.  In other words, if
 46 | you wish to access the same stemmer object from multiple threads,
 47 | you must ensure that all access is protected by a mutex or similar
 48 | device.
 49 | 
 50 | libstemmer does not currently incorporate any mechanism for caching the results
 51 | of stemming operations.  Such caching can greatly increase the performance of a
 52 | stemmer under certain situations, so suitable patches will be considered for
 53 | inclusion.
 54 | 
 55 | The standard libstemmer sources contain an algorithm for each of the supported
 56 | languages.  The algorithm may be selected using the English name of the
 57 | language, or using the 2 or 3 letter ISO 639 language codes.  In addition,
 58 | the traditional "Porter" stemming algorithm for English is included for
 59 | backwards compatibility purposes, but we recommend use of the "English"
 60 | stemmer in preference for new projects.
 61 | 
 62 | (Some minor algorithms which are included only as curiosities in the snowball
 63 | website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not
 64 | included in the standard libstemmer sources.  These are not really supported by
 65 | the snowball project, but it would be possible to compile a modified libstemmer
 66 | library containing these if desired.)
 67 | 
 68 | 
 69 | The stemwords example
 70 | =====================
 71 | 
 72 | The stemwords example program allows you to run any of the stemmers
 73 | compiled into the libstemmer library on a sample vocabulary.  For
 74 | details on how to use it, run it with the "-h" command line option.
 75 | 
 76 | 
 77 | Using the library in a larger system
 78 | ====================================
 79 | 
 80 | If you are incorporating the library into the build system of a larger
 81 | program, I recommend copying the unpacked tarball without modification into
 82 | a subdirectory of the sources of your program.  Future versions of the
 83 | library are intended to keep the same structure, so this will keep the
 84 | work required to move to a new version of the library to a minimum.
 85 | 
 86 | As an additional convenience, the list of source and header files used
 87 | in the library is detailed in mkinc.mak - a file which is in a suitable
 88 | format for inclusion by a Makefile.  By including this file in your build
 89 | system, you can link the snowball system into your program with a few
 90 | extra rules.
 91 | 
 92 | Using the library in a system using GNU autotools
 93 | =================================================
 94 | 
 95 | The libstemmer_c library can be integrated into a larger system which uses the
 96 | GNU autotool framework (and in particular, automake and autoconf) as follows:
 97 | 
 98 | 1) Unpack libstemmer_c.tgz in the top level project directory so that there is
 99 |    a libstemmer_c subdirectory of the top level directory of the project.
100 | 
101 | 2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing:
102 |    
103 | noinst_LTLIBRARIES = libstemmer.la
104 | include $(srcdir)/mkinc.mak
105 | noinst_HEADERS = $(snowball_headers)
106 | libstemmer_la_SOURCES = $(snowball_sources) 
107 | 
108 | (You may also need to add other lines to this, for example, if you are using
109 | compiler options which are not compatible with compiling the libstemmer
110 | library.)
111 | 
112 | 3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's
113 |    configure.ac file.
114 | 
115 | 4) Add to the top level makefile the following lines (or modify existing
116 |    assignments to these variables appropriately):
117 | 
118 | AUTOMAKE_OPTIONS = subdir-objects
119 | AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include
120 | SUBDIRS=libstemmer_c
121 | <name>_LIBADD = libstemmer_c/libstemmer.la
122 | 
123 | (Where <name> is the name of the library or executable which links against
124 | libstemmer.) 
125 | 
126 | 


--------------------------------------------------------------------------------
/stopwords/de.txt:
--------------------------------------------------------------------------------
  1 |  | A German stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | The number of forms in this list is reduced significantly by passing it
  5 |  | through the German stemmer.
  6 | 
  7 | 
  8 | aber           |  but
  9 | 
 10 | alle           |  all
 11 | allem
 12 | allen
 13 | aller
 14 | alles
 15 | 
 16 | als            |  than, as
 17 | also           |  so
 18 | am             |  an + dem
 19 | an             |  at
 20 | 
 21 | ander          |  other
 22 | andere
 23 | anderem
 24 | anderen
 25 | anderer
 26 | anderes
 27 | anderm
 28 | andern
 29 | anderr
 30 | anders
 31 | 
 32 | auch           |  also
 33 | auf            |  on
 34 | aus            |  out of
 35 | bei            |  by
 36 | bin            |  am
 37 | bis            |  until
 38 | bist           |  art
 39 | da             |  there
 40 | damit          |  with it
 41 | dann           |  then
 42 | 
 43 | der            |  the
 44 | den
 45 | des
 46 | dem
 47 | die
 48 | das
 49 | 
 50 | daß            |  that
 51 | 
 52 | derselbe       |  the same
 53 | derselben
 54 | denselben
 55 | desselben
 56 | demselben
 57 | dieselbe
 58 | dieselben
 59 | dasselbe
 60 | 
 61 | dazu           |  to that
 62 | 
 63 | dein           |  thy
 64 | deine
 65 | deinem
 66 | deinen
 67 | deiner
 68 | deines
 69 | 
 70 | denn           |  because
 71 | 
 72 | derer          |  of those
 73 | dessen         |  of him
 74 | 
 75 | dich           |  thee
 76 | dir            |  to thee
 77 | du             |  thou
 78 | 
 79 | dies           |  this
 80 | diese
 81 | diesem
 82 | diesen
 83 | dieser
 84 | dieses
 85 | 
 86 | 
 87 | doch           |  (several meanings)
 88 | dort           |  (over) there
 89 | 
 90 | 
 91 | durch          |  through
 92 | 
 93 | ein            |  a
 94 | eine
 95 | einem
 96 | einen
 97 | einer
 98 | eines
 99 | 
100 | einig          |  some
101 | einige
102 | einigem
103 | einigen
104 | einiger
105 | einiges
106 | 
107 | einmal         |  once
108 | 
109 | er             |  he
110 | ihn            |  him
111 | ihm            |  to him
112 | 
113 | es             |  it
114 | etwas          |  something
115 | 
116 | euer           |  your
117 | eure
118 | eurem
119 | euren
120 | eurer
121 | eures
122 | 
123 | für            |  for
124 | gegen          |  towards
125 | gewesen        |  p.p. of sein
126 | hab            |  have
127 | habe           |  have
128 | haben          |  have
129 | hat            |  has
130 | hatte          |  had
131 | hatten         |  had
132 | hier           |  here
133 | hin            |  there
134 | hinter         |  behind
135 | 
136 | ich            |  I
137 | mich           |  me
138 | mir            |  to me
139 | 
140 | 
141 | ihr            |  you, to her
142 | ihre
143 | ihrem
144 | ihren
145 | ihrer
146 | ihres
147 | euch           |  to you
148 | 
149 | im             |  in + dem
150 | in             |  in
151 | indem          |  while
152 | ins            |  in + das
153 | ist            |  is
154 | 
155 | jede           |  each, every
156 | jedem
157 | jeden
158 | jeder
159 | jedes
160 | 
161 | jene           |  that
162 | jenem
163 | jenen
164 | jener
165 | jenes
166 | 
167 | jetzt          |  now
168 | kann           |  can
169 | 
170 | kein           |  no
171 | keine
172 | keinem
173 | keinen
174 | keiner
175 | keines
176 | 
177 | können         |  can
178 | könnte         |  could
179 | machen         |  do
180 | man            |  one
181 | 
182 | manche         |  some, many a
183 | manchem
184 | manchen
185 | mancher
186 | manches
187 | 
188 | mein           |  my
189 | meine
190 | meinem
191 | meinen
192 | meiner
193 | meines
194 | 
195 | mit            |  with
196 | muss           |  must
197 | musste         |  had to
198 | nach           |  to(wards)
199 | nicht          |  not
200 | nichts         |  nothing
201 | noch           |  still, yet
202 | nun            |  now
203 | nur            |  only
204 | ob             |  whether
205 | oder           |  or
206 | ohne           |  without
207 | sehr           |  very
208 | 
209 | sein           |  his
210 | seine
211 | seinem
212 | seinen
213 | seiner
214 | seines
215 | 
216 | selbst         |  self
217 | sich           |  herself
218 | 
219 | sie            |  they, she
220 | ihnen          |  to them
221 | 
222 | sind           |  are
223 | so             |  so
224 | 
225 | solche         |  such
226 | solchem
227 | solchen
228 | solcher
229 | solches
230 | 
231 | soll           |  shall
232 | sollte         |  should
233 | sondern        |  but
234 | sonst          |  else
235 | über           |  over
236 | um             |  about, around
237 | und            |  and
238 | 
239 | uns            |  us
240 | unse
241 | unsem
242 | unsen
243 | unser
244 | unses
245 | 
246 | unter          |  under
247 | viel           |  much
248 | vom            |  von + dem
249 | von            |  from
250 | vor            |  before
251 | während        |  while
252 | war            |  was
253 | waren          |  were
254 | warst          |  wast
255 | was            |  what
256 | weg            |  away, off
257 | weil           |  because
258 | weiter         |  further
259 | 
260 | welche         |  which
261 | welchem
262 | welchen
263 | welcher
264 | welches
265 | 
266 | wenn           |  when
267 | werde          |  will
268 | werden         |  will
269 | wie            |  how
270 | wieder         |  again
271 | will           |  want
272 | wir            |  we
273 | wird           |  will
274 | wirst          |  willst
275 | wo             |  where
276 | wollen         |  want
277 | wollte         |  wanted
278 | würde          |  would
279 | würden         |  would
280 | zu             |  to
281 | zum            |  zu + dem
282 | zur            |  zu + der
283 | zwar           |  indeed
284 | zwischen       |  between
285 | 


--------------------------------------------------------------------------------
/stopwords/it.txt:
--------------------------------------------------------------------------------
  1 |  | An Italian stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 | ad             |  a (to) before vowel
  5 | al             |  a + il
  6 | allo           |  a + lo
  7 | ai             |  a + i
  8 | agli           |  a + gli
  9 | all            |  a + l'
 10 | agl            |  a + gl'
 11 | alla           |  a + la
 12 | alle           |  a + le
 13 | con            |  with
 14 | col            |  con + il
 15 | coi            |  con + i (forms collo, cogli etc are now very rare)
 16 | da             |  from
 17 | dal            |  da + il
 18 | dallo          |  da + lo
 19 | dai            |  da + i
 20 | dagli          |  da + gli
 21 | dall           |  da + l'
 22 | dagl           |  da + gl'
 23 | dalla          |  da + la
 24 | dalle          |  da + le
 25 | di             |  of
 26 | del            |  di + il
 27 | dello          |  di + lo
 28 | dei            |  di + i
 29 | degli          |  di + gli
 30 | dell           |  di + l'
 31 | degl           |  di + gl'
 32 | della          |  di + la
 33 | delle          |  di + le
 34 | in             |  in
 35 | nel            |  in + il
 36 | nello          |  in + lo
 37 | nei            |  in + i
 38 | negli          |  in + gli
 39 | nell           |  in + l'
 40 | negl           |  in + gl'
 41 | nella          |  in + la
 42 | nelle          |  in + le
 43 | su             |  on
 44 | sul            |  su + il
 45 | sullo          |  su + lo
 46 | sui            |  su + i
 47 | sugli          |  su + gli
 48 | sull           |  su + l'
 49 | sugl           |  su + gl'
 50 | sulla          |  su + la
 51 | sulle          |  su + le
 52 | per            |  through, by
 53 | tra            |  among
 54 | contro         |  against
 55 | io             |  I
 56 | tu             |  thou
 57 | lui            |  he
 58 | lei            |  she
 59 | noi            |  we
 60 | voi            |  you
 61 | loro           |  they
 62 | mio            |  my
 63 | mia            |
 64 | miei           |
 65 | mie            |
 66 | tuo            |
 67 | tua            |
 68 | tuoi           |  thy
 69 | tue            |
 70 | suo            |
 71 | sua            |
 72 | suoi           |  his, her
 73 | sue            |
 74 | nostro         |  our
 75 | nostra         |
 76 | nostri         |
 77 | nostre         |
 78 | vostro         |  your
 79 | vostra         |
 80 | vostri         |
 81 | vostre         |
 82 | mi             |  me
 83 | ti             |  thee
 84 | ci             |  us, there
 85 | vi             |  you, there
 86 | lo             |  him, the
 87 | la             |  her, the
 88 | li             |  them
 89 | le             |  them, the
 90 | gli            |  to him, the
 91 | ne             |  from there etc
 92 | il             |  the
 93 | un             |  a
 94 | uno            |  a
 95 | una            |  a
 96 | ma             |  but
 97 | ed             |  and
 98 | se             |  if
 99 | perché         |  why, because
100 | anche          |  also
101 | come           |  how
102 | dov            |  where (as dov')
103 | dove           |  where
104 | che            |  who, that
105 | chi            |  who
106 | cui            |  whom
107 | non            |  not
108 | più            |  more
109 | quale          |  who, that
110 | quanto         |  how much
111 | quanti         |
112 | quanta         |
113 | quante         |
114 | quello         |  that
115 | quelli         |
116 | quella         |
117 | quelle         |
118 | questo         |  this
119 | questi         |
120 | questa         |
121 | queste         |
122 | si             |  yes
123 | tutto          |  all
124 | tutti          |  all
125 | 
126 |                |  single letter forms:
127 | 
128 | a              |  at
129 | c              |  as c' for ce or ci
130 | e              |  and
131 | i              |  the
132 | l              |  as l'
133 | o              |  or
134 | 
135 |                | forms of avere, to have (not including the infinitive):
136 | 
137 | ho
138 | hai
139 | ha
140 | abbiamo
141 | avete
142 | hanno
143 | abbia
144 | abbiate
145 | abbiano
146 | avrò
147 | avrai
148 | avrà
149 | avremo
150 | avrete
151 | avranno
152 | avrei
153 | avresti
154 | avrebbe
155 | avremmo
156 | avreste
157 | avrebbero
158 | avevo
159 | avevi
160 | aveva
161 | avevamo
162 | avevate
163 | avevano
164 | ebbi
165 | avesti
166 | ebbe
167 | avemmo
168 | aveste
169 | ebbero
170 | avessi
171 | avesse
172 | avessimo
173 | avessero
174 | avendo
175 | avuto
176 | avuta
177 | avuti
178 | avute
179 | 
180 |                | forms of essere, to be (not including the infinitive):
181 | sono
182 | sei
183 | è
184 | siamo
185 | siete
186 | sia
187 | siate
188 | siano
189 | sarò
190 | sarai
191 | sarà
192 | saremo
193 | sarete
194 | saranno
195 | sarei
196 | saresti
197 | sarebbe
198 | saremmo
199 | sareste
200 | sarebbero
201 | ero
202 | eri
203 | era
204 | eravamo
205 | eravate
206 | erano
207 | fui
208 | fosti
209 | fu
210 | fummo
211 | foste
212 | furono
213 | fossi
214 | fosse
215 | fossimo
216 | fossero
217 | essendo
218 | 
219 |                | forms of fare, to do (not including the infinitive, fa, fat-):
220 | faccio
221 | fai
222 | facciamo
223 | fanno
224 | faccia
225 | facciate
226 | facciano
227 | farò
228 | farai
229 | farà
230 | faremo
231 | farete
232 | faranno
233 | farei
234 | faresti
235 | farebbe
236 | faremmo
237 | fareste
238 | farebbero
239 | facevo
240 | facevi
241 | faceva
242 | facevamo
243 | facevate
244 | facevano
245 | feci
246 | facesti
247 | fece
248 | facemmo
249 | faceste
250 | fecero
251 | facessi
252 | facesse
253 | facessimo
254 | facessero
255 | facendo
256 | 
257 |                | forms of stare, to be (not including the infinitive):
258 | sto
259 | stai
260 | sta
261 | stiamo
262 | stanno
263 | stia
264 | stiate
265 | stiano
266 | starò
267 | starai
268 | starà
269 | staremo
270 | starete
271 | staranno
272 | starei
273 | staresti
274 | starebbe
275 | staremmo
276 | stareste
277 | starebbero
278 | stavo
279 | stavi
280 | stava
281 | stavamo
282 | stavate
283 | stavano
284 | stetti
285 | stesti
286 | stette
287 | stemmo
288 | steste
289 | stettero
290 | stessi
291 | stesse
292 | stessimo
293 | stessero
294 | stando
295 | 


--------------------------------------------------------------------------------
/stopwords/en.txt:
--------------------------------------------------------------------------------
  1 |  | An English stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | Many of the forms below are quite rare (e.g. "yourselves") but included for
  5 |  |  completeness.
  6 | 
  7 |            | PRONOUNS FORMS
  8 |              | 1st person sing
  9 | 
 10 | i              | subject, always in upper case of course
 11 | 
 12 | me             | object
 13 | my             | possessive adjective
 14 |                | the possessive pronoun `mine' is best suppressed, because of the
 15 |                | sense of coal-mine etc.
 16 | myself         | reflexive
 17 |              | 1st person plural
 18 | we             | subject
 19 | 
 20 | | us           | object
 21 |                | care is required here because US = United States. It is usually
 22 |                | safe to remove it if it is in lower case.
 23 | our            | possessive adjective
 24 | ours           | possessive pronoun
 25 | ourselves      | reflexive
 26 |              | second person (archaic `thou' forms not included)
 27 | you            | subject and object
 28 | your           | possessive adjective
 29 | yours          | possessive pronoun
 30 | yourself       | reflexive (singular)
 31 | yourselves     | reflexive (plural)
 32 |              | third person singular
 33 | he             | subject
 34 | him            | object
 35 | his            | possessive adjective and pronoun
 36 | himself        | reflexive
 37 | 
 38 | she            | subject
 39 | her            | object and possessive adjective
 40 | hers           | possessive pronoun
 41 | herself        | reflexive
 42 | 
 43 | it             | subject and object
 44 | its            | possessive adjective
 45 | itself         | reflexive
 46 |              | third person plural
 47 | they           | subject
 48 | them           | object
 49 | their          | possessive adjective
 50 | theirs         | possessive pronoun
 51 | themselves     | reflexive
 52 |              | other forms (demonstratives, interrogatives)
 53 | what
 54 | which
 55 | who
 56 | whom
 57 | this
 58 | that
 59 | these
 60 | those
 61 | 
 62 |            | VERB FORMS (using F.R. Palmer's nomenclature)
 63 |              | BE
 64 | am             | 1st person, present
 65 | is             | -s form (3rd person, present)
 66 | are            | present
 67 | was            | 1st person, past
 68 | were           | past
 69 | be             | infinitive
 70 | been           | past participle
 71 | being          | -ing form
 72 |              | HAVE
 73 | have           | simple
 74 | has            | -s form
 75 | had            | past
 76 | having         | -ing form
 77 |              | DO
 78 | do             | simple
 79 | does           | -s form
 80 | did            | past
 81 | doing          | -ing form
 82 | 
 83 |  | The forms below are, I believe, best omitted, because of the significant
 84 |  | homonym forms:
 85 | 
 86 |  |  He made a WILL
 87 |  |  old tin CAN
 88 |  |  merry month of MAY
 89 |  |  a smell of MUST
 90 |  |  fight the good fight with all thy MIGHT
 91 | 
 92 |  | would, could, should, ought might however be included
 93 | 
 94 |  |          | AUXILIARIES
 95 |  |            | WILL
 96 |  |will
 97 | 
 98 | would
 99 | 
100 |  |            | SHALL
101 |  |shall
102 | 
103 | should
104 | 
105 |  |            | CAN
106 |  |can
107 | 
108 | could
109 | 
110 |  |            | MAY
111 |  |may
112 |  |might
113 |  |            | MUST
114 |  |must
115 |  |            | OUGHT
116 | 
117 | ought
118 | 
119 |            | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
120 |               | pronoun + verb
121 | 
122 | i'm
123 | you're
124 | he's
125 | she's
126 | it's
127 | we're
128 | they're
129 | i've
130 | you've
131 | we've
132 | they've
133 | i'd
134 | you'd
135 | he'd
136 | she'd
137 | we'd
138 | they'd
139 | i'll
140 | you'll
141 | he'll
142 | she'll
143 | we'll
144 | they'll
145 | 
146 |               | verb + negation
147 | 
148 | isn't
149 | aren't
150 | wasn't
151 | weren't
152 | hasn't
153 | haven't
154 | hadn't
155 | doesn't
156 | don't
157 | didn't
158 | 
159 |               | auxiliary + negation
160 | 
161 | won't
162 | wouldn't
163 | shan't
164 | shouldn't
165 | can't
166 | cannot
167 | couldn't
168 | mustn't
169 | 
170 |              | miscellaneous forms
171 | 
172 | let's
173 | that's
174 | who's
175 | what's
176 | here's
177 | there's
178 | when's
179 | where's
180 | why's
181 | how's
182 | 
183 |               | rarer forms
184 | 
185 |  | daren't needn't
186 | 
187 |               | doubtful forms
188 | 
189 |  | oughtn't mightn't
190 | 
191 |            | ARTICLES
192 | a
193 | an
194 | the
195 | 
196 |            | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
197 |            | high, that classification is pointless.)
198 | and
199 | but
200 | if
201 | or
202 | because
203 | as
204 | until
205 | while
206 | 
207 | of
208 | at
209 | by
210 | for
211 | with
212 | about
213 | against
214 | between
215 | into
216 | through
217 | during
218 | before
219 | after
220 | above
221 | below
222 | to
223 | from
224 | up
225 | down
226 | in
227 | out
228 | on
229 | off
230 | over
231 | under
232 | 
233 | again
234 | further
235 | then
236 | once
237 | 
238 | here
239 | there
240 | when
241 | where
242 | why
243 | how
244 | 
245 | all
246 | any
247 | both
248 | each
249 | few
250 | more
251 | most
252 | other
253 | some
254 | such
255 | 
256 | no
257 | nor
258 | not
259 | only
260 | own
261 | same
262 | so
263 | than
264 | too
265 | very
266 | 
267 |  | Just for the record, the following words are among the commonest in English
268 | 
269 |     | one
270 |     | every
271 |     | least
272 |     | less
273 |     | many
274 |     | now
275 |     | ever
276 |     | never
277 |     | say
278 |     | says
279 |     | said
280 |     | also
281 |     | get
282 |     | go
283 |     | goes
284 |     | just
285 |     | made
286 |     | make
287 |     | put
288 |     | see
289 |     | seen
290 |     | whether
291 |     | like
292 |     | well
293 |     | back
294 |     | even
295 |     | still
296 |     | way
297 |     | take
298 |     | since
299 |     | another
300 |     | however
301 |     | two
302 |     | three
303 |     | four
304 |     | five
305 |     | first
306 |     | second
307 |     | new
308 |     | old
309 |     | high
310 |     | long
311 | 


--------------------------------------------------------------------------------
/libstemmer_c/examples/stemwords.c:
--------------------------------------------------------------------------------
  1 | /* This is a simple program which uses libstemmer to provide a command
  2 |  * line interface for stemming using any of the algorithms provided.
  3 |  */
  4 | 
  5 | #include <stdio.h>
  6 | #include <stdlib.h> /* for malloc, free */
  7 | #include <string.h> /* for memmove */
  8 | #include <ctype.h>  /* for isupper, tolower */
  9 | 
 10 | #include "libstemmer.h"
 11 | 
 12 | const char * progname;
 13 | static int pretty = 1;
 14 | 
 15 | static void
 16 | stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out)
 17 | {
 18 | #define INC 10
 19 |     int lim = INC;
 20 |     sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol));
 21 | 
 22 |     while(1) {
 23 |         int ch = getc(f_in);
 24 |         if (ch == EOF) {
 25 |             free(b); return;
 26 |         }
 27 |         {
 28 |             int i = 0;
 29 | 	    int inlen = 0;
 30 |             while(1) {
 31 |                 if (ch == '\n' || ch == EOF) break;
 32 |                 if (i == lim) {
 33 |                     sb_symbol * newb;
 34 | 		    newb = (sb_symbol *)
 35 | 			    realloc(b, (lim + INC) * sizeof(sb_symbol));
 36 | 		    if (newb == 0) goto error;
 37 | 		    b = newb;
 38 |                     lim = lim + INC;
 39 |                 }
 40 | 		/* Update count of utf-8 characters. */
 41 | 		if (ch < 0x80 || ch > 0xBF) inlen += 1;
 42 |                 /* force lower case: */
 43 |                 if (isupper(ch)) ch = tolower(ch);
 44 | 
 45 |                 b[i] = ch;
 46 | 		i++;
 47 |                 ch = getc(f_in);
 48 |             }
 49 | 
 50 | 	    {
 51 | 		const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i);
 52 |                 if (stemmed == NULL)
 53 |                 {
 54 |                     fprintf(stderr, "Out of memory");
 55 |                     exit(1);
 56 |                 }
 57 |                 else
 58 | 		{
 59 | 		    if (pretty == 1) {
 60 | 			fwrite(b, i, 1, f_out);
 61 | 			fputs(" -> ", f_out);
 62 | 		    } else if (pretty == 2) {
 63 | 			fwrite(b, i, 1, f_out);
 64 | 			if (sb_stemmer_length(stemmer) > 0) {
 65 | 			    int j;
 66 | 			    if (inlen < 30) {
 67 | 				for (j = 30 - inlen; j > 0; j--)
 68 | 				    fputs(" ", f_out);
 69 | 			    } else {
 70 | 				fputs("\n", f_out);
 71 | 				for (j = 30; j > 0; j--)
 72 | 				    fputs(" ", f_out);
 73 | 			    }
 74 | 			}
 75 | 		    }
 76 | 
 77 | 		    fputs((char *)stemmed, f_out);
 78 | 		    putc('\n', f_out);
 79 | 		}
 80 |             }
 81 |         }
 82 |     }
 83 | error:
 84 |     if (b != 0) free(b);
 85 |     return;
 86 | }
 87 | 
 88 | /** Display the command line syntax, and then exit.
 89 |  *  @param n The value to exit with.
 90 |  */
 91 | static void
 92 | usage(int n)
 93 | {
 94 |     printf("usage: %s [-l <language>] [-i <input file>] [-o <output file>] [-c <character encoding>] [-p[2]] [-h]\n"
 95 | 	  "\n"
 96 | 	  "The input file consists of a list of words to be stemmed, one per\n"
 97 | 	  "line. Words should be in lower case, but (for English) A-Z letters\n"
 98 | 	  "are mapped to their a-z equivalents anyway. If omitted, stdin is\n"
 99 | 	  "used.\n"
100 | 	  "\n"
101 | 	  "If -c is given, the argument is the character encoding of the input\n"
102 |           "and output files.  If it is omitted, the UTF-8 encoding is used.\n"
103 | 	  "\n"
104 | 	  "If -p is given the output file consists of each word of the input\n"
105 | 	  "file followed by \"->\" followed by its stemmed equivalent.\n"
106 | 	  "If -p2 is given the output file is a two column layout containing\n"
107 | 	  "the input words in the first column and the stemmed eqivalents in\n"
108 | 	  "the second column.\n"
109 | 	  "Otherwise, the output file consists of the stemmed words, one per\n"
110 | 	  "line.\n"
111 | 	  "\n"
112 | 	  "-h displays this help\n",
113 | 	  progname);
114 |     exit(n);
115 | }
116 | 
117 | int
118 | main(int argc, char * argv[])
119 | {
120 |     char * in = 0;
121 |     char * out = 0;
122 |     FILE * f_in;
123 |     FILE * f_out;
124 |     struct sb_stemmer * stemmer;
125 | 
126 |     char * language = "english";
127 |     char * charenc = NULL;
128 | 
129 |     char * s;
130 |     int i = 1;
131 |     pretty = 0;
132 | 
133 |     progname = argv[0];
134 | 
135 |     while(i < argc) {
136 | 	s = argv[i++];
137 | 	if (s[0] == '-') {
138 | 	    if (strcmp(s, "-o") == 0) {
139 | 		if (i >= argc) {
140 | 		    fprintf(stderr, "%s requires an argument\n", s);
141 | 		    exit(1);
142 | 		}
143 | 		out = argv[i++];
144 | 	    } else if (strcmp(s, "-i") == 0) {
145 | 		if (i >= argc) {
146 | 		    fprintf(stderr, "%s requires an argument\n", s);
147 | 		    exit(1);
148 | 		}
149 | 		in = argv[i++];
150 | 	    } else if (strcmp(s, "-l") == 0) {
151 | 		if (i >= argc) {
152 | 		    fprintf(stderr, "%s requires an argument\n", s);
153 | 		    exit(1);
154 | 		}
155 | 		language = argv[i++];
156 | 	    } else if (strcmp(s, "-c") == 0) {
157 | 		if (i >= argc) {
158 | 		    fprintf(stderr, "%s requires an argument\n", s);
159 | 		    exit(1);
160 | 		}
161 | 		charenc = argv[i++];
162 | 	    } else if (strcmp(s, "-p2") == 0) {
163 | 		pretty = 2;
164 | 	    } else if (strcmp(s, "-p") == 0) {
165 | 		pretty = 1;
166 | 	    } else if (strcmp(s, "-h") == 0) {
167 | 		usage(0);
168 | 	    } else {
169 | 		fprintf(stderr, "option %s unknown\n", s);
170 | 		usage(1);
171 | 	    }
172 | 	} else {
173 | 	    fprintf(stderr, "unexpected parameter %s\n", s);
174 | 	    usage(1);
175 | 	}
176 |     }
177 | 
178 |     /* prepare the files */
179 |     f_in = (in == 0) ? stdin : fopen(in, "r");
180 |     if (f_in == 0) {
181 | 	fprintf(stderr, "file %s not found\n", in);
182 | 	exit(1);
183 |     }
184 |     f_out = (out == 0) ? stdout : fopen(out, "w");
185 |     if (f_out == 0) {
186 | 	fprintf(stderr, "file %s cannot be opened\n", out);
187 | 	exit(1);
188 |     }
189 | 
190 |     /* do the stemming process: */
191 |     stemmer = sb_stemmer_new(language, charenc);
192 |     if (stemmer == 0) {
193 |         if (charenc == NULL) {
194 |             fprintf(stderr, "language `%s' not available for stemming\n", language);
195 |             exit(1);
196 |         } else {
197 |             fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc);
198 |             exit(1);
199 |         }
200 |     }
201 |     stem_file(stemmer, f_in, f_out);
202 |     sb_stemmer_delete(stemmer);
203 | 
204 |     if (in != 0) (void) fclose(f_in);
205 |     if (out != 0) (void) fclose(f_out);
206 | 
207 |     return 0;
208 | }
209 | 
210 | 


--------------------------------------------------------------------------------
/cl-libstemmer.lisp:
--------------------------------------------------------------------------------
  1 | ;;;; cl-libstemmer.lisp
  2 | 
  3 | (in-package #:cl-libstemmer)
  4 | 
  5 | ;;; "cl-libstemmer" goes here. Hacks and glory await!
  6 | 
;; Resolved relative to the cl-libstemmer ASDF system; used both as the
;; search directory for the shared library and as the build directory.
(def libstemmer-path
  (asdf:system-relative-pathname :cl-libstemmer "libstemmer_c/")
  "Pathname of the bundled libstemmer C source directory.")
 10 | 
 11 | (defun wordp (x)
 12 |   "Sanity check for word-ness."
 13 |   (and (stringp x) (< (length x) 500)))
 14 | 
;; Lets `typecase'/`typep' dispatch on "stemmable word" versus anything else.
(deftype word ()
  "A string that passes the `wordp' sanity check."
  '(and string (satisfies wordp)))
 18 | 
;; Describe the shared library to CFFI: on every platform (the `t' clause)
;; look for the default platform-specific file name derived from "libstemmer".
(cffi:define-foreign-library libstemmer
  (t (:default "libstemmer")))
 21 | 
 22 | (defun use-libstemmer ()
 23 |   "Try to load libstemmer; build it if possible."
 24 |   (handler-case
 25 |       (let ((cffi:*foreign-library-directories* (list libstemmer-path)))
 26 |         (cffi:use-foreign-library libstemmer))
 27 |     (error ()
 28 |       (format t "~&Building libstemmer.so...~%")
 29 |       (unless (zerop
 30 |                (uiop:run-program
 31 |                 '("make" "libstemmer.so")
 32 |                 :directory libstemmer-path))
 33 |         (error "Could not build libstemmer.so.")))))
 34 | 
 35 | (use-libstemmer)
 36 | 
;; Must be one of the keys of `encodings'; `load-stemmer' falls back to it
;; when no encoding is supplied.
(defparameter *default-encoding* :utf-8
  "The encoding keyword used when a stemmer is requested without one.")
 39 | 
;; Alist of (keyword . name); the keyword is what Lisp callers pass and the
;; string is what gets handed to the C library.
(def encodings
  '((:iso-8859-1 . "ISO_8859_1")
    (:iso-8859-2 . "ISO_8859_2")
    (:koi8-r . "KOI8_R")
    (:utf-8 . "UTF_8"))
  "Map encoding keywords to the encoding names libstemmer understands.")
 46 | 
(defun encoding->string (encoding)
  "Return the libstemmer name that `encodings' associates with the keyword
ENCODING."
  ;; NOTE(review): presumably yields NIL for a keyword that is not listed in
  ;; `encodings' -- confirm against the definition of `assocdr'.
  (assocdr encoding encodings))
 50 | 
;; The only constructor, `%make-stemmer', takes the pointer positionally and
;; always starts with DELETED set to NIL.
(defstruct (%stemmer (:constructor %make-stemmer (pointer &aux (deleted nil))))
  "Wrap a C stemmer so we can track whether it has been closed."
  ;; Foreign pointer to the C stemmer; mandatory and read-only.
  (pointer (error "No pointer") :type cffi:foreign-pointer :read-only t)
  ;; True once the C stemmer has been freed (see `%close-stemmer').
  (deleted nil :type boolean))
 55 | 
 56 | (defun %close-stemmer (s)
 57 |   (unless (%stemmer-deleted s)
 58 |     (setf (%stemmer-deleted s) t)
 59 |     (sb_stemmer_delete (%stemmer-pointer s))
 60 |     s))
 61 | 
;; Inherits from `synchronized' so instances can be used with the
;; `synchronized' macro (see `stem' and `close-stemmer').
(defclass stemmer (synchronized)
  (;; The `%stemmer' struct wrapping the C pointer; filled in by the
   ;; `initialize-instance' :after method.
   (%stemmer :initarg :stemmer :type %stemmer :accessor %stemmer)
   ;; Language name the stemmer was created for; two accessor names are
   ;; provided for the same slot.
   (language :initarg :language
             :accessor language-of
             :accessor stemmer-language)
   ;; Encoding keyword the stemmer was created for; again with two accessors.
   (encoding :initarg :encoding
             :accessor encoding-of
             :accessor stemmer-encoding))
  (:documentation "Lisp wrapper for a C stemmer."))
 71 | 
(defmethod closed? ((self stemmer))
  "Return true if the C stemmer behind SELF has already been freed."
  (%stemmer-deleted (%stemmer self)))
 74 | 
 75 | (defmethod print-object ((self stemmer) stream)
 76 |   (print-unreadable-object (self stream :type t)
 77 |     (with-slots (language encoding) self
 78 |       (format t "~a/~a" language encoding))))
 79 | 
;; Signalled by the `initialize-instance' :after method of `stemmer' when
;; the C library returns a null pointer for the requested pair.
(defcondition no-such-stemmer (error)
  ((language :initarg :language :accessor no-such-stemmer-language)
   (encoding :initarg :encoding :accessor no-such-stemmer-encoding))
  (:documentation "Error for an unsupported language/encoding combination.")
  (:report (lambda (c s)
             (with-slots (language encoding) c
               (format s "No such language/encoding pair: ~a/~a"
                       language encoding)))))
 88 | 
 89 | (defun check-open (stemmer)
 90 |   "Make sure STEMMER is not closed."
 91 |   (when (closed? stemmer)
 92 |     (error "~a is closed" stemmer)))
 93 | 
 94 | (defmethod initialize-instance :after ((self stemmer) &key language encoding)
 95 |   "Set up the C stemmer."
 96 |   (let* ((enc (encoding->string encoding))
 97 |          (ptr (cffi:with-foreign-strings ((lang language)
 98 |                                           (encoding enc))
 99 |                 (sb_stemmer_new lang encoding))))
100 |     (cond ((cffi:null-pointer-p ptr)
101 |            (error 'no-such-stemmer
102 |                   :language language
103 |                   :encoding encoding))
104 |           (t (let ((stemmer (%make-stemmer ptr)))
105 |                (setf (%stemmer self) stemmer)
106 |                (tg:finalize self
107 |                             (lambda ()
108 |                               (%close-stemmer stemmer))))))))
109 | 
(defun close-stemmer (stemmer)
  "Close STEMMER and free the C-side stemmer.
Runs under STEMMER's lock.  Closing an already closed stemmer does nothing,
because `%close-stemmer' checks the deleted flag first."
  (synchronized (stemmer)
    (%close-stemmer (%stemmer stemmer))))
114 | 
(defun load-stemmer (language &optional encoding)
  "Load a stemmer for LANGUAGE and ENCODING (which defaults to
`*default-encoding*').
LANGUAGE is a string designator; it is downcased before use."
  (make 'stemmer
        :language (string-downcase language)
        :encoding (or encoding *default-encoding*)))
121 | 
(defmacro with-stemmer ((var language &optional encoding)
                        &body body)
  "Run BODY with VAR bound to a stemmer for LANGUAGE and ENCODING.
Expands into a call to `call/stemmer', which closes the stemmer when BODY
exits, normally or not."
  ;; NOTE(review): `with-thunk' presumably rebinds BODY to the name of a
  ;; local function of one argument (VAR) -- confirm against its definition.
  (with-thunk (body var)
    `(call/stemmer ,language ,encoding #',body)))
126 | 
(defun call/stemmer (lang enc fn)
  "Call FN with a freshly loaded stemmer for LANG and ENC and return its
values.  The stemmer is closed afterwards, even on a non-local exit."
  (let ((stemmer (load-stemmer lang enc)))
    (unwind-protect
         (funcall fn stemmer)
      (close-stemmer stemmer))))
132 | 
133 | ;;; NB
134 | ;;; /** Stem a word.
135 | ;;; *
136 | ;;; *  The return value is owned by the stemmer - it must not be freed or
137 | ;;; *  modified, and it will become invalid when the stemmer is called again,
138 | ;;; *  or if the stemmer is freed.
139 | ;;; *
140 | ;;; *  The length of the return value can be obtained using sb_stemmer_length().
141 | ;;; *
142 | ;;; *  If an out-of-memory error occurs, this will return NULL.
143 | ;;; */
144 | 
(defun stem-word/no-lock (stemmer word)
  "Stem WORD with STEMMER without taking STEMMER's lock.
Anything that is not of type `word', and any stop word of the stemmer's
language, is returned unchanged.  Callers are expected to hold the lock."
  (cond ((not (typep word 'word)) word)
        ((stop-word-p word (stemmer-language stemmer)) word)
        (t
         (let ((enc (encoding-of stemmer)))
           (cffi:with-foreign-string (buffer word :encoding enc)
             ;; TODO Octets by encoding.
             (let* ((c-stemmer (%stemmer-pointer (%stemmer stemmer)))
                    (size (cffi::foreign-string-length buffer :encoding enc))
                    (result (sb_stemmer_stem c-stemmer buffer size)))
               ;; The C result is owned by the stemmer (see the note above),
               ;; so it is converted to a Lisp string right away.
               (cffi:foreign-string-to-lisp result :encoding enc)))))))
158 | 
(defun stem (stemmer word)
  "Return the stem of WORD computed by STEMMER, holding STEMMER's lock.
Signals an error if STEMMER has been closed.  See `stem-word/no-lock' for
how non-words and stop words are treated."
  (synchronized (stemmer)
    ;; Check inside the critical section: `close-stemmer' takes the same
    ;; lock, so the stemmer cannot be freed between the check and the call.
    (check-open stemmer)
    (stem-word/no-lock stemmer word)))
163 | 
(defun stem-all (list language &optional encoding)
  "Stem every item of LIST with a temporary stemmer for LANGUAGE and ENCODING.
Returns two values: the list of stems and T.  If no stemmer exists for the
requested pair, returns LIST itself and NIL instead."
  (handler-case
      (let ((stems (with-stemmer (s language encoding)
                     ;; Take the lock once for the whole list.
                     (synchronized (s)
                       (mapcar (lambda (item)
                                 (stem-word/no-lock s item))
                               list)))))
        (values stems t))
    (no-such-stemmer ()
      (values list nil))))
174 | 


--------------------------------------------------------------------------------
/stopwords/es.txt:
--------------------------------------------------------------------------------
  1 |  | A Spanish stop word list. Comments begin with vertical bar. Each stop
  2 |  | word is at the start of a line.
  3 | 
  4 | 
  5 |  | The following is a ranked list (commonest to rarest) of stopwords
  6 |  | deriving from a large sample of text.
  7 | 
  8 |  | Extra words have been added at the end.
  9 | 
 10 | de             |  from, of
 11 | la             |  the, her
 12 | que            |  who, that
 13 | el             |  the
 14 | en             |  in
 15 | y              |  and
 16 | a              |  to
 17 | los            |  the, them
 18 | del            |  de + el
 19 | se             |  himself, from him etc
 20 | las            |  the, them
 21 | por            |  for, by, etc
 22 | un             |  a
 23 | para           |  for
 24 | con            |  with
 25 | no             |  no
 26 | una            |  a
 27 | su             |  his, her
 28 | al             |  a + el
 29 |   | es         from SER
 30 | lo             |  him
 31 | como           |  how
 32 | más            |  more
 33 | pero           |  but
 34 | sus            |  su plural
 35 | le             |  to him, her
 36 | ya             |  already
 37 | o              |  or
 38 |   | fue        from SER
 39 | este           |  this
 40 |   | ha         from HABER
 41 | sí             |  himself etc
 42 | porque         |  because
 43 | esta           |  this
 44 |   | son        from SER
 45 | entre          |  between
 46 |   | está     from ESTAR
 47 | cuando         |  when
 48 | muy            |  very
 49 | sin            |  without
 50 | sobre          |  on
 51 |   | ser        from SER
 52 |   | tiene      from TENER
 53 | también        |  also
 54 | me             |  me
 55 | hasta          |  until
 56 | hay            |  there is/are
 57 | donde          |  where
 58 |   | han        from HABER
 59 | quien          |  whom, that
 60 |   | están      from ESTAR
 61 |   | estado     from ESTAR
 62 | desde          |  from
 63 | todo           |  all
 64 | nos            |  us
 65 | durante        |  during
 66 |   | estados    from ESTAR
 67 | todos          |  all
 68 | uno            |  a
 69 | les            |  to them
 70 | ni             |  nor
 71 | contra         |  against
 72 | otros          |  other
 73 |   | fueron     from SER
 74 | ese            |  that
 75 | eso            |  that
 76 |   | había      from HABER
 77 | ante           |  before
 78 | ellos          |  they
 79 | e              |  and (variant of y)
 80 | esto           |  this
 81 | mí             |  me
 82 | antes          |  before
 83 | algunos        |  some
 84 | qué            |  what?
 85 | unos           |  a
 86 | yo             |  I
 87 | otro           |  other
 88 | otras          |  other
 89 | otra           |  other
 90 | él             |  he
 91 | tanto          |  so much, many
 92 | esa            |  that
 93 | estos          |  these
 94 | mucho          |  much, many
 95 | quienes        |  who
 96 | nada           |  nothing
 97 | muchos         |  many
 98 | cual           |  who
 99 |   | sea        from SER
100 | poco           |  few
101 | ella           |  she
102 | estar          |  to be
103 |   | haber      from HABER
104 | estas          |  these
105 |   | estaba     from ESTAR
106 |   | estamos    from ESTAR
107 | algunas        |  some
108 | algo           |  something
109 | nosotros       |  we
110 | 
111 |       | other forms
112 | 
113 | mi             |  me
114 | mis            |  mi plural
115 | tú             |  thou
116 | te             |  thee
117 | ti             |  thee
118 | tu             |  thy
119 | tus            |  tu plural
120 | ellas          |  they
121 | nosotras       |  we
122 | vosotros       |  you
123 | vosotras       |  you
124 | os             |  you
125 | mío            |  mine
126 | mía            |
127 | míos           |
128 | mías           |
129 | tuyo           |  thine
130 | tuya           |
131 | tuyos          |
132 | tuyas          |
133 | suyo           |  his, hers, theirs
134 | suya           |
135 | suyos          |
136 | suyas          |
137 | nuestro        |  ours
138 | nuestra        |
139 | nuestros       |
140 | nuestras       |
141 | vuestro        |  yours
142 | vuestra        |
143 | vuestros       |
144 | vuestras       |
145 | esos           |  those
146 | esas           |  those
147 | 
148 |                | forms of estar, to be (not including the infinitive):
149 | estoy
150 | estás
151 | está
152 | estamos
153 | estáis
154 | están
155 | esté
156 | estés
157 | estemos
158 | estéis
159 | estén
160 | estaré
161 | estarás
162 | estará
163 | estaremos
164 | estaréis
165 | estarán
166 | estaría
167 | estarías
168 | estaríamos
169 | estaríais
170 | estarían
171 | estaba
172 | estabas
173 | estábamos
174 | estabais
175 | estaban
176 | estuve
177 | estuviste
178 | estuvo
179 | estuvimos
180 | estuvisteis
181 | estuvieron
182 | estuviera
183 | estuvieras
184 | estuviéramos
185 | estuvierais
186 | estuvieran
187 | estuviese
188 | estuvieses
189 | estuviésemos
190 | estuvieseis
191 | estuviesen
192 | estando
193 | estado
194 | estada
195 | estados
196 | estadas
197 | estad
198 | 
199 |                | forms of haber, to have (not including the infinitive):
200 | he
201 | has
202 | ha
203 | hemos
204 | habéis
205 | han
206 | haya
207 | hayas
208 | hayamos
209 | hayáis
210 | hayan
211 | habré
212 | habrás
213 | habrá
214 | habremos
215 | habréis
216 | habrán
217 | habría
218 | habrías
219 | habríamos
220 | habríais
221 | habrían
222 | había
223 | habías
224 | habíamos
225 | habíais
226 | habían
227 | hube
228 | hubiste
229 | hubo
230 | hubimos
231 | hubisteis
232 | hubieron
233 | hubiera
234 | hubieras
235 | hubiéramos
236 | hubierais
237 | hubieran
238 | hubiese
239 | hubieses
240 | hubiésemos
241 | hubieseis
242 | hubiesen
243 | habiendo
244 | habido
245 | habida
246 | habidos
247 | habidas
248 | 
249 |                | forms of ser, to be (not including the infinitive):
250 | soy
251 | eres
252 | es
253 | somos
254 | sois
255 | son
256 | sea
257 | seas
258 | seamos
259 | seáis
260 | sean
261 | seré
262 | serás
263 | será
264 | seremos
265 | seréis
266 | serán
267 | sería
268 | serías
269 | seríamos
270 | seríais
271 | serían
272 | era
273 | eras
274 | éramos
275 | erais
276 | eran
277 | fui
278 | fuiste
279 | fue
280 | fuimos
281 | fuisteis
282 | fueron
283 | fuera
284 | fueras
285 | fuéramos
286 | fuerais
287 | fueran
288 | fuese
289 | fueses
290 | fuésemos
291 | fueseis
292 | fuesen
293 | siendo
294 | sido
295 |   |  sed also means 'thirst'
296 | 
297 |                | forms of tener, to have (not including the infinitive):
298 | tengo
299 | tienes
300 | tiene
301 | tenemos
302 | tenéis
303 | tienen
304 | tenga
305 | tengas
306 | tengamos
307 | tengáis
308 | tengan
309 | tendré
310 | tendrás
311 | tendrá
312 | tendremos
313 | tendréis
314 | tendrán
315 | tendría
316 | tendrías
317 | tendríamos
318 | tendríais
319 | tendrían
320 | tenía
321 | tenías
322 | teníamos
323 | teníais
324 | tenían
325 | tuve
326 | tuviste
327 | tuvo
328 | tuvimos
329 | tuvisteis
330 | tuvieron
331 | tuviera
332 | tuvieras
333 | tuviéramos
334 | tuvierais
335 | tuvieran
336 | tuviese
337 | tuvieses
338 | tuviésemos
339 | tuvieseis
340 | tuviesen
341 | teniendo
342 | tenido
343 | tenida
344 | tenidos
345 | tenidas
346 | tened
347 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/modules_utf8.h:
--------------------------------------------------------------------------------
  1 | /* libstemmer/modules_utf8.h: List of stemming modules.
  2 |  *
  3 |  * This file is generated by mkmodules.pl from a list of module names.
  4 |  * Do not edit manually.
  5 |  *
  6 |  * Modules included by this file are: danish, dutch, english, finnish, french,
  7 |  * german, hungarian, italian, norwegian, porter, portuguese, romanian,
  8 |  * russian, spanish, swedish, turkish
  9 |  */
 10 | 
 11 | #include "../src_c/stem_UTF_8_danish.h"
 12 | #include "../src_c/stem_UTF_8_dutch.h"
 13 | #include "../src_c/stem_UTF_8_english.h"
 14 | #include "../src_c/stem_UTF_8_finnish.h"
 15 | #include "../src_c/stem_UTF_8_french.h"
 16 | #include "../src_c/stem_UTF_8_german.h"
 17 | #include "../src_c/stem_UTF_8_hungarian.h"
 18 | #include "../src_c/stem_UTF_8_italian.h"
 19 | #include "../src_c/stem_UTF_8_norwegian.h"
 20 | #include "../src_c/stem_UTF_8_porter.h"
 21 | #include "../src_c/stem_UTF_8_portuguese.h"
 22 | #include "../src_c/stem_UTF_8_romanian.h"
 23 | #include "../src_c/stem_UTF_8_russian.h"
 24 | #include "../src_c/stem_UTF_8_spanish.h"
 25 | #include "../src_c/stem_UTF_8_swedish.h"
 26 | #include "../src_c/stem_UTF_8_turkish.h"
 27 | 
/* Character encodings known to this (UTF-8 only) build.
 * ENC_UNKNOWN is 0 and doubles as the sentinel value in the tables below. */
typedef enum {
  ENC_UNKNOWN=0,
  ENC_UTF_8
} stemmer_encoding_t;
 32 | 
/* Associates an encoding's textual name with its enum value. */
struct stemmer_encoding {
  const char * name;       /* encoding name as text, e.g. "UTF_8" */
  stemmer_encoding_t enc;  /* corresponding enum value */
};
/* Table of supported encodings, terminated by an entry whose name is 0. */
static struct stemmer_encoding encodings[] = {
  {"UTF_8", ENC_UTF_8},
  {0,ENC_UNKNOWN}
};
 41 | 
/* One stemming module: a name, the encoding it handles, and the three
 * functions that create, destroy and run its Snowball environment. */
struct stemmer_modules {
  const char * name;                /* name the module is listed under */
  stemmer_encoding_t enc; 
  struct SN_env * (*create)(void);  /* allocates a new environment */
  void (*close)(struct SN_env *);   /* releases an environment */
  int (*stem)(struct SN_env *);     /* runs the stemmer on an environment */
};
/* Table of available modules, sorted by name.  Each algorithm appears under
 * several names (for example "da", "dan" and "danish" all select the Danish
 * stemmer).  The table ends with an all-zero sentinel entry. */
static struct stemmer_modules modules[] = {
  {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
  {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
  {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
  {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
  {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
  {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
  {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
  {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
  {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
  {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
  {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
  {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
  {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
  {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
  {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
  {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
  {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
  {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
  {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
  {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
  {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
  {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
  {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
  {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
  {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
  {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
  {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
  {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
  {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
  {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
  {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
  {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
  {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
  {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
  {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
  {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
  {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
  {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
  {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
  {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
  {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
  {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
  {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
  {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
  {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
  {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
  {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
  {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
  {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
  {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
  {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
  {0,ENC_UNKNOWN,0,0,0}
};
/* Full names of the stemming algorithms included in this build, one per
 * algorithm, terminated by a 0 entry. */
static const char * algorithm_names[] = {
  "danish", 
  "dutch", 
  "english", 
  "finnish", 
  "french", 
  "german", 
  "hungarian", 
  "italian", 
  "norwegian", 
  "porter", 
  "portuguese", 
  "romanian", 
  "russian", 
  "spanish", 
  "swedish", 
  "turkish", 
  0
};
122 | 


--------------------------------------------------------------------------------
/stopwords/ru.txt:
--------------------------------------------------------------------------------
  1 |  | a russian stop word list. comments begin with vertical bar. each stop
  2 |  | word is at the start of a line.
  3 | 
  4 |  | this is a ranked list (commonest to rarest) of stopwords derived from
  5 |  | a large text sample.
  6 | 
  7 |  | letter `ё' is translated to `е'.
  8 | 
  9 | и              | and
 10 | в              | in/into
 11 | во             | alternative form
 12 | не             | not
 13 | что            | what/that
 14 | он             | he
 15 | на             | on/onto
 16 | я              | i
 17 | с              | from
 18 | со             | alternative form
 19 | как            | how
 20 | а              | milder form of `no' (but)
 21 | то             | conjunction and form of `that'
 22 | все            | all
 23 | она            | she
 24 | так            | so, thus
 25 | его            | him
 26 | но             | but
 27 | да             | yes/and
 28 | ты             | thou
 29 | к              | towards, by
 30 | у              | around, chez
 31 | же             | intensifier particle
 32 | вы             | you
 33 | за             | beyond, behind
 34 | бы             | conditional/subj. particle
 35 | по             | up to, along
 36 | только         | only
 37 | ее             | her
 38 | мне            | to me
 39 | было           | it was
 40 | вот            | here is/are, particle
 41 | от             | away from
 42 | меня           | me
 43 | еще            | still, yet, more
 44 | нет            | no, there isnt/arent
 45 | о              | about
 46 | из             | out of
 47 | ему            | to him
 48 | теперь         | now
 49 | когда          | when
 50 | даже           | even
 51 | ну             | so, well
 52 | вдруг          | suddenly
 53 | ли             | interrogative particle
 54 | если           | if
 55 | уже            | already, but homonym of `narrower'
 56 | или            | or
 57 | ни             | neither
 58 | быть           | to be
 59 | был            | he was
 60 | него           | prepositional form of его
 61 | до             | up to
 62 | вас            | you accusative
 63 | нибудь         | indef. suffix preceded by hyphen
 64 | опять          | again
 65 | уж             | already, but homonym of `adder'
 66 | вам            | to you
 67 | сказал         | he said
 68 | ведь           | particle `after all'
 69 | там            | there
 70 | потом          | then
 71 | себя           | oneself
 72 | ничего         | nothing
 73 | ей             | to her
 74 | может          | usually with `быть' as `maybe'
 75 | они            | they
 76 | тут            | here
 77 | где            | where
 78 | есть           | there is/are
 79 | надо           | got to, must
 80 | ней            | prepositional form of  ей
 81 | для            | for
 82 | мы             | we
 83 | тебя           | thee
 84 | их             | them, their
 85 | чем            | than
 86 | была           | she was
 87 | сам            | self
 88 | чтоб           | in order to
 89 | без            | without
 90 | будто          | as if
 91 | человек        | man, person, one
 92 | чего           | genitive form of `what'
 93 | раз            | once
 94 | тоже           | also
 95 | себе           | to oneself
 96 | под            | beneath
 97 | жизнь          | life
 98 | будет          | will be
 99 | ж              | short form of intensifier particle `же'
100 | тогда          | then
101 | кто            | who
102 | этот           | this
103 | говорил        | was saying
104 | того           | genitive form of `that'
105 | потому         | for that reason
106 | этого          | genitive form of `this'
107 | какой          | which
108 | совсем         | altogether
109 | ним            | prepositional form of `его', `они'
110 | здесь          | here
111 | этом           | prepositional form of `этот'
112 | один           | one
113 | почти          | almost
114 | мой            | my
115 | тем            | instrumental/dative plural of `тот', `то'
116 | чтобы          | full form of `in order that'
117 | нее            | her (acc.)
118 | кажется        | it seems
119 | сейчас         | now
120 | были           | they were
121 | куда           | where to
122 | зачем          | why
123 | сказать        | to say
124 | всех           | all (acc., gen. preposn. plural)
125 | никогда        | never
126 | сегодня        | today
127 | можно          | possible, one can
128 | при            | by
129 | наконец        | finally
130 | два            | two
131 | об             | alternative form of `о', about
132 | другой         | another
133 | хоть           | even
134 | после          | after
135 | над            | above
136 | больше         | more
137 | тот            | that one (masc.)
138 | через          | across, in
139 | эти            | these
140 | нас            | us
141 | про            | about
142 | всего          | in all, only, of all
143 | них            | prepositional form of `они' (they)
144 | какая          | which, feminine
145 | много          | lots
146 | разве          | interrogative particle
147 | сказала        | she said
148 | три            | three
149 | эту            | this, acc. fem. sing.
150 | моя            | my, feminine
151 | впрочем        | moreover, besides
152 | хорошо         | good
153 | свою           | ones own, acc. fem. sing.
154 | этой           | oblique form of `эта', fem. `this'
155 | перед          | in front of
156 | иногда         | sometimes
157 | лучше          | better
158 | чуть           | a little
159 | том            | preposn. form of `that one'
160 | нельзя         | one must not
161 | такой          | such a one
162 | им             | to them
163 | более          | more
164 | всегда         | always
165 | конечно        | of course
166 | всю            | acc. fem. sing of `all'
167 | между          | between
168 | 
169 | 
170 |   | b: some paradigms
171 |   |
172 |   | personal pronouns
173 |   |
174 |   | я  меня  мне  мной  [мною]
175 |   | ты  тебя  тебе  тобой  [тобою]
176 |   | он  его  ему  им  [него, нему, ним]
177 |   | она  ее  ей  ею  [нее, ней, нею]
178 |   | оно  его  ему  им  [него, нему, ним]
179 |   |
180 |   | мы  нас  нам  нами
181 |   | вы  вас  вам  вами
182 |   | они  их  им  ими  [них, ним, ними]
183 |   |
184 |   |   себя  себе  собой   [собою]
185 |   |
186 |   | demonstrative pronouns: этот (this), тот (that)
187 |   |
188 |   | этот  эта  это  эти
189 |   | этого  эту  это  эти
190 |   | этого  этой  этого  этих
191 |   | этому  этой  этому  этим
192 |   | этим  этой  этим  [этою]  этими
193 |   | этом  этой  этом  этих
194 |   |
195 |   | тот  та  то  те
196 |   | того  ту  то  те
197 |   | того  той  того  тех
198 |   | тому  той  тому  тем
199 |   | тем  той  тем  [тою]  теми
200 |   | том  той  том  тех
201 |   |
202 |   | determinative pronouns
203 |   |
204 |   | (a) весь (all)
205 |   |
206 |   | весь  вся  все  все
207 |   | всего  всю  все  все
208 |   | всего  всей  всего  всех
209 |   | всему  всей  всему  всем
210 |   | всем  всей  всем  [всею]  всеми
211 |   | всем  всей  всем  всех
212 |   |
213 |   | (b) сам (himself etc)
214 |   |
215 |   | сам  сама  само  сами
216 |   | самого саму  само  самих
217 |   | самого самой самого  самих
218 |   | самому самой самому  самим
219 |   | самим  самой  самим  [самою]  самими
220 |   | самом самой самом  самих
221 |   |
222 |   | stems of verbs `to be', `to have', `to do' and modal
223 |   |
224 |   | быть  бы  буд  быв  есть  суть
225 |   | име
226 |   | дел
227 |   | мог   мож  мочь
228 |   | уме
229 |   | хоч  хот
230 |   | долж
231 |   | можн
232 |   | нужн
233 |   | нельзя
234 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); /* stemmer entry point; defined near the end of this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_other_suffix(struct SN_env * z); /* file-local routines defined below; each returns 1, 0 or a negative value */
 14 | static int r_consonant_pair(struct SN_env * z);
 15 | static int r_main_suffix(struct SN_env * z);
 16 | static int r_mark_regions(struct SN_env * z);
 17 | #ifdef __cplusplus
 18 | extern "C" {
 19 | #endif
 20 | 
 21 | 
 22 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); /* wraps SN_create_env (see end of file) */
 23 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); /* wraps SN_close_env (see end of file) */
 24 | 
 25 | 
 26 | #ifdef __cplusplus
 27 | }
 28 | #endif
 29 | static const symbol s_0_0[1] = { 'a' }; /* s_0_N: suffix strings searched by r_main_suffix; array length == suffix length, no terminator */
 30 | static const symbol s_0_1[1] = { 'e' };
 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' };
 32 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' };
 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' };
 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' };
 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' };
 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' };
 37 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' };
 38 | static const symbol s_0_9[2] = { 'e', 'n' };
 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' };
 40 | static const symbol s_0_11[2] = { 'a', 'r' };
 41 | static const symbol s_0_12[2] = { 'e', 'r' };
 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' };
 43 | static const symbol s_0_14[1] = { 's' };
 44 | static const symbol s_0_15[2] = { 'a', 's' };
 45 | static const symbol s_0_16[2] = { 'e', 's' };
 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' };
 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' };
 48 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' };
 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' };
 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' };
 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' };
 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' };
 53 | static const symbol s_0_24[3] = { 'e', 't', 's' };
 54 | static const symbol s_0_25[2] = { 'e', 't' };
 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' };
 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' };
 57 | static const symbol s_0_28[3] = { 'a', 's', 't' };
 58 | 
 59 | static const struct among a_0[29] = /* one row per s_0_N string, same order; passed to find_among_b in r_main_suffix */
 60 | {
 61 | /*  0 */ { 1, s_0_0, -1, 1, 0}, /* row layout as used here: length, string, index of a shorter row this string ends with (-1 if none), result code, 0 */
 62 | /*  1 */ { 1, s_0_1, -1, 1, 0}, /* result code is presumably what find_among_b returns on a match -- confirm against runtime/header.h */
 63 | /*  2 */ { 3, s_0_2, 1, 1, 0},
 64 | /*  3 */ { 4, s_0_3, 1, 1, 0},
 65 | /*  4 */ { 4, s_0_4, 1, 1, 0},
 66 | /*  5 */ { 3, s_0_5, 1, 1, 0},
 67 | /*  6 */ { 3, s_0_6, 1, 1, 0},
 68 | /*  7 */ { 6, s_0_7, 6, 1, 0},
 69 | /*  8 */ { 4, s_0_8, 1, 3, 0}, /* "erte": code 3, handled by case 3 of r_main_suffix (rewrite to "er") */
 70 | /*  9 */ { 2, s_0_9, -1, 1, 0},
 71 | /* 10 */ { 5, s_0_10, 9, 1, 0},
 72 | /* 11 */ { 2, s_0_11, -1, 1, 0},
 73 | /* 12 */ { 2, s_0_12, -1, 1, 0},
 74 | /* 13 */ { 5, s_0_13, 12, 1, 0},
 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, /* bare "s": code 2, handled by case 2 of r_main_suffix (conditional delete) */
 76 | /* 15 */ { 2, s_0_15, 14, 1, 0},
 77 | /* 16 */ { 2, s_0_16, 14, 1, 0},
 78 | /* 17 */ { 4, s_0_17, 16, 1, 0},
 79 | /* 18 */ { 5, s_0_18, 16, 1, 0},
 80 | /* 19 */ { 4, s_0_19, 16, 1, 0},
 81 | /* 20 */ { 7, s_0_20, 19, 1, 0},
 82 | /* 21 */ { 3, s_0_21, 14, 1, 0},
 83 | /* 22 */ { 6, s_0_22, 21, 1, 0},
 84 | /* 23 */ { 3, s_0_23, 14, 1, 0},
 85 | /* 24 */ { 3, s_0_24, 14, 1, 0},
 86 | /* 25 */ { 2, s_0_25, -1, 1, 0},
 87 | /* 26 */ { 3, s_0_26, 25, 1, 0},
 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, /* "ert": code 3, same rewrite as "erte" */
 89 | /* 28 */ { 3, s_0_28, -1, 1, 0}
 90 | };
 91 | 
 92 | static const symbol s_1_0[2] = { 'd', 't' }; /* the two consonant pairs tested by r_consonant_pair */
 93 | static const symbol s_1_1[2] = { 'v', 't' };
 94 | 
 95 | static const struct among a_1[2] = /* searched with find_among_b; the caller only tests the outcome for zero / non-zero */
 96 | {
 97 | /*  0 */ { 2, s_1_0, -1, -1, 0}, /* result field is -1 in both rows; no switch is performed on it */
 98 | /*  1 */ { 2, s_1_1, -1, -1, 0}
 99 | };
100 | 
101 | static const symbol s_2_0[3] = { 'l', 'e', 'g' }; /* s_2_N: suffix strings searched by r_other_suffix */
102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' };
103 | static const symbol s_2_2[2] = { 'i', 'g' };
104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' };
105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' };
106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' };
107 | static const symbol s_2_6[3] = { 'e', 'l', 's' };
108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' };
109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' };
110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' };
111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' };
112 | 
113 | static const struct among a_2[11] = /* every row carries result code 1, which r_other_suffix handles by deleting the match */
114 | {
115 | /*  0 */ { 3, s_2_0, -1, 1, 0}, /* same row layout as a_0: length, string, index of a shorter row this one ends with, result code, 0 */
116 | /*  1 */ { 4, s_2_1, 0, 1, 0},
117 | /*  2 */ { 2, s_2_2, -1, 1, 0},
118 | /*  3 */ { 3, s_2_3, 2, 1, 0},
119 | /*  4 */ { 3, s_2_4, 2, 1, 0},
120 | /*  5 */ { 4, s_2_5, 4, 1, 0},
121 | /*  6 */ { 3, s_2_6, -1, 1, 0},
122 | /*  7 */ { 3, s_2_7, -1, 1, 0},
123 | /*  8 */ { 4, s_2_8, 7, 1, 0},
124 | /*  9 */ { 4, s_2_9, 7, 1, 0},
125 | /* 10 */ { 7, s_2_10, 9, 1, 0}
126 | };
127 | 
128 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; /* grouping "v" (per the generator comments); always passed with bounds 97..248 -- presumably a bitmap indexed from code 97, confirm in the runtime */
129 | 
130 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; /* grouping consulted before deleting a bare "s"; passed with bounds 98..122 */
131 | 
132 | static const symbol s_0[] = { 'k' }; /* literal compared with eq_s_b in case 2 of r_main_suffix */
133 | static const symbol s_1[] = { 'e', 'r' }; /* replacement text written by case 3 of r_main_suffix */
134 | 
135 | static int r_mark_regions(struct SN_env * z) { /* Computes the marks I[0] (p1 in the generator comments) and I[1] (x). Returns 1 when both are set, 0 when it gives up early. Moves z->c; the caller restores it. */
136 |     z->I[0] = z->l; /* default p1 = limit; this value survives every early return below */
137 |     {   int c_test = z->c; /* test, line 30 */
138 |         {   int ret = z->c + 3; /* position three symbols past the cursor */
139 |             if (0 > ret || ret > z->l) return 0; /* fewer than three symbols available: stop, I[1] is left unwritten */
140 |             z->c = ret; /* hop, line 30 */
141 |         }
142 |         z->I[1] = z->c; /* setmark x, line 30 */
143 |         z->c = c_test; /* the "test" construct puts the cursor back where it started */
144 |     }
145 |     if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */
146 |     {    /* gopast */ /* non v, line 31 */
147 |         int ret = in_grouping(z, g_v, 97, 248, 1);
148 |         if (ret < 0) return 0; /* negative result: search failed, keep the default p1 */
149 |         z->c += ret; /* advance by the amount the helper reported */
150 |     }
151 |     z->I[0] = z->c; /* setmark p1, line 31 */
152 |      /* try, line 32 */
153 |     if (!(z->I[0] < z->I[1])) goto lab0; /* p1 is already at or beyond x: leave it */
154 |     z->I[0] = z->I[1]; /* otherwise clamp p1 up to x, so p1 >= cursor + 3 */
155 | lab0:
156 |     return 1;
157 | }
158 | 
159 | static int r_main_suffix(struct SN_env * z) { /* Looks for an a_0 suffix ending at the cursor, no further back than I[0], and deletes or rewrites it. Returns 1 after an edit, 0 when nothing applies, negative when a slice helper fails. */
160 |     int among_var;
161 |     {   int mlimit; /* setlimit, line 38 */
162 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as a distance from the limit */
163 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
164 |         z->c = z->I[0]; /* tomark, line 38 */
165 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0]; mlimit holds the old value */
166 |         z->c = z->l - m1; /* cursor back to its entry position */
167 |         z->ket = z->c; /* [, line 38 */
168 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: the symbol before the cursor must be in 0x60..0x7F with its bit set in 1851426, i.e. one of a e n r s t (the final letters of the a_0 strings) */
169 |         among_var = find_among_b(z, a_0, 29); /* substring, line 38 */
170 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* no table match: restore the limit and report failure */
171 |         z->bra = z->c; /* ], line 38 */
172 |         z->lb = mlimit; /* limit restored before acting on the match */
173 |     }
174 |     switch(among_var) {
175 |         case 0: return 0;
176 |         case 1:
177 |             {   int ret = slice_del(z); /* delete, line 44 */
178 |                 if (ret < 0) return ret;
179 |             }
180 |             break;
181 |         case 2: /* the bare "s" row of a_0 */
182 |             {   int m2 = z->l - z->c; (void)m2; /* or, line 46 */
183 |                 if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; /* non-zero sends us to the second alternative -- presumably "preceding symbol not in g_s_ending", confirm in the runtime */
184 |                 goto lab0; /* first alternative held: go straight to the delete */
185 |             lab1:
186 |                 z->c = z->l - m2; /* rewind to the position saved at the start of the "or" */
187 |                 if (!(eq_s_b(z, 1, s_0))) return 0; /* second alternative needs the literal "k" before the cursor */
188 |                 if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; /* and then a check against grouping v; non-zero aborts without editing */
189 |             }
190 |         lab0:
191 |             {   int ret = slice_del(z); /* delete, line 46 */
192 |                 if (ret < 0) return ret;
193 |             }
194 |             break;
195 |         case 3: /* the "erte" and "ert" rows of a_0 */
196 |             {   int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */
197 |                 if (ret < 0) return ret;
198 |             }
199 |             break;
200 |     }
201 |     return 1;
202 | }
203 | 
204 | static int r_consonant_pair(struct SN_env * z) { /* If the text before the cursor ends in "dt" or "vt" (searched no further back than I[0]), deletes the single symbol before the cursor. Returns 1 after the delete, 0 if not applicable, negative on slice failure. */
205 |     {   int m_test = z->l - z->c; /* test, line 53 */
206 |         {   int mlimit; /* setlimit, line 54 */
207 |             int m1 = z->l - z->c; (void)m1;
208 |             if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
209 |             z->c = z->I[0]; /* tomark, line 54 */
210 |             mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0] */
211 |             z->c = z->l - m1; /* cursor back to its entry position */
212 |             z->ket = z->c; /* [, line 54 */
213 |             if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } /* quick reject: need at least two symbols above the limit and the last one must be 116 ('t') */
214 |             if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */
215 |             z->bra = z->c; /* ], line 54 */
216 |             z->lb = mlimit; /* limit restored */
217 |         }
218 |         z->c = z->l - m_test; /* end of "test": cursor back to the entry position; ket still marks that position */
219 |     }
220 |     if (z->c <= z->lb) return 0; /* no symbol left to step over */
221 |     z->c--; /* next, line 59 */
222 |     z->bra = z->c; /* ], line 59 */
223 |     {   int ret = slice_del(z); /* delete, line 59 */
224 |         if (ret < 0) return ret;
225 |     }
226 |     return 1;
227 | }
228 | 
229 | static int r_other_suffix(struct SN_env * z) { /* Looks for an a_2 suffix ending at the cursor, no further back than I[0], and deletes it. Returns 1 after the delete, 0 when nothing applies, negative on slice failure. */
230 |     int among_var;
231 |     {   int mlimit; /* setlimit, line 63 */
232 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as a distance from the limit */
233 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
234 |         z->c = z->I[0]; /* tomark, line 63 */
235 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0] */
236 |         z->c = z->l - m1; /* cursor back to its entry position */
237 |         z->ket = z->c; /* [, line 63 */
238 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: need at least two symbols, the last being one of g s v (bits 7, 19, 22 of 4718720; the final letters of the a_2 strings) */
239 |         among_var = find_among_b(z, a_2, 11); /* substring, line 63 */
240 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* no table match */
241 |         z->bra = z->c; /* ], line 63 */
242 |         z->lb = mlimit; /* limit restored before acting on the match */
243 |     }
244 |     switch(among_var) {
245 |         case 0: return 0;
246 |         case 1: /* the only result code present in a_2 */
247 |             {   int ret = slice_del(z); /* delete, line 67 */
248 |                 if (ret < 0) return ret;
249 |             }
250 |             break;
251 |     }
252 |     return 1;
253 | }
254 | 
255 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { /* Entry point: marks regions, then runs the three suffix routines backwards from the limit. Returns 1 normally, or the negative value propagated from a failing routine. */
256 |     {   int c1 = z->c; /* do, line 74 */
257 |         {   int ret = r_mark_regions(z);
258 |             if (ret == 0) goto lab0; /* call mark_regions, line 74 */
259 |             if (ret < 0) return ret;
260 |         }
261 |     lab0:
262 |         z->c = c1; /* "do": the cursor is restored whether or not the routine succeeded */
263 |     }
264 |     z->lb = z->c; z->c = z->l; /* backwards, line 75 */
265 | 
266 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 76 */
267 |         {   int ret = r_main_suffix(z);
268 |             if (ret == 0) goto lab1; /* call main_suffix, line 76 */
269 |             if (ret < 0) return ret;
270 |         }
271 |     lab1:
272 |         z->c = z->l - m2; /* cursor restored relative to the limit z->l */
273 |     }
274 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 77 */
275 |         {   int ret = r_consonant_pair(z);
276 |             if (ret == 0) goto lab2; /* call consonant_pair, line 77 */
277 |             if (ret < 0) return ret;
278 |         }
279 |     lab2:
280 |         z->c = z->l - m3;
281 |     }
282 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 78 */
283 |         {   int ret = r_other_suffix(z);
284 |             if (ret == 0) goto lab3; /* call other_suffix, line 78 */
285 |             if (ret < 0) return ret;
286 |         }
287 |     lab3:
288 |         z->c = z->l - m4;
289 |     }
290 |     z->c = z->lb; /* leave backward mode: cursor returns to the position saved in lb at line 75 */
291 |     return 1;
292 | }
293 | 
294 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } /* returns whatever SN_create_env(0, 2, 0) yields; the 2 presumably sizes the I[] array (I[0], I[1] are used above) -- confirm in runtime/api.h */
295 | 
296 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* hands z to SN_close_env; the 0 mirrors the first argument given to SN_create_env */
297 | 
298 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_norwegian.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int norwegian_UTF_8_stem(struct SN_env * z); /* stemmer entry point; defined near the end of this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_other_suffix(struct SN_env * z); /* file-local routines defined below; each returns 1, 0 or a negative value */
 14 | static int r_consonant_pair(struct SN_env * z);
 15 | static int r_main_suffix(struct SN_env * z);
 16 | static int r_mark_regions(struct SN_env * z);
 17 | #ifdef __cplusplus
 18 | extern "C" {
 19 | #endif
 20 | 
 21 | 
 22 | extern struct SN_env * norwegian_UTF_8_create_env(void); /* wraps SN_create_env (see end of file) */
 23 | extern void norwegian_UTF_8_close_env(struct SN_env * z); /* wraps SN_close_env (see end of file) */
 24 | 
 25 | 
 26 | #ifdef __cplusplus
 27 | }
 28 | #endif
 29 | static const symbol s_0_0[1] = { 'a' }; /* s_0_N: suffix strings searched by r_main_suffix; all plain ASCII, array length == suffix length */
 30 | static const symbol s_0_1[1] = { 'e' };
 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' };
 32 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' };
 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' };
 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' };
 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' };
 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' };
 37 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' };
 38 | static const symbol s_0_9[2] = { 'e', 'n' };
 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' };
 40 | static const symbol s_0_11[2] = { 'a', 'r' };
 41 | static const symbol s_0_12[2] = { 'e', 'r' };
 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' };
 43 | static const symbol s_0_14[1] = { 's' };
 44 | static const symbol s_0_15[2] = { 'a', 's' };
 45 | static const symbol s_0_16[2] = { 'e', 's' };
 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' };
 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' };
 48 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' };
 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' };
 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' };
 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' };
 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' };
 53 | static const symbol s_0_24[3] = { 'e', 't', 's' };
 54 | static const symbol s_0_25[2] = { 'e', 't' };
 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' };
 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' };
 57 | static const symbol s_0_28[3] = { 'a', 's', 't' };
 58 | 
 59 | static const struct among a_0[29] = /* one row per s_0_N string, same order; passed to find_among_b in r_main_suffix */
 60 | {
 61 | /*  0 */ { 1, s_0_0, -1, 1, 0}, /* row layout as used here: length, string, index of a shorter row this string ends with (-1 if none), result code, 0 */
 62 | /*  1 */ { 1, s_0_1, -1, 1, 0}, /* result code is presumably what find_among_b returns on a match -- confirm against runtime/header.h */
 63 | /*  2 */ { 3, s_0_2, 1, 1, 0},
 64 | /*  3 */ { 4, s_0_3, 1, 1, 0},
 65 | /*  4 */ { 4, s_0_4, 1, 1, 0},
 66 | /*  5 */ { 3, s_0_5, 1, 1, 0},
 67 | /*  6 */ { 3, s_0_6, 1, 1, 0},
 68 | /*  7 */ { 6, s_0_7, 6, 1, 0},
 69 | /*  8 */ { 4, s_0_8, 1, 3, 0}, /* "erte": code 3, handled by case 3 of r_main_suffix (rewrite to "er") */
 70 | /*  9 */ { 2, s_0_9, -1, 1, 0},
 71 | /* 10 */ { 5, s_0_10, 9, 1, 0},
 72 | /* 11 */ { 2, s_0_11, -1, 1, 0},
 73 | /* 12 */ { 2, s_0_12, -1, 1, 0},
 74 | /* 13 */ { 5, s_0_13, 12, 1, 0},
 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, /* bare "s": code 2, handled by case 2 of r_main_suffix (conditional delete) */
 76 | /* 15 */ { 2, s_0_15, 14, 1, 0},
 77 | /* 16 */ { 2, s_0_16, 14, 1, 0},
 78 | /* 17 */ { 4, s_0_17, 16, 1, 0},
 79 | /* 18 */ { 5, s_0_18, 16, 1, 0},
 80 | /* 19 */ { 4, s_0_19, 16, 1, 0},
 81 | /* 20 */ { 7, s_0_20, 19, 1, 0},
 82 | /* 21 */ { 3, s_0_21, 14, 1, 0},
 83 | /* 22 */ { 6, s_0_22, 21, 1, 0},
 84 | /* 23 */ { 3, s_0_23, 14, 1, 0},
 85 | /* 24 */ { 3, s_0_24, 14, 1, 0},
 86 | /* 25 */ { 2, s_0_25, -1, 1, 0},
 87 | /* 26 */ { 3, s_0_26, 25, 1, 0},
 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, /* "ert": code 3, same rewrite as "erte" */
 89 | /* 28 */ { 3, s_0_28, -1, 1, 0}
 90 | };
 91 | 
 92 | static const symbol s_1_0[2] = { 'd', 't' }; /* the two consonant pairs tested by r_consonant_pair */
 93 | static const symbol s_1_1[2] = { 'v', 't' };
 94 | 
 95 | static const struct among a_1[2] = /* searched with find_among_b; the caller only tests the outcome for zero / non-zero */
 96 | {
 97 | /*  0 */ { 2, s_1_0, -1, -1, 0}, /* result field is -1 in both rows; no switch is performed on it */
 98 | /*  1 */ { 2, s_1_1, -1, -1, 0}
 99 | };
100 | 
101 | static const symbol s_2_0[3] = { 'l', 'e', 'g' }; /* s_2_N: suffix strings searched by r_other_suffix */
102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' };
103 | static const symbol s_2_2[2] = { 'i', 'g' };
104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' };
105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' };
106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' };
107 | static const symbol s_2_6[3] = { 'e', 'l', 's' };
108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' };
109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' };
110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' };
111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' };
112 | 
113 | static const struct among a_2[11] = /* every row carries result code 1, which r_other_suffix handles by deleting the match */
114 | {
115 | /*  0 */ { 3, s_2_0, -1, 1, 0}, /* same row layout as a_0: length, string, index of a shorter row this one ends with, result code, 0 */
116 | /*  1 */ { 4, s_2_1, 0, 1, 0},
117 | /*  2 */ { 2, s_2_2, -1, 1, 0},
118 | /*  3 */ { 3, s_2_3, 2, 1, 0},
119 | /*  4 */ { 3, s_2_4, 2, 1, 0},
120 | /*  5 */ { 4, s_2_5, 4, 1, 0},
121 | /*  6 */ { 3, s_2_6, -1, 1, 0},
122 | /*  7 */ { 3, s_2_7, -1, 1, 0},
123 | /*  8 */ { 4, s_2_8, 7, 1, 0},
124 | /*  9 */ { 4, s_2_9, 7, 1, 0},
125 | /* 10 */ { 7, s_2_10, 9, 1, 0}
126 | };
127 | 
128 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; /* grouping "v" (per the generator comments); always passed with bounds 97..248 to the *_U helpers -- presumably a bitmap indexed from code point 97, confirm in the runtime */
129 | 
130 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; /* grouping consulted before deleting a bare "s"; passed with bounds 98..122 */
131 | 
132 | static const symbol s_0[] = { 'k' }; /* literal compared with eq_s_b in case 2 of r_main_suffix */
133 | static const symbol s_1[] = { 'e', 'r' }; /* replacement text written by case 3 of r_main_suffix */
134 | 
135 | static int r_mark_regions(struct SN_env * z) { /* Computes the marks I[0] (p1 in the generator comments) and I[1] (x). Returns 1 when both are set, 0 when it gives up early. Moves z->c; the caller restores it. */
136 |     z->I[0] = z->l; /* default p1 = limit; this value survives every early return below */
137 |     {   int c_test = z->c; /* test, line 30 */
138 |         {   int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); /* hop of 3 done through skip_utf8 rather than plain index arithmetic -- presumably counts characters, not bytes */
139 |             if (ret < 0) return 0; /* negative result: the hop was not possible; I[1] is left unwritten */
140 |             z->c = ret; /* hop, line 30 */
141 |         }
142 |         z->I[1] = z->c; /* setmark x, line 30 */
143 |         z->c = c_test; /* the "test" construct puts the cursor back where it started */
144 |     }
145 |     if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */
146 |     {    /* gopast */ /* non v, line 31 */
147 |         int ret = in_grouping_U(z, g_v, 97, 248, 1);
148 |         if (ret < 0) return 0; /* negative result: search failed, keep the default p1 */
149 |         z->c += ret; /* advance by the amount the helper reported */
150 |     }
151 |     z->I[0] = z->c; /* setmark p1, line 31 */
152 |      /* try, line 32 */
153 |     if (!(z->I[0] < z->I[1])) goto lab0; /* p1 is already at or beyond x: leave it */
154 |     z->I[0] = z->I[1]; /* otherwise clamp p1 up to x */
155 | lab0:
156 |     return 1;
157 | }
158 | 
159 | static int r_main_suffix(struct SN_env * z) { /* Looks for an a_0 suffix ending at the cursor, no further back than I[0], and deletes or rewrites it. Returns 1 after an edit, 0 when nothing applies, negative when a slice helper fails. */
160 |     int among_var;
161 |     {   int mlimit; /* setlimit, line 38 */
162 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as a distance from the limit */
163 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
164 |         z->c = z->I[0]; /* tomark, line 38 */
165 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0]; mlimit holds the old value */
166 |         z->c = z->l - m1; /* cursor back to its entry position */
167 |         z->ket = z->c; /* [, line 38 */
168 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject on the last byte: it must be in 0x60..0x7F with its bit set in 1851426, i.e. one of a e n r s t (the final letters of the a_0 strings) */
169 |         among_var = find_among_b(z, a_0, 29); /* substring, line 38 */
170 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* no table match: restore the limit and report failure */
171 |         z->bra = z->c; /* ], line 38 */
172 |         z->lb = mlimit; /* limit restored before acting on the match */
173 |     }
174 |     switch(among_var) {
175 |         case 0: return 0;
176 |         case 1:
177 |             {   int ret = slice_del(z); /* delete, line 44 */
178 |                 if (ret < 0) return ret;
179 |             }
180 |             break;
181 |         case 2: /* the bare "s" row of a_0 */
182 |             {   int m2 = z->l - z->c; (void)m2; /* or, line 46 */
183 |                 if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; /* non-zero sends us to the second alternative -- presumably "preceding character not in g_s_ending", confirm in the runtime */
184 |                 goto lab0; /* first alternative held: go straight to the delete */
185 |             lab1:
186 |                 z->c = z->l - m2; /* rewind to the position saved at the start of the "or" */
187 |                 if (!(eq_s_b(z, 1, s_0))) return 0; /* second alternative needs the literal "k" before the cursor */
188 |                 if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; /* and then a check against grouping v; non-zero aborts without editing */
189 |             }
190 |         lab0:
191 |             {   int ret = slice_del(z); /* delete, line 46 */
192 |                 if (ret < 0) return ret;
193 |             }
194 |             break;
195 |         case 3: /* the "erte" and "ert" rows of a_0 */
196 |             {   int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */
197 |                 if (ret < 0) return ret;
198 |             }
199 |             break;
200 |     }
201 |     return 1;
202 | }
203 | 
204 | static int r_consonant_pair(struct SN_env * z) { /* If the text before the cursor ends in "dt" or "vt" (searched no further back than I[0]), deletes the character before the cursor. Returns 1 after the delete, 0 if not applicable, negative on slice failure. */
205 |     {   int m_test = z->l - z->c; /* test, line 53 */
206 |         {   int mlimit; /* setlimit, line 54 */
207 |             int m1 = z->l - z->c; (void)m1;
208 |             if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
209 |             z->c = z->I[0]; /* tomark, line 54 */
210 |             mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0] */
211 |             z->c = z->l - m1; /* cursor back to its entry position */
212 |             z->ket = z->c; /* [, line 54 */
213 |             if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } /* quick reject: need at least two bytes above the limit and the last one must be 116 ('t') */
214 |             if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */
215 |             z->bra = z->c; /* ], line 54 */
216 |             z->lb = mlimit; /* limit restored */
217 |         }
218 |         z->c = z->l - m_test; /* end of "test": cursor back to the entry position; ket still marks that position */
219 |     }
220 |     {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); /* step back by one through skip_utf8, bounded below by z->lb -- presumably one whole UTF-8 character */
221 |         if (ret < 0) return 0; /* negative result: nothing to step over */
222 |         z->c = ret; /* next, line 59 */
223 |     }
224 |     z->bra = z->c; /* ], line 59 */
225 |     {   int ret = slice_del(z); /* delete, line 59 */
226 |         if (ret < 0) return ret;
227 |     }
228 |     return 1;
229 | }
230 | 
231 | static int r_other_suffix(struct SN_env * z) { /* Looks for an a_2 suffix ending at the cursor, no further back than I[0], and deletes it. Returns 1 after the delete, 0 when nothing applies, negative on slice failure. */
232 |     int among_var;
233 |     {   int mlimit; /* setlimit, line 63 */
234 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as a distance from the limit */
235 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
236 |         z->c = z->I[0]; /* tomark, line 63 */
237 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0] */
238 |         z->c = z->l - m1; /* cursor back to its entry position */
239 |         z->ket = z->c; /* [, line 63 */
240 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: need at least two bytes, the last being one of g s v (bits 7, 19, 22 of 4718720; the final letters of the a_2 strings) */
241 |         among_var = find_among_b(z, a_2, 11); /* substring, line 63 */
242 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* no table match */
243 |         z->bra = z->c; /* ], line 63 */
244 |         z->lb = mlimit; /* limit restored before acting on the match */
245 |     }
246 |     switch(among_var) {
247 |         case 0: return 0;
248 |         case 1: /* the only result code present in a_2 */
249 |             {   int ret = slice_del(z); /* delete, line 67 */
250 |                 if (ret < 0) return ret;
251 |             }
252 |             break;
253 |     }
254 |     return 1;
255 | }
256 | 
257 | extern int norwegian_UTF_8_stem(struct SN_env * z) { /* Entry point: marks regions, then runs the three suffix routines backwards from the limit. Returns 1 normally, or the negative value propagated from a failing routine. */
258 |     {   int c1 = z->c; /* do, line 74 */
259 |         {   int ret = r_mark_regions(z);
260 |             if (ret == 0) goto lab0; /* call mark_regions, line 74 */
261 |             if (ret < 0) return ret;
262 |         }
263 |     lab0:
264 |         z->c = c1; /* "do": the cursor is restored whether or not the routine succeeded */
265 |     }
266 |     z->lb = z->c; z->c = z->l; /* backwards, line 75 */
267 | 
268 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 76 */
269 |         {   int ret = r_main_suffix(z);
270 |             if (ret == 0) goto lab1; /* call main_suffix, line 76 */
271 |             if (ret < 0) return ret;
272 |         }
273 |     lab1:
274 |         z->c = z->l - m2; /* cursor restored relative to the limit z->l */
275 |     }
276 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 77 */
277 |         {   int ret = r_consonant_pair(z);
278 |             if (ret == 0) goto lab2; /* call consonant_pair, line 77 */
279 |             if (ret < 0) return ret;
280 |         }
281 |     lab2:
282 |         z->c = z->l - m3;
283 |     }
284 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 78 */
285 |         {   int ret = r_other_suffix(z);
286 |             if (ret == 0) goto lab3; /* call other_suffix, line 78 */
287 |             if (ret < 0) return ret;
288 |         }
289 |     lab3:
290 |         z->c = z->l - m4;
291 |     }
292 |     z->c = z->lb; /* leave backward mode: cursor returns to the position saved in lb at line 75 */
293 |     return 1;
294 | }
295 | 
296 | extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } /* returns whatever SN_create_env(0, 2, 0) yields; the 2 presumably sizes the I[] array (I[0], I[1] are used above) -- confirm in runtime/api.h */
297 | 
298 | extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* hands z to SN_close_env; the 0 mirrors the first argument given to SN_create_env */
299 | 
300 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); /* stemmer entry point; its definition comes later in this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_other_suffix(struct SN_env * z); /* file-local routines defined below */
 14 | static int r_consonant_pair(struct SN_env * z);
 15 | static int r_main_suffix(struct SN_env * z);
 16 | static int r_mark_regions(struct SN_env * z);
 17 | #ifdef __cplusplus
 18 | extern "C" {
 19 | #endif
 20 | 
 21 | 
 22 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); /* environment constructor for this stemmer */
 23 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); /* environment destructor for this stemmer */
 24 | 
 25 | 
 26 | #ifdef __cplusplus
 27 | }
 28 | #endif
 29 | static const symbol s_0_0[1] = { 'a' }; /* s_0_N: suffix strings searched by r_main_suffix; array length == suffix length, no terminator */
 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
 34 | static const symbol s_0_5[2] = { 'a', 'd' };
 35 | static const symbol s_0_6[1] = { 'e' };
 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' };
 37 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' };
 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' };
 41 | static const symbol s_0_12[2] = { 'e', 'n' };
 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
 43 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' };
 46 | static const symbol s_0_17[2] = { 'a', 'r' };
 47 | static const symbol s_0_18[2] = { 'e', 'r' };
 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
 49 | static const symbol s_0_20[2] = { 'o', 'r' };
 50 | static const symbol s_0_21[1] = { 's' };
 51 | static const symbol s_0_22[2] = { 'a', 's' };
 52 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
 55 | static const symbol s_0_26[2] = { 'e', 's' };
 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' };
 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
 62 | static const symbol s_0_33[2] = { 'a', 't' };
 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' };
 65 | static const symbol s_0_36[3] = { 'a', 's', 't' };
 66 | 
 67 | static const struct among a_0[37] = /* one row per s_0_N string, same order; passed to find_among_b in r_main_suffix */
 68 | {
 69 | /*  0 */ { 1, s_0_0, -1, 1, 0}, /* row layout as used here: length, string, index of a shorter row this string ends with (-1 if none), result code, 0 */
 70 | /*  1 */ { 4, s_0_1, 0, 1, 0}, /* result code is presumably what find_among_b returns on a match -- confirm against runtime/header.h */
 71 | /*  2 */ { 4, s_0_2, 0, 1, 0},
 72 | /*  3 */ { 7, s_0_3, 2, 1, 0},
 73 | /*  4 */ { 4, s_0_4, 0, 1, 0},
 74 | /*  5 */ { 2, s_0_5, -1, 1, 0},
 75 | /*  6 */ { 1, s_0_6, -1, 1, 0},
 76 | /*  7 */ { 3, s_0_7, 6, 1, 0},
 77 | /*  8 */ { 4, s_0_8, 6, 1, 0},
 78 | /*  9 */ { 4, s_0_9, 6, 1, 0},
 79 | /* 10 */ { 3, s_0_10, 6, 1, 0},
 80 | /* 11 */ { 4, s_0_11, 6, 1, 0},
 81 | /* 12 */ { 2, s_0_12, -1, 1, 0},
 82 | /* 13 */ { 5, s_0_13, 12, 1, 0},
 83 | /* 14 */ { 4, s_0_14, 12, 1, 0},
 84 | /* 15 */ { 5, s_0_15, 12, 1, 0},
 85 | /* 16 */ { 3, s_0_16, -1, 1, 0},
 86 | /* 17 */ { 2, s_0_17, -1, 1, 0},
 87 | /* 18 */ { 2, s_0_18, -1, 1, 0},
 88 | /* 19 */ { 5, s_0_19, 18, 1, 0},
 89 | /* 20 */ { 2, s_0_20, -1, 1, 0},
 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, /* bare "s": the only row with code 2, handled by case 2 of r_main_suffix (conditional delete) */
 91 | /* 22 */ { 2, s_0_22, 21, 1, 0},
 92 | /* 23 */ { 5, s_0_23, 22, 1, 0},
 93 | /* 24 */ { 5, s_0_24, 22, 1, 0},
 94 | /* 25 */ { 5, s_0_25, 22, 1, 0},
 95 | /* 26 */ { 2, s_0_26, 21, 1, 0},
 96 | /* 27 */ { 4, s_0_27, 26, 1, 0},
 97 | /* 28 */ { 5, s_0_28, 26, 1, 0},
 98 | /* 29 */ { 3, s_0_29, 21, 1, 0},
 99 | /* 30 */ { 5, s_0_30, 29, 1, 0},
100 | /* 31 */ { 6, s_0_31, 29, 1, 0},
101 | /* 32 */ { 4, s_0_32, 21, 1, 0},
102 | /* 33 */ { 2, s_0_33, -1, 1, 0},
103 | /* 34 */ { 5, s_0_34, -1, 1, 0},
104 | /* 35 */ { 3, s_0_35, -1, 1, 0},
105 | /* 36 */ { 3, s_0_36, -1, 1, 0}
106 | };
107 | 
108 | static const symbol s_1_0[2] = { 'd', 'd' }; /* the seven consonant pairs tested by r_consonant_pair */
109 | static const symbol s_1_1[2] = { 'g', 'd' };
110 | static const symbol s_1_2[2] = { 'n', 'n' };
111 | static const symbol s_1_3[2] = { 'd', 't' };
112 | static const symbol s_1_4[2] = { 'g', 't' };
113 | static const symbol s_1_5[2] = { 'k', 't' };
114 | static const symbol s_1_6[2] = { 't', 't' };
115 | 
116 | static const struct among a_1[7] = /* searched with find_among_b; the caller only tests the outcome for zero / non-zero */
117 | {
118 | /*  0 */ { 2, s_1_0, -1, -1, 0}, /* result field is -1 in every row; no switch is performed on it */
119 | /*  1 */ { 2, s_1_1, -1, -1, 0},
120 | /*  2 */ { 2, s_1_2, -1, -1, 0},
121 | /*  3 */ { 2, s_1_3, -1, -1, 0},
122 | /*  4 */ { 2, s_1_4, -1, -1, 0},
123 | /*  5 */ { 2, s_1_5, -1, -1, 0},
124 | /*  6 */ { 2, s_1_6, -1, -1, 0}
125 | };
126 | 
127 | static const symbol s_2_0[2] = { 'i', 'g' }; /* s_2_N: suffix strings searched by r_other_suffix */
128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' };
129 | static const symbol s_2_2[3] = { 'e', 'l', 's' };
130 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
131 | static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; /* 0xF6 is a single-byte non-ASCII symbol (o with diaeresis in ISO-8859-1, the encoding named by this file) */
132 | 
133 | static const struct among a_2[5] = /* result codes 1..3 select the branch taken in r_other_suffix's switch */
134 | {
135 | /*  0 */ { 2, s_2_0, -1, 1, 0}, /* code 1: the match is deleted */
136 | /*  1 */ { 3, s_2_1, 0, 1, 0},
137 | /*  2 */ { 3, s_2_2, -1, 1, 0},
138 | /*  3 */ { 5, s_2_3, -1, 3, 0}, /* code 3: the match is replaced by s_1 ("full") */
139 | /*  4 */ { 4, s_2_4, -1, 2, 0} /* code 2: the match is replaced by s_0 ('l', 0xF6, 's') */
140 | };
141 | 
142 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; /* grouping "v" (per the generator comments); always passed with bounds 97..246 -- presumably a bitmap indexed from code 97, confirm in the runtime */
143 | 
144 | static const unsigned char g_s_ending[] = { 119, 127, 149 }; /* grouping consulted before deleting a bare "s"; passed with bounds 98..121 */
145 | 
146 | static const symbol s_0[] = { 'l', 0xF6, 's' }; /* replacement written by case 2 of r_other_suffix */
147 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; /* replacement written by case 3 of r_other_suffix */
148 | 
149 | static int r_mark_regions(struct SN_env * z) { /* Computes the marks I[0] (p1 in the generator comments) and I[1] (x). Returns 1 when both are set, 0 when it gives up early. Leaves z->c moved when it succeeds. */
150 |     z->I[0] = z->l; /* default p1 = limit; this value survives every early return below */
151 |     {   int c_test = z->c; /* test, line 29 */
152 |         {   int ret = z->c + 3; /* position three symbols past the cursor */
153 |             if (0 > ret || ret > z->l) return 0; /* fewer than three symbols available: stop, I[1] is left unwritten */
154 |             z->c = ret; /* hop, line 29 */
155 |         }
156 |         z->I[1] = z->c; /* setmark x, line 29 */
157 |         z->c = c_test; /* the "test" construct puts the cursor back where it started */
158 |     }
159 |     if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */
160 |     {    /* gopast */ /* non v, line 30 */
161 |         int ret = in_grouping(z, g_v, 97, 246, 1);
162 |         if (ret < 0) return 0; /* negative result: search failed, keep the default p1 */
163 |         z->c += ret; /* advance by the amount the helper reported */
164 |     }
165 |     z->I[0] = z->c; /* setmark p1, line 30 */
166 |      /* try, line 31 */
167 |     if (!(z->I[0] < z->I[1])) goto lab0; /* p1 is already at or beyond x: leave it */
168 |     z->I[0] = z->I[1]; /* otherwise clamp p1 up to x, so p1 >= cursor + 3 */
169 | lab0:
170 |     return 1;
171 | }
172 | 
173 | static int r_main_suffix(struct SN_env * z) { /* Looks for an a_0 suffix ending at the cursor, no further back than I[0], and deletes it (a bare "s" only after a grouping check). Returns 1 after a delete, 0 when nothing applies, negative on slice failure. */
174 |     int among_var;
175 |     {   int mlimit; /* setlimit, line 37 */
176 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as a distance from the limit */
177 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
178 |         z->c = z->I[0]; /* tomark, line 37 */
179 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0]; mlimit holds the old value */
180 |         z->c = z->l - m1; /* cursor back to its entry position */
181 |         z->ket = z->c; /* [, line 37 */
182 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: the symbol before the cursor must be in 0x60..0x7F with its bit set in 1851442, i.e. one of a d e n r s t (the final letters of the a_0 strings) */
183 |         among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
184 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* no table match: restore the limit and report failure */
185 |         z->bra = z->c; /* ], line 37 */
186 |         z->lb = mlimit; /* limit restored before acting on the match */
187 |     }
188 |     switch(among_var) {
189 |         case 0: return 0;
190 |         case 1:
191 |             {   int ret = slice_del(z); /* delete, line 44 */
192 |                 if (ret < 0) return ret;
193 |             }
194 |             break;
195 |         case 2: /* the bare "s" row of a_0 */
196 |             if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; /* non-zero aborts without editing -- presumably "preceding symbol not in g_s_ending", confirm in the runtime */
197 |             {   int ret = slice_del(z); /* delete, line 46 */
198 |                 if (ret < 0) return ret;
199 |             }
200 |             break;
201 |     }
202 |     return 1;
203 | }
204 | 
205 | static int r_consonant_pair(struct SN_env * z) { /* If the text before the cursor ends in one of the a_1 pairs (searched no further back than I[0]), deletes the single symbol before the cursor. Returns 1 after the delete, 0 if not applicable, negative on slice failure. */
206 |     {   int mlimit; /* setlimit, line 50 */
207 |         int m1 = z->l - z->c; (void)m1;
208 |         if (z->c < z->I[0]) return 0; /* cursor lies before I[0]: nothing to search */
209 |         z->c = z->I[0]; /* tomark, line 50 */
210 |         mlimit = z->lb; z->lb = z->c; /* backward limit temporarily becomes I[0]; it stays raised until line 224 */
211 |         z->c = z->l - m1; /* cursor back to its entry position */
212 |         {   int m2 = z->l - z->c; (void)m2; /* and, line 52 */
213 |             if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* quick reject: need at least two symbols, the last being one of d n t (bits 4, 14, 20 of 1064976; the final letters of the a_1 pairs) */
214 |             if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */
215 |             z->c = z->l - m2; /* "and": rewind to the saved position before the second operand */
216 |             z->ket = z->c; /* [, line 52 */
217 |             if (z->c <= z->lb) { z->lb = mlimit; return 0; } /* no symbol left to step over */
218 |             z->c--; /* next, line 52 */
219 |             z->bra = z->c; /* ], line 52 */
220 |             {   int ret = slice_del(z); /* delete, line 52 */
221 |                 if (ret < 0) return ret; /* NOTE(review): this error return leaves z->lb at the raised limit (mlimit is not written back) */
222 |             }
223 |         }
224 |         z->lb = mlimit; /* limit restored on the success path */
225 |     }
226 |     return 1;
227 | }
228 | 
229 | static int r_other_suffix(struct SN_env * z) { /* With the backward limit temporarily set to mark p1 (I[0]), brackets an ending matched from table a_2 and either deletes it or replaces it with s_0 / s_1, depending on the table result. Returns 1 on success, 0 when nothing matches, negative on a slice error. */
230 |     int among_var;
231 |     {   int mlimit; /* setlimit, line 55 */
232 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
233 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
234 |         z->c = z->I[0]; /* tomark, line 55 */
235 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
236 |         z->c = z->l - m1;
237 |         z->ket = z->c; /* [, line 56 */
238 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check: at least two bytes must lie between limit and cursor and the last must be g, s or t (mask 1572992) */
239 |         among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
240 |         if (!(among_var)) { z->lb = mlimit; return 0; }
241 |         z->bra = z->c; /* ], line 56 */
242 |         switch(among_var) { /* unlike r_main_suffix, the actions run while the temporary limit is still in place */
243 |             case 0: { z->lb = mlimit; return 0; } /* not reachable here: among_var == 0 already returned above */
244 |             case 1:
245 |                 {   int ret = slice_del(z); /* delete, line 57 */
246 |                     if (ret < 0) return ret; /* NOTE(review): z->lb is not restored on the error paths in this switch; the visible caller returns the error straight away */
247 |                 }
248 |                 break;
249 |             case 2:
250 |                 {   int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */
251 |                     if (ret < 0) return ret;
252 |                 }
253 |                 break;
254 |             case 3:
255 |                 {   int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
256 |                     if (ret < 0) return ret;
257 |                 }
258 |                 break;
259 |         }
260 |         z->lb = mlimit;
261 |     }
262 |     return 1;
263 | }
264 | 
265 | extern int swedish_ISO_8859_1_stem(struct SN_env * z) { /* Entry point: runs r_mark_regions forwards, then r_main_suffix, r_consonant_pair and r_other_suffix from the end of the word; the cursor is reset after every step. Returns 1, or a negative code passed up from a step. */
266 |     {   int c1 = z->c; /* do, line 66 */
267 |         {   int ret = r_mark_regions(z);
268 |             if (ret == 0) goto lab0; /* call mark_regions, line 66 */
269 |             if (ret < 0) return ret;
270 |         }
271 |     lab0:
272 |         z->c = c1; /* a step returning 0 is not an error: only the cursor is put back */
273 |     }
274 |     z->lb = z->c; z->c = z->l; /* backwards, line 67 */
275 | 
276 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 68 */
277 |         {   int ret = r_main_suffix(z);
278 |             if (ret == 0) goto lab1; /* call main_suffix, line 68 */
279 |             if (ret < 0) return ret;
280 |         }
281 |     lab1:
282 |         z->c = z->l - m2; /* cursor restored relative to the (possibly shortened) end z->l */
283 |     }
284 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 69 */
285 |         {   int ret = r_consonant_pair(z);
286 |             if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
287 |             if (ret < 0) return ret;
288 |         }
289 |     lab2:
290 |         z->c = z->l - m3;
291 |     }
292 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 70 */
293 |         {   int ret = r_other_suffix(z);
294 |             if (ret == 0) goto lab3; /* call other_suffix, line 70 */
295 |             if (ret < 0) return ret;
296 |         }
297 |     lab3:
298 |         z->c = z->l - m4;
299 |     }
300 |     z->c = z->lb; /* leave the cursor at the position that was stored in lb when the backward pass began */
301 |     return 1;
302 | }
303 | 
304 | extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } /* Returns a new environment from SN_create_env(0, 2, 0); the 2 presumably sizes the integer array used here as I[0]/I[1] - confirm in runtime/api.c */
305 | 
306 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* Hands the environment to SN_close_env(z, 0); the 0 presumably mirrors the first create_env argument - confirm in runtime/api.c */
307 | 
308 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_swedish.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int swedish_UTF_8_stem(struct SN_env * z); /* stemming entry point, defined at the end of this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_other_suffix(struct SN_env * z); /* forward declarations of the file-local routines called by the entry point */
 14 | static int r_consonant_pair(struct SN_env * z);
 15 | static int r_main_suffix(struct SN_env * z);
 16 | static int r_mark_regions(struct SN_env * z);
 17 | #ifdef __cplusplus
 18 | extern "C" {
 19 | #endif
 20 | 
 21 | 
 22 | extern struct SN_env * swedish_UTF_8_create_env(void); /* creates the environment passed to swedish_UTF_8_stem */
 23 | extern void swedish_UTF_8_close_env(struct SN_env * z); /* releases an environment created above */
 24 | 
 25 | 
 26 | #ifdef __cplusplus
 27 | }
 28 | #endif
 29 | static const symbol s_0_0[1] = { 'a' }; /* s_0_N: the bytes of row N of table a_0 below, one array per ending */
 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' };
 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' };
 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' };
 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' };
 34 | static const symbol s_0_5[2] = { 'a', 'd' };
 35 | static const symbol s_0_6[1] = { 'e' };
 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' };
 37 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' };
 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' };
 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' };
 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' };
 41 | static const symbol s_0_12[2] = { 'e', 'n' };
 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' };
 43 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' };
 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' };
 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' };
 46 | static const symbol s_0_17[2] = { 'a', 'r' };
 47 | static const symbol s_0_18[2] = { 'e', 'r' };
 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' };
 49 | static const symbol s_0_20[2] = { 'o', 'r' };
 50 | static const symbol s_0_21[1] = { 's' };
 51 | static const symbol s_0_22[2] = { 'a', 's' };
 52 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' };
 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' };
 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' };
 55 | static const symbol s_0_26[2] = { 'e', 's' };
 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' };
 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' };
 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' };
 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' };
 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' };
 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' };
 62 | static const symbol s_0_33[2] = { 'a', 't' };
 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' };
 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' };
 65 | static const symbol s_0_36[3] = { 'a', 's', 't' };
 66 | 
 67 | static const struct among a_0[37] = /* endings searched by r_main_suffix. Row layout as read from the data: {byte length, bytes, index of a shorter row that is a suffix of this one (-1 if none), value returned as among_var, 0}. Field meanings are inferred - confirm against struct among in runtime/header.h */
 68 | {
 69 | /*  0 */ { 1, s_0_0, -1, 1, 0},
 70 | /*  1 */ { 4, s_0_1, 0, 1, 0},
 71 | /*  2 */ { 4, s_0_2, 0, 1, 0},
 72 | /*  3 */ { 7, s_0_3, 2, 1, 0},
 73 | /*  4 */ { 4, s_0_4, 0, 1, 0},
 74 | /*  5 */ { 2, s_0_5, -1, 1, 0},
 75 | /*  6 */ { 1, s_0_6, -1, 1, 0},
 76 | /*  7 */ { 3, s_0_7, 6, 1, 0},
 77 | /*  8 */ { 4, s_0_8, 6, 1, 0},
 78 | /*  9 */ { 4, s_0_9, 6, 1, 0},
 79 | /* 10 */ { 3, s_0_10, 6, 1, 0},
 80 | /* 11 */ { 4, s_0_11, 6, 1, 0},
 81 | /* 12 */ { 2, s_0_12, -1, 1, 0},
 82 | /* 13 */ { 5, s_0_13, 12, 1, 0},
 83 | /* 14 */ { 4, s_0_14, 12, 1, 0},
 84 | /* 15 */ { 5, s_0_15, 12, 1, 0},
 85 | /* 16 */ { 3, s_0_16, -1, 1, 0},
 86 | /* 17 */ { 2, s_0_17, -1, 1, 0},
 87 | /* 18 */ { 2, s_0_18, -1, 1, 0},
 88 | /* 19 */ { 5, s_0_19, 18, 1, 0},
 89 | /* 20 */ { 2, s_0_20, -1, 1, 0},
 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, /* bare 's' is the only row with result 2, the case r_main_suffix guards with g_s_ending */
 91 | /* 22 */ { 2, s_0_22, 21, 1, 0},
 92 | /* 23 */ { 5, s_0_23, 22, 1, 0},
 93 | /* 24 */ { 5, s_0_24, 22, 1, 0},
 94 | /* 25 */ { 5, s_0_25, 22, 1, 0},
 95 | /* 26 */ { 2, s_0_26, 21, 1, 0},
 96 | /* 27 */ { 4, s_0_27, 26, 1, 0},
 97 | /* 28 */ { 5, s_0_28, 26, 1, 0},
 98 | /* 29 */ { 3, s_0_29, 21, 1, 0},
 99 | /* 30 */ { 5, s_0_30, 29, 1, 0},
100 | /* 31 */ { 6, s_0_31, 29, 1, 0},
101 | /* 32 */ { 4, s_0_32, 21, 1, 0},
102 | /* 33 */ { 2, s_0_33, -1, 1, 0},
103 | /* 34 */ { 5, s_0_34, -1, 1, 0},
104 | /* 35 */ { 3, s_0_35, -1, 1, 0},
105 | /* 36 */ { 3, s_0_36, -1, 1, 0}
106 | };
107 | 
108 | static const symbol s_1_0[2] = { 'd', 'd' }; /* s_1_N: the two bytes of row N of table a_1 below */
109 | static const symbol s_1_1[2] = { 'g', 'd' };
110 | static const symbol s_1_2[2] = { 'n', 'n' };
111 | static const symbol s_1_3[2] = { 'd', 't' };
112 | static const symbol s_1_4[2] = { 'g', 't' };
113 | static const symbol s_1_5[2] = { 'k', 't' };
114 | static const symbol s_1_6[2] = { 't', 't' };
115 | 
116 | static const struct among a_1[7] = /* two-letter endings tested by r_consonant_pair, which only checks the find_among_b() result for non-zero; every row carries -1 in both the third and fourth fields */
117 | {
118 | /*  0 */ { 2, s_1_0, -1, -1, 0},
119 | /*  1 */ { 2, s_1_1, -1, -1, 0},
120 | /*  2 */ { 2, s_1_2, -1, -1, 0},
121 | /*  3 */ { 2, s_1_3, -1, -1, 0},
122 | /*  4 */ { 2, s_1_4, -1, -1, 0},
123 | /*  5 */ { 2, s_1_5, -1, -1, 0},
124 | /*  6 */ { 2, s_1_6, -1, -1, 0}
125 | };
126 | 
127 | static const symbol s_2_0[2] = { 'i', 'g' }; /* s_2_N: the bytes of row N of table a_2 below */
128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' };
129 | static const symbol s_2_2[3] = { 'e', 'l', 's' };
130 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' };
131 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; /* 0xC3 0xB6 is the UTF-8 encoding of U+00F6 (o with diaeresis), so this ending is five bytes long */
132 | 
133 | static const struct among a_2[5] = /* endings searched by r_other_suffix; the fourth field selects its switch case: 1 deletes, 2 replaces with s_0, 3 replaces with s_1 */
134 | {
135 | /*  0 */ { 2, s_2_0, -1, 1, 0},
136 | /*  1 */ { 3, s_2_1, 0, 1, 0},
137 | /*  2 */ { 3, s_2_2, -1, 1, 0},
138 | /*  3 */ { 5, s_2_3, -1, 3, 0},
139 | /*  4 */ { 5, s_2_4, -1, 2, 0}
140 | };
141 | 
142 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; /* bitmap passed to in/out_grouping_U with code-point range 97..246; assuming bit (ch - 97) marks membership it encodes a e i o u y plus U+00E4, U+00E5, U+00F6 - confirm the bit layout in the runtime */
143 | 
144 | static const unsigned char g_s_ending[] = { 119, 127, 149 }; /* bitmap passed to in_grouping_b_U with range 98..121; under the same assumed layout it encodes b c d f g h j k l m n o p r t v y */
145 | 
146 | static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; /* 4-byte replacement used by r_other_suffix case 2 (0xC3 0xB6 = U+00F6 in UTF-8) */
147 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; /* 4-byte replacement used by r_other_suffix case 3 */
148 | 
149 | static int r_mark_regions(struct SN_env * z) { /* Sets mark p1 (I[0]) and mark x (I[1]): x is the position three characters past the cursor, p1 the position reached by the two grouping scans below, raised to x if it falls short. Returns 1 when p1 was computed, 0 when the hop or a scan fails. */
150 |     z->I[0] = z->l; /* p1 defaults to the end of the word; it stays there if a scan below fails */
151 |     {   int c_test = z->c; /* test, line 29 */
152 |         {   int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
153 |             if (ret < 0) return 0; /* skip_utf8 reported failure for the 3-character hop */
154 |             z->c = ret; /* hop, line 29 */
155 |         }
156 |         z->I[1] = z->c; /* setmark x, line 29 */
157 |         z->c = c_test; /* the hop was only a test: cursor goes back */
158 |     }
159 |     if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */
160 |     {    /* gopast */ /* non v, line 30 */
161 |         int ret = in_grouping_U(z, g_v, 97, 246, 1);
162 |         if (ret < 0) return 0;
163 |         z->c += ret; /* advance by the amount the helper reports */
164 |     }
165 |     z->I[0] = z->c; /* setmark p1, line 30 */
166 |      /* try, line 31 */
167 |     if (!(z->I[0] < z->I[1])) goto lab0; /* p1 is already at or beyond x: keep it */
168 |     z->I[0] = z->I[1]; /* otherwise p1 is moved up to x */
169 | lab0:
170 |     return 1;
171 | }
172 | 
173 | static int r_main_suffix(struct SN_env * z) { /* Brackets an ending matched from table a_0 just before the cursor (backward limit temporarily set to mark p1, I[0]) and calls slice_del() on it; a bare 's' (result 2) is deleted only if the g_s_ending test returns 0. Returns 1 after a deletion, 0 when nothing applies, or the negative slice_del() result. */
174 |     int among_var;
175 |     {   int mlimit; /* setlimit, line 37 */
176 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
177 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
178 |         z->c = z->I[0]; /* tomark, line 37 */
179 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
180 |         z->c = z->l - m1; /* cursor back where it was */
181 |         z->ket = z->c; /* [, line 37 */
182 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check before the table search: the byte before the cursor must be one of a d e n r s t, the final letters of the a_0 rows (mask 1851442) */
183 |         among_var = find_among_b(z, a_0, 37); /* substring, line 37 */
184 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* nothing matched: restore the limit and fail */
185 |         z->bra = z->c; /* ], line 37 */
186 |         z->lb = mlimit; /* limit restored before acting on the match */
187 |     }
188 |     switch(among_var) {
189 |         case 0: return 0; /* not reachable here: among_var == 0 already returned above */
190 |         case 1:
191 |             {   int ret = slice_del(z); /* delete, line 44 */
192 |                 if (ret < 0) return ret;
193 |             }
194 |             break;
195 |         case 2:
196 |             if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; /* a non-zero result from the g_s_ending test leaves the word untouched */
197 |             {   int ret = slice_del(z); /* delete, line 46 */
198 |                 if (ret < 0) return ret;
199 |             }
200 |             break;
201 |     }
202 |     return 1;
203 | }
204 | 
205 | static int r_consonant_pair(struct SN_env * z) { /* With the backward limit temporarily set to mark p1 (I[0]), tests table a_1 at the cursor; on a match the one character just before the cursor (located with skip_utf8) is bracketed and passed to slice_del(). Returns 1 on success, 0 when the test fails, negative on slice_del() error. */
206 |     {   int mlimit; /* setlimit, line 50 */
207 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
208 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
209 |         z->c = z->I[0]; /* tomark, line 50 */
210 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
211 |         z->c = z->l - m1;
212 |         {   int m2 = z->l - z->c; (void)m2; /* and, line 52 */
213 |             if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check: at least two bytes must lie between limit and cursor and the last must be d, n or t, the final letters of the a_1 rows (mask 1064976) */
214 |             if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */
215 |             z->c = z->l - m2; /* the table lookup was only a test: cursor goes back to where it started */
216 |             z->ket = z->c; /* [, line 52 */
217 |             {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
218 |                 if (ret < 0) { z->lb = mlimit; return 0; } /* no character available before the cursor within the limit */
219 |                 z->c = ret; /* next, line 52 */
220 |             }
221 |             z->bra = z->c; /* ], line 52 */
222 |             {   int ret = slice_del(z); /* delete, line 52 */
223 |                 if (ret < 0) return ret; /* NOTE(review): z->lb still holds the temporary limit on this path; the caller returns the error straight away */
224 |             }
225 |         }
226 |         z->lb = mlimit;
227 |     }
228 |     return 1;
229 | }
230 | 
231 | static int r_other_suffix(struct SN_env * z) { /* With the backward limit temporarily set to mark p1 (I[0]), brackets an ending matched from table a_2 and either deletes it (result 1) or replaces it with s_0 (result 2) or s_1 (result 3). Returns 1 on success, 0 when nothing matches, negative on a slice error. */
232 |     int among_var;
233 |     {   int mlimit; /* setlimit, line 55 */
234 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
235 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
236 |         z->c = z->I[0]; /* tomark, line 55 */
237 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
238 |         z->c = z->l - m1;
239 |         z->ket = z->c; /* [, line 56 */
240 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check: at least two bytes must lie between limit and cursor and the last must be g, s or t, the final letters of the a_2 rows (mask 1572992) */
241 |         among_var = find_among_b(z, a_2, 5); /* substring, line 56 */
242 |         if (!(among_var)) { z->lb = mlimit; return 0; }
243 |         z->bra = z->c; /* ], line 56 */
244 |         switch(among_var) { /* unlike r_main_suffix, the actions run while the temporary limit is still in place */
245 |             case 0: { z->lb = mlimit; return 0; } /* not reachable here: among_var == 0 already returned above */
246 |             case 1:
247 |                 {   int ret = slice_del(z); /* delete, line 57 */
248 |                     if (ret < 0) return ret; /* NOTE(review): z->lb is not restored on the error paths in this switch; the caller returns the error straight away */
249 |                 }
250 |                 break;
251 |             case 2:
252 |                 {   int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */
253 |                     if (ret < 0) return ret;
254 |                 }
255 |                 break;
256 |             case 3:
257 |                 {   int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */
258 |                     if (ret < 0) return ret;
259 |                 }
260 |                 break;
261 |         }
262 |         z->lb = mlimit;
263 |     }
264 |     return 1;
265 | }
266 | 
267 | extern int swedish_UTF_8_stem(struct SN_env * z) { /* Entry point: runs r_mark_regions forwards, then r_main_suffix, r_consonant_pair and r_other_suffix from the end of the word; the cursor is reset after every step. Returns 1, or a negative code passed up from a step. */
268 |     {   int c1 = z->c; /* do, line 66 */
269 |         {   int ret = r_mark_regions(z);
270 |             if (ret == 0) goto lab0; /* call mark_regions, line 66 */
271 |             if (ret < 0) return ret;
272 |         }
273 |     lab0:
274 |         z->c = c1; /* a step returning 0 is not an error: only the cursor is put back */
275 |     }
276 |     z->lb = z->c; z->c = z->l; /* backwards, line 67 */
277 | 
278 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 68 */
279 |         {   int ret = r_main_suffix(z);
280 |             if (ret == 0) goto lab1; /* call main_suffix, line 68 */
281 |             if (ret < 0) return ret;
282 |         }
283 |     lab1:
284 |         z->c = z->l - m2; /* cursor restored relative to the (possibly shortened) end z->l */
285 |     }
286 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 69 */
287 |         {   int ret = r_consonant_pair(z);
288 |             if (ret == 0) goto lab2; /* call consonant_pair, line 69 */
289 |             if (ret < 0) return ret;
290 |         }
291 |     lab2:
292 |         z->c = z->l - m3;
293 |     }
294 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 70 */
295 |         {   int ret = r_other_suffix(z);
296 |             if (ret == 0) goto lab3; /* call other_suffix, line 70 */
297 |             if (ret < 0) return ret;
298 |         }
299 |     lab3:
300 |         z->c = z->l - m4;
301 |     }
302 |     z->c = z->lb; /* leave the cursor at the position that was stored in lb when the backward pass began */
303 |     return 1;
304 | }
305 | 
306 | extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } /* Returns a new environment from SN_create_env(0, 2, 0); the 2 presumably sizes the integer array used here as I[0]/I[1] - confirm in runtime/api.c */
307 | 
308 | extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } /* Hands the environment to SN_close_env(z, 0); the 0 presumably mirrors the first create_env argument - confirm in runtime/api.c */
309 | 
310 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_ISO_8859_1_danish.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int danish_ISO_8859_1_stem(struct SN_env * z); /* stemming entry point, defined at the end of this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_undouble(struct SN_env * z); /* forward declarations of the file-local routines called by the entry point */
 14 | static int r_other_suffix(struct SN_env * z);
 15 | static int r_consonant_pair(struct SN_env * z);
 16 | static int r_main_suffix(struct SN_env * z);
 17 | static int r_mark_regions(struct SN_env * z);
 18 | #ifdef __cplusplus
 19 | extern "C" {
 20 | #endif
 21 | 
 22 | 
 23 | extern struct SN_env * danish_ISO_8859_1_create_env(void); /* creates the environment passed to danish_ISO_8859_1_stem */
 24 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); /* releases an environment created above */
 25 | 
 26 | 
 27 | #ifdef __cplusplus
 28 | }
 29 | #endif
 30 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; /* s_0_N: the bytes of row N of table a_0 below, one array per ending */
 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
 33 | static const symbol s_0_3[1] = { 'e' };
 34 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' };
 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' };
 40 | static const symbol s_0_10[2] = { 'e', 'n' };
 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
 43 | static const symbol s_0_13[2] = { 'e', 'r' };
 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
 46 | static const symbol s_0_16[1] = { 's' };
 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
 48 | static const symbol s_0_18[2] = { 'e', 's' };
 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
 51 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' };
 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' };
 58 | static const symbol s_0_28[3] = { 'e', 't', 's' };
 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
 60 | static const symbol s_0_30[2] = { 'e', 't' };
 61 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
 62 | 
 63 | static const struct among a_0[32] = /* endings searched by r_main_suffix. Row layout as read from the data: {byte length, bytes, index of a shorter row that is a suffix of this one (-1 if none), value returned as among_var, 0}. Field meanings are inferred - confirm against struct among in runtime/header.h */
 64 | {
 65 | /*  0 */ { 3, s_0_0, -1, 1, 0},
 66 | /*  1 */ { 5, s_0_1, 0, 1, 0},
 67 | /*  2 */ { 4, s_0_2, -1, 1, 0},
 68 | /*  3 */ { 1, s_0_3, -1, 1, 0},
 69 | /*  4 */ { 5, s_0_4, 3, 1, 0},
 70 | /*  5 */ { 4, s_0_5, 3, 1, 0},
 71 | /*  6 */ { 6, s_0_6, 5, 1, 0},
 72 | /*  7 */ { 3, s_0_7, 3, 1, 0},
 73 | /*  8 */ { 4, s_0_8, 3, 1, 0},
 74 | /*  9 */ { 3, s_0_9, 3, 1, 0},
 75 | /* 10 */ { 2, s_0_10, -1, 1, 0},
 76 | /* 11 */ { 5, s_0_11, 10, 1, 0},
 77 | /* 12 */ { 4, s_0_12, 10, 1, 0},
 78 | /* 13 */ { 2, s_0_13, -1, 1, 0},
 79 | /* 14 */ { 5, s_0_14, 13, 1, 0},
 80 | /* 15 */ { 4, s_0_15, 13, 1, 0},
 81 | /* 16 */ { 1, s_0_16, -1, 2, 0}, /* bare 's' is the only row with result 2, the case r_main_suffix guards with g_s_ending */
 82 | /* 17 */ { 4, s_0_17, 16, 1, 0},
 83 | /* 18 */ { 2, s_0_18, 16, 1, 0},
 84 | /* 19 */ { 5, s_0_19, 18, 1, 0},
 85 | /* 20 */ { 7, s_0_20, 19, 1, 0},
 86 | /* 21 */ { 4, s_0_21, 18, 1, 0},
 87 | /* 22 */ { 5, s_0_22, 18, 1, 0},
 88 | /* 23 */ { 4, s_0_23, 18, 1, 0},
 89 | /* 24 */ { 3, s_0_24, 16, 1, 0},
 90 | /* 25 */ { 6, s_0_25, 24, 1, 0},
 91 | /* 26 */ { 5, s_0_26, 24, 1, 0},
 92 | /* 27 */ { 3, s_0_27, 16, 1, 0},
 93 | /* 28 */ { 3, s_0_28, 16, 1, 0},
 94 | /* 29 */ { 5, s_0_29, 28, 1, 0},
 95 | /* 30 */ { 2, s_0_30, -1, 1, 0},
 96 | /* 31 */ { 4, s_0_31, 30, 1, 0}
 97 | };
 98 | 
 99 | static const symbol s_1_0[2] = { 'g', 'd' }; /* s_1_N: the two bytes of row N of table a_1 below */
100 | static const symbol s_1_1[2] = { 'd', 't' };
101 | static const symbol s_1_2[2] = { 'g', 't' };
102 | static const symbol s_1_3[2] = { 'k', 't' };
103 | 
104 | static const struct among a_1[4] = /* two-letter endings tested by r_consonant_pair, which only checks the find_among_b() result for non-zero; every row carries -1 in both the third and fourth fields */
105 | {
106 | /*  0 */ { 2, s_1_0, -1, -1, 0},
107 | /*  1 */ { 2, s_1_1, -1, -1, 0},
108 | /*  2 */ { 2, s_1_2, -1, -1, 0},
109 | /*  3 */ { 2, s_1_3, -1, -1, 0}
110 | };
111 | 
112 | static const symbol s_2_0[2] = { 'i', 'g' }; /* s_2_N: the bytes of row N of table a_2 below */
113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' };
114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
115 | static const symbol s_2_3[3] = { 'e', 'l', 's' };
116 | static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; /* 0xF8 is o with stroke in ISO-8859-1, the encoding named in this file's identifiers */
117 | 
118 | static const struct among a_2[5] = /* endings searched by r_other_suffix; the fourth field selects its switch case: 1 deletes (then runs r_consonant_pair), 2 replaces with s_2 */
119 | {
120 | /*  0 */ { 2, s_2_0, -1, 1, 0},
121 | /*  1 */ { 3, s_2_1, 0, 1, 0},
122 | /*  2 */ { 4, s_2_2, 1, 1, 0},
123 | /*  3 */ { 3, s_2_3, -1, 1, 0},
124 | /*  4 */ { 4, s_2_4, -1, 2, 0}
125 | };
126 | 
127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; /* bitmap passed to in/out_grouping with byte range 97..248; assuming bit (ch - 97) marks membership it encodes a e i o u y plus 0xE5, 0xE6, 0xF8 - confirm the bit layout in the runtime */
128 | 
129 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; /* bitmap passed to in_grouping_b with range 97..229; under the same assumed layout it encodes a b c d f g h j k l m n o p r t v y z plus 0xE5 */
130 | 
131 | static const symbol s_0[] = { 's', 't' }; /* first string compared by r_other_suffix */
132 | static const symbol s_1[] = { 'i', 'g' }; /* second string compared by r_other_suffix, after s_0 has matched */
133 | static const symbol s_2[] = { 'l', 0xF8, 's' }; /* 3-byte replacement used by r_other_suffix case 2 */
134 | 
135 | static int r_mark_regions(struct SN_env * z) { /* Sets mark p1 (I[0]) and mark x (I[1]): x is the position three bytes past the cursor, p1 the position reached by the two grouping scans below, raised to x if it falls short. Returns 1 when p1 was computed, 0 when the hop or a scan fails. */
136 |     z->I[0] = z->l; /* p1 defaults to the end of the word; it stays there if a scan below fails */
137 |     {   int c_test = z->c; /* test, line 33 */
138 |         {   int ret = z->c + 3;
139 |             if (0 > ret || ret > z->l) return 0; /* fewer than three bytes remain (or the sum went negative) */
140 |             z->c = ret; /* hop, line 33 */
141 |         }
142 |         z->I[1] = z->c; /* setmark x, line 33 */
143 |         z->c = c_test; /* the hop was only a test: cursor goes back */
144 |     }
145 |     if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
146 |     {    /* gopast */ /* non v, line 34 */
147 |         int ret = in_grouping(z, g_v, 97, 248, 1);
148 |         if (ret < 0) return 0;
149 |         z->c += ret; /* advance by the amount the helper reports */
150 |     }
151 |     z->I[0] = z->c; /* setmark p1, line 34 */
152 |      /* try, line 35 */
153 |     if (!(z->I[0] < z->I[1])) goto lab0; /* p1 is already at or beyond x: keep it */
154 |     z->I[0] = z->I[1]; /* otherwise p1 is moved up to x */
155 | lab0:
156 |     return 1;
157 | }
158 | 
159 | static int r_main_suffix(struct SN_env * z) { /* Brackets an ending matched from table a_0 just before the cursor (backward limit temporarily set to mark p1, I[0]) and calls slice_del() on it; a bare 's' (result 2) is deleted only if the g_s_ending test returns 0. Returns 1 after a deletion, 0 when nothing applies, or the negative slice_del() result. */
160 |     int among_var;
161 |     {   int mlimit; /* setlimit, line 41 */
162 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
163 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
164 |         z->c = z->I[0]; /* tomark, line 41 */
165 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
166 |         z->c = z->l - m1; /* cursor back where it was */
167 |         z->ket = z->c; /* [, line 41 */
168 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check before the table search: the byte before the cursor must be one of d e n r s t, the final letters of the a_0 rows (mask 1851440) */
169 |         among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
170 |         if (!(among_var)) { z->lb = mlimit; return 0; } /* nothing matched: restore the limit and fail */
171 |         z->bra = z->c; /* ], line 41 */
172 |         z->lb = mlimit; /* limit restored before acting on the match */
173 |     }
174 |     switch(among_var) {
175 |         case 0: return 0; /* not reachable here: among_var == 0 already returned above */
176 |         case 1:
177 |             {   int ret = slice_del(z); /* delete, line 48 */
178 |                 if (ret < 0) return ret;
179 |             }
180 |             break;
181 |         case 2:
182 |             if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; /* a non-zero result from the g_s_ending test leaves the word untouched */
183 |             {   int ret = slice_del(z); /* delete, line 50 */
184 |                 if (ret < 0) return ret;
185 |             }
186 |             break;
187 |     }
188 |     return 1;
189 | }
190 | 
191 | static int r_consonant_pair(struct SN_env * z) { /* Tests (without keeping the cursor movement) whether table a_1 matches at the cursor within the region limited by mark p1 (I[0]); if so, steps back one byte and calls slice_del() on the slice from there to the ket set during the test. Returns 1 on success, 0 when the test fails, negative on slice_del() error. */
192 |     {   int m_test = z->l - z->c; /* test, line 55 */
193 |         {   int mlimit; /* setlimit, line 56 */
194 |             int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
195 |             if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
196 |             z->c = z->I[0]; /* tomark, line 56 */
197 |             mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
198 |             z->c = z->l - m1;
199 |             z->ket = z->c; /* [, line 56 */
200 |             if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } /* cheap pre-check: at least two bytes must lie between limit and cursor and the last must be 'd' (100) or 't' (116), the final letters of the a_1 rows */
201 |             if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */
202 |             z->bra = z->c; /* ], line 56 */
203 |             z->lb = mlimit;
204 |         }
205 |         z->c = z->l - m_test; /* end of the test: cursor returns to where it started */
206 |     }
207 |     if (z->c <= z->lb) return 0; /* no byte available before the cursor */
208 |     z->c--; /* next, line 62 */
209 |     z->bra = z->c; /* ], line 62 */
210 |     {   int ret = slice_del(z); /* delete, line 62 */
211 |         if (ret < 0) return ret;
212 |     }
213 |     return 1;
214 | }
215 | 
216 | static int r_other_suffix(struct SN_env * z) { /* Two steps: (1) if s_0 ('st') and then s_1 ('ig') match backwards from the cursor, slice_del() is called on the slice bracketed around s_0; (2) within the region limited by mark p1 (I[0]) an ending from table a_2 is deleted (followed by r_consonant_pair) or replaced with s_2. Returns 1 on success, 0 when step 2 finds nothing, negative on a slice error. */
217 |     int among_var;
218 |     {   int m1 = z->l - z->c; (void)m1; /* do, line 66 */
219 |         z->ket = z->c; /* [, line 66 */
220 |         if (!(eq_s_b(z, 2, s_0))) goto lab0; /* s_0 must match; presumably eq_s_b compares backwards and moves the cursor on success - confirm in the runtime */
221 |         z->bra = z->c; /* ], line 66 */
222 |         if (!(eq_s_b(z, 2, s_1))) goto lab0; /* s_1 is only required to be present; bra was fixed before this call */
223 |         {   int ret = slice_del(z); /* delete, line 66 */
224 |             if (ret < 0) return ret;
225 |         }
226 |     lab0:
227 |         z->c = z->l - m1; /* whatever happened in step 1, the cursor returns to its offset from the end */
228 |     }
229 |     {   int mlimit; /* setlimit, line 67 */
230 |         int m2 = z->l - z->c; (void)m2; /* cursor remembered as its distance from the end z->l */
231 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
232 |         z->c = z->I[0]; /* tomark, line 67 */
233 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
234 |         z->c = z->l - m2;
235 |         z->ket = z->c; /* [, line 67 */
236 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* cheap pre-check: at least two bytes must lie between limit and cursor and the last must be g, s or t, the final letters of the a_2 rows (mask 1572992) */
237 |         among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
238 |         if (!(among_var)) { z->lb = mlimit; return 0; }
239 |         z->bra = z->c; /* ], line 67 */
240 |         z->lb = mlimit; /* limit restored before acting on the match */
241 |     }
242 |     switch(among_var) {
243 |         case 0: return 0; /* not reachable here: among_var == 0 already returned above */
244 |         case 1:
245 |             {   int ret = slice_del(z); /* delete, line 70 */
246 |                 if (ret < 0) return ret;
247 |             }
248 |             {   int m3 = z->l - z->c; (void)m3; /* do, line 70 */
249 |                 {   int ret = r_consonant_pair(z);
250 |                     if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
251 |                     if (ret < 0) return ret;
252 |                 }
253 |             lab1:
254 |                 z->c = z->l - m3; /* a 0 from r_consonant_pair is not a failure of this routine */
255 |             }
256 |             break;
257 |         case 2:
258 |             {   int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */
259 |                 if (ret < 0) return ret;
260 |             }
261 |             break;
262 |     }
263 |     return 1;
264 | }
265 | 
266 | static int r_undouble(struct SN_env * z) { /* Within the region limited by mark p1 (I[0]), brackets what out_grouping_b() accepts before the cursor (tested against g_v), stores that slice in S[0], and calls slice_del() on it only if eq_v_b() then finds the same string again. Returns 1 after a deletion, 0 when a test fails, -1 if slice_to() returns 0, or the negative slice_del() result. */
267 |     {   int mlimit; /* setlimit, line 76 */
268 |         int m1 = z->l - z->c; (void)m1; /* cursor remembered as its distance from the end z->l */
269 |         if (z->c < z->I[0]) return 0; /* cursor is already before p1 */
270 |         z->c = z->I[0]; /* tomark, line 76 */
271 |         mlimit = z->lb; z->lb = z->c; /* old backward limit kept in mlimit; p1 becomes the limit */
272 |         z->c = z->l - m1;
273 |         z->ket = z->c; /* [, line 76 */
274 |         if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } /* non-zero result: the g_v test failed, so restore the limit and give up */
275 |         z->bra = z->c; /* ], line 76 */
276 |         z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
277 |         if (z->S[0] == 0) return -1; /* -> ch, line 76 */
278 |         z->lb = mlimit;
279 |     }
280 |     if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
281 |     {   int ret = slice_del(z); /* delete, line 78 */
282 |         if (ret < 0) return ret;
283 |     }
284 |     return 1;
285 | }
286 | 
287 | extern int danish_ISO_8859_1_stem(struct SN_env * z) { /* Entry point: runs r_mark_regions forwards, then r_main_suffix, r_consonant_pair, r_other_suffix and r_undouble from the end of the word; the cursor is reset after every step. Returns 1, or a negative code passed up from a step. */
288 |     {   int c1 = z->c; /* do, line 84 */
289 |         {   int ret = r_mark_regions(z);
290 |             if (ret == 0) goto lab0; /* call mark_regions, line 84 */
291 |             if (ret < 0) return ret;
292 |         }
293 |     lab0:
294 |         z->c = c1; /* a step returning 0 is not an error: only the cursor is put back */
295 |     }
296 |     z->lb = z->c; z->c = z->l; /* backwards, line 85 */
297 | 
298 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 86 */
299 |         {   int ret = r_main_suffix(z);
300 |             if (ret == 0) goto lab1; /* call main_suffix, line 86 */
301 |             if (ret < 0) return ret;
302 |         }
303 |     lab1:
304 |         z->c = z->l - m2; /* cursor restored relative to the (possibly shortened) end z->l */
305 |     }
306 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 87 */
307 |         {   int ret = r_consonant_pair(z);
308 |             if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
309 |             if (ret < 0) return ret;
310 |         }
311 |     lab2:
312 |         z->c = z->l - m3;
313 |     }
314 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 88 */
315 |         {   int ret = r_other_suffix(z);
316 |             if (ret == 0) goto lab3; /* call other_suffix, line 88 */
317 |             if (ret < 0) return ret;
318 |         }
319 |     lab3:
320 |         z->c = z->l - m4;
321 |     }
322 |     {   int m5 = z->l - z->c; (void)m5; /* do, line 89 */
323 |         {   int ret = r_undouble(z);
324 |             if (ret == 0) goto lab4; /* call undouble, line 89 */
325 |             if (ret < 0) return ret;
326 |         }
327 |     lab4:
328 |         z->c = z->l - m5;
329 |     }
330 |     z->c = z->lb; /* leave the cursor at the position that was stored in lb when the backward pass began */
331 |     return 1;
332 | }
333 | 
334 | extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } /* Returns a new environment from SN_create_env(1, 2, 0); presumably 1 string slot (S[0], used by r_undouble) and 2 integer slots (I[0]/I[1]) - confirm in runtime/api.c */
335 | 
336 | extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } /* Hands the environment to SN_close_env(z, 1); the 1 presumably mirrors the first create_env argument - confirm in runtime/api.c */
337 | 
338 | 


--------------------------------------------------------------------------------
/libstemmer_c/src_c/stem_UTF_8_danish.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /* This file was generated automatically by the Snowball to ANSI C compiler */
  3 | 
  4 | #include "../runtime/header.h"
  5 | 
  6 | #ifdef __cplusplus
  7 | extern "C" {
  8 | #endif
  9 | extern int danish_UTF_8_stem(struct SN_env * z); /* stemming entry point for this file */
 10 | #ifdef __cplusplus
 11 | }
 12 | #endif
 13 | static int r_undouble(struct SN_env * z); /* forward declarations of the file-local routines */
 14 | static int r_other_suffix(struct SN_env * z);
 15 | static int r_consonant_pair(struct SN_env * z);
 16 | static int r_main_suffix(struct SN_env * z);
 17 | static int r_mark_regions(struct SN_env * z);
 18 | #ifdef __cplusplus
 19 | extern "C" {
 20 | #endif
 21 | 
 22 | 
 23 | extern struct SN_env * danish_UTF_8_create_env(void); /* creates the environment passed to danish_UTF_8_stem */
 24 | extern void danish_UTF_8_close_env(struct SN_env * z); /* releases an environment created above */
 25 | 
 26 | 
 27 | #ifdef __cplusplus
 28 | }
 29 | #endif
 30 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; /* s_0_N: the bytes of row N of the a_0 table that follows, one array per ending; all are plain ASCII letters */
 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' };
 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' };
 33 | static const symbol s_0_3[1] = { 'e' };
 34 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' };
 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' };
 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' };
 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' };
 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' };
 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' };
 40 | static const symbol s_0_10[2] = { 'e', 'n' };
 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' };
 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' };
 43 | static const symbol s_0_13[2] = { 'e', 'r' };
 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' };
 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' };
 46 | static const symbol s_0_16[1] = { 's' };
 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' };
 48 | static const symbol s_0_18[2] = { 'e', 's' };
 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' };
 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' };
 51 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' };
 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' };
 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' };
 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' };
 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' };
 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' };
 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' };
 58 | static const symbol s_0_28[3] = { 'e', 't', 's' };
 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' };
 60 | static const symbol s_0_30[2] = { 'e', 't' };
 61 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' };
 62 | 
 63 | static const struct among a_0[32] =
 64 | {
 65 | /*  0 */ { 3, s_0_0, -1, 1, 0},
 66 | /*  1 */ { 5, s_0_1, 0, 1, 0},
 67 | /*  2 */ { 4, s_0_2, -1, 1, 0},
 68 | /*  3 */ { 1, s_0_3, -1, 1, 0},
 69 | /*  4 */ { 5, s_0_4, 3, 1, 0},
 70 | /*  5 */ { 4, s_0_5, 3, 1, 0},
 71 | /*  6 */ { 6, s_0_6, 5, 1, 0},
 72 | /*  7 */ { 3, s_0_7, 3, 1, 0},
 73 | /*  8 */ { 4, s_0_8, 3, 1, 0},
 74 | /*  9 */ { 3, s_0_9, 3, 1, 0},
 75 | /* 10 */ { 2, s_0_10, -1, 1, 0},
 76 | /* 11 */ { 5, s_0_11, 10, 1, 0},
 77 | /* 12 */ { 4, s_0_12, 10, 1, 0},
 78 | /* 13 */ { 2, s_0_13, -1, 1, 0},
 79 | /* 14 */ { 5, s_0_14, 13, 1, 0},
 80 | /* 15 */ { 4, s_0_15, 13, 1, 0},
 81 | /* 16 */ { 1, s_0_16, -1, 2, 0},
 82 | /* 17 */ { 4, s_0_17, 16, 1, 0},
 83 | /* 18 */ { 2, s_0_18, 16, 1, 0},
 84 | /* 19 */ { 5, s_0_19, 18, 1, 0},
 85 | /* 20 */ { 7, s_0_20, 19, 1, 0},
 86 | /* 21 */ { 4, s_0_21, 18, 1, 0},
 87 | /* 22 */ { 5, s_0_22, 18, 1, 0},
 88 | /* 23 */ { 4, s_0_23, 18, 1, 0},
 89 | /* 24 */ { 3, s_0_24, 16, 1, 0},
 90 | /* 25 */ { 6, s_0_25, 24, 1, 0},
 91 | /* 26 */ { 5, s_0_26, 24, 1, 0},
 92 | /* 27 */ { 3, s_0_27, 16, 1, 0},
 93 | /* 28 */ { 3, s_0_28, 16, 1, 0},
 94 | /* 29 */ { 5, s_0_29, 28, 1, 0},
 95 | /* 30 */ { 2, s_0_30, -1, 1, 0},
 96 | /* 31 */ { 4, s_0_31, 30, 1, 0}
 97 | };
 98 | 
 99 | static const symbol s_1_0[2] = { 'g', 'd' };
100 | static const symbol s_1_1[2] = { 'd', 't' };
101 | static const symbol s_1_2[2] = { 'g', 't' };
102 | static const symbol s_1_3[2] = { 'k', 't' };
103 | /* a_1: the four two-letter endings ("gd", "dt", "gt", "kt") that
    |  * r_consonant_pair looks up with find_among_b(z, a_1, 4). Every row has -1 in
    |  * both the third and fourth fields and 0 in the fifth; r_consonant_pair only
    |  * checks whether the lookup result is non-zero. */
104 | static const struct among a_1[4] =
105 | {
106 | /*  0 */ { 2, s_1_0, -1, -1, 0},
107 | /*  1 */ { 2, s_1_1, -1, -1, 0},
108 | /*  2 */ { 2, s_1_2, -1, -1, 0},
109 | /*  3 */ { 2, s_1_3, -1, -1, 0}
110 | };
111 | 
112 | static const symbol s_2_0[2] = { 'i', 'g' };
113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' };
114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' };
115 | static const symbol s_2_3[3] = { 'e', 'l', 's' };
116 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' };
117 | /* a_2: endings that r_other_suffix looks up with find_among_b(z, a_2, 5).
    |  * "ig", "lig", "elig" and "els" carry 1 in the fourth field; s_2_4 carries 2.
    |  * s_2_4 is 'l', 0xC3 0xB8, 's', 't': the byte pair 0xC3 0xB8 is the UTF-8
    |  * encoding of U+00F8 (o with stroke), which is why its length is 5.
    |  * The third field chains "elig" -> "lig" -> "ig", each a suffix of the last. */
118 | static const struct among a_2[5] =
119 | {
120 | /*  0 */ { 2, s_2_0, -1, 1, 0},
121 | /*  1 */ { 3, s_2_1, 0, 1, 0},
122 | /*  2 */ { 4, s_2_2, 1, 1, 0},
123 | /*  3 */ { 3, s_2_3, -1, 1, 0},
124 | /*  4 */ { 5, s_2_4, -1, 2, 0}
125 | };
126 | /* g_v: table passed, together with the code range 97..248, to
    |  * out_grouping_U / in_grouping_U in r_mark_regions and to out_grouping_b_U
    |  * in r_undouble. NOTE(review): presumably a bitmap of the Danish vowels (the
    |  * generated annotations call the grouping "v") - confirm against the
    |  * Snowball source for this stemmer. */
127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
128 | /* g_s_ending: table passed with range 97..229 to in_grouping_b_U() by
    |  * r_main_suffix before it deletes a bare "s". */
129 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
130 | /* Literal byte strings used by r_other_suffix: s_0 = "st", s_1 = "ig", and
    |  * s_2 = 'l', 0xC3 0xB8, 's' (4 bytes; 0xC3 0xB8 is UTF-8 for U+00F8). */
131 | static const symbol s_0[] = { 's', 't' };
132 | static const symbol s_1[] = { 'i', 'g' };
133 | static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' };
134 | /* r_mark_regions: computes the two marks kept in z->I[] for the later steps.
    |  *   - I[1] receives the position 3 characters after the starting cursor
    |  *     (skip_utf8 with +3); the cursor itself is put back afterwards.
    |  *   - I[0] starts as z->l, then receives the cursor position reached by the
    |  *     out_grouping_U / in_grouping_U scan over g_v (annotated "goto grouping
    |  *     v" and "gopast non v" by the generator).
    |  *   - Finally I[0] is raised to I[1] if it is smaller.
    |  * Returns 1 on success, and 0 as soon as the hop or either scan reports a
    |  * negative result; in that case I[0] keeps its initial value z->l. The cursor
    |  * is left wherever the scan stopped; danish_UTF_8_stem restores it. */
135 | static int r_mark_regions(struct SN_env * z) {
136 |     z->I[0] = z->l; /* default for I[0], kept if the function returns 0 below */
137 |     {   int c_test = z->c; /* test, line 33 */
138 |         {   int ret = skip_utf8(z->p, z->c, 0, z->l, + 3);
139 |             if (ret < 0) return 0;
140 |             z->c = ret; /* hop, line 33 */
141 |         }
142 |         z->I[1] = z->c; /* setmark x, line 33 */
143 |         z->c = c_test;
144 |     }
145 |     if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */
146 |     {    /* gopast */ /* non v, line 34 */
147 |         int ret = in_grouping_U(z, g_v, 97, 248, 1);
148 |         if (ret < 0) return 0;
149 |         z->c += ret;
150 |     }
151 |     z->I[0] = z->c; /* setmark p1, line 34 */
152 |      /* try, line 35 */
153 |     if (!(z->I[0] < z->I[1])) goto lab0;
154 |     z->I[0] = z->I[1]; /* I[0] never ends up left of the 3-character mark I[1] */
155 | lab0:
156 |     return 1;
157 | }
158 | /* r_main_suffix: working backwards from the cursor, removes one of the a_0
    |  * suffixes, looking no further left than the mark z->I[0].
    |  *   - Returns 0 without changes when the cursor is left of I[0], when the
    |  *     byte before the cursor cannot end any a_0 entry, or when find_among_b()
    |  *     finds nothing.
    |  *   - For entries with result 1, the slice delimited by z->bra and z->ket is
    |  *     deleted.
    |  *   - For result 2 (the bare "s"), the slice is deleted only if
    |  *     in_grouping_b_U() over g_s_ending returns 0; otherwise returns 0.
    |  * z->lb is narrowed to I[0] only for the lookup and is restored on every
    |  * exit path of that block. Returns 1 on success; a negative value from
    |  * slice_del() is passed through unchanged. */
159 | static int r_main_suffix(struct SN_env * z) {
160 |     int among_var;
161 |     {   int mlimit; /* setlimit, line 41 */
162 |         int m1 = z->l - z->c; (void)m1;
163 |         if (z->c < z->I[0]) return 0;
164 |         z->c = z->I[0]; /* tomark, line 41 */
165 |         mlimit = z->lb; z->lb = z->c;
166 |         z->c = z->l - m1;
167 |         z->ket = z->c; /* [, line 41 */
168 |         if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* fast reject: previous byte must lie in 0x60..0x7F and be one of d, e, n, r, s, t (the bits set in 1851440) */
169 |         among_var = find_among_b(z, a_0, 32); /* substring, line 41 */
170 |         if (!(among_var)) { z->lb = mlimit; return 0; }
171 |         z->bra = z->c; /* ], line 41 */
172 |         z->lb = mlimit;
173 |     }
174 |     switch(among_var) {
175 |         case 0: return 0;
176 |         case 1:
177 |             {   int ret = slice_del(z); /* delete, line 48 */
178 |                 if (ret < 0) return ret;
179 |             }
180 |             break;
181 |         case 2:
182 |             if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0;
183 |             {   int ret = slice_del(z); /* delete, line 50 */
184 |                 if (ret < 0) return ret;
185 |             }
186 |             break;
187 |     }
188 |     return 1;
189 | }
190 | /* r_consonant_pair: if the text before the cursor ends in one of the a_1
    |  * pairs ("gd", "dt", "gt", "kt"), found without looking left of z->I[0],
    |  * deletes the last character of that pair.
    |  * The lookup runs as a test: the cursor is put back to its entry position
    |  * afterwards, while z->ket stays at that position. skip_utf8(..., -1) then
    |  * steps one character back, z->bra is set there, and slice_del() removes
    |  * the slice between bra and ket.
    |  * Returns 0 if the cursor is left of I[0], no pair matches, or the
    |  * one-character step fails; 1 after a deletion; a negative slice_del()
    |  * result is passed through. */
191 | static int r_consonant_pair(struct SN_env * z) {
192 |     {   int m_test = z->l - z->c; /* test, line 55 */
193 |         {   int mlimit; /* setlimit, line 56 */
194 |             int m1 = z->l - z->c; (void)m1;
195 |             if (z->c < z->I[0]) return 0;
196 |             z->c = z->I[0]; /* tomark, line 56 */
197 |             mlimit = z->lb; z->lb = z->c;
198 |             z->c = z->l - m1;
199 |             z->ket = z->c; /* [, line 56 */
200 |             if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } /* fast reject: need 2 bytes above lb and a final 'd' (100) or 't' (116) */
201 |             if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */
202 |             z->bra = z->c; /* ], line 56 */
203 |             z->lb = mlimit;
204 |         }
205 |         z->c = z->l - m_test;
206 |     }
207 |     {   int ret = skip_utf8(z->p, z->c, z->lb, 0, -1);
208 |         if (ret < 0) return 0;
209 |         z->c = ret; /* next, line 62 */
210 |     }
211 |     z->bra = z->c; /* ], line 62 */
212 |     {   int ret = slice_del(z); /* delete, line 62 */
213 |         if (ret < 0) return ret;
214 |     }
215 |     return 1;
216 | }
217 | /* r_other_suffix: two independent backward steps.
    |  *   1. Optional ("do"): if eq_s_b() matches "st" before the cursor and then
    |  *      "ig" before that, the slice marked around "st" is deleted. The cursor
    |  *      is restored whether or not this fires.
    |  *   2. Looking no further left than z->I[0], looks up an a_2 ending:
    |  *        result 1 ("ig", "lig", "elig", "els"): delete it, then try
    |  *                 r_consonant_pair (a 0 result is ignored, cursor restored);
    |  *        result 2 (s_2_4): replace it with the 4 bytes of s_2.
    |  * Returns 0 if step 2 cannot start or finds no ending, 1 otherwise; negative
    |  * results from slice_del(), slice_from_s() or r_consonant_pair() are passed
    |  * through. */
218 | static int r_other_suffix(struct SN_env * z) {
219 |     int among_var;
220 |     {   int m1 = z->l - z->c; (void)m1; /* do, line 66 */
221 |         z->ket = z->c; /* [, line 66 */
222 |         if (!(eq_s_b(z, 2, s_0))) goto lab0;
223 |         z->bra = z->c; /* ], line 66 */
224 |         if (!(eq_s_b(z, 2, s_1))) goto lab0;
225 |         {   int ret = slice_del(z); /* delete, line 66 */
226 |             if (ret < 0) return ret;
227 |         }
228 |     lab0:
229 |         z->c = z->l - m1;
230 |     }
231 |     {   int mlimit; /* setlimit, line 67 */
232 |         int m2 = z->l - z->c; (void)m2;
233 |         if (z->c < z->I[0]) return 0;
234 |         z->c = z->I[0]; /* tomark, line 67 */
235 |         mlimit = z->lb; z->lb = z->c;
236 |         z->c = z->l - m2;
237 |         z->ket = z->c; /* [, line 67 */
238 |         if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } /* fast reject: need 2 bytes above lb and a final g, s or t (the bits set in 1572992) */
239 |         among_var = find_among_b(z, a_2, 5); /* substring, line 67 */
240 |         if (!(among_var)) { z->lb = mlimit; return 0; }
241 |         z->bra = z->c; /* ], line 67 */
242 |         z->lb = mlimit;
243 |     }
244 |     switch(among_var) {
245 |         case 0: return 0;
246 |         case 1:
247 |             {   int ret = slice_del(z); /* delete, line 70 */
248 |                 if (ret < 0) return ret;
249 |             }
250 |             {   int m3 = z->l - z->c; (void)m3; /* do, line 70 */
251 |                 {   int ret = r_consonant_pair(z);
252 |                     if (ret == 0) goto lab1; /* call consonant_pair, line 70 */
253 |                     if (ret < 0) return ret;
254 |                 }
255 |             lab1:
256 |                 z->c = z->l - m3;
257 |             }
258 |             break;
259 |         case 2:
260 |             {   int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */
261 |                 if (ret < 0) return ret;
262 |             }
263 |             break;
264 |     }
265 |     return 1;
266 | }
267 | /* r_undouble: removes a repeated trailing character.
    |  * Looking no further left than z->I[0], out_grouping_b_U() over g_v must
    |  * return 0 for the character before the cursor (otherwise the function
    |  * returns 0). That character, delimited by bra/ket, is copied into z->S[0]
    |  * with slice_to(). If eq_v_b() then finds the same text immediately before
    |  * it, the marked slice is deleted.
    |  * Returns 1 after a deletion, 0 if any check fails, -1 if slice_to()
    |  * returns a null pointer, or a negative slice_del() result.
    |  * NOTE(review): the -1 path returns before "z->lb = mlimit", so z->lb stays
    |  * narrowed to I[0] on that error path. */
268 | static int r_undouble(struct SN_env * z) {
269 |     {   int mlimit; /* setlimit, line 76 */
270 |         int m1 = z->l - z->c; (void)m1;
271 |         if (z->c < z->I[0]) return 0;
272 |         z->c = z->I[0]; /* tomark, line 76 */
273 |         mlimit = z->lb; z->lb = z->c;
274 |         z->c = z->l - m1;
275 |         z->ket = z->c; /* [, line 76 */
276 |         if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; }
277 |         z->bra = z->c; /* ], line 76 */
278 |         z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */
279 |         if (z->S[0] == 0) return -1; /* -> ch, line 76 */
280 |         z->lb = mlimit;
281 |     }
282 |     if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */
283 |     {   int ret = slice_del(z); /* delete, line 78 */
284 |         if (ret < 0) return ret;
285 |     }
286 |     return 1;
287 | }
288 | /* danish_UTF_8_stem: entry point that runs the stemming steps on the text
    |  * held in z.
    |  *   1. r_mark_regions, scanning forwards; the cursor is restored afterwards.
    |  *   2. Switch to backward mode: z->lb = cursor, cursor = z->l.
    |  *   3. r_main_suffix, r_consonant_pair, r_other_suffix, r_undouble, in that
    |  *      order. Each is a Snowball "do": a 0 result is ignored and the cursor
    |  *      is reset to its pre-call offset from z->l before the next step.
    |  *   4. The cursor is set back to z->lb.
    |  * Returns 1, or the negative value returned by any step. */
289 | extern int danish_UTF_8_stem(struct SN_env * z) {
290 |     {   int c1 = z->c; /* do, line 84 */
291 |         {   int ret = r_mark_regions(z);
292 |             if (ret == 0) goto lab0; /* call mark_regions, line 84 */
293 |             if (ret < 0) return ret;
294 |         }
295 |     lab0:
296 |         z->c = c1;
297 |     }
298 |     z->lb = z->c; z->c = z->l; /* backwards, line 85 */
299 | 
300 |     {   int m2 = z->l - z->c; (void)m2; /* do, line 86 */
301 |         {   int ret = r_main_suffix(z);
302 |             if (ret == 0) goto lab1; /* call main_suffix, line 86 */
303 |             if (ret < 0) return ret;
304 |         }
305 |     lab1:
306 |         z->c = z->l - m2;
307 |     }
308 |     {   int m3 = z->l - z->c; (void)m3; /* do, line 87 */
309 |         {   int ret = r_consonant_pair(z);
310 |             if (ret == 0) goto lab2; /* call consonant_pair, line 87 */
311 |             if (ret < 0) return ret;
312 |         }
313 |     lab2:
314 |         z->c = z->l - m3;
315 |     }
316 |     {   int m4 = z->l - z->c; (void)m4; /* do, line 88 */
317 |         {   int ret = r_other_suffix(z);
318 |             if (ret == 0) goto lab3; /* call other_suffix, line 88 */
319 |             if (ret < 0) return ret;
320 |         }
321 |     lab3:
322 |         z->c = z->l - m4;
323 |     }
324 |     {   int m5 = z->l - z->c; (void)m5; /* do, line 89 */
325 |         {   int ret = r_undouble(z);
326 |             if (ret == 0) goto lab4; /* call undouble, line 89 */
327 |             if (ret < 0) return ret;
328 |         }
329 |     lab4:
330 |         z->c = z->l - m5;
331 |     }
332 |     z->c = z->lb;
333 |     return 1;
334 | }
335 | /* Environment create/close pair for this stemmer; both simply forward to the
    |  * runtime. NOTE(review): SN_create_env(1, 2, 0) presumably sizes the S/I/B
    |  * arrays - this file uses exactly z->S[0], z->I[0] and z->I[1] - and
    |  * SN_close_env(z, 1) repeats the 1; confirm against runtime/api.c. */
336 | extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); }
337 | 
338 | extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); }
339 | 
340 | 


--------------------------------------------------------------------------------
/libstemmer_c/libstemmer/modules.h:
--------------------------------------------------------------------------------
  1 | /* libstemmer/modules.h: List of stemming modules.
  2 |  *
  3 |  * This file is generated by mkmodules.pl from a list of module names.
  4 |  * Do not edit manually.
  5 |  *
  6 |  * Modules included by this file are: danish, dutch, english, finnish, french,
  7 |  * german, hungarian, italian, norwegian, porter, portuguese, romanian,
  8 |  * russian, spanish, swedish, turkish
  9 |  */
 10 | 
 11 | #include "../src_c/stem_ISO_8859_1_danish.h"
 12 | #include "../src_c/stem_UTF_8_danish.h"
 13 | #include "../src_c/stem_ISO_8859_1_dutch.h"
 14 | #include "../src_c/stem_UTF_8_dutch.h"
 15 | #include "../src_c/stem_ISO_8859_1_english.h"
 16 | #include "../src_c/stem_UTF_8_english.h"
 17 | #include "../src_c/stem_ISO_8859_1_finnish.h"
 18 | #include "../src_c/stem_UTF_8_finnish.h"
 19 | #include "../src_c/stem_ISO_8859_1_french.h"
 20 | #include "../src_c/stem_UTF_8_french.h"
 21 | #include "../src_c/stem_ISO_8859_1_german.h"
 22 | #include "../src_c/stem_UTF_8_german.h"
 23 | #include "../src_c/stem_ISO_8859_1_hungarian.h"
 24 | #include "../src_c/stem_UTF_8_hungarian.h"
 25 | #include "../src_c/stem_ISO_8859_1_italian.h"
 26 | #include "../src_c/stem_UTF_8_italian.h"
 27 | #include "../src_c/stem_ISO_8859_1_norwegian.h"
 28 | #include "../src_c/stem_UTF_8_norwegian.h"
 29 | #include "../src_c/stem_ISO_8859_1_porter.h"
 30 | #include "../src_c/stem_UTF_8_porter.h"
 31 | #include "../src_c/stem_ISO_8859_1_portuguese.h"
 32 | #include "../src_c/stem_UTF_8_portuguese.h"
 33 | #include "../src_c/stem_ISO_8859_2_romanian.h"
 34 | #include "../src_c/stem_UTF_8_romanian.h"
 35 | #include "../src_c/stem_KOI8_R_russian.h"
 36 | #include "../src_c/stem_UTF_8_russian.h"
 37 | #include "../src_c/stem_ISO_8859_1_spanish.h"
 38 | #include "../src_c/stem_UTF_8_spanish.h"
 39 | #include "../src_c/stem_ISO_8859_1_swedish.h"
 40 | #include "../src_c/stem_UTF_8_swedish.h"
 41 | #include "../src_c/stem_UTF_8_turkish.h"
 42 | /* Character encodings a stemmer module can be registered for. ENC_UNKNOWN
    |  * is 0 and is also the value stored in the terminating row of the
    |  * encodings[] and modules[] tables in this header. */
 43 | typedef enum {
 44 |   ENC_UNKNOWN=0,
 45 |   ENC_ISO_8859_1,
 46 |   ENC_ISO_8859_2,
 47 |   ENC_KOI8_R,
 48 |   ENC_UTF_8
 49 | } stemmer_encoding_t;
 50 | /* Pairs an encoding name string with its stemmer_encoding_t value. */
 51 | struct stemmer_encoding {
 52 |   const char * name;
 53 |   stemmer_encoding_t enc;
 54 | };
 55 | static struct stemmer_encoding encodings[] = { /* name -> enum table; the {0,ENC_UNKNOWN} row marks its end */
 56 |   {"ISO_8859_1", ENC_ISO_8859_1},
 57 |   {"ISO_8859_2", ENC_ISO_8859_2},
 58 |   {"KOI8_R", ENC_KOI8_R},
 59 |   {"UTF_8", ENC_UTF_8},
 60 |   {0,ENC_UNKNOWN}
 61 | };
 62 | /* One registry row: a language name or code, the encoding the row applies
    |  * to, and the create / close / stem functions of the generated stemmer. */
 63 | struct stemmer_modules {
 64 |   const char * name;
 65 |   stemmer_encoding_t enc; 
 66 |   struct SN_env * (*create)(void);
 67 |   void (*close)(struct SN_env *);
 68 |   int (*stem)(struct SN_env *);
 69 | };
 70 | static struct stemmer_modules modules[] = { /* Rows are in alphabetical order of name; several names share one stemmer
    |  * (e.g. "da", "dan", "danish"). Turkish is registered for UTF_8 only, Romanian
    |  * uses ISO_8859_2 and Russian KOI8_R as their non-UTF_8 encoding. The all-zero
    |  * row marks the end of the table. */
 71 |   {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
 72 |   {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
 73 |   {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
 74 |   {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
 75 |   {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem},
 76 |   {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
 77 |   {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
 78 |   {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
 79 |   {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
 80 |   {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
 81 |   {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
 82 |   {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
 83 |   {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
 84 |   {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
 85 |   {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
 86 |   {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
 87 |   {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
 88 |   {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
 89 |   {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem},
 90 |   {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem},
 91 |   {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
 92 |   {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
 93 |   {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
 94 |   {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
 95 |   {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
 96 |   {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
 97 |   {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
 98 |   {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
 99 |   {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem},
100 |   {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem},
101 |   {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
102 |   {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
103 |   {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
104 |   {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
105 |   {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
106 |   {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
107 |   {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem},
108 |   {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem},
109 |   {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
110 |   {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
111 |   {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem},
112 |   {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem},
113 |   {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
114 |   {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
115 |   {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
116 |   {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
117 |   {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem},
118 |   {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem},
119 |   {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
120 |   {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
121 |   {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
122 |   {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
123 |   {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem},
124 |   {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem},
125 |   {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
126 |   {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
127 |   {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem},
128 |   {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem},
129 |   {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
130 |   {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
131 |   {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
132 |   {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
133 |   {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem},
134 |   {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem},
135 |   {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
136 |   {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
137 |   {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem},
138 |   {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem},
139 |   {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
140 |   {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
141 |   {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem},
142 |   {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem},
143 |   {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
144 |   {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
145 |   {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
146 |   {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
147 |   {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
148 |   {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
149 |   {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
150 |   {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
151 |   {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem},
152 |   {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem},
153 |   {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
154 |   {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
155 |   {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem},
156 |   {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem},
157 |   {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
158 |   {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
159 |   {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem},
160 |   {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem},
161 |   {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
162 |   {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
163 |   {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
164 |   {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
165 |   {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem},
166 |   {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem},
167 |   {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
168 |   {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
169 |   {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem},
170 |   {0,ENC_UNKNOWN,0,0,0}
171 | };
172 | static const char * algorithm_names[] = { /* the 16 full algorithm names, matching the module list in this header's top comment; a 0 entry ends the list */
173 |   "danish", 
174 |   "dutch", 
175 |   "english", 
176 |   "finnish", 
177 |   "french", 
178 |   "german", 
179 |   "hungarian", 
180 |   "italian", 
181 |   "norwegian", 
182 |   "porter", 
183 |   "portuguese", 
184 |   "romanian", 
185 |   "russian", 
186 |   "spanish", 
187 |   "swedish", 
188 |   "turkish", 
189 |   0
190 | };
191 | 


--------------------------------------------------------------------------------