├── .gitignore ├── .travis.yml ├── Gemfile ├── Gemfile.lock ├── MIT-LICENSE ├── README.rdoc ├── Rakefile ├── ext └── lingua │ ├── extconf.rb │ └── stemmer.c ├── lib └── lingua │ ├── stemmer.rb │ └── version.rb ├── libstemmer_c ├── MANIFEST ├── Makefile ├── Makefile.windows ├── README ├── examples │ └── stemwords.c ├── include │ └── libstemmer.h ├── libstemmer │ ├── libstemmer.c │ ├── libstemmer_utf8.c │ ├── modules.h │ ├── modules.txt │ ├── modules_utf8.h │ └── modules_utf8.txt ├── mkinc.mak ├── mkinc_utf8.mak ├── runtime │ ├── api.c │ ├── api.h │ ├── header.h │ └── utilities.c └── src_c │ ├── stem_ISO_8859_1_danish.c │ ├── stem_ISO_8859_1_danish.h │ ├── stem_ISO_8859_1_dutch.c │ ├── stem_ISO_8859_1_dutch.h │ ├── stem_ISO_8859_1_english.c │ ├── stem_ISO_8859_1_english.h │ ├── stem_ISO_8859_1_finnish.c │ ├── stem_ISO_8859_1_finnish.h │ ├── stem_ISO_8859_1_french.c │ ├── stem_ISO_8859_1_french.h │ ├── stem_ISO_8859_1_german.c │ ├── stem_ISO_8859_1_german.h │ ├── stem_ISO_8859_1_hungarian.c │ ├── stem_ISO_8859_1_hungarian.h │ ├── stem_ISO_8859_1_italian.c │ ├── stem_ISO_8859_1_italian.h │ ├── stem_ISO_8859_1_latin.c │ ├── stem_ISO_8859_1_latin.h │ ├── stem_ISO_8859_1_norwegian.c │ ├── stem_ISO_8859_1_norwegian.h │ ├── stem_ISO_8859_1_porter.c │ ├── stem_ISO_8859_1_porter.h │ ├── stem_ISO_8859_1_portuguese.c │ ├── stem_ISO_8859_1_portuguese.h │ ├── stem_ISO_8859_1_spanish.c │ ├── stem_ISO_8859_1_spanish.h │ ├── stem_ISO_8859_1_swedish.c │ ├── stem_ISO_8859_1_swedish.h │ ├── stem_ISO_8859_2_romanian.c │ ├── stem_ISO_8859_2_romanian.h │ ├── stem_KOI8_R_russian.c │ ├── stem_KOI8_R_russian.h │ ├── stem_UTF_8_danish.c │ ├── stem_UTF_8_danish.h │ ├── stem_UTF_8_dutch.c │ ├── stem_UTF_8_dutch.h │ ├── stem_UTF_8_english.c │ ├── stem_UTF_8_english.h │ ├── stem_UTF_8_finnish.c │ ├── stem_UTF_8_finnish.h │ ├── stem_UTF_8_french.c │ ├── stem_UTF_8_french.h │ ├── stem_UTF_8_german.c │ ├── stem_UTF_8_german.h │ ├── stem_UTF_8_hungarian.c │ ├── stem_UTF_8_hungarian.h │ ├── 
stem_UTF_8_italian.c │ ├── stem_UTF_8_italian.h │ ├── stem_UTF_8_latin.c │ ├── stem_UTF_8_latin.h │ ├── stem_UTF_8_lithuanian.c │ ├── stem_UTF_8_lithuanian.h │ ├── stem_UTF_8_norwegian.c │ ├── stem_UTF_8_norwegian.h │ ├── stem_UTF_8_porter.c │ ├── stem_UTF_8_porter.h │ ├── stem_UTF_8_portuguese.c │ ├── stem_UTF_8_portuguese.h │ ├── stem_UTF_8_romanian.c │ ├── stem_UTF_8_romanian.h │ ├── stem_UTF_8_russian.c │ ├── stem_UTF_8_russian.h │ ├── stem_UTF_8_spanish.c │ ├── stem_UTF_8_spanish.h │ ├── stem_UTF_8_swedish.c │ ├── stem_UTF_8_swedish.h │ ├── stem_UTF_8_turkish.c │ └── stem_UTF_8_turkish.h ├── ruby-stemmer.gemspec └── test ├── helper.rb └── lingua └── test_stemmer.rb /.gitignore: -------------------------------------------------------------------------------- 1 | rdoc 2 | tmp 3 | libstemmer_c/stemwords 4 | pkg/* 5 | *.o 6 | *.so 7 | Makefile 8 | mkmf.log 9 | *.swp 10 | *.bundle 11 | .idea/* 12 | stemmer.bundle 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.4 4 | - 2.6 5 | - 2.7 6 | script: bundle exec rake 7 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | 4 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 3 | specs: 4 | ruby-stemmer (3.0.0) 5 | 6 | GEM 7 | remote: https://rubygems.org/ 8 | specs: 9 | minitest (5.14.2) 10 | rake (13.0.1) 11 | rake-compiler (1.1.1) 12 | rake 13 | 14 | PLATFORMS 15 | ruby 16 | 17 | DEPENDENCIES 18 | minitest (~> 5.14) 19 | rake-compiler (~> 1.1) 20 | ruby-stemmer! 
21 | 22 | BUNDLED WITH 23 | 2.1.4 24 | -------------------------------------------------------------------------------- /MIT-LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2008-2020 Aurelian Oancea 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = Notice @aurelian May 2022 2 | 3 | 👋 This project started in 2008 mostly as a means for me to learn how to build C extensions for Ruby, exposing a library that I needed at the time for a real-life project. 4 | It's 2022 and many things have changed since. Most importantly, I lack the time to keep up with recent libstemmer_c versions and to release builds compatible with various versions of Windows. 
5 | 6 | With this in mind, it is fair to archive this project. 7 | 8 | = Ruby Stemmer 9 | 10 | Ruby-Stemmer exposes SnowBall API to Ruby. 11 | 12 | {Travis CI Status}[https://api.travis-ci.org/aurelian/ruby-stemmer.png] 13 | 14 | This package includes libstemmer_c library released under BSD licence and available for free {here}[https://snowballstem.org/download.html]. 15 | 16 | Support for latin language is also included and it has been generated with the snowball compiler using {schinke contribution}[https://snowballstem.org/otherapps/schinke/]. 17 | 18 | For more details about libstemmer_c please visit the {SnowBall website}[https://snowballstem.org/]. 19 | 20 | == Usage 21 | 22 | require 'rubygems' 23 | require 'lingua/stemmer' 24 | 25 | stemmer= Lingua::Stemmer.new(:language => "ro") 26 | stemmer.stem("netăgăduit") #=> netăgădu 27 | 28 | === Alternative 29 | 30 | require 'rubygems' 31 | require 'lingua/stemmer' 32 | 33 | Lingua.stemmer( %w(incontestabil neîndoielnic), :language => "ro" ) #=> ["incontest", "neîndoieln"] 34 | Lingua.stemmer("installation") #=> "instal" 35 | Lingua.stemmer("installation", :language => "fr", :encoding => "ISO_8859_1") do | word | 36 | puts "~> #{word}" #=> "instal" 37 | end # => # 38 | 39 | === Gemfile 40 | 41 | gem 'ruby-stemmer', '>=2.0.0', :require => 'lingua/stemmer' 42 | 43 | === More details 44 | 45 | * Complete API in {RDoc format}[http://rdoc.info/github/aurelian/ruby-stemmer/master/frames] 46 | * More usage on the {test file}[https://github.com/aurelian/ruby-stemmer/blob/master/test/lingua/test_stemmer.rb] 47 | 48 | == Install 49 | 50 | gem install ruby-stemmer 51 | 52 | ==== Windows 53 | 54 | There's also a Windows (Fat bin) 55 | 56 | gem install ruby-stemmer --platform=x86-mingw32 57 | 58 | As far as I know the above should work with {rubyinstaller}[http://rubyinstaller.org/]. 
If it fails, you could try with: 59 | 60 | gem install ruby-stemmer --platform=x86-mswin32 61 | 62 | {It's known}[https://cl.ly/BX9o] to work under Windows XP. 63 | 64 | === Development version 65 | 66 | $ git clone git://github.com/aurelian/ruby-stemmer.git 67 | $ cd ruby-stemmer 68 | $ rake -T #<== see what we've got 69 | $ rake compile #<== builds the extension do'h 70 | $ rake test 71 | 72 | ==== Cross Compiling 73 | 74 | Install {rake-compiler-dock}[https://github.com/rake-compiler/rake-compiler-dock] and follow the setup. 75 | 76 | Then, inside the docker image: 77 | 78 | $ AR=i686-w64-mingw32-ar CC=i686-w64-mingw32-gcc LD=i686-w64-mingw32-ld rake cross native gem 79 | 80 | Or, build the lib first then compile: 81 | 82 | $ cd libstemmer_c 83 | $ AR=i686-w64-mingw32-ar CC=i686-w64-mingw32-gcc LD=i686-w64-mingw32-ld make 84 | $ cd ../ 85 | $ rake cross native gem 86 | 87 | == NOT A BUG 88 | 89 | The stemming process is an algorithm to allow one to find the stem of a word (not the root of it). 90 | For further reference on stem vs. root, please check the Wikipedia articles on the topic: 91 | 92 | * https://en.wikipedia.org/wiki/Word_stem 93 | * https://en.wikipedia.org/wiki/Root_(linguistics) 94 | 95 | == TODO 96 | 97 | * {Open issues}[https://github.com/aurelian/ruby-stemmer/issues] 98 | 99 | == Note on Patches/Pull Requests 100 | 101 | * Fork the project from {github}[https://github.com/aurelian/ruby-stemmer] 102 | * Make your feature addition or {bug fix}[https://github.com/aurelian/ruby-stemmer/issues] 103 | * Add tests for it. This is important so I don't break it in a 104 | future version unintentionally. 105 | * Commit, do not mess with rakefile, version, or history. 106 | 107 | if you want to have your own version, that is fine but 108 | bump version in a commit by itself I can ignore when I pull 109 | * Send me a pull request. Bonus points for topic branches. 
110 | 111 | == Alternative Stemmers for Ruby 112 | 113 | * {stemmer4r}[https://rubygems.org/gems/stemmer4r] (ext) 114 | * {fast-stemmer}[https://rubygems.org/gems/fast-stemmer] (ext) 115 | * {uea-stemmer}[https://rubygems.org/gems/uea-stemmer] (ext) 116 | * {stemmer}[https://rubygems.org/gems/stemmer] (pure ruby) 117 | * add yours 118 | 119 | == Copyright 120 | 121 | Copyright (c) 2008-2020 {Aurelian Oancea}[http://locknet.ro]. See MIT-LICENSE for details. 122 | 123 | == Contributors 124 | 125 | * {Aurelian Oancea}[https://github.com/aurelian] 126 | * {Yury Korolev}[https://github.com/yury] - various bug fixes 127 | * {Aaron Patterson}[https://github.com/tenderlove] - rake compiler (windows support), code cleanup 128 | * {Damián Silvani}[https://github.com/munshkr] - Ruby 1.9 encoding 129 | 130 | # encoding: utf-8 131 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler/setup' 3 | 4 | require 'rdoc/task' 5 | require 'rake/testtask' 6 | require 'bundler/gem_tasks' 7 | require 'rake/extensiontask' 8 | require 'rubygems/package_task' 9 | 10 | CLOBBER.include('libstemmer_c/**/*.o') 11 | 12 | GEMSPEC = Gem::Specification.load('ruby-stemmer.gemspec') 13 | 14 | Rake::TestTask.new(:test) do |test| 15 | test.libs << 'lib' << 'test' 16 | test.pattern = 'test/**/test_*.rb' 17 | test.verbose = true 18 | end 19 | 20 | Rake::ExtensionTask.new('ruby-stemmer', GEMSPEC) do |ext| 21 | ext.lib_dir = File.join(*['lib', 'lingua', ENV['FAT_DIR']].compact) 22 | ext.ext_dir = File.join 'ext', 'lingua' 23 | ext.cross_compile = true 24 | ext.cross_platform = ['i386-mswin32-60', 'i386-mingw32'] 25 | ext.name = 'stemmer_native' 26 | end 27 | 28 | Rake::RDocTask.new do |rdoc| 29 | version = File.exist?('VERSION') ? 
File.read('VERSION') : '' 30 | rdoc.rdoc_dir = 'rdoc' 31 | rdoc.options << '--charset' << 'utf-8' 32 | rdoc.title = "Ruby-Stemmer #{version}" 33 | rdoc.rdoc_files.include('README*') 34 | rdoc.rdoc_files.include('lib/**/*.rb') 35 | rdoc.rdoc_files.include('ext/lingua/stemmer.c') 36 | rdoc.rdoc_files.include('MIT-LICENSE') 37 | end 38 | 39 | task default: %i[clobber compile test] 40 | -------------------------------------------------------------------------------- /ext/lingua/extconf.rb: -------------------------------------------------------------------------------- 1 | ENV['RC_ARCHS'] = '' if RUBY_PLATFORM.match?(/darwin/) 2 | require 'mkmf' 3 | 4 | ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..', '..')) 5 | LIBSTEMMER = File.join(ROOT, 'libstemmer_c') 6 | 7 | # build libstemmer_c 8 | # FreeBSD make is gmake 9 | make = RUBY_PLATFORM.match?(/freebsd/) ? 'gmake' : 'make' 10 | 11 | # MacOS architecture mess up 12 | if RUBY_PLATFORM.match?(/darwin/) 13 | # Config has been deprecated since 1.9.3, and removed since 2.2.0 14 | if defined?(RbConfig) 15 | Config = RbConfig 16 | end 17 | 18 | # see: #issue/3, #issue/5 19 | begin 20 | ENV['ARCHFLAGS'] = '-arch ' + %x[file #{File.expand_path(File.join(Config::CONFIG['bindir'], Config::CONFIG['RUBY_INSTALL_NAME']))}].strip!.match(/executable (.+)$/)[1] unless ENV['ARCHFLAGS'].nil? 21 | rescue 22 | $stderr << "Failed to get your ruby executable architecture.\n" 23 | $stderr << "Please specify one using $ARCHFLAGS environment variable.\n" 24 | exit 25 | end 26 | # see: #issue/9, #issue/6 27 | # see: man compat 28 | if ENV['COMMAND_MODE'] == 'legacy' 29 | $stdout << "Setting compat mode to unix2003\n." 30 | ENV['COMMAND_MODE'] = 'unix2003' 31 | end 32 | end 33 | 34 | # make libstemmer_c. unless we're cross-compiling. 35 | unless RUBY_PLATFORM.match?(/i386-mingw32/) 36 | system "cd #{LIBSTEMMER}; #{make} libstemmer.o; cd #{ROOT};" 37 | exit unless $? 
== 0 38 | end 39 | 40 | $CFLAGS += " -I#{File.expand_path(File.join(LIBSTEMMER, 'include'))} " 41 | $libs += " -L#{LIBSTEMMER} #{File.expand_path(File.join(LIBSTEMMER, 'libstemmer.o'))} " 42 | 43 | create_makefile('lingua/stemmer_native') if have_header('libstemmer.h') 44 | -------------------------------------------------------------------------------- /ext/lingua/stemmer.c: -------------------------------------------------------------------------------- 1 | #include "ruby.h" 2 | #include 3 | 4 | 5 | #ifdef HAVE_RUBY_ENCODING_H 6 | 7 | #include 8 | 9 | #define ENCODED_STR_NEW2(str, encoding) \ 10 | ({ \ 11 | VALUE _string = rb_str_new2((const char *)str); \ 12 | int _enc = rb_enc_get_index(encoding); \ 13 | rb_enc_associate_index(_string, _enc); \ 14 | _string; \ 15 | }) 16 | 17 | #else 18 | 19 | #define ENCODED_STR_NEW2(str, encoding) \ 20 | rb_str_new2((const char *)str) 21 | 22 | #endif 23 | 24 | 25 | VALUE rb_mLingua; 26 | VALUE rb_cStemmer; 27 | VALUE rb_eStemmerError; 28 | 29 | /* 30 | * Document-method: new 31 | * call-seq: Lingua::Stemmer.new 32 | * 33 | * Creates a new Stemmer, pass :language and :encoding as arguments 34 | * to change encoding or language, otherwise english with UTF_8 will be used 35 | * 36 | * require 'lingua/stemmer' 37 | * s = Lingua::Stemmer.new :language => 'fr' 38 | */ 39 | static VALUE 40 | rb_stemmer_init(VALUE self, VALUE rlang, VALUE renc) { 41 | struct sb_stemmer * stemmer; 42 | 43 | Data_Get_Struct(self, struct sb_stemmer, stemmer); 44 | 45 | // In case someone sends() this method, free up the old one 46 | if(stemmer) sb_stemmer_delete(stemmer); 47 | 48 | stemmer = sb_stemmer_new( RSTRING_PTR(rlang), RSTRING_PTR(renc) ); 49 | if (!stemmer) { 50 | if (!RTEST(renc)) { 51 | rb_raise(rb_eStemmerError, 52 | "Language %s not available for stemming", RSTRING_PTR(rlang)); 53 | } else { 54 | rb_raise(rb_eStemmerError, 55 | "Language %s not available for stemming in encoding %s", 56 | RSTRING_PTR(rlang), RSTRING_PTR(renc)); 57 | } 58 | 
} 59 | 60 | DATA_PTR(self) = stemmer; 61 | 62 | return self; 63 | } 64 | 65 | /* 66 | * Document-method: stem 67 | * call-seq: stem 68 | * 69 | * Stems a word 70 | * 71 | * require 'lingua/stemmer' 72 | * s = Lingua::Stemmer.new 73 | * s.stem "installation" # ==> install 74 | */ 75 | static VALUE 76 | rb_stemmer_stem(VALUE self, VALUE word) { 77 | struct sb_stemmer * stemmer; 78 | 79 | Data_Get_Struct(self, struct sb_stemmer, stemmer); 80 | if(!stemmer) rb_raise(rb_eRuntimeError, "Stemmer is not initialized"); 81 | 82 | VALUE s_word = rb_String(word); 83 | const sb_symbol * stemmed = sb_stemmer_stem(stemmer, 84 | (sb_symbol *)RSTRING_PTR(s_word), 85 | RSTRING_LEN(s_word) 86 | ); 87 | 88 | VALUE rb_enc = rb_iv_get(self, "@encoding"); 89 | return ENCODED_STR_NEW2((char *)stemmed, rb_enc); 90 | } 91 | 92 | static void 93 | sb_stemmer_free(struct sb_stemmer * stemmer) 94 | { 95 | if(stemmer) sb_stemmer_delete(stemmer); 96 | } 97 | 98 | static VALUE 99 | sb_stemmer_alloc(VALUE klass) 100 | { 101 | return Data_Wrap_Struct(klass, 0, sb_stemmer_free, 0); 102 | } 103 | 104 | /* 105 | * Ruby-Stemmer, Ruby extension to SnowBall API using libstemmer_c 106 | */ 107 | void Init_stemmer_native() { 108 | rb_mLingua = rb_define_module("Lingua"); 109 | rb_cStemmer = rb_define_class_under(rb_mLingua, "Stemmer", rb_cObject); 110 | rb_define_alloc_func(rb_cStemmer, sb_stemmer_alloc); 111 | rb_eStemmerError = rb_define_class_under(rb_mLingua, "StemmerError", rb_eException); 112 | rb_define_private_method(rb_cStemmer, "native_init", rb_stemmer_init, 2); 113 | rb_define_method(rb_cStemmer, "stem", rb_stemmer_stem, 1); 114 | } 115 | 116 | -------------------------------------------------------------------------------- /lib/lingua/stemmer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | if RUBY_PLATFORM.match?(/(mswin|mingw)/i) 4 | require "lingua/#{RUBY_VERSION.sub(/\.\d+$/, '')}/stemmer_native" 5 | else 6 | 
module Lingua
  # Convenience wrapper: builds a Stemmer from +options+ and stems +o+.
  #
  # +o+ may be a single word or a collection of words; every element is
  # converted with +to_s+ before being stemmed.
  #
  # With a block, each stem is yielded in order and the Stemmer instance is
  # returned. Without a block, a String argument returns its stem; anything
  # else returns an Array of stems.
  def self.stemmer(o, options = {})
    stemmer = Stemmer.new(options)
    words = Array(o).map(&:to_s)

    if block_given?
      words.each { |word| yield stemmer.stem(word) }
      return stemmer
    end

    stems = words.map { |word| stemmer.stem(word) }
    o.is_a?(String) ? stems.first : stems
  end

  class Stemmer
    attr_reader :language, :encoding

    # Builds a stemmer for the given language and encoding.
    #
    # Recognised option keys are :language (default 'en') and :encoding
    # (default 'UTF_8'). The encoding name is resolved through
    # Encoding.find after underscores are replaced with dashes, so
    # +encoding+ returns an Encoding object.
    #
    #   require 'lingua/stemmer'
    #   s = Lingua::Stemmer.new language: 'fr'
    #
    def initialize(options = {})
      @language = (options[:language] || 'en').to_s
      @encoding = (options[:encoding] || 'UTF_8').to_s

      @encoding = Encoding.find(@encoding.tr('_', '-'))

      native_init(@language, native_encoding(@encoding))
    end

    private

    # Turns an Encoding into the underscore-separated name handed to the
    # native initializer (e.g. "UTF-8" becomes "UTF_8").
    def native_encoding(enc)
      enc.name.tr('-', '_')
    end
  end
end
src_c/stem_ISO_8859_1_french.h 12 | src_c/stem_ISO_8859_1_german.c 13 | src_c/stem_ISO_8859_1_german.h 14 | src_c/stem_ISO_8859_1_hungarian.c 15 | src_c/stem_ISO_8859_1_hungarian.h 16 | src_c/stem_ISO_8859_1_italian.c 17 | src_c/stem_ISO_8859_1_italian.h 18 | src_c/stem_ISO_8859_1_norwegian.c 19 | src_c/stem_ISO_8859_1_norwegian.h 20 | src_c/stem_ISO_8859_1_porter.c 21 | src_c/stem_ISO_8859_1_porter.h 22 | src_c/stem_ISO_8859_1_portuguese.c 23 | src_c/stem_ISO_8859_1_portuguese.h 24 | src_c/stem_ISO_8859_1_spanish.c 25 | src_c/stem_ISO_8859_1_spanish.h 26 | src_c/stem_ISO_8859_1_swedish.c 27 | src_c/stem_ISO_8859_1_swedish.h 28 | src_c/stem_ISO_8859_2_romanian.c 29 | src_c/stem_ISO_8859_2_romanian.h 30 | src_c/stem_KOI8_R_russian.c 31 | src_c/stem_KOI8_R_russian.h 32 | src_c/stem_UTF_8_danish.c 33 | src_c/stem_UTF_8_danish.h 34 | src_c/stem_UTF_8_dutch.c 35 | src_c/stem_UTF_8_dutch.h 36 | src_c/stem_UTF_8_english.c 37 | src_c/stem_UTF_8_english.h 38 | src_c/stem_UTF_8_finnish.c 39 | src_c/stem_UTF_8_finnish.h 40 | src_c/stem_UTF_8_french.c 41 | src_c/stem_UTF_8_french.h 42 | src_c/stem_UTF_8_german.c 43 | src_c/stem_UTF_8_german.h 44 | src_c/stem_UTF_8_hungarian.c 45 | src_c/stem_UTF_8_hungarian.h 46 | src_c/stem_UTF_8_italian.c 47 | src_c/stem_UTF_8_italian.h 48 | src_c/stem_UTF_8_norwegian.c 49 | src_c/stem_UTF_8_norwegian.h 50 | src_c/stem_UTF_8_porter.c 51 | src_c/stem_UTF_8_porter.h 52 | src_c/stem_UTF_8_portuguese.c 53 | src_c/stem_UTF_8_portuguese.h 54 | src_c/stem_UTF_8_romanian.c 55 | src_c/stem_UTF_8_romanian.h 56 | src_c/stem_UTF_8_russian.c 57 | src_c/stem_UTF_8_russian.h 58 | src_c/stem_UTF_8_spanish.c 59 | src_c/stem_UTF_8_spanish.h 60 | src_c/stem_UTF_8_swedish.c 61 | src_c/stem_UTF_8_swedish.h 62 | src_c/stem_UTF_8_turkish.c 63 | src_c/stem_UTF_8_turkish.h 64 | runtime/api.c 65 | runtime/api.h 66 | runtime/header.h 67 | runtime/utilities.c 68 | libstemmer/libstemmer.c 69 | libstemmer/libstemmer_utf8.c 70 | libstemmer/modules.h 71 | 
libstemmer/modules_utf8.h 72 | include/libstemmer.h 73 | -------------------------------------------------------------------------------- /libstemmer_c/Makefile: -------------------------------------------------------------------------------- 1 | include mkinc.mak 2 | CFLAGS=-Iinclude -fPIC $(ARCHFLAGS) 3 | all: libstemmer.o stemwords 4 | libstemmer.o: $(snowball_sources:.c=.o) 5 | $(AR) -cru $@ $^ 6 | stemwords: examples/stemwords.o libstemmer.o 7 | $(CC) -o $@ $^ 8 | clean: 9 | rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o examples/*.o 10 | -------------------------------------------------------------------------------- /libstemmer_c/Makefile.windows: -------------------------------------------------------------------------------- 1 | include mkinc.mak 2 | 3 | AR=i686-w64-mingw32-ar 4 | CC=i686-w64-mingw32-gcc 5 | LD=i686-w64-mingw32-ld 6 | 7 | CFLAGS=-Iinclude 8 | 9 | all: libstemmer.o stemwords 10 | libstemmer.o: $(snowball_sources:.c=.o) 11 | $(AR) -cru $@ $^ 12 | stemwords: examples/stemwords.o libstemmer.o 13 | $(CC) -o $@ $^ 14 | clean: 15 | rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o examples/*.o 16 | -------------------------------------------------------------------------------- /libstemmer_c/README: -------------------------------------------------------------------------------- 1 | libstemmer_c 2 | ============ 3 | 4 | This document pertains to the C version of the libstemmer distribution, 5 | available for download from: 6 | 7 | http://snowball.tartarus.org/dist/libstemmer_c.tgz 8 | 9 | 10 | Compiling the library 11 | ===================== 12 | 13 | A simple makefile is provided for Unix style systems. On such systems, it 14 | should be possible simply to run "make", and the file "libstemmer.o" 15 | and the example program "stemwords" will be generated. 16 | 17 | If this doesn't work on your system, you need to write your own build 18 | system (or call the compiler directly). 
The files to compile are 19 | all contained in the "libstemmer", "runtime" and "src_c" directories, 20 | and the public header file is contained in the "include" directory. 21 | 22 | The library comes in two flavours; UTF-8 only, and UTF-8 plus other character 23 | sets. To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of 24 | "libstemmer.c". 25 | 26 | For convenience "mkinc.mak" is a makefile fragment listing the source files and 27 | header files used to compile the standard version of the library. 28 | "mkinc_utf8.mak" is a comparable makefile fragment listing just the source 29 | files for the UTF-8 only version of the library. 30 | 31 | 32 | Using the library 33 | ================= 34 | 35 | The library provides a simple C API. Essentially, a new stemmer can 36 | be obtained by using "sb_stemmer_new". "sb_stemmer_stem" is then 37 | used to stem a word, "sb_stemmer_length" returns the stemmed 38 | length of the last word processed, and "sb_stemmer_delete" is 39 | used to delete a stemmer. 40 | 41 | Creating a stemmer is a relatively expensive operation - the expected 42 | usage pattern is that a new stemmer is created when needed, used 43 | to stem many words, and deleted after some time. 44 | 45 | Stemmers are re-entrant, but not threadsafe. In other words, if 46 | you wish to access the same stemmer object from multiple threads, 47 | you must ensure that all access is protected by a mutex or similar 48 | device. 49 | 50 | libstemmer does not currently incorporate any mechanism for caching the results 51 | of stemming operations. Such caching can greatly increase the performance of a 52 | stemmer under certain situations, so suitable patches will be considered for 53 | inclusion. 54 | 55 | The standard libstemmer sources contain an algorithm for each of the supported 56 | languages. The algorithm may be selected using the english name of the 57 | language, or using the 2 or 3 letter ISO 639 language codes. 
In addition, 58 | the traditional "Porter" stemming algorithm for english is included for 59 | backwards compatibility purposes, but we recommend use of the "English" 60 | stemmer in preference for new projects. 61 | 62 | (Some minor algorithms which are included only as curiosities in the snowball 63 | website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not 64 | included in the standard libstemmer sources. These are not really supported by 65 | the snowball project, but it would be possible to compile a modified libstemmer 66 | library containing these if desired.) 67 | 68 | 69 | The stemwords example 70 | ===================== 71 | 72 | The stemwords example program allows you to run any of the stemmers 73 | compiled into the libstemmer library on a sample vocabulary. For 74 | details on how to use it, run it with the "-h" command line option. 75 | 76 | 77 | Using the library in a larger system 78 | ==================================== 79 | 80 | If you are incorporating the library into the build system of a larger 81 | program, I recommend copying the unpacked tarball without modification into 82 | a subdirectory of the sources of your program. Future versions of the 83 | library are intended to keep the same structure, so this will keep the 84 | work required to move to a new version of the library to a minimum. 85 | 86 | As an additional convenience, the list of source and header files used 87 | in the library is detailed in mkinc.mak - a file which is in a suitable 88 | format for inclusion by a Makefile. By including this file in your build 89 | system, you can link the snowball system into your program with a few 90 | extra rules. 
91 | 92 | Using the library in a system using GNU autotools 93 | ================================================= 94 | 95 | The libstemmer_c library can be integrated into a larger system which uses the 96 | GNU autotool framework (and in particular, automake and autoconf) as follows: 97 | 98 | 1) Unpack libstemmer_c.tgz in the top level project directory so that there is 99 | a libstemmer_c subdirectory of the top level directory of the project. 100 | 101 | 2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing: 102 | 103 | noinst_LTLIBRARIES = libstemmer.la 104 | include $(srcdir)/mkinc.mak 105 | noinst_HEADERS = $(snowball_headers) 106 | libstemmer_la_SOURCES = $(snowball_sources) 107 | 108 | (You may also need to add other lines to this, for example, if you are using 109 | compiler options which are not compatible with compiling the libstemmer 110 | library.) 111 | 112 | 3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's 113 | configure.ac file. 114 | 115 | 4) Add to the top level makefile the following lines (or modify existing 116 | assignments to these variables appropriately): 117 | 118 | AUTOMAKE_OPTIONS = subdir-objects 119 | AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include 120 | SUBDIRS=libstemmer_c 121 | _LIBADD = libstemmer_c/libstemmer.la 122 | 123 | (Where is the name of the library or executable which links against 124 | libstemmer.) 125 | 126 | -------------------------------------------------------------------------------- /libstemmer_c/examples/stemwords.c: -------------------------------------------------------------------------------- 1 | /* This is a simple program which uses libstemmer to provide a command 2 | * line interface for stemming using any of the algorithms provided. 
3 | */ 4 | 5 | #include 6 | #include /* for malloc, free */ 7 | #include /* for memmove */ 8 | #include /* for isupper, tolower */ 9 | 10 | #include "libstemmer.h" 11 | 12 | const char * progname; 13 | static int pretty = 1; 14 | 15 | static void 16 | stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out) 17 | { 18 | #define INC 10 19 | int lim = INC; 20 | sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol)); 21 | 22 | while(1) { 23 | int ch = getc(f_in); 24 | if (ch == EOF) { 25 | free(b); return; 26 | } 27 | { 28 | int i = 0; 29 | int inlen = 0; 30 | while(1) { 31 | if (ch == '\n' || ch == EOF) break; 32 | if (i == lim) { 33 | sb_symbol * newb; 34 | newb = (sb_symbol *) 35 | realloc(b, (lim + INC) * sizeof(sb_symbol)); 36 | if (newb == 0) goto error; 37 | b = newb; 38 | lim = lim + INC; 39 | } 40 | /* Update count of utf-8 characters. */ 41 | if (ch < 0x80 || ch > 0xBF) inlen += 1; 42 | /* force lower case: */ 43 | if (isupper(ch)) ch = tolower(ch); 44 | 45 | b[i] = ch; 46 | i++; 47 | ch = getc(f_in); 48 | } 49 | 50 | { 51 | const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i); 52 | if (stemmed == NULL) 53 | { 54 | fprintf(stderr, "Out of memory"); 55 | exit(1); 56 | } 57 | else 58 | { 59 | if (pretty == 1) { 60 | fwrite(b, i, 1, f_out); 61 | fputs(" -> ", f_out); 62 | } else if (pretty == 2) { 63 | fwrite(b, i, 1, f_out); 64 | if (sb_stemmer_length(stemmer) > 0) { 65 | int j; 66 | if (inlen < 30) { 67 | for (j = 30 - inlen; j > 0; j--) 68 | fputs(" ", f_out); 69 | } else { 70 | fputs("\n", f_out); 71 | for (j = 30; j > 0; j--) 72 | fputs(" ", f_out); 73 | } 74 | } 75 | } 76 | 77 | fputs((char *)stemmed, f_out); 78 | putc('\n', f_out); 79 | } 80 | } 81 | } 82 | } 83 | error: 84 | if (b != 0) free(b); 85 | return; 86 | } 87 | 88 | /** Display the command line syntax, and then exit. 89 | * @param n The value to exit with. 
90 | */ 91 | static void 92 | usage(int n) 93 | { 94 | printf("usage: %s [-l ] [-i ] [-o ] [-c ] [-p[2]] [-h]\n" 95 | "\n" 96 | "The input file consists of a list of words to be stemmed, one per\n" 97 | "line. Words should be in lower case, but (for English) A-Z letters\n" 98 | "are mapped to their a-z equivalents anyway. If omitted, stdin is\n" 99 | "used.\n" 100 | "\n" 101 | "If -c is given, the argument is the character encoding of the input\n" 102 | "and output files. If it is omitted, the UTF-8 encoding is used.\n" 103 | "\n" 104 | "If -p is given the output file consists of each word of the input\n" 105 | "file followed by \"->\" followed by its stemmed equivalent.\n" 106 | "If -p2 is given the output file is a two column layout containing\n" 107 | "the input words in the first column and the stemmed eqivalents in\n" 108 | "the second column.\n" 109 | "Otherwise, the output file consists of the stemmed words, one per\n" 110 | "line.\n" 111 | "\n" 112 | "-h displays this help\n", 113 | progname); 114 | exit(n); 115 | } 116 | 117 | int 118 | main(int argc, char * argv[]) 119 | { 120 | char * in = 0; 121 | char * out = 0; 122 | FILE * f_in; 123 | FILE * f_out; 124 | struct sb_stemmer * stemmer; 125 | 126 | char * language = "english"; 127 | char * charenc = NULL; 128 | 129 | char * s; 130 | int i = 1; 131 | pretty = 0; 132 | 133 | progname = argv[0]; 134 | 135 | while(i < argc) { 136 | s = argv[i++]; 137 | if (s[0] == '-') { 138 | if (strcmp(s, "-o") == 0) { 139 | if (i >= argc) { 140 | fprintf(stderr, "%s requires an argument\n", s); 141 | exit(1); 142 | } 143 | out = argv[i++]; 144 | } else if (strcmp(s, "-i") == 0) { 145 | if (i >= argc) { 146 | fprintf(stderr, "%s requires an argument\n", s); 147 | exit(1); 148 | } 149 | in = argv[i++]; 150 | } else if (strcmp(s, "-l") == 0) { 151 | if (i >= argc) { 152 | fprintf(stderr, "%s requires an argument\n", s); 153 | exit(1); 154 | } 155 | language = argv[i++]; 156 | } else if (strcmp(s, "-c") == 0) { 157 | if (i >= 
argc) { 158 | fprintf(stderr, "%s requires an argument\n", s); 159 | exit(1); 160 | } 161 | charenc = argv[i++]; 162 | } else if (strcmp(s, "-p2") == 0) { 163 | pretty = 2; 164 | } else if (strcmp(s, "-p") == 0) { 165 | pretty = 1; 166 | } else if (strcmp(s, "-h") == 0) { 167 | usage(0); 168 | } else { 169 | fprintf(stderr, "option %s unknown\n", s); 170 | usage(1); 171 | } 172 | } else { 173 | fprintf(stderr, "unexpected parameter %s\n", s); 174 | usage(1); 175 | } 176 | } 177 | 178 | /* prepare the files */ 179 | f_in = (in == 0) ? stdin : fopen(in, "r"); 180 | if (f_in == 0) { 181 | fprintf(stderr, "file %s not found\n", in); 182 | exit(1); 183 | } 184 | f_out = (out == 0) ? stdout : fopen(out, "w"); 185 | if (f_out == 0) { 186 | fprintf(stderr, "file %s cannot be opened\n", out); 187 | exit(1); 188 | } 189 | 190 | /* do the stemming process: */ 191 | stemmer = sb_stemmer_new(language, charenc); 192 | if (stemmer == 0) { 193 | if (charenc == NULL) { 194 | fprintf(stderr, "language `%s' not available for stemming\n", language); 195 | exit(1); 196 | } else { 197 | fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc); 198 | exit(1); 199 | } 200 | } 201 | stem_file(stemmer, f_in, f_out); 202 | sb_stemmer_delete(stemmer); 203 | 204 | if (in != 0) (void) fclose(f_in); 205 | if (out != 0) (void) fclose(f_out); 206 | 207 | return 0; 208 | } 209 | 210 | -------------------------------------------------------------------------------- /libstemmer_c/include/libstemmer.h: -------------------------------------------------------------------------------- 1 | 2 | /* Make header file work when included from C++ */ 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | struct sb_stemmer; 8 | typedef unsigned char sb_symbol; 9 | 10 | /* FIXME - should be able to get a version number for each stemming 11 | * algorithm (which will be incremented each time the output changes). 
*/ 12 | 13 | /** Returns an array of the names of the available stemming algorithms. 14 | * Note that these are the canonical names - aliases (ie, other names for 15 | * the same algorithm) will not be included in the list. 16 | * The list is terminated with a null pointer. 17 | * 18 | * The list must not be modified in any way. 19 | */ 20 | const char ** sb_stemmer_list(void); 21 | 22 | /** Create a new stemmer object, using the specified algorithm, for the 23 | * specified character encoding. 24 | * 25 | * All algorithms will usually be available in UTF-8, but may also be 26 | * available in other character encodings. 27 | * 28 | * @param algorithm The algorithm name. This is either the english 29 | * name of the algorithm, or the 2 or 3 letter ISO 639 code for the 30 | * language. Note that case is significant in this parameter - the 31 | * value should be supplied in lower case. 32 | * 33 | * @param charenc The character encoding. NULL may be passed as 34 | * this value, in which case UTF-8 encoding will be assumed. Otherwise, 35 | * the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1), 36 | * "ISO_8859_2" (ie, Latin 2) or "KOI8_R" (Russian). Note that 37 | * case is significant in this parameter. 38 | * 39 | * @return NULL if the specified algorithm is not recognised, or the 40 | * algorithm is not available for the requested encoding. Otherwise, 41 | * returns a pointer to a newly created stemmer for the requested algorithm. 42 | * The returned pointer must be deleted by calling sb_stemmer_delete(). 43 | * 44 | * @note NULL will also be returned if an out of memory error occurs. 45 | */ 46 | struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); 47 | 48 | /** Delete a stemmer object. 49 | * 50 | * This frees all resources allocated for the stemmer. After calling 51 | * this function, the supplied stemmer may no longer be used in any way. 
52 | * 53 | * It is safe to pass a null pointer to this function - this will have 54 | * no effect. 55 | */ 56 | void sb_stemmer_delete(struct sb_stemmer * stemmer); 57 | 58 | /** Stem a word. 59 | * 60 | * The return value is owned by the stemmer - it must not be freed or 61 | * modified, and it will become invalid when the stemmer is called again, 62 | * or if the stemmer is freed. 63 | * 64 | * The length of the return value can be obtained using sb_stemmer_length(). 65 | * 66 | * If an out-of-memory error occurs, this will return NULL. 67 | */ 68 | const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, 69 | const sb_symbol * word, int size); 70 | 71 | /** Get the length of the result of the last stemmed word. 72 | * This should not be called before sb_stemmer_stem() has been called. 73 | */ 74 | int sb_stemmer_length(struct sb_stemmer * stemmer); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/libstemmer.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "../include/libstemmer.h" 5 | #include "../runtime/api.h" 6 | #include "modules.h" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct SN_env *); 11 | int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const 
char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer = 40 | (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 41 | if (stemmer == NULL) return NULL; 42 | enc = sb_getenc(charenc); 43 | if (enc == ENC_UNKNOWN) return NULL; 44 | 45 | for (module = modules; module->name != 0; module++) { 46 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 47 | } 48 | if (module->name == NULL) return NULL; 49 | 50 | stemmer->create = module->create; 51 | stemmer->close = module->close; 52 | stemmer->stem = module->stem; 53 | 54 | stemmer->env = stemmer->create(); 55 | if (stemmer->env == NULL) 56 | { 57 | sb_stemmer_delete(stemmer); 58 | return NULL; 59 | } 60 | 61 | return stemmer; 62 | } 63 | 64 | void 65 | sb_stemmer_delete(struct sb_stemmer * stemmer) 66 | { 67 | if (stemmer == 0) return; 68 | if (stemmer->close == 0) return; 69 | stemmer->close(stemmer->env); 70 | stemmer->close = 0; 71 | free(stemmer); 72 | } 73 | 74 | const sb_symbol * 75 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 76 | { 77 | int ret; 78 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 79 | { 80 | stemmer->env->l = 0; 81 | return NULL; 82 | } 83 | ret = stemmer->stem(stemmer->env); 84 | if (ret < 0) return NULL; 85 | stemmer->env->p[stemmer->env->l] = 0; 86 | return (const sb_symbol *)(stemmer->env->p); 87 | } 88 | 89 | int 90 | sb_stemmer_length(struct sb_stemmer * stemmer) 91 | { 92 | return stemmer->env->l; 93 | } 94 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/libstemmer_utf8.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include "../include/libstemmer.h" 5 | #include "../runtime/api.h" 6 | #include "modules_utf8.h" 7 | 8 | struct sb_stemmer { 9 | struct SN_env * (*create)(void); 10 | void (*close)(struct SN_env *); 11 | 
int (*stem)(struct SN_env *); 12 | 13 | struct SN_env * env; 14 | }; 15 | 16 | extern const char ** 17 | sb_stemmer_list(void) 18 | { 19 | return algorithm_names; 20 | } 21 | 22 | static stemmer_encoding_t 23 | sb_getenc(const char * charenc) 24 | { 25 | struct stemmer_encoding * encoding; 26 | if (charenc == NULL) return ENC_UTF_8; 27 | for (encoding = encodings; encoding->name != 0; encoding++) { 28 | if (strcmp(encoding->name, charenc) == 0) break; 29 | } 30 | if (encoding->name == NULL) return ENC_UNKNOWN; 31 | return encoding->enc; 32 | } 33 | 34 | extern struct sb_stemmer * 35 | sb_stemmer_new(const char * algorithm, const char * charenc) 36 | { 37 | stemmer_encoding_t enc; 38 | struct stemmer_modules * module; 39 | struct sb_stemmer * stemmer = 40 | (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); 41 | if (stemmer == NULL) return NULL; 42 | enc = sb_getenc(charenc); 43 | if (enc == ENC_UNKNOWN) return NULL; 44 | 45 | for (module = modules; module->name != 0; module++) { 46 | if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; 47 | } 48 | if (module->name == NULL) return NULL; 49 | 50 | stemmer->create = module->create; 51 | stemmer->close = module->close; 52 | stemmer->stem = module->stem; 53 | 54 | stemmer->env = stemmer->create(); 55 | if (stemmer->env == NULL) 56 | { 57 | sb_stemmer_delete(stemmer); 58 | return NULL; 59 | } 60 | 61 | return stemmer; 62 | } 63 | 64 | void 65 | sb_stemmer_delete(struct sb_stemmer * stemmer) 66 | { 67 | if (stemmer == 0) return; 68 | if (stemmer->close == 0) return; 69 | stemmer->close(stemmer->env); 70 | stemmer->close = 0; 71 | free(stemmer); 72 | } 73 | 74 | const sb_symbol * 75 | sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) 76 | { 77 | int ret; 78 | if (SN_set_current(stemmer->env, size, (const symbol *)(word))) 79 | { 80 | stemmer->env->l = 0; 81 | return NULL; 82 | } 83 | ret = stemmer->stem(stemmer->env); 84 | if (ret < 0) return NULL; 85 | 
stemmer->env->p[stemmer->env->l] = 0; 86 | return (const sb_symbol *)(stemmer->env->p); 87 | } 88 | 89 | int 90 | sb_stemmer_length(struct sb_stemmer * stemmer) 91 | { 92 | return stemmer->env->l; 93 | } 94 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules.h: -------------------------------------------------------------------------------- 1 | /* libstemmer/modules.h: List of stemming modules. 2 | * 3 | * This file is generated by mkmodules.pl from a list of module names. 4 | * Do not edit manually. 5 | * 6 | * Modules included by this file are: latin, danish, dutch, english, finnish, french, 7 | * german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | * russian, spanish, swedish, turkish 9 | */ 10 | 11 | #include "../src_c/stem_ISO_8859_1_latin.h" 12 | #include "../src_c/stem_UTF_8_latin.h" 13 | #include "../src_c/stem_ISO_8859_1_danish.h" 14 | #include "../src_c/stem_UTF_8_danish.h" 15 | #include "../src_c/stem_ISO_8859_1_dutch.h" 16 | #include "../src_c/stem_UTF_8_dutch.h" 17 | #include "../src_c/stem_ISO_8859_1_english.h" 18 | #include "../src_c/stem_UTF_8_english.h" 19 | #include "../src_c/stem_ISO_8859_1_finnish.h" 20 | #include "../src_c/stem_UTF_8_finnish.h" 21 | #include "../src_c/stem_ISO_8859_1_french.h" 22 | #include "../src_c/stem_UTF_8_french.h" 23 | #include "../src_c/stem_ISO_8859_1_german.h" 24 | #include "../src_c/stem_UTF_8_german.h" 25 | #include "../src_c/stem_ISO_8859_1_hungarian.h" 26 | #include "../src_c/stem_UTF_8_hungarian.h" 27 | #include "../src_c/stem_ISO_8859_1_italian.h" 28 | #include "../src_c/stem_UTF_8_italian.h" 29 | #include "../src_c/stem_ISO_8859_1_norwegian.h" 30 | #include "../src_c/stem_UTF_8_norwegian.h" 31 | #include "../src_c/stem_ISO_8859_1_porter.h" 32 | #include "../src_c/stem_UTF_8_porter.h" 33 | #include "../src_c/stem_ISO_8859_1_portuguese.h" 34 | #include "../src_c/stem_UTF_8_portuguese.h" 35 | #include 
"../src_c/stem_ISO_8859_2_romanian.h" 36 | #include "../src_c/stem_UTF_8_romanian.h" 37 | #include "../src_c/stem_KOI8_R_russian.h" 38 | #include "../src_c/stem_UTF_8_russian.h" 39 | #include "../src_c/stem_ISO_8859_1_spanish.h" 40 | #include "../src_c/stem_UTF_8_spanish.h" 41 | #include "../src_c/stem_ISO_8859_1_swedish.h" 42 | #include "../src_c/stem_UTF_8_swedish.h" 43 | #include "../src_c/stem_UTF_8_turkish.h" 44 | #include "../src_c/stem_UTF_8_lithuanian.h" 45 | 46 | typedef enum { 47 | ENC_UNKNOWN=0, 48 | ENC_ISO_8859_1, 49 | ENC_ISO_8859_2, 50 | ENC_KOI8_R, 51 | ENC_UTF_8 52 | } stemmer_encoding_t; 53 | 54 | struct stemmer_encoding { 55 | const char * name; 56 | stemmer_encoding_t enc; 57 | }; 58 | static struct stemmer_encoding encodings[] = { 59 | {"ISO_8859_1", ENC_ISO_8859_1}, 60 | {"ISO_8859_2", ENC_ISO_8859_2}, 61 | {"KOI8_R", ENC_KOI8_R}, 62 | {"UTF_8", ENC_UTF_8}, 63 | {0,ENC_UNKNOWN} 64 | }; 65 | 66 | struct stemmer_modules { 67 | const char * name; 68 | stemmer_encoding_t enc; 69 | struct SN_env * (*create)(void); 70 | void (*close)(struct SN_env *); 71 | int (*stem)(struct SN_env *); 72 | }; 73 | static struct stemmer_modules modules[] = { 74 | {"latin", ENC_ISO_8859_1, latin_ISO_8859_1_create_env, latin_ISO_8859_1_close_env, latin_ISO_8859_1_stem}, 75 | {"latin", ENC_UTF_8, latin_UTF_8_create_env, latin_UTF_8_close_env, latin_UTF_8_stem}, 76 | {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 77 | {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 78 | {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 79 | {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 80 | {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, 81 | {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, 
danish_UTF_8_stem}, 82 | {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 83 | {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 84 | {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 85 | {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 86 | {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 87 | {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 88 | {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 89 | {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 90 | {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 91 | {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 92 | {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 93 | {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 94 | {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, 95 | {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 96 | {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 97 | {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 98 | {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 99 | {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 100 | {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, 
finnish_ISO_8859_1_stem}, 101 | {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 102 | {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, 103 | {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 104 | {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, 105 | {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 106 | {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 107 | {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 108 | {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 109 | {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 110 | {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 111 | {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 112 | {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, 113 | {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 114 | {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 115 | {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 116 | {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, 117 | {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 118 | {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 119 | {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, 
hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 120 | {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 121 | {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 122 | {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, 123 | {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 124 | {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 125 | {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 126 | {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 127 | {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 128 | {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, 129 | {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 130 | {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 131 | {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 132 | {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, 133 | {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 134 | {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 135 | {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 136 | {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 137 | {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, 
norwegian_UTF_8_stem}, 138 | {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, 139 | {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 140 | {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 141 | {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 142 | {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, 143 | {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, 144 | {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 145 | {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 146 | {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, 147 | {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 148 | {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 149 | {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 150 | {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 151 | {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 152 | {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 153 | {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 154 | {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 155 | {"ru", ENC_UTF_8, 
russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 156 | {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, 157 | {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 158 | {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 159 | {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 160 | {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, 161 | {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 162 | {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 163 | {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 164 | {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, 165 | {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 166 | {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 167 | {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 168 | {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 169 | {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 170 | {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, 171 | {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 172 | {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 173 | {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 174 | {"turkish", 
ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 175 | {"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 176 | {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 177 | {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 178 | {0,ENC_UNKNOWN,0,0,0} 179 | }; 180 | static const char * algorithm_names[] = { 181 | "latin", 182 | "danish", 183 | "dutch", 184 | "english", 185 | "finnish", 186 | "french", 187 | "german", 188 | "hungarian", 189 | "italian", 190 | "norwegian", 191 | "porter", 192 | "portuguese", 193 | "romanian", 194 | "russian", 195 | "spanish", 196 | "swedish", 197 | "turkish", 198 | "lithuanian", 199 | 0 200 | }; 201 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules.txt: -------------------------------------------------------------------------------- 1 | # This file contains a list of stemmers to include in the distribution. 2 | # The format is a set of space separated lines - on each line: 3 | # First item is name of stemmer. 4 | # Second item is comma separated list of character sets. 5 | # Third item is comma separated list of names to refer to the stemmer by. 6 | # 7 | # Lines starting with a #, or blank lines, are ignored. 8 | 9 | # List all the main algorithms for each language, in UTF-8, and also with 10 | # the most commonly used encoding. 
11 | 12 | latin UTF_8,ISO_8859_1 latin 13 | danish UTF_8,ISO_8859_1 danish,da,dan 14 | dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld 15 | english UTF_8,ISO_8859_1 english,en,eng 16 | finnish UTF_8,ISO_8859_1 finnish,fi,fin 17 | french UTF_8,ISO_8859_1 french,fr,fre,fra 18 | german UTF_8,ISO_8859_1 german,de,ger,deu 19 | hungarian UTF_8,ISO_8859_1 hungarian,hu,hun 20 | italian UTF_8,ISO_8859_1 italian,it,ita 21 | norwegian UTF_8,ISO_8859_1 norwegian,no,nor 22 | portuguese UTF_8,ISO_8859_1 portuguese,pt,por 23 | romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron 24 | russian UTF_8,KOI8_R russian,ru,rus 25 | spanish UTF_8,ISO_8859_1 spanish,es,esl,spa 26 | swedish UTF_8,ISO_8859_1 swedish,sv,swe 27 | turkish UTF_8 turkish,tr,tur 28 | 29 | # Also include the traditional porter algorithm for english. 30 | # The porter algorithm is included in the libstemmer distribution to assist 31 | # with backwards compatibility, but for new systems the english algorithm 32 | # should be used in preference. 33 | porter UTF_8,ISO_8859_1 porter 34 | 35 | # Some other stemmers in the snowball project are not included in the standard 36 | # distribution. To compile a libstemmer with them in, add them to this list, 37 | # and regenerate the distribution. (You will need a full source checkout for 38 | # this.) They are included on the snowball website as curiosities, but are not 39 | # intended for general use, and use of them is not fully supported. These 40 | # algorithms are: 41 | # 42 | # german2 - This is a slight modification of the german stemmer. 43 | #german2 UTF_8,ISO_8859_1 german2 44 | # 45 | # kraaij_pohlmann - This is a different dutch stemmer. 46 | #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann 47 | # 48 | # lovins - This is an english stemmer, but fairly outdated, and 49 | # only really applicable to a restricted type of input text 50 | # (keywords in academic publications). 
51 | #lovins UTF_8,ISO_8859_1 lovins 52 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules_utf8.h: -------------------------------------------------------------------------------- 1 | /* libstemmer/modules_utf8.h: List of stemming modules. 2 | * 3 | * This file is generated by mkmodules.pl from a list of module names. 4 | * Do not edit manually. 5 | * 6 | * Modules included by this file are: latin, danish, dutch, english, finnish, french, 7 | * german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | * russian, spanish, swedish, turkish 9 | */ 10 | 11 | #include "../src_c/stem_UTF_8_latin.h" 12 | #include "../src_c/stem_UTF_8_danish.h" 13 | #include "../src_c/stem_UTF_8_dutch.h" 14 | #include "../src_c/stem_UTF_8_english.h" 15 | #include "../src_c/stem_UTF_8_finnish.h" 16 | #include "../src_c/stem_UTF_8_french.h" 17 | #include "../src_c/stem_UTF_8_german.h" 18 | #include "../src_c/stem_UTF_8_hungarian.h" 19 | #include "../src_c/stem_UTF_8_italian.h" 20 | #include "../src_c/stem_UTF_8_norwegian.h" 21 | #include "../src_c/stem_UTF_8_porter.h" 22 | #include "../src_c/stem_UTF_8_portuguese.h" 23 | #include "../src_c/stem_UTF_8_romanian.h" 24 | #include "../src_c/stem_UTF_8_russian.h" 25 | #include "../src_c/stem_UTF_8_spanish.h" 26 | #include "../src_c/stem_UTF_8_swedish.h" 27 | #include "../src_c/stem_UTF_8_turkish.h" 28 | #include "../src_c/stem_UTF_8_lithuanian.h" 29 | 30 | typedef enum { 31 | ENC_UNKNOWN=0, 32 | ENC_UTF_8 33 | } stemmer_encoding_t; 34 | 35 | struct stemmer_encoding { 36 | const char * name; 37 | stemmer_encoding_t enc; 38 | }; 39 | static struct stemmer_encoding encodings[] = { 40 | {"UTF_8", ENC_UTF_8}, 41 | {0,ENC_UNKNOWN} 42 | }; 43 | 44 | struct stemmer_modules { 45 | const char * name; 46 | stemmer_encoding_t enc; 47 | struct SN_env * (*create)(void); 48 | void (*close)(struct SN_env *); 49 | int (*stem)(struct SN_env *); 50 | }; 51 | static struct stemmer_modules 
modules[] = { 52 | {"latin", ENC_UTF_8, latin_UTF_8_create_env, latin_UTF_8_close_env, latin_UTF_8_stem}, 53 | {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 54 | {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 55 | {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, 56 | {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 57 | {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 58 | {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 59 | {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 60 | {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 61 | {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 62 | {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 63 | {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 64 | {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 65 | {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 66 | {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 67 | {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, 68 | {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 69 | {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 70 | {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 71 | {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, 72 | {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 73 | 
{"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, 74 | {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 75 | {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 76 | {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, 77 | {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 78 | {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 79 | {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, 80 | {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 81 | {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, 82 | {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 83 | {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 84 | {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, 85 | {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 86 | {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, 87 | {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 88 | {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, 89 | {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 90 | {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 91 | {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 92 | {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 93 
| {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, 94 | {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 95 | {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, 96 | {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 97 | {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, 98 | {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 99 | {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 100 | {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 101 | {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 102 | {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 103 | {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, 104 | {"lt", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 105 | {"lit", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 106 | {"lithuanian", ENC_UTF_8, lithuanian_UTF_8_create_env, lithuanian_UTF_8_close_env, lithuanian_UTF_8_stem}, 107 | {0,ENC_UNKNOWN,0,0,0} 108 | }; 109 | static const char * algorithm_names[] = { 110 | "danish", 111 | "dutch", 112 | "english", 113 | "finnish", 114 | "french", 115 | "german", 116 | "hungarian", 117 | "italian", 118 | "norwegian", 119 | "porter", 120 | "portuguese", 121 | "romanian", 122 | "russian", 123 | "spanish", 124 | "swedish", 125 | "turkish", 126 | "lithuanian", 127 | 0 128 | }; 129 | -------------------------------------------------------------------------------- /libstemmer_c/libstemmer/modules_utf8.txt: 
-------------------------------------------------------------------------------- 1 | # This file contains a list of stemmers to include in the distribution. 2 | # The format is a set of space separated lines - on each line: 3 | # First item is name of stemmer. 4 | # Second item is comma separated list of character sets. 5 | # Third item is comma separated list of names to refer to the stemmer by. 6 | # 7 | # Lines starting with a #, or blank lines, are ignored. 8 | 9 | # List all the main algorithms for each language, in UTF-8. 10 | 11 | latin UTF_8 latin 12 | danish UTF_8 danish,da,dan 13 | dutch UTF_8 dutch,nl,dut,nld 14 | english UTF_8 english,en,eng 15 | finnish UTF_8 finnish,fi,fin 16 | french UTF_8 french,fr,fre,fra 17 | german UTF_8 german,de,ger,deu 18 | hungarian UTF_8 hungarian,hu,hun 19 | italian UTF_8 italian,it,ita 20 | norwegian UTF_8 norwegian,no,nor 21 | portuguese UTF_8 portuguese,pt,por 22 | romanian UTF_8 romanian,ro,rum,ron 23 | russian UTF_8 russian,ru,rus 24 | spanish UTF_8 spanish,es,esl,spa 25 | swedish UTF_8 swedish,sv,swe 26 | turkish UTF_8 turkish,tr,tur 27 | lithuanian UTF_8 lithuanian,lt,lit 28 | 29 | # Also include the traditional porter algorithm for english. 30 | # The porter algorithm is included in the libstemmer distribution to assist 31 | # with backwards compatibility, but for new systems the english algorithm 32 | # should be used in preference. 33 | porter UTF_8 porter 34 | 35 | # Some other stemmers in the snowball project are not included in the standard 36 | # distribution. To compile a libstemmer with them in, add them to this list, 37 | # and regenerate the distribution. (You will need a full source checkout for 38 | # this.) They are included in the snowball website as curiosities, but are not 39 | # intended for general use, and use of them is not fully supported. These 40 | # algorithms are: 41 | # 42 | # german2 - This is a slight modification of the german stemmer.
43 | #german2 UTF_8 german2 44 | # 45 | # kraaij_pohlmann - This is a different dutch stemmer. 46 | #kraaij_pohlmann UTF_8 kraaij_pohlmann 47 | # 48 | # lovins - This is an english stemmer, but fairly outdated, and 49 | # only really applicable to a restricted type of input text 50 | # (keywords in academic publications). 51 | #lovins UTF_8 lovins 52 | -------------------------------------------------------------------------------- /libstemmer_c/mkinc.mak: -------------------------------------------------------------------------------- 1 | # libstemmer/mkinc.mak: List of stemming module source files 2 | # 3 | # This file is generated by mkmodules.pl from a list of module names. 4 | # Do not edit manually. 5 | # 6 | # Modules included by this file are: latin, danish, dutch, english, finnish, french, 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | # russian, spanish, swedish, turkish, lithuanian 9 | 10 | snowball_sources= \ 11 | src_c/stem_ISO_8859_1_latin.c \ 12 | src_c/stem_UTF_8_latin.c \ 13 | src_c/stem_ISO_8859_1_danish.c \ 14 | src_c/stem_UTF_8_danish.c \ 15 | src_c/stem_ISO_8859_1_dutch.c \ 16 | src_c/stem_UTF_8_dutch.c \ 17 | src_c/stem_ISO_8859_1_english.c \ 18 | src_c/stem_UTF_8_english.c \ 19 | src_c/stem_ISO_8859_1_finnish.c \ 20 | src_c/stem_UTF_8_finnish.c \ 21 | src_c/stem_ISO_8859_1_french.c \ 22 | src_c/stem_UTF_8_french.c \ 23 | src_c/stem_ISO_8859_1_german.c \ 24 | src_c/stem_UTF_8_german.c \ 25 | src_c/stem_ISO_8859_1_hungarian.c \ 26 | src_c/stem_UTF_8_hungarian.c \ 27 | src_c/stem_ISO_8859_1_italian.c \ 28 | src_c/stem_UTF_8_italian.c \ 29 | src_c/stem_ISO_8859_1_norwegian.c \ 30 | src_c/stem_UTF_8_norwegian.c \ 31 | src_c/stem_ISO_8859_1_porter.c \ 32 | src_c/stem_UTF_8_porter.c \ 33 | src_c/stem_ISO_8859_1_portuguese.c \ 34 | src_c/stem_UTF_8_portuguese.c \ 35 | src_c/stem_ISO_8859_2_romanian.c \ 36 | src_c/stem_UTF_8_romanian.c \ 37 | src_c/stem_KOI8_R_russian.c \ 38 | src_c/stem_UTF_8_russian.c \ 39 |
src_c/stem_ISO_8859_1_spanish.c \ 40 | src_c/stem_UTF_8_spanish.c \ 41 | src_c/stem_ISO_8859_1_swedish.c \ 42 | src_c/stem_UTF_8_swedish.c \ 43 | src_c/stem_UTF_8_turkish.c \ 44 | src_c/stem_UTF_8_lithuanian.c \ 45 | runtime/api.c \ 46 | runtime/utilities.c \ 47 | libstemmer/libstemmer.c 48 | 49 | snowball_headers= \ 50 | src_c/stem_ISO_8859_1_latin.h \ 51 | src_c/stem_UTF_8_latin.h \ 52 | src_c/stem_ISO_8859_1_danish.h \ 53 | src_c/stem_UTF_8_danish.h \ 54 | src_c/stem_ISO_8859_1_dutch.h \ 55 | src_c/stem_UTF_8_dutch.h \ 56 | src_c/stem_ISO_8859_1_english.h \ 57 | src_c/stem_UTF_8_english.h \ 58 | src_c/stem_ISO_8859_1_finnish.h \ 59 | src_c/stem_UTF_8_finnish.h \ 60 | src_c/stem_ISO_8859_1_french.h \ 61 | src_c/stem_UTF_8_french.h \ 62 | src_c/stem_ISO_8859_1_german.h \ 63 | src_c/stem_UTF_8_german.h \ 64 | src_c/stem_ISO_8859_1_hungarian.h \ 65 | src_c/stem_UTF_8_hungarian.h \ 66 | src_c/stem_ISO_8859_1_italian.h \ 67 | src_c/stem_UTF_8_italian.h \ 68 | src_c/stem_ISO_8859_1_norwegian.h \ 69 | src_c/stem_UTF_8_norwegian.h \ 70 | src_c/stem_ISO_8859_1_porter.h \ 71 | src_c/stem_UTF_8_porter.h \ 72 | src_c/stem_ISO_8859_1_portuguese.h \ 73 | src_c/stem_UTF_8_portuguese.h \ 74 | src_c/stem_ISO_8859_2_romanian.h \ 75 | src_c/stem_UTF_8_romanian.h \ 76 | src_c/stem_KOI8_R_russian.h \ 77 | src_c/stem_UTF_8_russian.h \ 78 | src_c/stem_ISO_8859_1_spanish.h \ 79 | src_c/stem_UTF_8_spanish.h \ 80 | src_c/stem_ISO_8859_1_swedish.h \ 81 | src_c/stem_UTF_8_swedish.h \ 82 | src_c/stem_UTF_8_turkish.h \ 83 | src_c/stem_UTF_8_lithuanian.h \ 84 | include/libstemmer.h \ 85 | libstemmer/modules.h \ 86 | runtime/api.h \ 87 | runtime/header.h 88 | 89 | -------------------------------------------------------------------------------- /libstemmer_c/mkinc_utf8.mak: -------------------------------------------------------------------------------- 1 | # libstemmer/mkinc_utf8.mak: List of stemming module source files 2 | # 3 | # This file is generated by mkmodules.pl from a list of module 
names. 4 | # Do not edit manually. 5 | # 6 | # Modules included by this file are: latin, danish, dutch, english, finnish, french, 7 | # german, hungarian, italian, norwegian, porter, portuguese, romanian, 8 | # russian, spanish, swedish, turkish, lithuanian 9 | 10 | snowball_sources= \ 11 | src_c/stem_UTF_8_latin.c \ 12 | src_c/stem_UTF_8_danish.c \ 13 | src_c/stem_UTF_8_dutch.c \ 14 | src_c/stem_UTF_8_english.c \ 15 | src_c/stem_UTF_8_finnish.c \ 16 | src_c/stem_UTF_8_french.c \ 17 | src_c/stem_UTF_8_german.c \ 18 | src_c/stem_UTF_8_hungarian.c \ 19 | src_c/stem_UTF_8_italian.c \ 20 | src_c/stem_UTF_8_norwegian.c \ 21 | src_c/stem_UTF_8_porter.c \ 22 | src_c/stem_UTF_8_portuguese.c \ 23 | src_c/stem_UTF_8_romanian.c \ 24 | src_c/stem_UTF_8_russian.c \ 25 | src_c/stem_UTF_8_spanish.c \ 26 | src_c/stem_UTF_8_swedish.c \ 27 | src_c/stem_UTF_8_turkish.c \ 28 | src_c/stem_UTF_8_lithuanian.c \ 29 | runtime/api.c \ 30 | runtime/utilities.c \ 31 | libstemmer/libstemmer_utf8.c 32 | 33 | snowball_headers= \ 34 | src_c/stem_UTF_8_latin.h \ 35 | src_c/stem_UTF_8_danish.h \ 36 | src_c/stem_UTF_8_dutch.h \ 37 | src_c/stem_UTF_8_english.h \ 38 | src_c/stem_UTF_8_finnish.h \ 39 | src_c/stem_UTF_8_french.h \ 40 | src_c/stem_UTF_8_german.h \ 41 | src_c/stem_UTF_8_hungarian.h \ 42 | src_c/stem_UTF_8_italian.h \ 43 | src_c/stem_UTF_8_norwegian.h \ 44 | src_c/stem_UTF_8_porter.h \ 45 | src_c/stem_UTF_8_portuguese.h \ 46 | src_c/stem_UTF_8_romanian.h \ 47 | src_c/stem_UTF_8_russian.h \ 48 | src_c/stem_UTF_8_spanish.h \ 49 | src_c/stem_UTF_8_swedish.h \ 50 | src_c/stem_UTF_8_turkish.h \ 51 | src_c/stem_UTF_8_lithuanian.h \ 52 | include/libstemmer.h \ 53 | libstemmer/modules_utf8.h \ 54 | runtime/api.h \ 55 | runtime/header.h 56 | 57 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/api.c: -------------------------------------------------------------------------------- 1 | 2 | #include /* for calloc, free */ 3 | #include "header.h" 4 | 5 | extern
struct SN_env * SN_create_env(int S_size, int I_size, int B_size) 6 | { 7 | struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); 8 | if (z == NULL) return NULL; 9 | z->p = create_s(); 10 | if (z->p == NULL) goto error; 11 | if (S_size) 12 | { 13 | int i; 14 | z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); 15 | if (z->S == NULL) goto error; 16 | 17 | for (i = 0; i < S_size; i++) 18 | { 19 | z->S[i] = create_s(); 20 | if (z->S[i] == NULL) goto error; 21 | } 22 | } 23 | 24 | if (I_size) 25 | { 26 | z->I = (int *) calloc(I_size, sizeof(int)); 27 | if (z->I == NULL) goto error; 28 | } 29 | 30 | if (B_size) 31 | { 32 | z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); 33 | if (z->B == NULL) goto error; 34 | } 35 | 36 | return z; 37 | error: 38 | SN_close_env(z, S_size); 39 | return NULL; 40 | } 41 | 42 | extern void SN_close_env(struct SN_env * z, int S_size) 43 | { 44 | if (z == NULL) return; 45 | if (S_size) 46 | { 47 | int i; 48 | for (i = 0; i < S_size; i++) 49 | { 50 | lose_s(z->S[i]); 51 | } 52 | free(z->S); 53 | } 54 | free(z->I); 55 | free(z->B); 56 | if (z->p) lose_s(z->p); 57 | free(z); 58 | } 59 | 60 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s) 61 | { 62 | int err = replace_s(z, 0, z->l, size, s, NULL); 63 | z->c = 0; 64 | return err; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/api.h: -------------------------------------------------------------------------------- 1 | 2 | typedef unsigned char symbol; 3 | 4 | /* Or replace 'char' above with 'short' for 16 bit characters. 5 | 6 | More precisely, replace 'char' with whatever type guarantees the 7 | character width you need. Note however that sizeof(symbol) should divide 8 | HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise 9 | there is an alignment problem. In the unlikely event of a problem here, 10 | consult Martin Porter. 
11 | 12 | */ 13 | 14 | struct SN_env { 15 | symbol * p; 16 | int c; int l; int lb; int bra; int ket; 17 | symbol * * S; 18 | int * I; 19 | unsigned char * B; 20 | }; 21 | 22 | extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); 23 | extern void SN_close_env(struct SN_env * z, int S_size); 24 | 25 | extern int SN_set_current(struct SN_env * z, int size, const symbol * s); 26 | 27 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/header.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include "api.h" 5 | 6 | #define MAXINT INT_MAX 7 | #define MININT INT_MIN 8 | 9 | #define HEAD 2*sizeof(int) 10 | 11 | #define SIZE(p) ((int *)(p))[-1] 12 | #define SET_SIZE(p, n) ((int *)(p))[-1] = n 13 | #define CAPACITY(p) ((int *)(p))[-2] 14 | 15 | struct among 16 | { int s_size; /* number of chars in string */ 17 | const symbol * s; /* search string */ 18 | int substring_i;/* index to longest matching substring */ 19 | int result; /* result of the lookup */ 20 | int (* function)(struct SN_env *); 21 | }; 22 | 23 | extern symbol * create_s(void); 24 | extern void lose_s(symbol * p); 25 | 26 | extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); 27 | 28 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 29 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 30 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 31 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 32 | 33 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 34 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 35 | extern int out_grouping(struct SN_env * z, const 
unsigned char * s, int min, int max, int repeat); 36 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); 37 | 38 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s); 39 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); 40 | extern int eq_v(struct SN_env * z, const symbol * p); 41 | extern int eq_v_b(struct SN_env * z, const symbol * p); 42 | 43 | extern int find_among(struct SN_env * z, const struct among * v, int v_size); 44 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); 45 | 46 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); 47 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); 48 | extern int slice_from_v(struct SN_env * z, const symbol * p); 49 | extern int slice_del(struct SN_env * z); 50 | 51 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s); 52 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); 53 | 54 | extern symbol * slice_to(struct SN_env * z, symbol * p); 55 | extern symbol * assign_to(struct SN_env * z, symbol * p); 56 | 57 | extern void debug(struct SN_env * z, int number, int line_count); 58 | 59 | -------------------------------------------------------------------------------- /libstemmer_c/runtime/utilities.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "header.h" 7 | 8 | #define unless(C) if(!(C)) 9 | 10 | #define CREATE_SIZE 1 11 | 12 | extern symbol * create_s(void) { 13 | symbol * p; 14 | void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); 15 | if (mem == NULL) return NULL; 16 | p = (symbol *) (HEAD + (char *) mem); 17 | CAPACITY(p) = CREATE_SIZE; 18 | SET_SIZE(p, CREATE_SIZE); 19 | return p; 20 | } 21 | 22 | extern void lose_s(symbol * p) { 23 | if (p == NULL) 
return; 24 | free((char *) p - HEAD); 25 | } 26 | 27 | /* 28 | new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c 29 | if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new 30 | position, or 0 on failure. 31 | 32 | -- used to implement hop and next in the utf8 case. 33 | */ 34 | 35 | extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { 36 | int b; 37 | if (n >= 0) { 38 | for (; n > 0; n--) { 39 | if (c >= l) return -1; 40 | b = p[c++]; 41 | if (b >= 0xC0) { /* 1100 0000 */ 42 | while (c < l) { 43 | b = p[c]; 44 | if (b >= 0xC0 || b < 0x80) break; 45 | /* break unless b is 10------ */ 46 | c++; 47 | } 48 | } 49 | } 50 | } else { 51 | for (; n < 0; n++) { 52 | if (c <= lb) return -1; 53 | b = p[--c]; 54 | if (b >= 0x80) { /* 1000 0000 */ 55 | while (c > lb) { 56 | b = p[c]; 57 | if (b >= 0xC0) break; /* 1100 0000 */ 58 | c--; 59 | } 60 | } 61 | } 62 | } 63 | return c; 64 | } 65 | 66 | /* Code for character groupings: utf8 cases */ 67 | 68 | static int get_utf8(const symbol * p, int c, int l, int * slot) { 69 | int b0, b1; 70 | if (c >= l) return 0; 71 | b0 = p[c++]; 72 | if (b0 < 0xC0 || c == l) { /* 1100 0000 */ 73 | * slot = b0; return 1; 74 | } 75 | b1 = p[c++]; 76 | if (b0 < 0xE0 || c == l) { /* 1110 0000 */ 77 | * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; 78 | } 79 | * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; 80 | } 81 | 82 | static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { 83 | int b0, b1; 84 | if (c <= lb) return 0; 85 | b0 = p[--c]; 86 | if (b0 < 0x80 || c == lb) { /* 1000 0000 */ 87 | * slot = b0; return 1; 88 | } 89 | b1 = p[--c]; 90 | if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ 91 | * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; 92 | } 93 | * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; 94 | } 95 | 96 | extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 97 | 
do { 98 | int ch; 99 | int w = get_utf8(z->p, z->c, z->l, & ch); 100 | unless (w) return -1; 101 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 102 | return w; 103 | z->c += w; 104 | } while (repeat); 105 | return 0; 106 | } 107 | 108 | extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 109 | do { 110 | int ch; 111 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 112 | unless (w) return -1; 113 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 114 | return w; 115 | z->c -= w; 116 | } while (repeat); 117 | return 0; 118 | } 119 | 120 | extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 121 | do { 122 | int ch; 123 | int w = get_utf8(z->p, z->c, z->l, & ch); 124 | unless (w) return -1; 125 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 126 | return w; 127 | z->c += w; 128 | } while (repeat); 129 | return 0; 130 | } 131 | 132 | extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 133 | do { 134 | int ch; 135 | int w = get_b_utf8(z->p, z->c, z->lb, & ch); 136 | unless (w) return -1; 137 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 138 | return w; 139 | z->c -= w; 140 | } while (repeat); 141 | return 0; 142 | } 143 | 144 | /* Code for character groupings: non-utf8 cases */ 145 | 146 | extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 147 | do { 148 | int ch; 149 | if (z->c >= z->l) return -1; 150 | ch = z->p[z->c]; 151 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 152 | return 1; 153 | z->c++; 154 | } while (repeat); 155 | return 0; 156 | } 157 | 158 | extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 159 | do { 160 | int ch; 161 | if (z->c <= z->lb) return -1; 
162 | ch = z->p[z->c - 1]; 163 | if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 164 | return 1; 165 | z->c--; 166 | } while (repeat); 167 | return 0; 168 | } 169 | 170 | extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 171 | do { 172 | int ch; 173 | if (z->c >= z->l) return -1; 174 | ch = z->p[z->c]; 175 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 176 | return 1; 177 | z->c++; 178 | } while (repeat); 179 | return 0; 180 | } 181 | 182 | extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { 183 | do { 184 | int ch; 185 | if (z->c <= z->lb) return -1; 186 | ch = z->p[z->c - 1]; 187 | unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) 188 | return 1; 189 | z->c--; 190 | } while (repeat); 191 | return 0; 192 | } 193 | 194 | extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { 195 | if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; 196 | z->c += s_size; return 1; 197 | } 198 | 199 | extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { 200 | if (z->c - z->lb < s_size || memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; 201 | z->c -= s_size; return 1; 202 | } 203 | 204 | extern int eq_v(struct SN_env * z, const symbol * p) { 205 | return eq_s(z, SIZE(p), p); 206 | } 207 | 208 | extern int eq_v_b(struct SN_env * z, const symbol * p) { 209 | return eq_s_b(z, SIZE(p), p); 210 | } 211 | 212 | extern int find_among(struct SN_env * z, const struct among * v, int v_size) { 213 | 214 | int i = 0; 215 | int j = v_size; 216 | 217 | int c = z->c; int l = z->l; 218 | symbol * q = z->p + c; 219 | 220 | const struct among * w; 221 | 222 | int common_i = 0; 223 | int common_j = 0; 224 | 225 | int first_key_inspected = 0; 226 | 227 | while(1) { 228 | int k = i + ((j - i) >> 1); 229 | int diff = 0; 230 | 
int common = common_i < common_j ? common_i : common_j; /* smaller */ 231 | w = v + k; 232 | { 233 | int i2; for (i2 = common; i2 < w->s_size; i2++) { 234 | if (c + common == l) { diff = -1; break; } 235 | diff = q[common] - w->s[i2]; 236 | if (diff != 0) break; 237 | common++; 238 | } 239 | } 240 | if (diff < 0) { j = k; common_j = common; } 241 | else { i = k; common_i = common; } 242 | if (j - i <= 1) { 243 | if (i > 0) break; /* v->s has been inspected */ 244 | if (j == i) break; /* only one item in v */ 245 | 246 | /* - but now we need to go round once more to get 247 | v->s inspected. This looks messy, but is actually 248 | the optimal approach. */ 249 | 250 | if (first_key_inspected) break; 251 | first_key_inspected = 1; 252 | } 253 | } 254 | while(1) { 255 | w = v + i; 256 | if (common_i >= w->s_size) { 257 | z->c = c + w->s_size; 258 | if (w->function == 0) return w->result; 259 | { 260 | int res = w->function(z); 261 | z->c = c + w->s_size; 262 | if (res) return w->result; 263 | } 264 | } 265 | i = w->substring_i; 266 | if (i < 0) return 0; 267 | } 268 | } 269 | 270 | /* find_among_b is for backwards processing. Same comments apply */ 271 | 272 | extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { 273 | 274 | int i = 0; 275 | int j = v_size; 276 | 277 | int c = z->c; int lb = z->lb; 278 | symbol * q = z->p + c - 1; 279 | 280 | const struct among * w; 281 | 282 | int common_i = 0; 283 | int common_j = 0; 284 | 285 | int first_key_inspected = 0; 286 | 287 | while(1) { 288 | int k = i + ((j - i) >> 1); 289 | int diff = 0; 290 | int common = common_i < common_j ? 
common_i : common_j; 291 | w = v + k; 292 | { 293 | int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { 294 | if (c - common == lb) { diff = -1; break; } 295 | diff = q[- common] - w->s[i2]; 296 | if (diff != 0) break; 297 | common++; 298 | } 299 | } 300 | if (diff < 0) { j = k; common_j = common; } 301 | else { i = k; common_i = common; } 302 | if (j - i <= 1) { 303 | if (i > 0) break; 304 | if (j == i) break; 305 | if (first_key_inspected) break; 306 | first_key_inspected = 1; 307 | } 308 | } 309 | while(1) { 310 | w = v + i; 311 | if (common_i >= w->s_size) { 312 | z->c = c - w->s_size; 313 | if (w->function == 0) return w->result; 314 | { 315 | int res = w->function(z); 316 | z->c = c - w->s_size; 317 | if (res) return w->result; 318 | } 319 | } 320 | i = w->substring_i; 321 | if (i < 0) return 0; 322 | } 323 | } 324 | 325 | 326 | /* Increase the size of the buffer pointed to by p to at least n symbols. 327 | * If insufficient memory, returns NULL and frees the old buffer. 328 | */ 329 | static symbol * increase_size(symbol * p, int n) { 330 | symbol * q; 331 | int new_size = n + 20; 332 | void * mem = realloc((char *) p - HEAD, 333 | HEAD + (new_size + 1) * sizeof(symbol)); 334 | if (mem == NULL) { 335 | lose_s(p); 336 | return NULL; 337 | } 338 | q = (symbol *) (HEAD + (char *)mem); 339 | CAPACITY(q) = new_size; 340 | return q; 341 | } 342 | 343 | /* to replace symbols between c_bra and c_ket in z->p by the 344 | s_size symbols at s. 345 | Returns 0 on success, -1 on error. 346 | Also, frees z->p (and sets it to NULL) on error. 
347 | */ 348 | extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) 349 | { 350 | int adjustment; 351 | int len; 352 | if (z->p == NULL) { 353 | z->p = create_s(); 354 | if (z->p == NULL) return -1; 355 | } 356 | adjustment = s_size - (c_ket - c_bra); 357 | len = SIZE(z->p); 358 | if (adjustment != 0) { 359 | if (adjustment + len > CAPACITY(z->p)) { 360 | z->p = increase_size(z->p, adjustment + len); 361 | if (z->p == NULL) return -1; 362 | } 363 | memmove(z->p + c_ket + adjustment, 364 | z->p + c_ket, 365 | (len - c_ket) * sizeof(symbol)); 366 | SET_SIZE(z->p, adjustment + len); 367 | z->l += adjustment; 368 | if (z->c >= c_ket) 369 | z->c += adjustment; 370 | else 371 | if (z->c > c_bra) 372 | z->c = c_bra; 373 | } 374 | unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); 375 | if (adjptr != NULL) 376 | *adjptr = adjustment; 377 | return 0; 378 | } 379 | 380 | static int slice_check(struct SN_env * z) { 381 | 382 | if (z->bra < 0 || 383 | z->bra > z->ket || 384 | z->ket > z->l || 385 | z->p == NULL || 386 | z->l > SIZE(z->p)) /* this line could be removed */ 387 | { 388 | #if 0 389 | fprintf(stderr, "faulty slice operation:\n"); 390 | debug(z, -1, 0); 391 | #endif 392 | return -1; 393 | } 394 | return 0; 395 | } 396 | 397 | extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { 398 | if (slice_check(z)) return -1; 399 | return replace_s(z, z->bra, z->ket, s_size, s, NULL); 400 | } 401 | 402 | extern int slice_from_v(struct SN_env * z, const symbol * p) { 403 | return slice_from_s(z, SIZE(p), p); 404 | } 405 | 406 | extern int slice_del(struct SN_env * z) { 407 | return slice_from_s(z, 0, 0); 408 | } 409 | 410 | extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { 411 | int adjustment; 412 | if (replace_s(z, bra, ket, s_size, s, &adjustment)) 413 | return -1; 414 | if (bra <= z->bra) z->bra += adjustment; 415 | if (bra <= z->ket) z->ket 
+= adjustment; 416 | return 0; 417 | } 418 | 419 | extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { 420 | int adjustment; 421 | if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) 422 | return -1; 423 | if (bra <= z->bra) z->bra += adjustment; 424 | if (bra <= z->ket) z->ket += adjustment; 425 | return 0; 426 | } 427 | 428 | extern symbol * slice_to(struct SN_env * z, symbol * p) { 429 | if (slice_check(z)) { 430 | lose_s(p); 431 | return NULL; 432 | } 433 | { 434 | int len = z->ket - z->bra; 435 | if (CAPACITY(p) < len) { 436 | p = increase_size(p, len); 437 | if (p == NULL) 438 | return NULL; 439 | } 440 | memmove(p, z->p + z->bra, len * sizeof(symbol)); 441 | SET_SIZE(p, len); 442 | } 443 | return p; 444 | } 445 | 446 | extern symbol * assign_to(struct SN_env * z, symbol * p) { 447 | int len = z->l; 448 | if (CAPACITY(p) < len) { 449 | p = increase_size(p, len); 450 | if (p == NULL) 451 | return NULL; 452 | } 453 | memmove(p, z->p, len * sizeof(symbol)); 454 | SET_SIZE(p, len); 455 | return p; 456 | } 457 | 458 | #if 0 459 | extern void debug(struct SN_env * z, int number, int line_count) { 460 | int i; 461 | int limit = SIZE(z->p); 462 | /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ 463 | if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); 464 | for (i = 0; i <= limit; i++) { 465 | if (z->lb == i) printf("{"); 466 | if (z->bra == i) printf("["); 467 | if (z->c == i) printf("|"); 468 | if (z->ket == i) printf("]"); 469 | if (z->l == i) printf("}"); 470 | if (i < limit) 471 | { int ch = z->p[i]; 472 | if (ch == 0) ch = '#'; 473 | printf("%c", ch); 474 | } 475 | } 476 | printf("'\n"); 477 | } 478 | #endif 479 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_danish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C 
compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_undouble(struct SN_env * z); 14 | static int r_other_suffix(struct SN_env * z); 15 | static int r_consonant_pair(struct SN_env * z); 16 | static int r_main_suffix(struct SN_env * z); 17 | static int r_mark_regions(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 24 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 33 | static const symbol s_0_3[1] = { 'e' }; 34 | static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 40 | static const symbol s_0_10[2] = { 'e', 'n' }; 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 43 | static const symbol s_0_13[2] = { 'e', 'r' }; 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 46 | static const symbol s_0_16[1] = { 's' }; 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 48 | static const symbol s_0_18[2] = { 'e', 's' }; 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 51 | static const symbol s_0_21[4] = { 'e', 'n', 
'e', 's' }; 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 58 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 60 | static const symbol s_0_30[2] = { 'e', 't' }; 61 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; 62 | 63 | static const struct among a_0[32] = 64 | { 65 | /* 0 */ { 3, s_0_0, -1, 1, 0}, 66 | /* 1 */ { 5, s_0_1, 0, 1, 0}, 67 | /* 2 */ { 4, s_0_2, -1, 1, 0}, 68 | /* 3 */ { 1, s_0_3, -1, 1, 0}, 69 | /* 4 */ { 5, s_0_4, 3, 1, 0}, 70 | /* 5 */ { 4, s_0_5, 3, 1, 0}, 71 | /* 6 */ { 6, s_0_6, 5, 1, 0}, 72 | /* 7 */ { 3, s_0_7, 3, 1, 0}, 73 | /* 8 */ { 4, s_0_8, 3, 1, 0}, 74 | /* 9 */ { 3, s_0_9, 3, 1, 0}, 75 | /* 10 */ { 2, s_0_10, -1, 1, 0}, 76 | /* 11 */ { 5, s_0_11, 10, 1, 0}, 77 | /* 12 */ { 4, s_0_12, 10, 1, 0}, 78 | /* 13 */ { 2, s_0_13, -1, 1, 0}, 79 | /* 14 */ { 5, s_0_14, 13, 1, 0}, 80 | /* 15 */ { 4, s_0_15, 13, 1, 0}, 81 | /* 16 */ { 1, s_0_16, -1, 2, 0}, 82 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 83 | /* 18 */ { 2, s_0_18, 16, 1, 0}, 84 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 85 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 86 | /* 21 */ { 4, s_0_21, 18, 1, 0}, 87 | /* 22 */ { 5, s_0_22, 18, 1, 0}, 88 | /* 23 */ { 4, s_0_23, 18, 1, 0}, 89 | /* 24 */ { 3, s_0_24, 16, 1, 0}, 90 | /* 25 */ { 6, s_0_25, 24, 1, 0}, 91 | /* 26 */ { 5, s_0_26, 24, 1, 0}, 92 | /* 27 */ { 3, s_0_27, 16, 1, 0}, 93 | /* 28 */ { 3, s_0_28, 16, 1, 0}, 94 | /* 29 */ { 5, s_0_29, 28, 1, 0}, 95 | /* 30 */ { 2, s_0_30, -1, 1, 0}, 96 | /* 31 */ { 4, s_0_31, 30, 1, 0} 97 | }; 98 | 99 | static const symbol s_1_0[2] = { 'g', 'd' }; 100 | static const symbol s_1_1[2] = { 'd', 't' }; 101 | static const 
symbol s_1_2[2] = { 'g', 't' }; 102 | static const symbol s_1_3[2] = { 'k', 't' }; 103 | 104 | static const struct among a_1[4] = 105 | { 106 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 107 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 108 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 109 | /* 3 */ { 2, s_1_3, -1, -1, 0} 110 | }; 111 | 112 | static const symbol s_2_0[2] = { 'i', 'g' }; 113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; 115 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 116 | static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; 117 | 118 | static const struct among a_2[5] = 119 | { 120 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 121 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 122 | /* 2 */ { 4, s_2_2, 1, 1, 0}, 123 | /* 3 */ { 3, s_2_3, -1, 1, 0}, 124 | /* 4 */ { 4, s_2_4, -1, 2, 0} 125 | }; 126 | 127 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 128 | 129 | static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 130 | 131 | static const symbol s_0[] = { 's', 't' }; 132 | static const symbol s_1[] = { 'i', 'g' }; 133 | static const symbol s_2[] = { 'l', 0xF8, 's' }; 134 | 135 | /* r_mark_regions: records two cursor positions in z->I. z->I[0] is preset to z->l and keeps that value on every 0-return path. z->I[1] becomes the position 3 symbols past the starting cursor (the routine returns 0 if that hop would fall outside 0..z->l). z->I[0] then becomes the cursor position reached by the out_grouping / in_grouping scan over g_v (generated comments: goto v, gopast non v), raised to z->I[1] if it would otherwise be smaller. Returns 1 on success, 0 if the hop or either scan fails. */ static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 33 */ 138 | { int ret = z->c + 3; 139 | if (0 > ret || ret > z->l) return 0; 140 | z->c = ret; /* hop, line 33 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 33 */ 143 | z->c = c_test; /* the hop was only a test: put the cursor back */ 144 | } 145 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ 146 | { /* gopast */ /* non v, line 34 */ 147 | int ret = in_grouping(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 34 */ 152 | /* try, line 35 */ 153 | /* keep z->I[0] from being smaller than z->I[1] */ if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | 159 | static int
r_main_suffix(struct SN_env * z) { 160 | /* r_main_suffix: with z->lb temporarily raised to z->I[0], looks up an a_0 entry in front of the cursor with find_among_b. Result 1 calls slice_del on the bracketed slice. Result 2 (the bare s entry of a_0) does so only when in_grouping_b(z, g_s_ending, 97, 229, 0) returns 0. Returns 1 after slice_del succeeds, 0 when nothing applies, or the negative slice_del result. */ int among_var; 161 | { int mlimit; /* setlimit, line 41 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 41 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 41 */ 168 | /* pre-filter before the table search: the symbol before the cursor must lie in 96..127 and have bit (value & 0x1f) set in 1851440, i.e. be one of d, e, n, r, s, t, the final letters of the a_0 entries */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 41 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 48 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; 183 | { int ret = slice_del(z); /* delete, line 50 */ 184 | if (ret < 0) return ret; 185 | } 186 | break; 187 | } 188 | return 1; 189 | } 190 | 191 | /* r_consonant_pair: as a look-ahead test (the cursor is restored from m_test afterwards) and with z->lb temporarily raised to z->I[0], requires an a_1 entry (gd, dt, gt, kt) in front of the cursor; z->ket is left at the original cursor position. The cursor is then stepped back one symbol, z->bra is set there and slice_del is called, so bra..ket spans the single final symbol. Returns 1 on success, 0 if the test or the step back fails, or the negative slice_del result. */ static int r_consonant_pair(struct SN_env * z) { 192 | { int m_test = z->l - z->c; /* test, line 55 */ 193 | { int mlimit; /* setlimit, line 56 */ 194 | int m1 = z->l - z->c; (void)m1; 195 | if (z->c < z->I[0]) return 0; 196 | z->c = z->I[0]; /* tomark, line 56 */ 197 | mlimit = z->lb; z->lb = z->c; 198 | z->c = z->l - m1; 199 | z->ket = z->c; /* [, line 56 */ 200 | /* pre-filter: at least two symbols must be available and the last one must be 100 (d) or 116 (t) */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } 201 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ 202 | z->bra = z->c; /* ], line 56 */ 203 | z->lb = mlimit; 204 | } 205 | z->c = z->l - m_test; 206 | } 207 | if (z->c <= z->lb) return 0; 208 | z->c--; /* next, line 62 */ 209 | z->bra = z->c; /* ], line 62 */ 210 | { int ret = slice_del(z); /* delete, line 62 */ 211 | if (ret < 0) return ret; 212 | } 213 | return 1; 214 | } 215 | 216 | static int r_other_suffix(struct SN_env * z) { 217 | int
among_var; 218 | /* Body of r_other_suffix. Step 1 is a "do" (the cursor is restored at lab0 whatever happens): z->ket is set at the cursor, s_0 (st) must match, z->bra is set, s_1 (ig) must match as well, and then slice_del is called. Presumably eq_s_b leaves the cursor in front of the text it matched, which would make the bracketed slice the st of a word ending in igst - TODO confirm in runtime/utilities.c. */ { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ 219 | z->ket = z->c; /* [, line 66 */ 220 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 221 | z->bra = z->c; /* ], line 66 */ 222 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 223 | { int ret = slice_del(z); /* delete, line 66 */ 224 | if (ret < 0) return ret; 225 | } 226 | lab0: 227 | z->c = z->l - m1; 228 | } 229 | /* Step 2: with z->lb temporarily raised to z->I[0], look up an a_2 entry in front of the cursor. Returns 0 if none is found. */ { int mlimit; /* setlimit, line 67 */ 230 | int m2 = z->l - z->c; (void)m2; 231 | if (z->c < z->I[0]) return 0; 232 | z->c = z->I[0]; /* tomark, line 67 */ 233 | mlimit = z->lb; z->lb = z->c; 234 | z->c = z->l - m2; 235 | z->ket = z->c; /* [, line 67 */ 236 | /* pre-filter: at least two symbols available and the last one is g, s or t (bits 7, 19, 20 of 1572992), the final letters of the a_2 entries */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 237 | among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ 238 | if (!(among_var)) { z->lb = mlimit; return 0; } 239 | z->bra = z->c; /* ], line 67 */ 240 | z->lb = mlimit; 241 | } 242 | switch(among_var) { 243 | case 0: return 0; 244 | /* result 1 (ig, lig, elig, els): slice_del, then r_consonant_pair is tried with the cursor restored afterwards; its 0 result is ignored */ case 1: 245 | { int ret = slice_del(z); /* delete, line 70 */ 246 | if (ret < 0) return ret; 247 | } 248 | { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ 249 | { int ret = r_consonant_pair(z); 250 | if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ 251 | if (ret < 0) return ret; 252 | } 253 | lab1: 254 | z->c = z->l - m3; 255 | } 256 | break; 257 | /* result 2 (the four-symbol entry l, 0xF8, s, t): the slice is replaced by the three symbols of s_2 (l, 0xF8, s) */ case 2: 258 | { int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */ 259 | if (ret < 0) return ret; 260 | } 261 | break; 262 | } 263 | return 1; 264 | } 265 | 266 | /* r_undouble: with z->lb temporarily raised to z->I[0], sets z->ket, requires out_grouping_b over g_v to return 0, sets z->bra and stores the bracketed slice in z->S[0] through slice_to (generated comment: -> ch). After the limit is restored, eq_v_b must match z->S[0] in front of the cursor and slice_del is called. Returns 1 on success, 0 if a check fails, -1 if slice_to returns null, or the negative slice_del result. */ static int r_undouble(struct SN_env * z) { 267 | { int mlimit; /* setlimit, line 76 */ 268 | int m1 = z->l - z->c; (void)m1; 269 | if (z->c < z->I[0]) return 0; 270 | z->c = z->I[0]; /* tomark, line 76 */ 271 | mlimit = z->lb; z->lb = z->c; 272 | z->c = z->l - m1; 273 | z->ket = z->c; /* [, line 76 */ 274 | if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } 275 | z->bra = z->c; /* ], line 76 */ 276 | z->S[0] = slice_to(z, z->S[0]); /* ->
ch, line 76 */ 277 | if (z->S[0] == 0) return -1; /* -> ch, line 76 */ /* NOTE(review): this -1 return is taken before z->lb is reset to mlimit on the next line - confirm callers discard the environment state on a negative result */ 278 | z->lb = mlimit; 279 | } 280 | if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ 281 | { int ret = slice_del(z); /* delete, line 78 */ 282 | if (ret < 0) return ret; 283 | } 284 | return 1; 285 | } 286 | 287 | /* danish_ISO_8859_1_stem: entry point. Runs r_mark_regions forwards (cursor restored afterwards), then sets z->lb to the cursor and the cursor to z->l and runs r_main_suffix, r_consonant_pair, r_other_suffix and r_undouble in that order, restoring the cursor after each one. A 0 result from any step is ignored; a negative result is returned immediately. Otherwise the cursor is reset to z->lb and 1 is returned. */ extern int danish_ISO_8859_1_stem(struct SN_env * z) { 288 | { int c1 = z->c; /* do, line 84 */ 289 | { int ret = r_mark_regions(z); 290 | if (ret == 0) goto lab0; /* call mark_regions, line 84 */ 291 | if (ret < 0) return ret; 292 | } 293 | lab0: 294 | z->c = c1; 295 | } 296 | z->lb = z->c; z->c = z->l; /* backwards, line 85 */ 297 | 298 | { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ 299 | { int ret = r_main_suffix(z); 300 | if (ret == 0) goto lab1; /* call main_suffix, line 86 */ 301 | if (ret < 0) return ret; 302 | } 303 | lab1: 304 | z->c = z->l - m2; 305 | } 306 | { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ 307 | { int ret = r_consonant_pair(z); 308 | if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ 309 | if (ret < 0) return ret; 310 | } 311 | lab2: 312 | z->c = z->l - m3; 313 | } 314 | { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ 315 | { int ret = r_other_suffix(z); 316 | if (ret == 0) goto lab3; /* call other_suffix, line 88 */ 317 | if (ret < 0) return ret; 318 | } 319 | lab3: 320 | z->c = z->l - m4; 321 | } 322 | { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ 323 | { int ret = r_undouble(z); 324 | if (ret == 0) goto lab4; /* call undouble, line 89 */ 325 | if (ret < 0) return ret; 326 | } 327 | lab4: 328 | z->c = z->l - m5; 329 | } 330 | z->c = z->lb; 331 | return 1; 332 | } 333 | 334 | /* Environment constructor and destructor: they forward to SN_create_env(1, 2, 0) and SN_close_env(z, 1). The routines above use one string slot (z->S[0]) and two integer slots (z->I[0], z->I[1]); presumably that is what the leading 1 and 2 request - confirm against runtime/api.c. */ extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } 335 | 336 | extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } 337 | 338 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_danish.h:
-------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_ISO_8859_1_create_env(void); 9 | extern void danish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int danish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_dutch.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_ISO_8859_1_create_env(void); 9 | extern void dutch_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int dutch_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_english.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_ISO_8859_1_create_env(void); 9 | extern void english_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int english_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_finnish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef 
__cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_ISO_8859_1_create_env(void); 9 | extern void finnish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int finnish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_french.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * french_ISO_8859_1_create_env(void); 9 | extern void french_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int french_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_german.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * german_ISO_8859_1_create_env(void); 9 | extern void german_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int german_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * hungarian_ISO_8859_1_create_env(void); 9 | extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 
| extern int hungarian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_italian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_ISO_8859_1_create_env(void); 9 | extern void italian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int italian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_latin.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int latin_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_que_word(struct SN_env * z); 14 | static int r_map_letters(struct SN_env * z); 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | 20 | extern struct SN_env * latin_ISO_8859_1_create_env(void); 21 | extern void latin_ISO_8859_1_close_env(struct SN_env * z); 22 | 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | static const symbol s_0_0[3] = { 'i', 't', 'a' }; 28 | static const symbol s_0_1[3] = { 'q', 'u', 'a' }; 29 | static const symbol s_0_2[4] = { 'a', 'd', 'a', 'e' }; 30 | static const symbol s_0_3[5] = { 'p', 'e', 'r', 'a', 'e' }; 31 | static const symbol s_0_4[4] = { 'q', 'u', 'a', 'e' }; 32 | static const symbol s_0_5[2] = { 'd', 'e' }; 33 | static const symbol s_0_6[2] = { 'n', 'e' }; 34 | static const symbol s_0_7[6] = { 'u', 't', 
'r', 'i', 'b', 'i' }; 35 | static const symbol s_0_8[3] = { 'u', 'b', 'i' }; 36 | static const symbol s_0_9[4] = { 'u', 'n', 'd', 'i' }; 37 | static const symbol s_0_10[4] = { 'o', 'b', 'l', 'i' }; 38 | static const symbol s_0_11[4] = { 'd', 'e', 'n', 'i' }; 39 | static const symbol s_0_12[3] = { 'u', 't', 'i' }; 40 | static const symbol s_0_13[3] = { 'c', 'u', 'i' }; 41 | static const symbol s_0_14[3] = { 'q', 'u', 'i' }; 42 | static const symbol s_0_15[4] = { 'q', 'u', 'a', 'm' }; 43 | static const symbol s_0_16[4] = { 'q', 'u', 'e', 'm' }; 44 | static const symbol s_0_17[6] = { 'q', 'u', 'a', 'r', 'u', 'm' }; 45 | static const symbol s_0_18[6] = { 'q', 'u', 'o', 'r', 'u', 'm' }; 46 | static const symbol s_0_19[2] = { 'c', 'o' }; 47 | static const symbol s_0_20[4] = { 'd', 'e', 'c', 'o' }; 48 | static const symbol s_0_21[4] = { 'r', 'e', 'c', 'o' }; 49 | static const symbol s_0_22[4] = { 'i', 'n', 'c', 'o' }; 50 | static const symbol s_0_23[5] = { 'c', 'o', 'n', 'c', 'o' }; 51 | static const symbol s_0_24[4] = { 'e', 'x', 'c', 'o' }; 52 | static const symbol s_0_25[6] = { 'q', 'u', 'a', 'n', 'd', 'o' }; 53 | static const symbol s_0_26[4] = { 'u', 't', 'r', 'o' }; 54 | static const symbol s_0_27[3] = { 'q', 'u', 'o' }; 55 | static const symbol s_0_28[4] = { 'u', 't', 'e', 'r' }; 56 | static const symbol s_0_29[3] = { 't', 'o', 'r' }; 57 | static const symbol s_0_30[5] = { 'o', 'b', 't', 'o', 'r' }; 58 | static const symbol s_0_31[7] = { 'p', 'r', 'a', 'e', 't', 'o', 'r' }; 59 | static const symbol s_0_32[5] = { 'd', 'e', 't', 'o', 'r' }; 60 | static const symbol s_0_33[5] = { 'r', 'e', 't', 'o', 'r' }; 61 | static const symbol s_0_34[5] = { 'i', 'n', 't', 'o', 'r' }; 62 | static const symbol s_0_35[6] = { 'c', 'o', 'n', 't', 'o', 'r' }; 63 | static const symbol s_0_36[5] = { 'o', 'p', 't', 'o', 'r' }; 64 | static const symbol s_0_37[5] = { 'a', 't', 't', 'o', 'r' }; 65 | static const symbol s_0_38[5] = { 'e', 'x', 't', 'o', 'r' }; 66 | static const symbol 
s_0_39[4] = { 'q', 'u', 'a', 's' }; 67 | static const symbol s_0_40[3] = { 'a', 'b', 's' }; 68 | static const symbol s_0_41[6] = { 'p', 'l', 'e', 'n', 'i', 's' }; 69 | static const symbol s_0_42[4] = { 'q', 'u', 'i', 's' }; 70 | static const symbol s_0_43[10] = { 'q', 'u', 'o', 't', 'u', 's', 'q', 'u', 'i', 's' }; 71 | static const symbol s_0_44[4] = { 'q', 'u', 'o', 's' }; 72 | static const symbol s_0_45[3] = { 'a', 'p', 's' }; 73 | static const symbol s_0_46[2] = { 'u', 's' }; 74 | static const symbol s_0_47[4] = { 'a', 'b', 'u', 's' }; 75 | static const symbol s_0_48[6] = { 'q', 'u', 'i', 'b', 'u', 's' }; 76 | static const symbol s_0_49[4] = { 'a', 'd', 'u', 's' }; 77 | static const symbol s_0_50[5] = { 'c', 'u', 'i', 'u', 's' }; 78 | static const symbol s_0_51[5] = { 'q', 'u', 'o', 'u', 's' }; 79 | static const symbol s_0_52[3] = { 's', 'u', 's' }; 80 | static const symbol s_0_53[2] = { 'a', 't' }; 81 | 82 | static const struct among a_0[54] = 83 | { 84 | /* 0 */ { 3, s_0_0, -1, -1, 0}, 85 | /* 1 */ { 3, s_0_1, -1, -1, 0}, 86 | /* 2 */ { 4, s_0_2, -1, -1, 0}, 87 | /* 3 */ { 5, s_0_3, -1, -1, 0}, 88 | /* 4 */ { 4, s_0_4, -1, -1, 0}, 89 | /* 5 */ { 2, s_0_5, -1, -1, 0}, 90 | /* 6 */ { 2, s_0_6, -1, -1, 0}, 91 | /* 7 */ { 6, s_0_7, -1, -1, 0}, 92 | /* 8 */ { 3, s_0_8, -1, -1, 0}, 93 | /* 9 */ { 4, s_0_9, -1, -1, 0}, 94 | /* 10 */ { 4, s_0_10, -1, -1, 0}, 95 | /* 11 */ { 4, s_0_11, -1, -1, 0}, 96 | /* 12 */ { 3, s_0_12, -1, -1, 0}, 97 | /* 13 */ { 3, s_0_13, -1, -1, 0}, 98 | /* 14 */ { 3, s_0_14, -1, -1, 0}, 99 | /* 15 */ { 4, s_0_15, -1, -1, 0}, 100 | /* 16 */ { 4, s_0_16, -1, -1, 0}, 101 | /* 17 */ { 6, s_0_17, -1, -1, 0}, 102 | /* 18 */ { 6, s_0_18, -1, -1, 0}, 103 | /* 19 */ { 2, s_0_19, -1, -1, 0}, 104 | /* 20 */ { 4, s_0_20, 19, -1, 0}, 105 | /* 21 */ { 4, s_0_21, 19, -1, 0}, 106 | /* 22 */ { 4, s_0_22, 19, -1, 0}, 107 | /* 23 */ { 5, s_0_23, 19, -1, 0}, 108 | /* 24 */ { 4, s_0_24, 19, -1, 0}, 109 | /* 25 */ { 6, s_0_25, -1, -1, 0}, 110 | /* 26 */ { 4, 
s_0_26, -1, -1, 0}, 111 | /* 27 */ { 3, s_0_27, -1, -1, 0}, 112 | /* 28 */ { 4, s_0_28, -1, -1, 0}, 113 | /* 29 */ { 3, s_0_29, -1, -1, 0}, 114 | /* 30 */ { 5, s_0_30, 29, -1, 0}, 115 | /* 31 */ { 7, s_0_31, 29, -1, 0}, 116 | /* 32 */ { 5, s_0_32, 29, -1, 0}, 117 | /* 33 */ { 5, s_0_33, 29, -1, 0}, 118 | /* 34 */ { 5, s_0_34, 29, -1, 0}, 119 | /* 35 */ { 6, s_0_35, 29, -1, 0}, 120 | /* 36 */ { 5, s_0_36, 29, -1, 0}, 121 | /* 37 */ { 5, s_0_37, 29, -1, 0}, 122 | /* 38 */ { 5, s_0_38, 29, -1, 0}, 123 | /* 39 */ { 4, s_0_39, -1, -1, 0}, 124 | /* 40 */ { 3, s_0_40, -1, -1, 0}, 125 | /* 41 */ { 6, s_0_41, -1, -1, 0}, 126 | /* 42 */ { 4, s_0_42, -1, -1, 0}, 127 | /* 43 */ { 10, s_0_43, 42, -1, 0}, 128 | /* 44 */ { 4, s_0_44, -1, -1, 0}, 129 | /* 45 */ { 3, s_0_45, -1, -1, 0}, 130 | /* 46 */ { 2, s_0_46, -1, -1, 0}, 131 | /* 47 */ { 4, s_0_47, 46, -1, 0}, 132 | /* 48 */ { 6, s_0_48, 46, -1, 0}, 133 | /* 49 */ { 4, s_0_49, 46, -1, 0}, 134 | /* 50 */ { 5, s_0_50, 46, -1, 0}, 135 | /* 51 */ { 5, s_0_51, 46, -1, 0}, 136 | /* 52 */ { 3, s_0_52, 46, -1, 0}, 137 | /* 53 */ { 2, s_0_53, -1, -1, 0} 138 | }; 139 | 140 | static const symbol s_1_0[1] = { 'a' }; 141 | static const symbol s_1_1[2] = { 'i', 'a' }; 142 | static const symbol s_1_2[2] = { 'u', 'd' }; 143 | static const symbol s_1_3[1] = { 'e' }; 144 | static const symbol s_1_4[2] = { 'a', 'e' }; 145 | static const symbol s_1_5[1] = { 'i' }; 146 | static const symbol s_1_6[2] = { 'a', 'm' }; 147 | static const symbol s_1_7[2] = { 'e', 'm' }; 148 | static const symbol s_1_8[2] = { 'u', 'm' }; 149 | static const symbol s_1_9[1] = { 'o' }; 150 | static const symbol s_1_10[2] = { 'a', 's' }; 151 | static const symbol s_1_11[2] = { 'e', 's' }; 152 | static const symbol s_1_12[2] = { 'i', 's' }; 153 | static const symbol s_1_13[2] = { 'o', 's' }; 154 | static const symbol s_1_14[2] = { 'u', 's' }; 155 | static const symbol s_1_15[4] = { 'i', 'b', 'u', 's' }; 156 | static const symbol s_1_16[3] = { 'i', 'u', 's' }; 157 | static 
const symbol s_1_17[2] = { 'n', 't' }; 158 | static const symbol s_1_18[1] = { 'u' }; 159 | 160 | static const struct among a_1[19] = 161 | { 162 | /* 0 */ { 1, s_1_0, -1, 1, 0}, 163 | /* 1 */ { 2, s_1_1, 0, 1, 0}, 164 | /* 2 */ { 2, s_1_2, -1, 1, 0}, 165 | /* 3 */ { 1, s_1_3, -1, 1, 0}, 166 | /* 4 */ { 2, s_1_4, 3, 1, 0}, 167 | /* 5 */ { 1, s_1_5, -1, 1, 0}, 168 | /* 6 */ { 2, s_1_6, -1, 1, 0}, 169 | /* 7 */ { 2, s_1_7, -1, 1, 0}, 170 | /* 8 */ { 2, s_1_8, -1, 1, 0}, 171 | /* 9 */ { 1, s_1_9, -1, 1, 0}, 172 | /* 10 */ { 2, s_1_10, -1, 1, 0}, 173 | /* 11 */ { 2, s_1_11, -1, 1, 0}, 174 | /* 12 */ { 2, s_1_12, -1, 1, 0}, 175 | /* 13 */ { 2, s_1_13, -1, 1, 0}, 176 | /* 14 */ { 2, s_1_14, -1, 1, 0}, 177 | /* 15 */ { 4, s_1_15, 14, 1, 0}, 178 | /* 16 */ { 3, s_1_16, 14, 1, 0}, 179 | /* 17 */ { 2, s_1_17, -1, 1, 0}, 180 | /* 18 */ { 1, s_1_18, -1, 1, 0} 181 | }; 182 | 183 | static const symbol s_2_0[4] = { 'm', 'i', 'n', 'i' }; 184 | static const symbol s_2_1[2] = { 'r', 'i' }; 185 | static const symbol s_2_2[3] = { 's', 't', 'i' }; 186 | static const symbol s_2_3[1] = { 'm' }; 187 | static const symbol s_2_4[2] = { 'b', 'o' }; 188 | static const symbol s_2_5[3] = { 'e', 'r', 'o' }; 189 | static const symbol s_2_6[1] = { 'r' }; 190 | static const symbol s_2_7[3] = { 'b', 'o', 'r' }; 191 | static const symbol s_2_8[3] = { 'm', 'u', 'r' }; 192 | static const symbol s_2_9[3] = { 't', 'u', 'r' }; 193 | static const symbol s_2_10[4] = { 'n', 't', 'u', 'r' }; 194 | static const symbol s_2_11[5] = { 'u', 'n', 't', 'u', 'r' }; 195 | static const symbol s_2_12[6] = { 'i', 'u', 'n', 't', 'u', 'r' }; 196 | static const symbol s_2_13[1] = { 's' }; 197 | static const symbol s_2_14[3] = { 'r', 'i', 's' }; 198 | static const symbol s_2_15[5] = { 'b', 'e', 'r', 'i', 's' }; 199 | static const symbol s_2_16[3] = { 't', 'i', 's' }; 200 | static const symbol s_2_17[4] = { 's', 't', 'i', 's' }; 201 | static const symbol s_2_18[2] = { 'n', 's' }; 202 | static const symbol s_2_19[3] = { 'm', 
'u', 's' }; 203 | static const symbol s_2_20[1] = { 't' }; 204 | static const symbol s_2_21[2] = { 'n', 't' }; 205 | static const symbol s_2_22[3] = { 'u', 'n', 't' }; 206 | static const symbol s_2_23[4] = { 'i', 'u', 'n', 't' }; 207 | static const symbol s_2_24[5] = { 'e', 'r', 'u', 'n', 't' }; 208 | 209 | static const struct among a_2[25] = 210 | { 211 | /* 0 */ { 4, s_2_0, -1, 4, 0}, 212 | /* 1 */ { 2, s_2_1, -1, 4, 0}, 213 | /* 2 */ { 3, s_2_2, -1, 4, 0}, 214 | /* 3 */ { 1, s_2_3, -1, 4, 0}, 215 | /* 4 */ { 2, s_2_4, -1, 2, 0}, 216 | /* 5 */ { 3, s_2_5, -1, 3, 0}, 217 | /* 6 */ { 1, s_2_6, -1, 4, 0}, 218 | /* 7 */ { 3, s_2_7, 6, 2, 0}, 219 | /* 8 */ { 3, s_2_8, 6, 4, 0}, 220 | /* 9 */ { 3, s_2_9, 6, 4, 0}, 221 | /* 10 */ { 4, s_2_10, 9, 4, 0}, 222 | /* 11 */ { 5, s_2_11, 10, 1, 0}, 223 | /* 12 */ { 6, s_2_12, 11, 1, 0}, 224 | /* 13 */ { 1, s_2_13, -1, 4, 0}, 225 | /* 14 */ { 3, s_2_14, 13, 4, 0}, 226 | /* 15 */ { 5, s_2_15, 14, 2, 0}, 227 | /* 16 */ { 3, s_2_16, 13, 4, 0}, 228 | /* 17 */ { 4, s_2_17, 16, 4, 0}, 229 | /* 18 */ { 2, s_2_18, 13, 4, 0}, 230 | /* 19 */ { 3, s_2_19, 13, 4, 0}, 231 | /* 20 */ { 1, s_2_20, -1, 4, 0}, 232 | /* 21 */ { 2, s_2_21, 20, 4, 0}, 233 | /* 22 */ { 3, s_2_22, 21, 1, 0}, 234 | /* 23 */ { 4, s_2_23, 22, 1, 0}, 235 | /* 24 */ { 5, s_2_24, 22, 1, 0} 236 | }; 237 | 238 | static const symbol s_0[] = { 'j' }; 239 | static const symbol s_1[] = { 'i' }; 240 | static const symbol s_2[] = { 'v' }; 241 | static const symbol s_3[] = { 'u' }; 242 | static const symbol s_4[] = { 'q', 'u', 'e' }; 243 | static const symbol s_5[] = { 'i' }; 244 | static const symbol s_6[] = { 'b', 'i' }; 245 | static const symbol s_7[] = { 'e', 'r', 'i' }; 246 | 247 | static int r_map_letters(struct SN_env * z) { 248 | { int c1 = z->c; /* do, line 14 */ 249 | while(1) { /* repeat, line 14 */ 250 | int c2 = z->c; 251 | while(1) { /* goto, line 14 */ 252 | int c3 = z->c; 253 | z->bra = z->c; /* [, line 14 */ 254 | if (!(eq_s(z, 1, s_0))) goto lab2; 255 | z->ket = 
z->c; /* ], line 14 */ 256 | z->c = c3; 257 | break; 258 | lab2: 259 | z->c = c3; 260 | if (z->c >= z->l) goto lab1; 261 | z->c++; /* goto, line 14 */ 262 | } 263 | /* a j (s_0) was found and bracketed: replace it with i (s_1), then repeat the search from the same position */ { int ret = slice_from_s(z, 1, s_1); /* <-, line 14 */ 264 | if (ret < 0) return ret; 265 | } 266 | continue; 267 | lab1: 268 | /* the scan reached z->l without another match: restore the cursor and leave the repeat loop */ z->c = c2; 269 | break; 270 | } 271 | z->c = c1; 272 | } 273 | /* second pass with the same shape: each v (s_2) from the cursor onwards is replaced with u (s_3) */ { int c4 = z->c; /* do, line 15 */ 274 | while(1) { /* repeat, line 15 */ 275 | int c5 = z->c; 276 | while(1) { /* goto, line 15 */ 277 | int c6 = z->c; 278 | z->bra = z->c; /* [, line 15 */ 279 | if (!(eq_s(z, 1, s_2))) goto lab5; 280 | z->ket = z->c; /* ], line 15 */ 281 | z->c = c6; 282 | break; 283 | lab5: 284 | z->c = c6; 285 | if (z->c >= z->l) goto lab4; 286 | z->c++; /* goto, line 15 */ 287 | } 288 | { int ret = slice_from_s(z, 1, s_3); /* <-, line 15 */ 289 | if (ret < 0) return ret; 290 | } 291 | continue; 292 | lab4: 293 | z->c = c5; 294 | break; 295 | } 296 | z->c = c4; 297 | } 298 | return 1; 299 | } 300 | 301 | /* r_que_word: handles a trailing que (s_4). Returns 0 at once if the text in front of the cursor does not end in que. Otherwise, if what precedes que matches an a_0 entry and that match reaches z->lb (the atlimit check), z->S[0] and z->S[1] are filled through assign_to (generated comments: => noun_form, => verb_form) and 1 is returned. In every other case slice_del is called on the bracketed que and 0 is returned (generated comment: fail). A null result from assign_to yields -1; a negative slice_del result is passed through. */ static int r_que_word(struct SN_env * z) { 302 | z->ket = z->c; /* [, line 22 */ 303 | if (!(eq_s_b(z, 3, s_4))) return 0; 304 | z->bra = z->c; /* ], line 22 */ 305 | { int m1 = z->l - z->c; (void)m1; /* or, line 35 */ 306 | /* pre-filter: at least two symbols available and the last one is a, e, i, m, o, r, s or t (the bits set in 1876514), the final letters of the a_0 entries */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876514 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; 307 | if (!(find_among_b(z, a_0, 54))) goto lab1; /* among, line 23 */ 308 | if (z->c > z->lb) goto lab1; /* atlimit, line 32 */ 309 | z->bra = z->c; /* ], line 32 */ 310 | z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 33 */ 311 | if (z->S[0] == 0) return -1; /* => noun_form, line 33 */ 312 | z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 34 */ 313 | if (z->S[1] == 0) return -1; /* => verb_form, line 34 */ 314 | goto lab0; 315 | lab1: 316 | z->c = z->l - m1; 317 | /* z->bra and z->ket still bracket the que matched at the top of the routine on this path */ { int ret = slice_del(z); /* delete, line 35 */ 318 | if (ret < 0) return ret; 319 | } 320 | return 0; /* fail, line 35 */ 321 | } 322 | lab0: 323 | return 1; 324 | } 325 | 326 | extern int
latin_ISO_8859_1_stem(struct SN_env * z) { 327 | /* latin_ISO_8859_1_stem: entry point. After r_map_letters it works backwards from z->l. If r_que_word returns 1 the suffix stripping below is skipped (jump to lab0). Otherwise z->S[0] (noun_form) and z->S[1] (verb_form) are filled through assign_to and each is edited in its own block that temporarily points z->p at that string. */ int among_var; 328 | { int ret = r_map_letters(z); 329 | if (ret == 0) return 0; /* call map_letters, line 41 */ 330 | if (ret < 0) return ret; 331 | } 332 | z->lb = z->c; z->c = z->l; /* backwards, line 43 */ 333 | 334 | { int m1 = z->l - z->c; (void)m1; /* or, line 44 */ 335 | { int ret = r_que_word(z); 336 | if (ret == 0) goto lab1; /* call que_word, line 44 */ 337 | if (ret < 0) return ret; 338 | } 339 | goto lab0; 340 | lab1: 341 | z->c = z->l - m1; 342 | z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 45 */ 343 | if (z->S[0] == 0) return -1; /* => noun_form, line 45 */ 344 | z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 46 */ 345 | if (z->S[1] == 0) return -1; /* => verb_form, line 46 */ 346 | /* noun_form block: the whole environment is saved in env, then z is made to operate on z->S[0] instead of the word; env is copied back at the end of the block */ { struct SN_env env = * z; /* $ noun_form, line 48 */ 347 | int failure = 1; /* assume failure */ 348 | z->p = z->S[0]; 349 | z->lb = z->c = 0; 350 | z->l = SIZE(z->p); 351 | z->lb = z->c; z->c = z->l; /* backwards, line 48 */ 352 | 353 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 48 */ 354 | z->ket = z->c; /* [, line 49 */ 355 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3711538 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } 356 | among_var = find_among_b(z, a_1, 19); /* substring, line 49 */ 357 | if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } 358 | z->bra = z->c; /* ], line 49 */ 359 | /* the hop requires at least 2 symbols in front of the matched a_1 ending; otherwise the try is abandoned and nothing is removed */ { int ret = z->c - 2; 360 | if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab3; } 361 | z->c = ret; /* hop, line 49 */ 362 | } 363 | switch(among_var) { 364 | case 0: { z->c = z->l - m_keep; goto lab3; } 365 | case 1: 366 | /* NOTE(review): the negative-return path below leaves the function without executing the env copy-back, so z->p still refers to the noun_form buffer while the error propagates - confirm the caller copes with that */ { int ret = slice_del(z); /* delete, line 53 */ 367 | if (ret < 0) return ret; 368 | } 369 | break; 370 | } 371 | lab3: 372 | ; 373 | } 374 | z->c = z->lb; 375 | /* failure is cleared unconditionally here, so the if (failure) test three lines below never fires */ failure = 0; /* mark success */ 376 | z->S[0] = z->p; 377 | * z = env; 378 | if (failure) return 0; 379 | } 380 | { struct SN_env env = * z; /* $ verb_form, line 57 */ 381
| int failure = 1; /* assume failure */ 382 | /* verb_form block: same save / redirect / copy-back sequence as the noun_form block, now on z->S[1] and table a_2 */ z->p = z->S[1]; 383 | z->lb = z->c = 0; 384 | z->l = SIZE(z->p); 385 | z->lb = z->c; z->c = z->l; /* backwards, line 57 */ 386 | 387 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 57 */ 388 | z->ket = z->c; /* [, line 58 */ 389 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876480 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab5; } 390 | among_var = find_among_b(z, a_2, 25); /* substring, line 58 */ 391 | if (!(among_var)) { z->c = z->l - m_keep; goto lab5; } 392 | z->bra = z->c; /* ], line 58 */ 393 | /* as for nouns, at least 2 symbols must remain in front of the matched ending */ { int ret = z->c - 2; 394 | if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab5; } 395 | z->c = ret; /* hop, line 58 */ 396 | } 397 | /* a_2 result codes: 1 replaces the ending with i (s_5), 2 with bi (s_6), 3 with eri (s_7), 4 calls slice_del on it */ switch(among_var) { 398 | case 0: { z->c = z->l - m_keep; goto lab5; } 399 | case 1: 400 | { int ret = slice_from_s(z, 1, s_5); /* <-, line 61 */ 401 | if (ret < 0) return ret; 402 | } 403 | break; 404 | case 2: 405 | { int ret = slice_from_s(z, 2, s_6); /* <-, line 63 */ 406 | if (ret < 0) return ret; 407 | } 408 | break; 409 | case 3: 410 | { int ret = slice_from_s(z, 3, s_7); /* <-, line 65 */ 411 | if (ret < 0) return ret; 412 | } 413 | break; 414 | case 4: 415 | { int ret = slice_del(z); /* delete, line 68 */ 416 | if (ret < 0) return ret; 417 | } 418 | break; 419 | } 420 | lab5: 421 | ; 422 | } 423 | z->c = z->lb; 424 | failure = 0; /* mark success */ 425 | z->S[1] = z->p; 426 | * z = env; 427 | if (failure) return 0; 428 | } 429 | } 430 | lab0: 431 | /* both paths end here: the range from z->lb to z->l is passed to insert_v together with z->S[0], so only noun_form is written back; z->S[1] is not read again in this function */ z->c = z->lb; 432 | { int c_keep = z->c; 433 | int ret = insert_v(z, z->c, z->l, z->S[0]); /* = noun_form, line 74 */ 434 | z->c = c_keep; 435 | if (ret < 0) return ret; 436 | } 437 | return 1; 438 | } 439 | 440 | /* Environment constructor and destructor: they forward to SN_create_env(2, 0, 0) and SN_close_env(z, 2). The stemmer uses two string slots (z->S[0], z->S[1]) and no z->I entries; presumably the leading 2 is the string-slot count - confirm against runtime/api.c. */ extern struct SN_env * latin_ISO_8859_1_create_env(void) { return SN_create_env(2, 0, 0); } 441 | 442 | extern void latin_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 2); } 443 | 444 |
-------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_latin.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * latin_ISO_8859_1_create_env(void); 9 | extern void latin_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int latin_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 23 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[1] = { 'e' }; 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 32 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 37 
| static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 38 | static const symbol s_0_9[2] = { 'e', 'n' }; 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 40 | static const symbol s_0_11[2] = { 'a', 'r' }; 41 | static const symbol s_0_12[2] = { 'e', 'r' }; 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 43 | static const symbol s_0_14[1] = { 's' }; 44 | static const symbol s_0_15[2] = { 'a', 's' }; 45 | static const symbol s_0_16[2] = { 'e', 's' }; 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 48 | static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' }; 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 54 | static const symbol s_0_25[2] = { 'e', 't' }; 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 57 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 58 | 59 | /* a_0: suffix table searched by r_main_suffix through find_among_b(z, a_0, 29). In every entry the first field equals the length of the s_0_N string named in the second field, and the fourth field is the value r_main_suffix switches on (1 = delete, 2 = conditional removal of 's', 3 = replace with s_1). NOTE(review): the third field looks like the index of a shorter entry that is a suffix of this one (-1 when there is none) and the fifth field is 0 throughout; struct among is declared outside this file, so confirm the field meanings against the runtime headers. */ static const struct among a_0[29] = 60 | { 61 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 62 | /* 1 */ { 1, s_0_1, -1, 1, 0}, 63 | /* 2 */ { 3, s_0_2, 1, 1, 0}, 64 | /* 3 */ { 4, s_0_3, 1, 1, 0}, 65 | /* 4 */ { 4, s_0_4, 1, 1, 0}, 66 | /* 5 */ { 3, s_0_5, 1, 1, 0}, 67 | /* 6 */ { 3, s_0_6, 1, 1, 0}, 68 | /* 7 */ { 6, s_0_7, 6, 1, 0}, 69 | /* 8 */ { 4, s_0_8, 1, 3, 0}, 70 | /* 9 */ { 2, s_0_9, -1, 1, 0}, 71 | /* 10 */ { 5, s_0_10, 9, 1, 0}, 72 | /* 11 */ { 2, s_0_11, -1, 1, 0}, 73 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 74 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, 76 | /* 15 */ { 2, s_0_15, 14, 1, 0}, 77 | /* 16 */ { 2, s_0_16, 14, 1, 0}, 78 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 79 | /* 18 */ { 5, s_0_18, 16, 1, 0}, 80 | /* 19 */ { 4, s_0_19, 16, 1, 0}, 81
| /* 20 */ { 7, s_0_20, 19, 1, 0}, 82 | /* 21 */ { 3, s_0_21, 14, 1, 0}, 83 | /* 22 */ { 6, s_0_22, 21, 1, 0}, 84 | /* 23 */ { 3, s_0_23, 14, 1, 0}, 85 | /* 24 */ { 3, s_0_24, 14, 1, 0}, 86 | /* 25 */ { 2, s_0_25, -1, 1, 0}, 87 | /* 26 */ { 3, s_0_26, 25, 1, 0}, 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, 89 | /* 28 */ { 3, s_0_28, -1, 1, 0} 90 | }; 91 | 92 | static const symbol s_1_0[2] = { 'd', 't' }; 93 | static const symbol s_1_1[2] = { 'v', 't' }; 94 | 95 | /* a_1: the two endings ("dt", "vt") tested by r_consonant_pair. Both carry result -1; the caller only checks that find_among_b returned non-zero. */ static const struct among a_1[2] = 96 | { 97 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 98 | /* 1 */ { 2, s_1_1, -1, -1, 0} 99 | }; 100 | 101 | static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 103 | static const symbol s_2_2[2] = { 'i', 'g' }; 104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 107 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' }; 109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 112 | 113 | /* a_2: suffixes looked up by r_other_suffix; every entry has result 1, which that routine handles by deleting the slice. */ static const struct among a_2[11] = 114 | { 115 | /* 0 */ { 3, s_2_0, -1, 1, 0}, 116 | /* 1 */ { 4, s_2_1, 0, 1, 0}, 117 | /* 2 */ { 2, s_2_2, -1, 1, 0}, 118 | /* 3 */ { 3, s_2_3, 2, 1, 0}, 119 | /* 4 */ { 3, s_2_4, 2, 1, 0}, 120 | /* 5 */ { 4, s_2_5, 4, 1, 0}, 121 | /* 6 */ { 3, s_2_6, -1, 1, 0}, 122 | /* 7 */ { 3, s_2_7, -1, 1, 0}, 123 | /* 8 */ { 4, s_2_8, 7, 1, 0}, 124 | /* 9 */ { 4, s_2_9, 7, 1, 0}, 125 | /* 10 */ { 7, s_2_10, 9, 1, 0} 126 | }; 127 | 128 | /* g_v: table handed to the grouping helpers together with the code range 97..248; the generator's notes at the call sites call it "grouping v". NOTE(review): presumably a bitmap of the vowel codes - confirm against the Snowball source. */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | 130 | /* g_s_ending: grouping table used with the range 98..122 in case 2 of r_main_suffix. */ static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 131 | 132 | /* s_0 is the one-symbol string matched with eq_s_b, and s_1 the replacement passed to slice_from_s, both in r_main_suffix. */ static const symbol s_0[] = { 'k' }; 133 | static const symbol s_1[] = { 'e', 'r' }; 134
| 135 | /* r_mark_regions: computes the two integer marks used by the suffix routines. I[1] (the generator's "x") is the position 3 symbols past the entry cursor; I[0] ("p1") is where the cursor stands after the out_grouping / in_grouping scan over g_v, raised to I[1] when it would fall before it. Returns 0 when fewer than 3 symbols are available or either scan fails (I[0] then keeps the value z->l stored on entry), 1 otherwise. */ static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 30 */ 138 | { int ret = z->c + 3; 139 | if (0 > ret || ret > z->l) return 0; 140 | z->c = ret; /* hop, line 30 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 30 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ 146 | { /* gopast */ /* non v, line 31 */ 147 | int ret = in_grouping(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 31 */ 152 | /* try, line 32 */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | 159 | /* r_main_suffix: backward step that handles one a_0 suffix ending at the cursor. Returns 0 when the cursor is before I[0] or nothing matches. While searching, lb is temporarily raised to I[0] so the match cannot reach past that mark; the saved value (mlimit) is put back on every exit from the limited section. Result 1 deletes the slice, 2 deletes it only when the context check below passes, 3 replaces it with s_1. Negative values from the slice helpers are passed up unchanged. */ static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 38 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 38 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 38 */ 168 | /* cheap pre-filter: give up unless the last byte lies in 96..127 and is selected by the bit mask, before paying for the table search */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 38 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 44 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | /* two alternatives: if in_grouping_b on g_s_ending returns 0 the slice is deleted straight away; otherwise the cursor is rewound and the second alternative (s_0 via eq_s_b, then out_grouping_b on g_v) must succeed, else the routine returns 0 */ { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ 183 | if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; 184 | goto lab0; 185 | lab1: 186 | z->c = z->l - m2; 187 | if (!(eq_s_b(z, 1, s_0))) return 0; 188 | if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; 189 | } 190 | lab0: 191 | { int ret = slice_del(z); /* delete, line 46 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 3: 196 | {
int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ 197 | if (ret < 0) return ret; 198 | } 199 | break; 200 | } 201 | return 1; 202 | } 203 | 204 | /* r_consonant_pair: under the same I[0] limit, tests (cursor restored afterwards) that the text before the cursor ends with an a_1 entry, pre-filtered on the last byte being 116 ('t'). On success it steps the cursor back one symbol, sets bra there and calls slice_del; ket was set at the original cursor, so the slice is that single last symbol. Returns 0 when the test fails or the cursor is already at lb. */ static int r_consonant_pair(struct SN_env * z) { 205 | { int m_test = z->l - z->c; /* test, line 53 */ 206 | { int mlimit; /* setlimit, line 54 */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 54 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | z->ket = z->c; /* [, line 54 */ 213 | if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } 214 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ 215 | z->bra = z->c; /* ], line 54 */ 216 | z->lb = mlimit; 217 | } 218 | z->c = z->l - m_test; 219 | } 220 | if (z->c <= z->lb) return 0; 221 | z->c--; /* next, line 59 */ 222 | z->bra = z->c; /* ], line 59 */ 223 | { int ret = slice_del(z); /* delete, line 59 */ 224 | if (ret < 0) return ret; 225 | } 226 | return 1; 227 | } 228 | 229 | /* r_other_suffix: under the I[0] limit, looks up an a_2 suffix ending at the cursor and deletes it (every a_2 entry has result 1). Returns 0 when the cursor is before I[0], the last-byte pre-filter rejects the word, or nothing matches. */ static int r_other_suffix(struct SN_env * z) { 230 | int among_var; 231 | { int mlimit; /* setlimit, line 63 */ 232 | int m1 = z->l - z->c; (void)m1; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 63 */ 235 | mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m1; 237 | z->ket = z->c; /* [, line 63 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 239 | among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0; } 241 | z->bra = z->c; /* ], line 63 */ 242 | z->lb = mlimit; 243 | } 244 | switch(among_var) { 245 | case 0: return 0; 246 | case 1: 247 | { int ret = slice_del(z); /* delete, line 67 */ 248 | if (ret < 0) return ret; 249 | } 250 | break; 251 | } 252 | return 1; 253 | } 254 | 255 | /* norwegian_ISO_8859_1_stem: entry point. Runs r_mark_regions forwards and restores the cursor, switches to backward processing (lb = cursor, cursor = l), then runs r_main_suffix, r_consonant_pair and r_other_suffix in that order. A 0 result from a step is ignored and the cursor is reset before the next step; a negative result aborts and is returned. Finishes by moving the cursor to lb and returning 1. */ extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { 256 | { int c1 = z->c; /* do, line 74 */ 257
| { int ret = r_mark_regions(z); 258 | if (ret == 0) goto lab0; /* call mark_regions, line 74 */ 259 | if (ret < 0) return ret; 260 | } 261 | lab0: 262 | z->c = c1; 263 | } 264 | z->lb = z->c; z->c = z->l; /* backwards, line 75 */ 265 | 266 | { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ 267 | { int ret = r_main_suffix(z); 268 | if (ret == 0) goto lab1; /* call main_suffix, line 76 */ 269 | if (ret < 0) return ret; 270 | } 271 | lab1: 272 | z->c = z->l - m2; 273 | } 274 | { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ 275 | { int ret = r_consonant_pair(z); 276 | if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ 277 | if (ret < 0) return ret; 278 | } 279 | lab2: 280 | z->c = z->l - m3; 281 | } 282 | { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ 283 | { int ret = r_other_suffix(z); 284 | if (ret == 0) goto lab3; /* call other_suffix, line 78 */ 285 | if (ret < 0) return ret; 286 | } 287 | lab3: 288 | z->c = z->l - m4; 289 | } 290 | z->c = z->lb; 291 | return 1; 292 | } 293 | 294 | /* Environment constructor and destructor: thin wrappers around SN_create_env(0, 2, 0) and SN_close_env(z, 0). NOTE(review): the 2 presumably sizes the I[] array (I[0] and I[1] are the only slots used in this file) - confirm against the runtime API. */ extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } 295 | 296 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 297 | 298 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_ISO_8859_1_create_env(void); 9 | extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int norwegian_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_porter.h:
-------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * porter_ISO_8859_1_create_env(void); 9 | extern void porter_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int porter_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_ISO_8859_1_create_env(void); 9 | extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_spanish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * spanish_ISO_8859_1_create_env(void); 9 | extern void spanish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int spanish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_swedish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 
| 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | /* Forward declarations of the file-local steps that swedish_ISO_8859_1_stem calls. */ static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 23 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | /* s_0_0 .. s_0_36: the suffix strings referenced by the a_0 table that follows them. */ static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 34 | static const symbol s_0_5[2] = { 'a', 'd' }; 35 | static const symbol s_0_6[1] = { 'e' }; 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 37 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 41 | static const symbol s_0_12[2] = { 'e', 'n' }; 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 43 | static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 46 | static const symbol s_0_17[2] = { 'a', 'r' }; 47 | static const symbol s_0_18[2] = { 'e', 'r' }; 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 49 | static const symbol s_0_20[2] = { 'o', 'r' }; 50 | static const symbol s_0_21[1] = { 's' }; 51 | static const symbol s_0_22[2] = { 'a', 's' }; 52 | static const symbol s_0_23[5] = { 'a', 'r',
'n', 'a', 's' }; 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 55 | static const symbol s_0_26[2] = { 'e', 's' }; 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 62 | static const symbol s_0_33[2] = { 'a', 't' }; 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 65 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 66 | 67 | /* a_0: suffix table searched by r_main_suffix through find_among_b(z, a_0, 37). The first field of each entry equals the length of the string in the second field; the fourth field is the value r_main_suffix switches on (1 = delete, 2 = conditional removal of 's', used only by entry 21). NOTE(review): the third field looks like the index of a shorter entry that is a suffix of this one (-1 when there is none); struct among is declared outside this file, so confirm against ../runtime/header.h. */ static const struct among a_0[37] = 68 | { 69 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 70 | /* 1 */ { 4, s_0_1, 0, 1, 0}, 71 | /* 2 */ { 4, s_0_2, 0, 1, 0}, 72 | /* 3 */ { 7, s_0_3, 2, 1, 0}, 73 | /* 4 */ { 4, s_0_4, 0, 1, 0}, 74 | /* 5 */ { 2, s_0_5, -1, 1, 0}, 75 | /* 6 */ { 1, s_0_6, -1, 1, 0}, 76 | /* 7 */ { 3, s_0_7, 6, 1, 0}, 77 | /* 8 */ { 4, s_0_8, 6, 1, 0}, 78 | /* 9 */ { 4, s_0_9, 6, 1, 0}, 79 | /* 10 */ { 3, s_0_10, 6, 1, 0}, 80 | /* 11 */ { 4, s_0_11, 6, 1, 0}, 81 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 82 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 83 | /* 14 */ { 4, s_0_14, 12, 1, 0}, 84 | /* 15 */ { 5, s_0_15, 12, 1, 0}, 85 | /* 16 */ { 3, s_0_16, -1, 1, 0}, 86 | /* 17 */ { 2, s_0_17, -1, 1, 0}, 87 | /* 18 */ { 2, s_0_18, -1, 1, 0}, 88 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 89 | /* 20 */ { 2, s_0_20, -1, 1, 0}, 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, 91 | /* 22 */ { 2, s_0_22, 21, 1, 0}, 92 | /* 23 */ { 5, s_0_23, 22, 1, 0}, 93 | /* 24 */ { 5, s_0_24, 22, 1, 0}, 94 | /* 25 */ { 5, s_0_25, 22, 1, 0}, 95 | /* 26 */ { 2, s_0_26, 21, 1, 0}, 96 | /* 27 */ { 4, s_0_27, 26, 1, 0}, 97 | /* 28 */ { 5, s_0_28, 26, 1, 0}, 98 | /* 29 */ { 3, s_0_29, 21, 1, 0}, 99 | /* 30 */ { 5,
s_0_30, 29, 1, 0}, 100 | /* 31 */ { 6, s_0_31, 29, 1, 0}, 101 | /* 32 */ { 4, s_0_32, 21, 1, 0}, 102 | /* 33 */ { 2, s_0_33, -1, 1, 0}, 103 | /* 34 */ { 5, s_0_34, -1, 1, 0}, 104 | /* 35 */ { 3, s_0_35, -1, 1, 0}, 105 | /* 36 */ { 3, s_0_36, -1, 1, 0} 106 | }; 107 | 108 | static const symbol s_1_0[2] = { 'd', 'd' }; 109 | static const symbol s_1_1[2] = { 'g', 'd' }; 110 | static const symbol s_1_2[2] = { 'n', 'n' }; 111 | static const symbol s_1_3[2] = { 'd', 't' }; 112 | static const symbol s_1_4[2] = { 'g', 't' }; 113 | static const symbol s_1_5[2] = { 'k', 't' }; 114 | static const symbol s_1_6[2] = { 't', 't' }; 115 | 116 | /* a_1: the seven two-letter endings tested by r_consonant_pair. All carry result -1; the caller only checks that find_among_b returned non-zero. */ static const struct among a_1[7] = 117 | { 118 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 119 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 120 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 121 | /* 3 */ { 2, s_1_3, -1, -1, 0}, 122 | /* 4 */ { 2, s_1_4, -1, -1, 0}, 123 | /* 5 */ { 2, s_1_5, -1, -1, 0}, 124 | /* 6 */ { 2, s_1_6, -1, -1, 0} 125 | }; 126 | 127 | static const symbol s_2_0[2] = { 'i', 'g' }; 128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 129 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 130 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; 131 | static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; 132 | 133 | /* a_2: suffixes handled by r_other_suffix. Result 1 entries are deleted, entry 4 (result 2) is replaced with s_0 and entry 3 (result 3) is replaced with s_1. */ static const struct among a_2[5] = 134 | { 135 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 136 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 137 | /* 2 */ { 3, s_2_2, -1, 1, 0}, 138 | /* 3 */ { 5, s_2_3, -1, 3, 0}, 139 | /* 4 */ { 4, s_2_4, -1, 2, 0} 140 | }; 141 | 142 | /* g_v: table handed to the grouping helpers together with the code range 97..246; the generator's notes at the call sites call it "grouping v". NOTE(review): presumably a bitmap of the vowel codes - confirm against the Snowball source. */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; 143 | 144 | /* g_s_ending: grouping table used with the range 98..121 in case 2 of r_main_suffix. */ static const unsigned char g_s_ending[] = { 119, 127, 149 }; 145 | 146 | /* s_0 and s_1: replacement strings passed to slice_from_s in r_other_suffix (the entries with the final 't' dropped). */ static const symbol s_0[] = { 'l', 0xF6, 's' }; 147 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; 148 | 149 | /* r_mark_regions: computes the two integer marks used by the suffix routines. I[1] (the generator's "x") is the position 3 symbols past the entry cursor; I[0] ("p1") is where the cursor stands after the out_grouping / in_grouping scan over g_v, raised to I[1] when it would fall before it. Returns 0 when fewer than 3 symbols are available or either scan fails (I[0] then keeps the value z->l stored on entry), 1 otherwise. */ static int r_mark_regions(struct SN_env * z) { 150 | z->I[0] = z->l; 151 | { int c_test = z->c; /* test, line 29 */ 152 | { int ret = z->c + 3; 153 | if (0 > ret || ret > z->l) return 0;
154 | z->c = ret; /* hop, line 29 */ 155 | } 156 | z->I[1] = z->c; /* setmark x, line 29 */ 157 | z->c = c_test; 158 | } 159 | if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ 160 | { /* gopast */ /* non v, line 30 */ 161 | int ret = in_grouping(z, g_v, 97, 246, 1); 162 | if (ret < 0) return 0; 163 | z->c += ret; 164 | } 165 | z->I[0] = z->c; /* setmark p1, line 30 */ 166 | /* try, line 31 */ 167 | if (!(z->I[0] < z->I[1])) goto lab0; 168 | z->I[0] = z->I[1]; 169 | lab0: 170 | return 1; 171 | } 172 | 173 | /* r_main_suffix: backward step that handles one a_0 suffix ending at the cursor. Returns 0 when the cursor is before I[0] or nothing matches. While searching, lb is temporarily raised to I[0] so the match cannot reach past that mark; the saved value (mlimit) is put back before the switch. Result 1 deletes the slice; result 2 deletes it only when in_grouping_b on g_s_ending returns 0, otherwise the routine returns 0. Negative values from slice_del are passed up unchanged. */ static int r_main_suffix(struct SN_env * z) { 174 | int among_var; 175 | { int mlimit; /* setlimit, line 37 */ 176 | int m1 = z->l - z->c; (void)m1; 177 | if (z->c < z->I[0]) return 0; 178 | z->c = z->I[0]; /* tomark, line 37 */ 179 | mlimit = z->lb; z->lb = z->c; 180 | z->c = z->l - m1; 181 | z->ket = z->c; /* [, line 37 */ 182 | /* cheap pre-filter: give up unless the last byte lies in 96..127 and is selected by the bit mask, before paying for the table search */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 183 | among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ 184 | if (!(among_var)) { z->lb = mlimit; return 0; } 185 | z->bra = z->c; /* ], line 37 */ 186 | z->lb = mlimit; 187 | } 188 | switch(among_var) { 189 | case 0: return 0; 190 | case 1: 191 | { int ret = slice_del(z); /* delete, line 44 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 2: 196 | if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; 197 | { int ret = slice_del(z); /* delete, line 46 */ 198 | if (ret < 0) return ret; 199 | } 200 | break; 201 | } 202 | return 1; 203 | } 204 | 205 | /* r_consonant_pair: with lb temporarily raised to I[0], checks that the text before the cursor ends with an a_1 entry (after a last-byte pre-filter), rewinds the cursor, then sets ket at the cursor, steps back one symbol, sets bra and calls slice_del, so the slice is the single last symbol. Returns 0 when the cursor is before I[0] or the check fails. Unlike the other steps, the slice deletion happens inside the limited section and lb is restored only afterwards. */ static int r_consonant_pair(struct SN_env * z) { 206 | { int mlimit; /* setlimit, line 50 */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 50 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ 213 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >>
(z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 214 | if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ 215 | z->c = z->l - m2; 216 | z->ket = z->c; /* [, line 52 */ 217 | if (z->c <= z->lb) { z->lb = mlimit; return 0; } 218 | z->c--; /* next, line 52 */ 219 | z->bra = z->c; /* ], line 52 */ 220 | { int ret = slice_del(z); /* delete, line 52 */ 221 | if (ret < 0) return ret; 222 | } 223 | } 224 | z->lb = mlimit; 225 | } 226 | return 1; 227 | } 228 | 229 | /* r_other_suffix: with lb temporarily raised to I[0], looks up an a_2 suffix ending at the cursor. Result 1 deletes the slice, result 2 replaces it with s_0 and result 3 replaces it with s_1. Returns 0 when the cursor is before I[0], the last-byte pre-filter rejects the word, or nothing matches; negative values from the slice helpers are passed up unchanged. */ static int r_other_suffix(struct SN_env * z) { 230 | int among_var; 231 | { int mlimit; /* setlimit, line 55 */ 232 | int m1 = z->l - z->c; (void)m1; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 55 */ 235 | mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m1; 237 | z->ket = z->c; /* [, line 56 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 239 | among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0; } 241 | z->bra = z->c; /* ], line 56 */ 242 | switch(among_var) { 243 | case 0: { z->lb = mlimit; return 0; } 244 | case 1: 245 | { int ret = slice_del(z); /* delete, line 57 */ 246 | if (ret < 0) return ret; 247 | } 248 | break; 249 | case 2: 250 | { int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */ 251 | if (ret < 0) return ret; 252 | } 253 | break; 254 | case 3: 255 | { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ 256 | if (ret < 0) return ret; 257 | } 258 | break; 259 | } 260 | z->lb = mlimit; 261 | } 262 | return 1; 263 | } 264 | 265 | /* swedish_ISO_8859_1_stem: entry point. Runs r_mark_regions forwards and restores the cursor, switches to backward processing (lb = cursor, cursor = l), then runs r_main_suffix, r_consonant_pair and r_other_suffix in that order. A 0 result from a step is ignored and the cursor is reset before the next step; a negative result aborts and is returned. Finishes by moving the cursor to lb and returning 1. */ extern int swedish_ISO_8859_1_stem(struct SN_env * z) { 266 | { int c1 = z->c; /* do, line 66 */ 267 | { int ret = r_mark_regions(z); 268 | if (ret == 0) goto lab0; /* call mark_regions, line 66 */ 269 | if (ret < 0) return ret; 270 | } 271 | lab0: 272 | z->c = c1; 273 | } 274 | z->lb = z->c; z->c = z->l; /* backwards, line 67 */ 275 | 276 | { int m2
= z->l - z->c; (void)m2; /* do, line 68 */ 277 | { int ret = r_main_suffix(z); 278 | if (ret == 0) goto lab1; /* call main_suffix, line 68 */ 279 | if (ret < 0) return ret; 280 | } 281 | lab1: 282 | z->c = z->l - m2; 283 | } 284 | { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ 285 | { int ret = r_consonant_pair(z); 286 | if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ 287 | if (ret < 0) return ret; 288 | } 289 | lab2: 290 | z->c = z->l - m3; 291 | } 292 | { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ 293 | { int ret = r_other_suffix(z); 294 | if (ret == 0) goto lab3; /* call other_suffix, line 70 */ 295 | if (ret < 0) return ret; 296 | } 297 | lab3: 298 | z->c = z->l - m4; 299 | } 300 | z->c = z->lb; 301 | return 1; 302 | } 303 | 304 | /* Environment constructor and destructor: thin wrappers around SN_create_env(0, 2, 0) and SN_close_env(z, 0). NOTE(review): the 2 presumably sizes the I[] array (I[0] and I[1] are the only slots used in this file) - confirm against the runtime API. */ extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } 305 | 306 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 307 | 308 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_1_swedish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_ISO_8859_1_create_env(void); 9 | extern void swedish_ISO_8859_1_close_env(struct SN_env * z); 10 | 11 | extern int swedish_ISO_8859_1_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_ISO_8859_2_romanian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * romanian_ISO_8859_2_create_env(void);
9 | extern void romanian_ISO_8859_2_close_env(struct SN_env * z); 10 | 11 | extern int romanian_ISO_8859_2_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_KOI8_R_russian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_KOI8_R_create_env(void); 9 | extern void russian_KOI8_R_close_env(struct SN_env * z); 10 | 11 | extern int russian_KOI8_R_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_danish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int danish_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | /* Forward declarations of the file-local steps that danish_UTF_8_stem calls (r_consonant_pair is also called from r_other_suffix). */ static int r_undouble(struct SN_env * z); 14 | static int r_other_suffix(struct SN_env * z); 15 | static int r_consonant_pair(struct SN_env * z); 16 | static int r_main_suffix(struct SN_env * z); 17 | static int r_mark_regions(struct SN_env * z); 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | 22 | 23 | extern struct SN_env * danish_UTF_8_create_env(void); 24 | extern void danish_UTF_8_close_env(struct SN_env * z); 25 | 26 | 27 | #ifdef __cplusplus 28 | } 29 | #endif 30 | /* s_0_0 .. s_0_31: the suffix strings referenced by the a_0 table that follows them. */ static const symbol s_0_0[3] = { 'h', 'e', 'd' }; 31 | static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; 32 | static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; 33 | static const symbol s_0_3[1] = { 'e' }; 34 | static const symbol
s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; 35 | static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; 36 | static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; 37 | static const symbol s_0_7[3] = { 'e', 'n', 'e' }; 38 | static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; 39 | static const symbol s_0_9[3] = { 'e', 'r', 'e' }; 40 | static const symbol s_0_10[2] = { 'e', 'n' }; 41 | static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 42 | static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; 43 | static const symbol s_0_13[2] = { 'e', 'r' }; 44 | static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; 45 | static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; 46 | static const symbol s_0_16[1] = { 's' }; 47 | static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; 48 | static const symbol s_0_18[2] = { 'e', 's' }; 49 | static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; 50 | static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; 51 | static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; 52 | static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; 53 | static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; 54 | static const symbol s_0_24[3] = { 'e', 'n', 's' }; 55 | static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; 56 | static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; 57 | static const symbol s_0_27[3] = { 'e', 'r', 's' }; 58 | static const symbol s_0_28[3] = { 'e', 't', 's' }; 59 | static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; 60 | static const symbol s_0_30[2] = { 'e', 't' }; 61 | static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; 62 | 63 | /* a_0: suffix table searched by r_main_suffix through find_among_b(z, a_0, 32). The first field of each entry equals the length of the string in the second field; the fourth field is the value r_main_suffix switches on (1 = delete, 2 = conditional removal of 's', used only by entry 16). NOTE(review): the third field looks like the index of a shorter entry that is a suffix of this one (-1 when there is none); struct among is declared outside this file, so confirm against ../runtime/header.h. */ static const struct among a_0[32] = 64 | { 65 | /* 0 */ { 3, s_0_0, -1, 1, 0}, 66 | /* 1 */ { 5, s_0_1, 0, 1, 0}, 67 | /* 2 */ { 4, s_0_2, -1, 1, 0}, 68 | /* 3 */ { 1, s_0_3, -1, 1, 0}, 69 | /* 4 */ { 5, s_0_4, 3, 1, 0}, 70 | /* 5 */ { 4, s_0_5, 3, 1, 0}, 71 | /* 6 */ { 6, s_0_6, 5, 1, 0}, 72 | /* 7 */ { 3, s_0_7, 3, 1,
0}, 73 | /* 8 */ { 4, s_0_8, 3, 1, 0}, 74 | /* 9 */ { 3, s_0_9, 3, 1, 0}, 75 | /* 10 */ { 2, s_0_10, -1, 1, 0}, 76 | /* 11 */ { 5, s_0_11, 10, 1, 0}, 77 | /* 12 */ { 4, s_0_12, 10, 1, 0}, 78 | /* 13 */ { 2, s_0_13, -1, 1, 0}, 79 | /* 14 */ { 5, s_0_14, 13, 1, 0}, 80 | /* 15 */ { 4, s_0_15, 13, 1, 0}, 81 | /* 16 */ { 1, s_0_16, -1, 2, 0}, 82 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 83 | /* 18 */ { 2, s_0_18, 16, 1, 0}, 84 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 85 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 86 | /* 21 */ { 4, s_0_21, 18, 1, 0}, 87 | /* 22 */ { 5, s_0_22, 18, 1, 0}, 88 | /* 23 */ { 4, s_0_23, 18, 1, 0}, 89 | /* 24 */ { 3, s_0_24, 16, 1, 0}, 90 | /* 25 */ { 6, s_0_25, 24, 1, 0}, 91 | /* 26 */ { 5, s_0_26, 24, 1, 0}, 92 | /* 27 */ { 3, s_0_27, 16, 1, 0}, 93 | /* 28 */ { 3, s_0_28, 16, 1, 0}, 94 | /* 29 */ { 5, s_0_29, 28, 1, 0}, 95 | /* 30 */ { 2, s_0_30, -1, 1, 0}, 96 | /* 31 */ { 4, s_0_31, 30, 1, 0} 97 | }; 98 | 99 | static const symbol s_1_0[2] = { 'g', 'd' }; 100 | static const symbol s_1_1[2] = { 'd', 't' }; 101 | static const symbol s_1_2[2] = { 'g', 't' }; 102 | static const symbol s_1_3[2] = { 'k', 't' }; 103 | 104 | /* a_1: the four two-letter endings tested by r_consonant_pair. All carry result -1; the caller only checks that find_among_b returned non-zero. */ static const struct among a_1[4] = 105 | { 106 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 107 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 108 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 109 | /* 3 */ { 2, s_1_3, -1, -1, 0} 110 | }; 111 | 112 | static const symbol s_2_0[2] = { 'i', 'g' }; 113 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 114 | static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; 115 | static const symbol s_2_3[3] = { 'e', 'l', 's' }; 116 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; 117 | 118 | /* a_2: suffixes handled by r_other_suffix. Result 1 entries are deleted (followed by a consonant-pair pass); entry 4 (result 2) is replaced with s_2. Its string holds the two-byte sequence 0xC3 0xB8, so its length is 5 bytes. */ static const struct among a_2[5] = 119 | { 120 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 121 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 122 | /* 2 */ { 4, s_2_2, 1, 1, 0}, 123 | /* 3 */ { 3, s_2_3, -1, 1, 0}, 124 | /* 4 */ { 5, s_2_4, -1, 2, 0} 125 | }; 126 | 127 | /* g_v: table handed to the grouping helpers together with the code range 97..248; the generator's notes at the call sites call it "grouping v". NOTE(review): presumably a bitmap of the vowel code points - confirm against the Snowball source. */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
128 | 129 | /* g_s_ending: grouping table used with the range 97..229 in case 2 of r_main_suffix. */ static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; 130 | 131 | /* s_0 and s_1 are matched with eq_s_b in the first part of r_other_suffix; s_2 is the replacement passed to slice_from_s there (4 bytes, including the two-byte sequence 0xC3 0xB8). */ static const symbol s_0[] = { 's', 't' }; 132 | static const symbol s_1[] = { 'i', 'g' }; 133 | static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; 134 | 135 | /* r_mark_regions: computes the two integer marks used by the suffix routines. I[1] (the generator's "x") is the position skip_utf8 reports for a hop of 3 from the entry cursor; I[0] ("p1") is where the cursor stands after the out_grouping_U / in_grouping_U scan over g_v, raised to I[1] when it would fall before it. Returns 0 when the hop or either scan fails (I[0] then keeps the value z->l stored on entry), 1 otherwise. */ static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 33 */ 138 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 139 | if (ret < 0) return 0; 140 | z->c = ret; /* hop, line 33 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 33 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ 146 | { /* gopast */ /* non v, line 34 */ 147 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line 34 */ 152 | /* try, line 35 */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | 159 | /* r_main_suffix: backward step that handles one a_0 suffix ending at the cursor. Returns 0 when the cursor is before I[0] or nothing matches. While searching, lb is temporarily raised to I[0] so the match cannot reach past that mark; the saved value (mlimit) is put back before the switch. Result 1 deletes the slice; result 2 deletes it only when in_grouping_b_U on g_s_ending returns 0, otherwise the routine returns 0. Negative values from slice_del are passed up unchanged. */ static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 41 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 41 */ 165 | mlimit = z->lb; z->lb = z->c; 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 41 */ 168 | /* cheap pre-filter: give up unless the last byte lies in 96..127 and is selected by the bit mask, before paying for the table search */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 41 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 48 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; 183 | { int ret = slice_del(z); /* delete, line 50 */ 184
| if (ret < 0) return ret; 185 | } 186 | break; 187 | } 188 | return 1; 189 | } 190 | 191 | /* r_consonant_pair: under the I[0] limit, tests (cursor restored afterwards) that the text before the cursor ends with an a_1 entry, pre-filtered on the last byte being 100 ('d') or 116 ('t'). On success it moves the cursor back by one character with skip_utf8, sets bra there and calls slice_del; ket was set at the original cursor, so the slice is that single last character. Returns 0 when the test or the backward step fails. */ static int r_consonant_pair(struct SN_env * z) { 192 | { int m_test = z->l - z->c; /* test, line 55 */ 193 | { int mlimit; /* setlimit, line 56 */ 194 | int m1 = z->l - z->c; (void)m1; 195 | if (z->c < z->I[0]) return 0; 196 | z->c = z->I[0]; /* tomark, line 56 */ 197 | mlimit = z->lb; z->lb = z->c; 198 | z->c = z->l - m1; 199 | z->ket = z->c; /* [, line 56 */ 200 | if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } 201 | if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ 202 | z->bra = z->c; /* ], line 56 */ 203 | z->lb = mlimit; 204 | } 205 | z->c = z->l - m_test; 206 | } 207 | { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); 208 | if (ret < 0) return 0; 209 | z->c = ret; /* next, line 62 */ 210 | } 211 | z->bra = z->c; /* ], line 62 */ 212 | { int ret = slice_del(z); /* delete, line 62 */ 213 | if (ret < 0) return ret; 214 | } 215 | return 1; 216 | } 217 | 218 | /* r_other_suffix: two parts. First, an optional step (failure ignored, cursor restored): if s_0 matches backwards at the cursor and s_1 matches immediately before it, the s_0 slice is deleted. Second, under the I[0] limit, an a_2 suffix is looked up: result 1 deletes it and then runs r_consonant_pair as an optional step, result 2 replaces it with s_2. Returns 0 when the cursor is before I[0], the last-byte pre-filter rejects the word, or no a_2 entry matches. */ static int r_other_suffix(struct SN_env * z) { 219 | int among_var; 220 | { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ 221 | z->ket = z->c; /* [, line 66 */ 222 | if (!(eq_s_b(z, 2, s_0))) goto lab0; 223 | z->bra = z->c; /* ], line 66 */ 224 | if (!(eq_s_b(z, 2, s_1))) goto lab0; 225 | { int ret = slice_del(z); /* delete, line 66 */ 226 | if (ret < 0) return ret; 227 | } 228 | lab0: 229 | z->c = z->l - m1; 230 | } 231 | { int mlimit; /* setlimit, line 67 */ 232 | int m2 = z->l - z->c; (void)m2; 233 | if (z->c < z->I[0]) return 0; 234 | z->c = z->I[0]; /* tomark, line 67 */ 235 | mlimit = z->lb; z->lb = z->c; 236 | z->c = z->l - m2; 237 | z->ket = z->c; /* [, line 67 */ 238 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 239 | among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ 240 | if (!(among_var)) { z->lb = mlimit; return 0;
} 241 | z->bra = z->c; /* ], line 67 */ 242 | z->lb = mlimit; 243 | } 244 | switch(among_var) { 245 | case 0: return 0; 246 | case 1: 247 | { int ret = slice_del(z); /* delete, line 70 */ 248 | if (ret < 0) return ret; 249 | } 250 | { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ 251 | { int ret = r_consonant_pair(z); 252 | if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ 253 | if (ret < 0) return ret; 254 | } 255 | lab1: 256 | z->c = z->l - m3; 257 | } 258 | break; 259 | case 2: 260 | { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */ 261 | if (ret < 0) return ret; 262 | } 263 | break; 264 | } 265 | return 1; 266 | } 267 | 268 | /* r_undouble: under the I[0] limit, marks the slice covered by one backward out_grouping_b_U step over g_v and copies it into S[0] with slice_to (returning -1 if that yields a null pointer). With the limit lifted, it then requires eq_v_b to match the same string again immediately before the slice, and deletes the slice. Returns 0 when the cursor is before I[0] or either match fails. NOTE(review): this appears to drop one copy of a doubled non-vowel - confirm against the Snowball source. */ static int r_undouble(struct SN_env * z) { 269 | { int mlimit; /* setlimit, line 76 */ 270 | int m1 = z->l - z->c; (void)m1; 271 | if (z->c < z->I[0]) return 0; 272 | z->c = z->I[0]; /* tomark, line 76 */ 273 | mlimit = z->lb; z->lb = z->c; 274 | z->c = z->l - m1; 275 | z->ket = z->c; /* [, line 76 */ 276 | if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } 277 | z->bra = z->c; /* ], line 76 */ 278 | z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ 279 | if (z->S[0] == 0) return -1; /* -> ch, line 76 */ 280 | z->lb = mlimit; 281 | } 282 | if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ 283 | { int ret = slice_del(z); /* delete, line 78 */ 284 | if (ret < 0) return ret; 285 | } 286 | return 1; 287 | } 288 | 289 | /* danish_UTF_8_stem: entry point. Runs r_mark_regions forwards and restores the cursor, switches to backward processing (lb = cursor, cursor = l), then runs r_main_suffix, r_consonant_pair, r_other_suffix and r_undouble in that order. A 0 result from a step is ignored and the cursor is reset before the next step; a negative result aborts and is returned. Finishes by moving the cursor to lb and returning 1. */ extern int danish_UTF_8_stem(struct SN_env * z) { 290 | { int c1 = z->c; /* do, line 84 */ 291 | { int ret = r_mark_regions(z); 292 | if (ret == 0) goto lab0; /* call mark_regions, line 84 */ 293 | if (ret < 0) return ret; 294 | } 295 | lab0: 296 | z->c = c1; 297 | } 298 | z->lb = z->c; z->c = z->l; /* backwards, line 85 */ 299 | 300 | { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ 301 | { int ret = r_main_suffix(z); 302 | if (ret == 0) goto lab1; /* call main_suffix, line 86 */ 303 | if (ret < 0) return ret; 304 | } 305 | lab1: 306 | z->c = z->l - m2; 307 | } 308 | { int
m3 = z->l - z->c; (void)m3; /* do, line 87 */ 309 | { int ret = r_consonant_pair(z); 310 | if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ 311 | if (ret < 0) return ret; 312 | } 313 | lab2: 314 | z->c = z->l - m3; 315 | } 316 | { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ 317 | { int ret = r_other_suffix(z); 318 | if (ret == 0) goto lab3; /* call other_suffix, line 88 */ 319 | if (ret < 0) return ret; 320 | } 321 | lab3: 322 | z->c = z->l - m4; 323 | } 324 | { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ 325 | { int ret = r_undouble(z); 326 | if (ret == 0) goto lab4; /* call undouble, line 89 */ 327 | if (ret < 0) return ret; 328 | } 329 | lab4: 330 | z->c = z->l - m5; 331 | } 332 | z->c = z->lb; 333 | return 1; 334 | } 335 | 336 | /* Environment constructor and destructor: thin wrappers around SN_create_env(1, 2, 0) and SN_close_env(z, 1). NOTE(review): the 1 and 2 presumably size the S[] and I[] arrays (S[0], I[0] and I[1] are the only slots used in this file) - confirm against the runtime API. */ extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); } 337 | 338 | extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } 339 | 340 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_danish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * danish_UTF_8_create_env(void); 9 | extern void danish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int danish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_dutch.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * dutch_UTF_8_create_env(void); 9 | extern void dutch_UTF_8_close_env(struct SN_env *
z); 10 | 11 | extern int dutch_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_english.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * english_UTF_8_create_env(void); 9 | extern void english_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int english_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_finnish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * finnish_UTF_8_create_env(void); 9 | extern void finnish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int finnish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_french.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * french_UTF_8_create_env(void); 9 | extern void french_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int french_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_german.h: 
-------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * german_UTF_8_create_env(void); 9 | extern void german_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int german_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_hungarian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * hungarian_UTF_8_create_env(void); 9 | extern void hungarian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int hungarian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_italian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * italian_UTF_8_create_env(void); 9 | extern void italian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int italian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_latin.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 
| extern "C" { 8 | #endif 9 | extern int latin_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_que_word(struct SN_env * z); 14 | static int r_map_letters(struct SN_env * z); 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | 20 | extern struct SN_env * latin_UTF_8_create_env(void); 21 | extern void latin_UTF_8_close_env(struct SN_env * z); 22 | 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | static const symbol s_0_0[3] = { 'i', 't', 'a' }; 28 | static const symbol s_0_1[3] = { 'q', 'u', 'a' }; 29 | static const symbol s_0_2[4] = { 'a', 'd', 'a', 'e' }; 30 | static const symbol s_0_3[5] = { 'p', 'e', 'r', 'a', 'e' }; 31 | static const symbol s_0_4[4] = { 'q', 'u', 'a', 'e' }; 32 | static const symbol s_0_5[2] = { 'd', 'e' }; 33 | static const symbol s_0_6[2] = { 'n', 'e' }; 34 | static const symbol s_0_7[6] = { 'u', 't', 'r', 'i', 'b', 'i' }; 35 | static const symbol s_0_8[3] = { 'u', 'b', 'i' }; 36 | static const symbol s_0_9[4] = { 'u', 'n', 'd', 'i' }; 37 | static const symbol s_0_10[4] = { 'o', 'b', 'l', 'i' }; 38 | static const symbol s_0_11[4] = { 'd', 'e', 'n', 'i' }; 39 | static const symbol s_0_12[3] = { 'u', 't', 'i' }; 40 | static const symbol s_0_13[3] = { 'c', 'u', 'i' }; 41 | static const symbol s_0_14[3] = { 'q', 'u', 'i' }; 42 | static const symbol s_0_15[4] = { 'q', 'u', 'a', 'm' }; 43 | static const symbol s_0_16[4] = { 'q', 'u', 'e', 'm' }; 44 | static const symbol s_0_17[6] = { 'q', 'u', 'a', 'r', 'u', 'm' }; 45 | static const symbol s_0_18[6] = { 'q', 'u', 'o', 'r', 'u', 'm' }; 46 | static const symbol s_0_19[2] = { 'c', 'o' }; 47 | static const symbol s_0_20[4] = { 'd', 'e', 'c', 'o' }; 48 | static const symbol s_0_21[4] = { 'r', 'e', 'c', 'o' }; 49 | static const symbol s_0_22[4] = { 'i', 'n', 'c', 'o' }; 50 | static const symbol s_0_23[5] = { 'c', 'o', 'n', 'c', 'o' }; 51 | static const symbol s_0_24[4] = { 'e', 'x', 'c', 'o' }; 52 | static const symbol s_0_25[6] = { 'q', 'u', 
'a', 'n', 'd', 'o' }; 53 | static const symbol s_0_26[4] = { 'u', 't', 'r', 'o' }; 54 | static const symbol s_0_27[3] = { 'q', 'u', 'o' }; 55 | static const symbol s_0_28[4] = { 'u', 't', 'e', 'r' }; 56 | static const symbol s_0_29[3] = { 't', 'o', 'r' }; 57 | static const symbol s_0_30[5] = { 'o', 'b', 't', 'o', 'r' }; 58 | static const symbol s_0_31[7] = { 'p', 'r', 'a', 'e', 't', 'o', 'r' }; 59 | static const symbol s_0_32[5] = { 'd', 'e', 't', 'o', 'r' }; 60 | static const symbol s_0_33[5] = { 'r', 'e', 't', 'o', 'r' }; 61 | static const symbol s_0_34[5] = { 'i', 'n', 't', 'o', 'r' }; 62 | static const symbol s_0_35[6] = { 'c', 'o', 'n', 't', 'o', 'r' }; 63 | static const symbol s_0_36[5] = { 'o', 'p', 't', 'o', 'r' }; 64 | static const symbol s_0_37[5] = { 'a', 't', 't', 'o', 'r' }; 65 | static const symbol s_0_38[5] = { 'e', 'x', 't', 'o', 'r' }; 66 | static const symbol s_0_39[4] = { 'q', 'u', 'a', 's' }; 67 | static const symbol s_0_40[3] = { 'a', 'b', 's' }; 68 | static const symbol s_0_41[6] = { 'p', 'l', 'e', 'n', 'i', 's' }; 69 | static const symbol s_0_42[4] = { 'q', 'u', 'i', 's' }; 70 | static const symbol s_0_43[10] = { 'q', 'u', 'o', 't', 'u', 's', 'q', 'u', 'i', 's' }; 71 | static const symbol s_0_44[4] = { 'q', 'u', 'o', 's' }; 72 | static const symbol s_0_45[3] = { 'a', 'p', 's' }; 73 | static const symbol s_0_46[2] = { 'u', 's' }; 74 | static const symbol s_0_47[4] = { 'a', 'b', 'u', 's' }; 75 | static const symbol s_0_48[6] = { 'q', 'u', 'i', 'b', 'u', 's' }; 76 | static const symbol s_0_49[4] = { 'a', 'd', 'u', 's' }; 77 | static const symbol s_0_50[5] = { 'c', 'u', 'i', 'u', 's' }; 78 | static const symbol s_0_51[5] = { 'q', 'u', 'o', 'u', 's' }; 79 | static const symbol s_0_52[3] = { 's', 'u', 's' }; 80 | static const symbol s_0_53[2] = { 'a', 't' }; 81 | 82 | static const struct among a_0[54] = 83 | { 84 | /* 0 */ { 3, s_0_0, -1, -1, 0}, 85 | /* 1 */ { 3, s_0_1, -1, -1, 0}, 86 | /* 2 */ { 4, s_0_2, -1, -1, 0}, 87 | /* 3 */ { 5, s_0_3, -1, 
-1, 0}, 88 | /* 4 */ { 4, s_0_4, -1, -1, 0}, 89 | /* 5 */ { 2, s_0_5, -1, -1, 0}, 90 | /* 6 */ { 2, s_0_6, -1, -1, 0}, 91 | /* 7 */ { 6, s_0_7, -1, -1, 0}, 92 | /* 8 */ { 3, s_0_8, -1, -1, 0}, 93 | /* 9 */ { 4, s_0_9, -1, -1, 0}, 94 | /* 10 */ { 4, s_0_10, -1, -1, 0}, 95 | /* 11 */ { 4, s_0_11, -1, -1, 0}, 96 | /* 12 */ { 3, s_0_12, -1, -1, 0}, 97 | /* 13 */ { 3, s_0_13, -1, -1, 0}, 98 | /* 14 */ { 3, s_0_14, -1, -1, 0}, 99 | /* 15 */ { 4, s_0_15, -1, -1, 0}, 100 | /* 16 */ { 4, s_0_16, -1, -1, 0}, 101 | /* 17 */ { 6, s_0_17, -1, -1, 0}, 102 | /* 18 */ { 6, s_0_18, -1, -1, 0}, 103 | /* 19 */ { 2, s_0_19, -1, -1, 0}, 104 | /* 20 */ { 4, s_0_20, 19, -1, 0}, 105 | /* 21 */ { 4, s_0_21, 19, -1, 0}, 106 | /* 22 */ { 4, s_0_22, 19, -1, 0}, 107 | /* 23 */ { 5, s_0_23, 19, -1, 0}, 108 | /* 24 */ { 4, s_0_24, 19, -1, 0}, 109 | /* 25 */ { 6, s_0_25, -1, -1, 0}, 110 | /* 26 */ { 4, s_0_26, -1, -1, 0}, 111 | /* 27 */ { 3, s_0_27, -1, -1, 0}, 112 | /* 28 */ { 4, s_0_28, -1, -1, 0}, 113 | /* 29 */ { 3, s_0_29, -1, -1, 0}, 114 | /* 30 */ { 5, s_0_30, 29, -1, 0}, 115 | /* 31 */ { 7, s_0_31, 29, -1, 0}, 116 | /* 32 */ { 5, s_0_32, 29, -1, 0}, 117 | /* 33 */ { 5, s_0_33, 29, -1, 0}, 118 | /* 34 */ { 5, s_0_34, 29, -1, 0}, 119 | /* 35 */ { 6, s_0_35, 29, -1, 0}, 120 | /* 36 */ { 5, s_0_36, 29, -1, 0}, 121 | /* 37 */ { 5, s_0_37, 29, -1, 0}, 122 | /* 38 */ { 5, s_0_38, 29, -1, 0}, 123 | /* 39 */ { 4, s_0_39, -1, -1, 0}, 124 | /* 40 */ { 3, s_0_40, -1, -1, 0}, 125 | /* 41 */ { 6, s_0_41, -1, -1, 0}, 126 | /* 42 */ { 4, s_0_42, -1, -1, 0}, 127 | /* 43 */ { 10, s_0_43, 42, -1, 0}, 128 | /* 44 */ { 4, s_0_44, -1, -1, 0}, 129 | /* 45 */ { 3, s_0_45, -1, -1, 0}, 130 | /* 46 */ { 2, s_0_46, -1, -1, 0}, 131 | /* 47 */ { 4, s_0_47, 46, -1, 0}, 132 | /* 48 */ { 6, s_0_48, 46, -1, 0}, 133 | /* 49 */ { 4, s_0_49, 46, -1, 0}, 134 | /* 50 */ { 5, s_0_50, 46, -1, 0}, 135 | /* 51 */ { 5, s_0_51, 46, -1, 0}, 136 | /* 52 */ { 3, s_0_52, 46, -1, 0}, 137 | /* 53 */ { 2, s_0_53, -1, -1, 0} 138 | }; 139 | 
140 | static const symbol s_1_0[1] = { 'a' }; 141 | static const symbol s_1_1[2] = { 'i', 'a' }; 142 | static const symbol s_1_2[2] = { 'u', 'd' }; 143 | static const symbol s_1_3[1] = { 'e' }; 144 | static const symbol s_1_4[2] = { 'a', 'e' }; 145 | static const symbol s_1_5[1] = { 'i' }; 146 | static const symbol s_1_6[2] = { 'a', 'm' }; 147 | static const symbol s_1_7[2] = { 'e', 'm' }; 148 | static const symbol s_1_8[2] = { 'u', 'm' }; 149 | static const symbol s_1_9[1] = { 'o' }; 150 | static const symbol s_1_10[2] = { 'a', 's' }; 151 | static const symbol s_1_11[2] = { 'e', 's' }; 152 | static const symbol s_1_12[2] = { 'i', 's' }; 153 | static const symbol s_1_13[2] = { 'o', 's' }; 154 | static const symbol s_1_14[2] = { 'u', 's' }; 155 | static const symbol s_1_15[4] = { 'i', 'b', 'u', 's' }; 156 | static const symbol s_1_16[3] = { 'i', 'u', 's' }; 157 | static const symbol s_1_17[2] = { 'n', 't' }; 158 | static const symbol s_1_18[1] = { 'u' }; 159 | 160 | static const struct among a_1[19] = 161 | { 162 | /* 0 */ { 1, s_1_0, -1, 1, 0}, 163 | /* 1 */ { 2, s_1_1, 0, 1, 0}, 164 | /* 2 */ { 2, s_1_2, -1, 1, 0}, 165 | /* 3 */ { 1, s_1_3, -1, 1, 0}, 166 | /* 4 */ { 2, s_1_4, 3, 1, 0}, 167 | /* 5 */ { 1, s_1_5, -1, 1, 0}, 168 | /* 6 */ { 2, s_1_6, -1, 1, 0}, 169 | /* 7 */ { 2, s_1_7, -1, 1, 0}, 170 | /* 8 */ { 2, s_1_8, -1, 1, 0}, 171 | /* 9 */ { 1, s_1_9, -1, 1, 0}, 172 | /* 10 */ { 2, s_1_10, -1, 1, 0}, 173 | /* 11 */ { 2, s_1_11, -1, 1, 0}, 174 | /* 12 */ { 2, s_1_12, -1, 1, 0}, 175 | /* 13 */ { 2, s_1_13, -1, 1, 0}, 176 | /* 14 */ { 2, s_1_14, -1, 1, 0}, 177 | /* 15 */ { 4, s_1_15, 14, 1, 0}, 178 | /* 16 */ { 3, s_1_16, 14, 1, 0}, 179 | /* 17 */ { 2, s_1_17, -1, 1, 0}, 180 | /* 18 */ { 1, s_1_18, -1, 1, 0} 181 | }; 182 | 183 | static const symbol s_2_0[4] = { 'm', 'i', 'n', 'i' }; 184 | static const symbol s_2_1[2] = { 'r', 'i' }; 185 | static const symbol s_2_2[3] = { 's', 't', 'i' }; 186 | static const symbol s_2_3[1] = { 'm' }; 187 | static const symbol 
s_2_4[2] = { 'b', 'o' }; 188 | static const symbol s_2_5[3] = { 'e', 'r', 'o' }; 189 | static const symbol s_2_6[1] = { 'r' }; 190 | static const symbol s_2_7[3] = { 'b', 'o', 'r' }; 191 | static const symbol s_2_8[3] = { 'm', 'u', 'r' }; 192 | static const symbol s_2_9[3] = { 't', 'u', 'r' }; 193 | static const symbol s_2_10[4] = { 'n', 't', 'u', 'r' }; 194 | static const symbol s_2_11[5] = { 'u', 'n', 't', 'u', 'r' }; 195 | static const symbol s_2_12[6] = { 'i', 'u', 'n', 't', 'u', 'r' }; 196 | static const symbol s_2_13[1] = { 's' }; 197 | static const symbol s_2_14[3] = { 'r', 'i', 's' }; 198 | static const symbol s_2_15[5] = { 'b', 'e', 'r', 'i', 's' }; 199 | static const symbol s_2_16[3] = { 't', 'i', 's' }; 200 | static const symbol s_2_17[4] = { 's', 't', 'i', 's' }; 201 | static const symbol s_2_18[2] = { 'n', 's' }; 202 | static const symbol s_2_19[3] = { 'm', 'u', 's' }; 203 | static const symbol s_2_20[1] = { 't' }; 204 | static const symbol s_2_21[2] = { 'n', 't' }; 205 | static const symbol s_2_22[3] = { 'u', 'n', 't' }; 206 | static const symbol s_2_23[4] = { 'i', 'u', 'n', 't' }; 207 | static const symbol s_2_24[5] = { 'e', 'r', 'u', 'n', 't' }; 208 | 209 | static const struct among a_2[25] = 210 | { 211 | /* 0 */ { 4, s_2_0, -1, 4, 0}, 212 | /* 1 */ { 2, s_2_1, -1, 4, 0}, 213 | /* 2 */ { 3, s_2_2, -1, 4, 0}, 214 | /* 3 */ { 1, s_2_3, -1, 4, 0}, 215 | /* 4 */ { 2, s_2_4, -1, 2, 0}, 216 | /* 5 */ { 3, s_2_5, -1, 3, 0}, 217 | /* 6 */ { 1, s_2_6, -1, 4, 0}, 218 | /* 7 */ { 3, s_2_7, 6, 2, 0}, 219 | /* 8 */ { 3, s_2_8, 6, 4, 0}, 220 | /* 9 */ { 3, s_2_9, 6, 4, 0}, 221 | /* 10 */ { 4, s_2_10, 9, 4, 0}, 222 | /* 11 */ { 5, s_2_11, 10, 1, 0}, 223 | /* 12 */ { 6, s_2_12, 11, 1, 0}, 224 | /* 13 */ { 1, s_2_13, -1, 4, 0}, 225 | /* 14 */ { 3, s_2_14, 13, 4, 0}, 226 | /* 15 */ { 5, s_2_15, 14, 2, 0}, 227 | /* 16 */ { 3, s_2_16, 13, 4, 0}, 228 | /* 17 */ { 4, s_2_17, 16, 4, 0}, 229 | /* 18 */ { 2, s_2_18, 13, 4, 0}, 230 | /* 19 */ { 3, s_2_19, 13, 4, 0}, 231 | 
/* 20 */ { 1, s_2_20, -1, 4, 0}, 232 | /* 21 */ { 2, s_2_21, 20, 4, 0}, 233 | /* 22 */ { 3, s_2_22, 21, 1, 0}, 234 | /* 23 */ { 4, s_2_23, 22, 1, 0}, 235 | /* 24 */ { 5, s_2_24, 22, 1, 0} 236 | }; 237 | /* Literal strings referenced as s_N by the routines of this file. */ 238 | static const symbol s_0[] = { 'j' }; 239 | static const symbol s_1[] = { 'i' }; 240 | static const symbol s_2[] = { 'v' }; 241 | static const symbol s_3[] = { 'u' }; 242 | static const symbol s_4[] = { 'q', 'u', 'e' }; 243 | static const symbol s_5[] = { 'i' }; 244 | static const symbol s_6[] = { 'b', 'i' }; 245 | static const symbol s_7[] = { 'e', 'r', 'i' }; 246 | /* r_map_letters: two forward passes over the word. The first replaces every s_0 ('j') it finds with s_1 ('i'), the second every s_2 ('v') with s_3 ('u'); the cursor is put back where it started after each pass. Returns 1, or the negative result of slice_from_s. */ 247 | static int r_map_letters(struct SN_env * z) { 248 | { int c1 = z->c; /* do, line 14 */ 249 | while(1) { /* repeat, line 14 */ 250 | int c2 = z->c; 251 | while(1) { /* goto, line 14 */ 252 | int c3 = z->c; 253 | z->bra = z->c; /* [, line 14 */ 254 | if (!(eq_s(z, 1, s_0))) goto lab2; 255 | z->ket = z->c; /* ], line 14 */ 256 | z->c = c3; 257 | break; 258 | lab2: 259 | z->c = c3; 260 | if (z->c >= z->l) goto lab1; /* end of input reached without a match: leave the repeat loop */ 261 | z->c++; /* goto, line 14 */ /* NOTE(review): steps by one position instead of calling skip_utf8; presumably safe because the patterns searched for are single ASCII letters -- confirm */ 262 | } 263 | { int ret = slice_from_s(z, 1, s_1); /* <-, line 14 */ 264 | if (ret < 0) return ret; 265 | } 266 | continue; 267 | lab1: 268 | z->c = c2; 269 | break; 270 | } 271 | z->c = c1; 272 | } 273 | { int c4 = z->c; /* do, line 15 */ 274 | while(1) { /* repeat, line 15 */ 275 | int c5 = z->c; 276 | while(1) { /* goto, line 15 */ 277 | int c6 = z->c; 278 | z->bra = z->c; /* [, line 15 */ 279 | if (!(eq_s(z, 1, s_2))) goto lab5; 280 | z->ket = z->c; /* ], line 15 */ 281 | z->c = c6; 282 | break; 283 | lab5: 284 | z->c = c6; 285 | if (z->c >= z->l) goto lab4; 286 | z->c++; /* goto, line 15 */ 287 | } 288 | { int ret = slice_from_s(z, 1, s_3); /* <-, line 15 */ 289 | if (ret < 0) return ret; 290 | } 291 | continue; 292 | lab4: 293 | z->c = c5; 294 | break; 295 | } 296 | z->c = c4; 297 | } 298 | return 1; 299 | } 300 | /* r_que_word: returns 0 with the word untouched when it does not end in s_4 ("que"). When the text before "que" is an a_0 entry that reaches the backward limit, the word is kept, z->S[0] and z->S[1] are set through assign_to, and 1 is returned; otherwise the "que" slice is deleted and 0 is returned. */ 301 | static int r_que_word(struct SN_env * z) { 302 | z->ket = z->c; /* [, line 22 */ 303 | if (!(eq_s_b(z, 3, s_4))) return 0; 304
| z->bra = z->c; /* ], line 22 */ 305 | { int m1 = z->l - z->c; (void)m1; /* or, line 35 */ 306 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876514 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab1; /* generated pre-filter on the byte before the cursor, applied before the a_0 table search */ 307 | if (!(find_among_b(z, a_0, 54))) goto lab1; /* among, line 23 */ 308 | if (z->c > z->lb) goto lab1; /* atlimit, line 32 */ 309 | z->bra = z->c; /* ], line 32 */ 310 | z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 33 */ 311 | if (z->S[0] == 0) return -1; /* => noun_form, line 33 */ 312 | z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 34 */ 313 | if (z->S[1] == 0) return -1; /* => verb_form, line 34 */ 314 | goto lab0; 315 | lab1: 316 | z->c = z->l - m1; 317 | { int ret = slice_del(z); /* delete, line 35 */ /* bra has not moved on this path, so the slice is still the one closed right after the "que" match */ 318 | if (ret < 0) return ret; 319 | } 320 | return 0; /* fail, line 35 */ 321 | } 322 | lab0: 323 | return 1; 324 | } 325 | /* latin_UTF_8_stem: entry point. After r_map_letters it works backwards: when r_que_word succeeds the S[0] value it stored is used directly; otherwise z->S[0] (noun_form) and z->S[1] (verb_form) are set through assign_to, each in turn temporarily becomes the working buffer while a matching a_1 suffix is deleted or a matching a_2 suffix is rewritten or deleted, and finally insert_v is called with z->S[0]. Returns 1, 0 if r_map_letters returns 0, or a negative error code. */ 326 | extern int latin_UTF_8_stem(struct SN_env * z) { 327 | int among_var; 328 | { int ret = r_map_letters(z); 329 | if (ret == 0) return 0; /* call map_letters, line 41 */ 330 | if (ret < 0) return ret; 331 | } 332 | z->lb = z->c; z->c = z->l; /* backwards, line 43 */ 333 | 334 | { int m1 = z->l - z->c; (void)m1; /* or, line 44 */ 335 | { int ret = r_que_word(z); 336 | if (ret == 0) goto lab1; /* call que_word, line 44 */ 337 | if (ret < 0) return ret; 338 | } 339 | goto lab0; 340 | lab1: 341 | z->c = z->l - m1; 342 | z->S[0] = assign_to(z, z->S[0]); /* => noun_form, line 45 */ 343 | if (z->S[0] == 0) return -1; /* => noun_form, line 45 */ 344 | z->S[1] = assign_to(z, z->S[1]); /* => verb_form, line 46 */ 345 | if (z->S[1] == 0) return -1; /* => verb_form, line 46 */ 346 | { struct SN_env env = * z; /* $ noun_form, line 48 */ /* snapshot of the whole environment; z->p is pointed at S[0] until the snapshot is copied back */ 347 | int failure = 1; /* assume failure */ 348 | z->p = z->S[0]; 349 | z->lb = z->c = 0; 350 | z->l = SIZE(z->p); 351 | z->lb = z->c; z->c = z->l; /* backwards, line 48 */ 352 | 353 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 48 */ 354 | z->ket = z->c; /* [, line 49 */ 355
| if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3711538 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } 356 | among_var = find_among_b(z, a_1, 19); /* substring, line 49 */ 357 | if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } 358 | z->bra = z->c; /* ], line 49 */ 359 | { int ret = z->c - 2; 360 | if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab3; } /* the suffix is only acted on when two more positions exist before it */ 361 | z->c = ret; /* hop, line 49 */ 362 | } 363 | switch(among_var) { 364 | case 0: { z->c = z->l - m_keep; goto lab3; } 365 | case 1: 366 | { int ret = slice_del(z); /* delete, line 53 */ 367 | if (ret < 0) return ret; /* NOTE(review): returns while z->p still points at S[0], skipping the "* z = env" restore -- confirm callers discard the environment after a negative result */ 368 | } 369 | break; 370 | } 371 | lab3: 372 | ; 373 | } 374 | z->c = z->lb; 375 | failure = 0; /* mark success */ 376 | z->S[0] = z->p; /* store the current buffer pointer back into S[0] before the saved environment is restored */ 377 | * z = env; 378 | if (failure) return 0; /* never taken: failure was cleared unconditionally three statements earlier */ 379 | } 380 | { struct SN_env env = * z; /* $ verb_form, line 57 */ 381 | int failure = 1; /* assume failure */ 382 | z->p = z->S[1]; 383 | z->lb = z->c = 0; 384 | z->l = SIZE(z->p); 385 | z->lb = z->c; z->c = z->l; /* backwards, line 57 */ 386 | 387 | { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 57 */ 388 | z->ket = z->c; /* [, line 58 */ 389 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1876480 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab5; } 390 | among_var = find_among_b(z, a_2, 25); /* substring, line 58 */ 391 | if (!(among_var)) { z->c = z->l - m_keep; goto lab5; } 392 | z->bra = z->c; /* ], line 58 */ 393 | { int ret = z->c - 2; 394 | if (z->lb > ret || ret > z->l) { z->c = z->l - m_keep; goto lab5; } 395 | z->c = ret; /* hop, line 58 */ 396 | } 397 | switch(among_var) { /* 1, 2 and 3 rewrite the suffix to s_5 ("i"), s_6 ("bi") and s_7 ("eri"); 4 deletes it */ 398 | case 0: { z->c = z->l - m_keep; goto lab5; } 399 | case 1: 400 | { int ret = slice_from_s(z, 1, s_5); /* <-, line 61 */ 401 | if (ret < 0) return ret; 402 | } 403 | break; 404 | case 2: 405 | { int ret = slice_from_s(z, 2, s_6); /* <-, line 63 */ 406 | if (ret < 0) return ret; 407 | } 408 | break; 409 | case 3: 410 | { int ret = slice_from_s(z, 3,
s_7); /* <-, line 65 */ 411 | if (ret < 0) return ret; 412 | } 413 | break; 414 | case 4: 415 | { int ret = slice_del(z); /* delete, line 68 */ 416 | if (ret < 0) return ret; 417 | } 418 | break; 419 | } 420 | lab5: 421 | ; 422 | } 423 | z->c = z->lb; 424 | failure = 0; /* mark success */ 425 | z->S[1] = z->p; 426 | * z = env; 427 | if (failure) return 0; 428 | } 429 | } 430 | lab0: 431 | z->c = z->lb; 432 | { int c_keep = z->c; 433 | int ret = insert_v(z, z->c, z->l, z->S[0]); /* = noun_form, line 74 */ /* called over the span from the cursor to z->l with S[0], presumably replacing the whole word; S[1] is not read after this label */ 434 | z->c = c_keep; 435 | if (ret < 0) return ret; 436 | } 437 | return 1; 438 | } 439 | /* Environment constructor: forwards to the runtime SN_create_env with the counts (2, 0, 0). NOTE(review): presumably two string slots, matching the z->S[0] and z->S[1] uses in this file -- confirm against runtime/api.c. */ 440 | extern struct SN_env * latin_UTF_8_create_env(void) { return SN_create_env(2, 0, 0); } 441 | /* Environment destructor: forwards to SN_close_env; the 2 equals the first argument given to SN_create_env in the constructor. */ 442 | extern void latin_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 2); } 443 | 444 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_latin.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * latin_UTF_8_create_env(void); 9 | extern void latin_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int latin_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_lithuanian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * lithuanian_UTF_8_create_env(void); 9 | extern void lithuanian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int lithuanian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 |
-------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_norwegian.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int norwegian_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * norwegian_UTF_8_create_env(void); 23 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[1] = { 'e' }; 31 | static const symbol s_0_2[3] = { 'e', 'd', 'e' }; 32 | static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; 33 | static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; 34 | static const symbol s_0_5[3] = { 'a', 'n', 'e' }; 35 | static const symbol s_0_6[3] = { 'e', 'n', 'e' }; 36 | static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; 37 | static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; 38 | static const symbol s_0_9[2] = { 'e', 'n' }; 39 | static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; 40 | static const symbol s_0_11[2] = { 'a', 'r' }; 41 | static const symbol s_0_12[2] = { 'e', 'r' }; 42 | static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; 43 | static const symbol s_0_14[1] = { 's' }; 44 | static const symbol s_0_15[2] = { 'a', 's' }; 45 | static const symbol s_0_16[2] = { 'e', 's' }; 46 | static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; 47 | static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; 48 
| static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; 49 | static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; 50 | static const symbol s_0_21[3] = { 'e', 'n', 's' }; 51 | static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 52 | static const symbol s_0_23[3] = { 'e', 'r', 's' }; 53 | static const symbol s_0_24[3] = { 'e', 't', 's' }; 54 | static const symbol s_0_25[2] = { 'e', 't' }; 55 | static const symbol s_0_26[3] = { 'h', 'e', 't' }; 56 | static const symbol s_0_27[3] = { 'e', 'r', 't' }; 57 | static const symbol s_0_28[3] = { 'a', 's', 't' }; 58 | 59 | static const struct among a_0[29] = 60 | { 61 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 62 | /* 1 */ { 1, s_0_1, -1, 1, 0}, 63 | /* 2 */ { 3, s_0_2, 1, 1, 0}, 64 | /* 3 */ { 4, s_0_3, 1, 1, 0}, 65 | /* 4 */ { 4, s_0_4, 1, 1, 0}, 66 | /* 5 */ { 3, s_0_5, 1, 1, 0}, 67 | /* 6 */ { 3, s_0_6, 1, 1, 0}, 68 | /* 7 */ { 6, s_0_7, 6, 1, 0}, 69 | /* 8 */ { 4, s_0_8, 1, 3, 0}, 70 | /* 9 */ { 2, s_0_9, -1, 1, 0}, 71 | /* 10 */ { 5, s_0_10, 9, 1, 0}, 72 | /* 11 */ { 2, s_0_11, -1, 1, 0}, 73 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 74 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 75 | /* 14 */ { 1, s_0_14, -1, 2, 0}, 76 | /* 15 */ { 2, s_0_15, 14, 1, 0}, 77 | /* 16 */ { 2, s_0_16, 14, 1, 0}, 78 | /* 17 */ { 4, s_0_17, 16, 1, 0}, 79 | /* 18 */ { 5, s_0_18, 16, 1, 0}, 80 | /* 19 */ { 4, s_0_19, 16, 1, 0}, 81 | /* 20 */ { 7, s_0_20, 19, 1, 0}, 82 | /* 21 */ { 3, s_0_21, 14, 1, 0}, 83 | /* 22 */ { 6, s_0_22, 21, 1, 0}, 84 | /* 23 */ { 3, s_0_23, 14, 1, 0}, 85 | /* 24 */ { 3, s_0_24, 14, 1, 0}, 86 | /* 25 */ { 2, s_0_25, -1, 1, 0}, 87 | /* 26 */ { 3, s_0_26, 25, 1, 0}, 88 | /* 27 */ { 3, s_0_27, -1, 3, 0}, 89 | /* 28 */ { 3, s_0_28, -1, 1, 0} 90 | }; 91 | 92 | static const symbol s_1_0[2] = { 'd', 't' }; 93 | static const symbol s_1_1[2] = { 'v', 't' }; 94 | 95 | static const struct among a_1[2] = 96 | { 97 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 98 | /* 1 */ { 2, s_1_1, -1, -1, 0} 99 | }; 100 | 101 | static const symbol 
s_2_0[3] = { 'l', 'e', 'g' }; 102 | static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; 103 | static const symbol s_2_2[2] = { 'i', 'g' }; 104 | static const symbol s_2_3[3] = { 'e', 'i', 'g' }; 105 | static const symbol s_2_4[3] = { 'l', 'i', 'g' }; 106 | static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; 107 | static const symbol s_2_6[3] = { 'e', 'l', 's' }; 108 | static const symbol s_2_7[3] = { 'l', 'o', 'v' }; 109 | static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; 110 | static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 111 | static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; 112 | /* a_2: the 11 suffix entries searched by r_other_suffix. */ 113 | static const struct among a_2[11] = 114 | { 115 | /* 0 */ { 3, s_2_0, -1, 1, 0}, 116 | /* 1 */ { 4, s_2_1, 0, 1, 0}, 117 | /* 2 */ { 2, s_2_2, -1, 1, 0}, 118 | /* 3 */ { 3, s_2_3, 2, 1, 0}, 119 | /* 4 */ { 3, s_2_4, 2, 1, 0}, 120 | /* 5 */ { 4, s_2_5, 4, 1, 0}, 121 | /* 6 */ { 3, s_2_6, -1, 1, 0}, 122 | /* 7 */ { 3, s_2_7, -1, 1, 0}, 123 | /* 8 */ { 4, s_2_8, 7, 1, 0}, 124 | /* 9 */ { 4, s_2_9, 7, 1, 0}, 125 | /* 10 */ { 7, s_2_10, 9, 1, 0} 126 | }; 127 | /* g_v and g_s_ending: tables handed to the *_grouping_* runtime helpers, together with the bounds 97..248 and 98..122 respectively. */ 128 | static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; 129 | 130 | static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; 131 | 132 | static const symbol s_0[] = { 'k' }; 133 | static const symbol s_1[] = { 'e', 'r' }; 134 | /* r_mark_regions: sets z->I[1] ("x" in the generated notes) to the position skip_utf8 reaches with +3 from the cursor, and z->I[0] ("p1") to the position reached by the two grouping scans over g_v, then raises I[0] to I[1] if it is smaller. Returns 1, or 0 when the hop or either scan fails, in which case z->I[0] keeps the z->l value assigned first. */ 135 | static int r_mark_regions(struct SN_env * z) { 136 | z->I[0] = z->l; 137 | { int c_test = z->c; /* test, line 30 */ 138 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 139 | if (ret < 0) return 0; 140 | z->c = ret; /* hop, line 30 */ 141 | } 142 | z->I[1] = z->c; /* setmark x, line 30 */ 143 | z->c = c_test; 144 | } 145 | if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ 146 | { /* gopast */ /* non v, line 31 */ 147 | int ret = in_grouping_U(z, g_v, 97, 248, 1); 148 | if (ret < 0) return 0; 149 | z->c += ret; 150 | } 151 | z->I[0] = z->c; /* setmark p1, line
31 */ 152 | /* try, line 32 */ 153 | if (!(z->I[0] < z->I[1])) goto lab0; 154 | z->I[0] = z->I[1]; 155 | lab0: 156 | return 1; 157 | } 158 | /* r_main_suffix: with the backward limit temporarily moved to z->I[0], looks up one of the 29 a_0 suffixes ending at the cursor and acts on the value find_among_b returns: 1 deletes the suffix, 2 deletes it only when one of the two context checks passes, 3 replaces it with s_1 ("er"). Returns 1 on a change, 0 when nothing applies, or a negative slice error. */ 159 | static int r_main_suffix(struct SN_env * z) { 160 | int among_var; 161 | { int mlimit; /* setlimit, line 38 */ 162 | int m1 = z->l - z->c; (void)m1; 163 | if (z->c < z->I[0]) return 0; 164 | z->c = z->I[0]; /* tomark, line 38 */ 165 | mlimit = z->lb; z->lb = z->c; /* remember the old backward limit; every exit from this section restores it */ 166 | z->c = z->l - m1; 167 | z->ket = z->c; /* [, line 38 */ 168 | if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 169 | among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ 170 | if (!(among_var)) { z->lb = mlimit; return 0; } 171 | z->bra = z->c; /* ], line 38 */ 172 | z->lb = mlimit; 173 | } 174 | switch(among_var) { 175 | case 0: return 0; 176 | case 1: 177 | { int ret = slice_del(z); /* delete, line 44 */ 178 | if (ret < 0) return ret; 179 | } 180 | break; 181 | case 2: 182 | { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ 183 | if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; /* first alternative: the g_s_ending check returns 0 */ 184 | goto lab0; 185 | lab1: 186 | z->c = z->l - m2; 187 | if (!(eq_s_b(z, 1, s_0))) return 0; /* second alternative: s_0 ('k') must match backwards from the cursor ... */ 188 | if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; /* ... and the g_v check that follows must return 0 */ 189 | } 190 | lab0: 191 | { int ret = slice_del(z); /* delete, line 46 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 3: 196 | { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ 197 | if (ret < 0) return ret; 198 | } 199 | break; 200 | } 201 | return 1; 202 | } 203 | /* r_consonant_pair: as a test, checks with the backward limit at z->I[0] that an a_1 entry ("dt" or "vt") ends at the cursor; the cursor is then restored, moved back one character with skip_utf8, and the slice from there to the original cursor is deleted. Returns 1 after the deletion, 0 when a check fails, or a negative slice_del error. */ 204 | static int r_consonant_pair(struct SN_env * z) { 205 | { int m_test = z->l - z->c; /* test, line 53 */ 206 | { int mlimit; /* setlimit, line 54 */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 54 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | z->ket = z->c; /* [, line 54 */ 213 | if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; }
214 | if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ 215 | z->bra = z->c; /* ], line 54 */ 216 | z->lb = mlimit; 217 | } 218 | z->c = z->l - m_test; /* end of the test: the cursor is put back where it was on entry */ 219 | } 220 | { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); 221 | if (ret < 0) return 0; 222 | z->c = ret; /* next, line 59 */ 223 | } 224 | z->bra = z->c; /* ], line 59 */ /* bra is moved here, one skip_utf8 step back from the restored cursor, before the delete */ 225 | { int ret = slice_del(z); /* delete, line 59 */ 226 | if (ret < 0) return ret; 227 | } 228 | return 1; 229 | } 230 | /* r_other_suffix: with the backward limit temporarily moved to z->I[0], deletes the a_2 suffix (11 entries) that ends at the cursor. Returns 1 after a deletion, 0 when no entry matches or the cursor lies before I[0], or a negative slice_del error. */ 231 | static int r_other_suffix(struct SN_env * z) { 232 | int among_var; 233 | { int mlimit; /* setlimit, line 63 */ 234 | int m1 = z->l - z->c; (void)m1; 235 | if (z->c < z->I[0]) return 0; 236 | z->c = z->I[0]; /* tomark, line 63 */ 237 | mlimit = z->lb; z->lb = z->c; 238 | z->c = z->l - m1; 239 | z->ket = z->c; /* [, line 63 */ 240 | if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 241 | among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ 242 | if (!(among_var)) { z->lb = mlimit; return 0; } 243 | z->bra = z->c; /* ], line 63 */ 244 | z->lb = mlimit; 245 | } 246 | switch(among_var) { 247 | case 0: return 0; 248 | case 1: 249 | { int ret = slice_del(z); /* delete, line 67 */ 250 | if (ret < 0) return ret; 251 | } 252 | break; 253 | } 254 | return 1; 255 | } 256 | /* norwegian_UTF_8_stem: entry point. Calls r_mark_regions forwards, then switches to backward mode and calls r_main_suffix, r_consonant_pair and r_other_suffix in that order. A 0 result from a step is ignored and the cursor is reset; a negative result is returned immediately. Returns 1 otherwise. */ 257 | extern int norwegian_UTF_8_stem(struct SN_env * z) { 258 | { int c1 = z->c; /* do, line 74 */ 259 | { int ret = r_mark_regions(z); 260 | if (ret == 0) goto lab0; /* call mark_regions, line 74 */ 261 | if (ret < 0) return ret; 262 | } 263 | lab0: 264 | z->c = c1; 265 | } 266 | z->lb = z->c; z->c = z->l; /* backwards, line 75 */ 267 | 268 | { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ 269 | { int ret = r_main_suffix(z); 270 | if (ret == 0) goto lab1; /* call main_suffix, line 76 */ 271 | if (ret < 0) return ret; 272 | } 273 | lab1: 274 | z->c = z->l - m2; 275 | } 276 | { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ 277 |
{ int ret = r_consonant_pair(z); 278 | if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ 279 | if (ret < 0) return ret; 280 | } 281 | lab2: 282 | z->c = z->l - m3; 283 | } 284 | { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ 285 | { int ret = r_other_suffix(z); 286 | if (ret == 0) goto lab3; /* call other_suffix, line 78 */ 287 | if (ret < 0) return ret; 288 | } 289 | lab3: 290 | z->c = z->l - m4; 291 | } 292 | z->c = z->lb; /* put the cursor back on the backward limit that was set when backward mode began */ 293 | return 1; 294 | } 295 | /* Environment constructor: forwards to the runtime SN_create_env with the counts (0, 2, 0). NOTE(review): presumably no string slots and two integer slots, matching the z->I[0] and z->I[1] uses in this file -- confirm against runtime/api.c. */ 296 | extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } 297 | /* Environment destructor: forwards to SN_close_env; the 0 equals the first argument given to SN_create_env in the constructor. */ 298 | extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 299 | 300 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_norwegian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * norwegian_UTF_8_create_env(void); 9 | extern void norwegian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int norwegian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_porter.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * porter_UTF_8_create_env(void); 9 | extern void porter_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int porter_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_portuguese.h:
-------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * portuguese_UTF_8_create_env(void); 9 | extern void portuguese_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int portuguese_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_romanian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * romanian_UTF_8_create_env(void); 9 | extern void romanian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int romanian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_russian.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * russian_UTF_8_create_env(void); 9 | extern void russian_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int russian_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_spanish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 
| 8 | extern struct SN_env * spanish_UTF_8_create_env(void); 9 | extern void spanish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int spanish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_swedish.c: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #include "../runtime/header.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | extern int swedish_UTF_8_stem(struct SN_env * z); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | static int r_other_suffix(struct SN_env * z); 14 | static int r_consonant_pair(struct SN_env * z); 15 | static int r_main_suffix(struct SN_env * z); 16 | static int r_mark_regions(struct SN_env * z); 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | 22 | extern struct SN_env * swedish_UTF_8_create_env(void); 23 | extern void swedish_UTF_8_close_env(struct SN_env * z); 24 | 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | static const symbol s_0_0[1] = { 'a' }; 30 | static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; 31 | static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; 32 | static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; 33 | static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; 34 | static const symbol s_0_5[2] = { 'a', 'd' }; 35 | static const symbol s_0_6[1] = { 'e' }; 36 | static const symbol s_0_7[3] = { 'a', 'd', 'e' }; 37 | static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; 38 | static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; 39 | static const symbol s_0_10[3] = { 'a', 'r', 'e' }; 40 | static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; 41 | static const symbol s_0_12[2] = { 'e', 'n' }; 42 | static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; 43 | static const symbol 
s_0_14[4] = { 'a', 'r', 'e', 'n' }; 44 | static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; 45 | static const symbol s_0_16[3] = { 'e', 'r', 'n' }; 46 | static const symbol s_0_17[2] = { 'a', 'r' }; 47 | static const symbol s_0_18[2] = { 'e', 'r' }; 48 | static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; 49 | static const symbol s_0_20[2] = { 'o', 'r' }; 50 | static const symbol s_0_21[1] = { 's' }; 51 | static const symbol s_0_22[2] = { 'a', 's' }; 52 | static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; 53 | static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; 54 | static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; 55 | static const symbol s_0_26[2] = { 'e', 's' }; 56 | static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; 57 | static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; 58 | static const symbol s_0_29[3] = { 'e', 'n', 's' }; 59 | static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; 60 | static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; 61 | static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; 62 | static const symbol s_0_33[2] = { 'a', 't' }; 63 | static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; 64 | static const symbol s_0_35[3] = { 'h', 'e', 't' }; 65 | static const symbol s_0_36[3] = { 'a', 's', 't' }; 66 | 67 | static const struct among a_0[37] = 68 | { 69 | /* 0 */ { 1, s_0_0, -1, 1, 0}, 70 | /* 1 */ { 4, s_0_1, 0, 1, 0}, 71 | /* 2 */ { 4, s_0_2, 0, 1, 0}, 72 | /* 3 */ { 7, s_0_3, 2, 1, 0}, 73 | /* 4 */ { 4, s_0_4, 0, 1, 0}, 74 | /* 5 */ { 2, s_0_5, -1, 1, 0}, 75 | /* 6 */ { 1, s_0_6, -1, 1, 0}, 76 | /* 7 */ { 3, s_0_7, 6, 1, 0}, 77 | /* 8 */ { 4, s_0_8, 6, 1, 0}, 78 | /* 9 */ { 4, s_0_9, 6, 1, 0}, 79 | /* 10 */ { 3, s_0_10, 6, 1, 0}, 80 | /* 11 */ { 4, s_0_11, 6, 1, 0}, 81 | /* 12 */ { 2, s_0_12, -1, 1, 0}, 82 | /* 13 */ { 5, s_0_13, 12, 1, 0}, 83 | /* 14 */ { 4, s_0_14, 12, 1, 0}, 84 | /* 15 */ { 5, s_0_15, 12, 1, 0}, 85 | /* 16 */ { 3, s_0_16, -1, 1, 
0}, 86 | /* 17 */ { 2, s_0_17, -1, 1, 0}, 87 | /* 18 */ { 2, s_0_18, -1, 1, 0}, 88 | /* 19 */ { 5, s_0_19, 18, 1, 0}, 89 | /* 20 */ { 2, s_0_20, -1, 1, 0}, 90 | /* 21 */ { 1, s_0_21, -1, 2, 0}, 91 | /* 22 */ { 2, s_0_22, 21, 1, 0}, 92 | /* 23 */ { 5, s_0_23, 22, 1, 0}, 93 | /* 24 */ { 5, s_0_24, 22, 1, 0}, 94 | /* 25 */ { 5, s_0_25, 22, 1, 0}, 95 | /* 26 */ { 2, s_0_26, 21, 1, 0}, 96 | /* 27 */ { 4, s_0_27, 26, 1, 0}, 97 | /* 28 */ { 5, s_0_28, 26, 1, 0}, 98 | /* 29 */ { 3, s_0_29, 21, 1, 0}, 99 | /* 30 */ { 5, s_0_30, 29, 1, 0}, 100 | /* 31 */ { 6, s_0_31, 29, 1, 0}, 101 | /* 32 */ { 4, s_0_32, 21, 1, 0}, 102 | /* 33 */ { 2, s_0_33, -1, 1, 0}, 103 | /* 34 */ { 5, s_0_34, -1, 1, 0}, 104 | /* 35 */ { 3, s_0_35, -1, 1, 0}, 105 | /* 36 */ { 3, s_0_36, -1, 1, 0} 106 | }; 107 | 108 | static const symbol s_1_0[2] = { 'd', 'd' }; 109 | static const symbol s_1_1[2] = { 'g', 'd' }; 110 | static const symbol s_1_2[2] = { 'n', 'n' }; 111 | static const symbol s_1_3[2] = { 'd', 't' }; 112 | static const symbol s_1_4[2] = { 'g', 't' }; 113 | static const symbol s_1_5[2] = { 'k', 't' }; 114 | static const symbol s_1_6[2] = { 't', 't' }; 115 | 116 | static const struct among a_1[7] = 117 | { 118 | /* 0 */ { 2, s_1_0, -1, -1, 0}, 119 | /* 1 */ { 2, s_1_1, -1, -1, 0}, 120 | /* 2 */ { 2, s_1_2, -1, -1, 0}, 121 | /* 3 */ { 2, s_1_3, -1, -1, 0}, 122 | /* 4 */ { 2, s_1_4, -1, -1, 0}, 123 | /* 5 */ { 2, s_1_5, -1, -1, 0}, 124 | /* 6 */ { 2, s_1_6, -1, -1, 0} 125 | }; 126 | 127 | static const symbol s_2_0[2] = { 'i', 'g' }; 128 | static const symbol s_2_1[3] = { 'l', 'i', 'g' }; 129 | static const symbol s_2_2[3] = { 'e', 'l', 's' }; 130 | static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; 131 | static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; 132 | 133 | static const struct among a_2[5] = 134 | { 135 | /* 0 */ { 2, s_2_0, -1, 1, 0}, 136 | /* 1 */ { 3, s_2_1, 0, 1, 0}, 137 | /* 2 */ { 3, s_2_2, -1, 1, 0}, 138 | /* 3 */ { 5, s_2_3, -1, 3, 0}, 139 | /* 4 */ { 5, 
s_2_4, -1, 2, 0} 140 | }; 141 | 142 | /* g_v: grouping table passed to in_grouping_U/out_grouping_U with the range 97..246 in r_mark_regions; presumably a bitmap of the Swedish vowels -- confirm against the runtime's grouping encoding */ static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; 143 | 144 | /* g_s_ending: grouping table used with the range 98..121 in r_main_suffix to decide whether a bare 's' ending may be removed */ static const unsigned char g_s_ending[] = { 119, 127, 149 }; 145 | 146 | /* replacement strings used by r_other_suffix: s_0 is "lös" in UTF-8 (0xC3 0xB6 encodes 'ö'), s_1 is "full" */ static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; 147 | static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; 148 | 149 | /* r_mark_regions: computes the region marks used by the suffix steps. z->I[0] (p1) starts at the end of the word; z->I[1] (x) is the position three characters past the cursor. Per the generator comments the cursor then goes to the first vowel (grouping v) and past the next non-vowel, and that position becomes p1; finally p1 is raised to x when it is smaller. Returns 1 on success and 0 when the hop or either scan fails. */ static int r_mark_regions(struct SN_env * z) { 150 | z->I[0] = z->l; 151 | { int c_test = z->c; /* test, line 29 */ 152 | { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); 153 | if (ret < 0) return 0; 154 | z->c = ret; /* hop, line 29 */ 155 | } 156 | z->I[1] = z->c; /* setmark x, line 29 */ 157 | z->c = c_test; 158 | } 159 | if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ 160 | { /* gopast */ /* non v, line 30 */ 161 | int ret = in_grouping_U(z, g_v, 97, 246, 1); 162 | if (ret < 0) return 0; 163 | z->c += ret; 164 | } 165 | z->I[0] = z->c; /* setmark p1, line 30 */ 166 | /* try, line 31 */ 167 | if (!(z->I[0] < z->I[1])) goto lab0; 168 | z->I[0] = z->I[1]; 169 | lab0: 170 | return 1; 171 | } 172 | 173 | /* r_main_suffix: backward suffix step. Temporarily raises the backward limit z->lb to z->I[0], searches the 37-entry table a_0 for an ending at the cursor, then deletes it: unconditionally for match code 1, and for match code 2 (the bare 's' entry) only when the in_grouping_b_U check against g_s_ending returns 0. Returns 1 after a deletion, 0 when nothing applies, or the negative value returned by slice_del. */ static int r_main_suffix(struct SN_env * z) { 174 | int among_var; 175 | { int mlimit; /* setlimit, line 37 */ 176 | int m1 = z->l - z->c; (void)m1; 177 | if (z->c < z->I[0]) return 0; 178 | z->c = z->I[0]; /* tomark, line 37 */ 179 | mlimit = z->lb; z->lb = z->c; 180 | z->c = z->l - m1; 181 | z->ket = z->c; /* [, line 37 */ 182 | /* cheap pre-filter before the table search: at least one byte must lie above the limit, and the last byte must be in 0x60..0x7f with its low five bits selecting a set bit of 1851442 (0x1C4032: 'a', 'd', 'e', 'n', 'r', 's', 't') */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 183 | among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ 184 | if (!(among_var)) { z->lb = mlimit; return 0; } 185 | z->bra = z->c; /* ], line 37 */ 186 | z->lb = mlimit; 187 | } 188 | switch(among_var) { 189 | case 0: return 0; 190 | case 1: 191 | { int ret = slice_del(z); /* delete, line 44 */ 192 | if (ret < 0) return ret; 193 | } 194 | break; 195 | case 2: 196 | /* a nonzero result from the g_s_ending check aborts the step without deleting */ if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; 
197 | { int ret = slice_del(z); /* delete, line 46 */ 198 | if (ret < 0) return ret; 199 | } 200 | break; 201 | } 202 | return 1; 203 | } 204 | 205 | /* r_consonant_pair: backward step. With the backward limit temporarily raised to z->I[0], checks that the text before the cursor ends in one of the seven pairs of a_1 (dd gd nn dt gt kt tt), restores the cursor, steps back one character with skip_utf8 and deletes that single character. Returns 1 after the deletion, 0 when the check fails, or the negative value returned by slice_del. */ static int r_consonant_pair(struct SN_env * z) { 206 | { int mlimit; /* setlimit, line 50 */ 207 | int m1 = z->l - z->c; (void)m1; 208 | if (z->c < z->I[0]) return 0; 209 | z->c = z->I[0]; /* tomark, line 50 */ 210 | mlimit = z->lb; z->lb = z->c; 211 | z->c = z->l - m1; 212 | { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ 213 | /* cheap pre-filter: at least two bytes above the limit, and the last byte must be 'd', 'n' or 't' (bits 4, 14, 20 of 1064976 = 0x104010) */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 214 | if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ 215 | z->c = z->l - m2; 216 | z->ket = z->c; /* [, line 52 */ 217 | { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); 218 | if (ret < 0) { z->lb = mlimit; return 0; } 219 | z->c = ret; /* next, line 52 */ 220 | } 221 | z->bra = z->c; /* ], line 52 */ 222 | { int ret = slice_del(z); /* delete, line 52 */ 223 | /* NOTE(review): this error return leaves z->lb at the temporary limit rather than mlimit */ if (ret < 0) return ret; 224 | } 225 | } 226 | z->lb = mlimit; 227 | } 228 | return 1; 229 | } 230 | 231 | /* r_other_suffix: backward step. With the backward limit temporarily raised to z->I[0], matches one of the five endings of a_2: code 1 (ig, lig, els) deletes the ending, code 2 (löst) replaces it with s_0 ("lös"), code 3 (fullt) replaces it with s_1 ("full"). Returns 1 on a match, 0 when nothing applies, or the negative value returned by slice_del/slice_from_s. */ static int r_other_suffix(struct SN_env * z) { 232 | int among_var; 233 | { int mlimit; /* setlimit, line 55 */ 234 | int m1 = z->l - z->c; (void)m1; 235 | if (z->c < z->I[0]) return 0; 236 | z->c = z->I[0]; /* tomark, line 55 */ 237 | mlimit = z->lb; z->lb = z->c; 238 | z->c = z->l - m1; 239 | z->ket = z->c; /* [, line 56 */ 240 | /* cheap pre-filter: at least two bytes above the limit, and the last byte must be 'g', 's' or 't' (bits 7, 19, 20 of 1572992 = 0x180080) */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } 241 | among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ 242 | if (!(among_var)) { z->lb = mlimit; return 0; } 243 | z->bra = z->c; /* ], line 56 */ 244 | switch(among_var) { 245 | case 0: { z->lb = mlimit; return 0; } 246 | case 1: 247 | { int ret = slice_del(z); /* delete, line 57 */ 248 | if (ret < 0) return ret; 249 | } 250 | break; 251 | case 2: 252 | { int ret = slice_from_s(z, 4, s_0); /* <-, 
line 58 */ 253 | if (ret < 0) return ret; 254 | } 255 | break; 256 | case 3: 257 | { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ 258 | if (ret < 0) return ret; 259 | } 260 | break; 261 | } 262 | z->lb = mlimit; 263 | } 264 | return 1; 265 | } 266 | 267 | /* swedish_UTF_8_stem: public entry point. Runs r_mark_regions forwards and restores the cursor, then switches to backward mode (z->lb = start, z->c = z->l) and calls r_main_suffix, r_consonant_pair and r_other_suffix in that order, restoring the cursor after each. A 0 result from a step is ignored; a negative result is returned immediately. Otherwise the cursor is reset to z->lb and 1 is returned. */ extern int swedish_UTF_8_stem(struct SN_env * z) { 268 | { int c1 = z->c; /* do, line 66 */ 269 | { int ret = r_mark_regions(z); 270 | if (ret == 0) goto lab0; /* call mark_regions, line 66 */ 271 | if (ret < 0) return ret; 272 | } 273 | lab0: 274 | z->c = c1; 275 | } 276 | z->lb = z->c; z->c = z->l; /* backwards, line 67 */ 277 | 278 | { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ 279 | { int ret = r_main_suffix(z); 280 | if (ret == 0) goto lab1; /* call main_suffix, line 68 */ 281 | if (ret < 0) return ret; 282 | } 283 | lab1: 284 | z->c = z->l - m2; 285 | } 286 | { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ 287 | { int ret = r_consonant_pair(z); 288 | if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ 289 | if (ret < 0) return ret; 290 | } 291 | lab2: 292 | z->c = z->l - m3; 293 | } 294 | { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ 295 | { int ret = r_other_suffix(z); 296 | if (ret == 0) goto lab3; /* call other_suffix, line 70 */ 297 | if (ret < 0) return ret; 298 | } 299 | lab3: 300 | z->c = z->l - m4; 301 | } 302 | z->c = z->lb; 303 | return 1; 304 | } 305 | 306 | /* creates the environment for this stemmer via SN_create_env(0, 2, 0); the 2 presumably sizes the integer array read as z->I[0] and z->I[1] -- confirm against the runtime */ extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } 307 | 308 | /* releases an environment obtained from swedish_UTF_8_create_env by forwarding to SN_close_env */ extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } 309 | 310 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_swedish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * swedish_UTF_8_create_env(void); 9 | extern void 
swedish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int swedish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /libstemmer_c/src_c/stem_UTF_8_turkish.h: -------------------------------------------------------------------------------- 1 | 2 | /* This file was generated automatically by the Snowball to ANSI C compiler */ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | extern struct SN_env * turkish_UTF_8_create_env(void); 9 | extern void turkish_UTF_8_close_env(struct SN_env * z); 10 | 11 | extern int turkish_UTF_8_stem(struct SN_env * z); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | -------------------------------------------------------------------------------- /ruby-stemmer.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | lib = File.expand_path('../lib', __FILE__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | require 'lingua/version' 6 | 7 | Gem::Specification.new do |s| 8 | s.name = 'ruby-stemmer' 9 | s.version = Lingua::Stemmer::VERSION 10 | 11 | s.platform = Gem::Platform::RUBY 12 | s.required_ruby_version = '>= 2.4.0' 13 | 14 | s.require_paths = ['lib'] 15 | s.authors = ['Aurelian Oancea', 'Yury Korolev'] 16 | 17 | s.description = 'Expose the bundled libstemmer_c library to Ruby.' 18 | s.email = 'oancea@gmail.com' 19 | s.extensions = ['ext/lingua/extconf.rb'] 20 | s.extra_rdoc_files = ['README.rdoc'] 21 | s.files = `git ls-files`.split("\n") 22 | s.homepage = 'http://github.com/aurelian/ruby-stemmer' 23 | s.licenses = ['MIT'] 24 | s.summary = 'Expose libstemmer_c to Ruby.' 
25 | 26 | s.add_development_dependency 'minitest', '~> 5.14' 27 | s.add_development_dependency 'rake-compiler', '~> 1.1' 28 | end 29 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler/setup' 3 | 4 | require 'minitest/autorun' 5 | 6 | $LOAD_PATH.unshift File.expand_path('../lib', __FILE__) 7 | require 'lingua/stemmer' 8 | -------------------------------------------------------------------------------- /test/lingua/test_stemmer.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'helper' 4 | 5 | class TestStemmer < Minitest::Test 6 | def test_stemmer_creation 7 | assert_kind_of ::Lingua::Stemmer, ::Lingua::Stemmer.new 8 | end 9 | 10 | def test_exceptions 11 | assert_raises ::Lingua::StemmerError do 12 | # invalid encoding for language 13 | ::Lingua::Stemmer.new language: 'ro', encoding: 'ISO_8859_1' 14 | end 15 | assert_raises ::Lingua::StemmerError do 16 | # invalid language 17 | ::Lingua::Stemmer.new language: 'cat' 18 | end 19 | end 20 | 21 | def test_latin 22 | ::Lingua::Stemmer.new language: 'latin', encoding: 'ISO_8859_1' 23 | rescue StandardError => e 24 | flunk "Expected latin to be loaded but failed with #{e}" 25 | end 26 | 27 | def test_stem 28 | stemmer = ::Lingua::Stemmer.new(language: 'en', encoding: 'UTF_8') 29 | assert_equal stemmer.stem('obnoxious'), 'obnoxi' 30 | assert_equal stemmer.stem('personalities'), 'person' 31 | end 32 | 33 | def test_string_stemmer 34 | assert_equal ::Lingua.stemmer('installation', language: 'en'), 'instal' 35 | stemmer = ::Lingua.stemmer('installation', language: 'fr') do |word| 36 | assert_equal word, 'install' 37 | end 38 | assert_kind_of ::Lingua::Stemmer, stemmer 39 | assert_equal stemmer.encoding, Encoding::UTF_8 40 | end 41 | 42 | def test_array_stemmer 43 | results 
= ::Lingua.stemmer(%w[one two], language: 'de', encoding: 'ISO_8859_1') 44 | assert_equal 2, results.size 45 | assert_kind_of Array, results 46 | end 47 | 48 | def test_array_stemmer_issue_22 49 | results = ::Lingua.stemmer(['one'], language: 'de', encoding: 'ISO_8859_1') 50 | assert_equal 1, results.size 51 | assert_kind_of Array, results 52 | end 53 | 54 | def test_stemmer_subclass 55 | assert_raises(RuntimeError) do 56 | Class.new(Lingua::Stemmer) do 57 | def native_init(a, b); end 58 | end.new.stem('cow') 59 | end 60 | end 61 | 62 | def test_default_encoding_option 63 | if RUBY_VERSION >= '1.9' 64 | assert_equal ::Lingua::Stemmer.new.encoding, Encoding::UTF_8 65 | else 66 | assert_equal ::Lingua::Stemmer.new.encoding, 'UTF_8' 67 | end 68 | end 69 | 70 | def test_different_encoding_options 71 | assert_equal ::Lingua::Stemmer.new(encoding: 'ISO_8859_1').encoding, Encoding::ISO_8859_1 72 | assert_equal ::Lingua::Stemmer.new(encoding: 'UTF-8').encoding, Encoding::UTF_8 73 | assert_equal ::Lingua::Stemmer.new(encoding: 'utf-8').encoding, Encoding::UTF_8 74 | assert_equal ::Lingua::Stemmer.new(encoding: :ISO_8859_1).encoding, Encoding::ISO_8859_1 75 | assert_equal ::Lingua::Stemmer.new(encoding: Encoding::UTF_8).encoding, Encoding::UTF_8 76 | end 77 | 78 | def test_string_encoding 79 | word = 'așezare' 80 | 81 | stem = ::Lingua.stemmer(word, language: 'ro', encoding: 'UTF_8') 82 | assert_equal word.encoding, stem.encoding 83 | 84 | s = ::Lingua::Stemmer.new(language: 'ro', encoding: 'UTF_8') 85 | assert_equal s.stem(word).encoding, word.encoding 86 | 87 | stem = ::Lingua.stemmer('installation', language: 'fr', encoding: 'ISO-8859-1') 88 | assert_equal stem.encoding, Encoding::ISO_8859_1 89 | end 90 | 91 | def test_lithuanian_stem 92 | stemmer = ::Lingua::Stemmer.new(language: 'lt') 93 | %w[ 94 | kompiuteris kompiuterio kompiuteriui kompiuteriu kompiuteri 95 | ].each do |word| 96 | assert_equal stemmer.stem(word), 'kompiuter' 97 | end 98 | end 99 | end 100 | 
--------------------------------------------------------------------------------