├── .github
    └── workflows
    │   └── tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── composer.json
├── phpunit.xml.dist
├── src
    ├── NotFoundException.php
    ├── Stemmer
    │   ├── Catalan.php
    │   ├── Danish.php
    │   ├── Dutch.php
    │   ├── English.php
    │   ├── Finnish.php
    │   ├── French.php
    │   ├── German.php
    │   ├── Italian.php
    │   ├── Norwegian.php
    │   ├── Portuguese.php
    │   ├── Romanian.php
    │   ├── Russian.php
    │   ├── Spanish.php
    │   ├── Stem.php
    │   ├── Stemmer.php
    │   └── Swedish.php
    ├── StemmerFactory.php
    ├── StemmerManager.php
    └── Transliterate.php
└── test
    ├── CatalanTest.php
    ├── CsvFileIterator.php
    ├── CsvFileVerboseIterator.php
    ├── DanishTest.php
    ├── DutchTest.php
    ├── EnglishTest.php
    ├── FactoryTest.php
    ├── FinnishTest.php
    ├── FrenchTest.php
    ├── GermanTest.php
    ├── ItalianTest.php
    ├── ManagerTest.php
    ├── NorwegianTest.php
    ├── PortugueseTest.php
    ├── RomanianTest.php
    ├── RussianTest.php
    ├── SpanishTest.php
    ├── SwedishTest.php
    └── files
        ├── ca.txt
        ├── de.txt
        ├── dk.txt
        ├── en.txt
        ├── es.txt
        ├── fi.txt
        ├── fr.txt
        ├── it.txt
        ├── nl.txt
        ├── no.txt
        ├── pt.txt
        ├── ro.txt
        ├── ru.txt
        └── sw.txt


/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: tests
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [master]
 7 | 
 8 | jobs:
 9 |   tests:
10 |     runs-on: ubuntu-latest
11 | 
12 |     strategy:
13 |       fail-fast: true
14 |       matrix:
15 |         php: [7.3, 7.4, 8.0, 8.1, 8.2, 8.3, 8.4]
16 |         stability: [prefer-lowest, prefer-stable]
17 | 
18 |     name: PHP ${{ matrix.php }} - ${{ matrix.stability }}
19 | 
20 |     steps:
21 |       - name: Checkout code
22 |         uses: actions/checkout@v3
23 | 
24 |       - name: Setup PHP
25 |         uses: shivammathur/setup-php@v2
26 |         with:
27 |           php-version: ${{ matrix.php }}
28 |           tools: composer:v2
29 |           coverage: none
30 | 
31 |       - name: Install dependencies
32 |         uses: nick-fields/retry@v2
33 |         with:
34 |           timeout_minutes: 5
35 |           max_attempts: 5
36 |           command: composer update --${{ matrix.stability }} --prefer-dist --no-interaction --no-progress
37 | 
38 |       - name: Copy PHP Unit Settings
39 |         run: cp phpunit.xml.dist phpunit.xml
40 | 
41 |       - name: Execute tests
42 |         run: vendor/bin/phpunit --verbose
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Managed by Composer
 2 | /vendor/
 3 | 
 4 | # PHPUnit
 5 | /app/phpunit.xml
 6 | /phpunit.xml
 7 | 
 8 | # Build data
 9 | /build/
10 | 
11 | # Composer PHAR
12 | /composer.phar
13 | 
14 | /.settings/
15 | /.buildpath
16 | /.project
17 | /composer.lock
18 | 
19 | .history
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 wamania
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # php-stemmer
 2 | 
 3 | PHP native implementation of Snowball stemmer
 4 | https://snowballstem.org/
 5 | 
 6 | Accepts only UTF-8 input.
 7 | 
 8 | * [Languages](#languages)
 9 | * [Installation](#installation)
10 | * [Usage](#usage)
11 | 
12 | Languages
13 | ------------
14 | Available : 
15 | - Catalan (by Orestes Sanchez Benavente orestes@estotienearreglo.es)
16 | - Danish
17 | - Dutch
18 | - English
19 | - Finnish (by [Mikko Saari](https://github.com/msaari/))
20 | - French
21 | - German
22 | - Italian
23 | - Norwegian
24 | - Portuguese
25 | - Romanian
26 | - Russian
27 | - Spanish
28 | - Swedish
29 | 
30 | Installation
31 | ------------
32 | 
33 | For PHP5, use 1.3
34 | ```
35 | composer require wamania/php-stemmer "^1.3"
36 | ```
37 | 
38 | For PHP7 use 2.x (branch 2.x is backward compatible with 1.x)
39 | ```
40 | composer require wamania/php-stemmer "^2.0"
41 | ```
42 | 
43 | For PHP^7.3 and PHP^8.0 use 3.x (backward compatible, but phpunit^9 doesn't work with PHP < 7.3)
44 | ```
45 | composer require wamania/php-stemmer "^3.0"
46 | ```
47 | 
48 | For PHP^8.4 use 4.x (avoids deprecation notices by switching from voku utf8 to [joomla/string](https://github.com/joomla-framework/string))
49 | ```
50 | composer require wamania/php-stemmer "^4.0"
51 | ```
52 | 
53 | Usage
54 | -----
55 | 
56 | For 2.x ~ 4.x, you should use the factory
57 | ```php
58 | use Wamania\Snowball\StemmerFactory;
59 | 
60 | // use ISO_639 (2 or 3 letters) or language name in english
61 | $stemmer = StemmerFactory::create('fr');
62 | $stemmer = StemmerFactory::create ('spanish');
63 | 
64 | // then 
65 | $stem = $stemmer->stem('automóvil');
66 | ```
67 | 
68 | Or the manager
69 | ```php
70 | use Wamania\Snowball\StemmerManager;
71 | 
72 | $manager = new StemmerManager();
73 | $stem = $manager->stem('automóvil', 'es');
74 | ```
75 | 
76 | In 1.3, you must instantiate manually
77 | 
78 | ```php
79 | use Wamania\Snowball\French;
80 | 
81 | $stemmer = new French();
82 | $stem = $stemmer->stem('anticonstitutionnellement');
83 | ```
84 | 
85 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 | 	"name": "wamania/php-stemmer",
 3 | 	"description": "Native PHP Stemmer",
 4 | 	"keywords": ["stemmer", "porter", "php"],
 5 | 	"license": "MIT",
 6 | 	"authors": [
 7 | 		{
 8 | 			"name": "Wamania",
 9 | 			"homepage": "http://wamania.com"
10 | 		}
11 | 	],
12 | 	"require": {
13 | 		"php": ">=7.3",
14 | 		"joomla/string": ">=2.0.1"
15 | 	},
16 | 	"require-dev":{
17 | 		"phpunit/phpunit": "^9.0"
18 | 	},
19 | 	"autoload": {
20 | 		"psr-4": {
21 | 			"Wamania\\Snowball\\": "src/"
22 | 		}
23 | 	},
24 | 	"autoload-dev": {
25 | 		"psr-4": {
26 | 			"Wamania\\Snowball\\Tests\\": "test/"
27 | 		}
28 | 	}
29 | }
30 | 


--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8" ?>
 2 | <phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 3 |          xsi:noNamespaceSchemaLocation="vendor/phpunit/phpunit/phpunit.xsd"
 4 |          backupGlobals="false"
 5 |          colors="true"
 6 |          beStrictAboutChangesToGlobalState="true"
 7 |          beStrictAboutOutputDuringTests="true"
 8 |          beStrictAboutTestsThatDoNotTestAnything="true"
 9 |          beStrictAboutTodoAnnotatedTests="true"
10 |          verbose="true"
11 |          bootstrap="vendor/autoload.php">
12 | 
13 |     <testsuites>
14 |         <testsuite name="functional">
15 |             <directory suffix=".php">test</directory>
16 |         </testsuite>
17 |     </testsuites>
18 | 
19 |     <filter>
20 |         <whitelist>
21 |             <directory>src</directory>
22 |         </whitelist>
23 |     </filter>
24 | </phpunit>
25 | 


--------------------------------------------------------------------------------
/src/NotFoundException.php:
--------------------------------------------------------------------------------
1 | <?php
2 | 
3 | namespace Wamania\Snowball;
4 | 
/**
 * Exception type declared by the Snowball package.
 *
 * It adds nothing to \Exception; it exists so callers can catch this
 * specific type.
 */
class NotFoundException extends \Exception {}
9 | 


--------------------------------------------------------------------------------
/src/Stemmer/Catalan.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link   http://snowball.tartarus.org/algorithms/catalan/stemmer.html
 10 |  * @author Orestes Sanchez Benavente <orestes@estotienearreglo.es>
 11 |  *
 12 |  *
 13 |  * Some fine tuning was necessary in this implementation of the original catalan stemmer algorithm in Snowball:
 14 |  *
 15 |  *    1. Some suffix sets have overlapping items, so here all items are sorted by decreasing size, to
 16 |  *       prevent that a shorter suffix will skip a larger one.
 17 |  *
 18 |  *    2. Some alternatives (`or` operator in Snowball) in the original algorithm have
 19 |  *       been rearranged to make sure they are applied in the right order.
 20 |  *
 21 |  *  Based on the reference Snowball implementation by Israel Olalla of iSOCO
 22 |  */
 23 | class Catalan extends Stem
 24 | {
 25 | 
 26 |     /**
 27 |      * All catalan vowels
 28 |      */
 29 |     protected static $vowels = ['a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ï', 'ò', 'ü'];
 30 | 
 31 |     protected static $standard_suffix_1a = [
 32 |         'allengües', 'ativitats', 'bilitats', 'ionistes', 'ialistes', 'ialismes', 'ativitat', 'atòries', 'isament',
 33 |         'bilitat', 'ivitats', 'ionisme', 'ionista', 'ialista', 'ialisme', 'íssimes', 'formes', 'ivisme', 'aments',
 34 |         'nça', 'ificar', 'idores', 'ancies', 'atòria', 'ivitat', 'encies', 'ències', 'atives', 'íssima', 'íssims',
 35 |         'ictes', 'eries', 'itats', 'itzar', 'ament', 'ments', 'sfera', 'ícies', 'àries', 'cions', 'ístic', 'issos',
 36 |         'íssem', 'íssiu', 'issem', 'isseu', 'ísseu', 'dores', 'adura', 'ívola', 'ables', 'adors', 'idors', 'adora',
 37 |         'doras', 'dures', 'ancia', 'toris', 'encia', 'ència', 'ïtats', 'atius', 'ativa', 'ibles', 'asses', 'assos',
 38 |         'íssim', 'ìssem', 'ìsseu', 'ìssin', 'ismes', 'istes', 'inies', 'íinia', 'ínies', 'trius', 'atge', 'icte',
 39 |         'ells', 'ella', 'essa', 'eres', 'ines', 'able', 'itat', 'ives', 'ment', 'amen', 'iste', 'aire', 'eria',
 40 |         'eses', 'esos', 'ícia', 'icis', 'ícis', 'ària', 'alla', 'nces', 'enca', 'issa', 'dora', 'dors', 'bles',
 41 |         'ívol', 'egar', 'ejar', 'itar', 'ació', 'ants', 'tori', 'ions', 'isam', 'ores', 'aris', 'ïtat', 'atiu',
 42 |         'ible', 'assa', 'ents', 'imes', 'isme', 'ista', 'inia', 'ites', 'triu', 'oses', 'osos', 'ient', 'otes',
 43 |         'ell', 'esc', 'ets', 'eta', 'ers', 'ina', 'iva', 'ius', 'fer', 'als', 'era', 'ana', 'esa', 'ici', 'íci',
 44 |         'ció', 'dor', 'all', 'enc', 'osa', 'ble', 'dís', 'dur', 'ant', 'ats', 'ota', 'ors', 'ora', 'ari', 'uts',
 45 |         'uds', 'ent', 'ims', 'ima', 'ita', 'ar', 'és', 'ès', 'et', 'ls', 'ió', 'ot', 'al', 'or', 'il', 'ís', 'ós',
 46 |         'ud', 'ots', 'ó'
 47 |     ];
 48 | 
 49 |     protected static $attached_pronoun = [
 50 |         'selas', 'selos', '\'hi', '\'ho', '\'ls', '-les', '-nos', '\'ns', 'sela', 'selo', '\'s', '\'l', '-ls', '-la',
 51 |         '-li', 'vos', 'nos', '-us', '\'n', '-ns', '\'m', '-me', '-te', '\'t', 'los', 'las', 'les', 'ens', 'se', 'us',
 52 |         '-n', '-m', 'li', 'lo', 'me', 'le', 'la', 'ho', 'hi'
 53 |     ];
 54 | 
 55 |     protected static $verb_suffixes = [
 56 |         'aríamos', 'eríamos', 'iríamos', 'eresseu', 'iéramos', 'iésemos', 'adores', 'aríais', 'aremos', 'eríais',
 57 |         'eremos', 'iríais', 'iremos', 'ierais', 'ieseis', 'asteis', 'isteis', 'ábamos', 'áramos', 'ásemos', 'isquen',
 58 |         'esquin', 'esquis', 'esques', 'esquen', 'ïsquen', 'ïsques', 'adora', 'adors', 'arían', 'arías', 'arian',
 59 |         'arien', 'aries', 'aréis', 'erían', 'erías', 'eréis', 'erass', 'irían', 'irías', 'iréis', 'asseu', 'esseu',
 60 |         'àsseu', 'àssem', 'àssim', 'àssiu', 'essen', 'esses', 'assen', 'asses', 'assim', 'assiu', 'éssen', 'ésseu',
 61 |         'éssim', 'éssiu', 'éssem', 'aríem', 'aríeu', 'eixer', 'eixes', 'ieran', 'iesen', 'ieron', 'iendo', 'essin',
 62 |         'essis', 'assin', 'assis', 'essim', 'èssim', 'èssiu', 'ieras', 'ieses', 'abais', 'arais', 'aseis', 'íamos',
 63 |         'irien', 'iries', 'irìem', 'irìeu', 'iguem', 'igueu', 'esqui', 'eixin', 'eixis', 'eixen', 'iríem', 'iríeu',
 64 |         'atges', 'issen', 'isses', 'issin', 'issis', 'issiu', 'issim', 'ïssin', 'íssiu', 'íssim', 'ïssis', 'ïguem',
 65 |         'ïgueu', 'ïssen', 'ïsses', 'itzeu', 'itzis', 'ador', 'ents', 'udes', 'eren', 'arán', 'arás', 'aria', 'aràs',
 66 |         'aría', 'arés', 'erán', 'erás', 'ería', 'erau', 'irán', 'irás', 'iría', 'írem', 'íreu', 'aves', 'avem', 'ávem',
 67 |         'àvem', 'àveu', 'áveu', 'aven', 'ares', 'àrem', 'àreu', 'àren', 'areu', 'aren', 'tzar', 'ides', 'ïdes', 'ades',
 68 |         'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron', 'abas', 'adas', 'idas', 'aras', 'ases', 'íais',
 69 |         'ados', 'idos', 'amos', 'imos', 'ques', 'iran', 'irem', 'iren', 'ires', 'ireu', 'iria', 'iràs', 'eixi', 'eixo',
 70 |         'isin', 'isis', 'esca', 'isca', 'ïsca', 'ïren', 'ïres', 'ïxen', 'ïxes', 'ixen', 'ixes', 'inin', 'inis', 'ineu',
 71 |         'itza', 'itzi', 'itzo', 'itzà', 'arem', 'ent', 'arà', 'ará', 'ara', 'aré', 'erá', 'eré', 'irá', 'iré', 'íeu',
 72 |         'ies', 'íem', 'ìeu', 'ien', 'uda', 'ava', 'ats', 'ant', 'ïen', 'ams', 'ïes', 'dre', 'eix', 'ïda', 'aba', 'ada',
 73 |         'ida', 'its', 'ids', 'ase', 'ían', 'ado', 'ido', 'ieu', 'ess', 'ass', 'ías', 'áis', 'ira', 'irà', 'irè', 'sis',
 74 |         'sin', 'int', 'isc', 'ïsc', 'ïra', 'ïxo', 'ixo', 'ixa', 'ini', 'itz', 'iïn', 're', 'ie', 'er', 'ia', 'at', 'ut',
 75 |         'au', 'ïm', 'ïu', 'és', 'en', 'es', 'em', 'am', 'ïa', 'it', 'ït', 'ía', 'ad', 'ed', 'id', 'an', 'ió', 'ar',
 76 |         'ir', 'as', 'ii', 'io', 'ià', 'ís', 'ïx', 'ix', 'in', 'às', 'iï', 'iïs', 'í'
 77 |     ];
 78 | 
 79 |     protected static $residual_suffixes = [
 80 |         'itz', 'it', 'os', 'eu', 'iu', 'is', 'ir', 'ïn', 'ïs', 'a', 'o', 'á', 'à', 'í', 'ó', 'e', 'é', 'i', 's', 'ì',
 81 |         'ï'
 82 |     ];
 83 | 
 84 |     /**
 85 |      * {@inheritdoc}
 86 |      */
 87 |     public function stem($word)
 88 |     {
 89 |         $this->word = StringHelper::strtolower($word);
 90 | 
 91 |         // Catalan stemmer does not use Rv
 92 |         $this->r1();
 93 |         $this->r2();
 94 | 
 95 |         // Step 0: Attached pronoun
 96 |         $this->step0();
 97 | 
 98 |         $word = $this->word;
 99 |         // Step 1a: Standard suffix
100 |         $this->step1a();
101 | 
102 |         // Step 1b: Verb suffix
103 |         // Do step 1b if no ending was removed by step 1a.
104 |         if ($this->word == $word) {
105 |             $this->step1b();
106 |         }
107 | 
108 |         $this->step2();
109 |         $this->finish();
110 | 
111 |         return $this->word;
112 |     }
113 | 
114 |     /**
115 |      * Step 0: Attached pronoun
116 |      *
117 |      * Search for the longest among the following suffixes
118 |      * and delete it in R1.
119 |      */
120 | 
121 |     private function step0()
122 |     {
123 |         if (($position = $this->search(static::$attached_pronoun)) !== false) {
124 |             if ($this->inR1($position)) {
125 |                 $this->word = StringHelper::substr($this->word, 0, $position);
126 |                 return true;
127 |             }
128 |         }
129 |         return false;
130 |     }
131 | 
132 |     /**
133 |      * Step 1a: Standard suffix
134 |      */
135 |     private function step1a()
136 |     {
137 |         // Run step 1a.2 before 1a.1, since they overlap on `cions` (1a.1) and `acions` (1a.2)
138 |         //
139 |         // Step 1a.2.
140 |         // acions ada ades
141 |         //      delete if in R2
142 |         if (($position = $this->search(['acions', 'ada', 'ades'])) !== false) {
143 |             if ($this->inR2($position)) {
144 |                 $this->word = StringHelper::substr($this->word, 0, $position);
145 |             }
146 |             return true;
147 |         }
148 | 
149 |         // Step 1a.1.
150 |         // ar atge formes icte ictes ell ells ella és ès esc essa et ets eta eres eries ers ina ines able ls ió itat
151 |         // itats itzar iva ives ivisme ius fer ment amen ament aments ments ot sfera al als era ana iste aire eria esa
152 |         // eses esos or ícia ícies icis ici íci ícis ària àries alla ció cions n{c}a nces ó dor all il ístic enc enca
153 |         // ís issa issos íssem íssiu issem isseu ísseu ós osa dora dores dors adura ble bles ívol ívola dís egar ejar
154 |         // ificar itar ables adors idores idors adora ació doras dur dures alleng{u"}es ant ants ancia ancies atòria
155 |         // atòries tori toris ats ions ota isam ors ora ores isament bilitat bilitats ivitat ivitats ari aris ionisme
156 |         // ionista ionistes ialista ialistes ialisme ialismes ud uts uds encia encies ència ències ïtat ïtats atiu
157 |         // atius atives ativa ativitat ativitats ible ibles assa asses assos ent ents íssim íssima íssims íssimes
158 |         // ìssem ìsseu ìssin ims ima imes isme ista ismes istes inia inies íinia ínies ita ites triu trius oses osos
159 |         // ient otes ots
160 |         //
161 |         //      delete if in R1
162 |         if (($position = $this->search(self::$standard_suffix_1a)) !== false) {
163 |             if ($this->inR1($position)) {
164 |                 $this->word = StringHelper::substr($this->word, 0, $position);
165 |             }
166 |             return true;
167 |         }
168 | 
169 |         // Step 1a.3.
170 |         // logía logíes logia logies logi logis lógica lógics lógiques
171 |         //      replace with log if in R2
172 |         if (($position = $this->search(
173 |                 ['logía', 'logíes', 'logia', 'logies', 'logis', 'lógica', 'lógics', 'lógiques', 'logi']
174 |             )) !== false) {
175 |             if ($this->inR2($position)) {
176 |                 $this->word = preg_replace(
177 |                     '#(logía|logíes|logia|logies|logis|lógica|lógics|lógiques|logi)$#u', 'log', $this->word
178 |                 );
179 |             }
180 |             return true;
181 |         }
182 | 
183 |         // Step 1a.4.
184 |         // ic ica ics iques
185 |         //      replace with ic if in R2
186 |         if (($position = $this->search(['ics', 'ica', 'iques', 'ic'])) !== false) {
187 |             if ($this->inR2($position)) {
188 |                 $this->word = preg_replace('#(ics|ica|iques|ic)$#u', 'ic', $this->word);
189 |             }
190 |             return true;
191 |         }
192 | 
193 |         // Step 1a.5.
194 |         // quíssims quíssimes quíssima quíssim
195 |         //      replace with c if in R1
196 |         if (($position = $this->search(['quíssima', 'quíssims', 'quíssimes', 'quíssim'])) !== false) {
197 |             if ($this->inR1($position)) {
198 |                 $this->word = preg_replace('#(quíssima|quíssims|quíssimes|quíssim)$#u', 'c', $this->word);
199 |             }
200 |             return true;
201 |         }
202 | 
203 |         return false;
204 |     }
205 | 
206 |     /**
207 |      * Step 1b: Verb suffixes
208 |      *      Search for the longest among the following suffixes in r1 and r2, and
209 |      *      perform the action indicated.
210 |      */
211 |     private function step1b()
212 |     {
213 |         // Step 1b.1
214 |         //
215 |         // aríamos eríamos iríamos eresseu iéramos iésemos adores aríais aremos eríais
216 |         // eremos iríais iremos ierais ieseis asteis isteis ábamos áramos ásemos isquen
217 |         // esquin esquis esques esquen ïsquen ïsques adora adors arían arías arian
218 |         // arien aries aréis erían erías eréis erass irían irías iréis asseu esseu
219 |         // àsseu àssem àssim àssiu essen esses assen asses assim assiu éssen ésseu
220 |         // éssim éssiu éssem aríem aríeu eixer eixes ieran iesen ieron iendo essin
221 |         // essis assin assis essim èssim èssiu ieras ieses abais arais aseis íamos
222 |         // irien iries irìem irìeu iguem igueu esqui eixin eixis eixen iríem iríeu
223 |         // atges issen isses issin issis issiu issim ïssin íssiu íssim ïssis ïguem
224 |         // ïgueu ïssen ïsses itzeu itzis ador ents udes eren arán arás aria aràs
225 |         // aría arés erán erás ería erau irán irás iría írem íreu aves avem ávem
226 |         // àvem àveu áveu aven ares àrem àreu àren areu aren tzar ides ïdes ades
227 |         // iera iese aste iste aban aran asen aron abas adas idas aras ases íais
228 |         // ados idos amos imos ques iran irem iren ires ireu iria iràs eixi eixo
229 |         // isin isis esca isca ïsca ïren ïres ïxen ïxes ixen ixes inin inis ineu
230 |         // itza itzi itzo itzà arem ent arà ará ara aré erá eré irá iré íeu
231 |         // ies íem ìeu ien uda ava ats ant ïen ams ïes dre eix ïda aba ada
232 |         // ida its ids ase ían ado ido ieu ess ass ías áis ira irà irè sis
233 |         // sin int isc ïsc ïra ïxo ixo ixa ini itz iïn re ie er ia at ut
234 |         // au ïm ïu és en es em am ïa it ït ía ad ed id an ió ar
235 |         // ir as ii io ià ís ïx ix in às iï iïs í
236 |         //      delete if in R1
237 |         if (($position = $this->search(static::$verb_suffixes)) !== false) {
238 |             if ($this->inR1($position)) {
239 |                 $this->word = StringHelper::substr($this->word, 0, $position);
240 |             }
241 |             return true;
242 |         }
243 | 
244 |         // Step 1b.2
245 |         // ando
246 |         //      delete if in R2
247 |         if (($position = $this->search(['ando'])) !== false) {
248 |             if ($this->inR2($position)) {
249 |                 $this->word = StringHelper::substr($this->word, 0, $position);
250 |             }
251 |             return true;
252 |         }
253 |         return false;
254 |     }
255 | 
256 |     /**
257 |      * Step 2: residual suffix
258 |      * Search for the longest among the following suffixes in R1, and perform
259 |      * the action indicated.
260 |      */
261 |     private function step2()
262 |     {
263 |         // Step 2.1
264 |         // residual suffix
265 |         //      delete if in R1
266 |         if (($position = $this->search(static::$residual_suffixes)) !== false) {
267 |             if ($this->inR1($position)) {
268 |                 $this->word = StringHelper::substr($this->word, 0, $position);
269 |             }
270 |             return true;
271 |         }
272 | 
273 |         // Step 2.2
274 |         // iqu
275 |         //      replace with ic if in R1
276 |         if (($position = $this->search(['iqu'])) !== false) {
277 |             if ($this->inR1($position)) {
278 |                 $this->word = preg_replace('#(iqu)$#u', 'ic', $this->word);
279 |             }
280 |             return true;
281 |         }
282 | 
283 |         return false;
284 |     }
285 | 
286 |     /**
287 |      * And finally:
288 |      * Remove accents and l aggeminades
289 |      */
290 |     private function finish()
291 |     {
292 |         $this->word = str_replace(
293 |             ['á', 'é', 'í', 'ó', 'ú', 'à', 'è', 'ì', 'ò', 'ï', 'ü', '·'],
294 |             ['a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'i', 'u', '.'],
295 |             $this->word
296 |         );
297 |     }
298 | 
299 | }
300 | 


--------------------------------------------------------------------------------
/src/Stemmer/Danish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/danish/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Danish extends Stem
 14 | {
 15 |     /**
 16 |      * All danish vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'æ', 'å', 'ø');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word): string
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         // R2 is not used: R1 is defined in the same way as in the German stemmer
 28 |         $this->r1();
 29 | 
 30 |         // then R1 is adjusted so that the region before it contains at least 3 letters.
 31 |         if ($this->r1Index < 3) {
 32 |             $this->r1Index = 3;
 33 |             $this->r1 = StringHelper::substr($this->word, 3);
 34 |         }
 35 | 
 36 |         // Do each of steps 1, 2 3 and 4.
 37 |         $this->step1();
 38 |         $this->step2();
 39 |         $this->step3();
 40 |         $this->step4();
 41 | 
 42 |         return $this->word;
 43 |     }
 44 | 
 45 |     /**
 46 |      * Define a valid s-ending as one of
 47 |      * a   b   c   d   f   g   h   j   k   l   m   n   o   p   r   t   v   y   z   å
 48 |      *
 49 |      * @param string $ending
 50 |      * @return boolean
 51 |      */
 52 |     private function hasValidSEnding($word)
 53 |     {
 54 |         $lastLetter = StringHelper::substr($word, -1, 1);
 55 |         return in_array($lastLetter, array('a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å'));
 56 |     }
 57 | 
 58 |     /**
 59 |      * Step 1
 60 |      * Search for the longest among the following suffixes in R1, and perform the action indicated.
 61 |      */
 62 |     private function step1()
 63 |     {
 64 |         // hed   ethed   ered   e   erede   ende   erende   ene   erne   ere   en   heden   eren   er   heder   erer
 65 |         // heds   es   endes   erendes   enes   ernes   eres   ens   hedens   erens   ers   ets   erets   et   eret
 66 |         //      delete
 67 |         if ( ($position = $this->searchIfInR1(array(
 68 |             'erendes', 'erende', 'hedens', 'erede', 'ethed', 'heden', 'endes', 'erets', 'heder', 'ernes',
 69 |             'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds',
 70 |             'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e'
 71 |         ))) !== false) {
 72 |             $this->word = StringHelper::substr($this->word, 0, $position);
 73 |             return true;
 74 |         }
 75 | 
 76 |         // s
 77 |         //      delete if preceded by a valid s-ending
 78 |         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
 79 |             $word = StringHelper::substr($this->word, 0, $position);
 80 |             if ($this->hasValidSEnding($word)) {
 81 |                 $this->word = $word;
 82 |             }
 83 |             return true;
 84 |         }
 85 |     }
 86 | 
 87 |     /**
 88 |      * Step 2
 89 |      * Search for one of the following suffixes in R1, and if found delete the last letter.
 90 |      *      gd   dt   gt   kt
 91 |      */
 92 |     private function step2()
 93 |     {
 94 |         if ($this->searchIfInR1(array('gd', 'dt', 'gt', 'kt')) !== false) {
 95 |             $this->word = StringHelper::substr($this->word, 0, -1);
 96 |         }
 97 |     }
 98 | 
 99 |     /**
100 |      * Step 3:
101 |      */
102 |     private function step3()
103 |     {
104 |         // If the word ends igst, remove the final st.
105 |         if ($this->search(array('igst')) !== false) {
106 |             $this->word = StringHelper::substr($this->word, 0, -2);
107 |         }
108 | 
109 |         // Search for the longest among the following suffixes in R1, and perform the action indicated.
110 |         //  ig   lig   elig   els
111 |         //      delete, and then repeat step 2
112 |         if ( ($position = $this->searchIfInR1(array('elig', 'lig', 'ig', 'els'))) !== false) {
113 |             $this->word = StringHelper::substr($this->word, 0, $position);
114 |             $this->step2();
115 |             return true;
116 |         }
117 | 
118 |         //  løst
119 |         //      replace with løs
120 |         if ($this->searchIfInR1(array('løst')) !== false) {
121 |             $this->word = StringHelper::substr($this->word, 0, -1);
122 |         }
123 |     }
124 | 
125 |     /**
126 |      * Step 4: undouble
127 |      * If the word ends with double consonant in R1, remove one of the consonants.
128 |      */
129 |     private function step4()
130 |     {
131 |         $length = StringHelper::strlen($this->word);
132 |         if (!$this->inR1(($length-1))) {
133 |             return false;
134 |         }
135 | 
136 |         $lastLetter = StringHelper::substr($this->word, -1, 1);
137 |         if (in_array($lastLetter, self::$vowels)) {
138 |             return false;
139 |         }
140 |         $beforeLastLetter = StringHelper::substr($this->word, -2, 1);
141 | 
142 |         if ($lastLetter == $beforeLastLetter) {
143 |             $this->word = StringHelper::substr($this->word, 0, -1);
144 |         }
145 |         return true;
146 |     }
147 | }
148 | 


--------------------------------------------------------------------------------
/src/Stemmer/Dutch.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/dutch/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Dutch extends Stem
 14 | {
 15 |     /**
 16 |      * All Dutch vowels (è included)
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'è');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         // First, remove all umlaut and acute accents.
 28 |         $this->word = str_replace(
 29 |             array('ä', 'ë', 'ï', 'ö', 'ü', 'á', 'é', 'í', 'ó', 'ú'),
 30 |             array('a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'),
 31 |             $this->word);
 32 | 
 33 |         $this->plainVowels = implode('', self::$vowels);
 34 | 
 35 |         // Put initial y, y after a vowel, and i between vowels into upper case.
 36 |         $this->word = preg_replace('#^y#u', 'Y', $this->word);
 37 |         $this->word = preg_replace('#(['.$this->plainVowels.'])y#u', '$1Y', $this->word);
 38 |         $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
 39 | 
 40 |         // R1 and R2 (see the note on R1 and R2) are then defined as in German.
 41 |         // R1 and R2 are first set up in the standard way
 42 |         $this->r1();
 43 |         $this->r2();
 44 | 
 45 |         // but then R1 is adjusted so that the region before it contains at least 3 letters.
 46 |         if ($this->r1Index < 3) {
 47 |             $this->r1Index = 3;
 48 |             $this->r1 = StringHelper::substr($this->word, 3);
 49 |         }
 50 | 
 51 |         // Do each of steps 1, 2, 3a, 3b and 4 (step 3b needs to know whether step 2 removed an e), then finish.
 52 |         $this->step1();
 53 |         $removedE = $this->step2();
 54 |         $this->step3a();
 55 |         $this->step3b($removedE);
 56 |         $this->step4();
 57 |         $this->finish();
 58 | 
 59 |         return $this->word;
 60 |     }
 61 | 
 62 |     /**
 63 |      * Define a valid s-ending as a non-vowel other than j.
 64 |      * @param string $word  Word part that precedes the candidate suffix; only its last letter is examined
 65 |      * @return boolean
 66 |      */
 67 |     private function hasValidSEnding($word)
 68 |     {
 69 |         $lastLetter = StringHelper::substr($word, -1, 1);
 70 |         return !in_array($lastLetter, array_merge(self::$vowels, array('j')));
 71 |     }
 72 | 
 73 |     /**
 74 |      * Define a valid en-ending as a non-vowel, and not gem.
 75 |      * @param string $word  Word part that precedes the candidate suffix; its last letter and last three letters are examined
 76 |      * @return boolean
 77 |      */
 78 |     private function hasValidEnEnding($word)
 79 |     {
 80 |         $lastLetter = StringHelper::substr($word, -1, 1);
 81 |         if (in_array($lastLetter, self::$vowels)) {
 82 |             return false;
 83 |         }
 84 | 
 85 |         $threeLastLetters = StringHelper::substr($word, -3, 3);
 86 |         if ($threeLastLetters == 'gem') {
 87 |             return false;
 88 |         }
 89 |         return true;
 90 |     }
 91 | 
 92 |     /**
 93 |      *  Define undoubling the ending as removing the last letter if the word ends kk, dd or tt.
 94 |      */
 95 |     private function unDoubling()
 96 |     {
 97 |         if ($this->search(array('kk', 'dd', 'tt')) !== false) {
 98 |             $this->word = StringHelper::substr($this->word, 0, -1);
 99 |         }
100 |     }
101 | 
102 |     /**
103 |      * Step 1
104 |      * Search for the longest among the following suffixes, and perform the action indicated
105 |      */
106 |     private function step1()
107 |     {
108 |         // heden
109 |         //      replace with heid if in R1
110 |         if ( ($position = $this->search(array('heden'))) !== false) {
111 |             if ($this->inR1($position)) {
112 |                 $this->word = preg_replace('#(heden)$#u', 'heid', $this->word);
113 |             }
114 |             return true;
115 |         }
116 | 
117 |         // en   ene
118 |         //      delete if in R1 and preceded by a valid en-ending, and then undouble the ending
119 |         if ( ($position = $this->search(array('ene', 'en'))) !== false) {
120 |             if ($this->inR1($position)) {
121 |                 $word = StringHelper::substr($this->word, 0, $position);
122 |                 if ($this->hasValidEnEnding($word)) {
123 |                     $this->word = $word;
124 |                     $this->unDoubling();
125 |                 }
126 |             }
127 |             return true;
128 |         }
129 | 
130 |         // s   se
131 |         //      delete if in R1 and preceded by a valid s-ending
132 |         if ( ($position = $this->search(array('se', 's'))) !== false) {
133 |             if ($this->inR1($position)) {
134 |                 $word = StringHelper::substr($this->word, 0, $position);
135 |                 if ($this->hasValidSEnding($word)) {
136 |                     $this->word = $word;
137 |                 }
138 |             }
139 |             return true;
140 |         }
141 | 
142 |         return false;
143 |     }
144 | 
145 |     /**
146 |      * Step 2
147 |      * Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending. Returns true only if an e was removed.
148 |      */
149 |     private function step2()
150 |     {
151 |         if ( ($position = $this->search(array('e'))) !== false) {
152 |             if ($this->inR1($position)) {
153 |                 $letter = StringHelper::substr($this->word, -2, 1);
154 |                 if (!in_array($letter, self::$vowels)) {
155 |                     $this->word = StringHelper::substr($this->word, 0, $position);
156 |                     $this->unDoubling();
157 | 
158 |                     return true;
159 |                 }
160 |             }
161 |         }
162 | 
163 |         return false;
164 |     }
165 | 
166 |     /**
167 |      * Step 3a: heid
168 |      * delete heid if in R2 and not preceded by c, and treat a preceding en as in step 1(b)
169 |      */
170 |     private function step3a()
171 |     {
172 |         if ( ($position = $this->search(array('heid'))) !== false) {
173 |             if ($this->inR2($position)) {
174 |                 $letter = StringHelper::substr($this->word, -5, 1);
175 |                 if ($letter !== 'c') {
176 |                     $this->word = StringHelper::substr($this->word, 0, $position);
177 | 
178 |                     if ( ($position = $this->search(array('en'))) !== false) {
179 |                         if ($this->inR1($position)) {
180 |                             $word = StringHelper::substr($this->word, 0, $position);
181 |                             if ($this->hasValidEnEnding($word)) {
182 |                                 $this->word = $word;
183 |                                 $this->unDoubling();
184 |                             }
185 |                         }
186 |                     }
187 |                 }
188 |             }
189 |         }
190 | 
191 |     }
192 | 
193 |     /**
194 |      * Step 3b: d-suffixes
195 |      * Search for the longest among the following suffixes, and perform the action indicated ($removedE is the result of step 2).
196 |      */
197 |     private function step3b($removedE)
198 |     {
199 |         // end   ing
200 |         //      delete if in R2
201 |         //      if preceded by ig, delete if in R2 and not preceded by e, otherwise undouble the ending
202 |         if ( ($position = $this->search(array('end', 'ing'))) !== false) {
203 |             if ($this->inR2($position)) {
204 |                 $this->word = StringHelper::substr($this->word, 0, $position);
205 | 
206 |                 if ( ($position2 = $this->searchIfInR2(array('ig'))) !== false) {
207 |                     $letter = StringHelper::substr($this->word, -3, 1);
208 |                     if ($letter !== 'e') {
209 |                         $this->word = StringHelper::substr($this->word, 0, $position2);
210 |                     }
211 |                 } else {
212 |                     $this->unDoubling();
213 |                 }
214 |             }
215 | 
216 | 
217 |             return true;
218 |         }
219 | 
220 |         // ig
221 |         //      delete if in R2 and not preceded by e
222 |         if ( ($position = $this->search(array('ig'))) !== false) {
223 |             if ($this->inR2($position)) {
224 |                 $letter = StringHelper::substr($this->word, -3, 1);
225 |                 if ($letter !== 'e') {
226 |                     $this->word = StringHelper::substr($this->word, 0, $position);
227 |                 }
228 |             }
229 |             return true;
230 |         }
231 | 
232 |         // lijk
233 |         //      delete if in R2, and then repeat step 2
234 |         if ( ($position = $this->search(array('lijk'))) !== false) {
235 |             if ($this->inR2($position)) {
236 |                 $this->word = StringHelper::substr($this->word, 0, $position);
237 |                 $this->step2();
238 |             }
239 |             return true;
240 |         }
241 | 
242 |         // baar
243 |         //      delete if in R2
244 |         if ( ($position = $this->search(array('baar'))) !== false) {
245 |             if ($this->inR2($position)) {
246 |                 $this->word = StringHelper::substr($this->word, 0, $position);
247 |             }
248 |             return true;
249 |         }
250 | 
251 |         // bar
252 |         //      delete if in R2 and if step 2 actually removed an e
253 |         if ( ($position = $this->search(array('bar'))) !== false) {
254 |             if ($this->inR2($position) && $removedE) {
255 |                 $this->word = StringHelper::substr($this->word, 0, $position);
256 |             }
257 |             return true;
258 |         }
259 | 
260 |         return false;
261 |     }
262 | 
263 |     /**
264 |      * Step 4: undouble vowel
265 |      * If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u,
266 |      * remove one of the vowels from V (for example, maan -> man, brood -> brod). Returns true only if a vowel was removed.
267 |      */
268 |     private function step4()
269 |     {
270 |         // D is a non-vowel other than I
271 |         $d = StringHelper::substr($this->word, -1, 1);
272 |         if (in_array($d, array_merge(self::$vowels, array('I')))) {
273 |             return false;
274 |         }
275 | 
276 |         // V is double a, e, o or u
277 |         $v = StringHelper::substr($this->word, -3, 2);
278 |         if (!in_array($v, array('aa', 'ee', 'oo', 'uu'))) {
279 |             return false;
280 |         }
281 |         $singleV = StringHelper::substr($v, 0, 1);
282 | 
283 |         // C is a non-vowel
284 |         $c = StringHelper::substr($this->word, -4, 1);
285 |         if (in_array($c, self::$vowels)) {
286 |             return false;
287 |         }
288 | 
289 |         $this->word = StringHelper::substr($this->word, 0, -4) . $c . $singleV . $d;
290 |         return true;
291 |     }
292 | 
293 |     /**
294 |      * Finally
295 |      * Turn I and Y back into lower case.
296 |      */
297 |     private function finish()
298 |     {
299 |         $this->word = str_replace(array('I', 'Y'), array('i', 'y'), $this->word);
300 |     }
301 | }
302 | 


--------------------------------------------------------------------------------
/src/Stemmer/English.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  * English Porter 2
  9 |  *
 10 |  * @link http://snowball.tartarus.org/algorithms/english/stemmer.html
 11 |  * @author wamania
 12 |  *
 13 |  */
 14 | class English extends Stem
 15 | {
 16 |     /**
 17 |      * All English vowels
 18 |      */
 19 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y');
 20 | 
 21 |     protected static $doubles = array('bb', 'dd', 'ff', 'gg', 'mm', 'nn', 'pp', 'rr', 'tt');
 22 | 
 23 |     protected static $liEnding = array('c', 'd', 'e', 'g', 'h', 'k', 'm', 'n', 'r', 't');
 24 | 
 25 |     /**
 26 |      * {@inheritdoc}
 27 |      */
 28 |     public function stem($word)
 29 |     {
 30 |         if (StringHelper::strlen($word) < 3) {
 31 |             return $word;
 32 |         }
 33 | 
 34 |         $this->word = StringHelper::strtolower($word);
 35 | 
 36 |         // exceptions
 37 |         if (null !== ($word = $this->exception1())) {
 38 |             return $word;
 39 |         }
 40 | 
 41 |         // Vowels joined into one string, used below inside a regex character class.
 42 |         $this->plainVowels = implode('', self::$vowels);
 43 | 
 44 |         // Remove initial ', if present.
 45 |         if (StringHelper::substr($this->word, 0, 1) === "'") {
 46 |             $this->word = StringHelper::substr($this->word, 1);
 47 |         }
 48 | 
 49 |         // Set initial y, or y after a vowel, to Y.
 50 |         // The initial y is looked up after the apostrophe has been stripped, so that
 51 |         // a word such as 'yes is handled like yes (a first letter cached before the
 52 |         // removal would still be the apostrophe and the rule would be skipped).
 53 |         $this->word = preg_replace('#^y#u', 'Y', $this->word);
 54 |         $this->word = preg_replace('#(['.$this->plainVowels.'])y#u', '$1Y', $this->word);
 55 | 
 56 |         $this->r1();
 57 |         $this->exceptionR1();
 58 |         $this->r2();
 59 | 
 60 |         $this->step0();
 61 |         $this->step1a();
 62 | 
 63 |         // exceptions 2
 64 |         if (null !== ($word = $this->exception2())) {
 65 |             return $word;
 66 |         }
 67 | 
 68 |         $this->step1b();
 69 |         $this->step1c();
 70 |         $this->step2();
 71 |         $this->step3();
 72 |         $this->step4();
 73 |         $this->step5();
 74 |         $this->finish();
 75 | 
 76 |         return $this->word;
 77 |     }
 78 | 
 79 |     /**
 80 |      * Step 0
 81 |      * Remove ', 's, 's'
 82 |      */
 83 |     private function step0()
 84 |     {
 85 |         if ( ($position = $this->search(array("'s'", "'s", "'"))) !== false) {
 86 |             $this->word = StringHelper::substr($this->word, 0, $position);
 87 |         }
 88 |     }
 89 | 
 90 |     /**
 91 |      * Step 1a: search for the longest among sses, ied/ies, us/ss and s, and perform the action indicated
 92 |      */
 93 |     private function step1a()
 94 |     {
 95 |         // sses
 96 |         //      replace by ss
 97 |         if ( ($position = $this->search(array('sses'))) !== false) {
 98 |             $this->word = preg_replace('#(sses)$#u', 'ss', $this->word);
 99 |             return true;
100 |         }
101 | 
102 |         // ied+   ies*
103 |         //      replace by i if preceded by more than one letter, otherwise by ie (so ties -> tie, cries -> cri)
104 |         if ( ($position = $this->search(array('ied', 'ies'))) !== false) {
105 |             if ($position > 1) {
106 |                 $this->word = preg_replace('#(ied|ies)$#u', 'i', $this->word);
107 |             } else {
108 |                 $this->word = preg_replace('#(ied|ies)$#u', 'ie', $this->word);
109 |             }
110 |             return true;
111 |         }
112 | 
113 |         // us+   ss
114 |         //  do nothing
115 |         if ( ($position = $this->search(array('us', 'ss'))) !== false) {
116 |             return true;
117 |         }
118 | 
119 |         // s
120 |         //      delete if the preceding word part contains a vowel not immediately before the s (so gas and this retain the s, gaps and kiwis lose it)
121 |         if ( ($position = $this->search(array('s'))) !== false) {
122 |             for ($i=0; $i<$position-1; $i++) {
123 |                 $letter = StringHelper::substr($this->word, $i, 1);
124 |                 if (in_array($letter, self::$vowels)) {
125 |                     $this->word = StringHelper::substr($this->word, 0, $position);
126 |                     return true;
127 |                 }
128 |             }
129 |             return true;
130 |         }
131 | 
132 |         return false;
133 |     }
134 | 
135 |     /**
136 |      * Step 1b
137 |      */
138 |     private function step1b()
139 |     {
140 |         // eed   eedly+
141 |         //      replace by ee if in R1
142 |         if ( ($position = $this->search(array('eedly', 'eed'))) !== false) {
143 |             if ($this->inR1($position)) {
144 |                 $this->word = preg_replace('#(eedly|eed)$#u', 'ee', $this->word);
145 |             }
146 |             return true;
147 |         }
148 | 
149 |         // ed   edly+   ing   ingly+
150 |         //      delete if the preceding word part contains a vowel, and after the deletion:
151 |         //      if the word ends at, bl or iz add e (so luxuriat -> luxuriate), or
152 |         //      if the word ends with a double remove the last letter (so hopp -> hop), or
153 |         //      if the word is short, add e (so hop -> hope)
154 |         if ( ($position = $this->search(array('edly', 'ingly', 'ed', 'ing'))) !== false) {
155 |             for ($i=0; $i<$position; $i++) {
156 |                 $letter = StringHelper::substr($this->word, $i, 1);
157 |                 if (in_array($letter, self::$vowels)) {
158 |                     $this->word = StringHelper::substr($this->word, 0, $position);
159 | 
160 |                     if ($this->search(array('at', 'bl', 'iz')) !== false) {
161 |                         $this->word .= 'e';
162 | 
163 |                     } elseif ( ($position2 = $this->search(self::$doubles)) !== false) {
164 |                         $this->word = StringHelper::substr($this->word, 0, ($position2+1));
165 | 
166 |                     } elseif ($this->isShort()) {
167 |                         $this->word .= 'e';
168 |                     }
169 | 
170 |                     return true;
171 |                 }
172 |             }
173 |             return true;
174 |         }
175 | 
176 |         return false;
177 |     }
178 | 
179 |     /**
180 |      * Step 1c: *
181 |      */
182 |     private function step1c()
183 |     {
184 |         // replace suffix y or Y by i if preceded by a non-vowel
185 |         // which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
186 |         $length = StringHelper::strlen($this->word);
187 | 
188 |         if ($length < 3) {
189 |             return true;
190 |         }
191 | 
192 |         if ( ($position = $this->search(array('y', 'Y'))) !== false) {
193 |             $before = $position - 1;
194 |             $letter = StringHelper::substr($this->word, $before, 1);
195 | 
196 |             if (! in_array($letter, self::$vowels)) {
197 |                 $this->word = preg_replace('#(y|Y)$#u', 'i', $this->word);
198 |             }
199 | 
200 |             return true;
201 |         }
202 | 
203 |         return false;
204 |     }
205 | 
206 |     /**
207 |      * Step 2
208 |      *  Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
209 |      */
210 |     private function step2()
211 |     {
212 |         // iveness   iviti:   replace by ive
213 |         if ( ($position = $this->search(array('iveness', 'iviti'))) !== false) {
214 |             if ($this->inR1($position)) {
215 |                 $this->word = preg_replace('#(iveness|iviti)$#u', 'ive', $this->word);
216 |             }
217 |             return true;
218 |         }
219 | 
220 |         // ousli   ousness:   replace by ous
221 |         if ( ($position = $this->search(array('ousli', 'ousness'))) !== false) {
222 |             if ($this->inR1($position)) {
223 |                 $this->word = preg_replace('#(ousli|ousness)$#u', 'ous', $this->word);
224 |             }
225 |             return true;
226 |         }
227 | 
228 |         // izer   ization:   replace by ize
229 |         if ( ($position = $this->search(array('izer', 'ization'))) !== false) {
230 |             if ($this->inR1($position)) {
231 |                 $this->word = preg_replace('#(izer|ization)$#u', 'ize', $this->word);
232 |             }
233 |             return true;
234 |         }
235 | 
236 |         // ational   ation   ator:   replace by ate
237 |         if ( ($position = $this->search(array('ational', 'ation', 'ator'))) !== false) {
238 |             if ($this->inR1($position)) {
239 |                 $this->word = preg_replace('#(ational|ation|ator)$#u', 'ate', $this->word);
240 |             }
241 |             return true;
242 |         }
243 | 
244 |         // biliti   bli+:   replace by ble
245 |         if ( ($position = $this->search(array('biliti', 'bli'))) !== false) {
246 |             if ($this->inR1($position)) {
247 |                 $this->word = preg_replace('#(biliti|bli)$#u', 'ble', $this->word);
248 |             }
249 |             return true;
250 |         }
251 | 
252 |         // lessli+:   replace by less
253 |         if ( ($position = $this->search(array('lessli'))) !== false) {
254 |             if ($this->inR1($position)) {
255 |                 $this->word = preg_replace('#(lessli)$#u', 'less', $this->word);
256 |             }
257 |             return true;
258 |         }
259 | 
260 |         // fulness   fulli+:   replace by ful
261 |         if ( ($position = $this->search(array('fulness', 'fulli'))) !== false) {
262 |             if ($this->inR1($position)) {
263 |                 $this->word = preg_replace('#(fulness|fulli)$#u', 'ful', $this->word);
264 |             }
265 |             return true;
266 |         }
267 | 
268 |         // tional:   replace by tion
269 |         if ( ($position = $this->search(array('tional'))) !== false) {
270 |             if ($this->inR1($position)) {
271 |                 $this->word = preg_replace('#(tional)$#u', 'tion', $this->word);
272 |             }
273 |             return true;
274 |         }
275 | 
276 |         // alism   aliti   alli:   replace by al
277 |         if ( ($position = $this->search(array('alism', 'aliti', 'alli'))) !== false) {
278 |             if ($this->inR1($position)) {
279 |                 $this->word = preg_replace('#(alism|aliti|alli)$#u', 'al', $this->word);
280 |             }
281 |             return true;
282 |         }
283 | 
284 |         // enci:   replace by ence
285 |         if ( ($position = $this->search(array('enci'))) !== false) {
286 |             if ($this->inR1($position)) {
287 |                 $this->word = preg_replace('#(enci)$#u', 'ence', $this->word);
288 |             }
289 |             return true;
290 |         }
291 | 
292 |         // anci:   replace by ance
293 |         if ( ($position = $this->search(array('anci'))) !== false) {
294 |             if ($this->inR1($position)) {
295 |                 $this->word = preg_replace('#(anci)$#u', 'ance', $this->word);
296 |             }
297 |             return true;
298 |         }
299 | 
300 |         // abli:   replace by able
301 |         if ( ($position = $this->search(array('abli'))) !== false) {
302 |             if ($this->inR1($position)) {
303 |                 $this->word = preg_replace('#(abli)$#u', 'able', $this->word);
304 |             }
305 |             return true;
306 |         }
307 | 
308 |         // entli:   replace by ent
309 |         if ( ($position = $this->search(array('entli'))) !== false) {
310 |             if ($this->inR1($position)) {
311 |                 $this->word = preg_replace('#(entli)$#u', 'ent', $this->word);
312 |             }
313 |             return true;
314 |         }
315 | 
316 |         // ogi+:   replace by og if preceded by l
317 |         if ( ($position = $this->search(array('ogi'))) !== false) {
318 |             if ($this->inR1($position)) {
319 |                 $before = $position - 1;
320 |                 $letter = StringHelper::substr($this->word, $before, 1);
321 | 
322 |                 if ($letter == 'l') {
323 |                     $this->word = preg_replace('#(ogi)$#u', 'og', $this->word);
324 |                 }
325 |             }
326 |             return true;
327 |         }
328 | 
329 |         // li+:   delete if preceded by a valid li-ending
330 |         if ( ($position = $this->search(array('li'))) !== false) {
331 |             if ($this->inR1($position)) {
332 |                 // letter right before the suffix, checked against the valid li-endings
333 |                 $letter = StringHelper::substr($this->word, ($position-1), 1);
334 | 
335 |                 if (in_array($letter, self::$liEnding)) {
336 |                     $this->word = StringHelper::substr($this->word, 0, $position);
337 |                 }
338 |             }
339 |             return true;
340 |         }
341 | 
342 |         return false;
343 |     }
344 | 
345 |     /**
346 |      * Step 3:
347 |      * Search for the longest among the following suffixes, and, if found and in R1, perform the action indicated.
348 |      */
349 |     private function step3()
350 |     {
351 |         // ational+:   replace by ate
352 |         if ($this->searchIfInR1(array('ational')) !== false) {
353 |             $this->word = preg_replace('#(ational)$#u', 'ate', $this->word);
354 |             return true;
355 |         }
356 | 
357 |         // tional+:   replace by tion
358 |         if ($this->searchIfInR1(array('tional')) !== false) {
359 |             $this->word = preg_replace('#(tional)$#u', 'tion', $this->word);
360 |             return true;
361 |         }
362 | 
363 |         // alize:   replace by al
364 |         if ($this->searchIfInR1(array('alize')) !== false) {
365 |             $this->word = preg_replace('#(alize)$#u', 'al', $this->word);
366 |             return true;
367 |         }
368 | 
369 |         // icate   iciti   ical:   replace by ic
370 |         if ($this->searchIfInR1(array('icate', 'iciti', 'ical')) !== false) {
371 |             $this->word = preg_replace('#(icate|iciti|ical)$#u', 'ic', $this->word);
372 |             return true;
373 |         }
374 | 
375 |         // ful   ness:   delete
376 |         if ( ($position = $this->searchIfInR1(array('ful', 'ness'))) !== false) {
377 |             $this->word = StringHelper::substr($this->word, 0, $position);
378 |             return true;
379 |         }
380 | 
381 |         // ative*:   delete if in R2
382 |         if ( (($position = $this->searchIfInR1(array('ative'))) !== false) && ($this->inR2($position)) )  {
383 |             $this->word = StringHelper::substr($this->word, 0, $position);
384 |             return true;
385 |         }
386 | 
387 |         return false;
388 |     }
389 | 
390 |     /**
391 |      * Step 4
392 |      * Search for the longest among the following suffixes, and, if found and in R2, perform the action indicated.
393 |      */
394 |     private function step4()
395 |     {
396 |         //    ement  ance   ence  able ible   ant  ment   ent   ism   ate   iti   ous   ive   ize al  er   ic
397 |         //      delete
398 |         if ( ($position = $this->search(array(
399 |             'ance', 'ence', 'ement', 'able', 'ible', 'ant', 'ment', 'ent', 'ism',
400 |             'ate', 'iti', 'ous', 'ive', 'ize', 'al', 'er', 'ic'))) !== false) {
401 |             if ($this->inR2($position)) {
402 |                 $this->word = StringHelper::substr($this->word, 0, $position);
403 |             }
404 |             return true;
405 |         }
406 | 
407 |         // ion
408 |         //      delete if preceded by s or t
409 |         if ( ($position = $this->searchIfInR2(array('ion'))) !== false) {
410 |             $before = $position - 1;
411 |             $letter = StringHelper::substr($this->word, $before, 1);
412 | 
413 |             if ($letter == 's' || $letter == 't') {
414 |                 $this->word = StringHelper::substr($this->word, 0, $position);
415 |             }
416 | 
417 |             return true;
418 |         }
419 | 
420 |         return false;
421 |     }
422 | 
423 |     /**
424 |      * Step 5: *
425 |      * Search for the following suffixes, and, if found, perform the action indicated.
426 |      */
427 |     private function step5()
428 |     {
429 |         // e
430 |         //      delete if in R2, or in R1 and not preceded by a short syllable
431 |         if ( ($position = $this->search(array('e'))) !== false) {
432 |             if ($this->inR2($position)) {
433 |                 $this->word = StringHelper::substr($this->word, 0, $position);
434 |             } elseif ($this->inR1($position)) {
435 |                 if ( (! $this->searchShortSyllabe(-4, 3)) && (! $this->searchShortSyllabe(-3, 2)) ) {
436 |                     $this->word = StringHelper::substr($this->word, 0, $position);
437 |                 }
438 |             }
439 | 
440 |             return true;
441 |         }
442 | 
443 |         // l
444 |         //      delete if in R2 and preceded by l
445 |         if ( ($position = $this->searchIfInR2(array('l'))) !== false) {
446 |             $before = $position - 1;
447 |             $letter = StringHelper::substr($this->word, $before, 1);
448 | 
449 |             if ($letter == 'l') {
450 |                 $this->word = StringHelper::substr($this->word, 0, $position);
451 |             }
452 | 
453 |             return true;
454 |         }
455 | 
456 |         return false;
457 |     }
458 | 
459 |     /**
460 |      * Finally, turn any remaining Y letters in the word back into lower case.
461 |      */
462 |     private function finish()
463 |     {
464 |         $this->word = str_replace('Y', 'y', $this->word);
465 |     }
466 | 
467 |     /**
468 |      * Override R1 for words beginning with gener, commun or arsen: R1 then starts right after that prefix.
469 |      */
470 |     private function exceptionR1()
471 |     {
472 |         if (StringHelper::strpos($this->word, 'gener') === 0) {
473 |             $this->r1 = StringHelper::substr($this->word, 5);
474 |             $this->r1Index = 5;
475 | 
476 |         } elseif (StringHelper::strpos($this->word, 'commun') === 0) {
477 |             $this->r1 = StringHelper::substr($this->word, 6);
478 |             $this->r1Index = 6;
479 | 
480 |         } elseif (StringHelper::strpos($this->word, 'arsen') === 0) {
481 |             $this->r1 = StringHelper::substr($this->word, 5);
482 |             $this->r1Index = 5;
483 |         }
484 |     }
485 | 
486 |     /**
487 |      *  1/ Stem certain special words as follows,
488 |      *  2/ If one of the following is found, leave it invariant. Returns null when the current word is not listed.
489 |      */
490 |     private function exception1()
491 |     {
492 |         $exceptions = array(
493 |             'skis'   => 'ski',
494 |             'skies'  => 'sky',
495 |             'dying'  => 'die',
496 |             'lying'  => 'lie',
497 |             'tying'  => 'tie',
498 |             'idly'   => 'idl',
499 |             'gently' => 'gentl',
500 |             'ugly'   => 'ugli',
501 |             'early'  => 'earli',
502 |             'only'   => 'onli',
503 |             'singly' => 'singl',
504 |             // invariants
505 |             'sky'    => 'sky',
506 |             'news'   => 'news',
507 |             'howe'   => 'howe',
508 |             'atlas'  => 'atlas',
509 |             'cosmos' => 'cosmos',
510 |             'bias'   => 'bias',
511 |             'andes'  => 'andes'
512 |         );
513 | 
514 |         if (isset($exceptions[$this->word])) {
515 |             return $exceptions[$this->word];
516 |         }
517 | 
518 |         return null;
519 |     }
520 | 
521 |     /**
522 |      * Following step 1a, leave the following invariant. Returns null when the current word is not listed.
523 |      */
524 |     private function exception2()
525 |     {
526 |         $exceptions = array(
527 |             'inning' => 'inning',
528 |             'outing' => 'outing',
529 |             'canning' => 'canning',
530 |             'herring' => 'herring',
531 |             'earring' => 'earring',
532 |             'proceed' => 'proceed',
533 |             'exceed'  => 'exceed',
534 |             'succeed' => 'succeed'
535 |         );
536 | 
537 |         if (isset($exceptions[$this->word])) {
538 |             return $exceptions[$this->word];
539 |         }
540 | 
541 |         return null;
542 |     }
543 | 
544 |     /**
545 |      *  A word is called short if it ends in a short syllable, and if R1 is null.
546 |      *  Note : R1 is not really null here; instead the current word length must equal the R1 index
547 |      *
548 |      *  @return boolean
549 |      */
550 |     private function isShort()
551 |     {
552 |         $length = StringHelper::strlen($this->word);
553 |         return ( ($this->searchShortSyllabe(-3, 3) || $this->searchShortSyllabe(-2, 2)) && ($length == $this->r1Index) );
554 |     }
555 | 
556 |     /**
557 |      * Define a short syllable in a word as either (a) a vowel followed by a non-vowel other than w, x or Y and preceded by a non-vowel,
558 |      *  or * (b) a vowel at the beginning of the word followed by a non-vowel.
559 |      *
560 |      *  So rap, trap, entrap end with a short syllable, and ow, on, at are classed as short syllables.
561 |      *  But uproot, bestow, disturb do not end with a short syllable.
562 |      */
563 |     private function searchShortSyllabe($from, $nbLetters)
564 |     {
565 |         $length = StringHelper::strlen($this->word);
566 | 
567 |         if ($from < 0) {
568 |             $from = $length + $from;
569 |         }
570 |         if ($from < 0) {
571 |             $from = 0;
572 |         }
573 | 
574 |         // (b) only applies at the beginning of the word
575 |         if ( ($nbLetters == 2) && ($from != 0) ) {
576 |             return false;
577 |         }
578 | 
579 |         $first = StringHelper::substr($this->word, $from, 1);
580 |         $second = StringHelper::substr($this->word, ($from+1), 1);
581 | 
582 |         if ($nbLetters == 2) {
583 |             if ( (in_array($first, self::$vowels)) && (!in_array($second, self::$vowels)) ) {
584 |                 return true;
585 |             }
586 |         }
587 | 
588 |         $third = StringHelper::substr($this->word, ($from+2), 1);
589 | 
590 |         if ( (!in_array($first, self::$vowels)) && (in_array($second, self::$vowels))
591 |             && (!in_array($third, array_merge(self::$vowels, array('x', 'Y', 'w'))))) {
592 |             return true;
593 |         }
594 | 
595 |         return false;
596 |     }
597 | }
598 | 


--------------------------------------------------------------------------------
/src/Stemmer/Finnish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Finnish Snowball Stemmer.
  4 |  *
  5 |  * @author msaari <mikko@mikkosaari.fi>
  6 |  */
  7 | namespace Wamania\Snowball\Stemmer;
  8 | 
  9 | use Joomla\String\StringHelper;
 10 | 
 11 | /**
 12 |  * Finnish Snowball Stemmer.
 13 |  *
 14 |  * @link http://snowball.tartarus.org/algorithms/finnish/stemmer.html
 15 |  * @author msaari
 16 |  */
 17 | class Finnish extends Stem
 18 | {
 19 |     /**
 20 |      * All Finnish vowels
 21 |      */
 22 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö');
 23 | 
 24 |     protected static $consonants = array('b', 'c', 'd', 'f', 'g', 'h', 'j',
 25 |     'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'z');
 26 | 
 27 |     protected static $restrictedVowels = array('a', 'e', 'i', 'o', 'u', 'ä', 'ö');
 28 | 
 29 |     /**
 30 |      * Long restricted vowels, ie. doubled vowels.
 31 |      */
 32 |     protected static $longVowels = array('aa', 'ee', 'ii', 'oo', 'uu', 'ää', 'öö');
 33 | 
 34 |     private $_removedInStep3 = false;
 35 | 
 36 |     /**
 37 |      * {@inheritdoc}
 38 |      */
 39 |     public function stem($word)
 40 |     {
 41 |         $this->word = StringHelper::strtolower($word);
 42 |         // Per-word state: a reused instance must not carry the step 3 flag of the previous word into step 5.
 43 |         $this->_removedInStep3 = false;
 44 | 
 45 |         // R1 and R2 are then defined in the usual way
 46 |         $this->r1();
 47 |         $this->r2();
 48 | 
 49 |         // Do each of steps 1, 2, 3, 4, 5 and 6; every step reads and updates $this->word.
 50 |         $this->step1();
 51 |         $this->step2();
 52 |         $this->step3();
 53 |         $this->step4();
 54 |         $this->step5();
 55 |         $this->step6();
 56 |         return $this->word;
 57 |     }
 58 | 
 59 |     /**
 60 |      * Step 1: particles etc.
 61 |      *
 62 |      * Looks in R1 for a particle suffix (kin, kaan, kään, ko, kö, han, hän, pa, pä)
 63 |      * or for sti, and removes the suffix found when its side condition holds.
 64 |      *
 65 |      * @return boolean|null True when one of the suffixes was found in R1 (removed or not), null otherwise.
 66 |      */
 67 |     private function step1()
 68 |     {
 69 |         // (a) kin   kaan   kään   ko   kö   han   hän   pa   pä
 70 |         //      delete if preceded by n, t or a vowel
 71 |         $particles = array('kaan', 'kään', 'kin', 'han', 'hän', 'ko', 'kö', 'pa', 'pä');
 72 |         $start = $this->searchIfInR1($particles);
 73 |         if ($start !== false) {
 74 |             $allowedBefore = array_merge(['t', 'n'], self::$vowels);
 75 |             if (in_array(StringHelper::substr($this->word, $start - 1, 1), $allowedBefore)) {
 76 |                 $this->word = StringHelper::substr($this->word, 0, $start);
 77 |                 $this->r1();
 78 |                 $this->r2();
 79 |             }
 80 |             return true;
 81 |         }
 82 | 
 83 |         // (b) sti: delete if in R2
 84 |         $start = $this->searchIfInR1(array('sti'));
 85 |         if ($start === false) {
 86 |             return;
 87 |         }
 88 |         if ($this->inR2($start)) {
 89 |             $this->word = StringHelper::substr($this->word, 0, $start);
 90 |             $this->r1();
 91 |             $this->r2();
 92 |         }
 93 |         return true;
 94 |     }
 95 | 
 96 |     /**
 97 |      * Step 2: possessives.
 98 |      *
 99 |      * The suffixes below are looked up in R1 one after the other; the first one
100 |      * whose side condition holds is removed and the step ends:
101 |      *   - si                 delete if not preceded by k
102 |      *   - ni                 delete; a remaining final kse becomes ksi
103 |      *   - nsa nsä mme nne    delete
104 |      *   - an                 delete if preceded by ta, ssa, sta, lla, lta or na
105 |      *   - än                 delete if preceded by tä, ssä, stä, llä, ltä or nä
106 |      *   - en                 delete if preceded by lle or ine
107 |      * R1 and R2 are recomputed after every removal.
108 |      *
109 |      * @return boolean|null True when a suffix was removed, null otherwise.
110 |      */
111 |     private function step2()
112 |     {
113 |         // si
114 |         //  delete if not preceded by k
115 |         $at = $this->searchIfInR1(array('si'));
116 |         if ($at !== false && StringHelper::substr($this->word, $at - 1, 1) !== 'k') {
117 |             $this->word = StringHelper::substr($this->word, 0, $at);
118 |             $this->r1();
119 |             $this->r2();
120 |             return true;
121 |         }
122 | 
123 |         // ni
124 |         //  delete; if the remainder then ends in kse, replace that with ksi
125 |         $at = $this->searchIfInR1(array('ni'));
126 |         if ($at !== false) {
127 |             $this->word = StringHelper::substr($this->word, 0, $at);
128 |             if ($this->search(array('kse')) !== false) {
129 |                 $this->word = preg_replace('#(kse)$#u', 'ksi', $this->word);
130 |             }
131 |             $this->r1();
132 |             $this->r2();
133 |             return true;
134 |         }
135 | 
136 |         // nsa   nsä   mme   nne
137 |         //  delete
138 |         $at = $this->searchIfInR1(array('nsa', 'nsä', 'mme', 'nne'));
139 |         if ($at !== false) {
140 |             $this->word = StringHelper::substr($this->word, 0, $at);
141 |             $this->r1();
142 |             $this->r2();
143 |             return true;
144 |         }
145 | 
146 |         // an
147 |         //  delete if preceded by one of   ta   ssa   sta   lla   lta   na
148 |         // än
149 |         //  delete if preceded by one of   tä   ssä   stä   llä   ltä   nä
150 |         $caseMarkers = array(
151 |             'an' => array(array('ssa', 'sta', 'lla', 'lta'), array('na', 'ta')),
152 |             'än' => array(array('ssä', 'stä', 'llä', 'ltä'), array('nä', 'tä')),
153 |         );
154 |         foreach ($caseMarkers as $suffix => $markers) {
155 |             $at = $this->searchIfInR1(array($suffix));
156 |             if ($at === false) {
157 |                 continue;
158 |             }
159 |             $stem = StringHelper::substr($this->word, 0, $at);
160 |             $matchesThree = in_array(StringHelper::substr($stem, -3, 3), $markers[0], true);
161 |             $matchesTwo = in_array(StringHelper::substr($stem, -2, 2), $markers[1], true);
162 |             if ($matchesThree || $matchesTwo) {
163 |                 $this->word = $stem;
164 |                 $this->r1();
165 |                 $this->r2();
166 |                 return true;
167 |             }
168 |         }
169 | 
170 |         // en
171 |         //  delete if preceded by one of   lle   ine
172 |         $at = $this->searchIfInR1(array('en'));
173 |         if ($at === false || StringHelper::strlen($this->word) <= 4) {
174 |             return;
175 |         }
176 |         // the three letters right before the final "en"
177 |         if (in_array(StringHelper::substr($this->word, -5, 3), array('lle', 'ine'), true)) {
178 |             $this->word = StringHelper::substr($this->word, 0, $at);
179 |             $this->r1();
180 |             $this->r2();
181 |             return true;
182 |         }
183 |     }
184 | 
185 |     /**
186 |      * Step 3: cases
187 |      *
188 |      * Tries the case endings below in R1, in order. Every removal sets $_removedInStep3 and recomputes
189 |      * R1/R2. Except for hXn, a rule whose condition fails falls through to the rules after it.
190 |      *
191 |      * @return boolean|null True when an ending was removed or an hXn ending was found; null otherwise.
192 |      */
193 |     private function step3()
194 |     {
195 |         // hXn, with X one of the restricted vowels other than u (han, hen, hin, hon, hän, hön)
196 |         // delete if preceded by the same X; a found hXn ends the step even when it is kept
197 |         foreach (self::$restrictedVowels as $vowel) {
198 |             if ($vowel === 'u') {
199 |                 continue;
200 |             }
201 |             if (($position = $this->searchIfInR1(array('h' . $vowel . 'n'))) !== false) {
202 |                 $lastLetter = StringHelper::substr($this->word, $position-1, 1);
203 |                 if ($lastLetter === $vowel) {
204 |                     $this->word = StringHelper::substr($this->word, 0, $position);
205 |                     $this->_removedInStep3 = true;
206 |                     $this->r1();
207 |                     $this->r2();
208 |                 }
209 |                 return true;
210 |             }
211 |         }
212 | 
213 |         // siin   den   tten
214 |         // delete if preceded by Vi (V a restricted vowel); otherwise fall through, so the bare n rule below may still apply
215 |         if (($position = $this->searchIfInR1(array('siin', 'den', 'tten'))) !== false) {
216 |             $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
217 |             if ($lastLetter === 'i') {
218 |                 $nextLastLetter = StringHelper::substr($this->word, ($position-2), 1);
219 |                 if (in_array($nextLastLetter, self::$restrictedVowels, true)) {
220 |                     $this->word = StringHelper::substr($this->word, 0, $position);
221 |                     $this->_removedInStep3 = true;
222 |                     $this->r1();
223 |                     $this->r2();
224 |                     return true;
225 |                 }
226 |             }
227 |         }
228 | 
229 |         // seen
230 |         // delete if preceded by LV (a doubled vowel from $longVowels); otherwise fall through
231 |         if (($position = $this->searchIfInR1(array('seen'))) !== false) {
232 |             $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
233 | 
234 |             if (in_array($lastLetters, self::$longVowels, true)) {
235 |                 $this->word = StringHelper::substr($this->word, 0, $position);
236 |                 $this->_removedInStep3 = true;
237 |                 $this->r1();
238 |                 $this->r2();
239 |                 return true;
240 |             }
241 |         }
242 | 
243 |         // tta    ttä
244 |         // delete if preceded by e; otherwise fall through (the list below contains ta and tä)
245 |         if (($position = $this->searchIfInR1(array('tta', 'ttä'))) !== false) {
246 |             $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
247 | 
248 |             if ($lastLetter === 'e') {
249 |                 $this->word = StringHelper::substr($this->word, 0, $position);
250 |                 $this->_removedInStep3 = true;
251 |                 $this->r1();
252 |                 $this->r2();
253 |                 return true;
254 |             }
255 |         }
256 | 
257 |         // ta  tä  ssa  ssä  sta  stä  lla  llä  lta  ltä  lle  na  nä  ksi  ine
258 |         // delete unconditionally
259 |         if (($position = $this->searchIfInR1(array('ssa', 'ssä', 'sta', 'stä', 'lla', 'llä', 'lta', 'ltä', 'lle', 'ksi', 'na', 'nä', 'ine', 'ta', 'tä'))) !== false) {
260 |             $this->word = StringHelper::substr($this->word, 0, $position);
261 |             $this->_removedInStep3 = true;
262 |             $this->r1();
263 |             $this->r2();
264 |             return true;
265 |         }
266 | 
267 |         // a    ä
268 |         // delete if preceded by cv, i.e. a consonant followed by a vowel
269 |         if (($position = $this->searchIfInR1(array('a', 'ä'))) !== false) {
270 |             $lastLetter = StringHelper::substr($this->word, ($position-1), 1);
271 |             $nextLastLetter = StringHelper::substr($this->word, ($position-2), 1);
272 | 
273 |             if (in_array($lastLetter, self::$vowels, true) && in_array($nextLastLetter, self::$consonants, true)) {
274 |                 $this->word = StringHelper::substr($this->word, 0, $position);
275 |                 $this->_removedInStep3 = true;
276 |                 $this->r1();
277 |                 $this->r2();
278 |                 return true;
279 |             }
280 |         }
281 | 
282 |         // n
283 |         // always delete; if preceded by LV or ie, also delete the vowel right before the n
284 |         if (($position = $this->searchIfInR1(array('n'))) !== false) {
285 |             $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
286 | 
287 |             if (in_array($lastLetters, self::$longVowels, true) || $lastLetters === 'ie') {
288 |                 $this->word = StringHelper::substr($this->word, 0, $position-1);
289 |             } else {
290 |                 $this->word = StringHelper::substr($this->word, 0, $position);
291 |             }
292 |             $this->r1();
293 |             $this->r2();
294 |             $this->_removedInStep3 = true;
295 |             return true;
296 |         }
297 |     }
298 | 
299 |     /**
300 |      * Step 4: other endings
301 |      *
302 |      * Search for the longest among the following suffixes in R2, and perform
303 |      * the action indicated. R1 and R2 are recomputed after a removal.
304 |      *
305 |      * @return boolean|null True when a suffix was removed, null otherwise.
306 |      */
307 |     private function step4()
308 |     {
309 |         // impi   impa   impä   immi   imma   immä   eja   ejä: delete
310 |         // Tried first so that the longest suffix lying in R2 wins (impi also ends in mpi).
311 |         if (($position = $this->searchIfInR2(array('impi', 'impa', 'impä', 'immi', 'imma', 'immä', 'eja', 'ejä'))) !== false) {
312 |             $this->word = StringHelper::substr($this->word, 0, $position);
313 |             $this->r1();
314 |             $this->r2();
315 |             return true;
316 |         }
317 | 
318 |         // mpi   mpa   mpä   mmi   mma   mmä
319 |         // delete if not preceded by po
320 |         if (($position = $this->searchIfInR2(array('mpi', 'mpa', 'mpä', 'mmi', 'mma', 'mmä'))) !== false) {
321 |             $lastLetters = StringHelper::substr($this->word, ($position-2), 2);
322 |             if ($lastLetters !== 'po') {
323 |                 $this->word = StringHelper::substr($this->word, 0, $position);
324 |                 $this->r1();
325 |                 $this->r2();
326 |                 return true;
327 |             }
328 |         }
329 |     }
330 | 
331 |     /**
332 |      * Step 5: plurals
333 |      *
334 |      * If an ending was removed in step 3, delete a final i or j if in R1;
335 |      * otherwise, delete a final t in R1 if it follows a vowel, and, if a t
336 |      * is removed, delete a final imma or mma in R2, unless the mma is
337 |      * preceded by po.
338 |      * R1 and R2 are recomputed after every removal.
339 |      *
340 |      * @return boolean|null True when an i/j or an (i)mma was removed; null otherwise,
341 |      *                      including the case where only the final t was removed.
342 |      */
343 |     private function step5()
344 |     {
345 |         if ($this->_removedInStep3) {
346 |             $cut = $this->searchIfInR1(array('i', 'j'));
347 |             if ($cut === false) {
348 |                 return;
349 |             }
350 |             $this->word = StringHelper::substr($this->word, 0, $cut);
351 |             $this->r1();
352 |             $this->r2();
353 |             return true;
354 |         }
355 | 
356 |         $cut = $this->searchIfInR1(array('t'));
357 |         if ($cut === false || !in_array(StringHelper::substr($this->word, $cut - 1, 1), self::$vowels, true)) {
358 |             return;
359 |         }
360 |         $this->word = StringHelper::substr($this->word, 0, $cut);
361 |         $this->r1();
362 |         $this->r2();
363 | 
364 |         $cut = $this->searchIfInR2(array('imma'));
365 |         if ($cut === false) {
366 |             $cut = $this->searchIfInR2(array('mma'));
367 |             if ($cut === false || StringHelper::substr($this->word, $cut - 2, 2) === 'po') {
368 |                 return;
369 |             }
370 |         }
371 |         $this->word = StringHelper::substr($this->word, 0, $cut);
372 |         $this->r1();
373 |         $this->r2();
374 |         return true;
375 |     }
376 | 
377 |     /**
378 |      * Step 6: tidying up
379 |      *
380 |      * Do in turn steps (a), (b), (c), (d), restricting all tests to the
381 |      * region R1, then step (e) on the whole word. Returns nothing; only $this->word (and R1/R2) change.
382 |      */
383 |     private function step6()
384 |     {
385 |         // a) If R1 ends LV
386 |         // delete the last letter
387 |         if (($position = $this->searchIfInR1(self::$longVowels)) !== false) {
388 |             $this->word = StringHelper::substr($this->word, 0, $position+1);
389 |             $this->r1();
390 |             $this->r2();
391 |         }
392 | 
393 |         // b) If R1 ends cX, c a consonant and X one of   a   ä   e   i,
394 |         // delete the last letter
395 |         $lastLetter = StringHelper::substr($this->r1, -1, 1);
396 |         $secondToLastLetter = StringHelper::substr($this->r1, -2, 1);
397 |         if (in_array($secondToLastLetter, self::$consonants, true) && in_array($lastLetter, array('a', 'e', 'i', 'ä'), true)) {
398 |             $this->word = StringHelper::substr($this->word, 0, -1);
399 |             $this->r1();
400 |             $this->r2();
401 |         }
402 | 
403 |         // c) If R1 ends oj or uj
404 |         // delete the last letter
405 |         $twoLastLetters = StringHelper::substr($this->r1, -2, 2);
406 |         if (in_array($twoLastLetters, array('oj', 'uj'), true)) {
407 |             $this->word = StringHelper::substr($this->word, 0, -1);
408 |             $this->r1();
409 |             $this->r2();
410 |         }
411 | 
412 |         // d) If R1 ends jo
413 |         // delete the last letter
414 |         $twoLastLetters = StringHelper::substr($this->r1, -2, 2);
415 |         if ($twoLastLetters === 'jo') {
416 |             $this->word = StringHelper::substr($this->word, 0, -1);
417 |             $this->r1();
418 |             $this->r2();
419 |         }
420 | 
421 |         // e) If the word ends with a double consonant followed by zero or more
422 |         // vowels, remove the last consonant (eläkk -> eläk, aatonaatto -> aatonaato).
423 |         // Only real consonants count: doubled digits or other symbols are kept ("100" stays "100").
424 |         $endVowels = '';
425 |         for ($i = StringHelper::strlen($this->word) - 1; $i > 0; $i--) {
426 |             $letter = StringHelper::substr($this->word, $i, 1);
427 |             if (in_array($letter, self::$vowels, true)) {
428 |                 $endVowels = $letter . $endVowels;
429 |             } else {
430 |                 // first non-vowel from the end: undouble it only if it is a consonant
431 |                 $prevLetter = StringHelper::substr($this->word, $i-1, 1);
432 |                 if ($prevLetter === $letter && in_array($letter, self::$consonants, true)) {
433 |                     $this->word = StringHelper::substr($this->word, 0, $i) . $endVowels;
434 |                 }
435 |                 break;
436 |             }
437 |         }
438 |     }
439 | }
440 | 


--------------------------------------------------------------------------------
/src/Stemmer/French.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/french/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class French extends Stem
 14 | {
 15 |     /**
 16 |      * All french vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'â', 'à', 'ë', 'é', 'ê', 'è', 'ï', 'î', 'ô', 'û', 'ù');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         $this->plainVowels = implode('', self::$vowels);
 28 | 
 29 |         $this->step0();
 30 | 
 31 |         $this->rv();
 32 |         $this->r1();
 33 |         $this->r2();
 34 | 
 35 |         // to know if step1, 2a or 2b have altered the word
 36 |         $this->originalWord = $this->word;
 37 | 
 38 |         $nextStep = $this->step1();
 39 | 
 40 |         // Do step 2a if either no ending was removed by step 1, or if one of endings amment, emment, ment, ments was found.
 41 |         // Strict comparisons: the operands are strings (or the int step number), so no type juggling may blur the check.
 42 |         if ( ($nextStep === 2) || ($this->originalWord === $this->word) ) {
 43 |             $modified = $this->step2a();
 44 |             if (!$modified) {
 45 |                 $this->step2b();
 46 |             }
 47 |         }
 48 | 
 49 |         if ($this->word !== $this->originalWord) {
 50 |             $this->step3();
 51 |         } else {
 52 |             $this->step4();
 53 |         }
 54 | 
 55 |         $this->step5();
 56 |         $this->step6();
 57 |         $this->finish();
 58 | 
 59 |         return $this->word;
 60 |     }
 61 | 
 62 | 
 63 | 
 64 |     /**
 65 |      *  Assume the word is in lower case. Then put into upper case, in this order: u after q,
 66 |      *  y preceded or followed by a vowel, and u or i preceded and followed by a vowel. For example,
 67 |      *      jouer    ->    joUer
 68 |      *      ennuie   ->    ennuIe
 69 |      *      yeux     ->    Yeux
 70 |      *      quand    ->    qUand
 71 |      */
 72 |     private function step0()
 73 |     {
 74 |         $v = '(['.$this->plainVowels.'])';
 75 |         $rules = array('#([q])u#u' => '$1U', '#'.$v.'y#u' => '$1Y', '#y'.$v.'#u' => 'Y$1',
 76 |             '#'.$v.'u'.$v.'#u' => '$1U$2', '#'.$v.'i'.$v.'#u' => '$1I$2');
 77 |         foreach ($rules as $pattern => $replacement) {
 78 |             $this->word = preg_replace($pattern, $replacement, $this->word);
 79 |         }
 80 |     }
 81 | 
 82 |     /**
 83 |      * Step 1: standard suffix removal.
 84 |      * Search for the longest among the following suffixes, and perform the action indicated.
 85 |      * Every lookup is compared with !== false, because a suffix found at offset 0 is still a match.
 86 |      * @return integer 3 when a standard suffix was found, 2 (go on with step 2a) for amment, emment, ment(s) or no suffix at all
 87 |      */
 88 |     private function step1()
 89 |     {
 90 |         // ance   iqUe   isme   able   iste   eux   ances   iqUes   ismes   ables   istes
 91 |         //     delete if in R2
 92 |         if ( ($position = $this->search(array('ances', 'iqUes', 'ismes', 'ables', 'istes', 'ance', 'iqUe','isme', 'able', 'iste', 'eux'))) !== false) {
 93 |             if ($this->inR2($position)) {
 94 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 95 |             }
 96 |             return 3;
 97 |         }
 98 | 
 99 |         // atrice   ateur   ation   atrices   ateurs   ations
100 |         //      delete if in R2
101 |         //      if preceded by ic, delete if in R2, else replace by iqU
102 |         if ( ($position = $this->search(array('atrices', 'ateurs', 'ations', 'atrice', 'ateur', 'ation'))) !== false) {
103 |             if ($this->inR2($position)) {
104 |                 $this->word = StringHelper::substr($this->word, 0, $position);
105 | 
106 |                 if ( ($position2 = $this->searchIfInR2(array('ic'))) !== false) {
107 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
108 |                 } else {
109 |                     $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
110 |                 }
111 |             }
112 | 
113 |             return 3;
114 |         }
115 | 
116 |         // logie   logies
117 |         //      replace with log if in R2
118 |         if ( ($position = $this->search(array('logies', 'logie'))) !== false) {
119 |             if ($this->inR2($position)) {
120 |                 $this->word = preg_replace('#(logies|logie)$#u', 'log', $this->word);
121 |             }
122 |             return 3;
123 |         }
124 | 
125 |         // usion   ution   usions   utions
126 |         //      replace with u if in R2
127 |         if ( ($position = $this->search(array('usions', 'utions', 'usion', 'ution'))) !== false) {
128 |             if ($this->inR2($position)) {
129 |                 $this->word = preg_replace('#(usion|ution|usions|utions)$#u', 'u', $this->word);
130 |             }
131 |             return 3;
132 |         }
133 | 
134 |         // ence   ences
135 |         //      replace with ent if in R2
136 |         if ( ($position = $this->search(array('ences', 'ence'))) !== false) {
137 |             if ($this->inR2($position)) {
138 |                 $this->word = preg_replace('#(ence|ences)$#u', 'ent', $this->word);
139 |             }
140 |             return 3;
141 |         }
142 | 
143 |         // issement   issements
144 |         //      delete if in R1 and preceded by a non-vowel
145 |         if ( ($position = $this->search(array('issements', 'issement'))) !== false) {
146 |             if ($this->inR1($position)) {
147 |                 $before = $position - 1;
148 |                 $letter = StringHelper::substr($this->word, $before, 1);
149 |                 if (! in_array($letter, self::$vowels, true)) {
150 |                     $this->word = StringHelper::substr($this->word, 0, $position);
151 |                 }
152 |             }
153 |             return 3;
154 |         }
155 | 
156 |         // ement   ements
157 |         //      delete if in RV
158 |         //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
159 |         //      if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
160 |         //      if preceded by abl or iqU, delete if in R2, otherwise,
161 |         //      if preceded by ièr or Ièr, replace by i if in RV
162 |         if ( ($position = $this->search(array('ements', 'ement'))) !== false) {
163 | 
164 |             // delete if in RV
165 |             if ($this->inRv($position)) {
166 |                 $this->word = StringHelper::substr($this->word, 0, $position);
167 |             }
168 | 
169 |             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
170 |             if ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
171 |                 $this->word = StringHelper::substr($this->word, 0, $position);
172 |                 if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
173 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
174 |                 }
175 | 
176 |             // if preceded by eus, delete if in R2, else replace by eux if in R1, otherwise,
177 |             } elseif ( ($position = $this->search(array('eus'))) !== false) {
178 |                 if ($this->inR2($position)) {
179 |                     $this->word = StringHelper::substr($this->word, 0, $position);
180 | 
181 |                 } elseif ($this->inR1($position)) {
182 |                     $this->word = preg_replace('#(eus)$#u', 'eux', $this->word);
183 |                 }
184 | 
185 |             // if preceded by abl or iqU, delete if in R2, otherwise,
186 |             } elseif ( ($position = $this->searchIfInR2(array('abl', 'iqU'))) !== false) {
187 |                 $this->word = StringHelper::substr($this->word, 0, $position);
188 | 
189 |             // if preceded by ièr or Ièr, replace by i if in RV
190 |             } elseif ( ($position = $this->searchIfInRv(array('ièr', 'Ièr'))) !== false) {
191 |                 $this->word = preg_replace('#(ièr|Ièr)$#u', 'i', $this->word);
192 |             }
193 |             return 3;
194 |         }
195 | 
196 |         // ité   ités
197 |         //      delete if in R2
198 |         //      if preceded by abil, delete if in R2, else replace by abl, otherwise,
199 |         //      if preceded by ic, delete if in R2, else replace by iqU, otherwise,
200 |         //      if preceded by iv, delete if in R2
201 |         if ( ($position = $this->search(array('ités', 'ité'))) !== false) {
202 | 
203 |             // delete if in R2
204 |             if ($this->inR2($position)) {
205 |                 $this->word = StringHelper::substr($this->word, 0, $position);
206 |             }
207 | 
208 |             // if preceded by abil, delete if in R2, else replace by abl, otherwise,
209 |             if ( ($position = $this->search(array('abil'))) !== false) {
210 |                 if ($this->inR2($position)) {
211 |                     $this->word = StringHelper::substr($this->word, 0, $position);
212 |                 } else {
213 |                     $this->word = preg_replace('#(abil)$#u', 'abl', $this->word);
214 |                 }
215 | 
216 |             // if preceded by ic, delete if in R2, else replace by iqU, otherwise,
217 |             } elseif ( ($position = $this->search(array('ic'))) !== false) {
218 |                 if ($this->inR2($position)) {
219 |                     $this->word = StringHelper::substr($this->word, 0, $position);
220 |                 } else {
221 |                     $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
222 |                 }
223 | 
224 |             // if preceded by iv, delete if in R2
225 |             } elseif ( ($position = $this->searchIfInR2(array('iv'))) !== false) {
226 |                 $this->word = StringHelper::substr($this->word, 0, $position);
227 |             }
228 | 
229 |             return 3;
230 |         }
231 | 
232 |         // if   ive   ifs   ives
233 |         //      delete if in R2
234 |         //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2, else replace by iqU)
235 |         if ( ($position = $this->search(array('ifs', 'ives', 'if', 'ive'))) !== false) {
236 | 
237 |             if ($this->inR2($position)) {
238 |                 $this->word = StringHelper::substr($this->word, 0, $position);
239 |             }
240 | 
241 |             if ( ($position = $this->searchIfInR2(array('at'))) !== false) {
242 |                 $this->word = StringHelper::substr($this->word, 0, $position);
243 | 
244 |                 if ( ($position2 = $this->search(array('ic'))) !== false) {
245 |                     if ($this->inR2($position2)) {
246 |                         $this->word = StringHelper::substr($this->word, 0, $position2);
247 |                     } else {
248 |                         $this->word = preg_replace('#(ic)$#u', 'iqU', $this->word);
249 |                     }
250 |                 }
251 |             }
252 | 
253 |             return 3;
254 |         }
255 | 
256 |         // eaux
257 |         //      replace with eau
258 |         if ( ($position = $this->search(array('eaux'))) !== false) {
259 |             $this->word = preg_replace('#(eaux)$#u', 'eau', $this->word);
260 |             return 3;
261 |         }
262 | 
263 |         // aux
264 |         //      replace with al if in R1
265 |         if ( ($position = $this->search(array('aux'))) !== false) {
266 |             if ($this->inR1($position)) {
267 |                 $this->word = preg_replace('#(aux)$#u', 'al', $this->word);
268 |             }
269 |             return 3;
270 |         }
271 | 
272 |         // euse   euses
273 |         //      delete if in R2, else replace by eux if in R1
274 |         if ( ($position = $this->search(array('euses', 'euse'))) !== false) {
275 |             if ($this->inR2($position)) {
276 |                 $this->word = StringHelper::substr($this->word, 0, $position);
277 | 
278 |             } elseif ($this->inR1($position)) {
279 |                 $this->word = preg_replace('#(euses|euse)$#u', 'eux', $this->word);
280 |             }
281 | 
282 |             return 3;
283 |         }
284 | 
285 |         // amment
286 |         //      replace with ant if in RV
287 |         if ( ($position = $this->search(array('amment'))) !== false) {
288 |             if ($this->inRv($position)) {
289 |                 $this->word = preg_replace('#(amment)$#u', 'ant', $this->word);
290 |             }
291 |             return 2;
292 |         }
293 | 
294 |         // emment
295 |         //      replace with ent if in RV
296 |         if ( ($position = $this->search(array('emment'))) !== false) {
297 |             if ($this->inRv($position)) {
298 |                 $this->word = preg_replace('#(emment)$#u', 'ent', $this->word);
299 |             }
300 |             return 2;
301 |         }
302 | 
303 |         // ment   ments
304 |         //      delete if preceded by a vowel in RV (no preceding letter exists when the suffix starts the word)
305 |         if ( ($position = $this->search(array('ments', 'ment'))) !== false) {
306 |             $before = $position - 1;
307 |             $letter = StringHelper::substr($this->word, $before, 1);
308 |             if ( ($before >= 0) && $this->inRv($before) && (in_array($letter, self::$vowels, true)) ) {
309 |                 $this->word = StringHelper::substr($this->word, 0, $position);
310 |             }
311 | 
312 |             return 2;
313 |         }
314 | 
315 |         return 2;
316 |     }
317 | 
318 |     /**
319 |      * Step 2a: Verb suffixes beginning i
320 |      *  In steps 2a and 2b all tests are confined to the RV region.
321 |      *  Search for the longest among the following suffixes and if found, delete if preceded by a non-vowel.
322 |      *      îmes   ît   îtes   i   ie   ies   ir   ira   irai   iraIent   irais   irait   iras   irent   irez   iriez
323 |      *      irions   irons   iront   is   issaIent   issais   issait   issant   issante   issantes   issants   isse
324 |      *      issent   isses   issez   issiez   issions   issons   it
325 |      *  (Note that the non-vowel itself must also be in RV.)
326 |      */
327 |     private function step2a()
328 |     {
329 |         if ( ($position = $this->searchIfInRv(array(
330 |             'îmes', 'îtes', 'ît', 'ies', 'ie', 'iraIent', 'irais', 'irait', 'irai', 'iras', 'ira', 'irent', 'irez', 'iriez',
331 |             'irions', 'irons', 'iront', 'ir', 'issaIent', 'issais', 'issait', 'issant', 'issantes', 'issante', 'issants',
332 |             'issent', 'isses', 'issez', 'isse', 'issiez', 'issions', 'issons', 'is', 'it', 'i'))) !== false) {
333 | 
334 |             $before = $position - 1;
335 |             $letter = StringHelper::substr($this->word, $before, 1);
336 |             if ( $this->inRv($before) && (!in_array($letter, self::$vowels)) ) {
337 |                 $this->word = StringHelper::substr($this->word, 0, $position);
338 | 
339 |                 return true;
340 |             }
341 |         }
342 | 
343 |         return false;
344 |     }
345 | 
346 |     /**
347 |      * Do step 2b if step 2a was done, but failed to remove a suffix.
348 |      * Step 2b: Other verb suffixes
349 |      */
350 |     private function step2b()
351 |     {
352 |         // é   ée   ées   és   èrent   er   era   erai   eraIent   erais   erait   eras   erez   eriez   erions   erons   eront   ez   iez
353 |         //      delete
354 |         if ( ($position = $this->searchIfInRv(array(
355 |             'ées', 'èrent', 'erais', 'erait', 'erai', 'eraIent', 'eras', 'erez', 'eriez',
356 |             'erions', 'erons', 'eront', 'era', 'er', 'iez', 'ez','és', 'ée', 'é'))) !== false) {
357 | 
358 |             $this->word = StringHelper::substr($this->word, 0, $position);
359 | 
360 |             return true;
361 |         }
362 | 
363 |         // âmes   ât   âtes   a   ai   aIent   ais   ait   ant   ante   antes   ants   as   asse   assent   asses   assiez   assions
364 |         //      delete
365 |         //      if preceded by e, delete
366 |         if ( ($position = $this->searchIfInRv(array(
367 |             'âmes', 'âtes', 'ât', 'aIent', 'ais', 'ait', 'antes', 'ante', 'ants', 'ant',
368 |             'assent', 'asses', 'assiez', 'assions', 'asse', 'as', 'ai', 'a'))) !== false) {
369 | 
370 |             $before = $position - 1;
371 |             $letter = StringHelper::substr($this->word, $before, 1);
372 |             if ( $this->inRv($before) && ($letter == 'e') ) {
373 |                 $this->word = StringHelper::substr($this->word, 0, $before);
374 | 
375 |             } else {
376 |                 $this->word = StringHelper::substr($this->word, 0, $position);
377 |             }
378 | 
379 |             return true;
380 |         }
381 | 
382 |         // ions
383 |         //      delete if in R2
384 |         if ( ($position = $this->searchIfInRv(array('ions'))) !== false) {
385 |             if ($this->inR2($position)) {
386 |                 $this->word = StringHelper::substr($this->word, 0, $position);
387 |             }
388 | 
389 |             return true;
390 |         }
391 | 
392 |         return false;
393 |     }
394 | 
395 |     /**
396 |      * Step 3: Replace final Y with i or final ç with c
397 |      */
398 |     private function step3()
399 |     {
400 |         $this->word = preg_replace('#(Y)$#u', 'i', $this->word);
401 |         $this->word = preg_replace('#(ç)$#u', 'c', $this->word);
402 |     }
403 | 
404 |     /**
405 |      * Step 4: Residual suffix
406 |      */
407 |     private function step4()
408 |     {
409 |         //If the word ends s, not preceded by a, i, o, u, è or s, delete it.
410 |         if (preg_match('#[^aiouès]s$#', $this->word)) {
411 |             $this->word = StringHelper::substr($this->word, 0, -1);
412 |         }
413 | 
414 |         // In the rest of step 4, all tests are confined to the RV region.
415 |         // ion
416 |         //      delete if in R2 and preceded by s or t
417 |         if ( (($position = $this->searchIfInRv(array('ion'))) !== false) && ($this->inR2($position)) ) {
418 |             $before = $position - 1;
419 |             $letter = StringHelper::substr($this->word, $before, 1);
420 |             if ( $this->inRv($before) && (($letter == 's') || ($letter == 't')) ) {
421 |                 $this->word = StringHelper::substr($this->word, 0, $position);
422 |             }
423 |             return true;
424 |         }
425 | 
426 |         // ier   ière   Ier   Ière
427 |         //      replace with i
428 |         if ( ($this->searchIfInRv(array('ier', 'ière', 'Ier', 'Ière'))) !== false) {
429 |             $this->word = preg_replace('#(ier|ière|Ier|Ière)$#u', 'i', $this->word);
430 |             return true;
431 |         }
432 | 
433 |         // e
434 |         //      delete
435 |         if ( ($this->searchIfInRv(array('e'))) !== false) {
436 |             $this->word = StringHelper::substr($this->word, 0, -1);
437 |             return true;
438 |         }
439 | 
440 |         // ë
441 |         //      if preceded by gu, delete
442 |         if ( ($position = $this->searchIfInRv(array('guë'))) !== false) {
443 |             if ($this->inRv($position+2)) {
444 |                 $this->word = StringHelper::substr($this->word, 0, -1);
445 |                 return true;
446 |             }
447 |         }
448 | 
449 |         return false;
450 |     }
451 | 
452 |     /**
453 |      * Step 5: Undouble
454 |      * If the word ends enn, onn, ett, ell or eill, delete the last letter
455 |      */
456 |     private function step5()
457 |     {
458 |         if ($this->search(array('enn', 'onn', 'ett', 'ell', 'eill')) !== false) {
459 |             $this->word = StringHelper::substr($this->word, 0, -1);
460 |         }
461 |     }
462 | 
463 |     /**
464 |      * Step 6: Un-accent
465 |      * If the words ends é or è followed by at least one non-vowel, remove the accent from the e.
466 |      */
467 |     private function step6()
468 |     {
469 |         $this->word = preg_replace('#(é|è)([^'.$this->plainVowels.']+)$#u', 'e$2', $this->word);
470 |     }
471 | 
472 |     /**
473 |      * And finally:
474 |      * Turn any remaining I, U and Y letters in the word back into lower case.
475 |      */
476 |     private function finish()
477 |     {
478 |         $this->word = str_replace(array('I','U','Y'), array('i', 'u', 'y'), $this->word);
479 |     }
480 | 
481 |     /**
482 |      *  If the word begins with two vowels, RV is the region after the third letter,
483 |      *  otherwise the region after the first vowel not at the beginning of the word,
484 |      *  or the end of the word if these positions cannot be found.
485 |      *  (Exceptionally, par, col or tap, at the begining of a word is also taken to define RV as the region to their right.)
486 |      */
487 |     protected function rv()
488 |     {
489 |         $length = StringHelper::strlen($this->word);
490 | 
491 |         $this->rv = '';
492 |         $this->rvIndex = $length;
493 | 
494 |         if ($length < 3) {
495 |             return true;
496 |         }
497 | 
498 |         // If the word begins with two vowels, RV is the region after the third letter
499 |         $first = StringHelper::substr($this->word, 0, 1);
500 |         $second = StringHelper::substr($this->word, 1, 1);
501 | 
502 |         if ( (in_array($first, self::$vowels)) && (in_array($second, self::$vowels)) ) {
503 |             $this->rv = StringHelper::substr($this->word, 3);
504 |             $this->rvIndex = 3;
505 |             return true;
506 |         }
507 | 
508 |         // (Exceptionally, par, col or tap, at the begining of a word is also taken to define RV as the region to their right.)
509 |         $begin3 = StringHelper::substr($this->word, 0, 3);
510 |         if (in_array($begin3, array('par', 'col', 'tap'))) {
511 |             $this->rv = StringHelper::substr($this->word, 3);
512 |             $this->rvIndex = 3;
513 |             return true;
514 |         }
515 | 
516 |         //  otherwise the region after the first vowel not at the beginning of the word,
517 |         for ($i=1; $i<$length; $i++) {
518 |             $letter = StringHelper::substr($this->word, $i, 1);
519 |             if (in_array($letter, self::$vowels)) {
520 |                 $this->rv = StringHelper::substr($this->word, ($i + 1));
521 |                 $this->rvIndex = $i + 1;
522 |                 return true;
523 |             }
524 |         }
525 | 
526 |         return false;
527 |     }
528 | }
529 | 


--------------------------------------------------------------------------------
/src/Stemmer/German.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/german/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class German extends Stem
 14 | {
 15 |     /**
 16 |      * All German vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö', 'ü');
 19 | 
 20 |     protected static $sEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 'r' ,'t');
 21 | 
 22 |     protected static $stEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't');
 23 | 
 24 |     /**
 25 |      * {@inheritdoc}
 26 |      */
 27 |     public function stem($word)
 28 |     {
 29 |         $this->plainVowels = implode('', self::$vowels);
 30 | 
 31 |         $this->word = StringHelper::strtolower($word);
 32 | 
 33 |         // First, replace ß by ss
 34 |         $this->word = str_replace('ß', 'ss', $this->word);
 35 | 
 36 |         // put u and y between vowels into upper case
 37 |         $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
 38 |         $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
 39 | 
 40 |         //  R1 and R2 are first set up in the standard way
 41 |         $this->r1();
 42 |         $this->r2();
 43 | 
 44 |         // but then R1 is adjusted so that the region before it contains at least 3 letters.
 45 |         if ($this->r1Index < 3) {
 46 |             $this->r1Index = 3;
 47 |             $this->r1 = StringHelper::substr($this->word, 3);
 48 |         }
 49 | 
 50 |         $this->step1();
 51 |         $this->step2();
 52 |         $this->step3();
 53 |         $this->finish();
 54 | 
 55 |         return $this->word;
 56 |     }
 57 | 
 58 |     /**
 59 |      * Step 1
 60 |      */
 61 |     private function step1()
 62 |     {
 63 |         // delete if in R1
 64 |         if ( ($position = $this->search(array('em', 'ern', 'er'))) !== false) {
 65 |             if ($this->inR1($position)) {
 66 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 67 |             }
 68 |             return true;
 69 |         }
 70 | 
 71 |         // delete if in R1
 72 |         if ( ($position = $this->search(array('es', 'en', 'e'))) !== false) {
 73 |             if ($this->inR1($position)) {
 74 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 75 | 
 76 |                 //If an ending of group (b) is deleted, and the ending is preceded by niss, delete the final s
 77 |                 if ($this->search(array('niss')) !== false) {
 78 |                     $this->word = StringHelper::substr($this->word, 0, -1);
 79 |                 }
 80 |             }
 81 |             return true;
 82 |         }
 83 | 
 84 |         // s (preceded by a valid s-ending)
 85 |         if ( ($position = $this->search(array('s'))) !== false) {
 86 |             if ($this->inR1($position)) {
 87 |                 $before = $position - 1;
 88 |                 $letter = StringHelper::substr($this->word, $before, 1);
 89 | 
 90 |                 if (in_array($letter, self::$sEndings)) {
 91 |                     $this->word = StringHelper::substr($this->word, 0, $position);
 92 |                 }
 93 |             }
 94 |             return true;
 95 |         }
 96 | 
 97 |         return false;
 98 |     }
 99 | 
100 |     /**
101 |      * Step 2
102 |      */
103 |     private function step2()
104 |     {
105 |         // en   er   est
106 |         //      delete if in R1
107 |         if ( ($position = $this->search(array('en', 'er', 'est'))) !== false) {
108 |             if ($this->inR1($position)) {
109 |                 $this->word = StringHelper::substr($this->word, 0, $position);
110 |             }
111 |             return true;
112 |         }
113 | 
114 |         // st (preceded by a valid st-ending, itself preceded by at least 3 letters)
115 |         //      delete if in R1
116 |         if ( ($position = $this->search(array('st'))) !== false) {
117 |             if ($this->inR1($position)) {
118 |                 $before = $position - 1;
119 |                 if ($before >= 3) {
120 |                     $letter = StringHelper::substr($this->word, $before, 1);
121 | 
122 |                     if (in_array($letter, self::$stEndings)) {
123 |                         $this->word = StringHelper::substr($this->word, 0, $position);
124 |                     }
125 |                 }
126 |             }
127 |             return true;
128 |         }
129 |         return false;
130 |     }
131 | 
132 |     /**
133 |      * Step 3: d-suffixes
134 |      */
135 |     private function step3()
136 |     {
137 |         // end   ung
138 |         //      delete if in R2
139 |         //      if preceded by ig, delete if in R2 and not preceded by e
140 |         if ( ($position = $this->search(array('end', 'ung'))) !== false) {
141 |             if ($this->inR2($position)) {
142 |                 $this->word = StringHelper::substr($this->word, 0, $position);
143 |             }
144 | 
145 |             if ( ($position2 = $this->search(array('ig'))) !== false) {
146 |                 $before = $position2 - 1;
147 |                 $letter = StringHelper::substr($this->word, $before, 1);
148 | 
149 |                 if ( ($this->inR2($position2)) && ($letter != 'e') ) {
150 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
151 |                 }
152 |             }
153 |             return true;
154 |         }
155 | 
156 |         // ig   ik   isch
157 |         //      delete if in R2 and not preceded by e
158 |         if ( ($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
159 |             $before = $position - 1;
160 |             $letter = StringHelper::substr($this->word, $before, 1);
161 | 
162 |             if ( ($this->inR2($position)) && ($letter != 'e') ) {
163 |                 $this->word = StringHelper::substr($this->word, 0, $position);
164 |             }
165 |             return true;
166 |         }
167 | 
168 |         // lich   heit
169 |         //      delete if in R2
170 |         //      if preceded by er or en, delete if in R1
171 |         if ( ($position = $this->search(array('lich', 'heit'))) != false) {
172 |             if ($this->inR2($position)) {
173 |                 $this->word = StringHelper::substr($this->word, 0, $position);
174 |             }
175 | 
176 |             if ( ($position2 = $this->search(array('er', 'en'))) !== false) {
177 |                 if ($this->inR1($position2)) {
178 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
179 |                 }
180 |             }
181 |             return true;
182 |         }
183 | 
184 |         // keit
185 |         //      delete if in R2
186 |         //      if preceded by lich or ig, delete if in R2
187 |         if ( ($position = $this->search(array('keit'))) != false) {
188 |             if ($this->inR2($position)) {
189 |                 $this->word = StringHelper::substr($this->word, 0, $position);
190 |             }
191 | 
192 |             if ( ($position2 = $this->search(array('lich', 'ig'))) !== false) {
193 |                 if ($this->inR2($position2)) {
194 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
195 |                 }
196 |             }
197 |             return true;
198 |         }
199 | 
200 |         return false;
201 |     }
202 | 
203 |     /**
204 |      * Finally
205 |      */
206 |     private function finish()
207 |     {
208 |         // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
209 |         $this->word = str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
210 |     }
211 | }
212 | 


--------------------------------------------------------------------------------
/src/Stemmer/Italian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/italian/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Italian extends Stem
 14 | {
 15 |     /**
 16 |      * All Italian vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'à', 'è', 'ì', 'ò', 'ù');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->plainVowels = implode('', self::$vowels);
 26 | 
 27 |         $this->word = StringHelper::strtolower($word);
 28 | 
 29 |         // First, replace all acute accents by grave accents.
 30 |         $this->word = str_replace(array('á', 'é', 'í', 'ó', 'ú'), array('à', 'è', 'ì', 'ò', 'ù'), $this->word);
 31 | 
 32 |         //And, as in French, put u after q, and u, i between vowels into upper case. (See note on vowel marking.) The vowels are then
 33 |         $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
 34 |         $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
 35 |         $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
 36 | 
 37 |         $this->rv();
 38 |         $this->r1();
 39 |         $this->r2();
 40 | 
 41 |         $this->step0();
 42 | 
 43 |         $word = $this->word;
 44 |         $this->step1();
 45 | 
 46 |         //Do step 2 if no ending was removed by step 1.
 47 |         if ($word == $this->word) {
 48 |             $this->step2();
 49 |         }
 50 | 
 51 |         $this->step3a();
 52 |         $this->step3b();
 53 |         $this->finish();
 54 | 
 55 |         return $this->word;
 56 |     }
 57 | 
 58 |     /**
 59 |      * Step 0: Attached pronoun
 60 |      */
 61 |     private function step0()
 62 |     {
 63 |         // Search for the longest among the following suffixes
 64 |         if ( ($position = $this->search(array(
 65 |             'gliela', 'gliele', 'glieli', 'glielo', 'gliene',
 66 |             'sene', 'mela', 'mele', 'meli', 'melo', 'mene', 'tela', 'tele', 'teli', 'telo', 'tene', 'cela',
 67 |             'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
 68 |             'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'ci'))) !== false) {
 69 | 
 70 |             $suffixe = StringHelper::substr($this->word, $position);
 71 | 
 72 |             // following one of (in RV)
 73 |              // a
 74 |             $a = array('ando', 'endo');
 75 |             $a = array_map(function($item) use ($suffixe) {
 76 |                 return $item . $suffixe;
 77 |             }, $a);
 78 |             // In case of (a) the suffix is deleted
 79 |             if ($this->searchIfInRv($a) !== false) {
 80 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 81 |             }
 82 | 
 83 |             //b
 84 |             $b = array('ar', 'er', 'ir');
 85 |             $b = array_map(function($item) use ($suffixe) {
 86 |                 return $item . $suffixe;
 87 |             }, $b);
 88 |             // in case (b) it is replace by e
 89 |             if ($this->searchIfInRv($b) !== false) {
 90 |                 $this->word = preg_replace('#('.$suffixe.')$#u', 'e', $this->word);
 91 |             }
 92 | 
 93 |             return true;
 94 |         }
 95 | 
 96 |         return false;
 97 |     }
 98 | 
 99 |     /**
100 |      * Step 1: Standard suffix removal
101 |      */
102 |     private function step1()
103 |     {
104 |         // amente
105 |         //      delete if in R1
106 |         //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
107 |         //      if preceded by os, ic or abil, delete if in R2
108 |         if ( ($position = $this->search(array('amente'))) !== false) {
109 |             if ($this->inR1($position)) {
110 |                 $this->word = StringHelper::substr($this->word, 0, $position);
111 |             }
112 | 
113 |             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
114 |             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
115 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
116 |                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
117 |                     $this->word = StringHelper::substr($this->word, 0, $position3);
118 |                 }
119 | 
120 |                 // if preceded by os, ic or ad, delete if in R2
121 |             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'abil'))) != false) {
122 |                 $this->word = StringHelper::substr($this->word, 0, $position4);
123 |             }
124 |             return true;
125 |         }
126 | 
127 |         // delete if in R2
128 |         if ( ($position = $this->search(array(
129 |             'ibili', 'atrice', 'abili', 'abile', 'ibile', 'atrici', 'mente',
130 |             'anza', 'anze', 'iche', 'ichi', 'ismo', 'ismi', 'ista', 'iste', 'isti', 'istà', 'istè', 'istì', 'ante', 'anti',
131 |             'ico', 'ici', 'ica', 'ice', 'oso', 'osi', 'osa', 'ose'
132 |         ))) !== false) {
133 | 
134 |             if ($this->inR2($position)) {
135 |                 $this->word = StringHelper::substr($this->word, 0, $position);
136 |             }
137 |             return true;
138 |         }
139 | 
140 |         // azione   azioni   atore   atori
141 |         //      delete if in R2
142 |         //      if preceded by ic, delete if in R2
143 |         if ( ($position = $this->search(array('azione', 'azioni', 'atore', 'atori'))) !== false) {
144 |             if ($this->inR2($position)) {
145 |                 $this->word = StringHelper::substr($this->word, 0, $position);
146 | 
147 |                 if ( ($position2 = $this->search(array('ic'))) !== false) {
148 |                     if ($this->inR2($position2)) {
149 |                         $this->word = StringHelper::substr($this->word, 0, $position2);
150 |                     }
151 |                 }
152 |             }
153 |             return true;
154 |         }
155 | 
156 |         // logia   logie
157 |         //      replace with log if in R2
158 |         if ( ($position = $this->search(array('logia', 'logie'))) !== false) {
159 |             if ($this->inR2($position)) {
160 |                 $this->word = preg_replace('#(logia|logie)$#u', 'log', $this->word);
161 |             }
162 |             return true;
163 |         }
164 | 
165 |         // uzione   uzioni   usione   usioni
166 |         //      replace with u if in R2
167 |         if ( ($position = $this->search(array('uzione', 'uzioni', 'usione', 'usioni'))) !== false) {
168 |             if ($this->inR2($position)) {
169 |                 $this->word = preg_replace('#(uzione|uzioni|usione|usioni)$#u', 'u', $this->word);
170 |             }
171 |             return true;
172 |         }
173 | 
174 |         // enza   enze
175 |         //      replace with ente if in R2
176 |         if ( ($position = $this->search(array('enza', 'enze'))) !== false) {
177 |             if ($this->inR2($position)) {
178 |                 $this->word = preg_replace('#(enza|enze)$#u', 'ente', $this->word);
179 |             }
180 |             return true;
181 |         }
182 | 
183 |         // amento   amenti   imento   imenti
184 |         //      delete if in RV
185 |         if ( ($position = $this->search(array('amento', 'amenti', 'imento', 'imenti'))) !== false) {
186 |             if ($this->inRv($position)) {
187 |                 $this->word = StringHelper::substr($this->word, 0, $position);
188 |             }
189 |             return true;
190 |         }
191 | 
192 |         // ità
193 |         //      delete if in R2
194 |         //      if preceded by abil, ic or iv, delete if in R2
195 |         if ( ($position = $this->search(array('ità'))) !== false) {
196 |             if ($this->inR2($position)) {
197 |                 $this->word = StringHelper::substr($this->word, 0, $position);
198 |             }
199 | 
200 |             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) != false) {
201 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
202 |             }
203 |             return true;
204 |         }
205 | 
206 |         // ivo   ivi   iva   ive
207 |         //      delete if in R2
208 |         //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2)
209 |         if ( ($position = $this->search(array('ivo', 'ivi', 'iva', 'ive'))) !== false) {
210 |             if ($this->inR2($position)) {
211 |                 $this->word = StringHelper::substr($this->word, 0, $position);
212 |             }
213 | 
214 |             if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
215 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
216 |                 if ( ($position3 = $this->searchIfInR2(array('ic'))) !== false) {
217 |                     $this->word = StringHelper::substr($this->word, 0, $position3);
218 |                 }
219 |             }
220 |             return true;
221 |         }
222 | 
223 |         return false;
224 |     }
225 | 
226 |     /**
227 |      * Step 2: Verb suffixes
228 |      * Search for the longest among the following suffixes in RV, and if found, delete.
229 |      */
230 |     private function step2()
231 |     {
232 |         if ( ($position = $this->searchIfInRv(array(
233 |             'assimo', 'assero', 'eranno', 'erebbero', 'erebbe', 'eremmo', 'ereste', 'eresti', 'essero', 'iranno', 'irebbero', 'irebbe', 'iremmo',
234 |             'iscano', 'ireste', 'iresti', 'iscono', 'issero',
235 |             'avamo', 'arono', 'avano', 'avate', 'eremo', 'erete', 'erono', 'evamo', 'evano', 'evate', 'ivamo', 'ivano', 'ivate', 'iremo', 'irete', 'irono',
236 |             'ammo', 'ando', 'asse', 'assi', 'emmo', 'enda', 'ende', 'endi', 'endo', 'erai', 'erei', 'Yamo', 'iamo', 'immo', 'irà', 'irai', 'irei',
237 |             'isca', 'isce', 'isci', 'isco',
238 |             'ano', 'are', 'ata', 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', 'erà', 'ere', 'erò', 'ete', 'eva',
239 |             'evi', 'evo', 'ire', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', 'irò', 'ar', 'ir'))) !== false) {
240 | 
241 |             $this->word = StringHelper::substr($this->word, 0, $position);
242 |         }
243 |     }
244 | 
245 |     /**
246 |      * Step 3a
247 |      * Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a preceding i if it is in RV
248 |      */
249 |     private function step3a()
250 |     {
251 |         if ($this->searchIfInRv(array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò')) !== false) {
252 |             $this->word = StringHelper::substr($this->word, 0, -1);
253 | 
254 |             if ($this->searchIfInRv(array('i')) !== false) {
255 |                 $this->word = StringHelper::substr($this->word, 0, -1);
256 |             }
257 |             return true;
258 |         }
259 |         return false;
260 |     }
261 | 
262 |     /**
263 |      * Step 3b
264 |      * Replace final ch (or gh) with c (or g) if in RV (crocch -> crocc)
265 |      */
266 |     private function step3b()
267 |     {
268 |         if ($this->searchIfInRv(array('ch')) !== false) {
269 |             $this->word = preg_replace('#(ch)$#u', 'c', $this->word);
270 | 
271 |         } elseif ($this->searchIfInRv(array('gh')) !== false) {
272 |             $this->word = preg_replace('#(gh)$#u', 'g', $this->word);
273 |         }
274 |     }
275 | 
276 |     /**
277 |      * Finally
278 |      * turn I and U back into lower case
279 |      */
280 |     private function finish()
281 |     {
282 |         $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
283 |     }
284 | }
285 | 


--------------------------------------------------------------------------------
/src/Stemmer/Norwegian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  * Norwegian stemmer, following the Snowball algorithm described at the link below.
  9 |  * @link http://snowball.tartarus.org/algorithms/norwegian/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Norwegian extends Stem
 14 | {
 15 |     /**
 16 |      * All norwegian vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'æ', 'å', 'ø');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         // R2 is not used: R1 is defined in the same way as in the German stemmer
 28 |         $this->r1();
 29 | 
 30 |         // then R1 is adjusted so that the region before it contains at least 3 letters.
 31 |         if ($this->r1Index < 3) {
 32 |             $this->r1Index = 3;
 33 |             $this->r1 = StringHelper::substr($this->word, 3);
 34 |         }
 35 | 
 36 |         // Do each of steps 1, 2 and 3.
 37 |         $this->step1();
 38 |         $this->step2();
 39 |         $this->step3();
 40 | 
 41 |         return $this->word;
 42 |     }
 43 | 
 44 |     /**
 45 |      * Tell whether the given word ends with a valid s-ending, i.e. one of
 46 |      * b   c   d   f   g   h   j   l   m   n   o   p   r   t   v   y   z,
 47 |      * or k not preceded by a vowel
 48 |      *
 49 |      * @param string $word the candidate stem, i.e. the word with its final "s" already cut off
 50 |      * @return boolean
 51 |      */
 52 |     private function hasValidSEnding($word)
 53 |     {
 54 |         $lastLetter = StringHelper::substr($word, -1, 1);
 55 |         if (in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z'), true)) {
 56 |             return true;
 57 |         }
 58 |         if ($lastLetter === 'k') {
 59 |             $beforeLetter = StringHelper::substr($word, -2, 1);
 60 |             if (!in_array($beforeLetter, self::$vowels, true)) {
 61 |                 return true;
 62 |             }
 63 |         }
 64 |         return false;
 65 |     }
 66 | 
 67 |     /**
 68 |      * Step 1: search for the longest among the following suffixes in R1, and perform the action indicated.
 69 |      * @return boolean true when searchIfInR1() matched one of the step 1 suffixes, false otherwise
 70 |      */
 71 |     private function step1()
 72 |     {
 73 |         //  erte   ert
 74 |         //      replace with er
 75 |         if ($this->searchIfInR1(array('erte', 'ert')) !== false) {
 76 |             $this->word = preg_replace('#(erte|ert)$#u', 'er', $this->word);
 77 |             return true;
 78 |         }
 79 | 
 80 |         // a   e   ede   ande   ende   ane   ene   hetene   en   heten   ar   er   heter   as   es   edes   endes   enes   hetenes   ens   hetens   ers   ets   et   het   ast
 81 |         //      delete
 82 |         if ( ($position = $this->searchIfInR1(array(
 83 |             'hetenes', 'hetene', 'hetens', 'heten', 'endes', 'heter', 'ande', 'ende', 'enes', 'edes', 'ede', 'ane',
 84 |             'ene', 'het', 'ers', 'ets', 'ast', 'ens', 'en', 'ar', 'er', 'as', 'es', 'et', 'a', 'e'
 85 |         ))) !== false) {
 86 |             $this->word = StringHelper::substr($this->word, 0, $position);
 87 |             return true;
 88 |         }
 89 | 
 90 |         //  s: delete if preceded by a valid s-ending (the step still counts as matched when the s is kept)
 91 |         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
 92 |             $withoutS = StringHelper::substr($this->word, 0, $position);
 93 |             if ($this->hasValidSEnding($withoutS)) {
 94 |                 $this->word = $withoutS;
 95 |             }
 96 |             return true;
 97 |         }
 98 |         return false;
 99 |     }
100 | 
101 |     /**
102 |      * Step 2
103 |      * If the word ends dt or vt in R1, delete the t.
104 |      */
105 |     private function step2()
106 |     {
107 |         if ($this->searchIfInR1(array('dt', 'vt')) !== false) {
108 |             $this->word = StringHelper::substr($this->word, 0, -1);
109 |         }
110 |     }
111 | 
112 |     /**
113 |      * Step 3:
114 |      * Search for the longest among the following suffixes in R1, and if found, delete.
115 |      */
116 |     private function step3()
117 |     {
118 |         // leg   eleg   ig   eig   lig   elig   els   lov   elov   slov   hetslov
119 |         if ( ($position = $this->searchIfInR1(array(
120 |             'hetslov', 'eleg', 'elov', 'slov', 'elig', 'eig', 'lig', 'els', 'lov', 'leg', 'ig'
121 |         ))) !== false) {
122 |             $this->word = StringHelper::substr($this->word, 0, $position);
123 |         }
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/src/Stemmer/Portuguese.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  * Portuguese stemmer, following the Snowball algorithm described at the link below.
  9 |  * @link http://snowball.tartarus.org/algorithms/portuguese/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Portuguese extends Stem
 14 | {
 15 |     /**
 16 |      * All Portuguese vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'â', 'ê', 'ô');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 |         // ã and õ are written a~ and o~ while stemming; finish() turns them back
 27 |         $this->word = str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
 28 | 
 29 |         $this->rv();
 30 |         $this->r1();
 31 |         $this->r2();
 32 | 
 33 |         $beforeSteps = $this->word;
 34 |         $this->step1();
 35 |         // Step 2 only runs when step 1 left the word untouched
 36 |         if ($beforeSteps === $this->word) {
 37 |             $this->step2();
 38 |         }
 39 |         // Step 3 when step 1 or 2 altered the word, step 4 otherwise
 40 |         if ($beforeSteps !== $this->word) {
 41 |             $this->step3();
 42 |         } else {
 43 |             $this->step4();
 44 |         }
 45 | 
 46 |         $this->step5();
 47 |         $this->finish();
 48 | 
 49 |         return $this->word;
 50 |     }
 51 | 
 52 |     /**
 53 |      * Step 1: Standard suffix removal
 54 |      */
 55 |     private function step1()
 56 |     {
 57 |         // delete if in R2
 58 |         if ( ($position = $this->search(array(
 59 |             'amentos', 'imentos', 'adoras', 'adores', 'amento', 'imento', 'adora', 'istas', 'ismos', 'antes', 'ância',
 60 |             'ezas', 'eza', 'icos', 'icas', 'ismo', 'ável', 'ível', 'ista', 'oso',
 61 |             'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es', 'ante'))) !== false) {
 62 | 
 63 |             if ($this->inR2($position)) {
 64 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 65 |             }
 66 |             return true;
 67 |         }
 68 | 
 69 |         // logía   logías
 70 |         //      replace with log if in R2
 71 |         if ( ($position = $this->search(array('logías', 'logía'))) !== false) {
 72 |             if ($this->inR2($position)) {
 73 |                 $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
 74 |             }
 75 |             return true;
 76 |         }
 77 | 
 78 |         // ución   uciones
 79 |         //      replace with u if in R2
 80 |         if ( ($position = $this->search(array('uciones', 'ución'))) !== false) {
 81 |             if ($this->inR2($position)) {
 82 |                 $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
 83 |             }
 84 |             return true;
 85 |         }
 86 | 
 87 |         // ência    ências
 88 |         //      replace with ente if in R2
 89 |         if ( ($position = $this->search(array('ências', 'ência'))) !== false) {
 90 |             if ($this->inR2($position)) {
 91 |                 $this->word = preg_replace('#(ências|ência)$#u', 'ente', $this->word);
 92 |             }
 93 |             return true;
 94 |         }
 95 | 
 96 |         // amente
 97 |         //      delete if in R1
 98 |         //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
 99 |         //      if preceded by os, ic or ad, delete if in R2
100 |         if ( ($position = $this->search(array('amente'))) !== false) {
101 | 
102 |             // delete if in R1
103 |             if ($this->inR1($position)) {
104 |                 $this->word = StringHelper::substr($this->word, 0, $position);
105 |             }
106 | 
107 |             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
108 |             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
109 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
110 |                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
111 |                     $this->word = StringHelper::substr($this->word, 0, $position3);
112 |                 }
113 | 
114 |                 // if preceded by os, ic or ad, delete if in R2
115 |             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
116 |                 $this->word = StringHelper::substr($this->word, 0, $position4);
117 |             }
118 |             return true;
119 |         }
120 | 
121 |         // mente
122 |         //      delete if in R2
123 |         //      if preceded by ante, avel or ível, delete if in R2
124 |         if ( ($position = $this->search(array('mente'))) !== false) {
125 | 
126 |             // delete if in R2
127 |             if ($this->inR2($position)) {
128 |                 $this->word = StringHelper::substr($this->word, 0, $position);
129 |             }
130 | 
131 |             // if preceded by ante, avel or ível, delete if in R2
132 |             if ( ($position2 = $this->searchIfInR2(array('ante', 'avel', 'ível'))) !== false) {
133 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
134 |             }
135 |             return true;
136 |         }
137 | 
138 |         // idade   idades
139 |         //      delete if in R2
140 |         //      if preceded by abil, ic or iv, delete if in R2
141 |         if ( ($position = $this->search(array('idades', 'idade'))) !== false) {
142 | 
143 |             // delete if in R2
144 |             if ($this->inR2($position)) {
145 |                 $this->word = StringHelper::substr($this->word, 0, $position);
146 |             }
147 | 
148 |             // if preceded by abil, ic or iv, delete if in R2
149 |             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
150 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
151 |             }
152 |             return true;
153 |         }
154 | 
155 |         // iva   ivo   ivas   ivos
156 |         //      delete if in R2
157 |         //      if preceded by at, delete if in R2
158 |         if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
159 | 
160 |             // delete if in R2
161 |             if ($this->inR2($position)) {
162 |                 $this->word = StringHelper::substr($this->word, 0, $position);
163 |             }
164 | 
165 |             // if preceded by at, delete if in R2
166 |             if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
167 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
168 |             }
169 |             return true;
170 |         }
171 | 
172 |         // ira   iras
173 |         //      replace with ir if in RV and preceded by e
174 |         if ( ($position = $this->search(array('iras', 'ira'))) !== false) {
175 | 
176 |             if ($this->inRv($position)) {
177 |                 $before = $position - 1;
178 |                 $letter = StringHelper::substr($this->word, $before, 1);
179 | 
180 |                 if ($letter === 'e') {
181 |                     $this->word = preg_replace('#(iras|ira)$#u', 'ir', $this->word);
182 |                 }
183 |             }
184 |             return true;
185 |         }
186 | 
187 |         return false;
188 |     }
189 | 
190 |     /**
191 |      * Step 2: Verb suffixes
192 |      * Search for the longest among the following suffixes in RV, and if found, delete.
193 |      */
194 |     private function step2()
195 |     {
196 |         if ( ($position = $this->searchIfInRv(array(
197 |             'aríamos', 'eríamos', 'iríamos', 'ássemos', 'êssemos', 'íssemos',
198 |             'aríeis', 'eríeis', 'iríeis', 'ásseis', 'ésseis', 'ísseis', 'áramos', 'éramos', 'íramos', 'ávamos',
199 |             'aremos', 'eremos', 'iremos',
200 |             'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes',
201 |             'asses', 'esses', 'isses', 'astes', 'estes', 'istes', 'áreis', 'areis', 'éreis', 'ereis', 'íreis', 'ireis',
202 |             'áveis', 'íamos', 'armos', 'ermos', 'irmos',
203 |             'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'adas', 'idas',
204 |             'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'ara~o', 'era~o', 'ira~o',
205 |             'arás', 'aras', 'erás', 'eras', 'irás', 'avas', 'ares', 'eres', 'ires', 'íeis', 'ados', 'idos', 'ámos', 'amos',
206 |             'emos', 'imos', 'iras',
207 |             'ada', 'ida', 'ará', 'ara', 'erá', 'era', 'irá', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira',
208 |             'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou',
209 |         ))) !== false) {
210 | 
211 |             $this->word = StringHelper::substr($this->word, 0, $position);
212 |             return true;
213 |         }
214 |         return false;
215 |     }
216 | 
217 |     /**
218 |      * Step 3: d-suffixes
219 |      *
220 |      */
221 |     private function step3()
222 |     {
223 |         // Delete suffix i if in RV and preceded by c
224 |         if ($this->searchIfInRv(array('i')) !== false) {
225 |             $letter = StringHelper::substr($this->word, -2, 1);
226 | 
227 |             if ($letter === 'c') {
228 |                 $this->word = StringHelper::substr($this->word, 0, -1);
229 |             }
230 |             return true;
231 |         }
232 |         return false;
233 |     }
234 | 
235 |     /**
236 |      * Step 4
237 |      */
238 |     private function step4()
239 |     {
240 |         // If the word ends with one of the suffixes "os   a   i   o   á   í   ó" in RV, delete it
241 |         if ( ($position = $this->searchIfInRv(array('os', 'a', 'i', 'o', 'á', 'í', 'ó'))) !== false) {
242 |             $this->word = StringHelper::substr($this->word, 0, $position);
243 |             return true;
244 |         }
245 |         return false;
246 |     }
247 | 
248 |     /**
249 |      * Step 5
250 |      */
251 |     private function step5()
252 |     {
253 |         // If the word ends with one of "e   é   ê" in RV, delete it, and if preceded by gu (or ci) with the u (or i) in RV, delete the u (or i).
254 |         if ($this->searchIfInRv(array('e', 'é', 'ê')) !== false) {
255 |             $this->word = StringHelper::substr($this->word, 0, -1);
256 | 
257 |             if ( ($position2 = $this->search(array('gu', 'ci'))) !== false) {
258 |                 if ($this->inRv(($position2 + 1))) {
259 |                     $this->word = StringHelper::substr($this->word, 0, -1);
260 |                 }
261 |             }
262 |             return true;
263 |         } elseif ($this->search(array('ç')) !== false) {
264 |             $this->word = preg_replace('#(ç)$#u', 'c', $this->word);
265 |             return true;
266 |         }
267 |         return false;
268 |     }
269 | 
270 |     /**
271 |      * Finally
272 |      */
273 |     private function finish()
274 |     {
275 |         // turn a~ and o~ back into ã and õ
276 |         $this->word = str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
277 |     }
278 | }
279 | 


--------------------------------------------------------------------------------
/src/Stemmer/Romanian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  * Romanian stemmer, following the Snowball algorithm described at the link below.
  9 |  * @link http://snowball.tartarus.org/algorithms/romanian/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Romanian extends Stem
 14 | {
 15 |     /**
 16 |      * All Romanian vowels
 17 |      */
 18 |     protected static $vowels = array('a', 'ă', 'â', 'e', 'i', 'î', 'o', 'u');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         $this->plainVowels = implode('', self::$vowels);
 28 | 
 29 |         //  First, i and u between vowels are put into upper case (so that they are treated as consonants).
 30 |         $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
 31 |         $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);
 32 | 
 33 |         $this->rv();
 34 |         $this->r1();
 35 |         $this->r2();
 36 | 
 37 |         $this->step0();
 38 | 
 39 |         // Remember the word as it stands before steps 1 and 2: step 3 depends on it.
 40 |         $beforeSuffixSteps = $this->word;
 41 | 
 42 |         do {
 43 |             $beforeStep1 = $this->word;
 44 |             $this->step1();
 45 |         } while ($this->word !== $beforeStep1);
 46 | 
 47 |         $this->step2();
 48 | 
 49 |         // Do step 3 if no suffix was removed either by step 1 or step 2.
 50 |         if ($beforeSuffixSteps === $this->word) {
 51 |             $this->step3();
 52 |         }
 53 | 
 54 |         $this->step4();
 55 |         $this->finish();
 56 | 
 57 |         return $this->word;
 58 |     }
 59 | 
 60 |     /**
 61 |      * Step 0: Removal of plurals (and other simplifications)
 62 |      * Search for the longest among the following suffixes, and, if it is in R1, perform the action indicated.
 63 |      * @return boolean
 64 |      */
 65 |     private function step0()
 66 |     {
 67 |         // ul   ului
 68 |         //      delete
 69 |         if ( ($position = $this->search(array('ul', 'ului'))) !== false) {
 70 |             if ($this->inR1($position)) {
 71 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 72 |             }
 73 |             return true;
 74 |         }
 75 | 
 76 |         // aua
 77 |         //      replace with a
 78 |         if ( ($position = $this->search(array('aua'))) !== false) {
 79 |             if ($this->inR1($position)) {
 80 |                 $this->word = preg_replace('#(aua)$#u', 'a', $this->word);
 81 |             }
 82 |             return true;
 83 |         }
 84 | 
 85 |         // ea   ele   elor
 86 |         //      replace with e
 87 |         if ( ($position = $this->search(array('ea', 'ele', 'elor'))) !== false) {
 88 |             if ($this->inR1($position)) {
 89 |                 $this->word = preg_replace('#(ea|ele|elor)$#u', 'e', $this->word);
 90 |             }
 91 |             return true;
 92 |         }
 93 | 
 94 |         // ii   iua   iei   iile   iilor   ilor
 95 |         //      replace with i
 96 |         if ( ($position = $this->search(array('ii', 'iua', 'iei', 'iile', 'iilor', 'ilor'))) !== false) {
 97 |             if ($this->inR1($position)) {
 98 |                 $this->word = preg_replace('#(ii|iua|iei|iile|iilor|ilor)$#u', 'i', $this->word);
 99 |             }
100 |             return true;
101 |         }
102 | 
103 |         // ile
104 |         //      replace with i if not preceded by ab
105 |         if ( ($position = $this->search(array('ile'))) !== false) {
106 |             if ($this->inR1($position)) {
107 |                 $before = StringHelper::substr($this->word, ($position-2), 2);
108 | 
109 |                 if ($before !== 'ab') {
110 |                     $this->word = preg_replace('#(ile)$#u', 'i', $this->word);
111 |                 }
112 |             }
113 |             return true;
114 |         }
115 | 
116 |         // atei
117 |         //      replace with at
118 |         if ( ($position = $this->search(array('atei'))) !== false) {
119 |             if ($this->inR1($position)) {
120 |                 $this->word = preg_replace('#(atei)$#u', 'at', $this->word);
121 |             }
122 |             return true;
123 |         }
124 | 
125 |         // aţie   aţia
126 |         //      replace with aţi
127 |         if ( ($position = $this->search(array('aţie', 'aţia'))) !== false) {
128 |             if ($this->inR1($position)) {
129 |                 $this->word = preg_replace('#(aţie|aţia)$#u', 'aţi', $this->word);
130 |             }
131 |             return true;
132 |         }
133 | 
134 |         return false;
135 |     }
136 | 
137 |     /**
138 |      * Step 1: Reduction of combining suffixes
139 |      * Search for the longest among the following suffixes, and, if it is in R1, perform the replacement action indicated.
140 |      * Then repeat this step until no replacement occurs.
141 |      * @return boolean
142 |      */
143 |     private function step1()
144 |     {
145 |         // abilitate   abilitati   abilităi   abilităţi
146 |         //      replace with abil
147 |         if ( ($position = $this->search(array('abilitate', 'abilitati', 'abilităi', 'abilităţi'))) !== false) {
148 |             if ($this->inR1($position)) {
149 |                 $this->word = preg_replace('#(abilitate|abilitati|abilităi|abilităţi)$#u', 'abil', $this->word);
150 |             }
151 |             return true;
152 |         }
153 | 
154 |         // ibilitate
155 |         //      replace with ibil
156 |         if ( ($position = $this->search(array('ibilitate'))) !== false) {
157 |             if ($this->inR1($position)) {
158 |                 $this->word = preg_replace('#(ibilitate)$#u', 'ibil', $this->word);
159 |             }
160 |             return true;
161 |         }
162 | 
163 |         // ivitate   ivitati   ivităi   ivităţi
164 |         //      replace with iv
165 |         if ( ($position = $this->search(array('ivitate', 'ivitati', 'ivităi', 'ivităţi'))) !== false) {
166 |             if ($this->inR1($position)) {
167 |                 $this->word = preg_replace('#(ivitate|ivitati|ivităi|ivităţi)$#u', 'iv', $this->word);
168 |             }
169 |             return true;
170 |         }
171 | 
172 |         // icitate   icitati   icităi   icităţi   icator   icatori   iciv   iciva   icive   icivi   icivă   ical   icala   icale   icali   icală
173 |         //      replace with ic (the pattern must list exactly the suffixes searched for, "icator" included)
174 |         if ( ($position = $this->search(array(
175 |             'icitate', 'icitati', 'icităi', 'icităţi', 'icatori', 'icator', 'iciva',
176 |             'icive', 'icivi', 'icivă', 'icala', 'icale', 'icali', 'icală', 'iciv', 'ical'))) !== false) {
177 |             if ($this->inR1($position)) {
178 |                 $this->word = preg_replace('#(icitate|icitati|icităi|icităţi|icatori|icator|iciva|icive|icivi|icivă|icala|icale|icali|icală|iciv|ical)$#u', 'ic', $this->word);
179 |             }
180 |             return true;
181 |         }
182 | 
183 |         // ativ   ativa   ative   ativi   ativă   aţiune   atoare   ator   atori   ătoare   ător   ători
184 |         //      replace with at
185 |         if ( ($position = $this->search(array('ativa', 'ative', 'ativi', 'ativă', 'ativ', 'aţiune', 'atoare', 'atori', 'ătoare', 'ători', 'ător', 'ator'))) !== false) {
186 |             if ($this->inR1($position)) {
187 |                 $this->word = preg_replace('#(ativa|ative|ativi|ativă|ativ|aţiune|atoare|atori|ătoare|ători|ător|ator)$#u', 'at', $this->word);
188 |             }
189 |             return true;
190 |         }
191 | 
192 |         // itiv   itiva   itive   itivi   itivă   iţiune   itoare   itor   itori
193 |         //      replace with it
194 |         if ( ($position = $this->search(array('itiva', 'itive', 'itivi', 'itivă', 'itiv', 'iţiune', 'itoare', 'itori', 'itor'))) !== false) {
195 |             if ($this->inR1($position)) {
196 |                 $this->word = preg_replace('#(itiva|itive|itivi|itivă|itiv|iţiune|itoare|itori|itor)$#u', 'it', $this->word);
197 |             }
198 |             return true;
199 |         }
200 | 
201 |         return false;
202 |     }
203 | 
204 |     /**
205 |      * Step 2: Removal of 'standard' suffixes
206 |      * Search for the longest among the following suffixes, and, if it is in R2, perform the action indicated.
207 |      * @return boolean
208 |      */
209 |     private function step2()
210 |     {
211 |         // atori   itate   itati, ităţi, abila   abile   abili   abilă, ibila   ibile   ibili   ibilă
212 |         // anta, ante, anti, antă, ator, ibil, oasa   oasă   oase, ităi, abil
213 |         // osi   oşi   ant   ici   ică iva   ive   ivi   ivă ata   ată   ati   ate, uta   ută   uti   ute, ita   ită   iti   ite  ica   ice
214 |         // at, os, iv, ut, it, ic
215 |         //      delete
216 |         if ( ($position = $this->search(array(
217 |             'atori', 'itate', 'itati', 'ităţi', 'abila', 'abile', 'abili', 'abilă', 'ibila', 'ibile', 'ibili', 'ibilă',
218 |             'anta', 'ante', 'anti', 'antă', 'ator', 'ibil', 'oasa', 'oasă', 'oase', 'ităi', 'abil',
219 |             'osi', 'oşi', 'ant', 'ici', 'ică', 'iva', 'ive', 'ivi', 'ivă', 'ata', 'ată', 'ati', 'ate',
220 |             'uta', 'ută', 'uti', 'ute', 'ita', 'ită', 'iti', 'ite', 'ica', 'ice',
221 |             'at', 'os', 'iv', 'ut', 'it', 'ic'
222 |         ))) !== false) {
223 |             if ($this->inR2($position)) {
224 |                 $this->word = StringHelper::substr($this->word, 0, $position);
225 |             }
226 |             return true;
227 |         }
228 | 
229 |         // iune   iuni
230 |         //      delete if preceded by ţ, and replace the ţ by t.
231 |         if ( ($position = $this->search(array('iune', 'iuni'))) !== false) {
232 |             if ($this->inR2($position)) {
233 |                 $before = $position - 1;
234 |                 $letter = StringHelper::substr($this->word, $before, 1);
235 |                 if ($letter === 'ţ') {
236 |                     $this->word = StringHelper::substr($this->word, 0, $position);
237 |                     $this->word = preg_replace('#(ţ)$#u', 't', $this->word);
238 |                 }
239 |             }
240 |             return true;
241 |         }
242 | 
243 |         // ism   isme   ist   ista   iste   isti   istă   işti
244 |         //      replace with ist
245 |         if ( ($position = $this->search(array('isme', 'ism', 'ista', 'iste', 'isti', 'istă', 'işti', 'ist'))) !== false) {
246 |             if ($this->inR2($position)) {
247 |                 $this->word = preg_replace('#(isme|ism|ista|iste|isti|istă|işti|ist)$#u', 'ist', $this->word);
248 |             }
249 |             return true;
250 |         }
251 | 
252 |         return false;
253 |     }
254 | 
255 |     /**
256 |      * Step 3: Removal of verb suffixes
257 |      * Do step 3 if no suffix was removed either by step 1 or step 2.
258 |      * @return boolean
259 |      */
260 |     private function step3()
261 |     {
262 |         // are   ere   ire   âre   ind   ând   indu   ându   eze   ească   ez   ezi   ează   esc   eşti
263 |         // eşte   ăsc   ăşti   ăşte   am   ai   au   eam   eai   ea   eaţi   eau   iam   iai   ia   iaţi
264 |         // iau   ui   aşi   arăm   arăţi   ară   uşi   urăm   urăţi   ură   işi   irăm   irăţi   iră   âi
265 |         // âşi   ârăm   ârăţi   âră   asem   aseşi   ase   aserăm   aserăţi   aseră   isem   iseşi   ise
266 |         // iserăm   iserăţi   iseră   âsem   âseşi   âse   âserăm   âserăţi   âseră   usem   useşi   use   userăm   userăţi   useră
267 |         //      delete if preceded in RV by a consonant or u
268 |         if ( ($position = $this->searchIfInRv(array(
269 |             'userăţi', 'iserăţi', 'âserăţi', 'aserăţi',
270 |             'userăm', 'iserăm', 'âserăm', 'aserăm',
271 |             'iseră', 'âseşi', 'useră', 'âseră', 'useşi', 'iseşi', 'aseră', 'aseşi', 'ârăţi', 'irăţi', 'urăţi', 'arăţi', 'ească',
272 |             'usem', 'âsem', 'isem', 'asem', 'ârăm', 'urăm', 'irăm', 'arăm', 'iaţi', 'eaţi', 'ăşte', 'ăşti', 'eşte', 'eşti', 'ează', 'ându', 'indu',
273 |             'âse', 'use', 'ise', 'ase', 'âră', 'iră', 'işi', 'ură', 'uşi', 'ară', 'aşi', 'âşi', 'iau', 'iai', 'iam', 'eau', 'eai', 'eam', 'ăsc',
274 |             'are', 'ere', 'ire', 'âre', 'ind', 'ând', 'eze', 'ezi', 'esc',
275 |             'âi', 'ui', 'ia', 'ea', 'au', 'ai', 'am', 'ez'
276 |         ))) !== false) {
277 |             if ($this->inRv($position)) {
278 |                 $before = $position - 1;
279 |                 if ($this->inRv($before)) {
280 |                     $letter = StringHelper::substr($this->word, $before, 1);
281 | 
282 |                     if ( (!in_array($letter, self::$vowels, true)) || ($letter === 'u') ) {
283 |                         $this->word = StringHelper::substr($this->word, 0, $position);
284 |                     }
285 |                 }
286 |             }
287 |             return true;
288 |         }
289 | 
290 |         // ăm   aţi   em   eţi   im   iţi   âm   âţi   seşi   serăm   serăţi   seră   sei   se   sesem   seseşi   sese   seserăm   seserăţi   seseră
291 |         //      delete
292 |         if ( ($position = $this->searchIfInRv(array(
293 |             'seserăm', 'seserăţi', 'seseră', 'seseşi', 'sesem', 'serăţi', 'serăm', 'seşi', 'sese', 'seră',
294 |             'aţi', 'eţi', 'iţi', 'âţi', 'sei', 'se', 'ăm', 'âm', 'em', 'im'
295 |         ))) !== false) {
296 |             if ($this->inRv($position)) {
297 |                 $this->word = StringHelper::substr($this->word, 0, $position);
298 |             }
299 |             return true;
300 |         }
301 | 
302 |         return false;
303 |     }
304 | 
305 |     /**
306 |      * Step 4: Removal of final vowel
307 |      */
308 |     private function step4()
309 |     {
310 |         // Search for the longest among the suffixes "a   e   i   ie   ă " and, if it is in RV, delete it.
311 |         if ( ($position = $this->search(array('a', 'ie', 'e', 'i', 'ă'))) !== false) {
312 |             if ($this->inRv($position)) {
313 |                 $this->word = StringHelper::substr($this->word, 0, $position);
314 |             }
315 |         }
316 | 
317 |         return true;
318 |     }
319 | 
320 |     /**
321 |      * Finally
322 |      * Turn I, U back into i, u
323 |      */
324 |     private function finish()
325 |     {
326 |         // Turn I, U back into i, u
327 |         $this->word = str_replace(array('I', 'U'), array('i', 'u'), $this->word);
328 |     }
329 | }
330 | 


--------------------------------------------------------------------------------
/src/Stemmer/Russian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/russian/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Russian extends Stem
 14 | {
 15 |     /**
 16 |      * All russian vowels
 17 |      */
 18 |     protected static $vowels = array('а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я');
 19 | 
 20 |     protected static $perfectiveGerund = array(
 21 |         array('вшись', 'вши', 'в'),
 22 |         array('ывшись', 'ившись', 'ывши', 'ивши', 'ив', 'ыв')
 23 |     );
 24 | 
 25 |     protected static $adjective = array(
 26 |         'ыми', 'ими', 'ему', 'ому', 'его', 'ого', 'ее', 'ие', 'ые', 'ое', 'ей', 'ий',
 27 |         'ый', 'ой', 'ем', 'им', 'ым','ом','их', 'ых', 'ую', 'юю', 'ая', 'яя', 'ою', 'ею'
 28 |     );
 29 | 
 30 |     protected static $participle = array(
 31 |         array('ем', 'нн', 'вш', 'ющ', 'щ'),
 32 |         array('ивш', 'ывш', 'ующ')
 33 |     );
 34 | 
 35 |     protected static $reflexive = array('ся', 'сь');
 36 | 
 37 |     protected static $verb = array(
 38 |         array('ешь', 'нно', 'ете', 'йте', 'ла', 'на', 'ли', 'й', 'л', 'ем', 'н', 'ло', 'но', 'ет', 'ют', 'ны', 'ть'),
 39 |         array(
 40 |             'уйте', 'ило', 'ыло', 'ено','ила', 'ыла', 'ена', 'ейте', 'ены', 'ить', 'ыть', 'ишь', 'ите', 'или', 'ыли',
 41 |             'ует', 'уют', 'ей', 'уй', 'ил', 'ыл', 'им', 'ым', 'ен', 'ят', 'ит', 'ыт', 'ую', 'ю'
 42 |         )
 43 |     );
 44 | 
 45 |     protected static $noun = array(
 46 |         'иями', 'ями', 'ами', 'ией', 'иям', 'ием', 'иях', 'ев', 'ов', 'ие', 'ье', 'еи', 'ии', 'ей', 'ой', 'ий', 'ям',
 47 |         'ем', 'ам', 'ом', 'ах', 'ях', 'ию', 'ью', 'ия', 'ья', 'я', 'а', 'е', 'ы', 'ь', 'и', 'о', 'у', 'й', 'ю'
 48 |     );
 49 | 
 50 |     protected static $superlative = array('ейше', 'ейш');
 51 | 
 52 |     protected static $derivational = array('ость', 'ост');
 53 | 
 54 |     /**
 55 |      * {@inheritdoc}
 56 |      */
 57 |     public function stem($word)
 58 |     {
 59 |         $this->word = StringHelper::strtolower($word);
 60 | 
 61 |         // Work on the lower-cased word: set up the R1, R2 and RV regions before running the steps
 62 |         $this->r1();
 63 |         $this->r2();
 64 |         $this->rv();
 65 | 
 66 |         // Do each of steps 1, 2, 3 and 4.
 67 |         $this->step1();
 68 |         $this->step2();
 69 |         $this->step3();
 70 |         $this->step4();
 71 | 
 72 |         return $this->word;
 73 |     }
 74 | 
 75 |     /**
 76 |      * Step 1: Search for a PERFECTIVE GERUND ending. If one is found remove it, and that is then the end of step 1.
 77 |      * Otherwise try and remove a REFLEXIVE ending, and then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending.
 78 |      * As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
 79 |      */
 80 |     private function step1()
 81 |     {
 82 |         // Search for a PERFECTIVE GERUND ending.
 83 |         // group 1
 84 |         if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[0])) !== false) {
 85 |             if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
 86 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 87 |                 return true;
 88 |             }
 89 |         }
 90 | 
 91 |         // group 2
 92 |         if ( ($position = $this->searchIfInRv(self::$perfectiveGerund[1])) !== false) {
 93 |             if ($this->inRv($position)) {
 94 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 95 |                 return true;
 96 |             }
 97 |         }
 98 | 
 99 |         // Otherwise try and remove a REFLEXIVE ending
100 |         if ( ($position = $this->searchIfInRv(self::$reflexive)) !== false) {
101 |             if ($this->inRv($position)) {
102 |                 $this->word = StringHelper::substr($this->word, 0, $position);
103 |             }
104 |         }
105 | 
106 |         // then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending.
107 |         // As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
108 |         if ( ($position = $this->searchIfInRv(self::$adjective)) !== false) {
109 |             if ($this->inRv($position)) {
110 |                 $this->word = StringHelper::substr($this->word, 0, $position);
111 | 
112 |                 if ( ($position2 = $this->search(self::$participle[0])) !== false) {
113 |                     if ( ($this->inRv($position2)) && ($this->checkGroup1($position2)) ) {
114 |                         $this->word = StringHelper::substr($this->word, 0, $position2);
115 |                         return true;
116 |                     }
117 |                 }
118 | 
119 |                 if ( ($position2 = $this->search(self::$participle[1])) !== false) {
120 |                     if ($this->inRv($position2)) {
121 |                         $this->word = StringHelper::substr($this->word, 0, $position2);
122 |                         return true;
123 |                     }
124 |                 }
125 | 
126 |                 return true;
127 |             }
128 |         }
129 | 
130 |         if ( ($position = $this->searchIfInRv(self::$verb[0])) !== false) {
131 |             if ( ($this->inRv($position)) && ($this->checkGroup1($position)) ) {
132 |                 $this->word = StringHelper::substr($this->word, 0, $position);
133 |                 return true;
134 |             }
135 |         }
136 | 
137 |         if ( ($position = $this->searchIfInRv(self::$verb[1])) !== false) {
138 |             if ($this->inRv($position)) {
139 |                 $this->word = StringHelper::substr($this->word, 0, $position);
140 |                 return true;
141 |             }
142 |         }
143 | 
144 |         if ( ($position = $this->searchIfInRv(self::$noun)) !== false) {
145 |             if ($this->inRv($position)) {
146 |                 $this->word = StringHelper::substr($this->word, 0, $position);
147 |                 return true;
148 |             }
149 |         }
150 | 
151 |         return false;
152 |     }
153 | 
154 |     /**
155 |      * Step 2: If the word ends with и (i), remove it.
156 |      */
157 |     private function step2()
158 |     {
159 |         if ( ($position = $this->searchIfInRv(array('и'))) !== false) {
160 |             if ($this->inRv($position)) {
161 |                 $this->word = StringHelper::substr($this->word, 0, $position);
162 |                 return true;
163 |             }
164 |         }
165 |         return false;
166 |     }
167 | 
168 |     /**
169 |      * Step 3: Search for a DERIVATIONAL ending in R2 (i.e. the entire ending must lie in R2),
170 |      * and if one is found, remove it.
171 |      */
172 |     private function step3()
173 |     {
174 |         if ( ($position = $this->searchIfInRv(self::$derivational)) !== false) {
175 |             if ($this->inR2($position)) {
176 |                 $this->word = StringHelper::substr($this->word, 0, $position);
177 |                 return true;
178 |             }
179 |         }
180 |     }
181 | 
182 |     /**
183 |      *  Step 4: (1) Undouble н (n), or, (2) if the word ends with a SUPERLATIVE ending, remove it
184 |      *  and undouble н (n), or (3) if the word ends ь (') (soft sign) remove it.
185 |      */
186 |     private function step4()
187 |     {
188 |         // (2) if the word ends with a SUPERLATIVE ending, remove it
189 |         if ( ($position = $this->searchIfInRv(self::$superlative)) !== false) {
190 |             $this->word = StringHelper::substr($this->word, 0, $position);
191 |         }
192 | 
193 |         // (1) Undouble н (n)
194 |         if ( ($position = $this->searchIfInRv(array('нн'))) !== false) {
195 |             $this->word = StringHelper::substr($this->word, 0, ($position+1));
196 |             return true;
197 |         }
198 | 
199 |         // (3) if the word ends ь (') (soft sign) remove it
200 |         if ( ($position = $this->searchIfInRv(array('ь'))) !== false) {
201 |             $this->word = StringHelper::substr($this->word, 0, $position);
202 |             return true;
203 |         }
204 |     }
205 | 
206 |     /**
207 |      *  In any word, RV is the region after the first vowel, or the end of the word if it contains no vowel.
208 |      */
209 |     protected function rv()
210 |     {
211 |         $length = StringHelper::strlen($this->word);
212 | 
213 |         $this->rv = '';
214 |         $this->rvIndex = $length;
215 | 
216 |         for ($i=0; $i<$length; $i++) {
217 |             $letter = StringHelper::substr($this->word, $i, 1);
218 |             if (in_array($letter, self::$vowels)) {
219 |                 $this->rv = StringHelper::substr($this->word, ($i+1));
220 |                 $this->rvIndex = $i + 1;
221 |                 return true;
222 |             }
223 |         }
224 | 
225 |         return false;
226 |     }
227 | 
228 |     /**
229 |      * group 1 endings must follow а (a) or я (ia)
230 |      *
231 |      * @param integer $position
232 |      * @return boolean
233 |      */
234 |     private function checkGroup1($position)
235 |     {
236 |         if (! $this->inRv(($position-1))) {
237 |             return false;
238 |         }
239 | 
240 |         $letter = StringHelper::substr($this->word, ($position - 1), 1);
241 | 
242 |         if ($letter == 'а' || $letter == 'я') {
243 |             return true;
244 |         }
245 |         return false;
246 |     }
247 | }
248 | 


--------------------------------------------------------------------------------
/src/Stemmer/Spanish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | use Wamania\Snowball\Transliterate;
  7 | 
  8 | /**
  9 |  *
 10 |  * @link http://snowball.tartarus.org/algorithms/spanish/stemmer.html
 11 |  * @author wamania
 12 |  *
 13 |  */
 14 | class Spanish extends Stem
 15 | {
 16 |     /**
 17 |      * All spanish vowels
 18 |      */
 19 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'ü');
 20 | 
 21 |     /**
 22 |      * {@inheritdoc}
 23 |      */
 24 |     public function stem($word)
 25 |     {
 26 |         $this->word = StringHelper::strtolower($word);
 27 | 
 28 |         $this->rv();
 29 |         $this->r1();
 30 |         $this->r2();
 31 | 
 32 |         $this->step0();
 33 | 
 34 |         $word = $this->word;
 35 |         $this->step1();
 36 | 
 37 |         // Do step 2a if no ending was removed by step 1.
 38 |         if ($this->word == $word) {
 39 |             $this->step2a();
 40 | 
 41 |             // Do Step 2b if step 2a was done, but failed to remove a suffix.
 42 |             if ($this->word == $word) {
 43 |                 $this->step2b();
 44 |             }
 45 |         }
 46 | 
 47 |         $this->step3();
 48 |         $this->finish();
 49 | 
 50 |         return $this->word;
 51 |     }
 52 | 
 53 |     /**
 54 |      * Step 0: Attached pronoun
 55 |      *
 56 |      * Search for the longest among the following suffixes
 57 |      *      me   se   sela   selo   selas   selos   la   le   lo   las   les   los   nos
 58 |      *
 59 |      * and delete it, if comes after one of
 60 |      *      (a) iéndo   ándo   ár   ér   ír
 61 |      *      (b) ando   iendo   ar   er   ir
 62 |      *      (c) yendo following u
 63 |      *
 64 |      *  in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
 65 |      *  In the case of (a), deletion is followed by removing the acute accent (for example, haciéndola -> haciendo).
 66 |      */
 67 |     private function step0()
 68 |     {
 69 |         if ( ($position = $this->searchIfInRv(array('selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo' ))) != false) {
 70 |             $suffixe = StringHelper::substr($this->word, $position);
 71 | 
 72 |             // a
 73 |             $a = array('iéndo', 'ándo', 'ár', 'ér', 'ír');
 74 |             $a = array_map(function($item) use ($suffixe) {
 75 |                 return $item . $suffixe;
 76 |             }, $a);
 77 | 
 78 |             if ( ($position2 = $this->searchIfInRv($a)) !== false) {
 79 |                 $suffixe2 = StringHelper::substr($this->word, $position2);
 80 |                 $suffixe2 = Transliterate::utf8_latin_to_ascii($suffixe2); // unaccent
 81 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
 82 |                 $this->word .= $suffixe2;
 83 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 84 |                 return true;
 85 |             }
 86 | 
 87 |             // b
 88 |             $b = array('iendo', 'ando', 'ar', 'er', 'ir');
 89 |             $b = array_map(function($item) use ($suffixe) {
 90 |                 return $item . $suffixe;
 91 |             }, $b);
 92 | 
 93 |             if ( ($position2 = $this->searchIfInRv($b)) !== false) {
 94 |                 $this->word = StringHelper::substr($this->word, 0, $position);
 95 |                 return true;
 96 |             }
 97 | 
 98 |             // c
 99 |             if ( ($position2 = $this->searchIfInRv(array('yendo' . $suffixe))) != false) {
100 |                 $before = StringHelper::substr($this->word, ($position2-1), 1);
101 |                 if ( (isset($before)) && ($before == 'u') ) {
102 |                     $this->word = StringHelper::substr($this->word, 0, $position);
103 |                     return true;
104 |                 }
105 |             }
106 |         }
107 | 
108 |         return false;
109 |     }
110 | 
111 |     /**
112 |      * Step 1
113 |      */
114 |     private function step1()
115 |     {
116 |         // anza   anzas   ico   ica   icos   icas   ismo   ismos   able   ables   ible   ibles   ista
117 |         // istas   oso   osa   osos   osas   amiento   amientos   imiento   imientos
118 |         //      delete if in R2
119 |         if ( ($position = $this->search(array(
120 |             'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso', 'istas', 'ista', 'ibles',
121 |             'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza'))) != false) {
122 | 
123 |             if ($this->inR2($position)) {
124 |                 $this->word = StringHelper::substr($this->word, 0, $position);
125 |             }
126 |             return true;
127 |         }
128 | 
129 |         // adora   ador   ación   adoras   adores   aciones   ante   antes   ancia   ancias
130 |         //      delete if in R2
131 |         //      if preceded by ic, delete if in R2
132 |         if ( ($position = $this->search(array(
133 |             'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia'))) != false) {
134 | 
135 |             if ($this->inR2($position)) {
136 |                 $this->word = StringHelper::substr($this->word, 0, $position);
137 |             }
138 | 
139 |             if ( ($position2 = $this->searchIfInR2(array('ic')))) {
140 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
141 |             }
142 |             return true;
143 |         }
144 | 
145 |         // logía   logías
146 |         //      replace with log if in R2
147 |         if ( ($position = $this->search(array('logías', 'logía'))) != false) {
148 |             if ($this->inR2($position)) {
149 |                 $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
150 |             }
151 |             return true;
152 |         }
153 | 
154 |         // ución   uciones
155 |         //      replace with u if in R2
156 |         if ( ($position = $this->search(array('uciones', 'ución'))) != false) {
157 |             if ($this->inR2($position)) {
158 |                 $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
159 |             }
160 |             return true;
161 |         }
162 | 
163 |         // encia   encias
164 |         //      replace with ente if in R2
165 |         if ( ($position = $this->search(array('encias', 'encia'))) != false) {
166 |             if ($this->inR2($position)) {
167 |                 $this->word = preg_replace('#(encias|encia)$#u', 'ente', $this->word);
168 |             }
169 |             return true;
170 |         }
171 | 
172 |         // amente
173 |         //      delete if in R1
174 |         //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
175 |         //      if preceded by os, ic or ad, delete if in R2
176 |         if ( ($position = $this->search(array('amente'))) != false) {
177 | 
178 |             // delete if in R1
179 |             if ($this->inR1($position)) {
180 |                 $this->word = StringHelper::substr($this->word, 0, $position);
181 |             }
182 | 
183 |             // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
184 |             if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
185 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
186 |                 if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
187 |                     $this->word = StringHelper::substr($this->word, 0, $position3);
188 |                 }
189 | 
190 |             // if preceded by os, ic or ad, delete if in R2
191 |             } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) != false) {
192 |                 $this->word = StringHelper::substr($this->word, 0, $position4);
193 |             }
194 |             return true;
195 |         }
196 | 
197 |         // mente
198 |         //      delete if in R2
199 |         //      if preceded by ante, able or ible, delete if in R2
200 |         if ( ($position = $this->search(array('mente'))) != false) {
201 | 
202 |             // delete if in R2
203 |             if ($this->inR2($position)) {
204 |                 $this->word = StringHelper::substr($this->word, 0, $position);
205 |             }
206 | 
207 |             // if preceded by ante, able or ible, delete if in R2
208 |             if ( ($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) != false) {
209 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
210 |             }
211 |             return true;
212 |         }
213 | 
214 |         // idad   idades
215 |         //      delete if in R2
216 |         //      if preceded by abil, ic or iv, delete if in R2
217 |         if ( ($position = $this->search(array('idades', 'idad'))) != false) {
218 | 
219 |             // delete if in R2
220 |             if ($this->inR2($position)) {
221 |                 $this->word = StringHelper::substr($this->word, 0, $position);
222 |             }
223 | 
224 |             // if preceded by abil, ic or iv, delete if in R2
225 |             if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) != false) {
226 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
227 |             }
228 |             return true;
229 |         }
230 | 
231 |         // iva   ivo   ivas   ivos
232 |         //      delete if in R2
233 |         //      if preceded by at, delete if in R2
234 |         if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) != false) {
235 | 
236 |             // delete if in R2
237 |             if ($this->inR2($position)) {
238 |                 $this->word = StringHelper::substr($this->word, 0, $position);
239 |             }
240 | 
241 |             // if preceded by at, delete if in R2
242 |             if ( ($position2 = $this->searchIfInR2(array('at'))) != false) {
243 |                 $this->word = StringHelper::substr($this->word, 0, $position2);
244 |             }
245 |             return true;
246 |         }
247 | 
248 |         return false;
249 |     }
250 | 
251 |     /**
252 |      * Step 2a: Verb suffixes beginning y
253 |      */
254 |     private function step2a()
255 |     {
256 |         // if found, delete if preceded by u
257 |         // (Note that the preceding u need not be in RV.)
258 |         if ( ($position = $this->searchIfInRv(array(
259 |             'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye'))) != false) {
260 | 
261 |             $before = StringHelper::substr($this->word, ($position-1), 1);
262 |             if ( (isset($before)) && ($before == 'u') ) {
263 |                 $this->word = StringHelper::substr($this->word, 0, $position);
264 |                 return true;
265 |             }
266 |         }
267 | 
268 |         return false;
269 |     }
270 | 
271 |     /**
272 |      * Step 2b: Other verb suffixes
273 |      *      Search for the longest among the following suffixes in RV, and perform the action indicated.
274 |      */
275 |     private function step2b()
276 |     {
277 |         //      delete
278 |         if ( ($position = $this->searchIfInRv(array(
279 |             'iésemos', 'iéramos', 'ábamos', 'iríamos', 'eríamos', 'aríamos', 'áramos', 'ásemos', 'eríais',
280 |             'aremos', 'eremos', 'iremos', 'asteis', 'ieseis', 'ierais', 'isteis', 'aríais',
281 |             'irían', 'aréis', 'erían', 'erías', 'eréis', 'iréis', 'irías', 'ieran', 'iesen', 'ieron', 'iendo', 'ieras',
282 |             'iríais', 'arían', 'arías',
283 |             'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais', 'arán', 'arás', 'aría',
284 |             'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron', 'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
285 |             'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida', 'ado', 'ido', 'ará',
286 |             'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an'
287 |         ))) != false) {
288 |             $this->word = StringHelper::substr($this->word, 0, $position);
289 |             return true;
290 |         }
291 | 
292 |         // en   es   éis   emos
293 |         //      delete, and if preceded by gu delete the u (the gu need not be in RV)
294 |         if ( ($position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'))) != false) {
295 |             $this->word = StringHelper::substr($this->word, 0, $position);
296 | 
297 |             if ( ($position2 = $this->search(array('gu'))) != false) {
298 |                 $this->word = StringHelper::substr($this->word, 0, ($position2+1));
299 |             }
300 | 
301 | 
302 |             return true;
303 |         }
304 |     }
305 | 
306 |     /**
307 |      * Step 3: residual suffix
308 |      * Search for the longest among the following suffixes in RV, and perform the action indicated.
309 |      */
310 |     private function step3()
311 |     {
312 |         // os   a   o   á   í   ó
313 |         //      delete if in RV
314 |         if ( ($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) != false) {
315 |             $this->word = StringHelper::substr($this->word, 0, $position);
316 |             return true;
317 |         }
318 | 
319 |         // e   é
320 |         //      delete if in RV, and if preceded by gu with the u in RV delete the u
321 |         if ( ($position = $this->searchIfInRv(array('e', 'é'))) != false) {
322 |             $this->word = StringHelper::substr($this->word, 0, $position);
323 | 
324 |             if ( ($position2 = $this->searchIfInRv(array('u'))) != false) {
325 |                 $before = StringHelper::substr($this->word, ($position2-1), 1);
326 |                 if ( (isset($before)) && ($before == 'g') ) {
327 |                     $this->word = StringHelper::substr($this->word, 0, $position2);
328 |                     return true;
329 |                 }
330 |             }
331 |         }
332 | 
333 |         return false;
334 |     }
335 | 
336 |     /**
337 |      * And finally:
338 |      * Remove acute accents
339 |      */
340 |     private function finish()
341 |     {
342 |         $this->word = str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
343 |     }
344 | }
345 | 


--------------------------------------------------------------------------------
/src/Stemmer/Stem.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | abstract class Stem implements Stemmer
  8 | {
  9 |     protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y');
 10 | 
 11 |     /**
 12 |      * helper, contains stringified list of vowels
 13 |      * @var string
 14 |      */
 15 |     protected $plainVowels;
 16 | 
 17 |     /**
 18 |      * The word we are stemming
 19 |      * @var string
 20 |      */
 21 |     protected $word;
 22 | 
 23 |     /**
 24 |      * The original word, use to check if word has been modified
 25 |      * @var string
 26 |      */
 27 |     protected $originalWord;
 28 | 
 29 |     /**
 30 |      * RV value
 31 |      * @var string
 32 |      */
 33 |     protected $rv;
 34 | 
 35 |     /**
 36 |      * RV index (based on the beginning of the word)
 37 |      * @var integer
 38 |      */
 39 |     protected $rvIndex;
 40 | 
 41 |     /**
 42 |      * R1 value
 43 |      * @var integer
 44 |      */
 45 |     protected $r1;
 46 | 
 47 |     /**
 48 |      * R1 index (based on the beginning of the word)
 49 |      * @var int
 50 |      */
 51 |     protected $r1Index;
 52 | 
 53 |     /**
 54 |      * R2 value
 55 |      * @var integer
 56 |      */
 57 |     protected $r2;
 58 | 
 59 |     /**
 60 |      * R2 index (based on the beginning of the word)
 61 |      * @var int
 62 |      */
 63 |     protected $r2Index;
 64 | 
 65 |     protected function inRv($position)
 66 |     {
 67 |         return ($position >= $this->rvIndex);
 68 |     }
 69 | 
 70 |     protected function inR1($position)
 71 |     {
 72 |         return ($position >= $this->r1Index);
 73 |     }
 74 | 
 75 |     protected function inR2($position)
 76 |     {
 77 |         return ($position >= $this->r2Index);
 78 |     }
 79 | 
 80 |     protected function searchIfInRv($suffixes)
 81 |     {
 82 |         return $this->search($suffixes, $this->rvIndex);
 83 |     }
 84 | 
 85 |     protected function searchIfInR1($suffixes)
 86 |     {
 87 |         return $this->search($suffixes, $this->r1Index);
 88 |     }
 89 | 
 90 |     protected function searchIfInR2($suffixes)
 91 |     {
 92 |         return $this->search($suffixes, $this->r2Index);
 93 |     }
 94 | 
 95 |     protected function search($suffixes, $offset = 0)
 96 |     {
 97 |         $length = StringHelper::strlen($this->word);
 98 |         if ($offset > $length) {
 99 |             return false;
100 |         }
101 |         foreach ($suffixes as $suffixe) {
102 |             if ( (($position = StringHelper::strrpos($this->word, $suffixe, $offset)) !== false) && ((StringHelper::strlen($suffixe)+$position) == $length) ) {
103 |                 return $position;
104 |             }
105 |         }
106 | 
107 |         return false;
108 |     }
109 | 
110 |     /**
111 |      * R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel.
112 |      */
113 |     protected function r1()
114 |     {
115 |         list($this->r1Index, $this->r1) = $this->rx($this->word);
116 |     }
117 | 
118 |     /**
119 |      * R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel.
120 |      */
121 |     protected function r2()
122 |     {
123 |         list($index, $value) = $this->rx($this->r1);
124 | 
125 |         $this->r2 = $value;
126 |         $this->r2Index = $this->r1Index + $index;
127 |     }
128 | 
129 |     /**
130 |      * Common function for R1 and R2
131 |      * Search the region after the first non-vowel following a vowel in $word, or the end of the word if there is no such non-vowel.
132 |      * R1 : $in = $this->word
133 |      * R2 : $in = R1
134 |      */
135 |     protected function rx($in)
136 |     {
137 |         $length = StringHelper::strlen($in);
138 | 
139 |         // defaults
140 |         $value = '';
141 |         $index = $length;
142 | 
143 |         // we search all vowels
144 |         $vowels = array();
145 |         for ($i=0; $i<$length; $i++) {
146 |             $letter = StringHelper::substr($in, $i, 1);
147 |             if (in_array($letter, static::$vowels)) {
148 |                 $vowels[] = $i;
149 |             }
150 |         }
151 | 
152 |         // search the non-vowel following a vowel
153 |         foreach ($vowels as $position) {
154 |             $after = $position + 1;
155 |             $letter = StringHelper::substr($in, $after, 1);
156 | 
157 |             if (! in_array($letter, static::$vowels)) {
158 |                 $index = $after + 1;
159 |                 $value = StringHelper::substr($in, ($after+1));
160 | 
161 |                 break;
162 |             }
163 |         }
164 | 
165 |         return array($index, $value);
166 |     }
167 | 
168 |     /**
169 |      * Used by spanish, italian, portuguese, etc (but not by french)
170 |      *
171 |      * If the second letter is a consonant, RV is the region after the next following vowel,
172 |      * or if the first two letters are vowels, RV is the region after the next consonant,
173 |      * and otherwise (consonant-vowel case) RV is the region after the third letter.
174 |      * But RV is the end of the word if these positions cannot be found.
175 |      */
176 |     protected function rv()
177 |     {
178 |         $length = StringHelper::strlen($this->word);
179 | 
180 |         $this->rv = '';
181 |         $this->rvIndex = $length;
182 | 
183 |         if ($length < 3) {
184 |             return true;
185 |         }
186 | 
187 |         $first = StringHelper::substr($this->word, 0, 1);
188 |         $second = StringHelper::substr($this->word, 1, 1);
189 | 
190 |         // If the second letter is a consonant, RV is the region after the next following vowel,
191 |         if (!in_array($second, static::$vowels)) {
192 |             for ($i=2; $i<$length; $i++) {
193 |                 $letter = StringHelper::substr($this->word, $i, 1);
194 |                 if (in_array($letter, static::$vowels)) {
195 |                     $this->rvIndex = $i + 1;
196 |                     $this->rv = StringHelper::substr($this->word, ($i+1));
197 |                     return true;
198 |                 }
199 |             }
200 |         }
201 | 
202 |         // or if the first two letters are vowels, RV is the region after the next consonant,
203 |         if ( (in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
204 |             for ($i=2; $i<$length; $i++) {
205 |                 $letter = StringHelper::substr($this->word, $i, 1);
206 |                 if (! in_array($letter, static::$vowels)) {
207 |                     $this->rvIndex = $i + 1;
208 |                     $this->rv = StringHelper::substr($this->word, ($i+1));
209 |                     return true;
210 |                 }
211 |             }
212 |         }
213 | 
214 |         // and otherwise (consonant-vowel case) RV is the region after the third letter.
215 |         if ( (! in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
216 |             $this->rv = StringHelper::substr($this->word, 3);
217 |             $this->rvIndex = 3;
218 |             return true;
219 |         }
220 |     }
221 | }
222 | 


--------------------------------------------------------------------------------
/src/Stemmer/Stemmer.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Stemmer;
 3 | 
/**
 * Contract shared by the stemmers: a single stem() operation taking a word
 * and returning a string.
 *
 * @author Luís Cobucci <lcobucci@gmail.com>
 */
interface Stemmer
{
    /**
     * Main function to get the STEM of a word
     *
     * @param string $word A valid UTF-8 word
     *
     * @return string The stem computed for $word
     *
     * @throws \Exception
     */
    public function stem($word);
}
20 | 


--------------------------------------------------------------------------------
/src/Stemmer/Swedish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball\Stemmer;
  4 | 
  5 | use Joomla\String\StringHelper;
  6 | 
  7 | /**
  8 |  *
  9 |  * @link http://snowball.tartarus.org/algorithms/swedish/stemmer.html
 10 |  * @author wamania
 11 |  *
 12 |  */
 13 | class Swedish extends Stem
 14 | {
    /**
     * All swedish vowels.
     * Redeclares the list inherited from Stem, adding ä, å and ö.
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'å', 'ö');
 19 | 
 20 |     /**
 21 |      * {@inheritdoc}
 22 |      */
 23 |     public function stem($word)
 24 |     {
 25 |         $this->word = StringHelper::strtolower($word);
 26 | 
 27 |         // R2 is not used: R1 is defined in the same way as in the German stemmer
 28 |         $this->r1();
 29 | 
 30 |         // then R1 is adjusted so that the region before it contains at least 3 letters.
 31 |         if ($this->r1Index < 3) {
 32 |             $this->r1Index = 3;
 33 |             $this->r1 = StringHelper::substr($this->word, 3);
 34 |         }
 35 | 
 36 |         // Do each of steps 1, 2 3 and 4.
 37 |         $this->step1();
 38 |         $this->step2();
 39 |         $this->step3();
 40 | 
 41 |         return $this->word;
 42 |     }
 43 | 
 44 |     /**
 45 |      * Define a valid s-ending as one of
 46 |      * b   c   d   f   g   h   j   k   l   m   n   o   p   r   t   v   y
 47 |      *
 48 |      * @param string $ending
 49 |      * @return boolean
 50 |      */
 51 |     private function hasValidSEnding($word)
 52 |     {
 53 |         $lastLetter = StringHelper::substr($word, -1, 1);
 54 |         return in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y'));
 55 |     }
 56 | 
 57 |     /**
 58 |      * Step 1
 59 |      * Search for the longest among the following suffixes in R1, and perform the action indicated.
 60 |      */
 61 |     private function step1()
 62 |     {
 63 |         // a   arna   erna   heterna   orna   ad   e   ade   ande   arne   are   aste   en   anden   aren   heten
 64 |         // ern   ar   er   heter   or   as   arnas   ernas   ornas   es   ades   andes   ens   arens   hetens
 65 |         // erns   at   andet   het   ast
 66 |         //      delete
 67 |         if ( ($position = $this->searchIfInR1(array(
 68 |             'heterna', 'hetens', 'ornas', 'andes', 'arnas', 'heter', 'ernas', 'anden', 'heten', 'andet', 'arens',
 69 |             'orna', 'arna', 'erna', 'aren', 'ande', 'ades', 'arne', 'erns', 'aste', 'ade', 'ern', 'het',
 70 |             'ast', 'are', 'ens', 'or', 'es', 'ad', 'en', 'at', 'ar', 'as', 'er', 'a', 'e'
 71 |         ))) !== false) {
 72 |             $this->word = StringHelper::substr($this->word, 0, $position);
 73 |             return true;
 74 |         }
 75 | 
 76 |         //  s
 77 |         //      delete if preceded by a valid s-ending
 78 |         if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
 79 |             $word = StringHelper::substr($this->word, 0, $position);
 80 |             if ($this->hasValidSEnding($word)) {
 81 |                 $this->word = $word;
 82 |             }
 83 |         }
 84 |     }
 85 | 
 86 |     /**
 87 |      * Step 2
 88 |      * Search for one of the following suffixes in R1, and if found delete the last letter.
 89 |      */
 90 |     private function step2()
 91 |     {
 92 |         // dd   gd   nn   dt   gt   kt   tt
 93 |         if ($this->searchIfInR1(array('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt')) !== false) {
 94 |             $this->word = StringHelper::substr($this->word, 0, -1);
 95 |         }
 96 |     }
 97 | 
 98 |     /**
 99 |      * Step 3:
100 |      * Search for the longest among the following suffixes in R1, and perform the action indicated.
101 |      */
102 |     private function step3()
103 |     {
104 |         // lig   ig   els
105 |         //      delete
106 |         if ( ($position = $this->searchIfInR1(array('lig', 'ig', 'els'))) !== false) {
107 |             $this->word = StringHelper::substr($this->word, 0, $position);
108 |             return true;
109 |         }
110 | 
111 |         // löst
112 |         //      replace with lös
113 |         if ( ($this->searchIfInR1(array('löst'))) !== false) {
114 |             $this->word = StringHelper::substr($this->word, 0, -1);
115 |             return true;
116 |         }
117 | 
118 |         // fullt
119 |         //      replace with full
120 |         if ( ($this->searchIfInR1(array('fullt'))) !== false) {
121 |             $this->word = StringHelper::substr($this->word, 0, -1);
122 |             return true;
123 |         }
124 |     }
125 | }
126 | 


--------------------------------------------------------------------------------
/src/StemmerFactory.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Wamania\Snowball;
 4 | 
 5 | use Joomla\String\StringHelper;
 6 | use Wamania\Snowball\Stemmer\Catalan;
 7 | use Wamania\Snowball\Stemmer\Danish;
 8 | use Wamania\Snowball\Stemmer\Dutch;
 9 | use Wamania\Snowball\Stemmer\English;
10 | use Wamania\Snowball\Stemmer\Finnish;
11 | use Wamania\Snowball\Stemmer\French;
12 | use Wamania\Snowball\Stemmer\German;
13 | use Wamania\Snowball\Stemmer\Italian;
14 | use Wamania\Snowball\Stemmer\Norwegian;
15 | use Wamania\Snowball\Stemmer\Portuguese;
16 | use Wamania\Snowball\Stemmer\Romanian;
17 | use Wamania\Snowball\Stemmer\Russian;
18 | use Wamania\Snowball\Stemmer\Spanish;
19 | use Wamania\Snowball\Stemmer\Stemmer;
20 | use Wamania\Snowball\Stemmer\Swedish;
21 | 
22 | /**
23 |  * Creates the stemmer that matches a language code or language name.
24 |  */
25 | class StemmerFactory
26 | {
27 |     /** Maps each stemmer class to the lower-case codes and names that select it. */
28 |     const LANGS = [
29 |         Catalan::class    => ['ca', 'cat', 'catalan'],
30 |         Danish::class     => ['da', 'dan', 'danish'],
31 |         Dutch::class      => ['nl', 'dut', 'nld', 'dutch'],
32 |         English::class    => ['en', 'eng', 'english'],
33 |         Finnish::class    => ['fi', 'fin', 'finnish'],
34 |         French::class     => ['fr', 'fre', 'fra', 'french'],
35 |         German::class     => ['de', 'deu', 'ger', 'german'],
36 |         Italian::class    => ['it', 'ita', 'italian'],
37 |         Norwegian::class  => ['no', 'nor', 'norwegian'],
38 |         Portuguese::class => ['pt', 'por', 'portuguese'],
39 |         Romanian::class   => ['ro', 'rum', 'ron', 'romanian'],
40 |         Russian::class    => ['ru', 'rus', 'russian'],
41 |         Spanish::class    => ['es', 'spa', 'spanish'],
42 |         Swedish::class    => ['sv', 'swe', 'swedish']
43 |     ];
44 | 
45 |     /**
46 |      * Instantiates the stemmer registered for $code; the lookup ignores letter case.
47 |      *
48 |      * @param string $code One of the identifiers listed in self::LANGS
49 |      *
50 |      * @return Stemmer A new instance on every call
51 |      *
52 |      * @throws NotFoundException When no entry of self::LANGS lists the identifier
53 |      */
54 |     public static function create(string $code): Stemmer
55 |     {
56 |         $code = StringHelper::strtolower($code);
57 | 
58 |         foreach (self::LANGS as $classname => $isoCodes) {
59 |             // Both sides are strings, so compare strictly and avoid any type juggling.
60 |             if (in_array($code, $isoCodes, true)) {
61 |                 return new $classname();
62 |             }
63 |         }
64 | 
65 |         throw new NotFoundException(sprintf('Stemmer not found for %s', $code));
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/src/StemmerManager.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Wamania\Snowball;
 4 | 
 5 | class StemmerManager
 6 | {
 7 |     /** @var array Stemmers created so far, keyed by the code they were requested with */
 8 |     private $stemmers;
 9 | 
10 |     public function __construct()
11 |     {
12 |         $this->stemmers = [];
13 |     }
14 | 
15 |     /**
16 |      * Stems $word, creating the stemmer for $isoCode on first use and reusing it afterwards.
17 |      * @throws NotFoundException
18 |      */
19 |     public function stem(string $word, string $isoCode): string
20 |     {
21 |         if (($stemmer = $this->stemmers[$isoCode] ?? null) === null) {
22 |             $stemmer = $this->stemmers[$isoCode] = StemmerFactory::create($isoCode);
23 |         }
24 |         return $stemmer->stem($word);
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/src/Transliterate.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Wamania\Snowball;
  4 | 
  5 | /**
  6 |  * Class to transliterate strings
  7 |  *
  8 |  * @note   Copy of Joomla's transliterate class, which is a port of phputf8's utf8_accents_to_ascii()
  9 |  */
 10 | class Transliterate
 11 | {
 12 |     /**
 13 |      * Replaces the accented characters listed in the tables below with ASCII letters.
 14 |      *
 15 |      * @param   string   $string  String to transliterate
 16 |      * @param   integer  $case    <= 0 converts lower-case letters, >= 0 upper-case letters; the default 0 does both.
 17 |      *
 18 |      * @return  string  Transliterated string
 19 |      */
 20 |     public static function utf8_latin_to_ascii($string, $case = 0)
 21 |     {
 22 |         static $UTF8_LOWER_ACCENTS = null; // filled on first use, then kept between calls
 23 |         static $UTF8_UPPER_ACCENTS = null; // filled on first use, then kept between calls
 24 | 
 25 |         if ($case <= 0) {
 26 |             if (\is_null($UTF8_LOWER_ACCENTS)) {
 27 |                 $UTF8_LOWER_ACCENTS = [
 28 |                     'à' => 'a',
 29 |                     'ô' => 'o',
 30 |                     'ď' => 'd',
 31 |                     'ḟ' => 'f',
 32 |                     'ë' => 'e',
 33 |                     'š' => 's',
 34 |                     'ơ' => 'o',
 35 |                     'ß' => 'ss',
 36 |                     'ă' => 'a',
 37 |                     'ř' => 'r',
 38 |                     'ț' => 't',
 39 |                     'ň' => 'n',
 40 |                     'ā' => 'a',
 41 |                     'ķ' => 'k',
 42 |                     'ŝ' => 's',
 43 |                     'ỳ' => 'y',
 44 |                     'ņ' => 'n',
 45 |                     'ĺ' => 'l',
 46 |                     'ħ' => 'h',
 47 |                     'ṗ' => 'p',
 48 |                     'ó' => 'o',
 49 |                     'ú' => 'u',
 50 |                     'ě' => 'e',
 51 |                     'é' => 'e',
 52 |                     'ç' => 'c',
 53 |                     'ẁ' => 'w',
 54 |                     'ċ' => 'c',
 55 |                     'õ' => 'o',
 56 |                     'ṡ' => 's',
 57 |                     'ø' => 'o',
 58 |                     'ģ' => 'g',
 59 |                     'ŧ' => 't',
 60 |                     'ș' => 's',
 61 |                     'ė' => 'e',
 62 |                     'ĉ' => 'c',
 63 |                     'ś' => 's',
 64 |                     'î' => 'i',
 65 |                     'ű' => 'u',
 66 |                     'ć' => 'c',
 67 |                     'ę' => 'e',
 68 |                     'ŵ' => 'w',
 69 |                     'ṫ' => 't',
 70 |                     'ū' => 'u',
 71 |                     'č' => 'c',
 72 |                     'ö' => 'oe',
 73 |                     'è' => 'e',
 74 |                     'ŷ' => 'y',
 75 |                     'ą' => 'a',
 76 |                     'ł' => 'l',
 77 |                     'ų' => 'u',
 78 |                     'ů' => 'u',
 79 |                     'ş' => 's',
 80 |                     'ğ' => 'g',
 81 |                     'ļ' => 'l',
 82 |                     'ƒ' => 'f',
 83 |                     'ž' => 'z',
 84 |                     'ẃ' => 'w',
 85 |                     'ḃ' => 'b',
 86 |                     'å' => 'a',
 87 |                     'ì' => 'i',
 88 |                     'ï' => 'i',
 89 |                     'ḋ' => 'd',
 90 |                     'ť' => 't',
 91 |                     'ŗ' => 'r',
 92 |                     'ä' => 'ae',
 93 |                     'í' => 'i',
 94 |                     'ŕ' => 'r',
 95 |                     'ê' => 'e',
 96 |                     'ü' => 'ue',
 97 |                     'ò' => 'o',
 98 |                     'ē' => 'e',
 99 |                     'ñ' => 'n',
100 |                     'ń' => 'n',
101 |                     'ĥ' => 'h',
102 |                     'ĝ' => 'g',
103 |                     'đ' => 'd',
104 |                     'ĵ' => 'j',
105 |                     'ÿ' => 'y',
106 |                     'ũ' => 'u',
107 |                     'ŭ' => 'u',
108 |                     'ư' => 'u',
109 |                     'ţ' => 't',
110 |                     'ý' => 'y',
111 |                     'ő' => 'o',
112 |                     'â' => 'a',
113 |                     'ľ' => 'l',
114 |                     'ẅ' => 'w',
115 |                     'ż' => 'z',
116 |                     'ī' => 'i',
117 |                     'ã' => 'a',
118 |                     'ġ' => 'g',
119 |                     'ṁ' => 'm',
120 |                     'ō' => 'o',
121 |                     'ĩ' => 'i',
122 |                     'ù' => 'u',
123 |                     'į' => 'i',
124 |                     'ź' => 'z',
125 |                     'á' => 'a',
126 |                     'û' => 'u',
127 |                     'þ' => 'th',
128 |                     'ð' => 'dh',
129 |                     'æ' => 'ae',
130 |                     'µ' => 'u',
131 |                     'ĕ' => 'e',
132 |                     'œ' => 'oe',
133 |                 ];
134 |             }
135 | 
136 |             $string = str_replace(array_keys($UTF8_LOWER_ACCENTS), array_values($UTF8_LOWER_ACCENTS), $string);
137 |         }
138 | 
139 |         if ($case >= 0) {
140 |             if (\is_null($UTF8_UPPER_ACCENTS)) {
141 |                 $UTF8_UPPER_ACCENTS = [
142 |                     'À' => 'A',
143 |                     'Ô' => 'O',
144 |                     'Ď' => 'D',
145 |                     'Ḟ' => 'F',
146 |                     'Ë' => 'E',
147 |                     'Š' => 'S',
148 |                     'Ơ' => 'O',
149 |                     'Ă' => 'A',
150 |                     'Ř' => 'R',
151 |                     'Ț' => 'T',
152 |                     'Ň' => 'N',
153 |                     'Ā' => 'A',
154 |                     'Ķ' => 'K',
155 |                     'Ŝ' => 'S',
156 |                     'Ỳ' => 'Y',
157 |                     'Ņ' => 'N',
158 |                     'Ĺ' => 'L',
159 |                     'Ħ' => 'H',
160 |                     'Ṗ' => 'P',
161 |                     'Ó' => 'O',
162 |                     'Ú' => 'U',
163 |                     'Ě' => 'E',
164 |                     'É' => 'E',
165 |                     'Ç' => 'C',
166 |                     'Ẁ' => 'W',
167 |                     'Ċ' => 'C',
168 |                     'Õ' => 'O',
169 |                     'Ṡ' => 'S',
170 |                     'Ø' => 'O',
171 |                     'Ģ' => 'G',
172 |                     'Ŧ' => 'T',
173 |                     'Ș' => 'S',
174 |                     'Ė' => 'E',
175 |                     'Ĉ' => 'C',
176 |                     'Ś' => 'S',
177 |                     'Î' => 'I',
178 |                     'Ű' => 'U',
179 |                     'Ć' => 'C',
180 |                     'Ę' => 'E',
181 |                     'Ŵ' => 'W',
182 |                     'Ṫ' => 'T',
183 |                     'Ū' => 'U',
184 |                     'Č' => 'C',
185 |                     'Ö' => 'Oe',
186 |                     'È' => 'E',
187 |                     'Ŷ' => 'Y',
188 |                     'Ą' => 'A',
189 |                     'Ł' => 'L',
190 |                     'Ų' => 'U',
191 |                     'Ů' => 'U',
192 |                     'Ş' => 'S',
193 |                     'Ğ' => 'G',
194 |                     'Ļ' => 'L',
195 |                     'Ƒ' => 'F',
196 |                     'Ž' => 'Z',
197 |                     'Ẃ' => 'W',
198 |                     'Ḃ' => 'B',
199 |                     'Å' => 'A',
200 |                     'Ì' => 'I',
201 |                     'Ï' => 'I',
202 |                     'Ḋ' => 'D',
203 |                     'Ť' => 'T',
204 |                     'Ŗ' => 'R',
205 |                     'Ä' => 'Ae',
206 |                     'Í' => 'I',
207 |                     'Ŕ' => 'R',
208 |                     'Ê' => 'E',
209 |                     'Ü' => 'Ue',
210 |                     'Ò' => 'O',
211 |                     'Ē' => 'E',
212 |                     'Ñ' => 'N',
213 |                     'Ń' => 'N',
214 |                     'Ĥ' => 'H',
215 |                     'Ĝ' => 'G',
216 |                     'Đ' => 'D',
217 |                     'Ĵ' => 'J',
218 |                     'Ÿ' => 'Y',
219 |                     'Ũ' => 'U',
220 |                     'Ŭ' => 'U',
221 |                     'Ư' => 'U',
222 |                     'Ţ' => 'T',
223 |                     'Ý' => 'Y',
224 |                     'Ő' => 'O',
225 |                     'Â' => 'A',
226 |                     'Ľ' => 'L',
227 |                     'Ẅ' => 'W',
228 |                     'Ż' => 'Z',
229 |                     'Ī' => 'I',
230 |                     'Ã' => 'A',
231 |                     'Ġ' => 'G',
232 |                     'Ṁ' => 'M',
233 |                     'Ō' => 'O',
234 |                     'Ĩ' => 'I',
235 |                     'Ù' => 'U',
236 |                     'Į' => 'I',
237 |                     'Ź' => 'Z',
238 |                     'Á' => 'A',
239 |                     'Û' => 'U',
240 |                     'Þ' => 'Th',
241 |                     'Ð' => 'Dh',
242 |                     'Æ' => 'Ae',
243 |                     'Ĕ' => 'E',
244 |                     'Œ' => 'Oe',
245 |                 ];
246 |             }
247 | 
248 |             $string = str_replace(array_keys($UTF8_UPPER_ACCENTS), array_values($UTF8_UPPER_ACCENTS), $string);
249 |         }
250 | 
251 |         return $string;
252 |     }
253 | }
254 | 


--------------------------------------------------------------------------------
/test/CatalanTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Catalan;
 6 | 
 7 | class CatalanTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Catalan();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileVerboseIterator(__DIR__ . '/files/ca.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/CsvFileIterator.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | /**
 5 |  * Iterates over a text file line by line; each row is the list of the line's space-separated tokens.
 6 |  *
 7 |  * The Iterator methods are tagged #[\ReturnTypeWillChange] (a plain comment before PHP 8) so that
 8 |  * PHP 8.1+ reports no deprecation, while subclasses overriding them without return types keep working.
 9 |  */
10 | class CsvFileIterator implements \Iterator
11 | {
12 |     /** @var resource|false Stream returned by fopen() */
13 |     protected $file;
14 |     /** @var mixed Key of the current row; this class only ever stores a counter in it */
15 |     protected $key = 0;
16 |     /** @var array Tokens of the line read last */
17 |     protected $current;
18 | 
19 |     /**
20 |      * Opens $file for reading; the script is terminated when that fails.
21 |      *
22 |      * @param string $file Path of the file to iterate over
23 |      */
24 |     public function __construct($file)
25 |     {
26 |         if (! ($this->file = fopen($file, 'r'))) {
27 |             // $this->file is false at this point, so report the path that was requested.
28 |             die('Can\'t open file ' . $file . "\n");
29 |         }
30 |     }
31 | 
32 |     public function __destruct()
33 |     {
34 |         // The handle is not a resource when fopen() failed.
35 |         if (is_resource($this->file)) {
36 |             fclose($this->file);
37 |         }
38 |     }
39 | 
40 |     #[\ReturnTypeWillChange]
41 |     public function rewind()
42 |     {
43 |         rewind($this->file);
44 |         $this->current = $this->readRow();
45 |         $this->key = 0;
46 |     }
47 | 
48 |     #[\ReturnTypeWillChange]
49 |     public function valid()
50 |     {
51 |         return !feof($this->file);
52 |     }
53 | 
54 |     #[\ReturnTypeWillChange]
55 |     public function key()
56 |     {
57 |         return $this->key;
58 |     }
59 | 
60 |     #[\ReturnTypeWillChange]
61 |     public function current()
62 |     {
63 |         return $this->current;
64 |     }
65 | 
66 |     #[\ReturnTypeWillChange]
67 |     public function next()
68 |     {
69 |         $this->current = $this->readRow();
70 |         $this->key++;
71 |     }
72 | 
73 |     /**
74 |      * Reads one line and splits it on spaces. Falsy tokens (empty strings, but also '0') are dropped
75 |      * before the remaining ones are trimmed; a failed read gives an empty row.
76 |      *
77 |      * @return array
78 |      */
79 |     private function readRow()
80 |     {
81 |         $line = fgets($this->file);
82 |         if ($line === false) {
83 |             return [];
84 |         }
85 | 
86 |         return array_map('trim', array_values(array_filter(explode(' ', $line))));
87 |     }
88 | }
89 | 


--------------------------------------------------------------------------------
/test/CsvFileVerboseIterator.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | /**
 5 |  * CsvFileIterator whose key is the first token of the current row whenever the row has one.
 6 |  */
 7 | class CsvFileVerboseIterator extends CsvFileIterator
 8 | {
 9 |     /** Rewinds the parent iterator, then derives the key from the first row. */
10 |     public function rewind()
11 |     {
12 |         parent::rewind();
13 |         $this->_updateKey($this->current());
14 |     }
15 | 
16 |     /** Advances the parent iterator; the key is only re-derived while the iterator is still valid. */
17 |     public function next()
18 |     {
19 |         parent::next();
20 |         if (!$this->valid()) {
21 |             return;
22 |         }
23 |         $this->_updateKey($this->current());
24 |     }
25 | 
26 |     /**
27 |      * Sets the key to the first element of $value, falling back to the current row when $value is empty.
28 |      * The key is left untouched when both are empty.
29 |      *
30 |      * @param array $value
31 |      */
32 |     protected function _updateKey($value)
33 |     {
34 |         if ($value && count($value)) {
35 |             $source = $value;
36 |         } elseif (count($this->current)) {
37 |             $source = $this->current;
38 |         } else {
39 |             return;
40 |         }
41 |         $this->key = $source[0];
42 |     }
43 | }
44 | 


--------------------------------------------------------------------------------
/test/DanishTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Danish;
 6 | 
 7 | class DanishTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Danish();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/dk.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/DutchTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Dutch;
 6 | 
 7 | class DutchTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Dutch();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/nl.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/EnglishTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\English;
 6 | 
 7 | class EnglishTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new English();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/en.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/FactoryTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Wamania\Snowball\Tests;
 4 | 
 5 | use PHPUnit\Framework\TestCase;
 6 | use Wamania\Snowball\StemmerFactory;
 7 | 
 8 | class FactoryTest extends TestCase
 9 | {
10 |     /**
11 |      * Every code and name accepted by StemmerFactory must yield an instance of the matching stemmer class.
12 |      */
13 |     public function testFactory()
14 |     {
15 |         $isoCodes = [
16 |             'ca' => 'Wamania\\Snowball\\Stemmer\\Catalan',
17 |             'cat' => 'Wamania\\Snowball\\Stemmer\\Catalan',
18 |             'catalan' => 'Wamania\\Snowball\\Stemmer\\Catalan',
19 |             'da' => 'Wamania\\Snowball\\Stemmer\\Danish',
20 |             'dan' => 'Wamania\\Snowball\\Stemmer\\Danish',
21 |             'danish' => 'Wamania\\Snowball\\Stemmer\\Danish',
22 |             'nl' => 'Wamania\\Snowball\\Stemmer\\Dutch',
23 |             'dut' => 'Wamania\\Snowball\\Stemmer\\Dutch',
24 |             'nld' => 'Wamania\\Snowball\\Stemmer\\Dutch',
25 |             'dutch' => 'Wamania\\Snowball\\Stemmer\\Dutch',
26 |             'en' => 'Wamania\\Snowball\\Stemmer\\English',
27 |             'eng' => 'Wamania\\Snowball\\Stemmer\\English',
28 |             'english' => 'Wamania\\Snowball\\Stemmer\\English',
29 |             // Finnish is registered in StemmerFactory::LANGS as well, so it is checked like the others.
30 |             'fi' => 'Wamania\\Snowball\\Stemmer\\Finnish',
31 |             'fin' => 'Wamania\\Snowball\\Stemmer\\Finnish',
32 |             'finnish' => 'Wamania\\Snowball\\Stemmer\\Finnish',
33 |             'fr' => 'Wamania\\Snowball\\Stemmer\\French',
34 |             'fre' => 'Wamania\\Snowball\\Stemmer\\French',
35 |             'fra' => 'Wamania\\Snowball\\Stemmer\\French',
36 |             'french' => 'Wamania\\Snowball\\Stemmer\\French',
37 |             'de' => 'Wamania\\Snowball\\Stemmer\\German',
38 |             'deu' => 'Wamania\\Snowball\\Stemmer\\German',
39 |             'ger' => 'Wamania\\Snowball\\Stemmer\\German',
40 |             'german' => 'Wamania\\Snowball\\Stemmer\\German',
41 |             'it' => 'Wamania\\Snowball\\Stemmer\\Italian',
42 |             'ita' => 'Wamania\\Snowball\\Stemmer\\Italian',
43 |             'italian' => 'Wamania\\Snowball\\Stemmer\\Italian',
44 |             'no' => 'Wamania\\Snowball\\Stemmer\\Norwegian',
45 |             'nor' => 'Wamania\\Snowball\\Stemmer\\Norwegian',
46 |             'norwegian' => 'Wamania\\Snowball\\Stemmer\\Norwegian',
47 |             'pt' => 'Wamania\\Snowball\\Stemmer\\Portuguese',
48 |             'por' => 'Wamania\\Snowball\\Stemmer\\Portuguese',
49 |             'portuguese' => 'Wamania\\Snowball\\Stemmer\\Portuguese',
50 |             'ro' => 'Wamania\\Snowball\\Stemmer\\Romanian',
51 |             'rum' => 'Wamania\\Snowball\\Stemmer\\Romanian',
52 |             'ron' => 'Wamania\\Snowball\\Stemmer\\Romanian',
53 |             'romanian' => 'Wamania\\Snowball\\Stemmer\\Romanian',
54 |             'ru' => 'Wamania\\Snowball\\Stemmer\\Russian',
55 |             'rus' => 'Wamania\\Snowball\\Stemmer\\Russian',
56 |             'russian' => 'Wamania\\Snowball\\Stemmer\\Russian',
57 |             'es' => 'Wamania\\Snowball\\Stemmer\\Spanish',
58 |             'spa' => 'Wamania\\Snowball\\Stemmer\\Spanish',
59 |             'spanish' => 'Wamania\\Snowball\\Stemmer\\Spanish',
60 |             'sv' => 'Wamania\\Snowball\\Stemmer\\Swedish',
61 |             'swe' => 'Wamania\\Snowball\\Stemmer\\Swedish',
62 |             'swedish' => 'Wamania\\Snowball\\Stemmer\\Swedish',
63 |         ];
64 | 
65 |         foreach ($isoCodes as $isoCode => $classname) {
66 |             $stemmer = StemmerFactory::create($isoCode);
67 | 
68 |             // assertInstanceOf names the expected and the actual class when the check fails.
69 |             $this->assertInstanceOf($classname, $stemmer);
70 |         }
71 |     }
72 | }
73 | 


--------------------------------------------------------------------------------
/test/FinnishTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Finnish;
 6 | 
 7 | class FinnishTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Finnish();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/fi.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/FrenchTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\French;
 6 | 
 7 | class FrenchTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new French();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/fr.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/GermanTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\German;
 6 | 
 7 | class GermanTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new German();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/de.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/ItalianTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Italian;
 6 | 
 7 | class ItalianTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Italian();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/it.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/ManagerTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Wamania\Snowball\Tests;
 4 | 
 5 | use PHPUnit\Framework\TestCase;
 6 | use Wamania\Snowball\StemmerManager;
 7 | 
 8 | class ManagerTest extends TestCase
 9 | {
10 |     /** The manager must resolve the 'fr' code and return the stem produced by that stemmer. */
11 |     public function testManager()
12 |     {
13 |         $actual = (new StemmerManager())->stem('anticonstitutionnelement', 'fr');
14 |         $this->assertEquals('anticonstitutionnel', $actual);
15 |     }
16 | }
17 | 


--------------------------------------------------------------------------------
/test/NorwegianTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Norwegian;
 6 | 
 7 | class NorwegianTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Norwegian();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/no.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/PortugueseTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Portuguese;
 6 | 
 7 | class PortugueseTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Portuguese();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/pt.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/RomanianTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Romanian;
 6 | 
 7 | class RomanianTest extends TestCase
 8 | {
 9 |     /**
10 |      * Stems one vocabulary word and compares the result with the expected stem.
11 |      *
12 |      * @dataProvider load
13 |      */
14 |     public function testStem($word, $stem)
15 |     {
16 |         $stemmer = new Romanian();
17 |         $this->assertEquals($stem, $stemmer->stem($word));
18 |     }
19 | 
20 |     /** Rows for testStem(). Static so PHPUnit 10+ accepts it; the path is resolved from this file. */
21 |     public static function load()
22 |     {
23 |         return new CsvFileIterator(__DIR__ . '/files/ro.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/RussianTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Russian;
 6 | 
 7 | class RussianTest extends TestCase
 8 | {
 9 |     /**
10 |      * @dataProvider load
11 |      */
12 |     public function testStem($word, $stem)
13 |     {
14 |         $o = new Russian();
15 | 
16 |         $snowballStem = $o->stem($word);
17 | 
18 |         $this->assertEquals($stem, $snowballStem);
19 |     }
20 | 
21 |     public function load()
22 |     {
23 |         return new CsvFileIterator('test/files/ru.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/SpanishTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Spanish;
 6 | 
 7 | class SpanishTest extends TestCase
 8 | {
 9 |     /**
10 |      * @dataProvider load
11 |      */
12 |     public function testStem($word, $stem)
13 |     {
14 |         $o = new Spanish();
15 | 
16 |         $snowballStem = $o->stem($word);
17 | 
18 |         $this->assertEquals($stem, $snowballStem);
19 |     }
20 | 
21 |     public function load()
22 |     {
23 |         return new CsvFileIterator('test/files/es.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------
/test/SwedishTest.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | namespace Wamania\Snowball\Tests;
 3 | 
 4 | use PHPUnit\Framework\TestCase;
 5 | use Wamania\Snowball\Stemmer\Swedish;
 6 | 
 7 | class SwedishTest extends TestCase
 8 | {
 9 |     /**
10 |      * @dataProvider load
11 |      */
12 |     public function testStem($word, $stem)
13 |     {
14 |         $o = new Swedish();
15 | 
16 |         $snowballStem = $o->stem($word);
17 | 
18 |         $this->assertEquals($stem, $snowballStem);
19 |     }
20 | 
21 |     public function load()
22 |     {
23 |         return new CsvFileIterator('test/files/sw.txt');
24 |     }
25 | }
26 | 


--------------------------------------------------------------------------------