├── .env
├── .github
    └── workflows
    │   └── tests.yml
├── .gitignore
├── LICENSE
├── README.md
├── composer.json
├── docker-compose.yml
├── docker
    └── cli
    │   └── Dockerfile
├── phpunit.xml
├── res
    └── sample1.txt
├── src
    ├── TextRankFacade.php
    └── Tool
    │   ├── Graph.php
    │   ├── Parser.php
    │   ├── Score.php
    │   ├── StopWords
    │       ├── Arabic.php
    │       ├── Dutch.php
    │       ├── English.php
    │       ├── French.php
    │       ├── German.php
    │       ├── Indonesian.php
    │       ├── Italian.php
    │       ├── Norwegian.php
    │       ├── Russian.php
    │       ├── Spanish.php
    │       ├── StopWordsAbstract.php
    │       └── Turkish.php
    │   ├── Summarize.php
    │   └── Text.php
└── tests
    └── TextRankFacadeTest.php


/.env:
--------------------------------------------------------------------------------
1 | COMPOSE_PROJECT_NAME=PHP-Science-TextRank
2 | 
3 | PREFIX=php-science-textrank
4 | 
5 | SOURCE_DIR=./
6 | TARGET_DIR=/var/www/html
7 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: tests
 2 | 
 3 | on:
 4 |   push:
 5 |   pull_request:
 6 | 
 7 | jobs:
 8 |   run:
 9 |     runs-on: ${{ matrix.operating-system }}
10 |     strategy:
11 |       matrix:
12 |         operating-system: [ubuntu-latest]
13 |         php-versions: ['8.0', '8.1', '8.2', '8.3']
14 |     name: PHP ${{ matrix.php-versions }} Test on ${{ matrix.operating-system }}
15 |     steps:
16 |       - name: Checkout
17 |         uses: actions/checkout@v2
18 | 
19 |       - name: Setup PHP
20 |         uses: shivammathur/setup-php@v2
21 |         with:
22 |           php-version: ${{ matrix.php-versions }}
23 |           coverage: xdebug
24 | 
25 |       - name: Validate composer files
26 |         run: composer validate
27 | 
28 |       - name: Install dependencies
29 |         if: steps.composer-cache.outputs.cache-hit != 'true'
30 |         run: composer install --prefer-dist --no-progress --no-suggest
31 | 
32 |       - name: Run test suite
33 |         run: composer test
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | /.tmp
3 | /.phpunit.result.cache
4 | /composer.lock
5 | /vendor


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | MIT License
 3 | 
 4 | Copyright (c) 2016-2021 PHP-Science
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <h1 align="center">
 2 | TextRank
 3 | </h1>
 4 | 
 5 | <p align="center">
 6 |     <a href="https://github.com/PHP-Science/TextRank/actions">
 7 | 		<img src="https://github.com/php-science/textrank/workflows/tests/badge.svg"/>
 8 | 	</a>
 9 | 	<a href="https://packagist.org/packages/php-science/textrank">
10 | 	    <img src="https://poser.pugx.org/php-science/textrank/v/stable.svg" />
11 | 	</a>
12 | 	<a href="https://packagist.org/packages/php-science/textrank">
13 |         <img src="https://poser.pugx.org/php-science/textrank/downloads"/>
14 |     </a>
15 | 	<a href="https://github.com/PHP-Science/TextRank/blob/master/LICENSE">
16 |         <img src="https://img.shields.io/badge/license-MIT-FFF300.svg"/>
17 |     </a>
18 | </p>
19 | 
20 | <p align="center">
 21 | This source code is an implementation of the TextRank algorithm in the PHP programming language, released under the MIT licence.<br />
22 | <br />
23 | </p>
24 | 
25 | # TextRank vs. ChatGPT
26 | GPTs like ChatGPT are supervised language models that understand the context and generate new content from the given
 27 | input using vast resources, while TextRank is a low-cost text extraction algorithm. The TextRank algorithm
 28 | can also be used as a pre-processor for a GPT model, reducing the text size to save on resource consumption.
29 | 
30 | # TextRank or Automatic summarization
31 | > Automatic summarization is the process of reducing a text document with a computer program in order to create a summary that retains the most important points of the original document. Technologies that can make a coherent summary take into account variables such as length, writing style and syntax. Automatic data summarization is part of machine learning and data mining. The main idea of summarization is to find a representative subset of the data, which contains the information of the entire set. Summarization technologies are used in a large number of sectors in industry today. - Wikipedia
32 | 
33 | The algorithm of this implementation is:
34 | * Extracts sentences,
35 | * Removes stopwords,
36 | * Adds integer values to words by finding and counting the matching words,
37 | * Weights the values of the words,
38 | * Normalizes values to get the scores,
39 | * Sorts by scores
40 | 
41 | # Install to use it in your project
42 | ```
43 | cd your-project-folder
44 | composer require php-science/textrank
45 | ```
46 | 
47 | # Install for contributing
48 | ```
49 | cd git-project-folder
50 | docker-compose build
51 | docker-compose up -d
52 | composer install
53 | composer test
54 | ```
55 | 
56 | # Examples
57 | ```php
58 | 
59 | use PhpScience\TextRank\Tool\StopWords\English;
60 | 
61 | // String contains a long text, see the /res/sample1.txt file.
62 | $text = "Lorem ipsum...";
63 | 
64 | $api = new TextRankFacade();
65 | // English implementation for stopwords/junk words:
66 | $stopWords = new English();
67 | $api->setStopWords($stopWords);
68 | 
69 | // Array of the most important keywords:
70 | $result = $api->getOnlyKeyWords($text); 
71 | 
72 | // Array of the sentences from the most important part of the text:
73 | $result = $api->getHighlights($text); 
74 | 
75 | // Array of the most important sentences from the text:
76 | $result = $api->summarizeTextBasic($text);
77 | ```
78 | More examples: 
79 | * [tests/TextRankFacadeTest.php](https://github.com/DavidBelicza/PHP-Science-TextRank/blob/master/tests/TextRankFacadeTest.php)
80 | * https://php.science
81 | 
82 | # Authors, Contributors
83 | 
84 | Name | GitHub user
85 | --- | ---
86 | David Belicza | @DavidBelicza
87 | Riccardo Marton | @riccardomarton
88 | Syndesi | @Syndesi 
89 | vincentsch | @vincentsch
90 | Andrew Welch | @khalwat 
91 | Andrey Astashov | @mvcaaa
92 | Leo Toneff | @bragle
93 | Willy Arisky | @willyarisky
94 | Robert-Jan Keizer | @KeizerDev
95 | Morty | @evil1morty
96 | Sezer Fidancı | @SezerFidanci
97 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "php-science/textrank",
 3 |     "description": "TextRank (automatic text summarization) for PHP.",
 4 |     "keywords": ["science", "textrank", "automatic", "summarization", "PHP", "PHP8", "strict", "ai", "artificial", "intelligence"],
 5 |     "license": "MIT",
 6 |     "authors": [
 7 |         {
 8 |             "name": "David Belicza",
 9 |             "email": "david@belicza.com"
10 |         }
11 |     ],
12 |     "require": {
13 |         "php": ">=7.2",
14 |         "ext-ctype": "*",
15 |         "ext-mbstring": "*"
16 |     },
17 |     "require-dev": {
18 |         "phpunit/phpunit": "9.*"
19 |     },
20 |     "autoload": {
21 |         "psr-4": {
22 |             "PhpScience\\TextRank\\": ["src/"]
23 |         }
24 |     },
25 |     "autoload-dev": {
26 |         "psr-4": {
27 |             "PhpScience\\TextRank\\": ["tests/"]
28 |         }
29 |     },
30 |     "scripts": {
31 |         "test": "phpunit --colors='always' $(pwd)/tests"
32 |     }
33 | }
34 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 | 
 5 |   cli:
 6 |     container_name: ${PREFIX}_cli
 7 |     image: ${PREFIX}_cli
 8 |     build: ./docker/cli
 9 |     volumes:
10 |       - ${SOURCE_DIR}:${TARGET_DIR}
11 |     stdin_open: true
12 | 


--------------------------------------------------------------------------------
/docker/cli/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM php:8.2-cli
 2 | 
 3 | ENV TZ=Europe/Budapest
 4 | ENV DEBIAN_FRONTEND=noninteractive
 5 | 
 6 | RUN apt-get update
 7 | 
 8 | RUN apt-get install -y \
 9 |     libfreetype6-dev \
10 |     libicu-dev \
11 |     libjpeg62-turbo-dev \
12 |     libmcrypt-dev \
13 |     libxslt1-dev
14 | 
15 | RUN apt-get install --no-install-recommends -y \
16 |     tzdata \
17 |     zip \
18 |     unzip \
19 |     git
20 | 
21 | RUN apt-get install -y software-properties-common
22 | 
23 | RUN php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');"
24 | RUN php -r "if (hash_file('sha384', 'composer-setup.php') === 'e21205b207c3ff031906575712edab6f13eb0b361f2085f1f1237b7126d785e826a450292b6cfd1d64d92e6563bbde02') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;"
25 | RUN php composer-setup.php
26 | RUN php -r "unlink('composer-setup.php');"
27 | RUN mv composer.phar /usr/local/bin/composer
28 | 
29 | WORKDIR /var/www/html
30 | 


--------------------------------------------------------------------------------
/phpunit.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 | /**
 4 |  * PHP Science TextRank (http://php.science/)
 5 |  *
 6 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
 7 |  * @license https://opensource.org/licenses/MIT the MIT License
 8 |  * @author  David Belicza <david@belicza.com>
 9 |  */
10 | -->
11 | <phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
12 |          xsi:noNamespaceSchemaLocation="vendor/phpunit/phpunit/phpunit.xsd"
13 |          bootstrap="vendor/autoload.php"
14 |          colors="true">
15 | 
16 |     <testsuites>
17 |         <testsuite name="Application Test Suite">
18 |             <directory>./tests/</directory>
19 |         </testsuite>
20 |     </testsuites>
21 | 
22 |     <filter>
23 |         <whitelist processUncoveredFilesFromWhitelist="true">
24 |             <directory suffix=".php">src</directory>
25 |         </whitelist>
26 |     </filter>
27 | </phpunit>


--------------------------------------------------------------------------------
/res/sample1.txt:
--------------------------------------------------------------------------------
1 | Over the past fortnight we asked you to nominate your top extensions for the GNOME desktop. And you did just that. Having now sifted through the hundreds of entries, we’re ready to reveal your favourite GNOME Shell extensions. GNOME 3 (which is more commonly used with the GNOME Shell) has an extension framework that lets developers (and users) extend, build on, and shape how the desktop looks, acts and functions. Dash to Dock takes the GNOME Dash — this is the ‘favourites bar’ that appears on the left-hand side of the screen in the Activities overlay — and transforms it into a desktop dock. And just like Plank, Docky or AWN you can add app launchers, rearrange them, and use them to minimise, restore and switch between app windows. Dash to Dock has many of the common “Dock” features you’d expect, including autohide and intellihide, a fixed-width mode, adjustable icon size, and custom themes. My biggest pet peeve with GNOME Shell is its legacy app tray that hides in the bottom left of the screen. All extraneous non-system applets, indicators and tray icons hide down here. This makes it a little harder to use applications that rely on a system tray presence, like Skype, Franz, Telegram, and Dropbox. TopIcons Plus is the quick way to put GNOME system tray icons back where they belong: on show and in reach. The extension moves legacy tray icons from the bottom left of Gnome Shell to the right-hand side of the top panel. A well-stocked settings panel lets you adjust icon opacity, color, padding, size and tray position. Dive into the settings to adjust the sizing, styling and positioning of icons. Like the popular daily stimulant of choice, the Caffeine GNOME extension keeps your computer awake. It couldn’t be simpler to use: just click the empty mug icon. An empty cup means you’re using normal auto suspend rules – e.g., a screensaver – while a freshly brewed cup of coffee means auto suspend and screensaver are turned off. 
The Caffeine GNOME extension supports GNOME Shell 3.4 or later. Familiar with applications like Guake and Tilda? If so, you’ll instantly see the appeal of the (superbly named) Drop Down Terminal GNOME extension. When installed just tap the key above the tab key (though it can be changed to almost any key you wish) to get instant access to the command line. Want to speed up using workspaces? This simple tool lets you do just that. Once installed you can quickly switch between workspaces by scrolling over the top panel - no need to enter the Activities Overlay!
2 | 


--------------------------------------------------------------------------------
/src/TextRankFacade.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank;
 13 | 
 14 | use PhpScience\TextRank\Tool\Graph;
 15 | use PhpScience\TextRank\Tool\Parser;
 16 | use PhpScience\TextRank\Tool\Score;
 17 | use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
 18 | use PhpScience\TextRank\Tool\Summarize;
 19 | 
 20 | /**
 21 |  * Class TextRankFacade
 22 |  *
 23 |  * This Facade class is capable to find the keywords in a raw text, weigh them
 24 |  * and retrieve the most important sentences from the whole text. It is an
 25 |  * implementation of the TextRank algorithm.
 26 |  *
 27 |  * <code>
 28 |  *      $stopWords = new English();
 29 |  *
 30 |  *      $textRank = new TextRankFacade();
 31 |  *      $textRank->setStopWords($stopWords);
 32 |  *
 33 |  *      $sentences = $textRank->summarizeTextFreely(
 34 |  *          $rawText,
 35 |  *          5,
 36 |  *          2,
 37 |  *          Summarize::GET_ALL_IMPORTANT
 38 |  *      );
 39 |  * </code>
 40 |  *
 41 |  * @package PhpScience\TextRank
 42 |  */
 43 | class TextRankFacade
 44 | {
 45 |     /**
 46 |      * Stop Words
 47 |      *
 48 |      * Stop Words to ignore because of dummy words. These words will not be Key
 49 |      * Words. A, like, no yes, one, two, I, you for example.
 50 |      *
 51 |      * @see \PhpScience\TextRank\Tool\StopWords\English
 52 |      *
 53 |      * @var StopWordsAbstract
 54 |      */
 55 |     protected $stopWords;
 56 | 
 57 |     /**
 58 |      * Set Stop Words.
 59 |      *
 60 |      * @param StopWordsAbstract $stopWords Stop Words to ignore because of
 61 |      *                                     dummy words.
 62 |      */
 63 |     public function setStopWords(StopWordsAbstract $stopWords)
 64 |     {
 65 |         $this->stopWords = $stopWords;
 66 |     }
 67 | 
 68 |     /**
 69 |      * Only Keywords
 70 |      *
 71 |      * It retrieves the possible keywords with their scores from a text.
 72 |      *
 73 |      * @param string $rawText A single raw text.
 74 |      *
 75 |      * @return array Array from Keywords. Key is the parsed word, value is the
 76 |      *               word score.
 77 |      */
 78 |     public function getOnlyKeyWords(string $rawText): array
 79 |     {
 80 |         $parser = new Parser();
 81 |         $parser->setMinimumWordLength(3);
 82 |         $parser->setRawText($rawText);
 83 | 
 84 |         if ($this->stopWords) {
 85 |             $parser->setStopWords($this->stopWords);
 86 |         }
 87 | 
 88 |         $text = $parser->parse();
 89 | 
 90 |         $graph = new Graph();
 91 |         $graph->createGraph($text);
 92 | 
 93 |         $score = new Score();
 94 | 
 95 |         return $score->calculate(
 96 |             $graph, $text
 97 |         );
 98 |     }
 99 | 
100 |     /**
101 |      * Highlighted Texts
102 |      *
103 |      * It finds the most important sentences from a text by the most important
104 |      * keywords and these keywords also found by automatically. It retrieves
105 |      * the most important sentences what are 20 percent of the full text.
106 |      *
107 |      * @param string $rawText A single raw text.
108 |      *
109 |      * @return array An array from sentences.
110 |      */
111 |     public function getHighlights(string $rawText): array
112 |     {
113 |         $parser = new Parser();
114 |         $parser->setMinimumWordLength(3);
115 |         $parser->setRawText($rawText);
116 | 
117 |         if ($this->stopWords) {
118 |             $parser->setStopWords($this->stopWords);
119 |         }
120 | 
121 |         $text = $parser->parse();
122 |         $maximumSentences = (int) (count($text->getSentences()) * 0.2);
123 | 
124 |         $graph = new Graph();
125 |         $graph->createGraph($text);
126 | 
127 |         $score = new Score();
128 |         $scores = $score->calculate($graph, $text);
129 | 
130 |         $summarize = new Summarize();
131 | 
132 |         return $summarize->getSummarize(
133 |             $scores,
134 |             $graph,
135 |             $text,
136 |             12,
137 |             $maximumSentences,
138 |             Summarize::GET_ALL_IMPORTANT
139 |         );
140 |     }
141 | 
142 |     /**
143 |      * Compounds a Summarized Text
144 |      *
145 |      * It finds the three most important sentences from a text by the most
146 |      * important keywords and these keywords also found by automatically. It
147 |      * retrieves these important sentences.
148 |      *
149 |      * @param string $rawText A single raw text.
150 |      *
151 |      * @return array An array from sentences.
152 |      */
153 |     public function summarizeTextCompound(string $rawText): array
154 |     {
155 |         $parser = new Parser();
156 |         $parser->setMinimumWordLength(3);
157 |         $parser->setRawText($rawText);
158 | 
159 |         if ($this->stopWords) {
160 |             $parser->setStopWords($this->stopWords);
161 |         }
162 | 
163 |         $text = $parser->parse();
164 | 
165 |         $graph = new Graph();
166 |         $graph->createGraph($text);
167 | 
168 |         $score = new Score();
169 |         $scores = $score->calculate($graph, $text);
170 | 
171 |         $summarize = new Summarize();
172 | 
173 |         return $summarize->getSummarize(
174 |             $scores,
175 |             $graph,
176 |             $text,
177 |             10,
178 |             3,
179 |             Summarize::GET_ALL_IMPORTANT
180 |         );
181 |     }
182 | 
183 |     /**
184 |      * Summarized Text
185 |      *
186 |      * It finds the most important sentence from a text by the most important
187 |      * keywords and these keywords also found by automatically. It retrieves
188 |      * the most important sentence and its following sentences.
189 |      *
190 |      * @param string $rawText A single raw text.
191 |      *
192 |      * @return array An array from sentences.
193 |      */
194 |     public function summarizeTextBasic(string $rawText): array
195 |     {
196 |         $parser = new Parser();
197 |         $parser->setMinimumWordLength(3);
198 |         $parser->setRawText($rawText);
199 | 
200 |         if ($this->stopWords) {
201 |             $parser->setStopWords($this->stopWords);
202 |         }
203 | 
204 |         $text = $parser->parse();
205 | 
206 |         $graph = new Graph();
207 |         $graph->createGraph($text);
208 | 
209 |         $score = new Score();
210 |         $scores = $score->calculate($graph, $text);
211 | 
212 |         $summarize = new Summarize();
213 | 
214 |         return $summarize->getSummarize(
215 |             $scores,
216 |             $graph,
217 |             $text,
218 |             10,
219 |             3,
220 |             Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS
221 |         );
222 |     }
223 | 
224 |     /**
225 |      * Freely Summarized Text.
226 |      *
227 |      * It retrieves the most important sentences from a text by the most important
228 |      * keywords and these keywords also found by automatically.
229 |      *
230 |      * @param string $rawText           A single raw text.
231 |      * @param int    $analyzedKeyWords  Maximum number of the most important
232 |      *                                  Key Words to analyze the text.
233 |      * @param int    $expectedSentences How many sentence should be retrieved.
234 |      * @param int    $summarizeType     Highlights from the text or a part of
235 |      *                                  the text.
236 |      *
237 |      * @return array An array from sentences.
238 |      */
239 |     public function summarizeTextFreely(
240 |         string $rawText,
241 |         int $analyzedKeyWords,
242 |         int $expectedSentences,
243 |         int $summarizeType
244 |     ): array {
245 |         $parser = new Parser();
246 |         $parser->setMinimumWordLength(3);
247 |         $parser->setRawText($rawText);
248 | 
249 |         if ($this->stopWords) {
250 |             $parser->setStopWords($this->stopWords);
251 |         }
252 | 
253 |         $text = $parser->parse();
254 | 
255 |         $graph = new Graph();
256 |         $graph->createGraph($text);
257 | 
258 |         $score = new Score();
259 |         $scores = $score->calculate($graph, $text);
260 | 
261 |         $summarize = new Summarize();
262 | 
263 |         return $summarize->getSummarize(
264 |             $scores,
265 |             $graph,
266 |             $text,
267 |             $analyzedKeyWords,
268 |             $expectedSentences,
269 |             $summarizeType
270 |         );
271 |     }
272 | }
273 | 


--------------------------------------------------------------------------------
/src/Tool/Graph.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * PHP Science TextRank (http://php.science/)
 4 |  *
 5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
 6 |  * @license https://opensource.org/licenses/MIT the MIT License
 7 |  * @author  David Belicza <david@belicza.com>
 8 |  */
 9 | 
10 | declare(strict_types=1);
11 | 
12 | namespace PhpScience\TextRank\Tool;
13 | 
/**
 * Class Graph
 *
 * The graph is the map of the whole text: for every word it stores the
 * sentences the word occurs in, the positions of the word inside those
 * sentences and the positions of the neighbouring words.
 *
 * @package PhpScience\TextRank\Tool
 */
class Graph
{
    /**
     * Key is the word. Value is an array indexed by the sentence ID, which
     * holds the positions of the word and the positions of its neighbours.
     *
     * @var array
     */
    protected $graph = [];

    /**
     * Create Graph.
     *
     * It walks through the word matrix of the text and registers every word
     * in the graph property together with the positions of the previous and
     * the next kept word of the same sentence. Repeated calls add to the
     * already stored graph.
     *
     * @param Text $text Text object contains the parsed and prepared text
     *                   data.
     */
    public function createGraph(Text &$text)
    {
        foreach ($text->getWordMatrix() as $sentenceIdx => $words) {
            // The words may have gaps in their keys, so the neighbours are
            // looked up in the list of the existing positions.
            $positions = array_keys($words);
            $lastSlot = count($positions) - 1;

            for ($slot = 0; $slot <= $lastSlot; $slot++) {
                $neighbours = [];

                if ($slot > 0) {
                    $neighbours[] = $positions[$slot - 1];
                }

                if ($slot < $lastSlot) {
                    $neighbours[] = $positions[$slot + 1];
                }

                $wordIdx = $positions[$slot];
                $word = $words[$wordIdx];

                $this->graph[$word][$sentenceIdx][$wordIdx] = $neighbours;
            }
        }
    }

    /**
     * Graph.
     *
     * It retrieves the graph built by createGraph(). It is an empty array
     * until a graph has been created.
     *
     * <code>
     *       array(
     *           'apple' => array(    // word
     *               2 => array(      // ID of the sentence
     *                   52 => array( // ID of the word in the sentence
     *                       51, 53   // IDs of the closest words to the apple word
     *                   ),
     *                   10 => array( // ID of the word in the sentence
     *                       9, 11    // IDs of the closest words to the apple word
     *                   ),
     *                   5 => array(6)
     *               ),
     *               6 => array(
     *                   9 => array(8, 10)
     *               ),
     *           ),
     *           'orange' => array(
     *               1  => array(
     *                   30 => array(29, 31)
     *               )
     *           )
     *       );
     * </code>
     *
     * @return array
     */
    public function getGraph(): array
    {
        return $this->graph;
    }
}
99 | 


--------------------------------------------------------------------------------
/src/Tool/Parser.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool;
 13 | 
 14 | use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
 15 | 
 16 | /**
 17 |  * Class Parser
 18 |  *
 19 |  * The purpose of this class is to parse a raw text into sentences and word arrays.
 20 |  *
 21 |  * @package PhpScience\TextRank\Tool
 22 |  */
 23 | class Parser
 24 | {
 25 |     /**
 26 |      * Word length threshold. Words that are not longer than this value are
 27 |      * ignored.
 28 |      *
 29 |      * @var int
 30 |      */
 31 |     protected $minimumWordLength = 0;
 32 | 
 33 |     /**
 34 |      * A single text, article, book for example.
 35 |      *
 36 |      * @var string
 37 |      */
 38 |     protected $rawText = '';
 39 | 
 40 |     /**
 41 |      * The array of the punctuations. The punctuation is the value. The key
 42 |      * refers to the key of its sentence.
 43 |      *
 44 |      * @var array
 45 |      */
 46 |     protected $marks = [];
 47 | 
 48 |     /**
 49 |      * Stop Words to ignore. These words will not be keywords.
 50 |      *
 51 |      * @var StopWordsAbstract
 52 |      */
 53 |     protected $stopWords;
 54 | 
 55 |     /**
 56 |      * It sets the minimum word length. Words bellow it will be ignored.
 57 |      *
 58 |      * @param int $wordLength
 59 |      */
 60 |     public function setMinimumWordLength(int $wordLength)
 61 |     {
 62 |         $this->minimumWordLength = $wordLength;
 63 |     }
 64 | 
 65 |     /**
 66 |      * It sets the raw text.
 67 |      *
 68 |      * @param string $rawText
 69 |      */
 70 |     public function setRawText(string $rawText)
 71 |     {
 72 |         $this->rawText = $rawText;
 73 |     }
 74 | 
 75 |     /**
 76 |      * Set Stop Words.
 77 |      *
 78 |      * It sets the stop words to remove them from the found keywords.
 79 |      *
 80 |      * @param StopWordsAbstract $words Stop Words to ignore. These words will
 81 |      *                                 not be keywords.
 82 |      */
 83 |     public function setStopWords(StopWordsAbstract $words)
 84 |     {
 85 |         $this->stopWords = $words;
 86 |     }
 87 | 
 88 |     /**
 89 |      * It retrieves the punctuations.
 90 |      *
 91 |      * @return array Array from punctuations where key is the index to link to
 92 |      *               the sentence and value is the punctuation.
 93 |      */
 94 |     public function getMarks(): array
 95 |     {
 96 |         return $this->marks;
 97 |     }
 98 | 
 99 |     /**
100 |      * Parse.
101 |      *
102 |      * It parses the text from the property and retrieves in Text object
103 |      * prepared to scoring and to searching.
104 |      *
105 |      * @return Text Parsed text prepared to scoring.
106 |      */
107 |     public function parse(): Text
108 |     {
109 |         $matrix = [];
110 |         $sentences = $this->getSentences();
111 | 
112 |         foreach ($sentences as $sentenceIdx => $sentence) {
113 |             $matrix[$sentenceIdx] = $this->getWords($sentence);
114 |         }
115 | 
116 |         $text = new Text();
117 |         $text->setSentences($sentences);
118 |         $text->setWordMatrix($matrix);
119 |         $text->setMarks($this->marks);
120 | 
121 |         return $text;
122 |     }
123 | 
124 |     /**
125 |      * Sentences.
126 |      *
127 |      * It retrieves the sentences in array without junk data.
128 |      *
129 |      * @return array Array from sentences.
130 |      */
131 |     protected function getSentences(): array
132 |     {
133 |         $sentences = $sentences = preg_split(
134 |             '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/',
135 |             $this->rawText,
136 |             -1,
137 |             PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
138 |         );
139 | 
140 |         return array_values(
141 |             array_filter(
142 |                 array_map(
143 |                     [$this, 'cleanSentence'],
144 |                     $sentences
145 |                 )
146 |             )
147 |         );
148 |     }
149 | 
150 |     /**
151 |      * Possible Keywords.
152 |      *
153 |      * It retrieves an array of possible keywords without junk characters,
154 |      * spaces and stop words.
155 |      *
156 |      * @param string $subText It should be a sentence.
157 |      *
158 |      * @return array The array of the possible keywords.
159 |      */
160 |     protected function getWords(string $subText): array
161 |     {
162 |         $words = preg_split(
163 |             '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/',
164 |             $subText,
165 |             -1,
166 |             PREG_SPLIT_NO_EMPTY
167 |         );
168 | 
169 |         $words = array_values(
170 |             array_filter(
171 |                 array_map(
172 |                     [$this, 'cleanWord'],
173 |                     $words
174 |                 )
175 |             )
176 |         );
177 | 
178 |         if ($this->stopWords) {
179 |             return array_filter($words, function($word) {
180 |                 return !ctype_punct($word)
181 |                         && strlen($word) > $this->minimumWordLength
182 |                         && !$this->stopWords->exist($word);
183 |             });
184 |         } else {
185 |             return array_filter($words, function($word) {
186 |                 return !ctype_punct($word)
187 |                         && strlen($word) > $this->minimumWordLength;
188 |             });
189 |         }
190 |     }
191 | 
192 |     /**
193 |      * Clean Sentence.
194 |      *
195 |      * It clean the sentence. If it is a punctuation it will be stored in the
196 |      * property $marks.
197 |      *
198 |      * @param string $sentence A sentence as a string.
199 |      *
200 |      * @return string It is empty string when it's punctuation. Otherwise it's
201 |      *                the trimmed sentence itself.
202 |      */
203 |     protected function cleanSentence(string $sentence): string
204 |     {
205 |         if (strlen(trim($sentence)) == 1) {
206 |             $this->marks[] = trim($sentence);
207 |             return '';
208 | 
209 |         } else {
210 |             return trim($sentence);
211 |         }
212 |     }
213 | 
214 |     /**
215 |      * Clean Word.
216 |      *
217 |      * It removes the junk spaces from the word and retrieves it.
218 |      *
219 |      * @param string $word
220 |      *
221 |      * @return string Cleaned word.
222 |      */
223 |     protected function cleanWord(string $word): string
224 |     {
225 |         return mb_strtolower(trim($word));
226 |     }
227 | }
228 | 


--------------------------------------------------------------------------------
/src/Tool/Score.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool;
 13 | 
 14 | /**
 15 |  * Class Score
 16 |  *
 17 |  * It handles words and assigns weighted numbers to them.
 18 |  *
 19 |  * @package PhpScience\TextRank\Tool
 20 |  */
 21 | class Score
 22 | {
 23 |     /**
 24 |      * The maximum connections by a word in the current text.
 25 |      *
 26 |      * @var int
 27 |      */
 28 |     protected $maximumValue = 0;
 29 | 
 30 |     /**
 31 |      * The minimum connection by a word in the current text.
 32 |      *
 33 |      * @var int
 34 |      */
 35 |     protected $minimumValue = 0;
 36 | 
 37 |     /**
 38 |      * Calculate Scores.
 39 |      *
 40 |      * It calculates the scores from word's connections and the connections'
 41 |      * scores. It retrieves the scores in a form of a matrix where the key is
 42 |      * the word and value is the score. The score is between 0 and 1.
 43 |      *
 44 |      * @param Graph $graph The graph of the text.
 45 |      * @param Text  $text  Text object what stores all text data.
 46 |      *
 47 |      * @return array Key is the word and value is the float or int type score
 48 |      *               between 1 and 0.
 49 |      */
 50 |     public function calculate(Graph $graph, Text &$text): array
 51 |     {
 52 |         $graphData = $graph->getGraph();
 53 |         $wordMatrix = $text->getWordMatrix();
 54 |         $wordConnections = $this->calculateConnectionNumbers($graphData);
 55 |         $scores = $this->calculateScores(
 56 |             $graphData,
 57 |             $wordMatrix,
 58 |             $wordConnections
 59 |         );
 60 | 
 61 |         return $this->normalizeAndSortScores($scores);
 62 |     }
 63 | 
 64 |     /**
 65 |      * Connection Numbers.
 66 |      *
 67 |      * It calculates the number of connections for each word and retrieves it
 68 |      * in array where key is the word and value is the number of connections.
 69 |      *
 70 |      * @param array $graphData Graph data from a Graph type object.
 71 |      *
 72 |      * @return array Key is the word and value is the number of the connected
 73 |      *               words.
 74 |      */
 75 |     protected function calculateConnectionNumbers(array &$graphData): array
 76 |     {
 77 |         $wordConnections = [];
 78 | 
 79 |         foreach ($graphData as $wordKey => $sentences) {
 80 |             $connectionCount = 0;
 81 | 
 82 |             foreach ($sentences as $sentenceIdx => $wordInstances) {
 83 |                 foreach ($wordInstances as $connections) {
 84 |                     $connectionCount += count($connections);
 85 |                 }
 86 |             }
 87 | 
 88 |             $wordConnections[$wordKey] = $connectionCount;
 89 |         }
 90 | 
 91 |         return $wordConnections;
 92 |     }
 93 | 
 94 |     /**
 95 |      * Calculate Scores.
 96 |      *
 97 |      * It calculates the score of the words and retrieves it in array where key
 98 |      * is the word and value is the score. The score depends on the number of
 99 |      * the connections and the closest word's connection numbers.
100 |      *
101 |      * @param array $graphData       Graph data from a Graph type object.
102 |      * @param array $wordMatrix      Multidimensional array from integer keys
103 |      *                               and string values.
104 |      * @param array $wordConnections Key is the word and value is the number of
105 |      *                               the connected words.
106 |      *
107 |      * @return array Scores where key is the word and value is the score.
108 |      */
109 |     protected function calculateScores(
110 |         array &$graphData,
111 |         array &$wordMatrix,
112 |         array &$wordConnections
113 |     ): array {
114 |         $scores = [];
115 | 
116 |         foreach ($graphData as $wordKey => $sentences) {
117 |             $value = 0;
118 | 
119 |             foreach ($sentences as $sentenceIdx => $wordInstances) {
120 |                 foreach ($wordInstances as $connections) {
121 |                     foreach ($connections as $wordIdx) {
122 |                         $word = $wordMatrix[$sentenceIdx][$wordIdx];
123 |                         $value += $wordConnections[$word];
124 |                     }
125 |                 }
126 |             }
127 | 
128 |             $scores[$wordKey] = $value;
129 | 
130 |             if ($value > $this->maximumValue) {
131 |                 $this->maximumValue = $value;
132 |             }
133 | 
134 |             if ($value < $this->minimumValue || $this->minimumValue == 0) {
135 |                 $this->minimumValue = $value;
136 |             }
137 |         }
138 | 
139 |         return $scores;
140 |     }
141 | 
142 |     /**
143 |      * Normalize and Sort Scores.
144 |      *
145 |      * It recalculates the scores by normalize the score numbers to between 0
146 |      * and 1.
147 |      *
148 |      * @param array $scores Keywords with scores. Score is the key.
149 |      *
150 |      * @return array Keywords with normalized and ordered scores.
151 |      */
152 |     protected function normalizeAndSortScores(array &$scores): array
153 |     {
154 |         foreach ($scores as $key => $value) {
155 |             $v = $this->normalize(
156 |                 $value,
157 |                 $this->minimumValue,
158 |                 $this->maximumValue
159 |             );
160 | 
161 |             $scores[$key] = $v;
162 |         }
163 | 
164 |         arsort($scores);
165 | 
166 |         return $scores;
167 |     }
168 | 
169 |     /**
170 |      * It normalizes a number.
171 |      *
172 |      * @param int $value Current weight.
173 |      * @param int $min   Minimum weight.
174 |      * @param int $max   Maximum weight.
175 |      *
176 |      * @return float|int Normalized weight aka score.
177 |      */
178 |     protected function normalize(int $value, int $min, int $max): float
179 |     {
180 |         $divisor = $max - $min;
181 | 
182 |         if ($divisor == 0) {
183 |             return 0.0;
184 |         }
185 | 
186 |         $normalized = ($value - $min) / $divisor;
187 | 
188 |         return $normalized;
189 |     }
190 | }
191 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Arabic.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | declare(strict_types=1);
  4 | 
  5 | namespace PhpScience\TextRank\Tool\StopWords;
  6 | 
  7 | /**
  8 |  * Class Arabic
  9 |  *
 10 |  * @package PhpScience\TextRank\Tool\StopWords
 11 |  */
 12 | class Arabic extends StopWordsAbstract
 13 | {
 14 |     /**
 15 |      * Stop words for avoid dummy keywords for Language Arabic.
 16 |      *
 17 |      * @var array
 18 |      */
 19 | 
 20 |     protected $words = [
 21 |         'يوم',
 22 |         'يمكن',
 23 |         'يكون',
 24 |         'وهي',
 25 |         'وهو',
 26 |         'ومن',
 27 |         'ولم',
 28 |         'ولا',
 29 |         'وكانت',
 30 |         'وكان',
 31 |         'وقف',
 32 |         'وقد',
 33 |         'وقالت',
 34 |         'وقال',
 35 |         'وفي',
 36 |         'واوضح',
 37 |         'وان',
 38 |         'واكد',
 39 |         'واضافت',
 40 |         'واضاف',
 41 |         'واحد',
 42 |         'و',
 43 |         'هي',
 44 |         'هو',
 45 |         'هناك',
 46 |         'هذه',
 47 |         'هذا',
 48 |         'نهاية',
 49 |         'نفسه',
 50 |         'نحو',
 51 |         'منها',
 52 |         'منذ',
 53 |         'من',
 54 |         'مليون',
 55 |         'مليار',
 56 |         'مقابل',
 57 |         'مع',
 58 |         'مساء',
 59 |         'مايو',
 60 |         'ما',
 61 |         'لوكالة',
 62 |         'لها',
 63 |         'له',
 64 |         'لن',
 65 |         'لم',
 66 |         'للامم',
 67 |         'لكن',
 68 |         'لقاء',
 69 |         'لدى',
 70 |         'لا',
 71 |         'كما',
 72 |         'كلم',
 73 |         'كل',
 74 |         'كانت',
 75 |         'كان',
 76 |         'قوة',
 77 |         'قد',
 78 |         'قبل',
 79 |         'قال',
 80 |         'فيها',
 81 |         'فيه',
 82 |         'فى',
 83 |         'في',
 84 |         'فان',
 85 |         'ف',
 86 |         'غير',
 87 |         'غدا',
 88 |         'عندما',
 89 |         'عند',
 90 |         'عن',
 91 |         'عليها',
 92 |         'عليه',
 93 |         'على',
 94 |         'عشرة',
 95 |         'عشر',
 96 |         'عدم',
 97 |         'عدد',
 98 |         'عدة',
 99 |         'عاما',
100 |         'عام',
101 |         'ضمن',
102 |         'ضد',
103 |         'صفر',
104 |         'صباح',
105 |         'شخصا',
106 |         'سنوات',
107 |         'سنة',
108 |         'زيارة',
109 |         'ذلك',
110 |         'دون',
111 |         'خلال',
112 |         'حين',
113 |         'حيث',
114 |         'حول',
115 |         'حوالى',
116 |         'حتى',
117 |         'حاليا',
118 |         'جميع',
119 |         'ثم',
120 |         'ثلاثة',
121 |         'تم',
122 |         'بين',
123 |         'بها',
124 |         'به',
125 |         'بن',
126 |         'بعض',
127 |         'بعد',
128 |         'بشكل',
129 |         'بسبب',
130 |         'برس',
131 |         'بان',
132 |         'باسم',
133 |         'ب',
134 |         'ايضا',
135 |         'ايام',
136 |         'ايار',
137 |         'اي',
138 |         'اول',
139 |         'او',
140 |         'انها',
141 |         'انه',
142 |         'ان',
143 |         'امس',
144 |         'امام',
145 |         'اما',
146 |         'اليوم',
147 |         'الى',
148 |         'الوقت',
149 |         'المقبل',
150 |         'الماضي',
151 |         'الف',
152 |         'السابق',
153 |         'الذين',
154 |         'الذى',
155 |         'الذي',
156 |         'الذاتي',
157 |         'الثانية',
158 |         'الثاني',
159 |         'التى',
160 |         'التي',
161 |         'الاولى',
162 |         'الاول',
163 |         'الان',
164 |         'الاخيرة',
165 |         'الا',
166 |         'اكد',
167 |         'اكثر',
168 |         'اف',
169 |         'اعلنت',
170 |         'اعادة',
171 |         'اطار',
172 |         'اربعة',
173 |         'اذا',
174 |         'اخرى',
175 |         'احد',
176 |         'اجل',
177 |         'اثر',
178 |         'ا',
179 |         'أ',
180 |         'ـ'
181 |     ];
182 | }
183 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Dutch.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | declare(strict_types=1);
  4 | 
  5 | namespace PhpScience\TextRank\Tool\StopWords;
  6 | 
  7 | /**
  8 |  * Class Dutch
  9 |  *
 10 |  * @package PhpScience\TextRank\Tool\StopWords
 11 |  */
 12 | class Dutch extends StopWordsAbstract
 13 | {
 14 |     /**
 15 |      * Stop words for avoid dummy keywords for Language Dutch.
 16 |      *
 17 |      * @var array
 18 |      */
 19 |     protected $words = [
 20 |         'aan',
 21 |         'aangaande',
 22 |         'aangezien',
 23 |         'achte',
 24 |         'achter',
 25 |         'achterna',
 26 |         'af',
 27 |         'afgelopen',
 28 |         'al',
 29 |         'aldaar',
 30 |         'aldus',
 31 |         'alhoewel',
 32 |         'alias',
 33 |         'alle',
 34 |         'allebei',
 35 |         'alleen',
 36 |         'alles',
 37 |         'als',
 38 |         'alsnog',
 39 |         'altijd',
 40 |         'altoos',
 41 |         'ander',
 42 |         'andere',
 43 |         'anders',
 44 |         'anderszins',
 45 |         'beetje',
 46 |         'behalve',
 47 |         'behoudens',
 48 |         'beide',
 49 |         'beiden',
 50 |         'ben',
 51 |         'beneden',
 52 |         'bent',
 53 |         'bepaald',
 54 |         'betreffende',
 55 |         'bij',
 56 |         'bijna',
 57 |         'bijv',
 58 |         'binnen',
 59 |         'binnenin',
 60 |         'blijkbaar',
 61 |         'blijken',
 62 |         'boven',
 63 |         'bovenal',
 64 |         'bovendien',
 65 |         'bovengenoemd',
 66 |         'bovenstaand',
 67 |         'bovenvermeld',
 68 |         'buiten',
 69 |         'bv',
 70 |         'daar',
 71 |         'daardoor',
 72 |         'daarheen',
 73 |         'daarin',
 74 |         'daarna',
 75 |         'daarnet',
 76 |         'daarom',
 77 |         'daarop',
 78 |         'daaruit',
 79 |         'daarvanlangs',
 80 |         'dan',
 81 |         'dat',
 82 |         'de',
 83 |         'deden',
 84 |         'deed',
 85 |         'der',
 86 |         'derde',
 87 |         'derhalve',
 88 |         'dertig',
 89 |         'deze',
 90 |         'dhr',
 91 |         'die',
 92 |         'dikwijls',
 93 |         'dit',
 94 |         'doch',
 95 |         'doe',
 96 |         'doen',
 97 |         'doet',
 98 |         'door',
 99 |         'doorgaand',
100 |         'drie',
101 |         'duizend',
102 |         'dus',
103 |         'echter',
104 |         'een',
105 |         'eens',
106 |         'eer',
107 |         'eerdat',
108 |         'eerder',
109 |         'eerlang',
110 |         'eerst',
111 |         'eerste',
112 |         'eigen',
113 |         'eigenlijk',
114 |         'elk',
115 |         'elke',
116 |         'en',
117 |         'enig',
118 |         'enige',
119 |         'enigszins',
120 |         'enkel',
121 |         'er',
122 |         'erdoor',
123 |         'erg',
124 |         'ergens',
125 |         'etc',
126 |         'etcetera',
127 |         'even',
128 |         'eveneens',
129 |         'evenwel',
130 |         'gauw',
131 |         'ge',
132 |         'gedurende',
133 |         'geen',
134 |         'gehad',
135 |         'gekund',
136 |         'geleden',
137 |         'gelijk',
138 |         'gemoeten',
139 |         'gemogen',
140 |         'genoeg',
141 |         'geweest',
142 |         'gewoon',
143 |         'gewoonweg',
144 |         'haar',
145 |         'haarzelf',
146 |         'had',
147 |         'hadden',
148 |         'hare',
149 |         'heb',
150 |         'hebben',
151 |         'hebt',
152 |         'hedden',
153 |         'heeft',
154 |         'heel',
155 |         'hem',
156 |         'hemzelf',
157 |         'hen',
158 |         'het',
159 |         'hetzelfde',
160 |         'hier',
161 |         'hierbeneden',
162 |         'hierboven',
163 |         'hierin',
164 |         'hierna',
165 |         'hierom',
166 |         'hij',
167 |         'hijzelf',
168 |         'hoe',
169 |         'hoewel',
170 |         'honderd',
171 |         'hun',
172 |         'hunne',
173 |         'ieder',
174 |         'iedere',
175 |         'iedereen',
176 |         'iemand',
177 |         'iets',
178 |         'ik',
179 |         'ikzelf',
180 |         'in',
181 |         'inderdaad',
182 |         'inmiddels',
183 |         'intussen',
184 |         'inzake',
185 |         'is',
186 |         'ja',
187 |         'je',
188 |         'jezelf',
189 |         'jij',
190 |         'jijzelf',
191 |         'jou',
192 |         'jouw',
193 |         'jouwe',
194 |         'juist',
195 |         'jullie',
196 |         'kan',
197 |         'klaar',
198 |         'kon',
199 |         'konden',
200 |         'krachtens',
201 |         'kun',
202 |         'kunnen',
203 |         'kunt',
204 |         'laatst',
205 |         'later',
206 |         'liever',
207 |         'lijken',
208 |         'lijkt',
209 |         'maak',
210 |         'maakt',
211 |         'maakte',
212 |         'maakten',
213 |         'maar',
214 |         'mag',
215 |         'maken',
216 |         'me',
217 |         'meer',
218 |         'meest',
219 |         'meestal',
220 |         'men',
221 |         'met',
222 |         'mevr',
223 |         'mezelf',
224 |         'mij',
225 |         'mijn',
226 |         'mijnent',
227 |         'mijner',
228 |         'mijzelf',
229 |         'minder',
230 |         'miss',
231 |         'misschien',
232 |         'missen',
233 |         'mits',
234 |         'mocht',
235 |         'mochten',
236 |         'moest',
237 |         'moesten',
238 |         'moet',
239 |         'moeten',
240 |         'mogen',
241 |         'mr',
242 |         'mrs',
243 |         'mw',
244 |         'na',
245 |         'naar',
246 |         'nadat',
247 |         'nam',
248 |         'namelijk',
249 |         'nee',
250 |         'neem',
251 |         'negen',
252 |         'nemen',
253 |         'nergens',
254 |         'net',
255 |         'niemand',
256 |         'niet',
257 |         'niets',
258 |         'niks',
259 |         'noch',
260 |         'nochtans',
261 |         'nog',
262 |         'nogal',
263 |         'nooit',
264 |         'nu',
265 |         'nv',
266 |         'of',
267 |         'ofschoon',
268 |         'om',
269 |         'omdat',
270 |         'omhoog',
271 |         'omlaag',
272 |         'omstreeks',
273 |         'omtrent',
274 |         'omver',
275 |         'ondanks',
276 |         'onder',
277 |         'ondertussen',
278 |         'ongeveer',
279 |         'ons',
280 |         'onszelf',
281 |         'onze',
282 |         'onzeker',
283 |         'ooit',
284 |         'ook',
285 |         'op',
286 |         'opnieuw',
287 |         'opzij',
288 |         'over',
289 |         'overal',
290 |         'overeind',
291 |         'overige',
292 |         'overigens',
293 |         'paar',
294 |         'pas',
295 |         'per',
296 |         'precies',
297 |         'recent',
298 |         'redelijk',
299 |         'reeds',
300 |         'rond',
301 |         'rondom',
302 |         'samen',
303 |         'sedert',
304 |         'sinds',
305 |         'sindsdien',
306 |         'slechts',
307 |         'sommige',
308 |         'spoedig',
309 |         'steeds',
310 |         'tamelijk',
311 |         'te',
312 |         'tegen',
313 |         'tegenover',
314 |         'tenzij',
315 |         'terwijl',
316 |         'thans',
317 |         'tien',
318 |         'tiende',
319 |         'tijdens',
320 |         'tja',
321 |         'toch',
322 |         'toe',
323 |         'toen',
324 |         'toenmaals',
325 |         'toenmalig',
326 |         'tot',
327 |         'totdat',
328 |         'tussen',
329 |         'twee',
330 |         'tweede',
331 |         'u',
332 |         'uit',
333 |         'uitgezonderd',
334 |         'uw',
335 |         'vaak',
336 |         'vaakwat',
337 |         'van',
338 |         'vanaf',
339 |         'vandaan',
340 |         'vanuit',
341 |         'vanwege',
342 |         'veel',
343 |         'veeleer',
344 |         'veertig',
345 |         'verder',
346 |         'verscheidene',
347 |         'verschillende',
348 |         'vervolgens',
349 |         'via',
350 |         'vier',
351 |         'vierde',
352 |         'vijf',
353 |         'vijfde',
354 |         'vijftig',
355 |         'vol',
356 |         'volgend',
357 |         'volgens',
358 |         'voor',
359 |         'vooraf',
360 |         'vooral',
361 |         'vooralsnog',
362 |         'voorbij',
363 |         'voordat',
364 |         'voordezen',
365 |         'voordien',
366 |         'voorheen',
367 |         'voorop',
368 |         'voorts',
369 |         'vooruit',
370 |         'vrij',
371 |         'vroeg',
372 |         'waar',
373 |         'waarom',
374 |         'waarschijnlijk',
375 |         'wanneer',
376 |         'want',
377 |         'waren',
378 |         'was',
379 |         'wat',
380 |         'we',
381 |         'wederom',
382 |         'weer',
383 |         'weg',
384 |         'wegens',
385 |         'weinig',
386 |         'wel',
387 |         'weldra',
388 |         'welk',
389 |         'welke',
390 |         'werd',
391 |         'werden',
392 |         'werder',
393 |         'wezen',
394 |         'whatever',
395 |         'wie',
396 |         'wiens',
397 |         'wier',
398 |         'wij',
399 |         'wijzelf',
400 |         'wil',
401 |         'wilden',
402 |         'willen',
403 |         'word',
404 |         'worden',
405 |         'wordt',
406 |         'zal',
407 |         'ze',
408 |         'zei',
409 |         'zeker',
410 |         'zelf',
411 |         'zelfde',
412 |         'zelfs',
413 |         'zes',
414 |         'zeven',
415 |         'zich',
416 |         'zichzelf',
417 |         'zij',
418 |         'zijn',
419 |         'zijne',
420 |         'zijzelf',
421 |         'zo',
422 |         'zoals',
423 |         'zodat',
424 |         'zodra',
425 |         'zonder',
426 |         'zou',
427 |         'zouden',
428 |         'zowat',
429 |         'zulk',
430 |         'zulke',
431 |         'zullen',
432 |         'zult',
433 |     ];
434 | }
435 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/English.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool\StopWords;
 13 | 
 14 | /**
 15 |  * Class English
 16 |  *
 17 |  * @package PhpScience\TextRank\Tool\StopWords
 18 |  */
 19 | class English extends StopWordsAbstract
 20 | {
 21 |     /**
 22 |      * Stop words for avoid dummy keywords for Language English.
 23 |      *
 24 |      * @var array
 25 |      */
 26 |     protected $words = [
 27 |         'a',
 28 |         'about',
 29 |         'above',
 30 |         'above',
 31 |         'across',
 32 |         'after',
 33 |         'afterwards',
 34 |         'again',
 35 |         'against',
 36 |         'all',
 37 |         'almost',
 38 |         'alone',
 39 |         'along',
 40 |         'already',
 41 |         'also',
 42 |         'although',
 43 |         'always',
 44 |         'am',
 45 |         'among',
 46 |         'amongst',
 47 |         'amoungst',
 48 |         'amount',
 49 |         'an',
 50 |         'and',
 51 |         'another',
 52 |         'any',
 53 |         'anyhow',
 54 |         'anyone',
 55 |         'anything',
 56 |         'anyway',
 57 |         'anywhere',
 58 |         'are',
 59 |         'around',
 60 |         'as',
 61 |         'at',
 62 |         'back',
 63 |         'be',
 64 |         'became',
 65 |         'because',
 66 |         'become',
 67 |         'becomes',
 68 |         'becoming',
 69 |         'been',
 70 |         'before',
 71 |         'beforehand',
 72 |         'behind',
 73 |         'being',
 74 |         'below',
 75 |         'beside',
 76 |         'besides',
 77 |         'between',
 78 |         'beyond',
 79 |         'bill',
 80 |         'both',
 81 |         'bottom',
 82 |         'but',
 83 |         'by',
 84 |         'call',
 85 |         'can',
 86 |         'cannot',
 87 |         'cant',
 88 |         'co',
 89 |         'con',
 90 |         'could',
 91 |         'couldnt',
 92 |         'cry',
 93 |         'de',
 94 |         'describe',
 95 |         'detail',
 96 |         'do',
 97 |         'done',
 98 |         'down',
 99 |         'due',
100 |         'during',
101 |         'each',
102 |         'eg',
103 |         'eight',
104 |         'either',
105 |         'eleven',
106 |         'else',
107 |         'elsewhere',
108 |         'empty',
109 |         'enough',
110 |         'etc',
111 |         'even',
112 |         'ever',
113 |         'every',
114 |         'everyone',
115 |         'everything',
116 |         'everywhere',
117 |         'except',
118 |         'few',
119 |         'fifteen',
120 |         'fify',
121 |         'fill',
122 |         'find',
123 |         'fire',
124 |         'first',
125 |         'five',
126 |         'for',
127 |         'former',
128 |         'formerly',
129 |         'forty',
130 |         'found',
131 |         'four',
132 |         'from',
133 |         'front',
134 |         'full',
135 |         'further',
136 |         'get',
137 |         'give',
138 |         'go',
139 |         'had',
140 |         'has',
141 |         'hasnt',
142 |         'have',
143 |         'he',
144 |         'hence',
145 |         'her',
146 |         'here',
147 |         'hereafter',
148 |         'hereby',
149 |         'herein',
150 |         'hereupon',
151 |         'hers',
152 |         'herself',
153 |         'him',
154 |         'himself',
155 |         'his',
156 |         'how',
157 |         'however',
158 |         'hundred',
159 |         'ie',
160 |         'if',
161 |         'in',
162 |         'inc',
163 |         'indeed',
164 |         'interest',
165 |         'into',
166 |         'is',
167 |         'it',
168 |         'its',
169 |         'itself',
170 |         'keep',
171 |         'last',
172 |         'latter',
173 |         'latterly',
174 |         'least',
175 |         'less',
176 |         'ltd',
177 |         'made',
178 |         'many',
179 |         'may',
180 |         'me',
181 |         'meanwhile',
182 |         'might',
183 |         'mill',
184 |         'mine',
185 |         'more',
186 |         'moreover',
187 |         'most',
188 |         'mostly',
189 |         'move',
190 |         'much',
191 |         'must',
192 |         'my',
193 |         'myself',
194 |         'name',
195 |         'namely',
196 |         'neither',
197 |         'never',
198 |         'nevertheless',
199 |         'next',
200 |         'nine',
201 |         'no',
202 |         'nobody',
203 |         'none',
204 |         'noone',
205 |         'nor',
206 |         'not',
207 |         'nothing',
208 |         'now',
209 |         'nowhere',
210 |         'of',
211 |         'off',
212 |         'often',
213 |         'on',
214 |         'once',
215 |         'one',
216 |         'only',
217 |         'onto',
218 |         'or',
219 |         'other',
220 |         'others',
221 |         'otherwise',
222 |         'our',
223 |         'ours',
224 |         'ourselves',
225 |         'out',
226 |         'over',
227 |         'own',
228 |         'part',
229 |         'per',
230 |         'perhaps',
231 |         'please',
232 |         'put',
233 |         'rather',
234 |         're',
235 |         'same',
236 |         'see',
237 |         'seem',
238 |         'seemed',
239 |         'seeming',
240 |         'seems',
241 |         'serious',
242 |         'several',
243 |         'she',
244 |         'should',
245 |         'show',
246 |         'side',
247 |         'since',
248 |         'sincere',
249 |         'six',
250 |         'sixty',
251 |         'so',
252 |         'some',
253 |         'somehow',
254 |         'someone',
255 |         'something',
256 |         'sometime',
257 |         'sometimes',
258 |         'somewhere',
259 |         'still',
260 |         'such',
261 |         'system',
262 |         'take',
263 |         'ten',
264 |         'than',
265 |         'that',
266 |         'the',
267 |         'their',
268 |         'them',
269 |         'themselves',
270 |         'then',
271 |         'thence',
272 |         'there',
273 |         'thereafter',
274 |         'thereby',
275 |         'therefore',
276 |         'therein',
277 |         'thereupon',
278 |         'these',
279 |         'they',
280 |         'thickv',
281 |         'thin',
282 |         'third',
283 |         'this',
284 |         'those',
285 |         'though',
286 |         'three',
287 |         'through',
288 |         'throughout',
289 |         'thru',
290 |         'thus',
291 |         'to',
292 |         'together',
293 |         'too',
294 |         'top',
295 |         'toward',
296 |         'towards',
297 |         'twelve',
298 |         'twenty',
299 |         'two',
300 |         'un',
301 |         'under',
302 |         'until',
303 |         'up',
304 |         'upon',
305 |         'us',
306 |         'very',
307 |         'via',
308 |         'was',
309 |         'we',
310 |         'well',
311 |         'were',
312 |         'what',
313 |         'whatever',
314 |         'when',
315 |         'whence',
316 |         'whenever',
317 |         'where',
318 |         'whereafter',
319 |         'whereas',
320 |         'whereby',
321 |         'wherein',
322 |         'whereupon',
323 |         'wherever',
324 |         'whether',
325 |         'which',
326 |         'while',
327 |         'whither',
328 |         'who',
329 |         'whoever',
330 |         'whole',
331 |         'whom',
332 |         'whose',
333 |         'why',
334 |         'will',
335 |         'with',
336 |         'within',
337 |         'without',
338 |         'would',
339 |         'yet',
340 |         'you',
341 |         'your',
342 |         'yours',
343 |         'yourself',
344 |         'yourselves'
345 |     ];
346 | }
347 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/French.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  Syndesi <github.com/Syndesi>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool\StopWords;
 13 | 
 14 | /**
 15 |  * Class French
 16 |  *
 17 |  * @package PhpScience\TextRank\Tool\StopWords
 18 |  */
 19 | class French extends StopWordsAbstract
 20 | {
 21 |     /**
 22 |      * Stop words for avoid dummy keywords for Language French.
 23 |      * Source: https://github.com/stopwords-iso/stopwords-fr
 24 |      *
 25 |      * @var array
 26 |      */
 27 |     protected $words = [
 28 |         'a',
 29 |         'abord',
 30 |         'absolument',
 31 |         'afin',
 32 |         'ah',
 33 |         'ai',
 34 |         'aie',
 35 |         'aient',
 36 |         'aies',
 37 |         'ailleurs',
 38 |         'ainsi',
 39 |         'ait',
 40 |         'allaient',
 41 |         'allo',
 42 |         'allons',
 43 |         'allô',
 44 |         'alors',
 45 |         'anterieur',
 46 |         'anterieure',
 47 |         'anterieures',
 48 |         'apres',
 49 |         'après',
 50 |         'as',
 51 |         'assez',
 52 |         'attendu',
 53 |         'au',
 54 |         'aucun',
 55 |         'aucune',
 56 |         'aucuns',
 57 |         'aujourd',
 58 |         'aujourd\'hui',
 59 |         'aupres',
 60 |         'auquel',
 61 |         'aura',
 62 |         'aurai',
 63 |         'auraient',
 64 |         'aurais',
 65 |         'aurait',
 66 |         'auras',
 67 |         'aurez',
 68 |         'auriez',
 69 |         'aurions',
 70 |         'aurons',
 71 |         'auront',
 72 |         'aussi',
 73 |         'autre',
 74 |         'autrefois',
 75 |         'autrement',
 76 |         'autres',
 77 |         'autrui',
 78 |         'aux',
 79 |         'auxquelles',
 80 |         'auxquels',
 81 |         'avaient',
 82 |         'avais',
 83 |         'avait',
 84 |         'avant',
 85 |         'avec',
 86 |         'avez',
 87 |         'aviez',
 88 |         'avions',
 89 |         'avoir',
 90 |         'avons',
 91 |         'ayant',
 92 |         'ayez',
 93 |         'ayons',
 94 |         'b',
 95 |         'bah',
 96 |         'bas',
 97 |         'basee',
 98 |         'bat',
 99 |         'beau',
100 |         'beaucoup',
101 |         'bien',
102 |         'bigre',
103 |         'bon',
104 |         'boum',
105 |         'bravo',
106 |         'brrr',
107 |         'c',
108 |         'car',
109 |         'ce',
110 |         'ceci',
111 |         'cela',
112 |         'celle',
113 |         'celle-ci',
114 |         'celle-là',
115 |         'celles',
116 |         'celles-ci',
117 |         'celles-là',
118 |         'celui',
119 |         'celui-ci',
120 |         'celui-là',
121 |         'celà',
122 |         'cent',
123 |         'cependant',
124 |         'certain',
125 |         'certaine',
126 |         'certaines',
127 |         'certains',
128 |         'certes',
129 |         'ces',
130 |         'cet',
131 |         'cette',
132 |         'ceux',
133 |         'ceux-ci',
134 |         'ceux-là',
135 |         'chacun',
136 |         'chacune',
137 |         'chaque',
138 |         'cher',
139 |         'chers',
140 |         'chez',
141 |         'chiche',
142 |         'chut',
143 |         'chère',
144 |         'chères',
145 |         'ci',
146 |         'cinq',
147 |         'cinquantaine',
148 |         'cinquante',
149 |         'cinquantième',
150 |         'cinquième',
151 |         'clac',
152 |         'clic',
153 |         'combien',
154 |         'comme',
155 |         'comment',
156 |         'comparable',
157 |         'comparables',
158 |         'compris',
159 |         'concernant',
160 |         'contre',
161 |         'couic',
162 |         'crac',
163 |         'd',
164 |         'da',
165 |         'dans',
166 |         'de',
167 |         'debout',
168 |         'dedans',
169 |         'dehors',
170 |         'deja',
171 |         'delà',
172 |         'depuis',
173 |         'dernier',
174 |         'derniere',
175 |         'derriere',
176 |         'derrière',
177 |         'des',
178 |         'desormais',
179 |         'desquelles',
180 |         'desquels',
181 |         'dessous',
182 |         'dessus',
183 |         'deux',
184 |         'deuxième',
185 |         'deuxièmement',
186 |         'devant',
187 |         'devers',
188 |         'devra',
189 |         'devrait',
190 |         'different',
191 |         'differentes',
192 |         'differents',
193 |         'différent',
194 |         'différente',
195 |         'différentes',
196 |         'différents',
197 |         'dire',
198 |         'directe',
199 |         'directement',
200 |         'dit',
201 |         'dite',
202 |         'dits',
203 |         'divers',
204 |         'diverse',
205 |         'diverses',
206 |         'dix',
207 |         'dix-huit',
208 |         'dix-neuf',
209 |         'dix-sept',
210 |         'dixième',
211 |         'doit',
212 |         'doivent',
213 |         'donc',
214 |         'dont',
215 |         'dos',
216 |         'douze',
217 |         'douzième',
218 |         'dring',
219 |         'droite',
220 |         'du',
221 |         'duquel',
222 |         'durant',
223 |         'dès',
224 |         'début',
225 |         'désormais',
226 |         'e',
227 |         'effet',
228 |         'egale',
229 |         'egalement',
230 |         'egales',
231 |         'eh',
232 |         'elle',
233 |         'elle-même',
234 |         'elles',
235 |         'elles-mêmes',
236 |         'en',
237 |         'encore',
238 |         'enfin',
239 |         'entre',
240 |         'envers',
241 |         'environ',
242 |         'es',
243 |         'essai',
244 |         'est',
245 |         'et',
246 |         'etant',
247 |         'etc',
248 |         'etre',
249 |         'eu',
250 |         'eue',
251 |         'eues',
252 |         'euh',
253 |         'eurent',
254 |         'eus',
255 |         'eusse',
256 |         'eussent',
257 |         'eusses',
258 |         'eussiez',
259 |         'eussions',
260 |         'eut',
261 |         'eux',
262 |         'eux-mêmes',
263 |         'exactement',
264 |         'excepté',
265 |         'extenso',
266 |         'exterieur',
267 |         'eûmes',
268 |         'eût',
269 |         'eûtes',
270 |         'f',
271 |         'fais',
272 |         'faisaient',
273 |         'faisant',
274 |         'fait',
275 |         'faites',
276 |         'façon',
277 |         'feront',
278 |         'fi',
279 |         'flac',
280 |         'floc',
281 |         'fois',
282 |         'font',
283 |         'force',
284 |         'furent',
285 |         'fus',
286 |         'fusse',
287 |         'fussent',
288 |         'fusses',
289 |         'fussiez',
290 |         'fussions',
291 |         'fut',
292 |         'fûmes',
293 |         'fût',
294 |         'fûtes',
295 |         'g',
296 |         'gens',
297 |         'h',
298 |         'ha',
299 |         'haut',
300 |         'hein',
301 |         'hem',
302 |         'hep',
303 |         'hi',
304 |         'ho',
305 |         'holà',
306 |         'hop',
307 |         'hormis',
308 |         'hors',
309 |         'hou',
310 |         'houp',
311 |         'hue',
312 |         'hui',
313 |         'huit',
314 |         'huitième',
315 |         'hum',
316 |         'hurrah',
317 |         'hé',
318 |         'hélas',
319 |         'i',
320 |         'ici',
321 |         'il',
322 |         'ils',
323 |         'importe',
324 |         'j',
325 |         'je',
326 |         'jusqu',
327 |         'jusque',
328 |         'juste',
329 |         'k',
330 |         'l',
331 |         'la',
332 |         'laisser',
333 |         'laquelle',
334 |         'las',
335 |         'le',
336 |         'lequel',
337 |         'les',
338 |         'lesquelles',
339 |         'lesquels',
340 |         'leur',
341 |         'leurs',
342 |         'longtemps',
343 |         'lors',
344 |         'lorsque',
345 |         'lui',
346 |         'lui-meme',
347 |         'lui-même',
348 |         'là',
349 |         'lès',
350 |         'm',
351 |         'ma',
352 |         'maint',
353 |         'maintenant',
354 |         'mais',
355 |         'malgre',
356 |         'malgré',
357 |         'maximale',
358 |         'me',
359 |         'meme',
360 |         'memes',
361 |         'merci',
362 |         'mes',
363 |         'mien',
364 |         'mienne',
365 |         'miennes',
366 |         'miens',
367 |         'mille',
368 |         'mince',
369 |         'mine',
370 |         'minimale',
371 |         'moi',
372 |         'moi-meme',
373 |         'moi-même',
374 |         'moindres',
375 |         'moins',
376 |         'mon',
377 |         'mot',
378 |         'moyennant',
379 |         'multiple',
380 |         'multiples',
381 |         'même',
382 |         'mêmes',
383 |         'n',
384 |         'na',
385 |         'naturel',
386 |         'naturelle',
387 |         'naturelles',
388 |         'ne',
389 |         'neanmoins',
390 |         'necessaire',
391 |         'necessairement',
392 |         'neuf',
393 |         'neuvième',
394 |         'ni',
395 |         'nombreuses',
396 |         'nombreux',
397 |         'nommés',
398 |         'non',
399 |         'nos',
400 |         'notamment',
401 |         'notre',
402 |         'nous',
403 |         'nous-mêmes',
404 |         'nouveau',
405 |         'nouveaux',
406 |         'nul',
407 |         'néanmoins',
408 |         'nôtre',
409 |         'nôtres',
410 |         'o',
411 |         'oh',
412 |         'ohé',
413 |         'ollé',
414 |         'olé',
415 |         'on',
416 |         'ont',
417 |         'onze',
418 |         'onzième',
419 |         'ore',
420 |         'ou',
421 |         'ouf',
422 |         'ouias',
423 |         'oust',
424 |         'ouste',
425 |         'outre',
426 |         'ouvert',
427 |         'ouverte',
428 |         'ouverts',
429 |         'o|',
430 |         'où',
431 |         'p',
432 |         'paf',
433 |         'pan',
434 |         'par',
435 |         'parce',
436 |         'parfois',
437 |         'parle',
438 |         'parlent',
439 |         'parler',
440 |         'parmi',
441 |         'parole',
442 |         'parseme',
443 |         'partant',
444 |         'particulier',
445 |         'particulière',
446 |         'particulièrement',
447 |         'pas',
448 |         'passé',
449 |         'pendant',
450 |         'pense',
451 |         'permet',
452 |         'personne',
453 |         'personnes',
454 |         'peu',
455 |         'peut',
456 |         'peuvent',
457 |         'peux',
458 |         'pff',
459 |         'pfft',
460 |         'pfut',
461 |         'pif',
462 |         'pire',
463 |         'pièce',
464 |         'plein',
465 |         'plouf',
466 |         'plupart',
467 |         'plus',
468 |         'plusieurs',
469 |         'plutôt',
470 |         'possessif',
471 |         'possessifs',
472 |         'possible',
473 |         'possibles',
474 |         'pouah',
475 |         'pour',
476 |         'pourquoi',
477 |         'pourrais',
478 |         'pourrait',
479 |         'pouvait',
480 |         'prealable',
481 |         'precisement',
482 |         'premier',
483 |         'première',
484 |         'premièrement',
485 |         'pres',
486 |         'probable',
487 |         'probante',
488 |         'procedant',
489 |         'proche',
490 |         'près',
491 |         'psitt',
492 |         'pu',
493 |         'puis',
494 |         'puisque',
495 |         'pur',
496 |         'pure',
497 |         'q',
498 |         'qu',
499 |         'quand',
500 |         'quant',
501 |         'quant-à-soi',
502 |         'quanta',
503 |         'quarante',
504 |         'quatorze',
505 |         'quatre',
506 |         'quatre-vingt',
507 |         'quatrième',
508 |         'quatrièmement',
509 |         'que',
510 |         'quel',
511 |         'quelconque',
512 |         'quelle',
513 |         'quelles',
514 |         'quelqu\'un',
515 |         'quelque',
516 |         'quelques',
517 |         'quels',
518 |         'qui',
519 |         'quiconque',
520 |         'quinze',
521 |         'quoi',
522 |         'quoique',
523 |         'r',
524 |         'rare',
525 |         'rarement',
526 |         'rares',
527 |         'relative',
528 |         'relativement',
529 |         'remarquable',
530 |         'rend',
531 |         'rendre',
532 |         'restant',
533 |         'reste',
534 |         'restent',
535 |         'restrictif',
536 |         'retour',
537 |         'revoici',
538 |         'revoilà',
539 |         'rien',
540 |         's',
541 |         'sa',
542 |         'sacrebleu',
543 |         'sait',
544 |         'sans',
545 |         'sapristi',
546 |         'sauf',
547 |         'se',
548 |         'sein',
549 |         'seize',
550 |         'selon',
551 |         'semblable',
552 |         'semblaient',
553 |         'semble',
554 |         'semblent',
555 |         'sent',
556 |         'sept',
557 |         'septième',
558 |         'sera',
559 |         'serai',
560 |         'seraient',
561 |         'serais',
562 |         'serait',
563 |         'seras',
564 |         'serez',
565 |         'seriez',
566 |         'serions',
567 |         'serons',
568 |         'seront',
569 |         'ses',
570 |         'seul',
571 |         'seule',
572 |         'seulement',
573 |         'si',
574 |         'sien',
575 |         'sienne',
576 |         'siennes',
577 |         'siens',
578 |         'sinon',
579 |         'six',
580 |         'sixième',
581 |         'soi',
582 |         'soi-même',
583 |         'soient',
584 |         'sois',
585 |         'soit',
586 |         'soixante',
587 |         'sommes',
588 |         'son',
589 |         'sont',
590 |         'sous',
591 |         'souvent',
592 |         'soyez',
593 |         'soyons',
594 |         'specifique',
595 |         'specifiques',
596 |         'speculatif',
597 |         'stop',
598 |         'strictement',
599 |         'subtiles',
600 |         'suffisant',
601 |         'suffisante',
602 |         'suffit',
603 |         'suis',
604 |         'suit',
605 |         'suivant',
606 |         'suivante',
607 |         'suivantes',
608 |         'suivants',
609 |         'suivre',
610 |         'sujet',
611 |         'superpose',
612 |         'sur',
613 |         'surtout',
614 |         't',
615 |         'ta',
616 |         'tac',
617 |         'tandis',
618 |         'tant',
619 |         'tardive',
620 |         'te',
621 |         'tel',
622 |         'telle',
623 |         'tellement',
624 |         'telles',
625 |         'tels',
626 |         'tenant',
627 |         'tend',
628 |         'tenir',
629 |         'tente',
630 |         'tes',
631 |         'tic',
632 |         'tien',
633 |         'tienne',
634 |         'tiennes',
635 |         'tiens',
636 |         'toc',
637 |         'toi',
638 |         'toi-même',
639 |         'ton',
640 |         'touchant',
641 |         'toujours',
642 |         'tous',
643 |         'tout',
644 |         'toute',
645 |         'toutefois',
646 |         'toutes',
647 |         'treize',
648 |         'trente',
649 |         'tres',
650 |         'trois',
651 |         'troisième',
652 |         'troisièmement',
653 |         'trop',
654 |         'très',
655 |         'tsoin',
656 |         'tsouin',
657 |         'tu',
658 |         'té',
659 |         'u',
660 |         'un',
661 |         'une',
662 |         'unes',
663 |         'uniformement',
664 |         'unique',
665 |         'uniques',
666 |         'uns',
667 |         'v',
668 |         'va',
669 |         'vais',
670 |         'valeur',
671 |         'vas',
672 |         'vers',
673 |         'via',
674 |         'vif',
675 |         'vifs',
676 |         'vingt',
677 |         'vivat',
678 |         'vive',
679 |         'vives',
680 |         'vlan',
681 |         'voici',
682 |         'voie',
683 |         'voient',
684 |         'voilà',
685 |         'vont',
686 |         'vos',
687 |         'votre',
688 |         'vous',
689 |         'vous-mêmes',
690 |         'vu',
691 |         'vé',
692 |         'vôtre',
693 |         'vôtres',
694 |         'w',
695 |         'x',
696 |         'y',
697 |         'z',
698 |         'zut',
699 |         'à',
700 |         'â',
701 |         'ça',
702 |         'ès',
703 |         'étaient',
704 |         'étais',
705 |         'était',
706 |         'étant',
707 |         'état',
708 |         'étiez',
709 |         'étions',
710 |         'été',
711 |         'étée',
712 |         'étées',
713 |         'étés',
714 |         'êtes',
715 |         'être',
716 |         'ô'
717 |     ];
718 | }
719 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/German.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  Syndesi <github.com/Syndesi>
  8 |  */
  9 | declare(strict_types=1);
 10 | namespace PhpScience\TextRank\Tool\StopWords;
 11 | /**
 12 |  * Class German
 13 |  *
 14 |  * @package PhpScience\TextRank\Tool\StopWords
 15 |  */
 16 | class German extends StopWordsAbstract
 17 | {
 18 |     /**
 19 |      * Stop words for avoid dummy keywords for Language German.
 20 |      * Word list created by Marco Götze, Steffen Geyer.
 21 |      * Source: https://solariz.de/de/downloads/6/german-enhanced-stopwords.htm
 22 |      *
 23 |      * @var array
 24 |      */
 25 |     protected $words = [
 26 |         'ab',
 27 |         'aber',
 28 |         'alle',
 29 |         'allein',
 30 |         'allem',
 31 |         'allen',
 32 |         'aller',
 33 |         'allerdings',
 34 |         'allerlei',
 35 |         'alles',
 36 |         'allmählich',
 37 |         'allzu',
 38 |         'als',
 39 |         'alsbald',
 40 |         'also',
 41 |         'am',
 42 |         'an',
 43 |         'and',
 44 |         'ander',
 45 |         'andere',
 46 |         'anderem',
 47 |         'anderen',
 48 |         'anderer',
 49 |         'andererseits',
 50 |         'anderes',
 51 |         'anderm',
 52 |         'andern',
 53 |         'andernfalls',
 54 |         'anders',
 55 |         'anstatt',
 56 |         'auch',
 57 |         'auf',
 58 |         'aus',
 59 |         'ausgenommen',
 60 |         'ausser',
 61 |         'ausserdem',
 62 |         'außer',
 63 |         'außerdem',
 64 |         'außerhalb',
 65 |         'bald',
 66 |         'bei',
 67 |         'beide',
 68 |         'beiden',
 69 |         'beiderlei',
 70 |         'beides',
 71 |         'beim',
 72 |         'beinahe',
 73 |         'bereits',
 74 |         'besonders',
 75 |         'besser',
 76 |         'beträchtlich',
 77 |         'bevor',
 78 |         'bezüglich',
 79 |         'bin',
 80 |         'bis',
 81 |         'bisher',
 82 |         'bislang',
 83 |         'bist',
 84 |         'bloß',
 85 |         'bsp.',
 86 |         'bzw',
 87 |         'ca',
 88 |         'ca.',
 89 |         'content',
 90 |         'da',
 91 |         'dabei',
 92 |         'dadurch',
 93 |         'dafür',
 94 |         'dagegen',
 95 |         'daher',
 96 |         'dahin',
 97 |         'damals',
 98 |         'damit',
 99 |         'danach',
100 |         'daneben',
101 |         'dann',
102 |         'daran',
103 |         'darauf',
104 |         'daraus',
105 |         'darin',
106 |         'darum',
107 |         'darunter',
108 |         'darüber',
109 |         'darüberhinaus',
110 |         'das',
111 |         'dass',
112 |         'dasselbe',
113 |         'davon',
114 |         'davor',
115 |         'dazu',
116 |         'daß',
117 |         'dein',
118 |         'deine',
119 |         'deinem',
120 |         'deinen',
121 |         'deiner',
122 |         'deines',
123 |         'dem',
124 |         'demnach',
125 |         'demselben',
126 |         'den',
127 |         'denen',
128 |         'denn',
129 |         'dennoch',
130 |         'denselben',
131 |         'der',
132 |         'derart',
133 |         'derartig',
134 |         'derem',
135 |         'deren',
136 |         'derer',
137 |         'derjenige',
138 |         'derjenigen',
139 |         'derselbe',
140 |         'derselben',
141 |         'derzeit',
142 |         'des',
143 |         'deshalb',
144 |         'desselben',
145 |         'dessen',
146 |         'desto',
147 |         'deswegen',
148 |         'dich',
149 |         'die',
150 |         'diejenige',
151 |         'dies',
152 |         'diese',
153 |         'dieselbe',
154 |         'dieselben',
155 |         'diesem',
156 |         'diesen',
157 |         'dieser',
158 |         'dieses',
159 |         'diesseits',
160 |         'dir',
161 |         'direkt',
162 |         'direkte',
163 |         'direkten',
164 |         'direkter',
165 |         'doch',
166 |         'dort',
167 |         'dorther',
168 |         'dorthin',
169 |         'drauf',
170 |         'drin',
171 |         'drunter',
172 |         'drüber',
173 |         'du',
174 |         'dunklen',
175 |         'durch',
176 |         'durchaus',
177 |         'eben',
178 |         'ebenfalls',
179 |         'ebenso',
180 |         'eher',
181 |         'eigenen',
182 |         'eigenes',
183 |         'eigentlich',
184 |         'ein',
185 |         'eine',
186 |         'einem',
187 |         'einen',
188 |         'einer',
189 |         'einerseits',
190 |         'eines',
191 |         'einfach',
192 |         'einführen',
193 |         'einführte',
194 |         'einführten',
195 |         'eingesetzt',
196 |         'einig',
197 |         'einige',
198 |         'einigem',
199 |         'einigen',
200 |         'einiger',
201 |         'einigermaßen',
202 |         'einiges',
203 |         'einmal',
204 |         'eins',
205 |         'einseitig',
206 |         'einseitige',
207 |         'einseitigen',
208 |         'einseitiger',
209 |         'einst',
210 |         'einstmals',
211 |         'einzig',
212 |         'entsprechend',
213 |         'entweder',
214 |         'er',
215 |         'erst',
216 |         'es',
217 |         'etc',
218 |         'etliche',
219 |         'etwa',
220 |         'etwas',
221 |         'euch',
222 |         'euer',
223 |         'eure',
224 |         'eurem',
225 |         'euren',
226 |         'eurer',
227 |         'eures',
228 |         'falls',
229 |         'fast',
230 |         'ferner',
231 |         'folgende',
232 |         'folgenden',
233 |         'folgender',
234 |         'folgendes',
235 |         'folglich',
236 |         'fuer',
237 |         'für',
238 |         'gab',
239 |         'ganze',
240 |         'ganzem',
241 |         'ganzen',
242 |         'ganzer',
243 |         'ganzes',
244 |         'gar',
245 |         'gegen',
246 |         'gemäss',
247 |         'ggf',
248 |         'gleich',
249 |         'gleichwohl',
250 |         'gleichzeitig',
251 |         'glücklicherweise',
252 |         'gänzlich',
253 |         'hab',
254 |         'habe',
255 |         'haben',
256 |         'haette',
257 |         'hast',
258 |         'hat',
259 |         'hatte',
260 |         'hatten',
261 |         'hattest',
262 |         'hattet',
263 |         'heraus',
264 |         'herein',
265 |         'hier',
266 |         'hier',
267 |         'hinter',
268 |         'hiermit',
269 |         'hiesige',
270 |         'hin',
271 |         'hinein',
272 |         'hinten',
273 |         'hinter',
274 |         'hinterher',
275 |         'http',
276 |         'hätt',
277 |         'hätte',
278 |         'hätten',
279 |         'höchstens',
280 |         'ich',
281 |         'igitt',
282 |         'ihm',
283 |         'ihn',
284 |         'ihnen',
285 |         'ihr',
286 |         'ihre',
287 |         'ihrem',
288 |         'ihren',
289 |         'ihrer',
290 |         'ihres',
291 |         'im',
292 |         'immer',
293 |         'immerhin',
294 |         'in',
295 |         'indem',
296 |         'indessen',
297 |         'infolge',
298 |         'innen',
299 |         'innerhalb',
300 |         'ins',
301 |         'insofern',
302 |         'inzwischen',
303 |         'irgend',
304 |         'irgendeine',
305 |         'irgendwas',
306 |         'irgendwen',
307 |         'irgendwer',
308 |         'irgendwie',
309 |         'irgendwo',
310 |         'ist',
311 |         'ja',
312 |         'je',
313 |         'jed',
314 |         'jede',
315 |         'jedem',
316 |         'jeden',
317 |         'jedenfalls',
318 |         'jeder',
319 |         'jederlei',
320 |         'jedes',
321 |         'jedoch',
322 |         'jemand',
323 |         'jene',
324 |         'jenem',
325 |         'jenen',
326 |         'jener',
327 |         'jenes',
328 |         'jenseits',
329 |         'jetzt',
330 |         'jährig',
331 |         'jährige',
332 |         'jährigen',
333 |         'jähriges',
334 |         'kam',
335 |         'kann',
336 |         'kannst',
337 |         'kaum',
338 |         'kein',
339 |         'keine',
340 |         'keinem',
341 |         'keinen',
342 |         'keiner',
343 |         'keinerlei',
344 |         'keines',
345 |         'keineswegs',
346 |         'klar',
347 |         'klare',
348 |         'klaren',
349 |         'klares',
350 |         'klein',
351 |         'kleinen',
352 |         'kleiner',
353 |         'kleines',
354 |         'koennen',
355 |         'koennt',
356 |         'koennte',
357 |         'koennten',
358 |         'komme',
359 |         'kommen',
360 |         'kommt',
361 |         'konkret',
362 |         'konkrete',
363 |         'konkreten',
364 |         'konkreter',
365 |         'konkretes',
366 |         'können',
367 |         'könnt',
368 |         'künftig',
369 |         'leider',
370 |         'machen',
371 |         'man',
372 |         'manche',
373 |         'manchem',
374 |         'manchen',
375 |         'mancher',
376 |         'mancherorts',
377 |         'manches',
378 |         'manchmal',
379 |         'mehr',
380 |         'mehrere',
381 |         'mein',
382 |         'meine',
383 |         'meinem',
384 |         'meinen',
385 |         'meiner',
386 |         'meines',
387 |         'mich',
388 |         'mir',
389 |         'mit',
390 |         'mithin',
391 |         'muessen',
392 |         'muesst',
393 |         'muesste',
394 |         'muss',
395 |         'musst',
396 |         'musste',
397 |         'mussten',
398 |         'muß',
399 |         'mußt',
400 |         'müssen',
401 |         'müsste',
402 |         'müssten',
403 |         'müßt',
404 |         'müßte',
405 |         'nach',
406 |         'nachdem',
407 |         'nachher',
408 |         'nachhinein',
409 |         'nahm',
410 |         'natürlich',
411 |         'neben',
412 |         'nebenan',
413 |         'nehmen',
414 |         'nein',
415 |         'nicht',
416 |         'nichts',
417 |         'nie',
418 |         'niemals',
419 |         'niemand',
420 |         'nirgends',
421 |         'nirgendwo',
422 |         'noch',
423 |         'nun',
424 |         'nur',
425 |         'nächste',
426 |         'nämlich',
427 |         'nötigenfalls',
428 |         'ob',
429 |         'oben',
430 |         'oberhalb',
431 |         'obgleich',
432 |         'obschon',
433 |         'obwohl',
434 |         'oder',
435 |         'oft',
436 |         'per',
437 |         'plötzlich',
438 |         'schließlich',
439 |         'schon',
440 |         'sehr',
441 |         'sehrwohl',
442 |         'seid',
443 |         'sein',
444 |         'seine',
445 |         'seinem',
446 |         'seinen',
447 |         'seiner',
448 |         'seines',
449 |         'seit',
450 |         'seitdem',
451 |         'seither',
452 |         'selber',
453 |         'selbst',
454 |         'sich',
455 |         'sicher',
456 |         'sicherlich',
457 |         'sie',
458 |         'sind',
459 |         'so',
460 |         'sobald',
461 |         'sodass',
462 |         'sodaß',
463 |         'soeben',
464 |         'sofern',
465 |         'sofort',
466 |         'sogar',
467 |         'solange',
468 |         'solch',
469 |         'solche',
470 |         'solchem',
471 |         'solchen',
472 |         'solcher',
473 |         'solches',
474 |         'soll',
475 |         'sollen',
476 |         'sollst',
477 |         'sollt',
478 |         'sollte',
479 |         'sollten',
480 |         'solltest',
481 |         'somit',
482 |         'sondern',
483 |         'sonst',
484 |         'sonstwo',
485 |         'sooft',
486 |         'soviel',
487 |         'soweit',
488 |         'sowie',
489 |         'sowohl',
490 |         'tatsächlich',
491 |         'tatsächlichen',
492 |         'tatsächlicher',
493 |         'tatsächliches',
494 |         'trotzdem',
495 |         'ueber',
496 |         'um',
497 |         'umso',
498 |         'unbedingt',
499 |         'und',
500 |         'unmöglich',
501 |         'unmögliche',
502 |         'unmöglichen',
503 |         'unmöglicher',
504 |         'uns',
505 |         'unser',
506 |         'unser',
507 |         'unsere',
508 |         'unsere',
509 |         'unserem',
510 |         'unseren',
511 |         'unserer',
512 |         'unseres',
513 |         'unter',
514 |         'usw',
515 |         'viel',
516 |         'viele',
517 |         'vielen',
518 |         'vieler',
519 |         'vieles',
520 |         'vielleicht',
521 |         'vielmals',
522 |         'vom',
523 |         'von',
524 |         'vor',
525 |         'voran',
526 |         'vorher',
527 |         'vorüber',
528 |         'völlig',
529 |         'wann',
530 |         'war',
531 |         'waren',
532 |         'warst',
533 |         'warum',
534 |         'was',
535 |         'weder',
536 |         'weil',
537 |         'weiter',
538 |         'weitere',
539 |         'weiterem',
540 |         'weiteren',
541 |         'weiterer',
542 |         'weiteres',
543 |         'weiterhin',
544 |         'weiß',
545 |         'welche',
546 |         'welchem',
547 |         'welchen',
548 |         'welcher',
549 |         'welches',
550 |         'wem',
551 |         'wen',
552 |         'wenig',
553 |         'wenige',
554 |         'weniger',
555 |         'wenigstens',
556 |         'wenn',
557 |         'wenngleich',
558 |         'wer',
559 |         'werde',
560 |         'werden',
561 |         'werdet',
562 |         'weshalb',
563 |         'wessen',
564 |         'wichtig',
565 |         'wie',
566 |         'wieder',
567 |         'wieso',
568 |         'wieviel',
569 |         'wiewohl',
570 |         'will',
571 |         'willst',
572 |         'wir',
573 |         'wird',
574 |         'wirklich',
575 |         'wirst',
576 |         'wo',
577 |         'wodurch',
578 |         'wogegen',
579 |         'woher',
580 |         'wohin',
581 |         'wohingegen',
582 |         'wohl',
583 |         'wohlweislich',
584 |         'womit',
585 |         'woraufhin',
586 |         'woraus',
587 |         'worin',
588 |         'wurde',
589 |         'wurden',
590 |         'während',
591 |         'währenddessen',
592 |         'wär',
593 |         'wäre',
594 |         'wären',
595 |         'würde',
596 |         'würden',
597 |         'z.B.',
598 |         'zB',
599 |         'zahlreich',
600 |         'zeitweise',
601 |         'zu',
602 |         'zudem',
603 |         'zuerst',
604 |         'zufolge',
605 |         'zugleich',
606 |         'zuletzt',
607 |         'zum',
608 |         'zumal',
609 |         'zur',
610 |         'zurück',
611 |         'zusammen',
612 |         'zuviel',
613 |         'zwar',
614 |         'zwischen',
615 |         'ähnlich',
616 |         'übel',
617 |         'über',
618 |         'überall',
619 |         'überallhin',
620 |         'überdies',
621 |         'übermorgen',
622 |         'übrig',
623 |         'übrigens'
624 |     ];
625 | }


--------------------------------------------------------------------------------
/src/Tool/StopWords/Indonesian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | declare(strict_types=1);
  4 | 
  5 | namespace PhpScience\TextRank\Tool\StopWords;
  6 | 
  7 | /**
  8 |  * Indonesian (Bahasa Indonesia) stop-word list.
  9 |  *
 10 |  * @package PhpScience\TextRank\Tool\StopWords
 11 |  */
 12 | class Indonesian extends StopWordsAbstract
 13 | {
 14 |     /**
 15 |      * Indonesian stop words that must not be picked up as keywords.
 16 |      *
 17 |      * @var string[]
 18 |      */
 19 |     protected $words = [
 20 |         'ada',
 21 |         'adalah',
 22 |         'adanya',
 23 |         'adapun',
 24 |         'agak',
 25 |         'agaknya',
 26 |         'agar',
 27 |         'akan',
 28 |         'akankah',
 29 |         'akhir',
 30 |         'akhiri',
 31 |         'akhirnya',
 32 |         'aku',
 33 |         'akulah',
 34 |         'amat',
 35 |         'amatlah',
 36 |         'anda',
 37 |         'andalah',
 38 |         'antar',
 39 |         'antara',
 40 |         'antaranya',
 41 |         'apa',
 42 |         'apaan',
 43 |         'apabila',
 44 |         'apakah',
 45 |         'apalagi',
 46 |         'apatah',
 47 |         'artinya',
 48 |         'asal',
 49 |         'asalkan',
 50 |         'atas',
 51 |         'atau',
 52 |         'ataukah',
 53 |         'ataupun',
 54 |         'awal',
 55 |         'awalnya',
 56 |         'bagai',
 57 |         'bagaikan',
 58 |         'bagaimana',
 59 |         'bagaimanakah',
 60 |         'bagaimanapun',
 61 |         'bagi',
 62 |         'bagian',
 63 |         'bahkan',
 64 |         'bahwa',
 65 |         'bahwasanya',
 66 |         'baik',
 67 |         'bakal',
 68 |         'bakalan',
 69 |         'balik',
 70 |         'banyak',
 71 |         'bapak',
 72 |         'baru',
 73 |         'bawah',
 74 |         'beberapa',
 75 |         'begini',
 76 |         'beginian',
 77 |         'beginikah',
 78 |         'beginilah',
 79 |         'begitu',
 80 |         'begitukah',
 81 |         'begitulah',
 82 |         'begitupun',
 83 |         'bekerja',
 84 |         'belakang',
 85 |         'belakangan',
 86 |         'belum',
 87 |         'belumlah',
 88 |         'benar',
 89 |         'benarkah',
 90 |         'benarlah',
 91 |         'berada',
 92 |         'berakhir',
 93 |         'berakhirlah',
 94 |         'berakhirnya',
 95 |         'berapa',
 96 |         'berapakah',
 97 |         'berapalah',
 98 |         'berapapun',
 99 |         'berarti',
100 |         'berawal',
101 |         'berbagai',
102 |         'berdatangan',
103 |         'beri',
104 |         'berikan',
105 |         'berikut',
106 |         'berikutnya',
107 |         'berjumlah',
108 |         'berkali-kali',
109 |         'berkata',
110 |         'berkehendak',
111 |         'berkeinginan',
112 |         'berkenaan',
113 |         'berlainan',
114 |         'berlalu',
115 |         'berlangsung',
116 |         'berlebihan',
117 |         'bermacam',
118 |         'bermacam-macam',
119 |         'bermaksud',
120 |         'bermula',
121 |         'bersama',
122 |         'bersama-sama',
123 |         'bersiap',
124 |         'bersiap-siap',
125 |         'bertanya',
126 |         'bertanya-tanya',
127 |         'berturut',
128 |         'berturut-turut',
129 |         'bertutur',
130 |         'berujar',
131 |         'berupa',
132 |         'besar',
133 |         'betul',
134 |         'betulkah',
135 |         'biasa',
136 |         'biasanya',
137 |         'bila',
138 |         'bilakah',
139 |         'bisa',
140 |         'bisakah',
141 |         'boleh',
142 |         'bolehkah',
143 |         'bolehlah',
144 |         'buat',
145 |         'bukan',
146 |         'bukankah',
147 |         'bukanlah',
148 |         'bukannya',
149 |         'bulan',
150 |         'bung',
151 |         'cara',
152 |         'caranya',
153 |         'cukup',
154 |         'cukupkah',
155 |         'cukuplah',
156 |         'cuma',
157 |         'dahulu',
158 |         'dalam',
159 |         'dan',
160 |         'dapat',
161 |         'dari',
162 |         'daripada',
163 |         'datang',
164 |         'dekat',
165 |         'demi',
166 |         'demikian',
167 |         'demikianlah',
168 |         'dengan',
169 |         'depan',
170 |         'di',
171 |         'dia',
172 |         'diakhiri',
173 |         'diakhirinya',
174 |         'dialah',
175 |         'diantara',
176 |         'diantaranya',
177 |         'diberi',
178 |         'diberikan',
179 |         'diberikannya',
180 |         'dibuat',
181 |         'dibuatnya',
182 |         'didapat',
183 |         'didatangkan',
184 |         'digunakan',
185 |         'diibaratkan',
186 |         'diibaratkannya',
187 |         'diingat',
188 |         'diingatkan',
189 |         'diinginkan',
190 |         'dijawab',
191 |         'dijelaskan',
192 |         'dijelaskannya',
193 |         'dikarenakan',
194 |         'dikatakan',
195 |         'dikatakannya',
196 |         'dikerjakan',
197 |         'diketahui',
198 |         'diketahuinya',
199 |         'dikira',
200 |         'dilakukan',
201 |         'dilalui',
202 |         'dilihat',
203 |         'dimaksud',
204 |         'dimaksudkan',
205 |         'dimaksudkannya',
206 |         'dimaksudnya',
207 |         'diminta',
208 |         'dimintai',
209 |         'dimisalkan',
210 |         'dimulai',
211 |         'dimulailah',
212 |         'dimulainya',
213 |         'dimungkinkan',
214 |         'dini',
215 |         'dipastikan',
216 |         'diperbuat',
217 |         'diperbuatnya',
218 |         'dipergunakan',
219 |         'diperkirakan',
220 |         'diperlihatkan',
221 |         'diperlukan',
222 |         'diperlukannya',
223 |         'dipersoalkan',
224 |         'dipertanyakan',
225 |         'dipunyai',
226 |         'diri',
227 |         'dirinya',
228 |         'disampaikan',
229 |         'disebut',
230 |         'disebutkan',
231 |         'disebutkannya',
232 |         'disini',
233 |         'disinilah',
234 |         'ditambahkan',
235 |         'ditandaskan',
236 |         'ditanya',
237 |         'ditanyai',
238 |         'ditanyakan',
239 |         'ditegaskan',
240 |         'ditujukan',
241 |         'ditunjuk',
242 |         'ditunjuki',
243 |         'ditunjukkan',
244 |         'ditunjukkannya',
245 |         'ditunjuknya',
246 |         'dituturkan',
247 |         'dituturkannya',
248 |         'diucapkan',
249 |         'diucapkannya',
250 |         'diungkapkan',
251 |         'dong',
252 |         'dua',
253 |         'dulu',
254 |         'empat',
255 |         'enggak',
256 |         'enggaknya',
257 |         'entah',
258 |         'entahlah',
259 |         'guna',
260 |         'gunakan',
261 |         'hal',
262 |         'hampir',
263 |         'hanya',
264 |         'hanyalah',
265 |         'hari',
266 |         'harus',
267 |         'haruslah',
268 |         'harusnya',
269 |         'hendak',
270 |         'hendaklah',
271 |         'hendaknya',
272 |         'hingga',
273 |         'ia',
274 |         'ialah',
275 |         'ibarat',
276 |         'ibaratkan',
277 |         'ibaratnya',
278 |         'ibu',
279 |         'ikut',
280 |         'ingat',
281 |         'ingat-ingat',
282 |         'ingin',
283 |         'inginkah',
284 |         'inginkan',
285 |         'ini',
286 |         'inikah',
287 |         'inilah',
288 |         'itu',
289 |         'itukah',
290 |         'itulah',
291 |         'jadi',
292 |         'jadilah',
293 |         'jadinya',
294 |         'jangan',
295 |         'jangankan',
296 |         'janganlah',
297 |         'jauh',
298 |         'jawab',
299 |         'jawaban',
300 |         'jawabnya',
301 |         'jelas',
302 |         'jelaskan',
303 |         'jelaslah',
304 |         'jelasnya',
305 |         'jika',
306 |         'jikalau',
307 |         'juga',
308 |         'jumlah',
309 |         'jumlahnya',
310 |         'justru',
311 |         'kala',
312 |         'kalau',
313 |         'kalaulah',
314 |         'kalaupun',
315 |         'kalian',
316 |         'kami',
317 |         'kamilah',
318 |         'kamu',
319 |         'kamulah',
320 |         'kan',
321 |         'kapan',
322 |         'kapankah',
323 |         'kapanpun',
324 |         'karena',
325 |         'karenanya',
326 |         'kasus',
327 |         'kata',
328 |         'katakan',
329 |         'katakanlah',
330 |         'katanya',
331 |         'ke',
332 |         'keadaan',
333 |         'kebetulan',
334 |         'kecil',
335 |         'kedua',
336 |         'keduanya',
337 |         'keinginan',
338 |         'kelamaan',
339 |         'kelihatan',
340 |         'kelihatannya',
341 |         'kelima',
342 |         'keluar',
343 |         'kembali',
344 |         'kemudian',
345 |         'kemungkinan',
346 |         'kemungkinannya',
347 |         'kenapa',
348 |         'kepada',
349 |         'kepadanya',
350 |         'kesampaian',
351 |         'keseluruhan',
352 |         'keseluruhannya',
353 |         'keterlaluan',
354 |         'ketika',
355 |         'khususnya',
356 |         'kini',
357 |         'kinilah',
358 |         'kira',
359 |         'kira-kira',
360 |         'kiranya',
361 |         'kita',
362 |         'kitalah',
363 |         'kok',
364 |         'kurang',
365 |         'lagi',
366 |         'lagian',
367 |         'lah',
368 |         'lain',
369 |         'lainnya',
370 |         'lalu',
371 |         'lama',
372 |         'lamanya',
373 |         'lanjut',
374 |         'lanjutnya',
375 |         'lebih',
376 |         'lewat',
377 |         'lima',
378 |         'luar',
379 |         'macam',
380 |         'maka',
381 |         'makanya',
382 |         'makin',
383 |         'malah',
384 |         'malahan',
385 |         'mampu',
386 |         'mampukah',
387 |         'mana',
388 |         'manakala',
389 |         'manalagi',
390 |         'masa',
391 |         'masalah',
392 |         'masalahnya',
393 |         'masih',
394 |         'masihkah',
395 |         'masing',
396 |         'masing-masing',
397 |         'mau',
398 |         'maupun',
399 |         'melainkan',
400 |         'melakukan',
401 |         'melalui',
402 |         'melihat',
403 |         'melihatnya',
404 |         'memang',
405 |         'memastikan',
406 |         'memberi',
407 |         'memberikan',
408 |         'membuat',
409 |         'memerlukan',
410 |         'memihak',
411 |         'meminta',
412 |         'memintakan',
413 |         'memisalkan',
414 |         'memperbuat',
415 |         'mempergunakan',
416 |         'memperkirakan',
417 |         'memperlihatkan',
418 |         'mempersiapkan',
419 |         'mempersoalkan',
420 |         'mempertanyakan',
421 |         'mempunyai',
422 |         'memulai',
423 |         'memungkinkan',
424 |         'menaiki',
425 |         'menambahkan',
426 |         'menandaskan',
427 |         'menanti',
428 |         'menanti-nanti',
429 |         'menantikan',
430 |         'menanya',
431 |         'menanyai',
432 |         'menanyakan',
433 |         'mendapat',
434 |         'mendapatkan',
435 |         'mendatang',
436 |         'mendatangi',
437 |         'mendatangkan',
438 |         'menegaskan',
439 |         'mengakhiri',
440 |         'mengapa',
441 |         'mengatakan',
442 |         'mengatakannya',
443 |         'mengenai',
444 |         'mengerjakan',
445 |         'mengetahui',
446 |         'menggunakan',
447 |         'menghendaki',
448 |         'mengibaratkan',
449 |         'mengibaratkannya',
450 |         'mengingat',
451 |         'mengingatkan',
452 |         'menginginkan',
453 |         'mengira',
454 |         'mengucapkan',
455 |         'mengucapkannya',
456 |         'mengungkapkan',
457 |         'menjadi',
458 |         'menjawab',
459 |         'menjelaskan',
460 |         'menuju',
461 |         'menunjuk',
462 |         'menunjuki',
463 |         'menunjukkan',
464 |         'menunjuknya',
465 |         'menurut',
466 |         'menuturkan',
467 |         'menyampaikan',
468 |         'menyangkut',
469 |         'menyatakan',
470 |         'menyebutkan',
471 |         'menyeluruh',
472 |         'menyiapkan',
473 |         'merasa',
474 |         'mereka',
475 |         'merekalah',
476 |         'merupakan',
477 |         'meski',
478 |         'meskipun',
479 |         'meyakini',
480 |         'meyakinkan',
481 |         'minta',
482 |         'mirip',
483 |         'misal',
484 |         'misalkan',
485 |         'misalnya',
486 |         'mula',
487 |         'mulai',
488 |         'mulailah',
489 |         'mulanya',
490 |         'mungkin',
491 |         'mungkinkah',
492 |         'nah',
493 |         'naik',
494 |         'namun',
495 |         'nanti',
496 |         'nantinya',
497 |         'nyaris',
498 |         'nyatanya',
499 |         'oleh',
500 |         'olehnya',
501 |         'pada',
502 |         'padahal',
503 |         'padanya',
504 |         'pak',
505 |         'paling',
506 |         'panjang',
507 |         'pantas',
508 |         'para',
509 |         'pasti',
510 |         'pastilah',
511 |         'penting',
512 |         'pentingnya',
513 |         'per',
514 |         'percuma',
515 |         'perlu',
516 |         'perlukah',
517 |         'perlunya',
518 |         'pernah',
519 |         'persoalan',
520 |         'pertama',
521 |         'pertama-tama',
522 |         'pertanyaan',
523 |         'pertanyakan',
524 |         'pihak',
525 |         'pihaknya',
526 |         'pukul',
527 |         'pula',
528 |         'pun',
529 |         'punya',
530 |         'rasa',
531 |         'rasanya',
532 |         'rata',
533 |         'rupanya',
534 |         'saat',
535 |         'saatnya',
536 |         'saja',
537 |         'sajalah',
538 |         'saling',
539 |         'sama',
540 |         'sama-sama',
541 |         'sambil',
542 |         'sampai',
543 |         'sampai-sampai',
544 |         'sampaikan',
545 |         'sana',
546 |         'sangat',
547 |         'sangatlah',
548 |         'satu',
549 |         'saya',
550 |         'sayalah',
551 |         'se',
552 |         'sebab',
553 |         'sebabnya',
554 |         'sebagai',
555 |         'sebagaimana',
556 |         'sebagainya',
557 |         'sebagian',
558 |         'sebaik',
559 |         'sebaik-baiknya',
560 |         'sebaiknya',
561 |         'sebaliknya',
562 |         'sebanyak',
563 |         'sebegini',
564 |         'sebegitu',
565 |         'sebelum',
566 |         'sebelumnya',
567 |         'sebenarnya',
568 |         'seberapa',
569 |         'sebesar',
570 |         'sebetulnya',
571 |         'sebisanya',
572 |         'sebuah',
573 |         'sebut',
574 |         'sebutlah',
575 |         'sebutnya',
576 |         'secara',
577 |         'secukupnya',
578 |         'sedang',
579 |         'sedangkan',
580 |         'sedemikian',
581 |         'sedikit',
582 |         'sedikitnya',
583 |         'seenaknya',
584 |         'segala',
585 |         'segalanya',
586 |         'segera',
587 |         'seharusnya',
588 |         'sehingga',
589 |         'seingat',
590 |         'sejak',
591 |         'sejauh',
592 |         'sejenak',
593 |         'sejumlah',
594 |         'sekadar',
595 |         'sekadarnya',
596 |         'sekali',
597 |         'sekali-kali',
598 |         'sekalian',
599 |         'sekaligus',
600 |         'sekalipun',
601 |         'sekarang',
602 |         'sekecil',
603 |         'seketika',
604 |         'sekiranya',
605 |         'sekitar',
606 |         'sekitarnya',
607 |         'sekurang-kurangnya',
608 |         'sekurangnya',
609 |         'sela',
610 |         'selain',
611 |         'selaku',
612 |         'selalu',
613 |         'selama',
614 |         'selama-lamanya',
615 |         'selamanya',
616 |         'selanjutnya',
617 |         'seluruh',
618 |         'seluruhnya',
619 |         'semacam',
620 |         'semakin',
621 |         'semampu',
622 |         'semampunya',
623 |         'semasa',
624 |         'semasih',
625 |         'semata',
626 |         'semata-mata',
627 |         'semaunya',
628 |         'sementara',
629 |         'semisal',
630 |         'semisalnya',
631 |         'sempat',
632 |         'semua',
633 |         'semuanya',
634 |         'semula',
635 |         'sendiri',
636 |         'sendirian',
637 |         'sendirinya',
638 |         'seolah',
639 |         'seolah-olah',
640 |         'seorang',
641 |         'sepanjang',
642 |         'sepantasnya',
643 |         'sepantasnyalah',
644 |         'seperlunya',
645 |         'seperti',
646 |         'sepertinya',
647 |         'sepihak',
648 |         'sering',
649 |         'seringnya',
650 |         'serta',
651 |         'serupa',
652 |         'sesaat',
653 |         'sesama',
654 |         'sesampai',
655 |         'sesegera',
656 |         'sesekali',
657 |         'seseorang',
658 |         'sesuatu',
659 |         'sesuatunya',
660 |         'sesudah',
661 |         'sesudahnya',
662 |         'setelah',
663 |         'setempat',
664 |         'setengah',
665 |         'seterusnya',
666 |         'setiap',
667 |         'setiba',
668 |         'setibanya',
669 |         'setidak-tidaknya',
670 |         'setidaknya',
671 |         'setinggi',
672 |         'seusai',
673 |         'sewaktu',
674 |         'siap',
675 |         'siapa',
676 |         'siapakah',
677 |         'siapapun',
678 |         'sini',
679 |         'sinilah',
680 |         'soal',
681 |         'soalnya',
682 |         'suatu',
683 |         'sudah',
684 |         'sudahkah',
685 |         'sudahlah',
686 |         'supaya',
687 |         'tadi',
688 |         'tadinya',
689 |         'tahu',
690 |         'tahun',
691 |         'tak',
692 |         'tambah',
693 |         'tambahnya',
694 |         'tampak',
695 |         'tampaknya',
696 |         'tandas',
697 |         'tandasnya',
698 |         'tanpa',
699 |         'tanya',
700 |         'tanyakan',
701 |         'tanyanya',
702 |         'tapi',
703 |         'tegas',
704 |         'tegasnya',
705 |         'telah',
706 |         'tempat',
707 |         'tengah',
708 |         'tentang',
709 |         'tentu',
710 |         'tentulah',
711 |         'tentunya',
712 |         'tepat',
713 |         'terakhir',
714 |         'terasa',
715 |         'terbanyak',
716 |         'terdahulu',
717 |         'terdapat',
718 |         'terdiri',
719 |         'terhadap',
720 |         'terhadapnya',
721 |         'teringat',
722 |         'teringat-ingat',
723 |         'terjadi',
724 |         'terjadilah',
725 |         'terjadinya',
726 |         'terkira',
727 |         'terlalu',
728 |         'terlebih',
729 |         'terlihat',
730 |         'termasuk',
731 |         'ternyata',
732 |         'tersampaikan',
733 |         'tersebut',
734 |         'tersebutlah',
735 |         'tertentu',
736 |         'tertuju',
737 |         'terus',
738 |         'terutama',
739 |         'tetap',
740 |         'tetapi',
741 |         'tiap',
742 |         'tiba',
743 |         'tiba-tiba',
744 |         'tidak',
745 |         'tidakkah',
746 |         'tidaklah',
747 |         'tiga',
748 |         'tinggi',
749 |         'toh',
750 |         'tunjuk',
751 |         'turut',
752 |         'tutur',
753 |         'tuturnya',
754 |         'ucap',
755 |         'ucapnya',
756 |         'ujar',
757 |         'ujarnya',
758 |         'umum',
759 |         'umumnya',
760 |         'ungkap',
761 |         'ungkapnya',
762 |         'untuk',
763 |         'usah',
764 |         'usai',
765 |         'waduh',
766 |         'wah',
767 |         'wahai',
768 |         'waktu',
769 |         'waktunya',
770 |         'walau',
771 |         'walaupun',
772 |         'wong',
773 |         'yaitu',
774 |         'yakin',
775 |         'yakni',
776 |         'yang',
777 |     ];
778 | }
779 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Italian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | declare(strict_types=1);
  4 | 
  5 | namespace PhpScience\TextRank\Tool\StopWords;
  6 | 
  7 | /**
  8 |  * Class Italian
  9 |  *
 10 |  * @package PhpScience\TextRank\Tool\StopWords
 11 |  */
 12 | class Italian extends StopWordsAbstract
 13 | {
 14 |     /**
 15 |      * Stop words for avoid dummy keywords for Language Italian.
 16 |      *
 17 |      * @var array
 18 |      */
 19 |     protected $words = [
 20 |         'a',
 21 |         'abbastanza',
 22 |         'abbia',
 23 |         'abbiamo',
 24 |         'abbiano',
 25 |         'abbiate',
 26 |         'accidenti',
 27 |         'ad',
 28 |         'adesso',
 29 |         'affinche',
 30 |         'agl',
 31 |         'agli',
 32 |         'ahime',
 33 |         'ahimã¨',
 34 |         'ahimè',
 35 |         'ai',
 36 |         'al',
 37 |         'alcuna',
 38 |         'alcuni',
 39 |         'alcuno',
 40 |         'all',
 41 |         'alla',
 42 |         'alle',
 43 |         'allo',
 44 |         'allora',
 45 |         'altre',
 46 |         'altri',
 47 |         'altrimenti',
 48 |         'altro',
 49 |         'altrove',
 50 |         'altrui',
 51 |         'anche',
 52 |         'ancora',
 53 |         'anni',
 54 |         'anno',
 55 |         'ansa',
 56 |         'anticipo',
 57 |         'assai',
 58 |         'attesa',
 59 |         'attraverso',
 60 |         'avanti',
 61 |         'avemmo',
 62 |         'avendo',
 63 |         'avente',
 64 |         'aver',
 65 |         'avere',
 66 |         'averlo',
 67 |         'avesse',
 68 |         'avessero',
 69 |         'avessi',
 70 |         'avessimo',
 71 |         'aveste',
 72 |         'avesti',
 73 |         'avete',
 74 |         'aveva',
 75 |         'avevamo',
 76 |         'avevano',
 77 |         'avevate',
 78 |         'avevi',
 79 |         'avevo',
 80 |         'avrai',
 81 |         'avranno',
 82 |         'avrebbe',
 83 |         'avrebbero',
 84 |         'avrei',
 85 |         'avremmo',
 86 |         'avremo',
 87 |         'avreste',
 88 |         'avresti',
 89 |         'avrete',
 90 |         'avrà',
 91 |         'avrò',
 92 |         'avuta',
 93 |         'avute',
 94 |         'avuti',
 95 |         'avuto',
 96 |         'basta',
 97 |         'ben',
 98 |         'bene',
 99 |         'benissimo',
100 |         'berlusconi',
101 |         'brava',
102 |         'bravo',
103 |         'buono',
104 |         'c',
105 |         'casa',
106 |         'caso',
107 |         'cento',
108 |         'certa',
109 |         'certe',
110 |         'certi',
111 |         'certo',
112 |         'che',
113 |         'chi',
114 |         'chicchessia',
115 |         'chiunque',
116 |         'ci',
117 |         'ciascuna',
118 |         'ciascuno',
119 |         'cima',
120 |         'cinque',
121 |         'cio',
122 |         'cioe',
123 |         'cioã¨',
124 |         'cioè',
125 |         'circa',
126 |         'citta',
127 |         'città',
128 |         'cittã',
129 |         'ciã²',
130 |         'ciò',
131 |         'co',
132 |         'codesta',
133 |         'codesti',
134 |         'codesto',
135 |         'cogli',
136 |         'coi',
137 |         'col',
138 |         'colei',
139 |         'coll',
140 |         'coloro',
141 |         'colui',
142 |         'come',
143 |         'cominci',
144 |         'comprare',
145 |         'comunque',
146 |         'con',
147 |         'concernente',
148 |         'conciliarsi',
149 |         'conclusione',
150 |         'consecutivi',
151 |         'consecutivo',
152 |         'consiglio',
153 |         'contro',
154 |         'cortesia',
155 |         'cos',
156 |         'cosa',
157 |         'cosi',
158 |         'cosã¬',
159 |         'così',
160 |         'cui',
161 |         'd',
162 |         'da',
163 |         'dagl',
164 |         'dagli',
165 |         'dai',
166 |         'dal',
167 |         'dall',
168 |         'dalla',
169 |         'dalle',
170 |         'dallo',
171 |         'dappertutto',
172 |         'davanti',
173 |         'degl',
174 |         'degli',
175 |         'dei',
176 |         'del',
177 |         'dell',
178 |         'della',
179 |         'delle',
180 |         'dello',
181 |         'dentro',
182 |         'detto',
183 |         'deve',
184 |         'devo',
185 |         'di',
186 |         'dice',
187 |         'dietro',
188 |         'dire',
189 |         'dirimpetto',
190 |         'diventa',
191 |         'diventare',
192 |         'diventato',
193 |         'dopo',
194 |         'doppio',
195 |         'dov',
196 |         'dove',
197 |         'dovra',
198 |         'dovrà',
199 |         'dovrã',
200 |         'dovunque',
201 |         'due',
202 |         'dunque',
203 |         'durante',
204 |         'e',
205 |         'ebbe',
206 |         'ebbero',
207 |         'ebbi',
208 |         'ecc',
209 |         'ecco',
210 |         'ed',
211 |         'effettivamente',
212 |         'egli',
213 |         'ella',
214 |         'entrambi',
215 |         'eppure',
216 |         'era',
217 |         'erano',
218 |         'eravamo',
219 |         'eravate',
220 |         'eri',
221 |         'ero',
222 |         'esempio',
223 |         'esse',
224 |         'essendo',
225 |         'esser',
226 |         'essere',
227 |         'essi',
228 |         'ex',
229 |         'fa',
230 |         'faccia',
231 |         'facciamo',
232 |         'facciano',
233 |         'facciate',
234 |         'faccio',
235 |         'facemmo',
236 |         'facendo',
237 |         'facesse',
238 |         'facessero',
239 |         'facessi',
240 |         'facessimo',
241 |         'faceste',
242 |         'facesti',
243 |         'faceva',
244 |         'facevamo',
245 |         'facevano',
246 |         'facevate',
247 |         'facevi',
248 |         'facevo',
249 |         'fai',
250 |         'fanno',
251 |         'farai',
252 |         'faranno',
253 |         'fare',
254 |         'farebbe',
255 |         'farebbero',
256 |         'farei',
257 |         'faremmo',
258 |         'faremo',
259 |         'fareste',
260 |         'faresti',
261 |         'farete',
262 |         'farà',
263 |         'farò',
264 |         'fatto',
265 |         'favore',
266 |         'fece',
267 |         'fecero',
268 |         'feci',
269 |         'fin',
270 |         'finalmente',
271 |         'finche',
272 |         'fine',
273 |         'fino',
274 |         'forse',
275 |         'forza',
276 |         'fosse',
277 |         'fossero',
278 |         'fossi',
279 |         'fossimo',
280 |         'foste',
281 |         'fosti',
282 |         'fra',
283 |         'frattempo',
284 |         'fu',
285 |         'fui',
286 |         'fummo',
287 |         'fuori',
288 |         'furono',
289 |         'futuro',
290 |         'generale',
291 |         'gente',
292 |         'gia',
293 |         'giacche',
294 |         'giorni',
295 |         'giorno',
296 |         'giu',
297 |         'già',
298 |         'giã',
299 |         'gli',
300 |         'gliela',
301 |         'gliele',
302 |         'glieli',
303 |         'glielo',
304 |         'gliene',
305 |         'governo',
306 |         'grande',
307 |         'grazie',
308 |         'gruppo',
309 |         'ha',
310 |         'haha',
311 |         'hai',
312 |         'hanno',
313 |         'ho',
314 |         'i',
315 |         'ie',
316 |         'ieri',
317 |         'il',
318 |         'improvviso',
319 |         'in',
320 |         'inc',
321 |         'indietro',
322 |         'infatti',
323 |         'inoltre',
324 |         'insieme',
325 |         'intanto',
326 |         'intorno',
327 |         'invece',
328 |         'io',
329 |         'l',
330 |         'la',
331 |         'lasciato',
332 |         'lato',
333 |         'lavoro',
334 |         'le',
335 |         'lei',
336 |         'li',
337 |         'lo',
338 |         'lontano',
339 |         'loro',
340 |         'lui',
341 |         'lungo',
342 |         'luogo',
343 |         'là',
344 |         'lã',
345 |         'ma',
346 |         'macche',
347 |         'magari',
348 |         'maggior',
349 |         'mai',
350 |         'male',
351 |         'malgrado',
352 |         'malissimo',
353 |         'mancanza',
354 |         'marche',
355 |         'me',
356 |         'medesimo',
357 |         'mediante',
358 |         'meglio',
359 |         'meno',
360 |         'mentre',
361 |         'mesi',
362 |         'mezzo',
363 |         'mi',
364 |         'mia',
365 |         'mie',
366 |         'miei',
367 |         'mila',
368 |         'miliardi',
369 |         'milioni',
370 |         'minimi',
371 |         'ministro',
372 |         'mio',
373 |         'modo',
374 |         'molta',
375 |         'molti',
376 |         'moltissimo',
377 |         'molto',
378 |         'momento',
379 |         'mondo',
380 |         'mosto',
381 |         'nazionale',
382 |         'ne',
383 |         'negl',
384 |         'negli',
385 |         'nei',
386 |         'nel',
387 |         'nell',
388 |         'nella',
389 |         'nelle',
390 |         'nello',
391 |         'nemmeno',
392 |         'neppure',
393 |         'nessun',
394 |         'nessuna',
395 |         'nessuno',
396 |         'niente',
397 |         'no',
398 |         'noi',
399 |         'nome',
400 |         'non',
401 |         'nondimeno',
402 |         'nonostante',
403 |         'nonsia',
404 |         'nostra',
405 |         'nostre',
406 |         'nostri',
407 |         'nostro',
408 |         'novanta',
409 |         'nove',
410 |         'nulla',
411 |         'nuovi',
412 |         'nuovo',
413 |         'o',
414 |         'od',
415 |         'oggi',
416 |         'ogni',
417 |         'ognuna',
418 |         'ognuno',
419 |         'oltre',
420 |         'oppure',
421 |         'ora',
422 |         'ore',
423 |         'osi',
424 |         'ossia',
425 |         'ottanta',
426 |         'otto',
427 |         'paese',
428 |         'parecchi',
429 |         'parecchie',
430 |         'parecchio',
431 |         'parte',
432 |         'partendo',
433 |         'peccato',
434 |         'peggio',
435 |         'per',
436 |         'perche',
437 |         'perchã¨',
438 |         'perchè',
439 |         'perché',
440 |         'percio',
441 |         'perciã²',
442 |         'perciò',
443 |         'perfino',
444 |         'pero',
445 |         'persino',
446 |         'persone',
447 |         'perã²',
448 |         'però',
449 |         'piedi',
450 |         'pieno',
451 |         'piglia',
452 |         'piu',
453 |         'piuttosto',
454 |         'piã¹',
455 |         'più',
456 |         'po',
457 |         'pochissimo',
458 |         'poco',
459 |         'poi',
460 |         'poiche',
461 |         'possa',
462 |         'possedere',
463 |         'posteriore',
464 |         'posto',
465 |         'potrebbe',
466 |         'preferibilmente',
467 |         'presa',
468 |         'press',
469 |         'prima',
470 |         'primo',
471 |         'principalmente',
472 |         'probabilmente',
473 |         'promesso',
474 |         'proprio',
475 |         'puo',
476 |         'pure',
477 |         'purtroppo',
478 |         'puã²',
479 |         'può',
480 |         'qua',
481 |         'qualche',
482 |         'qualcosa',
483 |         'qualcuna',
484 |         'qualcuno',
485 |         'quale',
486 |         'quali',
487 |         'qualunque',
488 |         'quando',
489 |         'quanta',
490 |         'quante',
491 |         'quanti',
492 |         'quanto',
493 |         'quantunque',
494 |         'quarto',
495 |         'quasi',
496 |         'quattro',
497 |         'quel',
498 |         'quella',
499 |         'quelle',
500 |         'quelli',
501 |         'quello',
502 |         'quest',
503 |         'questa',
504 |         'queste',
505 |         'questi',
506 |         'questo',
507 |         'qui',
508 |         'quindi',
509 |         'quinto',
510 |         'realmente',
511 |         'recente',
512 |         'recentemente',
513 |         'registrazione',
514 |         'relativo',
515 |         'riecco',
516 |         'rispetto',
517 |         'salvo',
518 |         'sara',
519 |         'sarai',
520 |         'saranno',
521 |         'sarebbe',
522 |         'sarebbero',
523 |         'sarei',
524 |         'saremmo',
525 |         'saremo',
526 |         'sareste',
527 |         'saresti',
528 |         'sarete',
529 |         'sarà',
530 |         'sarã',
531 |         'sarò',
532 |         'scola',
533 |         'scopo',
534 |         'scorso',
535 |         'se',
536 |         'secondo',
537 |         'seguente',
538 |         'seguito',
539 |         'sei',
540 |         'sembra',
541 |         'sembrare',
542 |         'sembrato',
543 |         'sembrava',
544 |         'sembri',
545 |         'sempre',
546 |         'senza',
547 |         'sette',
548 |         'si',
549 |         'sia',
550 |         'siamo',
551 |         'siano',
552 |         'siate',
553 |         'siete',
554 |         'sig',
555 |         'solito',
556 |         'solo',
557 |         'soltanto',
558 |         'sono',
559 |         'sopra',
560 |         'soprattutto',
561 |         'sotto',
562 |         'spesso',
563 |         'srl',
564 |         'sta',
565 |         'stai',
566 |         'stando',
567 |         'stanno',
568 |         'starai',
569 |         'staranno',
570 |         'starebbe',
571 |         'starebbero',
572 |         'starei',
573 |         'staremmo',
574 |         'staremo',
575 |         'stareste',
576 |         'staresti',
577 |         'starete',
578 |         'starà',
579 |         'starò',
580 |         'stata',
581 |         'state',
582 |         'stati',
583 |         'stato',
584 |         'stava',
585 |         'stavamo',
586 |         'stavano',
587 |         'stavate',
588 |         'stavi',
589 |         'stavo',
590 |         'stemmo',
591 |         'stessa',
592 |         'stesse',
593 |         'stessero',
594 |         'stessi',
595 |         'stessimo',
596 |         'stesso',
597 |         'steste',
598 |         'stesti',
599 |         'stette',
600 |         'stettero',
601 |         'stetti',
602 |         'stia',
603 |         'stiamo',
604 |         'stiano',
605 |         'stiate',
606 |         'sto',
607 |         'su',
608 |         'sua',
609 |         'subito',
610 |         'successivamente',
611 |         'successivo',
612 |         'sue',
613 |         'sugl',
614 |         'sugli',
615 |         'sui',
616 |         'sul',
617 |         'sull',
618 |         'sulla',
619 |         'sulle',
620 |         'sullo',
621 |         'suo',
622 |         'suoi',
623 |         'tale',
624 |         'tali',
625 |         'talvolta',
626 |         'tanto',
627 |         'te',
628 |         'tempo',
629 |         'terzo',
630 |         'th',
631 |         'ti',
632 |         'titolo',
633 |         'torino',
634 |         'tra',
635 |         'tranne',
636 |         'tre',
637 |         'trenta',
638 |         'triplo',
639 |         'troppo',
640 |         'trovato',
641 |         'tu',
642 |         'tua',
643 |         'tue',
644 |         'tuo',
645 |         'tuoi',
646 |         'tutta',
647 |         'tuttavia',
648 |         'tutte',
649 |         'tutti',
650 |         'tutto',
651 |         'uguali',
652 |         'ulteriore',
653 |         'ultimo',
654 |         'un',
655 |         'una',
656 |         'uno',
657 |         'uomo',
658 |         'va',
659 |         'vai',
660 |         'vale',
661 |         'vari',
662 |         'varia',
663 |         'varie',
664 |         'vario',
665 |         'verso',
666 |         'vi',
667 |         'via',
668 |         'vicino',
669 |         'visto',
670 |         'vita',
671 |         'voi',
672 |         'volta',
673 |         'volte',
674 |         'vostra',
675 |         'vostre',
676 |         'vostri',
677 |         'vostro',
678 |         'ã¨',
679 |         'è'
680 |     ];
681 | }
682 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Norwegian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  Syndesi <github.com/Syndesi>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool\StopWords;
 13 | 
 14 | /**
 15 |  * Class Norwegian
 16 |  *
 17 |  * @package PhpScience\TextRank\Tool\StopWords
 18 |  */
 19 | class Norwegian extends StopWordsAbstract
 20 | {
 21 |     /**
 22 |      * Norwegian stop words; listed words are too common to be useful keywords.
 23 |      * Source: https://github.com/stopwords-iso/stopwords-no
 24 |      * Upstream entries that had 'û' in place of 'ø' are stored in repaired form.
 25 |      *
 26 |      * @var string[]
 27 |      */
 28 |     protected $words = [
 29 |         'alle',
 30 |         'andre',
 31 |         'arbeid',
 32 |         'at',
 33 |         'av',
 34 |         'bare',
 35 |         'begge',
 36 |         'ble',
 37 |         'blei',
 38 |         'bli',
 39 |         'blir',
 40 |         'blitt',
 41 |         'bort',
 42 |         'bra',
 43 |         'bruke',
 44 |         'både',
 45 |         'båe',
 46 |         'da',
 47 |         'de',
 48 |         'deg',
 49 |         'dei',
 50 |         'deim',
 51 |         'deira',
 52 |         'deires',
 53 |         'dem',
 54 |         'den',
 55 |         'denne',
 56 |         'der',
 57 |         'dere',
 58 |         'deres',
 59 |         'det',
 60 |         'dette',
 61 |         'di',
 62 |         'din',
 63 |         'disse',
 64 |         'ditt',
 65 |         'du',
 66 |         'dykk',
 67 |         'dykkar',
 68 |         'då',
 69 |         'eg',
 70 |         'ein',
 71 |         'eit',
 72 |         'eitt',
 73 |         'eller',
 74 |         'elles',
 75 |         'en',
 76 |         'ene',
 77 |         'eneste',
 78 |         'enhver',
 79 |         'enn',
 80 |         'er',
 81 |         'et',
 82 |         'ett',
 83 |         'etter',
 84 |         'folk',
 85 |         'for',
 86 |         'fordi',
 87 |         'forsøke',
 88 |         'fra',
 89 |         'få',
 90 |         'før',
 91 |         'først',
 92 |         'gjorde',
 93 |         'gjøre',
 94 |         'god',
 95 |         'gå',
 96 |         'ha',
 97 |         'hadde',
 98 |         'han',
 99 |         'hans',
100 |         'har',
101 |         'hennar',
102 |         'henne',
103 |         'hennes',
104 |         'her',
105 |         'hjå',
106 |         'ho',
107 |         'hoe',
108 |         'honom',
109 |         'hoss',
110 |         'hossen',
111 |         'hun',
112 |         'hva',
113 |         'hvem',
114 |         'hver',
115 |         'hvilke',
116 |         'hvilken',
117 |         'hvis',
118 |         'hvor',
119 |         'hvordan',
120 |         'hvorfor',
121 |         'i',
122 |         'ikke',
123 |         'ikkje',
124 |         'ingen',
125 |         'ingi',
126 |         'inkje',
127 |         'inn',
128 |         'innen',
129 |         'inni',
130 |         'ja',
131 |         'jeg',
132 |         'kan',
133 |         'kom',
134 |         'korleis',
135 |         'korso',
136 |         'kun',
137 |         'kunne',
138 |         'kva',
139 |         'kvar',
140 |         'kvarhelst',
141 |         'kven',
142 |         'kvi',
143 |         'kvifor',
144 |         'lage',
145 |         'lang',
146 |         'lik',
147 |         'like',
148 |         'makt',
149 |         'man',
150 |         'mange',
151 |         'me',
152 |         'med',
153 |         'medan',
154 |         'meg',
155 |         'meget',
156 |         'mellom',
157 |         'men',
158 |         'mens',
159 |         'mer',
160 |         'mest',
161 |         'mi',
162 |         'min',
163 |         'mine',
164 |         'mitt',
165 |         'mot',
166 |         'mye',
167 |         'mykje',
168 |         'må',
169 |         'måte',
170 |         'navn',
171 |         'ned',
172 |         'nei',
173 |         'no',
174 |         'noe',
175 |         'noen',
176 |         'noka',
177 |         'noko',
178 |         'nokon',
179 |         'nokor',
180 |         'nokre',
181 |         'ny',
182 |         'nå',
183 |         'når',
184 |         'og',
185 |         'også',
186 |         'om',
187 |         'opp',
188 |         'oss',
189 |         'over',
190 |         'part',
191 |         'punkt',
192 |         'på',
193 |         'rett',
194 |         'riktig',
195 |         'samme',
196 |         'sant',
197 |         'seg',
198 |         'selv',
199 |         'si',
200 |         'sia',
201 |         'sidan',
202 |         'siden',
203 |         'sin',
204 |         'sine',
205 |         'sist',
206 |         'sitt',
207 |         'sjøl',
208 |         'skal',
209 |         'skulle',
210 |         'slik',
211 |         'slutt',
212 |         'so',
213 |         'som',
214 |         'somme',
215 |         'somt',
216 |         'start',
217 |         'stille',
218 |         'så',
219 |         'sånn',
220 |         'tid',
221 |         'til',
222 |         'tilbake',
223 |         'tilstand',
224 |         'um',
225 |         'under',
226 |         'upp',
227 |         'ut',
228 |         'uten',
229 |         'var',
230 |         'vart',
231 |         'varte',
232 |         'ved',
233 |         'verdi',
234 |         'vere',
235 |         'verte',
236 |         'vi',
237 |         'vil',
238 |         'ville',
239 |         'vite',
240 |         'vore',
241 |         'vors',
242 |         'vort',
243 |         'vår',
244 |         'være',
245 |         'vært',
246 |         'vöre',
247 |         'vört',
248 |         'å'
249 |     ];
250 | }
251 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Russian.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  * @author  Andrey Astashov <mvc.aaa@gmail.com> (Russian StopWords)
  9 |  */
 10 | 
 11 | declare(strict_types=1);
 12 | 
 13 | namespace PhpScience\TextRank\Tool\StopWords;
 14 | 
 15 | /**
 16 |  * Class Russian
 17 |  *
 18 |  * @package PhpScience\TextRank\Tool\StopWords
 19 |  */
 20 | class Russian extends StopWordsAbstract
 21 | {
 22 |     /**
 23 |      * Russian stop words; listed words are too common to be useful keywords.
 24 |      *
 25 |      * Several words are listed in both their 'е' and 'ё' spellings, because
 26 |      * both spellings occur in real text.
 27 |      *
 28 |      * NOTE(review): phrase entries such as 'хотел бы' contain a space; whether
 29 |      * they can ever match depends on how callers tokenize text - confirm.
 30 |      *
 31 |      * @var string[]
 32 |      */
 33 |     protected $words = [
 34 |         'c',
 35 |         'а',
 36 |         'алло',
 37 |         'без',
 38 |         'белый',
 39 |         'близко',
 40 |         'более',
 41 |         'больше',
 42 |         'большой',
 43 |         'будем',
 44 |         'будет',
 45 |         'будете',
 46 |         'будешь',
 47 |         'будто',
 48 |         'буду',
 49 |         'будут',
 50 |         'будь',
 51 |         'бы',
 52 |         'бывает',
 53 |         'бывь',
 54 |         'был',
 55 |         'была',
 56 |         'были',
 57 |         'было',
 58 |         'быть',
 59 |         'в',
 60 |         'важная',
 61 |         'важное',
 62 |         'важные',
 63 |         'важный',
 64 |         'вам',
 65 |         'вами',
 66 |         'вас',
 67 |         'ваш',
 68 |         'ваша',
 69 |         'ваше',
 70 |         'ваши',
 71 |         'вверх',
 72 |         'вдали',
 73 |         'вдруг',
 74 |         'ведь',
 75 |         'везде',
 76 |         'вернуться',
 77 |         'весь',
 78 |         'вечер',
 79 |         'взгляд',
 80 |         'взять',
 81 |         'вид',
 82 |         'видел',
 83 |         'видеть',
 84 |         'вместе',
 85 |         'вне',
 86 |         'вниз',
 87 |         'внизу',
 88 |         'во',
 89 |         'вода',
 90 |         'война',
 91 |         'вокруг',
 92 |         'вон',
 93 |         'вообще',
 94 |         'вопрос',
 95 |         'восемнадцатый',
 96 |         'восемнадцать',
 97 |         'восемь',
 98 |         'восьмой',
 99 |         'вот',
100 |         'впрочем',
101 |         'времени',
102 |         'время',
103 |         'все',
104 |         'все еще',
105 |         'всегда',
106 |         'всего',
107 |         'всем',
108 |         'всеми',
109 |         'всему',
110 |         'всех',
111 |         'всею',
112 |         'всю',
113 |         'всюду',
114 |         'вся',
115 |         'всё',
116 |         'второй',
117 |         'вы',
118 |         'выйти',
119 |         'г',
120 |         'где',
121 |         'главный',
122 |         'глаз',
123 |         'говорил',
124 |         'говорит',
125 |         'говорить',
126 |         'год',
127 |         'года',
128 |         'году',
129 |         'голова',
130 |         'голос',
131 |         'город',
132 |         'да',
133 |         'давать',
134 |         'давно',
135 |         'даже',
136 |         'далекий',
137 |         'далеко',
138 |         'дальше',
139 |         'далёкий',
140 |         'даром',
141 |         'дать',
142 |         'два',
143 |         'двадцатый',
144 |         'двадцать',
145 |         'две',
146 |         'двенадцатый',
147 |         'двенадцать',
148 |         'дверь',
149 |         'двух',
150 |         'девятнадцатый',
151 |         'девятнадцать',
152 |         'девятый',
153 |         'девять',
154 |         'действительно',
155 |         'дел',
156 |         'делал',
157 |         'делать',
158 |         'делаю',
159 |         'дело',
160 |         'день',
161 |         'деньги',
162 |         'десятый',
163 |         'десять',
164 |         'для',
165 |         'до',
166 |         'довольно',
167 |         'долго',
168 |         'должен',
169 |         'должно',
170 |         'должный',
171 |         'дом',
172 |         'дорога',
173 |         'друг',
174 |         'другая',
175 |         'другие',
176 |         'других',
177 |         'друго',
178 |         'другое',
179 |         'другой',
180 |         'думать',
181 |         'душа',
182 |         'е',
183 |         'его',
184 |         'ее',
185 |         'ей',
186 |         'ему',
187 |         'если',
188 |         'есть',
189 |         'еще',
190 |         'ещё',
191 |         'ею',
192 |         'её',
193 |         'ж',
194 |         'ждать',
195 |         'же',
196 |         'жена',
197 |         'женщина',
198 |         'жизнь',
199 |         'жить',
200 |         'за',
201 |         'занят',
202 |         'занята',
203 |         'занято',
204 |         'заняты',
205 |         'затем',
206 |         'зато',
207 |         'зачем',
208 |         'здесь',
209 |         'земля',
210 |         'знать',
211 |         'значит',
212 |         'значить',
213 |         'и',
214 |         'иди',
215 |         'идти',
216 |         'из',
217 |         'или',
218 |         'им',
219 |         'имеет',
220 |         'имел',
221 |         'именно',
222 |         'иметь',
223 |         'ими',
224 |         'имя',
225 |         'иногда',
226 |         'их',
227 |         'к',
228 |         'каждая',
229 |         'каждое',
230 |         'каждые',
231 |         'каждый',
232 |         'кажется',
233 |         'казаться',
234 |         'как',
235 |         'какая',
236 |         'какой',
237 |         'кем',
238 |         'книга',
239 |         'когда',
240 |         'кого',
241 |         'ком',
242 |         'комната',
243 |         'кому',
244 |         'конец',
245 |         'конечно',
246 |         'которая',
247 |         'которого',
248 |         'которой',
249 |         'которые',
250 |         'который',
251 |         'которых',
252 |         'кроме',
253 |         'кругом',
254 |         'кто',
255 |         'куда',
256 |         'лежать',
257 |         'лет',
258 |         'ли',
259 |         'лицо',
260 |         'лишь',
261 |         'лучше',
262 |         'любить',
263 |         'люди',
264 |         'м',
265 |         'маленький',
266 |         'мало',
267 |         'мать',
268 |         'машина',
269 |         'между',
270 |         'меля',
271 |         'менее',
272 |         'меньше',
273 |         'меня',
274 |         'место',
275 |         'миллионов',
276 |         'мимо',
277 |         'минута',
278 |         'мир',
279 |         'мира',
280 |         'мне',
281 |         'много',
282 |         'многочисленная',
283 |         'многочисленное',
284 |         'многочисленные',
285 |         'многочисленный',
286 |         'мной',
287 |         'мною',
288 |         'мог',
289 |         'могу',
290 |         'могут',
291 |         'мое',
292 |         'мож',
293 |         'может',
294 |         'может быть',
295 |         'можно',
296 |         'можхо',
297 |         'мои',
298 |         'мой',
299 |         'мор',
300 |         'москва',
301 |         'мочь',
302 |         'моя',
303 |         'моё',
304 |         'мы',
305 |         'на',
306 |         'наверху',
307 |         'над',
308 |         'надо',
309 |         'назад',
310 |         'наиболее',
311 |         'найти',
312 |         'наконец',
313 |         'нам',
314 |         'нами',
315 |         'народ',
316 |         'нас',
317 |         'начала',
318 |         'начать',
319 |         'наш',
320 |         'наша',
321 |         'наше',
322 |         'наши',
323 |         'не',
324 |         'него',
325 |         'недавно',
326 |         'недалеко',
327 |         'нее',
328 |         'ней',
329 |         'некоторый',
330 |         'нельзя',
331 |         'нем',
332 |         'немного',
333 |         'нему',
334 |         'непрерывно',
335 |         'нередко',
336 |         'несколько',
337 |         'нет',
338 |         'нею',
339 |         'неё',
340 |         'ни',
341 |         'нибудь',
342 |         'ниже',
343 |         'низко',
344 |         'никакой',
345 |         'никогда',
346 |         'никто',
347 |         'никуда',
348 |         'ним',
349 |         'ними',
350 |         'них',
351 |         'ничего',
352 |         'ничто',
353 |         'но',
354 |         'новый',
355 |         'нога',
356 |         'ночь',
357 |         'ну',
358 |         'нужно',
359 |         'нужный',
360 |         'нх',
361 |         'о',
362 |         'об',
363 |         'оба',
364 |         'обычно',
365 |         'один',
366 |         'одиннадцатый',
367 |         'одиннадцать',
368 |         'однажды',
369 |         'однако',
370 |         'одного',
371 |         'одной',
372 |         'оказаться',
373 |         'окно',
374 |         'около',
375 |         'он',
376 |         'она',
377 |         'они',
378 |         'оно',
379 |         'опять',
380 |         'особенно',
381 |         'остаться',
382 |         'от',
383 |         'ответить',
384 |         'отец',
385 |         'откуда',
386 |         'отовсюду',
387 |         'отсюда',
388 |         'очень',
389 |         'первый',
390 |         'перед',
391 |         'писать',
392 |         'плечо',
393 |         'по',
394 |         'под',
395 |         'подойди',
396 |         'подумать',
397 |         'пожалуйста',
398 |         'позже',
399 |         'пойти',
400 |         'пока',
401 |         'пол',
402 |         'получить',
403 |         'помнить',
404 |         'понимать',
405 |         'понять',
406 |         'пор',
407 |         'пора',
408 |         'после',
409 |         'последний',
410 |         'посмотреть',
411 |         'посреди',
412 |         'потом',
413 |         'потому',
414 |         'почему',
415 |         'почти',
416 |         'правда',
417 |         'прекрасно',
418 |         'при',
419 |         'про',
420 |         'просто',
421 |         'против',
422 |         'процентов',
423 |         'путь',
424 |         'пятнадцатый',
425 |         'пятнадцать',
426 |         'пятый',
427 |         'пять',
428 |         'работа',
429 |         'работать',
430 |         'раз',
431 |         'разве',
432 |         'рано',
433 |         'раньше',
434 |         'ребенок',
435 |         'ребёнок',
436 |         'решить',
437 |         'россия',
438 |         'рука',
439 |         'русский',
440 |         'ряд',
441 |         'рядом',
442 |         'с',
443 |         'с кем',
444 |         'сам',
445 |         'сама',
446 |         'сами',
447 |         'самим',
448 |         'самими',
449 |         'самих',
450 |         'само',
451 |         'самого',
452 |         'самой',
453 |         'самом',
454 |         'самому',
455 |         'саму',
456 |         'самый',
457 |         'свет',
458 |         'свое',
459 |         'своего',
460 |         'своей',
461 |         'свои',
462 |         'своих',
463 |         'свой',
464 |         'свою',
465 |         'своё',
466 |         'сделать',
467 |         'сеаой',
468 |         'себе',
469 |         'себя',
470 |         'сегодня',
471 |         'седьмой',
472 |         'сейчас',
473 |         'семнадцатый',
474 |         'семнадцать',
475 |         'семь',
476 |         'сидеть',
477 |         'сила',
478 |         'сих',
479 |         'сказал',
480 |         'сказала',
481 |         'сказать',
482 |         'сколько',
483 |         'слишком',
484 |         'слово',
485 |         'случай',
486 |         'смотреть',
487 |         'сначала',
488 |         'снова',
489 |         'со',
490 |         'собой',
491 |         'собою',
492 |         'советский',
493 |         'совсем',
494 |         'спасибо',
495 |         'спросить',
496 |         'сразу',
497 |         'стал',
498 |         'старый',
499 |         'стать',
500 |         'стол',
501 |         'сторона',
502 |         'стоять',
503 |         'страна',
504 |         'суть',
505 |         'считать',
506 |         'т',
507 |         'та',
508 |         'так',
509 |         'такая',
510 |         'также',
511 |         'таки',
512 |         'такие',
513 |         'такое',
514 |         'такой',
515 |         'там',
516 |         'твое',
517 |         'твои',
518 |         'твой',
519 |         'твоя',
520 |         'твоё',
521 |         'те',
522 |         'тебе',
523 |         'тебя',
524 |         'тем',
525 |         'теми',
526 |         'теперь',
527 |         'тех',
528 |         'то',
529 |         'тобой',
530 |         'тобою',
531 |         'товарищ',
532 |         'тогда',
533 |         'того',
534 |         'тоже',
535 |         'только',
536 |         'том',
537 |         'тому',
538 |         'тот',
539 |         'тою',
540 |         'третий',
541 |         'три',
542 |         'тринадцатый',
543 |         'тринадцать',
544 |         'ту',
545 |         'туда',
546 |         'тут',
547 |         'ты',
548 |         'тысяч',
549 |         'у',
550 |         'увидеть',
551 |         'уж',
552 |         'уже',
553 |         'улица',
554 |         'уметь',
555 |         'утро',
556 |         'хороший',
557 |         'хорошо',
558 |         'хотел',
559 |         'хотел бы',
560 |         'хотеть',
561 |         'хоть',
562 |         'хотя',
563 |         'хочешь',
564 |         'час',
565 |         'часто',
566 |         'часть',
567 |         'чаще',
568 |         'чего',
569 |         'человек',
570 |         'чем',
571 |         'чему',
572 |         'через',
573 |         'четвертый',
574 |         'четвёртый',
575 |         'четыре',
576 |         'четырнадцатый',
577 |         'четырнадцать',
578 |         'что',
579 |         'чтоб',
580 |         'чтобы',
581 |         'чуть',
582 |         'шестнадцатый',
583 |         'шестнадцать',
584 |         'шестой',
585 |         'шесть',
586 |         'эта',
587 |         'эти',
588 |         'этим',
589 |         'этими',
590 |         'этих',
591 |         'это',
592 |         'этого',
593 |         'этой',
594 |         'этом',
595 |         'этому',
596 |         'этот',
597 |         'эту',
598 |         'я',
599 |         'являюсь'
600 |     ];
601 | }
589 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Spanish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  Syndesi <github.com/Syndesi>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool\StopWords;
 13 | 
 14 | /**
 15 |  * Class Spanish
 16 |  *
 17 |  * @package PhpScience\TextRank\Tool\StopWords
 18 |  */
 19 | class Spanish extends StopWordsAbstract
 20 | {
 21 |     /**
 22 |      * Stop words used to avoid dummy keywords for the Spanish language.
 23 |      * Source: https://github.com/stopwords-iso/stopwords-es
 24 |      *
 25 |      * @var array
 26 |      */
 27 |     protected $words = [
 28 |         'a',
 29 |         'actualmente',
 30 |         'acuerdo',
 31 |         'adelante',
 32 |         'ademas',
 33 |         'además',
 34 |         'adrede',
 35 |         'afirmó',
 36 |         'agregó',
 37 |         'ahi',
 38 |         'ahora',
 39 |         'ahí',
 40 |         'al',
 41 |         'algo',
 42 |         'alguna',
 43 |         'algunas',
 44 |         'alguno',
 45 |         'algunos',
 46 |         'algún',
 47 |         'alli',
 48 |         'allí',
 49 |         'alrededor',
 50 |         'ambos',
 51 |         'ampleamos',
 52 |         'antano',
 53 |         'antaño',
 54 |         'ante',
 55 |         'anterior',
 56 |         'antes',
 57 |         'apenas',
 58 |         'aproximadamente',
 59 |         'aquel',
 60 |         'aquella',
 61 |         'aquellas',
 62 |         'aquello',
 63 |         'aquellos',
 64 |         'aqui',
 65 |         'aquél',
 66 |         'aquélla',
 67 |         'aquéllas',
 68 |         'aquéllos',
 69 |         'aquí',
 70 |         'arriba',
 71 |         'arribaabajo',
 72 |         'aseguró',
 73 |         'asi',
 74 |         'así',
 75 |         'atras',
 76 |         'aun',
 77 |         'aunque',
 78 |         'ayer',
 79 |         'añadió',
 80 |         'aún',
 81 |         'b',
 82 |         'bajo',
 83 |         'bastante',
 84 |         'bien',
 85 |         'breve',
 86 |         'buen',
 87 |         'buena',
 88 |         'buenas',
 89 |         'bueno',
 90 |         'buenos',
 91 |         'c',
 92 |         'cada',
 93 |         'casi',
 94 |         'cerca',
 95 |         'cierta',
 96 |         'ciertas',
 97 |         'cierto',
 98 |         'ciertos',
 99 |         'cinco',
100 |         'claro',
101 |         'comentó',
102 |         'como',
103 |         'con',
104 |         'conmigo',
105 |         'conocer',
106 |         'conseguimos',
107 |         'conseguir',
108 |         'considera',
109 |         'consideró',
110 |         'consigo',
111 |         'consigue',
112 |         'consiguen',
113 |         'consigues',
114 |         'contigo',
115 |         'contra',
116 |         'cosas',
117 |         'creo',
118 |         'cual',
119 |         'cuales',
120 |         'cualquier',
121 |         'cuando',
122 |         'cuanta',
123 |         'cuantas',
124 |         'cuanto',
125 |         'cuantos',
126 |         'cuatro',
127 |         'cuenta',
128 |         'cuál',
129 |         'cuáles',
130 |         'cuándo',
131 |         'cuánta',
132 |         'cuántas',
133 |         'cuánto',
134 |         'cuántos',
135 |         'cómo',
136 |         'd',
137 |         'da',
138 |         'dado',
139 |         'dan',
140 |         'dar',
141 |         'de',
142 |         'debajo',
143 |         'debe',
144 |         'deben',
145 |         'debido',
146 |         'decir',
147 |         'dejó',
148 |         'del',
149 |         'delante',
150 |         'demasiado',
151 |         'demás',
152 |         'dentro',
153 |         'deprisa',
154 |         'desde',
155 |         'despacio',
156 |         'despues',
157 |         'después',
158 |         'detras',
159 |         'detrás',
160 |         'dia',
161 |         'dias',
162 |         'dice',
163 |         'dicen',
164 |         'dicho',
165 |         'dieron',
166 |         'diferente',
167 |         'diferentes',
168 |         'dijeron',
169 |         'dijo',
170 |         'dio',
171 |         'donde',
172 |         'dos',
173 |         'durante',
174 |         'día',
175 |         'días',
176 |         'dónde',
177 |         'e',
178 |         'ejemplo',
179 |         'el',
180 |         'ella',
181 |         'ellas',
182 |         'ello',
183 |         'ellos',
184 |         'embargo',
185 |         'empleais',
186 |         'emplean',
187 |         'emplear',
188 |         'empleas',
189 |         'empleo',
190 |         'en',
191 |         'encima',
192 |         'encuentra',
193 |         'enfrente',
194 |         'enseguida',
195 |         'entonces',
196 |         'entre',
197 |         'era',
198 |         'erais',
199 |         'eramos',
200 |         'eran',
201 |         'eras',
202 |         'eres',
203 |         'es',
204 |         'esa',
205 |         'esas',
206 |         'ese',
207 |         'eso',
208 |         'esos',
209 |         'esta',
210 |         'estaba',
211 |         'estabais',
212 |         'estaban',
213 |         'estabas',
214 |         'estad',
215 |         'estada',
216 |         'estadas',
217 |         'estado',
218 |         'estados',
219 |         'estais',
220 |         'estamos',
221 |         'estan',
222 |         'estando',
223 |         'estar',
224 |         'estaremos',
225 |         'estará',
226 |         'estarán',
227 |         'estarás',
228 |         'estaré',
229 |         'estaréis',
230 |         'estaría',
231 |         'estaríais',
232 |         'estaríamos',
233 |         'estarían',
234 |         'estarías',
235 |         'estas',
236 |         'este',
237 |         'estemos',
238 |         'esto',
239 |         'estos',
240 |         'estoy',
241 |         'estuve',
242 |         'estuviera',
243 |         'estuvierais',
244 |         'estuvieran',
245 |         'estuvieras',
246 |         'estuvieron',
247 |         'estuviese',
248 |         'estuvieseis',
249 |         'estuviesen',
250 |         'estuvieses',
251 |         'estuvimos',
252 |         'estuviste',
253 |         'estuvisteis',
254 |         'estuviéramos',
255 |         'estuviésemos',
256 |         'estuvo',
257 |         'está',
258 |         'estábamos',
259 |         'estáis',
260 |         'están',
261 |         'estás',
262 |         'esté',
263 |         'estéis',
264 |         'estén',
265 |         'estés',
266 |         'ex',
267 |         'excepto',
268 |         'existe',
269 |         'existen',
270 |         'explicó',
271 |         'expresó',
272 |         'f',
273 |         'fin',
274 |         'final',
275 |         'fue',
276 |         'fuera',
277 |         'fuerais',
278 |         'fueran',
279 |         'fueras',
280 |         'fueron',
281 |         'fuese',
282 |         'fueseis',
283 |         'fuesen',
284 |         'fueses',
285 |         'fui',
286 |         'fuimos',
287 |         'fuiste',
288 |         'fuisteis',
289 |         'fuéramos',
290 |         'fuésemos',
291 |         'g',
292 |         'general',
293 |         'gran',
294 |         'grandes',
295 |         'gueno',
296 |         'h',
297 |         'ha',
298 |         'haber',
299 |         'habia',
300 |         'habida',
301 |         'habidas',
302 |         'habido',
303 |         'habidos',
304 |         'habiendo',
305 |         'habla',
306 |         'hablan',
307 |         'habremos',
308 |         'habrá',
309 |         'habrán',
310 |         'habrás',
311 |         'habré',
312 |         'habréis',
313 |         'habría',
314 |         'habríais',
315 |         'habríamos',
316 |         'habrían',
317 |         'habrías',
318 |         'habéis',
319 |         'había',
320 |         'habíais',
321 |         'habíamos',
322 |         'habían',
323 |         'habías',
324 |         'hace',
325 |         'haceis',
326 |         'hacemos',
327 |         'hacen',
328 |         'hacer',
329 |         'hacerlo',
330 |         'haces',
331 |         'hacia',
332 |         'haciendo',
333 |         'hago',
334 |         'han',
335 |         'has',
336 |         'hasta',
337 |         'hay',
338 |         'haya',
339 |         'hayamos',
340 |         'hayan',
341 |         'hayas',
342 |         'hayáis',
343 |         'he',
344 |         'hecho',
345 |         'hemos',
346 |         'hicieron',
347 |         'hizo',
348 |         'horas',
349 |         'hoy',
350 |         'hube',
351 |         'hubiera',
352 |         'hubierais',
353 |         'hubieran',
354 |         'hubieras',
355 |         'hubieron',
356 |         'hubiese',
357 |         'hubieseis',
358 |         'hubiesen',
359 |         'hubieses',
360 |         'hubimos',
361 |         'hubiste',
362 |         'hubisteis',
363 |         'hubiéramos',
364 |         'hubiésemos',
365 |         'hubo',
366 |         'i',
367 |         'igual',
368 |         'incluso',
369 |         'indicó',
370 |         'informo',
371 |         'informó',
372 |         'intenta',
373 |         'intentais',
374 |         'intentamos',
375 |         'intentan',
376 |         'intentar',
377 |         'intentas',
378 |         'intento',
379 |         'ir',
380 |         'j',
381 |         'junto',
382 |         'k',
383 |         'l',
384 |         'la',
385 |         'lado',
386 |         'largo',
387 |         'las',
388 |         'le',
389 |         'lejos',
390 |         'les',
391 |         'llegó',
392 |         'lleva',
393 |         'llevar',
394 |         'lo',
395 |         'los',
396 |         'luego',
397 |         'lugar',
398 |         'm',
399 |         'mal',
400 |         'manera',
401 |         'manifestó',
402 |         'mas',
403 |         'mayor',
404 |         'me',
405 |         'mediante',
406 |         'medio',
407 |         'mejor',
408 |         'mencionó',
409 |         'menos',
410 |         'menudo',
411 |         'mi',
412 |         'mia',
413 |         'mias',
414 |         'mientras',
415 |         'mio',
416 |         'mios',
417 |         'mis',
418 |         'misma',
419 |         'mismas',
420 |         'mismo',
421 |         'mismos',
422 |         'modo',
423 |         'momento',
424 |         'mucha',
425 |         'muchas',
426 |         'mucho',
427 |         'muchos',
428 |         'muy',
429 |         'más',
430 |         'mí',
431 |         'mía',
432 |         'mías',
433 |         'mío',
434 |         'míos',
435 |         'n',
436 |         'nada',
437 |         'nadie',
438 |         'ni',
439 |         'ninguna',
440 |         'ningunas',
441 |         'ninguno',
442 |         'ningunos',
443 |         'ningún',
444 |         'no',
445 |         'nos',
446 |         'nosotras',
447 |         'nosotros',
448 |         'nuestra',
449 |         'nuestras',
450 |         'nuestro',
451 |         'nuestros',
452 |         'nueva',
453 |         'nuevas',
454 |         'nuevo',
455 |         'nuevos',
456 |         'nunca',
457 |         'o',
458 |         'ocho',
459 |         'os',
460 |         'otra',
461 |         'otras',
462 |         'otro',
463 |         'otros',
464 |         'p',
465 |         'pais',
466 |         'para',
467 |         'parece',
468 |         'parte',
469 |         'partir',
470 |         'pasada',
471 |         'pasado',
472 |         'paìs',
473 |         'peor',
474 |         'pero',
475 |         'pesar',
476 |         'poca',
477 |         'pocas',
478 |         'poco',
479 |         'pocos',
480 |         'podeis',
481 |         'podemos',
482 |         'poder',
483 |         'podria',
484 |         'podriais',
485 |         'podriamos',
486 |         'podrian',
487 |         'podrias',
488 |         'podrá',
489 |         'podrán',
490 |         'podría',
491 |         'podrían',
492 |         'poner',
493 |         'por',
494 |         'por qué',
495 |         'porque',
496 |         'posible',
497 |         'primer',
498 |         'primera',
499 |         'primero',
500 |         'primeros',
501 |         'principalmente',
502 |         'pronto',
503 |         'propia',
504 |         'propias',
505 |         'propio',
506 |         'propios',
507 |         'proximo',
508 |         'próximo',
509 |         'próximos',
510 |         'pudo',
511 |         'pueda',
512 |         'puede',
513 |         'pueden',
514 |         'puedo',
515 |         'pues',
516 |         'q',
517 |         'qeu',
518 |         'que',
519 |         'quedó',
520 |         'queremos',
521 |         'quien',
522 |         'quienes',
523 |         'quiere',
524 |         'quiza',
525 |         'quizas',
526 |         'quizá',
527 |         'quizás',
528 |         'quién',
529 |         'quiénes',
530 |         'qué',
531 |         'r',
532 |         'raras',
533 |         'realizado',
534 |         'realizar',
535 |         'realizó',
536 |         'repente',
537 |         'respecto',
538 |         's',
539 |         'sabe',
540 |         'sabeis',
541 |         'sabemos',
542 |         'saben',
543 |         'saber',
544 |         'sabes',
545 |         'sal',
546 |         'salvo',
547 |         'se',
548 |         'sea',
549 |         'seamos',
550 |         'sean',
551 |         'seas',
552 |         'segun',
553 |         'segunda',
554 |         'segundo',
555 |         'según',
556 |         'seis',
557 |         'ser',
558 |         'sera',
559 |         'seremos',
560 |         'será',
561 |         'serán',
562 |         'serás',
563 |         'seré',
564 |         'seréis',
565 |         'sería',
566 |         'seríais',
567 |         'seríamos',
568 |         'serían',
569 |         'serías',
570 |         'seáis',
571 |         'señaló',
572 |         'si',
573 |         'sido',
574 |         'siempre',
575 |         'siendo',
576 |         'siete',
577 |         'sigue',
578 |         'siguiente',
579 |         'sin',
580 |         'sino',
581 |         'sobre',
582 |         'sois',
583 |         'sola',
584 |         'solamente',
585 |         'solas',
586 |         'solo',
587 |         'solos',
588 |         'somos',
589 |         'son',
590 |         'soy',
591 |         'soyos',
592 |         'su',
593 |         'supuesto',
594 |         'sus',
595 |         'suya',
596 |         'suyas',
597 |         'suyo',
598 |         'suyos',
599 |         'sé',
600 |         'sí',
601 |         'sólo',
602 |         't',
603 |         'tal',
604 |         'tambien',
605 |         'también',
606 |         'tampoco',
607 |         'tan',
608 |         'tanto',
609 |         'tarde',
610 |         'te',
611 |         'temprano',
612 |         'tendremos',
613 |         'tendrá',
614 |         'tendrán',
615 |         'tendrás',
616 |         'tendré',
617 |         'tendréis',
618 |         'tendría',
619 |         'tendríais',
620 |         'tendríamos',
621 |         'tendrían',
622 |         'tendrías',
623 |         'tened',
624 |         'teneis',
625 |         'tenemos',
626 |         'tener',
627 |         'tenga',
628 |         'tengamos',
629 |         'tengan',
630 |         'tengas',
631 |         'tengo',
632 |         'tengáis',
633 |         'tenida',
634 |         'tenidas',
635 |         'tenido',
636 |         'tenidos',
637 |         'teniendo',
638 |         'tenéis',
639 |         'tenía',
640 |         'teníais',
641 |         'teníamos',
642 |         'tenían',
643 |         'tenías',
644 |         'tercera',
645 |         'ti',
646 |         'tiempo',
647 |         'tiene',
648 |         'tienen',
649 |         'tienes',
650 |         'toda',
651 |         'todas',
652 |         'todavia',
653 |         'todavía',
654 |         'todo',
655 |         'todos',
656 |         'total',
657 |         'trabaja',
658 |         'trabajais',
659 |         'trabajamos',
660 |         'trabajan',
661 |         'trabajar',
662 |         'trabajas',
663 |         'trabajo',
664 |         'tras',
665 |         'trata',
666 |         'través',
667 |         'tres',
668 |         'tu',
669 |         'tus',
670 |         'tuve',
671 |         'tuviera',
672 |         'tuvierais',
673 |         'tuvieran',
674 |         'tuvieras',
675 |         'tuvieron',
676 |         'tuviese',
677 |         'tuvieseis',
678 |         'tuviesen',
679 |         'tuvieses',
680 |         'tuvimos',
681 |         'tuviste',
682 |         'tuvisteis',
683 |         'tuviéramos',
684 |         'tuviésemos',
685 |         'tuvo',
686 |         'tuya',
687 |         'tuyas',
688 |         'tuyo',
689 |         'tuyos',
690 |         'tú',
691 |         'u',
692 |         'ultimo',
693 |         'un',
694 |         'una',
695 |         'unas',
696 |         'uno',
697 |         'unos',
698 |         'usa',
699 |         'usais',
700 |         'usamos',
701 |         'usan',
702 |         'usar',
703 |         'usas',
704 |         'uso',
705 |         'usted',
706 |         'ustedes',
707 |         'v',
708 |         'va',
709 |         'vais',
710 |         'valor',
711 |         'vamos',
712 |         'van',
713 |         'varias',
714 |         'varios',
715 |         'vaya',
716 |         'veces',
717 |         'ver',
718 |         'verdad',
719 |         'verdadera',
720 |         'verdadero',
721 |         'vez',
722 |         'vosotras',
723 |         'vosotros',
724 |         'voy',
725 |         'vuestra',
726 |         'vuestras',
727 |         'vuestro',
728 |         'vuestros',
729 |         'w',
730 |         'x',
731 |         'y',
732 |         'ya',
733 |         'yo',
734 |         'z',
735 |         'él',
736 |         'éramos',
737 |         'ésa',
738 |         'ésas',
739 |         'ése',
740 |         'ésos',
741 |         'ésta',
742 |         'éstas',
743 |         'éste',
744 |         'éstos',
745 |         'última',
746 |         'últimas',
747 |         'último',
748 |         'últimos',
749 |     ];
750 | }
751 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/StopWordsAbstract.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * PHP Science TextRank (http://php.science/)
 4 |  *
 5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
 6 |  * @license https://opensource.org/licenses/MIT the MIT License
 7 |  * @author  David Belicza <david@belicza.com>
 8 |  */
 9 | 
10 | declare(strict_types=1);
11 | 
12 | namespace PhpScience\TextRank\Tool\StopWords;
13 | 
/**
 * Class StopWordsAbstract
 *
 * Base class of the language specific stop word lists. A language class
 * extends it and overrides the $words property with its own list.
 *
 * @package PhpScience\TextRank\Tool\StopWords
 */
abstract class StopWordsAbstract
{
    /**
     * Stop words; they are used to keep meaningless words out of the keywords.
     *
     * The property has no native type on purpose: the language classes
     * redeclare it without a type and PHP requires the type of a redeclared
     * property to match the parent's.
     *
     * @var string[]
     */
    protected $words = [];

    /**
     * Tells whether the given word is in the list of stop words.
     *
     * The lookup is strict (type and value). A loose comparison would treat
     * two different numeric-looking strings, e.g. "1e1" and "10", as the same
     * word. The match is exact: no case folding or trimming happens here.
     *
     * @param string $word The word to look up.
     *
     * @return bool True when the word is a stop word.
     */
    public function exist(string $word): bool
    {
        return in_array($word, $this->words, true);
    }
}
40 | 


--------------------------------------------------------------------------------
/src/Tool/StopWords/Turkish.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  Sezer Fidancı <github.com/SezerFidanci>
  8 |  */
  9 | declare(strict_types=1);
 10 | namespace PhpScience\TextRank\Tool\StopWords;
 11 | /**
 12 |  * Class Turkish
 13 |  *
 14 |  * @package PhpScience\TextRank\Tool\StopWords
 15 |  */
 16 | class Turkish extends StopWordsAbstract
 17 | {
 18 |     /**
 19 |      * Stop words for avoid dummy keywords for Language Turkish.
 20 |      * Word list created by Sezer Fidancı.
 21 |      * Source: https://raw.githubusercontent.com/abdullahharuntahtali/turkish_stop_words/master/turkish_stopwords.txt
 22 |      *
 23 |      * @var array
 24 |      */
 25 |     protected $words = [
 26 |         "acaba",
 27 |         "aksine",
 28 |         "al",
 29 |         "alarak",
 30 |         "aldılar",
 31 |         "aldım",
 32 |         "aldırdılar",
 33 |         "aldırdım",
 34 |         "aldırmadık",
 35 |         "aldırmadım",
 36 |         "almadım",
 37 |         "almaktadır",
 38 |         "almıştır",
 39 |         "altmış",
 40 |         "altı",
 41 |         "alıp",
 42 |         "ama",
 43 |         "amacı",
 44 |         "amacında",
 45 |         "amacıyla",
 46 |         "amaçla",
 47 |         "amaçlanmaktadır",
 48 |         "an",
 49 |         "ancak",
 50 |         "anlaşılmaktadır",
 51 |         "arada",
 52 |         "arasında",
 53 |         "artık",
 54 |         "asla",
 55 |         "aslında",
 56 |         "ay",
 57 |         "ayrıca",
 58 |         "ayrılmaktadır",
 59 |         "ayy",
 60 |         "az",
 61 |         "azdır",
 62 |         "bana",
 63 |         "bazen",
 64 |         "bazı",
 65 |         "bazıları",
 66 |         "bazısı",
 67 |         "başda",
 68 |         "başlık",
 69 |         "başta",
 70 |         "belgelenmiştir",
 71 |         "belirlendi",
 72 |         "belirlenmiş",
 73 |         "belirlenmişdir",
 74 |         "belirlenmiştir",
 75 |         "belirli",
 76 |         "belki",
 77 |         "belli",
 78 |         "ben",
 79 |         "benden",
 80 |         "beni",
 81 |         "benim",
 82 |         "benimde",
 83 |         "beri",
 84 |         "beş",
 85 |         "beşe",
 86 |         "beşi",
 87 |         "beşinci",
 88 |         "beşli",
 89 |         "bile",
 90 |         "bilhassa",
 91 |         "bin",
 92 |         "bir",
 93 |         "biri",
 94 |         "birisi",
 95 |         "birkaç",
 96 |         "birkaçı",
 97 |         "birkez",
 98 |         "birlikte",
 99 |         "birçok",
100 |         "birçokları",
101 |         "birçoğu",
102 |         "birşey",
103 |         "birşeyi",
104 |         "biz",
105 |         "bizden",
106 |         "bize",
107 |         "bizi",
108 |         "bizim",
109 |         "bu",
110 |         "bulunan",
111 |         "bulunanlar",
112 |         "bulunduk",
113 |         "bulundular",
114 |         "bulundum",
115 |         "bulundunuz",
116 |         "bulunmak",
117 |         "bulunuldu",
118 |         "bulunulmuştur",
119 |         "buna",
120 |         "bunda",
121 |         "bundan",
122 |         "bunlar",
123 |         "bunları",
124 |         "bunların",
125 |         "bunu",
126 |         "bunun",
127 |         "bununda",
128 |         "bununla",
129 |         "burada",
130 |         "böyle",
131 |         "böylece",
132 |         "bütün",
133 |         "ca",
134 |         "ce",
135 |         "çeşitler",
136 |         "çeşitli",
137 |         "çok",
138 |         "çoktur",
139 |         "çoğu",
140 |         "çoğuna",
141 |         "çoğunu",
142 |         "çünkü",
143 |         "da",
144 |         "daa",
145 |         "daha",
146 |         "dahi",
147 |         "dair",
148 |         "de",
149 |         "defa",
150 |         "demek",
151 |         "değil",
152 |         "di",
153 |         "diye",
154 |         "diğer",
155 |         "diğeri",
156 |         "diğerleri",
157 |         "doksan",
158 |         "dokuz",
159 |         "dolayı",
160 |         "dolayısıyla",
161 |         "du",
162 |         "durdu",
163 |         "durduk",
164 |         "durdular",
165 |         "durdum",
166 |         "durulacak",
167 |         "durulacaktır",
168 |         "duruldu",
169 |         "durulmamış",
170 |         "durulmamıştır",
171 |         "durulmuştur",
172 |         "durulur",
173 |         "durulurlar",
174 |         "durumda",
175 |         "durumdur",
176 |         "durunuz",
177 |         "dört",
178 |         "dış",
179 |         "edecek",
180 |         "eden",
181 |         "ederek",
182 |         "edilecek",
183 |         "ediliyor",
184 |         "edilmesi",
185 |         "edilmiş",
186 |         "ediyor",
187 |         "elbette",
188 |         "elli",
189 |         "en",
190 |         "en çok",
191 |         "et",
192 |         "etme",
193 |         "etmedim",
194 |         "etmek",
195 |         "etmekte",
196 |         "etmesi",
197 |         "etti",
198 |         "ettiklerini",
199 |         "ettirmek",
200 |         "ettiği",
201 |         "ettiğini",
202 |         "eğer",
203 |         "fakat",
204 |         "felan",
205 |         "filan",
206 |         "geldiler",
207 |         "gelir",
208 |         "geliyorlar",
209 |         "gelmiş",
210 |         "gelmişler",
211 |         "gene",
212 |         "gerektiğinde",
213 |         "getirdi",
214 |         "getirdik",
215 |         "getirdiler",
216 |         "getirdim",
217 |         "getirdiniz",
218 |         "getirmişler",
219 |         "gibi",
220 |         "gider",
221 |         "gidiyorlar",
222 |         "gil",
223 |         "giller",
224 |         "gine",
225 |         "gitmişler",
226 |         "gittiler",
227 |         "göre",
228 |         "ha",
229 |         "haa",
230 |         "halen",
231 |         "hangi",
232 |         "hangisi",
233 |         "hani",
234 |         "hatta",
235 |         "he",
236 |         "hee",
237 |         "hem",
238 |         "henüz",
239 |         "hep",
240 |         "hepsi",
241 |         "hepsine",
242 |         "hepsini",
243 |         "her",
244 |         "her biri",
245 |         "herhangi",
246 |         "herkes",
247 |         "herkese",
248 |         "herkesi",
249 |         "herkesin",
250 |         "hi",
251 |         "hiç",
252 |         "hiç kimse",
253 |         "hiçbir",
254 |         "hiçbiri",
255 |         "hiçbirine",
256 |         "hiçbirini",
257 |         "hu",
258 |         "huu",
259 |         "hâlâ",
260 |         "hı",
261 |         "ın",
262 |         "ıt",
263 |         "iki",
264 |         "ile",
265 |         "ilgili",
266 |         "in",
267 |         "inceledik",
268 |         "incelediler",
269 |         "incelediniz",
270 |         "incelen",
271 |         "incelendi",
272 |         "incelenmiş",
273 |         "ise",
274 |         "isimiyle",
275 |         "isimle",
276 |         "isimlendirildi",
277 |         "isimlendirilen",
278 |         "isimlendirilmiş",
279 |         "isimli",
280 |         "ismi ile",
281 |         "isminde",
282 |         "isminden",
283 |         "isminin",
284 |         "it",
285 |         "itibaren",
286 |         "itibariyle",
287 |         "içerisi",
288 |         "içerisinde",
289 |         "içerisine",
290 |         "içerisiyle",
291 |         "içersi",
292 |         "için",
293 |         "içinde",
294 |         "işte",
295 |         "kadar",
296 |         "kal",
297 |         "kaldı",
298 |         "kaldık",
299 |         "kaldılar",
300 |         "kaldın",
301 |         "kalır",
302 |         "karşın",
303 |         "katrilyon",
304 |         "kaç",
305 |         "kendi",
306 |         "kendilerine",
307 |         "kendine",
308 |         "kendini",
309 |         "kendisi",
310 |         "kendisine",
311 |         "kendisini",
312 |         "kez",
313 |         "ki",
314 |         "kikir",
315 |         "kikiri",
316 |         "kim",
317 |         "kimden",
318 |         "kime",
319 |         "kimi",
320 |         "kimin",
321 |         "kimisi",
322 |         "kimse",
323 |         "kurulduk",
324 |         "kuruldum",
325 |         "kurulmak",
326 |         "kurulmuştur",
327 |         "kıkır",
328 |         "kırk",
329 |         "la",
330 |         "lar",
331 |         "le",
332 |         "ler",
333 |         "madem",
334 |         "maksadı",
335 |         "maksadı ile",
336 |         "maksadıyla",
337 |         "mi",
338 |         "milyar",
339 |         "milyon",
340 |         "mu",
341 |         "mü",
342 |         "mı",
343 |         "na",
344 |         "nasıl",
345 |         "nda",
346 |         "nde",
347 |         "ndi",
348 |         "ndı",
349 |         "ne",
350 |         "ne kadar",
351 |         "ne zaman",
352 |         "neden",
353 |         "nedenle",
354 |         "nedir",
355 |         "nerde",
356 |         "nerede",
357 |         "nereden",
358 |         "nereye",
359 |         "nesi",
360 |         "neyse",
361 |         "ni",
362 |         "nin",
363 |         "niye",
364 |         "niçin",
365 |         "nu",
366 |         "nü",
367 |         "nı",
368 |         "nın",
369 |         "ol",
370 |         "olan",
371 |         "olanlar",
372 |         "olanların",
373 |         "olarak",
374 |         "oldu",
375 |         "olduk",
376 |         "olduklarını",
377 |         "oldular",
378 |         "oldum",
379 |         "oldun",
380 |         "oldunuz",
381 |         "oldurdu",
382 |         "oldurdular",
383 |         "oldurdun",
384 |         "oldurdunuz",
385 |         "olduğu",
386 |         "olduğunu",
387 |         "olmadı",
388 |         "olmadığı",
389 |         "olmak",
390 |         "olmaktadır",
391 |         "olması",
392 |         "olmayan",
393 |         "olmaz",
394 |         "olsa",
395 |         "olsun",
396 |         "olundu",
397 |         "olundular",
398 |         "olundum",
399 |         "olundun",
400 |         "olup",
401 |         "olur",
402 |         "olursa",
403 |         "oluyor",
404 |         "oluşturmaktadır",
405 |         "on",
406 |         "ona",
407 |         "ondan",
408 |         "onlar",
409 |         "onlara",
410 |         "onlardan",
411 |         "onları",
412 |         "onların",
413 |         "onu",
414 |         "onun",
415 |         "onunda",
416 |         "onunla",
417 |         "orada",
418 |         "ortada",
419 |         "ortadalar",
420 |         "ortadan",
421 |         "ortadayım",
422 |         "ortadayız",
423 |         "ortaya",
424 |         "ortayı",
425 |         "otuz",
426 |         "oysa",
427 |         "oysaki",
428 |         "öbürü",
429 |         "ön",
430 |         "önce",
431 |         "önerdi",
432 |         "önerdiler",
433 |         "önerdim",
434 |         "önerilmiş",
435 |         "önerilmiştir",
436 |         "ötürü",
437 |         "öyle",
438 |         "öze",
439 |         "özü",
440 |         "pek",
441 |         "rağmen",
442 |         "sadece",
443 |         "sana",
444 |         "sanki",
445 |         "saten",
446 |         "sağladığı",
447 |         "sekiz",
448 |         "seksen",
449 |         "sen",
450 |         "senden",
451 |         "seni",
452 |         "senin",
453 |         "seninde",
454 |         "siz",
455 |         "sizden",
456 |         "size",
457 |         "sizi",
458 |         "sizin",
459 |         "son",
460 |         "sonra",
461 |         "sunulmuştur",
462 |         "suretiyle",
463 |         "suretle",
464 |         "söylenebilir",
465 |         "sürece",
466 |         "süretiyle",
467 |         "süretle",
468 |         "sürüldü",
469 |         "sürüldük",
470 |         "sürüldüler",
471 |         "sürüldüm",
472 |         "sürüldünüz",
473 |         "sıfır",
474 |         "sırala",
475 |         "sıraladım",
476 |         "sıralamıştır",
477 |         "şayet",
478 |         "şekilde",
479 |         "şekliyle",
480 |         "şey",
481 |         "şeyden",
482 |         "şeye",
483 |         "şeyi",
484 |         "şeyler",
485 |         "şimdi",
486 |         "şu",
487 |         "şuna",
488 |         "şunda",
489 |         "şundan",
490 |         "şunlar",
491 |         "şunları",
492 |         "şunu",
493 |         "şunun",
494 |         "şununda",
495 |         "şununla",
496 |         "şöyle",
497 |         "ta",
498 |         "taa",
499 |         "tabi",
500 |         "tamam",
501 |         "taraftan",
502 |         "tarafından",
503 |         "tartışılmıştır",
504 |         "te",
505 |         "tee",
506 |         "tir",
507 |         "trilyon",
508 |         "tüm",
509 |         "tümü",
510 |         "tır",
511 |         "ulaştık",
512 |         "ulaştılar",
513 |         "ulaştım",
514 |         "ulaşılan",
515 |         "ulaşıldı",
516 |         "ulaşılmak",
517 |         "ulaşılır",
518 |         "üstlenir",
519 |         "üt",
520 |         "üzere",
521 |         "üzeri",
522 |         "üzerinde",
523 |         "üzerinden",
524 |         "üzerine",
525 |         "üzerinize",
526 |         "üç",
527 |         "var",
528 |         "vardı",
529 |         "ve",
530 |         "veya",
531 |         "veyahut",
532 |         "ya",
533 |         "yaa",
534 |         "yani",
535 |         "yapacak",
536 |         "yapma",
537 |         "yapmak",
538 |         "yaptı",
539 |         "yaptıkları",
540 |         "yaptıklarını",
541 |         "yaptığı",
542 |         "yaptığını",
543 |         "yapılan",
544 |         "yapılması",
545 |         "yapıyor",
546 |         "yararlanılmıştır",
547 |         "ye",
548 |         "yedi",
549 |         "yeniden",
550 |         "yerine",
551 |         "yetmiş",
552 |         "yine",
553 |         "yinemi",
554 |         "yirmi",
555 |         "yla",
556 |         "yle",
557 |         "yoksa",
558 |         "yolla",
559 |         "yolladı",
560 |         "yolladılar",
561 |         "yolladım",
562 |         "yollayalım",
563 |         "yüksektir",
564 |         "yüz",
565 |         "zannetti",
566 |         "zaten",
567 |         "zenberek",
568 |         "zinhar",
569 |     ];
570 | }


--------------------------------------------------------------------------------
/src/Tool/Summarize.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool;
 13 | 
 14 | /**
 15 |  * Class Summarize
 16 |  *
 17 |  * This is for summarize the text from parsed data.
 18 |  *
 19 |  * @package PhpScience\TextRank\Tool
 20 |  */
 21 | class Summarize
 22 | {
 23 |     /**
 24 |      * To find all important sentences.
 25 |      *
 26 |      * @var int
 27 |      */
 28 |     const GET_ALL_IMPORTANT = 0;
 29 | 
 30 |     /**
 31 |      * To find the most important sentence and its following sentences.
 32 |      *
 33 |      * @var int
 34 |      */
 35 |     const GET_FIRST_IMPORTANT_AND_FOLLOWINGS = 1;
 36 | 
 37 |     /**
 38 |      * Array of sentence weight. Key is the index of the sentence and value is
 39 |      * the weight of the sentence.
 40 |      *
 41 |      * @var array
 42 |      */
 43 |     protected $sentenceWeight = [];
 44 | 
 45 |     /**
 46 |      * Summarize text.
 47 |      *
 48 |      * It retrieves the summarized text in array.
 49 |      *
 50 |      * @param array $scores        Keywords with scores. Score is the key.
 51 |      * @param Graph $graph         The graph of the text.
 52 |      * @param Text  $text          Text object what stores all text data.
 53 |      * @param int   $keyWordLimit  How many keyword should be used to find the
 54 |      *                             important sentences.
 55 |      * @param int   $sentenceLimit How many sentence should be retrieved.
 56 |      * @param int   $type          The type of summarizing. Possible values are
 57 |      *                             the constants of this class.
 58 |      *
 59 |      * @return array An array from sentences.
 60 |      */
 61 |     public function getSummarize(
 62 |         array &$scores,
 63 |         Graph &$graph,
 64 |         Text &$text,
 65 |         int $keyWordLimit,
 66 |         int $sentenceLimit,
 67 |         int $type
 68 |     ): array {
 69 | 
 70 |         $graphData = $graph->getGraph();
 71 |         $sentences = $text->getSentences();
 72 |         $marks = $text->getMarks();
 73 |         $this->findAndWeightSentences($scores, $graphData, $keyWordLimit);
 74 | 
 75 |         if ($type == Summarize::GET_ALL_IMPORTANT) {
 76 |             return $this->getAllImportant($sentences, $marks, $sentenceLimit);
 77 | 
 78 |         } else if ($type == Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS) {
 79 |             return $this->getFirstImportantAndFollowings(
 80 |                 $sentences,
 81 |                 $marks,
 82 |                 $sentenceLimit
 83 |             );
 84 |         }
 85 | 
 86 |         return [];
 87 |     }
 88 | 
 89 |     /**
 90 |      * Find and Weight Sentences.
 91 |      *
 92 |      * It finds the most important sentences and stores them into the property.
 93 |      *
 94 |      * @param array $scores       Keywords with scores. Score is the key.
 95 |      * @param array $graphData    Graph data from a Graph type object.
 96 |      * @param int   $keyWordLimit How many keyword should be used to find the
 97 |      *                            important sentences.
 98 |      */
 99 |     protected function findAndWeightSentences(
100 |         array &$scores,
101 |         array &$graphData,
102 |         int $keyWordLimit
103 |     ) {
104 |         $i = 0;
105 | 
106 |         foreach ($scores as $word => $score) {
107 |             if ($i >= $keyWordLimit) {
108 |                 break;
109 |             }
110 | 
111 |             $i++;
112 |             $wordMap = $graphData[$word];
113 | 
114 |             foreach ($wordMap as $key => $value) {
115 |                 $this->updateSentenceWeight($key);
116 |             }
117 |         }
118 | 
119 |         arsort($this->sentenceWeight);
120 |     }
121 | 
122 |     /**
123 |      * Important Sentences.
124 |      *
125 |      * It retrieves the important sentences.
126 |      *
127 |      * @param array $sentences     Sentences, ordered by weights.
128 |      * @param array $marks         Array of punctuations. Key is the reference
129 |      *                             to the sentence, value is the punctuation.
130 |      * @param int   $sentenceLimit How many sentence should be retrieved.
131 |      *
132 |      * @return array An array from sentences what are the most important
133 |      *               sentences.
134 |      */
135 |     protected function getAllImportant(
136 |         array &$sentences,
137 |         array &$marks,
138 |         int $sentenceLimit
139 |     ): array {
140 | 
141 |         $summary = [];
142 |         $i = 0;
143 | 
144 |         foreach ($this->sentenceWeight as $sentenceIdx => $weight) {
145 |             if ($i >= $sentenceLimit) {
146 |                 break;
147 |             }
148 | 
149 |             $i++;
150 |             $summary[$sentenceIdx] = $sentences[$sentenceIdx]
151 |                 . $this->getMark($marks, $sentenceIdx);
152 |         }
153 | 
154 |         ksort($summary);
155 | 
156 |         return $summary;
157 |     }
158 | 
159 |     /**
160 |      * Most Important Sentence and Next.
161 |      *
162 |      * It retrieves the first most important sentence and its following
163 |      * sentences.
164 |      *
165 |      * @param array $sentences     Sentences, ordered by weights.
166 |      * @param array $marks         Array of punctuations. Key is the reference
167 |      *                             to the sentence, value is the punctuation.
168 |      * @param int   $sentenceLimit How many sentence should be retrieved.
169 |      *
170 |      * @return array An array from sentences what contains the most important
171 |      *               sentence and its following sentences.
172 |      */
173 |     protected function getFirstImportantAndFollowings(
174 |         array &$sentences,
175 |         array &$marks,
176 |         int $sentenceLimit
177 |     ): array {
178 |         // No sentence is due when nothing was weighted or the limit is
179 |         // below one; without this guard a summary was still returned.
180 |         if ($sentenceLimit < 1 || empty($this->sentenceWeight)) {
181 |             return [];
182 |         }
183 | 
184 |         // The first entry of the weight map is the sentence to start at.
185 |         $startIdx = array_key_first($this->sentenceWeight);
186 |         $summary = [];
187 |         $summary[$startIdx] = $sentences[$startIdx]
188 |             . $this->getMark($marks, $startIdx);
189 | 
190 |         foreach ($sentences as $sentenceIdx => $sentence) {
191 |             if (count($summary) >= $sentenceLimit) {
192 |                 break;
193 |             }
194 | 
195 |             // Only sentences with a greater index than the start follow it.
196 |             if ($sentenceIdx <= $startIdx) {
197 |                 continue;
198 |             }
199 | 
200 |             $summary[$sentenceIdx] = $sentence
201 |                 . $this->getMark($marks, $sentenceIdx);
202 |         }
203 | 
204 |         return $summary;
205 |     }
206 | 
207 |     /**
208 |      * Update Sentence Weight.
209 |      *
210 |      * It increments the weight of the sentence that is stored in the property.
211 |      *
212 |      * @param int $sentenceIdx Index of the sentence.
213 |      */
214 |     protected function updateSentenceWeight(int $sentenceIdx)
215 |     {
216 |         // A sentence seen for the first time counts from zero, so its
217 |         // weight becomes 1; every later call adds one more.
218 |         $previous = $this->sentenceWeight[$sentenceIdx] ?? 0;
219 | 
220 |         $this->sentenceWeight[$sentenceIdx] = $previous + 1;
221 |     }
222 | 
223 |     /**
224 |      * Punctuations.
225 |      *
226 |      * It retrieves the punctuation of the sentence.
227 |      *
228 |      * @param array $marks The punctuation. Key is the reference to the
229 |      *                     sentence, value is the punctuation.
230 |      * @param int   $idx   Key of the punctuation.
231 |      *
232 |      * @return string The punctuation of the sentence.
233 |      */
234 |     protected function getMark(array &$marks, int $idx)
235 |     {
236 |         return $marks[$idx] ?? ''; // Empty string when no mark is stored.
237 |     }
238 | }
239 | 


--------------------------------------------------------------------------------
/src/Tool/Text.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank\Tool;
 13 | 
 14 | /**
 15 |  * Class Text
 16 |  *
 17 |  * This class stores the parsed text: words, sentences and punctuations.
 18 |  *
 19 |  * @package PhpScience\TextRank\Tool
 20 |  */
 21 | class Text
 22 | {
 23 |     /**
 24 |      * Words of the text grouped by sentence. The outer key is the index
 25 |      * of the sentence; the inner array maps the index of each word to the
 26 |      * word itself.
 27 |      *
 28 |      * @var array
 29 |      */
 30 |     protected $wordMatrix = [];
 31 | 
 32 |     /**
 33 |      * Sentences of the text, keyed by the index of the sentence.
 34 |      *
 35 |      * @var array
 36 |      */
 37 |     protected $sentences = [];
 38 | 
 39 |     /**
 40 |      * Punctuation marks of the text. Each key is the index of the sentence
 41 |      * the mark belongs to.
 42 |      *
 43 |      * @var array
 44 |      */
 45 |     protected $marks = [];
 46 | 
 47 |     /**
 48 |      * Returns the stored words, grouped by sentence.
 49 |      *
 50 |      * @return array Sentence index => array of word index => word. It is
 51 |      *               an empty array until setWordMatrix() has been called.
 52 |      */
 53 |     public function getWordMatrix(): array
 54 |     {
 55 |         return $this->wordMatrix;
 56 |     }
 57 | 
 58 |     /**
 59 |      * Stores the words of the text, replacing any matrix stored before.
 60 |      *
 61 |      * @param array $wordMatrix Multidimensional array: the key is the index
 62 |      *                          of a sentence and the value is an array of
 63 |      *                          that sentence's words keyed by word index.
 64 |      */
 65 |     public function setWordMatrix(array $wordMatrix)
 66 |     {
 67 |         $this->wordMatrix = $wordMatrix;
 68 |     }
 69 | 
 70 |     /**
 71 |      * Returns the stored sentences.
 72 |      *
 73 |      * @return array Sentences keyed by their index; an empty array until
 74 |      *               setSentences() has been called.
 75 |      */
 76 |     public function getSentences(): array
 77 |     {
 78 |         return $this->sentences;
 79 |     }
 80 | 
 81 |     /**
 82 |      * Stores the sentences, replacing any sentences stored before.
 83 |      *
 84 |      * @param array $sentences Keys are expected to be integers and values
 85 |      *                         strings; this is not checked here.
 86 |      */
 87 |     public function setSentences(array $sentences)
 88 |     {
 89 |         $this->sentences = $sentences;
 90 |     }
 91 | 
 92 |     /**
 93 |      * Returns the stored punctuation marks.
 94 |      *
 95 |      * @return array Marks keyed by the index of the sentence they belong
 96 |      *               to; an empty array until setMarks() has been called.
 97 |      */
 98 |     public function getMarks(): array
 99 |     {
100 |         return $this->marks;
101 |     }
102 | 
103 |     /**
104 |      * Stores the punctuation marks, replacing any marks stored before.
105 |      *
106 |      * @param array $marks Keys are expected to be integers and values
107 |      *                     strings; this is not checked here.
108 |      */
109 |     public function setMarks(array $marks)
110 |     {
111 |         $this->marks = $marks;
112 |     }
113 | }
114 | 


--------------------------------------------------------------------------------
/tests/TextRankFacadeTest.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * PHP Science TextRank (http://php.science/)
  4 |  *
  5 |  * @see     https://github.com/DavidBelicza/PHP-Science-TextRank
  6 |  * @license https://opensource.org/licenses/MIT the MIT License
  7 |  * @author  David Belicza <david@belicza.com>
  8 |  */
  9 | 
 10 | declare(strict_types=1);
 11 | 
 12 | namespace PhpScience\TextRank;
 13 | 
 14 | use PhpScience\TextRank\Tool\StopWords\English;
 15 | use PhpScience\TextRank\Tool\StopWords\Russian;
 16 | use PhpScience\TextRank\Tool\Summarize;
 17 | use PHPUnit\Framework\TestCase;
 18 | 
 19 | class TextRankFacadeTest extends TestCase
 20 | {
 21 |     /**
 22 |      * Content of res/sample1.txt, loaded again before every test.
 23 |      *
 24 |      * @var string
 25 |      */
 26 |     protected $sampleText1;
 27 | 
 28 |     /**
 29 |      * Loads the sample text.
 30 |      *
 31 |      * file_get_contents() is used instead of fopen()/fread() with
 32 |      * filesize(), because fread() rejects a length of 0 and so could not
 33 |      * read an empty file; a failed read fails the test right away.
 34 |      */
 35 |     public function setUp(): void
 36 |     {
 37 |         parent::setUp();
 38 | 
 39 |         $path = __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR
 40 |             . 'res' . DIRECTORY_SEPARATOR . 'sample1.txt';
 41 |         $content = file_get_contents($path);
 42 | 
 43 |         $this->assertNotFalse($content, 'Cannot read ' . $path);
 44 | 
 45 |         $this->sampleText1 = $content;
 46 |     }
 47 | 
 48 |     /**
 49 |      * Builds the facade with English stop words, as most tests need it.
 50 |      */
 51 |     private function createEnglishApi(): TextRankFacade
 52 |     {
 53 |         $api = new TextRankFacade();
 54 |         $api->setStopWords(new English());
 55 | 
 56 |         return $api;
 57 |     }
 58 | 
 59 |     public function testGetOnlyKeyWords(): void
 60 |     {
 61 |         $api = $this->createEnglishApi();
 62 | 
 63 |         $result = $api->getOnlyKeyWords($this->sampleText1);
 64 | 
 65 |         $this->assertNotEmpty($result);
 66 | 
 67 |         // The first value is expected to equal 1 (loose numeric check).
 68 |         $this->assertEquals(1, array_values($result)[0]);
 69 |     }
 70 | 
 71 |     public function testGetHighlights(): void
 72 |     {
 73 |         $api = $this->createEnglishApi();
 74 | 
 75 |         $result = $api->getHighlights($this->sampleText1);
 76 | 
 77 |         $this->assertNotEmpty($result);
 78 |     }
 79 | 
 80 |     public function testSummarizeTextCompound(): void
 81 |     {
 82 |         $api = $this->createEnglishApi();
 83 | 
 84 |         $result = $api->summarizeTextCompound($this->sampleText1);
 85 | 
 86 |         $this->assertNotEmpty($result);
 87 |     }
 88 | 
 89 |     public function testSummarizeTextBasic(): void
 90 |     {
 91 |         $api = $this->createEnglishApi();
 92 | 
 93 |         $result = $api->summarizeTextBasic($this->sampleText1);
 94 | 
 95 |         $this->assertNotEmpty($result);
 96 |     }
 97 | 
 98 |     public function testSummarizeTextFreely(): void
 99 |     {
100 |         $api = $this->createEnglishApi();
101 | 
102 |         $result = $api->summarizeTextFreely(
103 |             $this->sampleText1,
104 |             5,
105 |             2,
106 |             Summarize::GET_ALL_IMPORTANT
107 |         );
108 | 
109 |         $this->assertCount(2, $result);
110 | 
111 |         $result = $api->summarizeTextFreely(
112 |             $this->sampleText1,
113 |             10,
114 |             1,
115 |             Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS
116 |         );
117 | 
118 |         $this->assertCount(1, $result);
119 | 
120 |         // Stop words.
121 |         $result = $api->summarizeTextFreely(
122 |             'one two. one two. three four.',
123 |             2,
124 |             10,
125 |             Summarize::GET_ALL_IMPORTANT
126 |         );
127 | 
128 |         $this->assertCount(0, $result);
129 | 
130 |         // Fewer sentences than the requested limit.
131 |         $result = $api->summarizeTextFreely(
132 |             'lorem ipsum. lorem holy ipsum. sit dolor amet.',
133 |             2,
134 |             10,
135 |             Summarize::GET_ALL_IMPORTANT
136 |         );
137 | 
138 |         $this->assertCount(2, $result);
139 |     }
140 | 
141 |     public function testSmallText(): void
142 |     {
143 |         $api = $this->createEnglishApi();
144 | 
145 |         $this->assertCount(2, $api->getOnlyKeyWords('lorem ipsum sit'));
146 |         $this->assertCount(0, $api->getOnlyKeyWords('sit'));
147 |         $this->assertCount(0, $api->getOnlyKeyWords(''));
148 |     }
149 | 
150 |     public function testSmallTextRu(): void
151 |     {
152 |         $api = new TextRankFacade();
153 |         $api->setStopWords(new Russian());
154 | 
155 |         $this->assertCount(2, $api->getOnlyKeyWords('между холодными ладонями'));
156 |         $this->assertCount(0, $api->getOnlyKeyWords('конец'));
157 |         $this->assertCount(0, $api->getOnlyKeyWords(''));
158 |     }
159 | }
160 | 


--------------------------------------------------------------------------------