├── .env ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── docker-compose.yml ├── docker └── cli │ └── Dockerfile ├── phpunit.xml ├── res └── sample1.txt ├── src ├── TextRankFacade.php └── Tool │ ├── Graph.php │ ├── Parser.php │ ├── Score.php │ ├── StopWords │ ├── Arabic.php │ ├── Dutch.php │ ├── English.php │ ├── French.php │ ├── German.php │ ├── Indonesian.php │ ├── Italian.php │ ├── Norwegian.php │ ├── Russian.php │ ├── Spanish.php │ ├── StopWordsAbstract.php │ └── Turkish.php │ ├── Summarize.php │ └── Text.php └── tests └── TextRankFacadeTest.php /.env: -------------------------------------------------------------------------------- 1 | COMPOSE_PROJECT_NAME=PHP-Science-TextRank 2 | 3 | PREFIX=php-science-textrank 4 | 5 | SOURCE_DIR=./ 6 | TARGET_DIR=/var/www/html 7 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | jobs: 8 | run: 9 | runs-on: ${{ matrix.operating-system }} 10 | strategy: 11 | matrix: 12 | operating-system: [ubuntu-latest] 13 | php-versions: ['8.0', '8.1', '8.2', '8.3'] 14 | name: PHP ${{ matrix.php-versions }} Test on ${{ matrix.operating-system }} 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | 19 | - name: Setup PHP 20 | uses: shivammathur/setup-php@v2 21 | with: 22 | php-version: ${{ matrix.php-versions }} 23 | coverage: xdebug 24 | 25 | - name: Validate composer files 26 | run: composer validate 27 | 28 | - name: Install dependencies 29 | if: steps.composer-cache.outputs.cache-hit != 'true' 30 | run: composer install --prefer-dist --no-progress --no-suggest 31 | 32 | - name: Run test suite 33 | run: composer test 34 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | /.idea 2 | /.tmp 3 | /.phpunit.result.cache 4 | /composer.lock 5 | /vendor -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | MIT License 3 | 4 | Copyright (c) 2016-2021 PHP-Science 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
21 | This source code is an implementation of the TextRank algorithm in the PHP programming language, released under the MIT licence.
22 |
23 |
28 | * $stopWords = new English();
29 | *
30 | * $textRank = new TextRankFacade();
31 | * $textRank->setStopWords($stopWords);
32 | *
33 | * $sentences = $textRank->summarizeTextFreely(
34 | * $rawText,
35 | * 5,
36 | * 2,
37 | * Summarize::GET_ALL_IMPORTANT
38 | * );
39 | *
40 | *
41 | * @package PhpScience\TextRank
42 | */
class TextRankFacade
{
    /**
     * Minimum word length handed to the Parser by every entry point.
     */
    private const MINIMUM_WORD_LENGTH = 3;

    /**
     * Stop Words
     *
     * Words that must never become keywords (for example: a, like, no, yes,
     * one, two, I, you). Stays null until setStopWords() is called, in which
     * case the parser runs without a stop-word filter.
     *
     * @see \PhpScience\TextRank\Tool\StopWords\English
     *
     * @var StopWordsAbstract|null
     */
    protected $stopWords;

    /**
     * Set Stop Words.
     *
     * @param StopWordsAbstract $stopWords Words to exclude from the keywords.
     */
    public function setStopWords(StopWordsAbstract $stopWords)
    {
        $this->stopWords = $stopWords;
    }

    /**
     * Only Keywords
     *
     * Parses the text, builds its graph and returns the scores calculated by
     * Score::calculate().
     *
     * @param string $rawText A single raw text.
     *
     * @return array Key is the parsed word, value is the word score.
     */
    public function getOnlyKeyWords(string $rawText): array
    {
        $text = $this->parseRawText($rawText);

        $graph = new Graph();
        $graph->createGraph($text);

        return (new Score())->calculate($graph, $text);
    }

    /**
     * Highlighted Texts
     *
     * Summarizes with 12 analyzed keywords, Summarize::GET_ALL_IMPORTANT and
     * a sentence limit of 20 percent of the parsed sentence count.
     *
     * The limit is truncated to an integer, so it is 0 for texts with fewer
     * than five sentences. NOTE(review): how Summarize treats a limit of 0 is
     * not visible from this class - confirm before relying on short inputs.
     *
     * @param string $rawText A single raw text.
     *
     * @return array An array from sentences.
     */
    public function getHighlights(string $rawText): array
    {
        $text = $this->parseRawText($rawText);
        $maximumSentences = (int) (count($text->getSentences()) * 0.2);

        return $this->summarizeParsedText(
            $text,
            12,
            $maximumSentences,
            Summarize::GET_ALL_IMPORTANT
        );
    }

    /**
     * Compounds a Summarized Text
     *
     * Summarizes with 10 analyzed keywords, a limit of 3 sentences and
     * Summarize::GET_ALL_IMPORTANT.
     *
     * @param string $rawText A single raw text.
     *
     * @return array An array from sentences.
     */
    public function summarizeTextCompound(string $rawText): array
    {
        return $this->summarizeParsedText(
            $this->parseRawText($rawText),
            10,
            3,
            Summarize::GET_ALL_IMPORTANT
        );
    }

    /**
     * Summarized Text
     *
     * Summarizes with 10 analyzed keywords, a limit of 3 sentences and
     * Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS.
     *
     * @param string $rawText A single raw text.
     *
     * @return array An array from sentences.
     */
    public function summarizeTextBasic(string $rawText): array
    {
        return $this->summarizeParsedText(
            $this->parseRawText($rawText),
            10,
            3,
            Summarize::GET_FIRST_IMPORTANT_AND_FOLLOWINGS
        );
    }

    /**
     * Freely Summarized Text.
     *
     * Same pipeline as the other summarize methods, but every tuning value
     * is supplied by the caller.
     *
     * @param string $rawText           A single raw text.
     * @param int    $analyzedKeyWords  Maximum number of the most important
     *                                  Key Words to analyze the text.
     * @param int    $expectedSentences How many sentences should be retrieved.
     * @param int    $summarizeType     One of the Summarize::GET_* constants.
     *
     * @return array An array from sentences.
     */
    public function summarizeTextFreely(
        string $rawText,
        int $analyzedKeyWords,
        int $expectedSentences,
        int $summarizeType
    ): array {
        return $this->summarizeParsedText(
            $this->parseRawText($rawText),
            $analyzedKeyWords,
            $expectedSentences,
            $summarizeType
        );
    }

    /**
     * Runs the Parser over the raw text with the facade's fixed minimum word
     * length and, when configured, the stop words.
     *
     * @param string $rawText A single raw text.
     *
     * @return \PhpScience\TextRank\Tool\Text Parsed text.
     */
    private function parseRawText(string $rawText): \PhpScience\TextRank\Tool\Text
    {
        $parser = new Parser();
        $parser->setMinimumWordLength(self::MINIMUM_WORD_LENGTH);
        $parser->setRawText($rawText);

        if ($this->stopWords !== null) {
            $parser->setStopWords($this->stopWords);
        }

        return $parser->parse();
    }

    /**
     * Builds the graph, scores the words and asks Summarize for the result.
     *
     * @param \PhpScience\TextRank\Tool\Text $text Parsed text.
     * @param int $analyzedKeyWords  Number of keywords passed to Summarize.
     * @param int $expectedSentences Sentence limit passed to Summarize.
     * @param int $summarizeType     One of the Summarize::GET_* constants.
     *
     * @return array An array from sentences.
     */
    private function summarizeParsedText(
        \PhpScience\TextRank\Tool\Text $text,
        int $analyzedKeyWords,
        int $expectedSentences,
        int $summarizeType
    ): array {
        $graph = new Graph();
        $graph->createGraph($text);

        $scores = (new Score())->calculate($graph, $text);

        return (new Summarize())->getSummarize(
            $scores,
            $graph,
            $text,
            $analyzedKeyWords,
            $expectedSentences,
            $summarizeType
        );
    }
}
273 |
--------------------------------------------------------------------------------
/src/Tool/Graph.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool;
13 |
/**
 * Class Graph
 *
 * Position map of a parsed text. For every word it records in which
 * sentences the word occurs, at which positions, and which positions are its
 * direct neighbours inside the same sentence.
 *
 * @package PhpScience\TextRank\Tool
 */
class Graph
{
    /**
     * word => sentence ID => word position => neighbouring positions.
     *
     * @var array
     */
    protected $graph = [];

    /**
     * Create Graph.
     *
     * Walks the word matrix of the text and stores, for every word
     * occurrence, the positions of the previous and the next kept word of the
     * same sentence. Entries are added to the graph property; nothing is
     * returned.
     *
     * @param Text $text Text object that contains the parsed text data.
     */
    public function createGraph(Text &$text)
    {
        foreach ($text->getWordMatrix() as $sentenceId => $sentenceWords) {
            // Positions may be sparse, so neighbours are looked up through
            // the ordered list of keys rather than by position +/- 1.
            $positions = array_keys($sentenceWords);
            $lastSlot = count($positions) - 1;

            for ($slot = 0; $slot <= $lastSlot; $slot++) {
                $position = $positions[$slot];
                $neighbours = [];

                if ($slot > 0) {
                    $neighbours[] = $positions[$slot - 1];
                }

                if ($slot < $lastSlot) {
                    $neighbours[] = $positions[$slot + 1];
                }

                $this->graph[$sentenceWords[$position]][$sentenceId][$position] = $neighbours;
            }
        }
    }

    /**
     * Graph.
     *
     * Returns everything collected by createGraph().
     *
     * <code>
     * array(
     *      'apple' => array(       // word
     *          2 => array(         // ID of the sentence
     *              52 => array(    // position of the word in the sentence
     *                  51, 53      // positions of the neighbouring words
     *              ),
     *              10 => array(    // another position in the same sentence
     *                  9, 11       // positions of the neighbouring words
     *              ),
     *              5 => array(6)   // only one neighbour
     *          ),
     *          6 => array(
     *              9 => array(8, 10)
     *          ),
     *      ),
     *      'orange' => array(
     *          1 => array(
     *              30 => array(29, 31)
     *          )
     *      )
     * );
     * </code>
     *
     * @return array
     */
    public function getGraph(): array
    {
        return $this->graph;
    }
}
99 |
--------------------------------------------------------------------------------
/src/Tool/Parser.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool;
13 |
14 | use PhpScience\TextRank\Tool\StopWords\StopWordsAbstract;
15 |
/**
 * Class Parser
 *
 * Splits a raw text into sentences, splits every sentence into candidate
 * keywords and returns both packed into a Text object.
 *
 * @package PhpScience\TextRank\Tool
 */
class Parser
{
    /**
     * Length threshold for words. A word is kept only when its character
     * length is strictly greater than this value.
     *
     * @var int
     */
    protected $minimumWordLength = 0;

    /**
     * A single text, article, book for example.
     *
     * @var string
     */
    protected $rawText = '';

    /**
     * Punctuation marks collected while splitting the raw text into
     * sentences, in the order they were found.
     *
     * @var array
     */
    protected $marks = [];

    /**
     * Stop Words to ignore. These words will not be keywords.
     *
     * @var StopWordsAbstract|null
     */
    protected $stopWords;

    /**
     * It sets the length threshold. Words that are not longer than it will
     * be ignored.
     *
     * @param int $wordLength
     */
    public function setMinimumWordLength(int $wordLength)
    {
        $this->minimumWordLength = $wordLength;
    }

    /**
     * It sets the raw text.
     *
     * @param string $rawText
     */
    public function setRawText(string $rawText)
    {
        $this->rawText = $rawText;
    }

    /**
     * Set Stop Words.
     *
     * It sets the stop words to remove them from the found keywords.
     *
     * @param StopWordsAbstract $words Stop Words to ignore. These words will
     *                                 not be keywords.
     */
    public function setStopWords(StopWordsAbstract $words)
    {
        $this->stopWords = $words;
    }

    /**
     * It retrieves the punctuation marks collected by the last parse() call.
     *
     * @return array List of punctuation marks in order of appearance.
     */
    public function getMarks(): array
    {
        return $this->marks;
    }

    /**
     * Parse.
     *
     * It parses the raw text from the property and returns a Text object that
     * holds the sentences, the word matrix and the punctuation marks.
     *
     * @return Text Parsed text prepared for scoring.
     */
    public function parse(): Text
    {
        // Marks are appended while sentences are cleaned; start from an empty
        // list so that parsing twice with one instance does not duplicate them.
        $this->marks = [];

        $matrix = [];
        $sentences = $this->getSentences();

        foreach ($sentences as $sentenceIdx => $sentence) {
            $matrix[$sentenceIdx] = $this->getWords($sentence);
        }

        $text = new Text();
        $text->setSentences($sentences);
        $text->setWordMatrix($matrix);
        $text->setMarks($this->marks);

        return $text;
    }

    /**
     * Sentences.
     *
     * Splits the raw text on line breaks and on ". ", "? " and "! " that are
     * not inside parentheses. The captured delimiters pass through
     * cleanSentence(), which stores punctuation in $marks and drops it from
     * the result.
     *
     * @return array Re-indexed list of non-empty sentences.
     */
    protected function getSentences(): array
    {
        $sentences = preg_split(
            '/(\n+)|(\.\s|\?\s|\!\s)(?![^\(]*\))/',
            $this->rawText,
            -1,
            PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE
        );

        return array_values(
            array_filter(
                array_map(
                    [$this, 'cleanSentence'],
                    $sentences
                )
            )
        );
    }

    /**
     * Possible Keywords.
     *
     * Splits a sentence on whitespace (together with the punctuation that
     * touches it) and on leading/trailing punctuation, lower-cases the parts
     * and filters out punctuation-only tokens, too short words and stop words.
     *
     * The keys of the returned array are the positions of the words among all
     * cleaned tokens of the sentence, so they may be sparse.
     *
     * @param string $subText It should be a sentence.
     *
     * @return array The array of the possible keywords.
     */
    protected function getWords(string $subText): array
    {
        $pattern = '/(?:(^\p{P}+)|(\p{P}*\s+\p{P}*)|(\p{P}+$))/';

        // Unicode mode: without it \p{P} is matched against single bytes and
        // strips continuation bytes of multibyte characters (e.g. the last
        // byte of "á" or "л"), which corrupts words at word boundaries.
        $words = preg_split($pattern . 'u', $subText, -1, PREG_SPLIT_NO_EMPTY);

        if ($words === false) {
            // Not valid UTF-8: fall back to byte-wise splitting.
            $words = preg_split($pattern, $subText, -1, PREG_SPLIT_NO_EMPTY);
        }

        if ($words === false) {
            $words = [];
        }

        $words = array_values(
            array_filter(
                array_map(
                    [$this, 'cleanWord'],
                    $words
                )
            )
        );

        return array_filter($words, function ($word) {
            // Length is measured in characters to stay consistent with the
            // multibyte-aware lower-casing in cleanWord().
            return !ctype_punct($word)
                && mb_strlen($word) > $this->minimumWordLength
                && (!$this->stopWords || !$this->stopWords->exist($word));
        });
    }

    /**
     * Clean Sentence.
     *
     * Trims the given fragment. A fragment that is exactly one byte long
     * after trimming is treated as a punctuation mark: it is appended to
     * $marks and an empty string is returned.
     *
     * @param string $sentence A sentence as a string.
     *
     * @return string Empty string for a punctuation mark, otherwise the
     *                trimmed sentence.
     */
    protected function cleanSentence(string $sentence): string
    {
        $trimmed = trim($sentence);

        if (strlen($trimmed) === 1) {
            $this->marks[] = $trimmed;

            return '';
        }

        return $trimmed;
    }

    /**
     * Clean Word.
     *
     * It trims the word and converts it to lower case.
     *
     * @param string $word
     *
     * @return string Cleaned word.
     */
    protected function cleanWord(string $word): string
    {
        return mb_strtolower(trim($word));
    }
}
228 |
--------------------------------------------------------------------------------
/src/Tool/Score.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool;
13 |
/**
 * Class Score
 *
 * It assigns a weight to every word of a graph and normalizes the weights.
 *
 * @package PhpScience\TextRank\Tool
 */
class Score
{
    /**
     * The highest raw score found by the last calculateScores() run.
     *
     * @var int
     */
    protected $maximumValue = 0;

    /**
     * The lowest raw score found by the last calculateScores() run.
     *
     * @var int
     */
    protected $minimumValue = 0;

    /**
     * Calculate Scores.
     *
     * It counts the connections of every word, sums up the connection counts
     * of each word's neighbours and returns the normalized result sorted in
     * descending order.
     *
     * @param Graph $graph The graph of the text.
     * @param Text  $text  Text object that stores all text data.
     *
     * @return array Key is the word and value is the score between 0 and 1.
     */
    public function calculate(Graph $graph, Text &$text): array
    {
        $graphData = $graph->getGraph();
        $wordMatrix = $text->getWordMatrix();
        $wordConnections = $this->calculateConnectionNumbers($graphData);
        $scores = $this->calculateScores(
            $graphData,
            $wordMatrix,
            $wordConnections
        );

        return $this->normalizeAndSortScores($scores);
    }

    /**
     * Connection Numbers.
     *
     * It counts, for each word, the neighbours over all of its occurrences.
     *
     * @param array $graphData Graph data from a Graph type object.
     *
     * @return array Key is the word and value is the number of the connected
     *               words.
     */
    protected function calculateConnectionNumbers(array &$graphData): array
    {
        $wordConnections = [];

        foreach ($graphData as $wordKey => $sentences) {
            $connectionCount = 0;

            foreach ($sentences as $wordInstances) {
                foreach ($wordInstances as $connections) {
                    $connectionCount += count($connections);
                }
            }

            $wordConnections[$wordKey] = $connectionCount;
        }

        return $wordConnections;
    }

    /**
     * Calculate Scores.
     *
     * The raw score of a word is the sum of the connection numbers of all of
     * its neighbours. The method also records the lowest and the highest raw
     * score of this run for the later normalization.
     *
     * @param array $graphData       Graph data from a Graph type object.
     * @param array $wordMatrix      Sentence ID => word position => word.
     * @param array $wordConnections Key is the word and value is the number of
     *                               the connected words.
     *
     * @return array Scores where key is the word and value is the raw score.
     */
    protected function calculateScores(
        array &$graphData,
        array &$wordMatrix,
        array &$wordConnections
    ): array {
        $scores = [];

        foreach ($graphData as $wordKey => $sentences) {
            $value = 0;

            foreach ($sentences as $sentenceIdx => $wordInstances) {
                foreach ($wordInstances as $connections) {
                    foreach ($connections as $wordIdx) {
                        $word = $wordMatrix[$sentenceIdx][$wordIdx];
                        $value += $wordConnections[$word];
                    }
                }
            }

            $scores[$wordKey] = $value;
        }

        // Take the bounds from this run only. Tracking the minimum with 0 as
        // an "unset" marker loses a genuine minimum of 0 (a word without any
        // neighbour) and lets normalized scores drop below 0.
        $this->maximumValue = $scores ? max($scores) : 0;
        $this->minimumValue = $scores ? min($scores) : 0;

        return $scores;
    }

    /**
     * Normalize and Sort Scores.
     *
     * It maps every raw score into the range between 0 and 1 using the
     * recorded minimum and maximum and sorts the result in descending order
     * while keeping the word keys.
     *
     * @param array $scores Keywords with raw scores. Word is the key.
     *
     * @return array Keywords with normalized and ordered scores.
     */
    protected function normalizeAndSortScores(array &$scores): array
    {
        foreach ($scores as $key => $value) {
            $scores[$key] = $this->normalize(
                $value,
                $this->minimumValue,
                $this->maximumValue
            );
        }

        arsort($scores);

        return $scores;
    }

    /**
     * It normalizes a number with min-max scaling.
     *
     * @param int $value Current weight.
     * @param int $min   Minimum weight.
     * @param int $max   Maximum weight.
     *
     * @return float Normalized weight aka score; 0.0 when min equals max.
     */
    protected function normalize(int $value, int $min, int $max): float
    {
        $divisor = $max - $min;

        if ($divisor === 0) {
            return 0.0;
        }

        return ($value - $min) / $divisor;
    }
}
191 |
--------------------------------------------------------------------------------
/src/Tool/StopWords/Arabic.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool\StopWords;
13 |
/**
 * Class English
 *
 * English stop-word list.
 *
 * @package PhpScience\TextRank\Tool\StopWords
 */
class English extends StopWordsAbstract
{
    /**
     * Stop words to avoid dummy keywords for the English language.
     *
     * Corrected entries: 'fify' -> 'fifty', 'thickv' -> 'thick'; the
     * duplicated 'above' was removed. The misspelling 'amoungst' is kept on
     * purpose so texts containing it are still filtered.
     *
     * @var array
     */
    protected $words = [
        'a',
        'about',
        'above',
        'across',
        'after',
        'afterwards',
        'again',
        'against',
        'all',
        'almost',
        'alone',
        'along',
        'already',
        'also',
        'although',
        'always',
        'am',
        'among',
        'amongst',
        'amoungst',
        'amount',
        'an',
        'and',
        'another',
        'any',
        'anyhow',
        'anyone',
        'anything',
        'anyway',
        'anywhere',
        'are',
        'around',
        'as',
        'at',
        'back',
        'be',
        'became',
        'because',
        'become',
        'becomes',
        'becoming',
        'been',
        'before',
        'beforehand',
        'behind',
        'being',
        'below',
        'beside',
        'besides',
        'between',
        'beyond',
        'bill',
        'both',
        'bottom',
        'but',
        'by',
        'call',
        'can',
        'cannot',
        'cant',
        'co',
        'con',
        'could',
        'couldnt',
        'cry',
        'de',
        'describe',
        'detail',
        'do',
        'done',
        'down',
        'due',
        'during',
        'each',
        'eg',
        'eight',
        'either',
        'eleven',
        'else',
        'elsewhere',
        'empty',
        'enough',
        'etc',
        'even',
        'ever',
        'every',
        'everyone',
        'everything',
        'everywhere',
        'except',
        'few',
        'fifteen',
        'fifty',
        'fill',
        'find',
        'fire',
        'first',
        'five',
        'for',
        'former',
        'formerly',
        'forty',
        'found',
        'four',
        'from',
        'front',
        'full',
        'further',
        'get',
        'give',
        'go',
        'had',
        'has',
        'hasnt',
        'have',
        'he',
        'hence',
        'her',
        'here',
        'hereafter',
        'hereby',
        'herein',
        'hereupon',
        'hers',
        'herself',
        'him',
        'himself',
        'his',
        'how',
        'however',
        'hundred',
        'ie',
        'if',
        'in',
        'inc',
        'indeed',
        'interest',
        'into',
        'is',
        'it',
        'its',
        'itself',
        'keep',
        'last',
        'latter',
        'latterly',
        'least',
        'less',
        'ltd',
        'made',
        'many',
        'may',
        'me',
        'meanwhile',
        'might',
        'mill',
        'mine',
        'more',
        'moreover',
        'most',
        'mostly',
        'move',
        'much',
        'must',
        'my',
        'myself',
        'name',
        'namely',
        'neither',
        'never',
        'nevertheless',
        'next',
        'nine',
        'no',
        'nobody',
        'none',
        'noone',
        'nor',
        'not',
        'nothing',
        'now',
        'nowhere',
        'of',
        'off',
        'often',
        'on',
        'once',
        'one',
        'only',
        'onto',
        'or',
        'other',
        'others',
        'otherwise',
        'our',
        'ours',
        'ourselves',
        'out',
        'over',
        'own',
        'part',
        'per',
        'perhaps',
        'please',
        'put',
        'rather',
        're',
        'same',
        'see',
        'seem',
        'seemed',
        'seeming',
        'seems',
        'serious',
        'several',
        'she',
        'should',
        'show',
        'side',
        'since',
        'sincere',
        'six',
        'sixty',
        'so',
        'some',
        'somehow',
        'someone',
        'something',
        'sometime',
        'sometimes',
        'somewhere',
        'still',
        'such',
        'system',
        'take',
        'ten',
        'than',
        'that',
        'the',
        'their',
        'them',
        'themselves',
        'then',
        'thence',
        'there',
        'thereafter',
        'thereby',
        'therefore',
        'therein',
        'thereupon',
        'these',
        'they',
        'thick',
        'thin',
        'third',
        'this',
        'those',
        'though',
        'three',
        'through',
        'throughout',
        'thru',
        'thus',
        'to',
        'together',
        'too',
        'top',
        'toward',
        'towards',
        'twelve',
        'twenty',
        'two',
        'un',
        'under',
        'until',
        'up',
        'upon',
        'us',
        'very',
        'via',
        'was',
        'we',
        'well',
        'were',
        'what',
        'whatever',
        'when',
        'whence',
        'whenever',
        'where',
        'whereafter',
        'whereas',
        'whereby',
        'wherein',
        'whereupon',
        'wherever',
        'whether',
        'which',
        'while',
        'whither',
        'who',
        'whoever',
        'whole',
        'whom',
        'whose',
        'why',
        'will',
        'with',
        'within',
        'without',
        'would',
        'yet',
        'you',
        'your',
        'yours',
        'yourself',
        'yourselves',
    ];
}
347 |
--------------------------------------------------------------------------------
/src/Tool/StopWords/French.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool\StopWords;
13 |
14 | /**
15 | * Class French
16 | *
17 | * @package PhpScience\TextRank\Tool\StopWords
18 | */
19 | class French extends StopWordsAbstract
20 | {
21 | /**
22 | * Stop words for avoid dummy keywords for Language French.
23 | * Source: https://github.com/stopwords-iso/stopwords-fr
24 | *
25 | * @var array
26 | */
27 | protected $words = [
28 | 'a',
29 | 'abord',
30 | 'absolument',
31 | 'afin',
32 | 'ah',
33 | 'ai',
34 | 'aie',
35 | 'aient',
36 | 'aies',
37 | 'ailleurs',
38 | 'ainsi',
39 | 'ait',
40 | 'allaient',
41 | 'allo',
42 | 'allons',
43 | 'allô',
44 | 'alors',
45 | 'anterieur',
46 | 'anterieure',
47 | 'anterieures',
48 | 'apres',
49 | 'après',
50 | 'as',
51 | 'assez',
52 | 'attendu',
53 | 'au',
54 | 'aucun',
55 | 'aucune',
56 | 'aucuns',
57 | 'aujourd',
58 | 'aujourd\'hui',
59 | 'aupres',
60 | 'auquel',
61 | 'aura',
62 | 'aurai',
63 | 'auraient',
64 | 'aurais',
65 | 'aurait',
66 | 'auras',
67 | 'aurez',
68 | 'auriez',
69 | 'aurions',
70 | 'aurons',
71 | 'auront',
72 | 'aussi',
73 | 'autre',
74 | 'autrefois',
75 | 'autrement',
76 | 'autres',
77 | 'autrui',
78 | 'aux',
79 | 'auxquelles',
80 | 'auxquels',
81 | 'avaient',
82 | 'avais',
83 | 'avait',
84 | 'avant',
85 | 'avec',
86 | 'avez',
87 | 'aviez',
88 | 'avions',
89 | 'avoir',
90 | 'avons',
91 | 'ayant',
92 | 'ayez',
93 | 'ayons',
94 | 'b',
95 | 'bah',
96 | 'bas',
97 | 'basee',
98 | 'bat',
99 | 'beau',
100 | 'beaucoup',
101 | 'bien',
102 | 'bigre',
103 | 'bon',
104 | 'boum',
105 | 'bravo',
106 | 'brrr',
107 | 'c',
108 | 'car',
109 | 'ce',
110 | 'ceci',
111 | 'cela',
112 | 'celle',
113 | 'celle-ci',
114 | 'celle-là',
115 | 'celles',
116 | 'celles-ci',
117 | 'celles-là',
118 | 'celui',
119 | 'celui-ci',
120 | 'celui-là',
121 | 'celà',
122 | 'cent',
123 | 'cependant',
124 | 'certain',
125 | 'certaine',
126 | 'certaines',
127 | 'certains',
128 | 'certes',
129 | 'ces',
130 | 'cet',
131 | 'cette',
132 | 'ceux',
133 | 'ceux-ci',
134 | 'ceux-là',
135 | 'chacun',
136 | 'chacune',
137 | 'chaque',
138 | 'cher',
139 | 'chers',
140 | 'chez',
141 | 'chiche',
142 | 'chut',
143 | 'chère',
144 | 'chères',
145 | 'ci',
146 | 'cinq',
147 | 'cinquantaine',
148 | 'cinquante',
149 | 'cinquantième',
150 | 'cinquième',
151 | 'clac',
152 | 'clic',
153 | 'combien',
154 | 'comme',
155 | 'comment',
156 | 'comparable',
157 | 'comparables',
158 | 'compris',
159 | 'concernant',
160 | 'contre',
161 | 'couic',
162 | 'crac',
163 | 'd',
164 | 'da',
165 | 'dans',
166 | 'de',
167 | 'debout',
168 | 'dedans',
169 | 'dehors',
170 | 'deja',
171 | 'delà',
172 | 'depuis',
173 | 'dernier',
174 | 'derniere',
175 | 'derriere',
176 | 'derrière',
177 | 'des',
178 | 'desormais',
179 | 'desquelles',
180 | 'desquels',
181 | 'dessous',
182 | 'dessus',
183 | 'deux',
184 | 'deuxième',
185 | 'deuxièmement',
186 | 'devant',
187 | 'devers',
188 | 'devra',
189 | 'devrait',
190 | 'different',
191 | 'differentes',
192 | 'differents',
193 | 'différent',
194 | 'différente',
195 | 'différentes',
196 | 'différents',
197 | 'dire',
198 | 'directe',
199 | 'directement',
200 | 'dit',
201 | 'dite',
202 | 'dits',
203 | 'divers',
204 | 'diverse',
205 | 'diverses',
206 | 'dix',
207 | 'dix-huit',
208 | 'dix-neuf',
209 | 'dix-sept',
210 | 'dixième',
211 | 'doit',
212 | 'doivent',
213 | 'donc',
214 | 'dont',
215 | 'dos',
216 | 'douze',
217 | 'douzième',
218 | 'dring',
219 | 'droite',
220 | 'du',
221 | 'duquel',
222 | 'durant',
223 | 'dès',
224 | 'début',
225 | 'désormais',
226 | 'e',
227 | 'effet',
228 | 'egale',
229 | 'egalement',
230 | 'egales',
231 | 'eh',
232 | 'elle',
233 | 'elle-même',
234 | 'elles',
235 | 'elles-mêmes',
236 | 'en',
237 | 'encore',
238 | 'enfin',
239 | 'entre',
240 | 'envers',
241 | 'environ',
242 | 'es',
243 | 'essai',
244 | 'est',
245 | 'et',
246 | 'etant',
247 | 'etc',
248 | 'etre',
249 | 'eu',
250 | 'eue',
251 | 'eues',
252 | 'euh',
253 | 'eurent',
254 | 'eus',
255 | 'eusse',
256 | 'eussent',
257 | 'eusses',
258 | 'eussiez',
259 | 'eussions',
260 | 'eut',
261 | 'eux',
262 | 'eux-mêmes',
263 | 'exactement',
264 | 'excepté',
265 | 'extenso',
266 | 'exterieur',
267 | 'eûmes',
268 | 'eût',
269 | 'eûtes',
270 | 'f',
271 | 'fais',
272 | 'faisaient',
273 | 'faisant',
274 | 'fait',
275 | 'faites',
276 | 'façon',
277 | 'feront',
278 | 'fi',
279 | 'flac',
280 | 'floc',
281 | 'fois',
282 | 'font',
283 | 'force',
284 | 'furent',
285 | 'fus',
286 | 'fusse',
287 | 'fussent',
288 | 'fusses',
289 | 'fussiez',
290 | 'fussions',
291 | 'fut',
292 | 'fûmes',
293 | 'fût',
294 | 'fûtes',
295 | 'g',
296 | 'gens',
297 | 'h',
298 | 'ha',
299 | 'haut',
300 | 'hein',
301 | 'hem',
302 | 'hep',
303 | 'hi',
304 | 'ho',
305 | 'holà',
306 | 'hop',
307 | 'hormis',
308 | 'hors',
309 | 'hou',
310 | 'houp',
311 | 'hue',
312 | 'hui',
313 | 'huit',
314 | 'huitième',
315 | 'hum',
316 | 'hurrah',
317 | 'hé',
318 | 'hélas',
319 | 'i',
320 | 'ici',
321 | 'il',
322 | 'ils',
323 | 'importe',
324 | 'j',
325 | 'je',
326 | 'jusqu',
327 | 'jusque',
328 | 'juste',
329 | 'k',
330 | 'l',
331 | 'la',
332 | 'laisser',
333 | 'laquelle',
334 | 'las',
335 | 'le',
336 | 'lequel',
337 | 'les',
338 | 'lesquelles',
339 | 'lesquels',
340 | 'leur',
341 | 'leurs',
342 | 'longtemps',
343 | 'lors',
344 | 'lorsque',
345 | 'lui',
346 | 'lui-meme',
347 | 'lui-même',
348 | 'là',
349 | 'lès',
350 | 'm',
351 | 'ma',
352 | 'maint',
353 | 'maintenant',
354 | 'mais',
355 | 'malgre',
356 | 'malgré',
357 | 'maximale',
358 | 'me',
359 | 'meme',
360 | 'memes',
361 | 'merci',
362 | 'mes',
363 | 'mien',
364 | 'mienne',
365 | 'miennes',
366 | 'miens',
367 | 'mille',
368 | 'mince',
369 | 'mine',
370 | 'minimale',
371 | 'moi',
372 | 'moi-meme',
373 | 'moi-même',
374 | 'moindres',
375 | 'moins',
376 | 'mon',
377 | 'mot',
378 | 'moyennant',
379 | 'multiple',
380 | 'multiples',
381 | 'même',
382 | 'mêmes',
383 | 'n',
384 | 'na',
385 | 'naturel',
386 | 'naturelle',
387 | 'naturelles',
388 | 'ne',
389 | 'neanmoins',
390 | 'necessaire',
391 | 'necessairement',
392 | 'neuf',
393 | 'neuvième',
394 | 'ni',
395 | 'nombreuses',
396 | 'nombreux',
397 | 'nommés',
398 | 'non',
399 | 'nos',
400 | 'notamment',
401 | 'notre',
402 | 'nous',
403 | 'nous-mêmes',
404 | 'nouveau',
405 | 'nouveaux',
406 | 'nul',
407 | 'néanmoins',
408 | 'nôtre',
409 | 'nôtres',
410 | 'o',
411 | 'oh',
412 | 'ohé',
413 | 'ollé',
414 | 'olé',
415 | 'on',
416 | 'ont',
417 | 'onze',
418 | 'onzième',
419 | 'ore',
420 | 'ou',
421 | 'ouf',
422 | 'ouias',
423 | 'oust',
424 | 'ouste',
425 | 'outre',
426 | 'ouvert',
427 | 'ouverte',
428 | 'ouverts',
429 | 'o|',
430 | 'où',
431 | 'p',
432 | 'paf',
433 | 'pan',
434 | 'par',
435 | 'parce',
436 | 'parfois',
437 | 'parle',
438 | 'parlent',
439 | 'parler',
440 | 'parmi',
441 | 'parole',
442 | 'parseme',
443 | 'partant',
444 | 'particulier',
445 | 'particulière',
446 | 'particulièrement',
447 | 'pas',
448 | 'passé',
449 | 'pendant',
450 | 'pense',
451 | 'permet',
452 | 'personne',
453 | 'personnes',
454 | 'peu',
455 | 'peut',
456 | 'peuvent',
457 | 'peux',
458 | 'pff',
459 | 'pfft',
460 | 'pfut',
461 | 'pif',
462 | 'pire',
463 | 'pièce',
464 | 'plein',
465 | 'plouf',
466 | 'plupart',
467 | 'plus',
468 | 'plusieurs',
469 | 'plutôt',
470 | 'possessif',
471 | 'possessifs',
472 | 'possible',
473 | 'possibles',
474 | 'pouah',
475 | 'pour',
476 | 'pourquoi',
477 | 'pourrais',
478 | 'pourrait',
479 | 'pouvait',
480 | 'prealable',
481 | 'precisement',
482 | 'premier',
483 | 'première',
484 | 'premièrement',
485 | 'pres',
486 | 'probable',
487 | 'probante',
488 | 'procedant',
489 | 'proche',
490 | 'près',
491 | 'psitt',
492 | 'pu',
493 | 'puis',
494 | 'puisque',
495 | 'pur',
496 | 'pure',
497 | 'q',
498 | 'qu',
499 | 'quand',
500 | 'quant',
501 | 'quant-à-soi',
502 | 'quanta',
503 | 'quarante',
504 | 'quatorze',
505 | 'quatre',
506 | 'quatre-vingt',
507 | 'quatrième',
508 | 'quatrièmement',
509 | 'que',
510 | 'quel',
511 | 'quelconque',
512 | 'quelle',
513 | 'quelles',
514 | 'quelqu\'un',
515 | 'quelque',
516 | 'quelques',
517 | 'quels',
518 | 'qui',
519 | 'quiconque',
520 | 'quinze',
521 | 'quoi',
522 | 'quoique',
523 | 'r',
524 | 'rare',
525 | 'rarement',
526 | 'rares',
527 | 'relative',
528 | 'relativement',
529 | 'remarquable',
530 | 'rend',
531 | 'rendre',
532 | 'restant',
533 | 'reste',
534 | 'restent',
535 | 'restrictif',
536 | 'retour',
537 | 'revoici',
538 | 'revoilà',
539 | 'rien',
540 | 's',
541 | 'sa',
542 | 'sacrebleu',
543 | 'sait',
544 | 'sans',
545 | 'sapristi',
546 | 'sauf',
547 | 'se',
548 | 'sein',
549 | 'seize',
550 | 'selon',
551 | 'semblable',
552 | 'semblaient',
553 | 'semble',
554 | 'semblent',
555 | 'sent',
556 | 'sept',
557 | 'septième',
558 | 'sera',
559 | 'serai',
560 | 'seraient',
561 | 'serais',
562 | 'serait',
563 | 'seras',
564 | 'serez',
565 | 'seriez',
566 | 'serions',
567 | 'serons',
568 | 'seront',
569 | 'ses',
570 | 'seul',
571 | 'seule',
572 | 'seulement',
573 | 'si',
574 | 'sien',
575 | 'sienne',
576 | 'siennes',
577 | 'siens',
578 | 'sinon',
579 | 'six',
580 | 'sixième',
581 | 'soi',
582 | 'soi-même',
583 | 'soient',
584 | 'sois',
585 | 'soit',
586 | 'soixante',
587 | 'sommes',
588 | 'son',
589 | 'sont',
590 | 'sous',
591 | 'souvent',
592 | 'soyez',
593 | 'soyons',
594 | 'specifique',
595 | 'specifiques',
596 | 'speculatif',
597 | 'stop',
598 | 'strictement',
599 | 'subtiles',
600 | 'suffisant',
601 | 'suffisante',
602 | 'suffit',
603 | 'suis',
604 | 'suit',
605 | 'suivant',
606 | 'suivante',
607 | 'suivantes',
608 | 'suivants',
609 | 'suivre',
610 | 'sujet',
611 | 'superpose',
612 | 'sur',
613 | 'surtout',
614 | 't',
615 | 'ta',
616 | 'tac',
617 | 'tandis',
618 | 'tant',
619 | 'tardive',
620 | 'te',
621 | 'tel',
622 | 'telle',
623 | 'tellement',
624 | 'telles',
625 | 'tels',
626 | 'tenant',
627 | 'tend',
628 | 'tenir',
629 | 'tente',
630 | 'tes',
631 | 'tic',
632 | 'tien',
633 | 'tienne',
634 | 'tiennes',
635 | 'tiens',
636 | 'toc',
637 | 'toi',
638 | 'toi-même',
639 | 'ton',
640 | 'touchant',
641 | 'toujours',
642 | 'tous',
643 | 'tout',
644 | 'toute',
645 | 'toutefois',
646 | 'toutes',
647 | 'treize',
648 | 'trente',
649 | 'tres',
650 | 'trois',
651 | 'troisième',
652 | 'troisièmement',
653 | 'trop',
654 | 'très',
655 | 'tsoin',
656 | 'tsouin',
657 | 'tu',
658 | 'té',
659 | 'u',
660 | 'un',
661 | 'une',
662 | 'unes',
663 | 'uniformement',
664 | 'unique',
665 | 'uniques',
666 | 'uns',
667 | 'v',
668 | 'va',
669 | 'vais',
670 | 'valeur',
671 | 'vas',
672 | 'vers',
673 | 'via',
674 | 'vif',
675 | 'vifs',
676 | 'vingt',
677 | 'vivat',
678 | 'vive',
679 | 'vives',
680 | 'vlan',
681 | 'voici',
682 | 'voie',
683 | 'voient',
684 | 'voilà',
685 | 'vont',
686 | 'vos',
687 | 'votre',
688 | 'vous',
689 | 'vous-mêmes',
690 | 'vu',
691 | 'vé',
692 | 'vôtre',
693 | 'vôtres',
694 | 'w',
695 | 'x',
696 | 'y',
697 | 'z',
698 | 'zut',
699 | 'à',
700 | 'â',
701 | 'ça',
702 | 'ès',
703 | 'étaient',
704 | 'étais',
705 | 'était',
706 | 'étant',
707 | 'état',
708 | 'étiez',
709 | 'étions',
710 | 'été',
711 | 'étée',
712 | 'étées',
713 | 'étés',
714 | 'êtes',
715 | 'être',
716 | 'ô'
717 | ];
718 | }
719 |
--------------------------------------------------------------------------------
/src/Tool/StopWords/German.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 | declare(strict_types=1);
10 | namespace PhpScience\TextRank\Tool\StopWords;
11 | /**
12 | * Class German
13 | *
14 | * @package PhpScience\TextRank\Tool\StopWords
15 | */
class German extends StopWordsAbstract
{
    /**
     * German stop words, used to keep meaningless filler words out of the
     * extracted keywords.
     *
     * Word list created by Marco Götze, Steffen Geyer.
     * Source: https://solariz.de/de/downloads/6/german-enhanced-stopwords.htm
     *
     * Each word is listed exactly once; duplicated entries from the upstream
     * list ('hier', 'hinter', 'unser', 'unsere') have been removed.
     *
     * NOTE(review): a few entries contain dots or capital letters ('bsp.',
     * 'ca.', 'z.B.', 'zB'). Presumably they only match if the tokenizer keeps
     * punctuation and case - confirm against the parser.
     *
     * The property is intentionally left without a native type so that it
     * stays compatible with the declaration in StopWordsAbstract.
     *
     * @var array
     */
    protected $words = [
        'ab',
        'aber',
        'alle',
        'allein',
        'allem',
        'allen',
        'aller',
        'allerdings',
        'allerlei',
        'alles',
        'allmählich',
        'allzu',
        'als',
        'alsbald',
        'also',
        'am',
        'an',
        'and',
        'ander',
        'andere',
        'anderem',
        'anderen',
        'anderer',
        'andererseits',
        'anderes',
        'anderm',
        'andern',
        'andernfalls',
        'anders',
        'anstatt',
        'auch',
        'auf',
        'aus',
        'ausgenommen',
        'ausser',
        'ausserdem',
        'außer',
        'außerdem',
        'außerhalb',
        'bald',
        'bei',
        'beide',
        'beiden',
        'beiderlei',
        'beides',
        'beim',
        'beinahe',
        'bereits',
        'besonders',
        'besser',
        'beträchtlich',
        'bevor',
        'bezüglich',
        'bin',
        'bis',
        'bisher',
        'bislang',
        'bist',
        'bloß',
        'bsp.',
        'bzw',
        'ca',
        'ca.',
        'content',
        'da',
        'dabei',
        'dadurch',
        'dafür',
        'dagegen',
        'daher',
        'dahin',
        'damals',
        'damit',
        'danach',
        'daneben',
        'dann',
        'daran',
        'darauf',
        'daraus',
        'darin',
        'darum',
        'darunter',
        'darüber',
        'darüberhinaus',
        'das',
        'dass',
        'dasselbe',
        'davon',
        'davor',
        'dazu',
        'daß',
        'dein',
        'deine',
        'deinem',
        'deinen',
        'deiner',
        'deines',
        'dem',
        'demnach',
        'demselben',
        'den',
        'denen',
        'denn',
        'dennoch',
        'denselben',
        'der',
        'derart',
        'derartig',
        'derem',
        'deren',
        'derer',
        'derjenige',
        'derjenigen',
        'derselbe',
        'derselben',
        'derzeit',
        'des',
        'deshalb',
        'desselben',
        'dessen',
        'desto',
        'deswegen',
        'dich',
        'die',
        'diejenige',
        'dies',
        'diese',
        'dieselbe',
        'dieselben',
        'diesem',
        'diesen',
        'dieser',
        'dieses',
        'diesseits',
        'dir',
        'direkt',
        'direkte',
        'direkten',
        'direkter',
        'doch',
        'dort',
        'dorther',
        'dorthin',
        'drauf',
        'drin',
        'drunter',
        'drüber',
        'du',
        'dunklen',
        'durch',
        'durchaus',
        'eben',
        'ebenfalls',
        'ebenso',
        'eher',
        'eigenen',
        'eigenes',
        'eigentlich',
        'ein',
        'eine',
        'einem',
        'einen',
        'einer',
        'einerseits',
        'eines',
        'einfach',
        'einführen',
        'einführte',
        'einführten',
        'eingesetzt',
        'einig',
        'einige',
        'einigem',
        'einigen',
        'einiger',
        'einigermaßen',
        'einiges',
        'einmal',
        'eins',
        'einseitig',
        'einseitige',
        'einseitigen',
        'einseitiger',
        'einst',
        'einstmals',
        'einzig',
        'entsprechend',
        'entweder',
        'er',
        'erst',
        'es',
        'etc',
        'etliche',
        'etwa',
        'etwas',
        'euch',
        'euer',
        'eure',
        'eurem',
        'euren',
        'eurer',
        'eures',
        'falls',
        'fast',
        'ferner',
        'folgende',
        'folgenden',
        'folgender',
        'folgendes',
        'folglich',
        'fuer',
        'für',
        'gab',
        'ganze',
        'ganzem',
        'ganzen',
        'ganzer',
        'ganzes',
        'gar',
        'gegen',
        'gemäss',
        'ggf',
        'gleich',
        'gleichwohl',
        'gleichzeitig',
        'glücklicherweise',
        'gänzlich',
        'hab',
        'habe',
        'haben',
        'haette',
        'hast',
        'hat',
        'hatte',
        'hatten',
        'hattest',
        'hattet',
        'heraus',
        'herein',
        'hier',
        'hiermit',
        'hiesige',
        'hin',
        'hinein',
        'hinten',
        'hinter',
        'hinterher',
        'http',
        'hätt',
        'hätte',
        'hätten',
        'höchstens',
        'ich',
        'igitt',
        'ihm',
        'ihn',
        'ihnen',
        'ihr',
        'ihre',
        'ihrem',
        'ihren',
        'ihrer',
        'ihres',
        'im',
        'immer',
        'immerhin',
        'in',
        'indem',
        'indessen',
        'infolge',
        'innen',
        'innerhalb',
        'ins',
        'insofern',
        'inzwischen',
        'irgend',
        'irgendeine',
        'irgendwas',
        'irgendwen',
        'irgendwer',
        'irgendwie',
        'irgendwo',
        'ist',
        'ja',
        'je',
        'jed',
        'jede',
        'jedem',
        'jeden',
        'jedenfalls',
        'jeder',
        'jederlei',
        'jedes',
        'jedoch',
        'jemand',
        'jene',
        'jenem',
        'jenen',
        'jener',
        'jenes',
        'jenseits',
        'jetzt',
        'jährig',
        'jährige',
        'jährigen',
        'jähriges',
        'kam',
        'kann',
        'kannst',
        'kaum',
        'kein',
        'keine',
        'keinem',
        'keinen',
        'keiner',
        'keinerlei',
        'keines',
        'keineswegs',
        'klar',
        'klare',
        'klaren',
        'klares',
        'klein',
        'kleinen',
        'kleiner',
        'kleines',
        'koennen',
        'koennt',
        'koennte',
        'koennten',
        'komme',
        'kommen',
        'kommt',
        'konkret',
        'konkrete',
        'konkreten',
        'konkreter',
        'konkretes',
        'können',
        'könnt',
        'künftig',
        'leider',
        'machen',
        'man',
        'manche',
        'manchem',
        'manchen',
        'mancher',
        'mancherorts',
        'manches',
        'manchmal',
        'mehr',
        'mehrere',
        'mein',
        'meine',
        'meinem',
        'meinen',
        'meiner',
        'meines',
        'mich',
        'mir',
        'mit',
        'mithin',
        'muessen',
        'muesst',
        'muesste',
        'muss',
        'musst',
        'musste',
        'mussten',
        'muß',
        'mußt',
        'müssen',
        'müsste',
        'müssten',
        'müßt',
        'müßte',
        'nach',
        'nachdem',
        'nachher',
        'nachhinein',
        'nahm',
        'natürlich',
        'neben',
        'nebenan',
        'nehmen',
        'nein',
        'nicht',
        'nichts',
        'nie',
        'niemals',
        'niemand',
        'nirgends',
        'nirgendwo',
        'noch',
        'nun',
        'nur',
        'nächste',
        'nämlich',
        'nötigenfalls',
        'ob',
        'oben',
        'oberhalb',
        'obgleich',
        'obschon',
        'obwohl',
        'oder',
        'oft',
        'per',
        'plötzlich',
        'schließlich',
        'schon',
        'sehr',
        'sehrwohl',
        'seid',
        'sein',
        'seine',
        'seinem',
        'seinen',
        'seiner',
        'seines',
        'seit',
        'seitdem',
        'seither',
        'selber',
        'selbst',
        'sich',
        'sicher',
        'sicherlich',
        'sie',
        'sind',
        'so',
        'sobald',
        'sodass',
        'sodaß',
        'soeben',
        'sofern',
        'sofort',
        'sogar',
        'solange',
        'solch',
        'solche',
        'solchem',
        'solchen',
        'solcher',
        'solches',
        'soll',
        'sollen',
        'sollst',
        'sollt',
        'sollte',
        'sollten',
        'solltest',
        'somit',
        'sondern',
        'sonst',
        'sonstwo',
        'sooft',
        'soviel',
        'soweit',
        'sowie',
        'sowohl',
        'tatsächlich',
        'tatsächlichen',
        'tatsächlicher',
        'tatsächliches',
        'trotzdem',
        'ueber',
        'um',
        'umso',
        'unbedingt',
        'und',
        'unmöglich',
        'unmögliche',
        'unmöglichen',
        'unmöglicher',
        'uns',
        'unser',
        'unsere',
        'unserem',
        'unseren',
        'unserer',
        'unseres',
        'unter',
        'usw',
        'viel',
        'viele',
        'vielen',
        'vieler',
        'vieles',
        'vielleicht',
        'vielmals',
        'vom',
        'von',
        'vor',
        'voran',
        'vorher',
        'vorüber',
        'völlig',
        'wann',
        'war',
        'waren',
        'warst',
        'warum',
        'was',
        'weder',
        'weil',
        'weiter',
        'weitere',
        'weiterem',
        'weiteren',
        'weiterer',
        'weiteres',
        'weiterhin',
        'weiß',
        'welche',
        'welchem',
        'welchen',
        'welcher',
        'welches',
        'wem',
        'wen',
        'wenig',
        'wenige',
        'weniger',
        'wenigstens',
        'wenn',
        'wenngleich',
        'wer',
        'werde',
        'werden',
        'werdet',
        'weshalb',
        'wessen',
        'wichtig',
        'wie',
        'wieder',
        'wieso',
        'wieviel',
        'wiewohl',
        'will',
        'willst',
        'wir',
        'wird',
        'wirklich',
        'wirst',
        'wo',
        'wodurch',
        'wogegen',
        'woher',
        'wohin',
        'wohingegen',
        'wohl',
        'wohlweislich',
        'womit',
        'woraufhin',
        'woraus',
        'worin',
        'wurde',
        'wurden',
        'während',
        'währenddessen',
        'wär',
        'wäre',
        'wären',
        'würde',
        'würden',
        'z.B.',
        'zB',
        'zahlreich',
        'zeitweise',
        'zu',
        'zudem',
        'zuerst',
        'zufolge',
        'zugleich',
        'zuletzt',
        'zum',
        'zumal',
        'zur',
        'zurück',
        'zusammen',
        'zuviel',
        'zwar',
        'zwischen',
        'ähnlich',
        'übel',
        'über',
        'überall',
        'überallhin',
        'überdies',
        'übermorgen',
        'übrig',
        'übrigens',
    ];
}
--------------------------------------------------------------------------------
/src/Tool/StopWords/Indonesian.php:
--------------------------------------------------------------------------------
1 |
8 | */
9 |
10 | declare(strict_types=1);
11 |
12 | namespace PhpScience\TextRank\Tool\StopWords;
13 |
14 | /**
15 | * Class Norwegian
16 | *
17 | * @package PhpScience\TextRank\Tool\StopWords
18 | */
class Norwegian extends StopWordsAbstract
{
    /**
     * Norwegian stop words, used to keep meaningless filler words out of the
     * extracted keywords.
     *
     * Source: https://github.com/stopwords-iso/stopwords-no
     *
     * The upstream list carries a handful of mis-encoded entries in which
     * 'ø' was turned into 'û' and 'æ' into 'ö'. Those entries can never match
     * correctly encoded UTF-8 text, so they are repaired here:
     *   'forsûke' => 'forsøke', 'fûrst' => 'først', 'gjûre' => 'gjøre'.
     * 'fûr', 'vöre' and 'vört' are dropped because their repaired forms
     * ('før', 'være', 'vært') are already part of the list.
     *
     * The property is intentionally left without a native type so that it
     * stays compatible with the declaration in StopWordsAbstract.
     *
     * @var array
     */
    protected $words = [
        'alle',
        'andre',
        'arbeid',
        'at',
        'av',
        'bare',
        'begge',
        'ble',
        'blei',
        'bli',
        'blir',
        'blitt',
        'bort',
        'bra',
        'bruke',
        'både',
        'båe',
        'da',
        'de',
        'deg',
        'dei',
        'deim',
        'deira',
        'deires',
        'dem',
        'den',
        'denne',
        'der',
        'dere',
        'deres',
        'det',
        'dette',
        'di',
        'din',
        'disse',
        'ditt',
        'du',
        'dykk',
        'dykkar',
        'då',
        'eg',
        'ein',
        'eit',
        'eitt',
        'eller',
        'elles',
        'en',
        'ene',
        'eneste',
        'enhver',
        'enn',
        'er',
        'et',
        'ett',
        'etter',
        'folk',
        'for',
        'fordi',
        'forsøke',
        'fra',
        'få',
        'før',
        'først',
        'gjorde',
        'gjøre',
        'god',
        'gå',
        'ha',
        'hadde',
        'han',
        'hans',
        'har',
        'hennar',
        'henne',
        'hennes',
        'her',
        'hjå',
        'ho',
        'hoe',
        'honom',
        'hoss',
        'hossen',
        'hun',
        'hva',
        'hvem',
        'hver',
        'hvilke',
        'hvilken',
        'hvis',
        'hvor',
        'hvordan',
        'hvorfor',
        'i',
        'ikke',
        'ikkje',
        'ingen',
        'ingi',
        'inkje',
        'inn',
        'innen',
        'inni',
        'ja',
        'jeg',
        'kan',
        'kom',
        'korleis',
        'korso',
        'kun',
        'kunne',
        'kva',
        'kvar',
        'kvarhelst',
        'kven',
        'kvi',
        'kvifor',
        'lage',
        'lang',
        'lik',
        'like',
        'makt',
        'man',
        'mange',
        'me',
        'med',
        'medan',
        'meg',
        'meget',
        'mellom',
        'men',
        'mens',
        'mer',
        'mest',
        'mi',
        'min',
        'mine',
        'mitt',
        'mot',
        'mye',
        'mykje',
        'må',
        'måte',
        'navn',
        'ned',
        'nei',
        'no',
        'noe',
        'noen',
        'noka',
        'noko',
        'nokon',
        'nokor',
        'nokre',
        'ny',
        'nå',
        'når',
        'og',
        'også',
        'om',
        'opp',
        'oss',
        'over',
        'part',
        'punkt',
        'på',
        'rett',
        'riktig',
        'samme',
        'sant',
        'seg',
        'selv',
        'si',
        'sia',
        'sidan',
        'siden',
        'sin',
        'sine',
        'sist',
        'sitt',
        'sjøl',
        'skal',
        'skulle',
        'slik',
        'slutt',
        'so',
        'som',
        'somme',
        'somt',
        'start',
        'stille',
        'så',
        'sånn',
        'tid',
        'til',
        'tilbake',
        'tilstand',
        'um',
        'under',
        'upp',
        'ut',
        'uten',
        'var',
        'vart',
        'varte',
        'ved',
        'verdi',
        'vere',
        'verte',
        'vi',
        'vil',
        'ville',
        'vite',
        'vore',
        'vors',
        'vort',
        'vår',
        'være',
        'vært',
        'å',
    ];
}
251 |
--------------------------------------------------------------------------------
/src/Tool/StopWords/Russian.php:
--------------------------------------------------------------------------------
1 |
8 | * @author Andrey Astashov