├── .npmignore
├── demo
├── stopword-demo.gif
├── mini-additions.css
├── index.html
└── stopword-app.js
├── .prettierrc.json
├── src
├── reference.txt
├── stopwords__123.js
├── stopwords_est.js
├── stopwords_hye.js
├── stopwords_lat.js
├── stopwords_ell.js
├── stopwords_som.js
├── stopwords_sot.js
├── stopwords_zul.js
├── stopwords_lgg.js
├── stopwords_lggNd.js
├── stopwords_afr.js
├── stopwords_hau.js
├── stopwords_yor.js
├── stopwords_mar.js
├── stopwords_slk.js
├── stopwords_eus.js
├── stopwords_gle.js
├── stopwords_tha.js
├── stopwords_fas.js
├── stopwords_spa.js
├── stopwords_kur.js
├── stopwords_zho.js
├── stopwords_vie.js
├── stopwords_fin.js
├── stopwords_glg.js
├── stopwords_lav.js
├── stopwords_epo.js
├── stopwords_por.js
├── stopwords_eng.js
├── stopwords_hrv.js
├── stopwords_nld.js
├── stopwords_nob.js
├── stopwords_heb.js
├── stopwords_swa.js
├── stopwords_rus.js
├── stopwords_cat.js
├── stopwords_tgl.js
├── stopwords_ukr.js
├── stopwords_jpn.js
├── stopwords_fra.js
├── stopwords_dan.js
├── stopword.js
├── stopwords_bul.js
├── stopwords_guj.js
├── stopwords_ron.js
├── stopwords_hin.js
├── stopwords_tur.js
├── stopwords_ces.js
├── stopwords_pol.js
├── stopwords_ita.js
├── stopwords_mya.js
├── stopwords_slv.js
├── stopwords_ben.js
├── stopwords_swe.js
├── stopwords_panGu.js
├── stopwords_ara.js
├── stopwords_kor.js
├── stopwords_lit.js
└── stopwords_urd.js
├── .github
├── workflows
│ └── tests.yml
└── dependabot.yml
├── .gitignore
├── LICENSE
├── package.json
├── rollup.config.js
├── test
└── ui-test.js
└── CODE_OF_CONDUCT.md
/.npmignore:
--------------------------------------------------------------------------------
1 | test
2 | .travis.yml
3 |
--------------------------------------------------------------------------------
/demo/stopword-demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fergiemcdowall/stopword/HEAD/demo/stopword-demo.gif
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "arrowParens": "avoid",
3 | "bracketSpacing": true,
4 | "semi": false,
5 | "singleQuote": true,
6 | "trailingComma": "none"
7 | }
8 |
--------------------------------------------------------------------------------
/src/reference.txt:
--------------------------------------------------------------------------------
1 | Short version for minified scripts:
2 |
3 | The MIT License (MIT)
4 | Copyright (c) 2015 - 2022 Fergus McDowall
5 |
6 | Full license text + Third party licenses found in ./3rd-party.txt
--------------------------------------------------------------------------------
/src/stopwords__123.js:
--------------------------------------------------------------------------------
1 | const num123 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] // ASCII digits
2 | const numFas = ['۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '۰'] // Extended Arabic-Indic (Persian) digits, zero listed last
3 | const numKor = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] // NOTE(review): identical to num123 as written - confirm whether full-width digits were intended
4 | const numMya = ['၀', '၁', '၂', '၃', '၄', '၅', '၆', '၇', '၈', '၉'] // Myanmar digits
5 | const numTel = ['౦', '౧', '౨', '౩', '౪', '౫', '౬', '౭', '౮', '౯'] // Telugu digits
6 | const _123 = [...num123, ...numFas, ...numKor, ...numMya, ...numTel] // all digit lists concatenated; duplicates are not removed
7 | export { _123 }
8 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 | on:
3 | - push
4 | - pull_request
5 | jobs:
6 | run-tests:
7 | runs-on: ubuntu-latest
8 | strategy:
9 | matrix:
10 | node-version: [lts/-1, lts/*, node]
11 | steps:
12 | - uses: actions/checkout@v3
13 | - uses: actions/setup-node@v3
14 | with:
15 | node-version: ${{ matrix.node-version }}
16 | cache: 'npm'
17 | - run: npm install
18 | - run: sudo apt-get install xvfb
19 | - run: xvfb-run --auto-servernum npm test
--------------------------------------------------------------------------------
/src/stopwords_est.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const est = [
4 | 'aga',
5 | 'ei',
6 | 'et',
7 | 'ja',
8 | 'jah',
9 | 'kas',
10 | 'kui',
11 | 'kõik',
12 | 'ma',
13 | 'me',
14 | 'mida',
15 | 'midagi',
16 | 'mind',
17 | 'minu',
18 | 'mis',
19 | 'mu',
20 | 'mul',
21 | 'mulle',
22 | 'nad',
23 | 'nii',
24 | 'oled',
25 | 'olen',
26 | 'oli',
27 | 'oma',
28 | 'on',
29 | 'pole',
30 | 'sa',
31 | 'seda',
32 | 'see',
33 | 'selle',
34 | 'siin',
35 | 'siis',
36 | 'ta',
37 | 'te',
38 | 'ära'
39 | ]
40 | export { est }
41 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | # To get started with Dependabot version updates, you'll need to specify which
2 | # package ecosystems to update and where the package manifests are located.
3 | # Please see the documentation for all configuration options:
4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5 |
6 | version: 2
7 | updates:
8 | - package-ecosystem: "npm" # See documentation for possible values
9 | directory: "/" # Location of package manifests
10 | schedule:
11 | interval: "daily"
12 | # Always increase the version requirement
13 | # to match the new version.
14 | versioning-strategy: increase
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 |
5 | # Runtime data
6 | pids
7 | *.pid
8 | *.seed
9 |
10 | # Directory for instrumented libs generated by jscoverage/JSCover
11 | lib-cov
12 |
13 | # Coverage directory used by tools like istanbul
14 | coverage
15 |
16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
17 | .grunt
18 |
19 | # node-waf configuration
20 | .lock-wscript
21 |
22 | # Compiled binary addons (http://nodejs.org/api/addons.html)
23 | build/Release
24 |
25 | # Dependency directory
26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
27 | node_modules
28 |
29 | # OSX noise
30 | .DS_Store
31 |
--------------------------------------------------------------------------------
/demo/mini-additions.css:
--------------------------------------------------------------------------------
1 | h1,
2 | h2,
3 | h3,
4 | h4,
5 | h5,
6 | h6 {
7 | margin-left: 0px;
8 | }
9 |
10 | h2 {
11 | line-height: 1em;
12 | margin-top: 5px;
13 | margin-bottom: 5px;
14 | }
15 |
16 | p {
17 | padding: 0 0 0 0;
18 | margin: 0 0 20px 0;
19 | }
20 |
21 | #header {
22 | display: flex;
23 | align-items: center;
24 | justify-content: space-between;
25 | padding-left: 10px;
26 | }
27 |
28 | #addContainer {
29 | background: #cccccc;
30 | padding: 10px;
31 | }
32 |
33 | #newString {
34 | padding: 10px;
35 | background: #eeeeee;
36 | }
37 |
38 | #stopwordsRemoved span {
39 | font-size: 1.2rem;
40 | }
41 |
42 | input#text {
43 | width: 100%;
44 | font-size: 1.2rem;
45 | margin-left: 0;
46 | }
47 |
--------------------------------------------------------------------------------
/src/stopwords_hye.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const hye = [
4 | 'այդ',
5 | 'այլ',
6 | 'այն',
7 | 'այս',
8 | 'դու',
9 | 'դուք',
10 | 'եմ',
11 | 'են',
12 | 'ենք',
13 | 'ես',
14 | 'եք',
15 | 'է',
16 | 'էի',
17 | 'էին',
18 | 'էինք',
19 | 'էիր',
20 | 'էիք',
21 | 'էր',
22 | 'ըստ',
23 | 'թ',
24 | 'ի',
25 | 'ին',
26 | 'իսկ',
27 | 'իր',
28 | 'կամ',
29 | 'համար',
30 | 'հետ',
31 | 'հետո',
32 | 'մենք',
33 | 'մեջ',
34 | 'մի',
35 | 'ն',
36 | 'նա',
37 | 'նաև',
38 | 'նրա',
39 | 'նրանք',
40 | 'որ',
41 | 'որը',
42 | 'որոնք',
43 | 'որպես',
44 | 'ու',
45 | 'ում',
46 | 'պիտի',
47 | 'վրա',
48 | 'և'
49 | ]
50 | export { hye }
51 |
--------------------------------------------------------------------------------
/src/stopwords_lat.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const lat = [
4 | 'a',
5 | 'ab',
6 | 'ac',
7 | 'ad',
8 | 'at',
9 | 'atque',
10 | 'aut',
11 | 'autem',
12 | 'cum',
13 | 'de',
14 | 'dum',
15 | 'e',
16 | 'erant',
17 | 'erat',
18 | 'est',
19 | 'et',
20 | 'etiam',
21 | 'ex',
22 | 'haec',
23 | 'hic',
24 | 'hoc',
25 | 'in',
26 | 'ita',
27 | 'me',
28 | 'nec',
29 | 'neque',
30 | 'non',
31 | 'per',
32 | 'qua',
33 | 'quae',
34 | 'quam',
35 | 'qui',
36 | 'quibus',
37 | 'quidem',
38 | 'quo',
39 | 'quod',
40 | 're',
41 | 'rebus',
42 | 'rem',
43 | 'res',
44 | 'sed',
45 | 'si',
46 | 'sic',
47 | 'sunt',
48 | 'tamen',
49 | 'tandem',
50 | 'te',
51 | 'ut',
52 | 'vel'
53 | ]
54 | export { lat }
55 |
--------------------------------------------------------------------------------
/demo/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Stopword browser demo
10 |
11 |
12 |
13 |
16 |
17 |
18 |
Language
19 |
Check which Language code to use.
20 |
Text with stopwords
21 |
22 |
23 |
24 |
25 |
End result
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 - 2022 Fergus McDowall
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/stopwords_ell.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const ell = [
4 | 'αλλα',
5 | 'αν',
6 | 'αντι',
7 | 'απο',
8 | 'αυτα',
9 | 'αυτεσ',
10 | 'αυτη',
11 | 'αυτο',
12 | 'αυτοι',
13 | 'αυτοσ',
14 | 'αυτουσ',
15 | 'αυτων',
16 | 'για',
17 | 'δε',
18 | 'δεν',
19 | 'εαν',
20 | 'ειμαι',
21 | 'ειμαστε',
22 | 'ειναι',
23 | 'εισαι',
24 | 'ειστε',
25 | 'εκεινα',
26 | 'εκεινεσ',
27 | 'εκεινη',
28 | 'εκεινο',
29 | 'εκεινοι',
30 | 'εκεινοσ',
31 | 'εκεινουσ',
32 | 'εκεινων',
33 | 'ενω',
34 | 'επι',
35 | 'η',
36 | 'θα',
37 | 'ισωσ',
38 | 'κ',
39 | 'και',
40 | 'κατα',
41 | 'κι',
42 | 'μα',
43 | 'με',
44 | 'μετα',
45 | 'μη',
46 | 'μην',
47 | 'να',
48 | 'ο',
49 | 'οι',
50 | 'ομωσ',
51 | 'οπωσ',
52 | 'οσο',
53 | 'οτι',
54 | 'παρα',
55 | 'ποια',
56 | 'ποιεσ',
57 | 'ποιο',
58 | 'ποιοι',
59 | 'ποιοσ',
60 | 'ποιουσ',
61 | 'ποιων',
62 | 'που',
63 | 'προσ',
64 | 'πωσ',
65 | 'σε',
66 | 'στη',
67 | 'στην',
68 | 'στο',
69 | 'στον',
70 | 'τα',
71 | 'την',
72 | 'τησ',
73 | 'το',
74 | 'τον',
75 | 'τοτε',
76 | 'του',
77 | 'των',
78 | 'ωσ'
79 | ]
80 | export { ell }
81 |
--------------------------------------------------------------------------------
/src/stopwords_som.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const som = [
20 | 'oo',
21 | 'atabo',
22 | 'ay',
23 | 'ku',
24 | 'waxeey',
25 | 'uu',
26 | 'lakin',
27 | 'si',
28 | 'ayuu',
29 | 'soo',
30 | 'waa',
31 | 'ka',
32 | 'kasoo',
33 | 'kale',
34 | 'waxuu',
35 | 'ayee',
36 | 'ayaa',
37 | 'kuu',
38 | 'isku',
39 | 'ugu',
40 | 'jiray',
41 | 'dhan',
42 | 'dambeestii',
43 | 'inuu',
44 | 'in',
45 | 'jirtay',
46 | 'uheestay',
47 | 'aad',
48 | 'uga',
49 | 'hadana',
50 | 'timaado',
51 | 'timaaday'
52 | ]
53 | export { som }
54 |
--------------------------------------------------------------------------------
/demo/stopword-app.js:
--------------------------------------------------------------------------------
1 | const languageSelect = document.getElementById('languages')
2 | const sentenceInput = document.getElementById('text')
3 | const resultText = document.getElementById('stopwordsRemoved')
4 |
5 | function updateSentence () { // Recompute and display the input text with the selected language's stopwords removed
6 | const language = languageSelect.value
7 | const oldString = sentenceInput.value.split(' ') // array of tokens, split on single spaces
8 | const newString = sw.removeStopwords(oldString, sw[language]).join(' ') // NOTE(review): `sw` is presumably the global exposed by the UMD bundle - confirm in index.html
9 | console.group('Stopwords applied')
10 | console.log('oldString:', oldString.join(' '))
11 | console.log('newString:', newString)
12 | console.groupEnd()
13 |
14 | // Show the remaining words in the result element
15 | resultText.textContent = newString
16 | }
17 |
18 | // Listen to events and initiate a stopword removal
19 | sentenceInput.addEventListener('keyup', updateSentence)
20 |
21 | // Configure the language select
22 | const allLanguages = Object.entries(sw)
23 | .filter(([key, val]) => Array.isArray(val))
24 | .map((val) => val[0])
25 |
26 | languageSelect.addEventListener('change', function (event) {
27 | languageSelect.value = event.target.value
28 | sentenceInput.focus()
29 | updateSentence()
30 | })
31 |
32 | languageSelect.innerHTML = allLanguages
33 | .map((lang) => ``)
34 | .join('')
35 |
36 | languageSelect.value = 'en'
37 |
--------------------------------------------------------------------------------
/src/stopwords_sot.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const sot = [
20 | 'a',
21 | 'le',
22 | 'o',
23 | 'ba',
24 | 'ho',
25 | 'oa',
26 | 'ea',
27 | 'ka',
28 | 'hae',
29 | 'tselane',
30 | 'eaba',
31 | 'ke',
32 | 'hore',
33 | 'ha',
34 | 'e',
35 | 'ne',
36 | 're',
37 | 'bona',
38 | 'me',
39 | 'limo',
40 | 'tsa',
41 | 'haholo',
42 | 'la',
43 | 'empa',
44 | 'ngoanake',
45 | 'se',
46 | 'moo',
47 | 'm\'e',
48 | 'bane',
49 | 'mo',
50 | 'tse',
51 | 'sa',
52 | 'li',
53 | 'ena',
54 | 'bina',
55 | 'pina',
56 | 'hape'
57 | ]
58 | export { sot }
59 |
--------------------------------------------------------------------------------
/src/stopwords_zul.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const zul = [
20 | 'ukuthi',
21 | 'kodwa',
22 | 'futhi',
23 | 'kakhulu',
24 | 'wakhe',
25 | 'kusho',
26 | 'uma',
27 | 'wathi',
28 | 'umama',
29 | 'kanye',
30 | 'phansi',
31 | 'ngesikhathi',
32 | 'lapho',
33 | 'u',
34 | 'zakhe',
35 | 'khona',
36 | 'ukuba',
37 | 'nje',
38 | 'phezulu',
39 | 'yakhe',
40 | 'kungani',
41 | 'wase',
42 | 'la',
43 | 'mina',
44 | 'wami',
45 | 'ukuze',
46 | 'unonkungu',
47 | 'wabona',
48 | 'wahamba',
49 | 'lakhe',
50 | 'yami',
51 | 'kanjani',
52 | 'kwakukhona',
53 | 'ngelinye'
54 | ]
55 | export { zul }
56 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "stopword",
3 | "version": "3.1.5",
4 | "description": "A module for node.js and the browser that takes in text and returns text that is stripped of stopwords. Has pre-defined stopword lists for 62 languages and also takes lists with custom stopwords as input.",
5 | "main": "./dist/stopword.cjs.js",
6 | "module": "./src/stopword.js",
7 | "browser": "./src/stopword.js",
8 | "jsdelivr": "./dist/stopword.umd.min.js",
9 | "files": [
10 | "./dist",
11 | "./src",
12 | "./rollup.config.js"
13 | ],
14 | "scripts": {
15 | "build": "rollup --config",
16 | "lint": "standard --fix ./*.js src/*.js test/*.js",
17 | "test": "npm run lint && npm run build && npx ava ./test/test.cjs.js && npx ava ./test/test.esm.mjs && npx ava ./test/ui-test.js"
18 | },
19 | "repository": {
20 | "type": "git",
21 | "url": "https://github.com/fergiemcdowall/stopword"
22 | },
23 | "keywords": [
24 | "stopword",
25 | "stopwords",
26 | "document-processing",
27 | "search",
28 | "search-index",
29 | "nlp"
30 | ],
31 | "devDependencies": {
32 | "batr": "^2.1.10",
33 | "standard": "^17.1.2",
34 | "words-n-numbers": "^9.1.2"
35 | },
36 | "author": "Fergus McDowall",
37 | "license": "MIT",
38 | "bugs": {
39 | "url": "https://github.com/fergiemcdowall/stopword/issues"
40 | },
41 | "homepage": "https://github.com/fergiemcdowall/stopword"
42 | }
43 |
--------------------------------------------------------------------------------
/src/stopwords_lgg.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const lgg = [
20 | '́',
21 | '̀',
22 | 'nɨ',
23 | 'mà',
24 | 'rɨ',
25 | 'dɨ',
26 | 'ɨ',
27 | '́nɨ',
28 | 'èrɨ',
29 | '́á\'',
30 | 'sɨ',
31 | 'àzɨ',
32 | 'yɨ',
33 | 'rá',
34 | 'vɨ',
35 | 'nga',
36 | 'be',
37 | 'mɨ',
38 | 'à',
39 | 'dà',
40 | 'kʉ',
41 | 'bá',
42 | ' ́lé',
43 | 'má',
44 | 'e',
45 | 'yo',
46 | '̀yɨ',
47 | 'ma',
48 | 'kɨ',
49 | 'àlʉ',
50 | '́mà',
51 | 'rʉ́',
52 | 'drɨ',
53 | 'patí',
54 | 'a',
55 | 'è',
56 | 'yó',
57 | 'te',
58 | '̀á',
59 | 'mà',
60 | 'mâ',
61 | 'dálé',
62 | 'yí',
63 | '̌',
64 | 'pɨ',
65 | 'e\'yó',
66 | 'ndráa',
67 | 'bo',
68 | 'di',
69 | 'drìá'
70 | ]
71 | export { lgg }
72 |
--------------------------------------------------------------------------------
/src/stopwords_lggNd.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const lggNd = [
20 | 'ma',
21 | 'ni',
22 | 'ri',
23 | 'eri',
24 | 'di',
25 | 'yi',
26 | 'si',
27 | 'ba',
28 | 'nga',
29 | 'i',
30 | 'ra',
31 | 'ku',
32 | 'be',
33 | 'yo',
34 | 'da',
35 | 'azini',
36 | 'dria',
37 | 'ru',
38 | 'azi',
39 | 'mu',
40 | 'te',
41 | 'ndra',
42 | 'diyi',
43 | 'ima',
44 | 'mi',
45 | 'alu',
46 | 'nde',
47 | 'alia',
48 | 'le',
49 | 'vile',
50 | 'dri',
51 | 'pati',
52 | 'aria',
53 | 'bo',
54 | 'e\'yo',
55 | 'tu',
56 | 'kini',
57 | 'dii',
58 | 'ama',
59 | 'eyi',
60 | 'dika',
61 | 'pi',
62 | 'e',
63 | 'angu',
64 | 'e\'do',
65 | 'pie',
66 | 'ka',
67 | 'ti',
68 | 'o\'du',
69 | 'du'
70 | ]
71 | export { lggNd }
72 |
--------------------------------------------------------------------------------
/src/stopwords_afr.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License,
4 | Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing,
11 | software
12 | distributed under the License is distributed on an "AS IS" BASIS,
13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
14 | either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | */
18 |
19 | /* This list is frequency sorted. That means it can be sliced from the bottom
20 | and be less aggressive in excluding stopwords */
21 |
22 | const afr = [
23 | 'die',
24 | 'het',
25 | 'en',
26 | 'sy',
27 | 'nie',
28 | 'was',
29 | 'hy',
30 | 'te',
31 | 'is',
32 | 'ek',
33 | 'om',
34 | 'hulle',
35 | 'in',
36 | 'my',
37 | '\'n',
38 | 'vir',
39 | 'toe',
40 | 'haar',
41 | 'van',
42 | 'dit',
43 | 'op',
44 | 'se',
45 | 'wat',
46 | 'met',
47 | 'gaan',
48 | 'baie',
49 | 'ons',
50 | 'jy',
51 | 'na',
52 | 'maar',
53 | 'hom',
54 | 'so',
55 | 'n',
56 | 'huis',
57 | 'kan',
58 | 'aan',
59 | 'dat',
60 | 'daar',
61 | 'sal',
62 | 'jou',
63 | 'gesê',
64 | 'by',
65 | 'kom',
66 | 'een',
67 | 'ma',
68 | 'as',
69 | 'son',
70 | 'groot',
71 | 'begin',
72 | 'al'
73 | ]
74 | export { afr }
75 |
--------------------------------------------------------------------------------
/src/stopwords_hau.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const hau = [
20 | 'ta',
21 | 'da',
22 | 'ya',
23 | 'sai',
24 | 'ba',
25 | 'yi',
26 | 'na',
27 | 'kuma',
28 | 'ma',
29 | 'ji',
30 | 'cikin',
31 | 'in',
32 | 'ni',
33 | 'wata',
34 | 'wani',
35 | 'ce',
36 | 'tana',
37 | 'don',
38 | 'za',
39 | 'sun',
40 | 'amma',
41 | 'ga',
42 | 'ina',
43 | 'ne',
44 | 'tselane',
45 | 'mai',
46 | 'suka',
47 | 'wannan',
48 | 'a',
49 | 'ko',
50 | 'lokacin',
51 | 'su',
52 | 'take',
53 | 'kaka',
54 | 'shi',
55 | 'yake',
56 | 'yana',
57 | 'mulongo',
58 | 'mata',
59 | 'ka',
60 | 'ban',
61 | 'ita',
62 | 'tafi',
63 | 'shanshani',
64 | 'kai',
65 | 'daɗi',
66 | 'mi',
67 | 'ƙato',
68 | 'fara',
69 | 'rana'
70 | ]
71 | export { hau }
72 |
--------------------------------------------------------------------------------
/rollup.config.js:
--------------------------------------------------------------------------------
1 | import resolve from '@rollup/plugin-node-resolve'
2 | import commonjs from '@rollup/plugin-commonjs'
3 | import { terser } from 'rollup-plugin-terser'
4 | import path from 'path'
5 | import license from 'rollup-plugin-license'
6 |
7 | export default [
8 | // Unminified builds: browser-friendly UMD (exposed as the global `sw`),
9 | // CommonJS (for Node) and ES module (for bundlers).
10 | {
11 | input: './src/stopword.js',
12 | output: [
13 | { name: 'sw', file: './dist/stopword.umd.js', format: 'umd', exports: 'named' },
14 | { file: './dist/stopword.cjs.js', format: 'cjs' },
15 | { file: './dist/stopword.esm.mjs', format: 'es' }
16 | ],
17 | plugins: [
18 | resolve(), // lets Rollup resolve imports from node_modules
19 | commonjs() // converts CommonJS dependencies to ES modules
20 | ]
21 | },
22 | // Minified versions of the same three formats, with a license banner
23 | {
24 | input: './src/stopword.js',
25 | output: [
26 | { name: 'sw', file: './dist/stopword.umd.min.js', format: 'umd', exports: 'named' },
27 | { file: './dist/stopword.cjs.min.js', format: 'cjs' },
28 | { file: './dist/stopword.esm.min.mjs', format: 'es' }
29 | ],
30 | plugins: [
31 | resolve(), // lets Rollup resolve imports from node_modules
32 | commonjs(), // converts CommonJS dependencies to ES modules
33 | terser(), // Minify
34 | license({ // Add reference to license file for minified scripts
35 | banner: {
36 | commentStyle: 'regular', // The default
37 | content: {
38 | file: path.join(__dirname, './src/reference.txt') // NOTE(review): __dirname only exists if this config is loaded as CommonJS - confirm
39 | }
40 | }
41 | })
42 | ]
43 | }
44 | ]
45 |
--------------------------------------------------------------------------------
/src/stopwords_yor.js:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Liam Doherty
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | */
15 |
16 | /* This list is frequency sorted. That means it can be sliced from the bottom
17 | and be less aggressive in excluding stopwords */
18 |
19 | const yor = [
20 | 'ó',
21 | 'ní',
22 | 'ìjàpá',
23 | 'ṣe',
24 | 'rẹ̀',
25 | 'tí',
26 | 'àwọn',
27 | 'sí',
28 | 'ni',
29 | 'náà',
30 | 'anansi',
31 | 'láti',
32 | 'kan',
33 | 'ti',
34 | 'ń',
35 | 'lọ',
36 | 'o',
37 | 'bí',
38 | 'padà',
39 | 'sì',
40 | 'wá',
41 | 'wangari',
42 | 'lè',
43 | 'wà',
44 | 'kí',
45 | 'púpọ̀',
46 | 'odò',
47 | 'mi',
48 | 'wọ́n',
49 | 'pẹ̀lú',
50 | 'a',
51 | 'ṣùgbọ́n',
52 | 'fún',
53 | 'jẹ́',
54 | 'fẹ́',
55 | 'oúnjẹ',
56 | 'rí',
57 | 'igi',
58 | 'kò',
59 | 'ilé',
60 | 'jù',
61 | 'olóńgbò',
62 | 'pé',
63 | 'é',
64 | 'gbogbo',
65 | 'iṣu',
66 | 'inú',
67 | 'bẹ̀rẹ̀',
68 | 'jẹ',
69 | 'fi',
70 | 'dúró',
71 | 'alẹ́',
72 | 'ọjọ́',
73 | 'nítorí',
74 | 'nǹkan',
75 | 'ọ̀rẹ́',
76 | 'àkókò',
77 | 'sínú',
78 | 'ṣ',
79 | 'yìí'
80 | ]
81 | export { yor }
82 |
--------------------------------------------------------------------------------
/test/ui-test.js:
--------------------------------------------------------------------------------
1 | const { chromium } = require('playwright')
2 | const test = require('ava')
3 | const browserPromise = chromium.launch({
4 | headless: true
5 | // slowMo: 350
6 | })
7 |
8 | const path = require('path')
9 | async function pageMacro (t, callback) { // AVA macro: runs `callback(t, page)` with a fresh page from the shared browser
10 | const browser = await browserPromise
11 | const page = await browser.newPage()
12 | await page.setViewportSize({ width: 640, height: 480 })
13 | try {
14 | await callback(t, page)
15 | } finally {
16 | await page.close() // always release the page, even if the callback throws
17 | }
18 | }
19 |
20 | test('1: Select english stopwords, type a sentence, check result. 2: Same for norwegian', pageMacro, async (t, page) => {
21 | // t.plan(2)
22 | const filePath = await path.resolve('./demo/index.html')
23 | const url = 'file://' + filePath
24 |
25 | // Go to demo
26 | await page.goto(url)
27 |
28 | // 1: Select english language and type a sentence
29 | await page.selectOption('select', 'eng')
30 | await page.click('#text')
31 | await page.keyboard.type('what a wonderful day for the stopword module it is')
32 | // TEST: check result
33 | let stopped = await (page.textContent('#stopwordsRemoved'))
34 | t.deepEqual(await stopped, 'wonderful day stopword module')
35 |
36 | // 2: select norwegian language, remove text and type sentence
37 | await page.selectOption('select', 'nob')
38 | await page.click('#text', {
39 | clickCount: 3
40 | })
41 | await page.keyboard.press('Backspace')
42 | await page.keyboard.type('for en fin dag det er for stoppordmodulen')
43 | // TEST: check result
44 | stopped = await (page.textContent('#stopwordsRemoved'))
45 | t.deepEqual(await stopped, 'fin dag stoppordmodulen')
46 | })
47 |
--------------------------------------------------------------------------------
/src/stopwords_mar.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Marathi (ISO 639-3 `mar`) stopwords, one token per entry.
 *
 * NOTE(review): a few entries look like spellings with a dropped anusvara or
 * conjunct (e.g. 'किवा') — confirm against the upstream list before editing.
 *
 * @type {string[]}
 */
export const mar = [
  'अधिक',
  'अनेक',
  'अशी',
  'असलयाचे',
  'असलेल्या',
  'असा',
  'असून',
  'असे',
  'आज',
  'आणि',
  'आता',
  'आपल्या',
  'आला',
  'आली',
  'आले',
  'आहे',
  'आहेत',
  'एक',
  'एका',
  'कमी',
  'करणयात',
  'करून',
  'का',
  'काम',
  'काय',
  'काही',
  'किवा',
  'की',
  'केला',
  'केली',
  'केले',
  'कोटी',
  'गेल्या',
  'घेऊन',
  'जात',
  'झाला',
  'झाली',
  'झाले',
  'झालेल्या',
  'टा',
  'डॉ',
  'तर',
  'तरी',
  'तसेच',
  'ता',
  'ती',
  'तीन',
  'ते',
  'तो',
  'त्या',
  'त्याचा',
  'त्याची',
  'त्याच्या',
  'त्याना',
  'त्यानी',
  'त्यामुळे',
  'त्री',
  'दिली',
  'दोन',
  'न',
  'नाही',
  'निर्ण्य',
  'पण',
  'पम',
  'परयतन',
  'पाटील',
  'म',
  'मात्र',
  'माहिती',
  'मी',
  'मुबी',
  'म्हणजे',
  'म्हणाले',
  'म्हणून',
  'या',
  'याचा',
  'याची',
  'याच्या',
  'याना',
  'यानी',
  'येणार',
  'येत',
  'येथील',
  'येथे',
  'लाख',
  'व',
  'व्यकत',
  'सर्व',
  'सागित्ले',
  'सुरू',
  'हजार',
  'हा',
  'ही',
  'हे',
  'होणार',
  'होत',
  'होता',
  'होती',
  'होते'
]
105 |
--------------------------------------------------------------------------------
/src/stopwords_slk.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Slovak (ISO 639-3 `slk`) stopwords, one lowercase token per entry.
 *
 * Entries containing ď, ň and č had been stored as the Latin-1 look-alikes
 * ï, ò and è (Windows-1250 bytes decoded with the wrong code page), so they
 * could never match real Slovak text. They are spelled correctly here, the
 * misspelt 'kedže' is now 'keďže', and the second copy of 'a' is gone.
 *
 * @type {string[]}
 */
const slk = [
  'a',
  'aby',
  'aj',
  'ako',
  'aký',
  'ale',
  'alebo',
  'ani',
  'avšak',
  'ba',
  'bez',
  'buď',
  'cez',
  'do',
  'ho',
  'hoci',
  'i',
  'ich',
  'im',
  'ja',
  'jeho',
  'jej',
  'jemu',
  'ju',
  'k',
  'kam',
  'kde',
  'keďže',
  'keď',
  'kto',
  'ktorý',
  'ku',
  'lebo',
  'ma',
  'mi',
  'mne',
  'mnou',
  'mu',
  'my',
  'mňa',
  'môj',
  'na',
  'nad',
  'nami',
  'neho',
  'nej',
  'nemu',
  'nich',
  'nielen',
  'nim',
  'no',
  'nám',
  'nás',
  'náš',
  'ním',
  'o',
  'od',
  'on',
  'ona',
  'oni',
  'ono',
  'ony',
  'po',
  'pod',
  'pre',
  'pred',
  'pri',
  's',
  'sa',
  'seba',
  'sem',
  'so',
  'svoj',
  'taký',
  'tam',
  'teba',
  'tebe',
  'tebou',
  'tej',
  'ten',
  'ti',
  'tie',
  'to',
  'toho',
  'tomu',
  'tou',
  'tvoj',
  'ty',
  'tá',
  'tým',
  'v',
  'vami',
  'veď',
  'vo',
  'vy',
  'vám',
  'vás',
  'váš',
  'však',
  'z',
  'za',
  'zo',
  'či',
  'čo',
  'čí',
  'ňom',
  'ňou',
  'ňu',
  'že'
]
export { slk }
116 |
--------------------------------------------------------------------------------
/src/stopwords_eus.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Basque (ISO 639-3 `eus`) stopwords, one lowercase token per entry.
 *
 * @type {string[]}
 */
export const eus = [
  'al',
  'anitz',
  'arabera',
  'asko',
  'baina',
  'bat',
  'batean',
  'batek',
  'bati',
  'batzuei',
  'batzuek',
  'batzuetan',
  'batzuk',
  'bera',
  'beraiek',
  'berau',
  'berauek',
  'bere',
  'berori',
  'beroriek',
  'beste',
  'bezala',
  'da',
  'dago',
  'dira',
  'ditu',
  'du',
  'dute',
  'edo',
  'egin',
  'ere',
  'eta',
  'eurak',
  'ez',
  'gainera',
  'gu',
  'gutxi',
  'guzti',
  'haiei',
  'haiek',
  'haietan',
  'hainbeste',
  'hala',
  'han',
  'handik',
  'hango',
  'hara',
  'hari',
  'hark',
  'hartan',
  'hau',
  'hauei',
  'hauek',
  'hauetan',
  'hemen',
  'hemendik',
  'hemengo',
  'hi',
  'hona',
  'honek',
  'honela',
  'honetan',
  'honi',
  'hor',
  'hori',
  'horiei',
  'horiek',
  'horietan',
  'horko',
  'horra',
  'horrek',
  'horrela',
  'horretan',
  'horri',
  'hortik',
  'hura',
  'izan',
  'ni',
  'noiz',
  'nola',
  'non',
  'nondik',
  'nongo',
  'nor',
  'nora',
  'ze',
  'zein',
  'zen',
  'zenbait',
  'zenbat',
  'zer',
  'zergatik',
  'ziren',
  'zituen',
  'zu',
  'zuek',
  'zuen',
  'zuten'
]
104 |
--------------------------------------------------------------------------------
/src/stopwords_gle.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Irish (ISO 639-3 `gle`) stopwords, one lowercase token per entry.
 * Elided forms keep their trailing apostrophe (b', d', m').
 *
 * @type {string[]}
 */
export const gle = [
  'a',
  'ach',
  'ag',
  'agus',
  'an',
  'aon',
  'ar',
  'arna',
  'as',
  'b\'',
  'ba',
  'beirt',
  'bhúr',
  'caoga',
  'ceathair',
  'ceathrar',
  'chomh',
  'chtó',
  'chuig',
  'chun',
  'cois',
  'céad',
  'cúig',
  'cúigear',
  'd\'',
  'daichead',
  'dar',
  'de',
  'deich',
  'deichniúr',
  'den',
  'dhá',
  'do',
  'don',
  'dtí',
  'dá',
  'dár',
  'dó',
  'faoi',
  'faoin',
  'faoina',
  'faoinár',
  'fara',
  'fiche',
  'gach',
  'gan',
  'go',
  'gur',
  'haon',
  'hocht',
  'i',
  'iad',
  'idir',
  'in',
  'ina',
  'ins',
  'inár',
  'is',
  'le',
  'leis',
  'lena',
  'lenár',
  'm\'',
  'mar',
  'mo',
  'mé',
  'na',
  'nach',
  'naoi',
  'naonúr',
  'ná',
  'ní',
  'níor',
  'nó',
  'nócha',
  'ocht',
  'ochtar',
  'os',
  'roimh',
  'sa',
  'seacht',
  'seachtar',
  'seachtó',
  'seasca',
  'seisear',
  'siad',
  'sibh',
  'sinn',
  'sna',
  'sé',
  'sí',
  'tar',
  'thar',
  'thú',
  'triúr',
  'trí',
  'trína',
  'trínár',
  'tríocha',
  'tú',
  'um',
  'ár',
  'é',
  'éis',
  'í',
  'ó',
  'ón',
  'óna',
  'ónár'
]
115 |
--------------------------------------------------------------------------------
/src/stopwords_tha.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Thai (ISO 639-3 `tha`) stopwords, one token per entry.
 *
 * Four words containing SARA AM were listed only in decomposed form
 * (NIKHAHIT U+0E4D + SARA AA U+0E32). Thai text normally carries the single
 * precomposed code point U+0E33, so an exact string comparison never matched
 * them. Both spellings are listed now: the decomposed form is kept for input
 * that has been NFKC/NFKD-normalised, the composed form covers raw text.
 *
 * @type {string[]}
 */
const tha = [
  'กล่าว',
  'กว่า',
  'กัน',
  'กับ',
  'การ',
  'ก็',
  'ก่อน',
  'ขณะ',
  'ขอ',
  'ของ',
  'ขึ้น',
  'คง',
  'ครั้ง',
  'ความ',
  'คือ',
  'จะ',
  'จัด',
  'จาก',
  'จึง',
  'ช่วง',
  'ซึ่ง',
  'ดัง',
  'ด้วย',
  'ด้าน',
  'ตั้ง',
  'ตั้งแต่',
  'ตาม',
  'ต่อ',
  'ต่าง',
  'ต่างๆ',
  'ต้อง',
  'ถึง',
  'ถูก',
  'ถ้า',
  'ทั้ง',
  'ทั้งนี้',
  'ทาง',
  'ที่',
  'ที่สุด',
  'ทุก',
  'ทํา',
  '\u0E17\u0E33', // composed spelling of the entry above
  'ทําให้',
  '\u0E17\u0E33\u0E43\u0E2B\u0E49', // composed spelling of the entry above
  'นอกจาก',
  'นัก',
  'นั้น',
  'นี้',
  'น่า',
  'นํา',
  '\u0E19\u0E33', // composed spelling of the entry above
  'บาง',
  'ผล',
  'ผ่าน',
  'พบ',
  'พร้อม',
  'มา',
  'มาก',
  'มี',
  'ยัง',
  'รวม',
  'ระหว่าง',
  'รับ',
  'ราย',
  'ร่วม',
  'ลง',
  'วัน',
  'ว่า',
  'สุด',
  'ส่ง',
  'ส่วน',
  'สําหรับ',
  '\u0E2A\u0E33\u0E2B\u0E23\u0E31\u0E1A', // composed spelling of the entry above
  'หนึ่ง',
  'หรือ',
  'หลัง',
  'หลังจาก',
  'หลาย',
  'หาก',
  'อยาก',
  'อยู่',
  'อย่าง',
  'ออก',
  'อะไร',
  'อาจ',
  'อีก',
  'เขา',
  'เข้า',
  'เคย',
  'เฉพาะ',
  'เช่น',
  'เดียว',
  'เดียวกัน',
  'เนื่องจาก',
  'เปิด',
  'เปิดเผย',
  'เป็น',
  'เป็นการ',
  'เพราะ',
  'เพื่อ',
  'เมื่อ',
  'เรา',
  'เริ่ม',
  'เลย',
  'เห็น',
  'เอง',
  'แต่',
  'แบบ',
  'แรก',
  'และ',
  'แล้ว',
  'แห่ง',
  'โดย',
  'ใน',
  'ให้',
  'ได้',
  'ไป',
  'ไม่',
  'ไว้'
]
export { tha }
121 |
--------------------------------------------------------------------------------
/src/stopwords_fas.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, Chris Umbel
3 | Farsi Stop Words by Fardin Koochaki
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the 'Software'), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | */
23 |
/**
 * Persian / Farsi (ISO 639-3 `fas`) stopwords, one token per entry.
 *
 * @type {string[]}
 */
export const fas = [
  'از',
  'با',
  'به',
  'برای',
  'و',
  'باید',
  'شاید',
  'اکنون',
  'اگر',
  'اگرچه',
  'الا',
  'اما',
  'اندر',
  'اینکه',
  'باری',
  'بالعکس',
  'بدون',
  'بر',
  'بلکه',
  'بنابراین',
  'بی',
  'پس',
  'تا',
  'جز',
  'چنانچه',
  'چه',
  'چون',
  'در',
  'را',
  'روی',
  'زیرا',
  'سپس',
  'غیر',
  'که',
  'لیکن',
  'مانند',
  'مثل',
  'مگر',
  'نه',
  'نیز',
  'هرچند',
  'هم',
  'همان',
  'وانگهی',
  'ولی',
  'ولو',
  'همانند',
  'همچو'
]
75 |
--------------------------------------------------------------------------------
/src/stopwords_spa.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, David Przybilla, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
/**
 * Spanish (ISO 639-3 `spa`) stopwords, one lowercase token per entry.
 *
 * Each word appears exactly once; 'un' and 'sobre' used to be listed twice,
 * which made every linear membership scan slightly longer for no benefit.
 *
 * @type {string[]}
 */
const spa = [
  'a',
  'un',
  'el',
  'ella',
  'y',
  'sobre',
  'de',
  'la',
  'que',
  'en',
  'los',
  'del',
  'se',
  'las',
  'por',
  'para',
  'con',
  'no',
  'una',
  'su',
  'al',
  'lo',
  'como',
  'más',
  'pero',
  'sus',
  'le',
  'ya',
  'o',
  'porque',
  'cuando',
  'muy',
  'sin',
  'también',
  'me',
  'hasta',
  'donde',
  'quien',
  'desde',
  'nos',
  'durante',
  'uno',
  'ni',
  'contra',
  'ese',
  'eso',
  'mí',
  'qué',
  'otro',
  'él',
  'cual',
  'poco',
  'mi',
  'tú',
  'te',
  'ti',
  'sí'
]
export { spa }
85 |
--------------------------------------------------------------------------------
/src/stopwords_kur.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Gene Diaz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
/**
 * Kurdish (ISO 639-3 `kur`) stopwords written in Arabic script, one token
 * per entry.
 *
 * @type {string[]}
 */
export const kur = [
  'ئێمە',
  'ئێوە',
  'ئەم',
  'ئەو',
  'ئەوان',
  'ئەوەی',
  'بۆ',
  'بێ',
  'بێجگە',
  'بە',
  'بەبێ',
  'بەدەم',
  'بەردەم',
  'بەرلە',
  'بەرەوی',
  'بەرەوە',
  'بەلای',
  'بەپێی',
  'تۆ',
  'تێ',
  'جگە',
  'دوای',
  'دوو',
  'دە',
  'دەکات',
  'دەگەڵ',
  'سەر',
  'لێ',
  'لە',
  'لەبابەت',
  'لەباتی',
  'لەبارەی',
  'لەبرێتی',
  'لەبن',
  'لەبەر',
  'لەبەینی',
  'لەدەم',
  'لەرێ',
  'لەرێگا',
  'لەرەوی',
  'لەسەر',
  'لەلایەن',
  'لەناو',
  'لەنێو',
  'لەو',
  'لەپێناوی',
  'لەژێر',
  'لەگەڵ',
  'من',
  'ناو',
  'نێوان',
  'هەر',
  'هەروەها',
  'و',
  'وەک',
  'پاش',
  'پێ',
  'پێش',
  'چەند',
  'کرد',
  'کە',
  'ی'
]
88 |
--------------------------------------------------------------------------------
/src/stopwords_zho.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, David Przybilla, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
/**
 * Chinese (ISO 639-3 `zho`) stopwords. Every entry is a single simplified
 * character.
 *
 * @type {string[]}
 */
export const zho = [
  '的',
  '地',
  '得',
  '和',
  '跟',
  '与',
  '及',
  '向',
  '并',
  '等',
  '更',
  '已',
  '含',
  '做',
  '我',
  '你',
  '他',
  '她',
  '们',
  '某',
  '该',
  '各',
  '每',
  '这',
  '那',
  '哪',
  '什',
  '么',
  '谁',
  '年',
  '月',
  '日',
  '时',
  '分',
  '秒',
  '几',
  '多',
  '来',
  '在',
  '就',
  '又',
  '很',
  '呢',
  '吧',
  '吗',
  '了',
  '嘛',
  '哇',
  '儿',
  '哼',
  '啊',
  '嗯',
  '是',
  '着',
  '都',
  '不',
  '说',
  '也',
  '看',
  '把',
  '还',
  '个',
  '有',
  '小',
  '到',
  '一',
  '为',
  '中',
  '于',
  '对',
  '会',
  '之',
  '第',
  '此',
  '或',
  '共',
  '按',
  '请'
]
104 |
--------------------------------------------------------------------------------
/src/stopwords_vie.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, David Przybilla, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
/**
 * Vietnamese (ISO 639-3 `vie`) stopwords.
 *
 * NOTE(review): 'có thể', 'đến nỗi' and 'một cách' contain a space, so they
 * presumably only match if the caller passes multi-word tokens — confirm.
 * NOTE(review): 'từ' appears to be listed twice (mid-list and as the last
 * entry); kept as-is until it is verified that both are the same code points.
 *
 * @type {string[]}
 */
export const vie = [
  'bị',
  'bởi',
  'cả',
  'các',
  'cái',
  'cần',
  'càng',
  'chỉ',
  'chiếc',
  'cho',
  'chứ',
  'chưa',
  'chuyện',
  'có',
  'có thể',
  'cứ',
  'của',
  'cùng',
  'cũng',
  'đã',
  'đang',
  'để',
  'đến nỗi',
  'đều',
  'điều',
  'do',
  'đó',
  'được',
  'dưới',
  'gì',
  'khi',
  'không',
  'là',
  'lại',
  'lên',
  'lúc',
  'mà',
  'mỗi',
  'một cách',
  'này',
  'nên',
  'nếu',
  'ngay',
  'nhiều',
  'như',
  'nhưng',
  'những',
  'nơi',
  'nữa',
  'phải',
  'qua',
  'ra',
  'rằng',
  'rất',
  'rồi',
  'sau',
  'sẽ',
  'so',
  'sự',
  'tại',
  'theo',
  'thì',
  'trên',
  'trước',
  'từ',
  'từng',
  'và',
  'vẫn',
  'vào',
  'vậy',
  'vì',
  'việc',
  'với',
  'vừa',
  'vâng',
  'à',
  'ừ',
  'từ'
]
104 |
--------------------------------------------------------------------------------
/src/stopwords_fin.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 | Copyright (c) 2018 Espen Klem
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | */
23 |
24 | /* This list is frequency sorted. That means it can be sliced from the bottom
25 | and be less aggressive in excluding stopwords */
26 |
/**
 * Finnish (ISO 639-3 `fin`) stopwords.
 *
 * The order is significant: the list is frequency sorted, so callers can
 * drop entries from the end to get a less aggressive filter. Do not re-sort.
 *
 * @type {string[]}
 */
export const fin = [
  'ja',
  'on',
  'oli',
  'hän',
  'vuonna',
  'myös',
  'joka',
  'se',
  'sekä',
  'sen',
  'mutta',
  'ei',
  'ovat',
  'hänen',
  'n',
  'kanssa',
  'vuoden',
  'jälkeen',
  'että',
  's',
  'tai',
  'jonka',
  'jossa',
  'mukaan',
  'kun',
  'muun',
  'muassa',
  'hänet',
  'olivat',
  'kuitenkin',
  'noin',
  'vuosina',
  'aikana',
  'lisäksi',
  'kaksi',
  'kuin',
  'ollut',
  'the',
  'myöhemmin',
  'eli',
  'vain',
  'teki',
  'mm',
  'jotka',
  'ennen',
  'ensimmäinen',
  'a',
  '9',
  'jo',
  'kuten',
  'yksi',
  'ensimmäisen',
  'vastaan',
  'tämän',
  'vuodesta',
  'sitä',
  'voi',
  'luvun',
  'luvulla',
  'of',
  'ole',
  'kauden',
  'osa',
  'esimerkiksi',
  'jolloin',
  'yli',
  'de',
  'kaudella',
  'eri',
  'sillä',
  'kolme',
  'he',
  'vuotta'
]
103 |
--------------------------------------------------------------------------------
/src/stopwords_glg.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Galician (ISO 639-3 `glg`) stopwords, one lowercase token per entry.
 *
 * @type {string[]}
 */
export const glg = [
  'a',
  'alí',
  'ao',
  'aos',
  'aquel',
  'aquela',
  'aquelas',
  'aqueles',
  'aquilo',
  'aquí',
  'as',
  'así',
  'aínda',
  'ben',
  'cando',
  'che',
  'co',
  'coa',
  'coas',
  'comigo',
  'con',
  'connosco',
  'contigo',
  'convosco',
  'cos',
  'cun',
  'cunha',
  'cunhas',
  'cuns',
  'da',
  'dalgunha',
  'dalgunhas',
  'dalgún',
  'dalgúns',
  'das',
  'de',
  'del',
  'dela',
  'delas',
  'deles',
  'desde',
  'deste',
  'do',
  'dos',
  'dun',
  'dunha',
  'dunhas',
  'duns',
  'e',
  'el',
  'ela',
  'elas',
  'eles',
  'en',
  'era',
  'eran',
  'esa',
  'esas',
  'ese',
  'eses',
  'esta',
  'estaba',
  'estar',
  'este',
  'estes',
  'estiven',
  'estou',
  'está',
  'están',
  'eu',
  'facer',
  'foi',
  'foron',
  'fun',
  'había',
  'hai',
  'iso',
  'isto',
  'la',
  'las',
  'lle',
  'lles',
  'lo',
  'los',
  'mais',
  'me',
  'meu',
  'meus',
  'min',
  'miña',
  'miñas',
  'moi',
  'na',
  'nas',
  'neste',
  'nin',
  'no',
  'non',
  'nos',
  'nosa',
  'nosas',
  'noso',
  'nosos',
  'nun',
  'nunha',
  'nunhas',
  'nuns',
  'nós',
  'o',
  'os',
  'ou',
  'para',
  'pero',
  'pode',
  'pois',
  'pola',
  'polas',
  'polo',
  'polos',
  'por',
  'que',
  'se',
  'senón',
  'ser',
  'seu',
  'seus',
  'sexa',
  'sido',
  'sobre',
  'súa',
  'súas',
  'tamén',
  'tan',
  'te',
  'ten',
  'ter',
  'teu',
  'teus',
  'teñen',
  'teño',
  'ti',
  'tido',
  'tiven',
  'tiña',
  'túa',
  'túas',
  'un',
  'unha',
  'unhas',
  'uns',
  'vos',
  'vosa',
  'vosas',
  'voso',
  'vosos',
  'vós',
  'á',
  'é',
  'ó',
  'ós'
]
166 |
--------------------------------------------------------------------------------
/src/stopwords_lav.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Latvian (ISO 639-3 `lav`) stopwords, one lowercase token per entry.
 *
 * @type {string[]}
 */
export const lav = [
  'aiz',
  'ap',
  'apakš',
  'apakšpus',
  'ar',
  'arī',
  'augšpus',
  'bet',
  'bez',
  'bija',
  'biji',
  'biju',
  'bijām',
  'bijāt',
  'būs',
  'būsi',
  'būsiet',
  'būsim',
  'būt',
  'būšu',
  'caur',
  'diemžēl',
  'diezin',
  'droši',
  'dēļ',
  'esam',
  'esat',
  'esi',
  'esmu',
  'gan',
  'gar',
  'iekam',
  'iekams',
  'iekām',
  'iekāms',
  'iekš',
  'iekšpus',
  'ik',
  'ir',
  'it',
  'itin',
  'iz',
  'ja',
  'jau',
  'jeb',
  'jebšu',
  'jel',
  'jo',
  'jā',
  'ka',
  'kamēr',
  'kaut',
  'kolīdz',
  'kopš',
  'kā',
  'kļuva',
  'kļuvi',
  'kļuvu',
  'kļuvām',
  'kļuvāt',
  'kļūs',
  'kļūsi',
  'kļūsiet',
  'kļūsim',
  'kļūst',
  'kļūstam',
  'kļūstat',
  'kļūsti',
  'kļūstu',
  'kļūt',
  'kļūšu',
  'labad',
  'lai',
  'lejpus',
  'līdz',
  'līdzko',
  'ne',
  'nebūt',
  'nedz',
  'nekā',
  'nevis',
  'nezin',
  'no',
  'nu',
  'nē',
  'otrpus',
  'pa',
  'par',
  'pat',
  'pie',
  'pirms',
  'pret',
  'priekš',
  'pār',
  'pēc',
  'starp',
  'tad',
  'tak',
  'tapi',
  'taps',
  'tapsi',
  'tapsiet',
  'tapsim',
  'tapt',
  'tapāt',
  'tapšu',
  'taču',
  'te',
  'tiec',
  'tiek',
  'tiekam',
  'tiekat',
  'tieku',
  'tik',
  'tika',
  'tikai',
  'tiki',
  'tikko',
  'tiklab',
  'tiklīdz',
  'tiks',
  'tiksiet',
  'tiksim',
  'tikt',
  'tiku',
  'tikvien',
  'tikām',
  'tikāt',
  'tikšu',
  'tomēr',
  'topat',
  'turpretim',
  'turpretī',
  'tā',
  'tādēļ',
  'tālab',
  'tāpēc',
  'un',
  'uz',
  'vai',
  'var',
  'varat',
  'varēja',
  'varēji',
  'varēju',
  'varējām',
  'varējāt',
  'varēs',
  'varēsi',
  'varēsiet',
  'varēsim',
  'varēt',
  'varēšu',
  'vien',
  'virs',
  'virspus',
  'vis',
  'viņpus',
  'zem',
  'ārpus',
  'šaipus'
]
167 |
--------------------------------------------------------------------------------
/src/stopwords_epo.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Esperanto (ISO 639-3 `epo`) stopwords, one lowercase token per entry.
 * Includes hyphenated abbreviations (e.g. 'd-ro') and elided forms that end
 * in an apostrophe (doktor', sinjor').
 *
 * @type {string[]}
 */
export const epo = [
  'adiaŭ',
  'ajn',
  'al',
  'ankoraŭ',
  'antaŭ',
  'aŭ',
  'bonan',
  'bonvole',
  'bonvolu',
  'bv',
  'ci',
  'cia',
  'cian',
  'cin',
  'd-ro',
  'da',
  'de',
  'dek',
  'deka',
  'do',
  'doktor\'',
  'doktoro',
  'du',
  'dua',
  'dum',
  'eble',
  'ekz',
  'ekzemple',
  'en',
  'estas',
  'estis',
  'estos',
  'estu',
  'estus',
  'eĉ',
  'f-no',
  'feliĉan',
  'for',
  'fraŭlino',
  'ha',
  'havas',
  'havis',
  'havos',
  'havu',
  'havus',
  'he',
  'ho',
  'hu',
  'ili',
  'ilia',
  'ilian',
  'ilin',
  'inter',
  'io',
  'ion',
  'iu',
  'iujn',
  'iun',
  'ja',
  'jam',
  'je',
  'jes',
  'k',
  'kaj',
  'ke',
  'kio',
  'kion',
  'kiu',
  'kiujn',
  'kiun',
  'kvankam',
  'kvar',
  'kvara',
  'kvazaŭ',
  'kvin',
  'kvina',
  'la',
  'li',
  'lia',
  'lian',
  'lin',
  'malantaŭ',
  'male',
  'malgraŭ',
  'mem',
  'mi',
  'mia',
  'mian',
  'min',
  'minus',
  'naŭ',
  'naŭa',
  'ne',
  'nek',
  'nenio',
  'nenion',
  'neniu',
  'neniun',
  'nepre',
  'ni',
  'nia',
  'nian',
  'nin',
  'nu',
  'nun',
  'nur',
  'ok',
  'oka',
  'oni',
  'onia',
  'onian',
  'onin',
  'plej',
  'pli',
  'plu',
  'plus',
  'por',
  'post',
  'preter',
  's-no',
  's-ro',
  'se',
  'sed',
  'sep',
  'sepa',
  'ses',
  'sesa',
  'si',
  'sia',
  'sian',
  'sin',
  'sinjor\'',
  'sinjorino',
  'sinjoro',
  'sub',
  'super',
  'supren',
  'sur',
  'tamen',
  'tio',
  'tion',
  'tiu',
  'tiujn',
  'tiun',
  'tra',
  'tri',
  'tria',
  'tuj',
  'tute',
  'unu',
  'unua',
  've',
  'verŝajne',
  'vi',
  'via',
  'vian',
  'vin',
  'ĉi',
  'ĉio',
  'ĉion',
  'ĉiu',
  'ĉiujn',
  'ĉiun',
  'ĉu',
  'ĝi',
  'ĝia',
  'ĝian',
  'ĝin',
  'ĝis',
  'ĵus',
  'ŝi',
  'ŝia',
  'ŝin'
]
179 |
--------------------------------------------------------------------------------
/src/stopwords_por.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, Luís Rodrigues
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
/**
 * Portuguese (ISO 639-3 `por`) stopwords, one lowercase token per entry.
 *
 * @type {string[]}
 */
export const por = [
  'a',
  'à',
  'ao',
  'aos',
  'aquela',
  'aquelas',
  'aquele',
  'aqueles',
  'aquilo',
  'as',
  'às',
  'até',
  'com',
  'como',
  'da',
  'das',
  'de',
  'dela',
  'delas',
  'dele',
  'deles',
  'depois',
  'do',
  'dos',
  'e',
  'ela',
  'elas',
  'ele',
  'eles',
  'em',
  'entre',
  'essa',
  'essas',
  'esse',
  'esses',
  'esta',
  'estas',
  'este',
  'estes',
  'eu',
  'isso',
  'isto',
  'já',
  'lhe',
  'lhes',
  'mais',
  'mas',
  'me',
  'mesmo',
  'meu',
  'meus',
  'minha',
  'minhas',
  'muito',
  'muitos',
  'na',
  'não',
  'nas',
  'nem',
  'no',
  'nos',
  'nós',
  'nossa',
  'nossas',
  'nosso',
  'nossos',
  'num',
  'nuns',
  'numa',
  'numas',
  'o',
  'os',
  'ou',
  'para',
  'pela',
  'pelas',
  'pelo',
  'pelos',
  'por',
  'quais',
  'qual',
  'quando',
  'que',
  'quem',
  'se',
  'sem',
  'seu',
  'seus',
  'só',
  'sua',
  'suas',
  'também',
  'te',
  'teu',
  'teus',
  'tu',
  'tua',
  'tuas',
  'um',
  'uma',
  'umas',
  'você',
  'vocês',
  'vos',
  'vosso',
  'vossos'
]
132 |
--------------------------------------------------------------------------------
/src/stopwords_eng.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | const eng = [ // English stopwords, all lowercase; 'a' and 'i' are placed last, after the alphabetical run
24 | 'about',
25 | 'after',
26 | 'all',
27 | 'also',
28 | 'am',
29 | 'an',
30 | 'and',
31 | 'another',
32 | 'any',
33 | 'are',
34 | 'as',
35 | 'at',
36 | 'be',
37 | 'because',
38 | 'been',
39 | 'before',
40 | 'being',
41 | 'between',
42 | 'both',
43 | 'but',
44 | 'by',
45 | 'came',
46 | 'can',
47 | 'come',
48 | 'could',
49 | 'did',
50 | 'do',
51 | 'each',
52 | 'for',
53 | 'from',
54 | 'get',
55 | 'got',
56 | 'has',
57 | 'had',
58 | 'he',
59 | 'have',
60 | 'her',
61 | 'here',
62 | 'him',
63 | 'himself',
64 | 'his',
65 | 'how',
66 | 'if',
67 | 'in',
68 | 'into',
69 | 'is',
70 | 'it',
71 | 'like',
72 | 'make',
73 | 'many',
74 | 'me',
75 | 'might',
76 | 'more',
77 | 'most',
78 | 'much',
79 | 'must',
80 | 'my',
81 | 'never',
82 | 'now',
83 | 'of',
84 | 'on',
85 | 'only',
86 | 'or',
87 | 'other',
88 | 'our',
89 | 'out',
90 | 'over',
91 | 'said',
92 | 'same',
93 | 'should',
94 | 'since',
95 | 'some',
96 | 'still',
97 | 'such',
98 | 'take',
99 | 'than',
100 | 'that',
101 | 'the',
102 | 'their',
103 | 'them',
104 | 'then',
105 | 'there',
106 | 'these',
107 | 'they',
108 | 'this',
109 | 'those',
110 | 'through',
111 | 'to',
112 | 'too',
113 | 'under',
114 | 'up',
115 | 'very',
116 | 'was',
117 | 'way',
118 | 'we',
119 | 'well',
120 | 'were',
121 | 'what',
122 | 'where',
123 | 'which',
124 | 'while',
125 | 'who',
126 | 'with',
127 | 'would',
128 | 'you',
129 | 'your',
130 | 'a',
131 | 'i'
132 | ]
133 | export { eng }
134 |
--------------------------------------------------------------------------------
/src/stopwords_hrv.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const hrv = [ // Croatian stopwords, lowercase; sorted by code point, so entries starting with 'ć' / 'š' come last
4 | 'a',
5 | 'ako',
6 | 'ali',
7 | 'bi',
8 | 'bih',
9 | 'bila',
10 | 'bili',
11 | 'bilo',
12 | 'bio',
13 | 'bismo',
14 | 'biste',
15 | 'biti',
16 | 'bumo',
17 | 'da',
18 | 'do',
19 | 'duž',
20 | 'ga',
21 | 'hoće',
22 | 'hoćemo',
23 | 'hoćete',
24 | 'hoćeš',
25 | 'hoću',
26 | 'i',
27 | 'iako',
28 | 'ih',
29 | 'ili',
30 | 'iz',
31 | 'ja',
32 | 'je',
33 | 'jedna',
34 | 'jedne',
35 | 'jedno',
36 | 'jer',
37 | 'jesam',
38 | 'jesi',
39 | 'jesmo',
40 | 'jest',
41 | 'jeste',
42 | 'jesu',
43 | 'jim',
44 | 'joj',
45 | 'još',
46 | 'ju',
47 | 'kada',
48 | 'kako',
49 | 'kao',
50 | 'koja',
51 | 'koje',
52 | 'koji',
53 | 'kojima',
54 | 'koju',
55 | 'kroz',
56 | 'li',
57 | 'me',
58 | 'mene',
59 | 'meni',
60 | 'mi',
61 | 'mimo',
62 | 'moj',
63 | 'moja',
64 | 'moje',
65 | 'mu',
66 | 'na',
67 | 'nad',
68 | 'nakon',
69 | 'nam',
70 | 'nama',
71 | 'nas',
72 | 'naš',
73 | 'naša',
74 | 'naše',
75 | 'našeg',
76 | 'ne',
77 | 'nego',
78 | 'neka',
79 | 'neki',
80 | 'nekog',
81 | 'neku',
82 | 'nema',
83 | 'netko',
84 | 'neće',
85 | 'nećemo',
86 | 'nećete',
87 | 'nećeš',
88 | 'neću',
89 | 'nešto',
90 | 'ni',
91 | 'nije',
92 | 'nikoga',
93 | 'nikoje',
94 | 'nikoju',
95 | 'nisam',
96 | 'nisi',
97 | 'nismo',
98 | 'niste',
99 | 'nisu',
100 | 'njega',
101 | 'njegov',
102 | 'njegova',
103 | 'njegovo',
104 | 'njemu',
105 | 'njezin',
106 | 'njezina',
107 | 'njezino',
108 | 'njih',
109 | 'njihov',
110 | 'njihova',
111 | 'njihovo',
112 | 'njim',
113 | 'njima',
114 | 'njoj',
115 | 'nju',
116 | 'no',
117 | 'o',
118 | 'od',
119 | 'odmah',
120 | 'on',
121 | 'ona',
122 | 'oni',
123 | 'ono',
124 | 'ova',
125 | 'pa',
126 | 'pak',
127 | 'po',
128 | 'pod',
129 | 'pored',
130 | 'prije',
131 | 's',
132 | 'sa',
133 | 'sam',
134 | 'samo',
135 | 'se',
136 | 'sebe',
137 | 'sebi',
138 | 'si',
139 | 'smo',
140 | 'ste',
141 | 'su',
142 | 'sve',
143 | 'svi',
144 | 'svog',
145 | 'svoj',
146 | 'svoja',
147 | 'svoje',
148 | 'svom',
149 | 'ta',
150 | 'tada',
151 | 'taj',
152 | 'tako',
153 | 'te',
154 | 'tebe',
155 | 'tebi',
156 | 'ti',
157 | 'to',
158 | 'toj',
159 | 'tome',
160 | 'tu',
161 | 'tvoj',
162 | 'tvoja',
163 | 'tvoje',
164 | 'u',
165 | 'uz',
166 | 'vam',
167 | 'vama',
168 | 'vas',
169 | 'vaš',
170 | 'vaša',
171 | 'vaše',
172 | 'već',
173 | 'vi',
174 | 'vrlo',
175 | 'za',
176 | 'zar',
177 | 'će',
178 | 'ćemo',
179 | 'ćete',
180 | 'ćeš',
181 | 'ću',
182 | 'što'
183 | ]
184 | export { hrv }
185 |
--------------------------------------------------------------------------------
/src/stopwords_nld.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, Chris Umbel, Martijn de Boer, Damien van Holten
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | // This Dutch word list has been parsed from a list created by Damien van Holten
24 | // source: http://www.damienvanholten.com/blog/dutch-stop-words/
25 |
26 | const nld = [ // Dutch stopwords, all lowercase, in alphabetical order
27 | 'aan',
28 | 'af',
29 | 'al',
30 | 'alles',
31 | 'als',
32 | 'altijd',
33 | 'andere',
34 | 'ben',
35 | 'bij',
36 | 'daar',
37 | 'dan',
38 | 'dat',
39 | 'de',
40 | 'der',
41 | 'deze',
42 | 'die',
43 | 'dit',
44 | 'doch',
45 | 'doen',
46 | 'door',
47 | 'dus',
48 | 'een',
49 | 'eens',
50 | 'en',
51 | 'er',
52 | 'ge',
53 | 'geen',
54 | 'geweest',
55 | 'haar',
56 | 'had',
57 | 'heb',
58 | 'hebben',
59 | 'heeft',
60 | 'hem',
61 | 'het',
62 | 'hier',
63 | 'hij',
64 | 'hoe',
65 | 'hun',
66 | 'iemand',
67 | 'iets',
68 | 'ik',
69 | 'in',
70 | 'is',
71 | 'ja',
72 | 'je',
73 | 'kan',
74 | 'kon',
75 | 'kunnen',
76 | 'maar',
77 | 'me',
78 | 'meer',
79 | 'men',
80 | 'met',
81 | 'mij',
82 | 'mijn',
83 | 'moet',
84 | 'na',
85 | 'naar',
86 | 'niet',
87 | 'niets',
88 | 'nog',
89 | 'nu',
90 | 'of',
91 | 'om',
92 | 'omdat',
93 | 'ons',
94 | 'ook',
95 | 'op',
96 | 'over',
97 | 'reeds',
98 | 'te',
99 | 'tegen',
100 | 'toch',
101 | 'toen',
102 | 'tot',
103 | 'u',
104 | 'uit',
105 | 'uw',
106 | 'van',
107 | 'veel',
108 | 'voor',
109 | 'want',
110 | 'waren',
111 | 'was',
112 | 'wat',
113 | 'we',
114 | 'wel',
115 | 'werd',
116 | 'wezen',
117 | 'wie',
118 | 'wij',
119 | 'wil',
120 | 'worden',
121 | 'zal',
122 | 'ze',
123 | 'zei',
124 | 'zelf',
125 | 'zich',
126 | 'zij',
127 | 'zijn',
128 | 'zo',
129 | 'zonder',
130 | 'zou'
131 | ]
132 | export { nld }
133 |
--------------------------------------------------------------------------------
/src/stopwords_nob.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2014, Kristoffer Brabrand
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | const nob = [
24 | 'og',
25 | 'i',
26 | 'jeg',
27 | 'det',
28 | 'at',
29 | 'en',
30 | 'et',
31 | 'den',
32 | 'til',
33 | 'er',
34 | 'som',
35 | 'på',
36 | 'de',
37 | 'med',
38 | 'han',
39 | 'av',
40 | 'ikke',
41 | 'der',
42 | 'så',
43 | 'var',
44 | 'meg',
45 | 'seg',
46 | 'men',
47 | 'ett',
48 | 'har',
49 | 'om',
50 | 'vi',
51 | 'min',
52 | 'mitt',
53 | 'ha',
54 | 'hadde',
55 | 'hun',
56 | 'nå',
57 | 'over',
58 | 'da',
59 | 'ved',
60 | 'fra',
61 | 'du',
62 | 'ut',
63 | 'sin',
64 | 'dem',
65 | 'oss',
66 | 'opp',
67 | 'man',
68 | 'kan',
69 | 'hans',
70 | 'hvor',
71 | 'eller',
72 | 'hva',
73 | 'skal',
74 | 'selv',
75 | 'sjøl',
76 | 'her',
77 | 'alle',
78 | 'vil',
79 | 'bli',
80 | 'ble',
81 | 'blitt',
82 | 'kunne',
83 | 'inn',
84 | 'når',
85 | 'kom',
86 | 'noen',
87 | 'noe',
88 | 'ville',
89 | 'dere',
90 | 'som',
91 | 'deres',
92 | 'kun',
93 | 'ja',
94 | 'etter',
95 | 'ned',
96 | 'skulle',
97 | 'denne',
98 | 'for',
99 | 'deg',
100 | 'si',
101 | 'sine',
102 | 'sitt',
103 | 'mot',
104 | 'å',
105 | 'meget',
106 | 'hvorfor',
107 | 'dette',
108 | 'disse',
109 | 'uten',
110 | 'hvordan',
111 | 'ingen',
112 | 'din',
113 | 'ditt',
114 | 'blir',
115 | 'samme',
116 | 'hvilken',
117 | 'hvilke',
118 | 'sånn',
119 | 'inni',
120 | 'mellom',
121 | 'vår',
122 | 'hver',
123 | 'hvem',
124 | 'vors',
125 | 'hvis',
126 | 'både',
127 | 'bare',
128 | 'enn',
129 | 'fordi',
130 | 'før',
131 | 'mange',
132 | 'også',
133 | 'slik',
134 | 'vært',
135 | 'være',
136 | 'begge',
137 | 'siden',
138 | 'henne',
139 | 'hennar',
140 | 'hennes'
141 | ]
142 | export { nob }
143 |
--------------------------------------------------------------------------------
/src/stopwords_heb.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 | Guy Saar - Stop words list
4 | */
5 | const heb = [ // Hebrew stopwords. NOTE(review): some entries appear to be multi-word phrases with the spaces removed — confirm intended
6 | 'אבל',
7 | 'או',
8 | 'אולי',
9 | 'אותה',
10 | 'אותו',
11 | 'אותי',
12 | 'אותך',
13 | 'אותם',
14 | 'אותן',
15 | 'אותנו',
16 | 'אז',
17 | 'אחר',
18 | 'אחרות',
19 | 'אחרי',
20 | 'אחריכן',
21 | 'אחרים',
22 | 'אחרת',
23 | 'אי',
24 | 'איזה',
25 | 'איך',
26 | 'אין',
27 | 'איפה',
28 | 'איתה',
29 | 'איתו',
30 | 'איתי',
31 | 'איתך',
32 | 'איתכם',
33 | 'איתכן',
34 | 'איתם',
35 | 'איתן',
36 | 'איתנו',
37 | 'אך',
38 | 'אל',
39 | 'אלה',
40 | 'אלו',
41 | 'אם',
42 | 'אנחנו',
43 | 'אני',
44 | 'אס',
45 | 'אף',
46 | 'אצל',
47 | 'אשר',
48 | 'את',
49 | 'אתה',
50 | 'אתכם',
51 | 'אתכן',
52 | 'אתם',
53 | 'אתן',
54 | 'באיזומידה',
55 | 'באמצע',
56 | 'באמצעות',
57 | 'בגלל',
58 | 'בין',
59 | 'בלי',
60 | 'במידה',
61 | 'במקוםשבו',
62 | 'ברם',
63 | 'בשביל',
64 | 'בשעהש',
65 | 'בתוך',
66 | 'גם',
67 | 'דרך',
68 | 'הוא',
69 | 'היא',
70 | 'היה',
71 | 'היכן',
72 | 'היתה',
73 | 'היתי',
74 | 'הם',
75 | 'הן',
76 | 'הנה',
77 | 'הסיבהשבגללה',
78 | 'הרי',
79 | 'ואילו',
80 | 'ואת',
81 | 'זאת',
82 | 'זה',
83 | 'זות',
84 | 'יהיה',
85 | 'יוכל',
86 | 'יוכלו',
87 | 'יותרמדי',
88 | 'יכול',
89 | 'יכולה',
90 | 'יכולות',
91 | 'יכולים',
92 | 'יכל',
93 | 'יכלה',
94 | 'יכלו',
95 | 'יש',
96 | 'כאן',
97 | 'כאשר',
98 | 'כולם',
99 | 'כולן',
100 | 'כזה',
101 | 'כי',
102 | 'כיצד',
103 | 'כך',
104 | 'ככה',
105 | 'כל',
106 | 'כלל',
107 | 'כמו',
108 | 'כן',
109 | 'כפי',
110 | 'כש',
111 | 'לא',
112 | 'לאו',
113 | 'לאיזותכלית',
114 | 'לאן',
115 | 'לבין',
116 | 'לה',
117 | 'להיות',
118 | 'להם',
119 | 'להן',
120 | 'לו',
121 | 'לי',
122 | 'לכם',
123 | 'לכן',
124 | 'למה',
125 | 'למטה',
126 | 'למעלה',
127 | 'למקוםשבו',
128 | 'למרות',
129 | 'לנו',
130 | 'לעבר',
131 | 'לעיכן',
132 | 'לפיכך',
133 | 'לפני',
134 | 'מאד',
135 | 'מאחורי',
136 | 'מאיזוסיבה',
137 | 'מאין',
138 | 'מאיפה',
139 | 'מבלי',
140 | 'מבעד',
141 | 'מדוע',
142 | 'מה',
143 | 'מהיכן',
144 | 'מול',
145 | 'מחוץ',
146 | 'מי',
147 | 'מכאן',
148 | 'מכיוון',
149 | 'מלבד',
150 | 'מן',
151 | 'מנין',
152 | 'מסוגל',
153 | 'מעט',
154 | 'מעטים',
155 | 'מעל',
156 | 'מצד',
157 | 'מקוםבו',
158 | 'מתחת',
159 | 'מתי',
160 | 'נגד',
161 | 'נגר',
162 | 'נו',
163 | 'עד',
164 | 'עז',
165 | 'על',
166 | 'עלי',
167 | 'עליה',
168 | 'עליהם',
169 | 'עליהן',
170 | 'עליו',
171 | 'עליך',
172 | 'עליכם',
173 | 'עלינו',
174 | 'עם',
175 | 'עצמה',
176 | 'עצמהם',
177 | 'עצמהן',
178 | 'עצמו',
179 | 'עצמי',
180 | 'עצמם',
181 | 'עצמן',
182 | 'עצמנו',
183 | 'פה',
184 | 'רק',
185 | 'שוב',
186 | 'של',
187 | 'שלה',
188 | 'שלהם',
189 | 'שלהן',
190 | 'שלו',
191 | 'שלי',
192 | 'שלך',
193 | 'שלכה',
194 | 'שלכם',
195 | 'שלכן',
196 | 'שלנו',
197 | 'שם',
198 | 'תהיה',
199 | 'תחת'
200 | ]
201 | export { heb }
202 |
--------------------------------------------------------------------------------
/src/stopwords_swa.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2016 Liam Doherty
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use this file except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 |
17 | // This list is sorted by frequency. That means it can be truncated from the
18 | // end to be less aggressive in excluding stopwords.
19 |
20 | const swa = [ // Swahili stopwords, lowercase; ordered by frequency (see note above), not alphabetically — do not re-sort
21 | 'na',
22 | 'ya',
23 | 'wa',
24 | 'kwa',
25 | 'ni',
26 | 'za',
27 | 'katika',
28 | 'la',
29 | 'kuwa',
30 | 'kama',
31 | 'kwamba',
32 | 'cha',
33 | 'hiyo',
34 | 'lakini',
35 | 'yake',
36 | 'hata',
37 | 'wakati',
38 | 'hivyo',
39 | 'sasa',
40 | 'wake',
41 | 'au',
42 | 'watu',
43 | 'hii',
44 | 'zaidi',
45 | 'vya',
46 | 'huo',
47 | 'tu',
48 | 'kwenye',
49 | 'si',
50 | 'pia',
51 | 'ili',
52 | 'moja',
53 | 'kila',
54 | 'baada',
55 | 'ambao',
56 | 'ambayo',
57 | 'yao',
58 | 'wao',
59 | 'kuna',
60 | 'hilo',
61 | 'kutoka',
62 | 'kubwa',
63 | 'pamoja',
64 | 'bila',
65 | 'huu',
66 | 'hayo',
67 | 'sana',
68 | 'ndani',
69 | 'mkuu',
70 | 'hizo',
71 | 'kufanya',
72 | 'wengi',
73 | 'hadi',
74 | 'mmoja',
75 | 'hili',
76 | 'juu',
77 | 'kwanza',
78 | 'wetu',
79 | 'kuhusu',
80 | 'baadhi',
81 | 'wote',
82 | 'yetu',
83 | 'hivi',
84 | 'kweli',
85 | 'mara',
86 | 'wengine',
87 | 'nini',
88 | 'ndiyo',
89 | 'zao',
90 | 'kati',
91 | 'hao',
92 | 'hapa',
93 | 'kutokana',
94 | 'muda',
95 | 'habari',
96 | 'ambaye',
97 | 'wenye',
98 | 'nyingine',
99 | 'hakuna',
100 | 'tena',
101 | 'hatua',
102 | 'bado',
103 | 'nafasi',
104 | 'basi',
105 | 'kabisa',
106 | 'hicho',
107 | 'nje',
108 | 'huyo',
109 | 'vile',
110 | 'yote',
111 | 'mkubwa',
112 | 'alikuwa',
113 | 'zote',
114 | 'leo',
115 | 'haya',
116 | 'huko',
117 | 'kutoa',
118 | 'mwa',
119 | 'kiasi',
120 | 'hasa',
121 | 'nyingi',
122 | 'kabla',
123 | 'wale',
124 | 'chini',
125 | 'gani',
126 | 'hapo',
127 | 'lazima',
128 | 'mwingine',
129 | 'bali',
130 | 'huku',
131 | 'zake',
132 | 'ilikuwa',
133 | 'tofauti',
134 | 'kupata',
135 | 'mbalimbali',
136 | 'pale',
137 | 'kusema',
138 | 'badala',
139 | 'wazi',
140 | 'yeye',
141 | 'alisema',
142 | 'hawa',
143 | 'ndio',
144 | 'hizi',
145 | 'tayari',
146 | 'wala',
147 | 'muhimu',
148 | 'ile',
149 | 'mpya',
150 | 'ambazo',
151 | 'dhidi',
152 | 'kwenda',
153 | 'sisi',
154 | 'kwani',
155 | 'jinsi',
156 | 'binafsi',
157 | 'kutumia',
158 | 'mbili',
159 | 'mbali',
160 | 'kuu',
161 | 'mengine',
162 | 'mbele',
163 | 'namna',
164 | 'mengi',
165 | 'upande'
166 | ]
167 | export { swa }
168 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at espen.klem@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/src/stopwords_rus.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, Polyakov Vladimir, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | const rus = [ // Russian stopwords, lowercase Cyrillic; the list is not in alphabetical order
24 | 'и',
25 | 'в',
26 | 'во',
27 | 'не',
28 | 'что',
29 | 'он',
30 | 'на',
31 | 'я',
32 | 'с',
33 | 'со',
34 | 'как',
35 | 'а',
36 | 'то',
37 | 'все',
38 | 'она',
39 | 'так',
40 | 'его',
41 | 'но',
42 | 'да',
43 | 'ты',
44 | 'к',
45 | 'у',
46 | 'же',
47 | 'вы',
48 | 'за',
49 | 'бы',
50 | 'по',
51 | 'только',
52 | 'ее',
53 | 'мне',
54 | 'было',
55 | 'вот',
56 | 'от',
57 | 'меня',
58 | 'еще',
59 | 'нет',
60 | 'о',
61 | 'из',
62 | 'ему',
63 | 'теперь',
64 | 'когда',
65 | 'даже',
66 | 'ну',
67 | 'ли',
68 | 'если',
69 | 'уже',
70 | 'или',
71 | 'ни',
72 | 'быть',
73 | 'был',
74 | 'него',
75 | 'до',
76 | 'вас',
77 | 'нибудь',
78 | 'уж',
79 | 'вам',
80 | 'сказал',
81 | 'ведь',
82 | 'там',
83 | 'потом',
84 | 'себя',
85 | 'ничего',
86 | 'ей',
87 | 'может',
88 | 'они',
89 | 'тут',
90 | 'где',
91 | 'есть',
92 | 'надо',
93 | 'ней',
94 | 'для',
95 | 'мы',
96 | 'тебя',
97 | 'их',
98 | 'чем',
99 | 'была',
100 | 'сам',
101 | 'чтоб',
102 | 'без',
103 | 'будто',
104 | 'чего',
105 | 'раз',
106 | 'тоже',
107 | 'себе',
108 | 'под',
109 | 'будет',
110 | 'ж',
111 | 'тогда',
112 | 'кто',
113 | 'этот',
114 | 'того',
115 | 'потому',
116 | 'этого',
117 | 'какой',
118 | 'совсем',
119 | 'ним',
120 | 'этом',
121 | 'почти',
122 | 'мой',
123 | 'тем',
124 | 'чтобы',
125 | 'нее',
126 | 'были',
127 | 'куда',
128 | 'всех',
129 | 'никогда',
130 | 'сегодня',
131 | 'можно',
132 | 'при',
133 | 'об',
134 | 'другой',
135 | 'хоть',
136 | 'после',
137 | 'над',
138 | 'больше',
139 | 'тот',
140 | 'через',
141 | 'эти',
142 | 'нас',
143 | 'про',
144 | 'всего',
145 | 'них',
146 | 'какая',
147 | 'много',
148 | 'разве',
149 | 'эту',
150 | 'моя',
151 | 'свою',
152 | 'этой',
153 | 'перед',
154 | 'иногда',
155 | 'лучше',
156 | 'чуть',
157 | 'том',
158 | 'нельзя',
159 | 'такой',
160 | 'им',
161 | 'более',
162 | 'всегда',
163 | 'конечно',
164 | 'всю',
165 | 'между',
166 | 'это',
167 | 'лишь'
168 | ]
169 | export { rus }
170 |
--------------------------------------------------------------------------------
/src/stopwords_cat.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const cat = [ // Catalan stopwords, lowercase; includes elided forms whose apostrophe is backslash-escaped inside the single-quoted literal
4 | 'a',
5 | 'abans',
6 | 'ací',
7 | 'ah',
8 | 'així',
9 | 'això',
10 | 'al',
11 | 'aleshores',
12 | 'algun',
13 | 'alguna',
14 | 'algunes',
15 | 'alguns',
16 | 'alhora',
17 | 'allà',
18 | 'allí',
19 | 'allò',
20 | 'als',
21 | 'altra',
22 | 'altre',
23 | 'altres',
24 | 'amb',
25 | 'ambdues',
26 | 'ambdós',
27 | 'apa',
28 | 'aquell',
29 | 'aquella',
30 | 'aquelles',
31 | 'aquells',
32 | 'aquest',
33 | 'aquesta',
34 | 'aquestes',
35 | 'aquests',
36 | 'aquí',
37 | 'baix',
38 | 'cada',
39 | 'cadascuna',
40 | 'cadascunes',
41 | 'cadascuns',
42 | 'cadascú',
43 | 'com',
44 | 'contra',
45 | 'd\'un',
46 | 'd\'una',
47 | 'd\'unes',
48 | 'd\'uns',
49 | 'dalt',
50 | 'de',
51 | 'del',
52 | 'dels',
53 | 'des',
54 | 'després',
55 | 'dins',
56 | 'dintre',
57 | 'donat',
58 | 'doncs',
59 | 'durant',
60 | 'e',
61 | 'eh',
62 | 'el',
63 | 'els',
64 | 'em',
65 | 'en',
66 | 'encara',
67 | 'ens',
68 | 'entre',
69 | 'eren',
70 | 'es',
71 | 'esta',
72 | 'estaven',
73 | 'esteu',
74 | 'està',
75 | 'estàvem',
76 | 'estàveu',
77 | 'et',
78 | 'etc',
79 | 'ets',
80 | 'fins',
81 | 'fora',
82 | 'gairebé',
83 | 'ha',
84 | 'han',
85 | 'has',
86 | 'havia',
87 | 'he',
88 | 'hem',
89 | 'heu',
90 | 'hi',
91 | 'ho',
92 | 'i',
93 | 'igual',
94 | 'iguals',
95 | 'ja',
96 | 'l\'hi',
97 | 'la',
98 | 'les',
99 | 'li',
100 | 'li\'n',
101 | 'llavors',
102 | 'm\'he',
103 | 'ma',
104 | 'mal',
105 | 'malgrat',
106 | 'mateix',
107 | 'mateixa',
108 | 'mateixes',
109 | 'mateixos',
110 | 'me',
111 | 'mentre',
112 | 'meu',
113 | 'meus',
114 | 'meva',
115 | 'meves',
116 | 'molt',
117 | 'molta',
118 | 'moltes',
119 | 'molts',
120 | 'mon',
121 | 'mons',
122 | 'més',
123 | 'n\'he',
124 | 'n\'hi',
125 | 'ne',
126 | 'ni',
127 | 'no',
128 | 'nogensmenys',
129 | 'només',
130 | 'nosaltres',
131 | 'nostra',
132 | 'nostre',
133 | 'nostres',
134 | 'o',
135 | 'oh',
136 | 'oi',
137 | 'on',
138 | 'pas',
139 | 'pel',
140 | 'pels',
141 | 'per',
142 | 'perquè',
143 | 'però',
144 | 'poc',
145 | 'poca',
146 | 'pocs',
147 | 'poques',
148 | 'potser',
149 | 'propi',
150 | 'qual',
151 | 'quals',
152 | 'quan',
153 | 'quant',
154 | 'que',
155 | 'quelcom',
156 | 'qui',
157 | 'quin',
158 | 'quina',
159 | 'quines',
160 | 'quins',
161 | 'què',
162 | 's\'ha',
163 | 's\'han',
164 | 'sa',
165 | 'semblant',
166 | 'semblants',
167 | 'ses',
168 | 'seu',
169 | 'seus',
170 | 'seva',
171 | 'seves',
172 | 'si',
173 | 'sobre',
174 | 'sobretot',
175 | 'solament',
176 | 'sols',
177 | 'son',
178 | 'sons',
179 | 'sota',
180 | 'sou',
181 | 'sóc',
182 | 'són',
183 | 't\'ha',
184 | 't\'han',
185 | 't\'he',
186 | 'ta',
187 | 'tal',
188 | 'també',
189 | 'tampoc',
190 | 'tan',
191 | 'tant',
192 | 'tanta',
193 | 'tantes',
194 | 'teu',
195 | 'teus',
196 | 'teva',
197 | 'teves',
198 | 'ton',
199 | 'tons',
200 | 'tot',
201 | 'tota',
202 | 'totes',
203 | 'tots',
204 | 'un',
205 | 'una',
206 | 'unes',
207 | 'uns',
208 | 'us',
209 | 'va',
210 | 'vaig',
211 | 'vam',
212 | 'van',
213 | 'vas',
214 | 'veu',
215 | 'vosaltres',
216 | 'vostra',
217 | 'vostre',
218 | 'vostres',
219 | 'érem',
220 | 'éreu',
221 | 'és'
222 | ]
223 | export { cat }
224 |
--------------------------------------------------------------------------------
/src/stopwords_tgl.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Gene Diaz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
23 | const tgl = [ // Tagalog stopwords, all lowercase, in alphabetical order
24 | 'akin',
25 | 'aking',
26 | 'ako',
27 | 'alin',
28 | 'am',
29 | 'amin',
30 | 'aming',
31 | 'ang',
32 | 'ano',
33 | 'anumang',
34 | 'apat',
35 | 'at',
36 | 'atin',
37 | 'ating',
38 | 'ay',
39 | 'bababa',
40 | 'bago',
41 | 'bakit',
42 | 'bawat',
43 | 'bilang',
44 | 'dahil',
45 | 'dalawa',
46 | 'dapat',
47 | 'din',
48 | 'dito',
49 | 'doon',
50 | 'gagawin',
51 | 'gayunman',
52 | 'ginagawa',
53 | 'ginawa',
54 | 'ginawang',
55 | 'gumawa',
56 | 'gusto',
57 | 'habang',
58 | 'hanggang',
59 | 'hindi',
60 | 'huwag',
61 | 'iba',
62 | 'ibaba',
63 | 'ibabaw',
64 | 'ibig',
65 | 'ikaw',
66 | 'ilagay',
67 | 'ilalim',
68 | 'ilan',
69 | 'inyong',
70 | 'isa',
71 | 'isang',
72 | 'itaas',
73 | 'ito',
74 | 'iyo',
75 | 'iyon',
76 | 'iyong',
77 | 'ka',
78 | 'kahit',
79 | 'kailangan',
80 | 'kailanman',
81 | 'kami',
82 | 'kanila',
83 | 'kanilang',
84 | 'kanino',
85 | 'kanya',
86 | 'kanyang',
87 | 'kapag',
88 | 'kapwa',
89 | 'karamihan',
90 | 'katiyakan',
91 | 'katulad',
92 | 'kaya',
93 | 'kaysa',
94 | 'ko',
95 | 'kong',
96 | 'kulang',
97 | 'kumuha',
98 | 'kung',
99 | 'laban',
100 | 'lahat',
101 | 'lamang',
102 | 'likod',
103 | 'lima',
104 | 'maaari',
105 | 'maaaring',
106 | 'maging',
107 | 'mahusay',
108 | 'makita',
109 | 'marami',
110 | 'marapat',
111 | 'masyado',
112 | 'may',
113 | 'mayroon',
114 | 'mga',
115 | 'minsan',
116 | 'mismo',
117 | 'mula',
118 | 'muli',
119 | 'na',
120 | 'nabanggit',
121 | 'naging',
122 | 'nagkaroon',
123 | 'nais',
124 | 'nakita',
125 | 'namin',
126 | 'napaka',
127 | 'narito',
128 | 'nasaan',
129 | 'ng',
130 | 'ngayon',
131 | 'ni',
132 | 'nila',
133 | 'nilang',
134 | 'nito',
135 | 'niya',
136 | 'niyang',
137 | 'noon',
138 | 'o',
139 | 'pa',
140 | 'paano',
141 | 'pababa',
142 | 'paggawa',
143 | 'pagitan',
144 | 'pagkakaroon',
145 | 'pagkatapos',
146 | 'palabas',
147 | 'pamamagitan',
148 | 'panahon',
149 | 'pangalawa',
150 | 'para',
151 | 'paraan',
152 | 'pareho',
153 | 'pataas',
154 | 'pero',
155 | 'pumunta',
156 | 'pumupunta',
157 | 'sa',
158 | 'saan',
159 | 'sabi',
160 | 'sabihin',
161 | 'sarili',
162 | 'sila',
163 | 'sino',
164 | 'siya',
165 | 'tatlo',
166 | 'tayo',
167 | 'tulad',
168 | 'tungkol',
169 | 'una',
170 | 'walang'
171 | ]
172 | export { tgl }
173 |
--------------------------------------------------------------------------------
/src/stopwords_ukr.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Gene Diaz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
/**
 * Ukrainian stopwords.
 *
 * Entries are lowercase and unique. The list previously contained 'від' and
 * 'під' twice each; the repeated entries added nothing to matching and have
 * been dropped, keeping the first occurrence of each.
 */
const ukr = [
  'а',
  'або',
  'авжеж',
  'адже',
  'аж',
  'але',
  'ані',
  'б',
  'без',
  'би',
  'бо',
  'був',
  'була',
  'були',
  'було',
  'бути',
  'більш',
  'в',
  'вам',
  'вами',
  'вас',
  'весь',
  'вже',
  'вздовж',
  'ви',
  'від',
  'вниз',
  'внизу',
  'вона',
  'вони',
  'воно',
  'все',
  'всередині',
  'всіх',
  'вся',
  'він',
  'да',
  'давай',
  'давати',
  'де',
  'десь',
  'дещо',
  'для',
  'до',
  'є',
  'ж',
  'же',
  'з',
  'за',
  'завжди',
  'замість',
  'зі',
  'і',
  'із',
  'інших',
  'її',
  'їй',
  'їм',
  'їх',
  'й',
  'його',
  'йому',
  'коли',
  'ледве',
  'лиш',
  'майже',
  'мене',
  'мені',
  'ми',
  'між',
  'мій',
  'мною',
  'мов',
  'мого',
  'моєї',
  'моє',
  'може',
  'мої',
  'моїх',
  'моя',
  'на',
  'над',
  'навколо',
  'навіть',
  'нам',
  'нами',
  'нас',
  'наче',
  'наш',
  'не',
  'нє',
  'неї',
  'нема',
  'немов',
  'неначе',
  'нею',
  'ним',
  'ними',
  'них',
  'ні',
  'ніби',
  'ніщо',
  'нього',
  'о',
  'ось',
  'от',
  'отже',
  'отож',
  'під',
  'по',
  'поза',
  'про',
  'сам',
  'сама',
  'свій',
  'свої',
  'своя',
  'свою',
  'себе',
  'собі',
  'та',
  'там',
  'так',
  'така',
  'такий',
  'також',
  'твій',
  'твого',
  'твоєї',
  'твої',
  'твоя',
  'те',
  'тебе',
  'ти',
  'ті',
  'тільки',
  'то',
  'тобі',
  'тобою',
  'тобто',
  'тоді',
  'тож',
  'той',
  'тощо',
  'тут',
  'у',
  'хіба',
  'хоч',
  'хоча',
  'це',
  'цей',
  'ці',
  'ця',
  'чи',
  'чого',
  'ще',
  'що',
  'щоб',
  'щось',
  'я',
  'як',
  'яка',
  'який',
  'якої'
]
191 | export { ukr }
192 |
--------------------------------------------------------------------------------
/src/stopwords_jpn.js:
--------------------------------------------------------------------------------
1 | // Original copyright:
2 | /*
3 | Licensed to the Apache Software Foundation (ASF) under one or more
4 | contributor license agreements. See the NOTICE file distributed with
5 | this work for additional information regarding copyright ownership.
6 | The ASF licenses this file to You under the Apache License, Version 2.0
7 | (the "License"); you may not use this file except in compliance with
8 | the License. You may obtain a copy of the License at
9 |
10 | http://www.apache.org/licenses/LICENSE-2.0
11 |
12 | Unless required by applicable law or agreed to in writing, software
13 | distributed under the License is distributed on an "AS IS" BASIS,
14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | See the License for the specific language governing permissions and
16 | limitations under the License.
17 | */
18 |
19 | // This version:
20 | /* The MIT License (MIT)
21 | Copyright (c) 2012, Guillaume Marty
22 |
23 | Permission is hereby granted, free of charge, to any person obtaining a copy
24 | of this software and associated documentation files (the "Software"), to deal
25 | in the Software without restriction, including without limitation the rights
26 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27 | copies of the Software, and to permit persons to whom the Software is
28 | furnished to do so, subject to the following conditions:
29 |
30 | The above copyright notice and this permission notice shall be included in
31 | all copies or substantial portions of the Software.
32 |
33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
39 | THE SOFTWARE.
40 | */
41 |
42 | // Original location:
43 | // http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/stopwords.txt
44 |
45 | const jpn = [
46 | 'の',
47 | 'に',
48 | 'は',
49 | 'を',
50 | 'た',
51 | 'が',
52 | 'で',
53 | 'て',
54 | 'と',
55 | 'し',
56 | 'れ',
57 | 'さ',
58 | 'ある',
59 | 'いる',
60 | 'も',
61 | 'する',
62 | 'から',
63 | 'な',
64 | 'こと',
65 | 'として',
66 | 'い',
67 | 'や',
68 | 'れる',
69 | 'など',
70 | 'なっ',
71 | 'ない',
72 | 'この',
73 | 'ため',
74 | 'その',
75 | 'あっ',
76 | 'よう',
77 | 'また',
78 | 'もの',
79 | 'という',
80 | 'あり',
81 | 'まで',
82 | 'られ',
83 | 'なる',
84 | 'へ',
85 | 'か',
86 | 'だ',
87 | 'これ',
88 | 'によって',
89 | 'により',
90 | 'おり',
91 | 'より',
92 | 'による',
93 | 'ず',
94 | 'なり',
95 | 'られる',
96 | 'において',
97 | 'ば',
98 | 'なかっ',
99 | 'なく',
100 | 'しかし',
101 | 'について',
102 | 'せ',
103 | 'だっ',
104 | 'その後',
105 | 'できる',
106 | 'それ',
107 | 'う',
108 | 'ので',
109 | 'なお',
110 | 'のみ',
111 | 'でき',
112 | 'き',
113 | 'つ',
114 | 'における',
115 | 'および',
116 | 'いう',
117 | 'さらに',
118 | 'でも',
119 | 'ら',
120 | 'たり',
121 | 'その他',
122 | 'に関する',
123 | 'たち',
124 | 'ます',
125 | 'ん',
126 | 'なら',
127 | 'に対して',
128 | '特に',
129 | 'せる',
130 | '及び',
131 | 'これら',
132 | 'とき',
133 | 'では',
134 | 'にて',
135 | 'ほか',
136 | 'ながら',
137 | 'うち',
138 | 'そして',
139 | 'とともに',
140 | 'ただし',
141 | 'かつて',
142 | 'それぞれ',
143 | 'または',
144 | 'お',
145 | 'ほど',
146 | 'ものの',
147 | 'に対する',
148 | 'ほとんど',
149 | 'と共に',
150 | 'といった',
151 | 'です',
152 | 'とも',
153 | 'ところ',
154 | 'ここ'
155 | ]
156 | export { jpn }
157 |
--------------------------------------------------------------------------------
/src/stopwords_fra.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2014, Ismaël Héry
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | const fra = [
24 | 'être',
25 | 'avoir',
26 | 'faire',
27 | 'a',
28 | 'au',
29 | 'aux',
30 | 'avec',
31 | 'ce',
32 | 'ces',
33 | 'dans',
34 | 'de',
35 | 'des',
36 | 'du',
37 | 'elle',
38 | 'en',
39 | 'et',
40 | 'eux',
41 | 'il',
42 | 'je',
43 | 'la',
44 | 'le',
45 | 'leur',
46 | 'lui',
47 | 'ma',
48 | 'mais',
49 | 'me',
50 | 'même',
51 | 'mes',
52 | 'moi',
53 | 'mon',
54 | 'ne',
55 | 'nos',
56 | 'notre',
57 | 'nous',
58 | 'on',
59 | 'ou',
60 | 'où',
61 | 'par',
62 | 'pas',
63 | 'pour',
64 | 'qu',
65 | 'que',
66 | 'qui',
67 | 'sa',
68 | 'se',
69 | 'ses',
70 | 'son',
71 | 'sur',
72 | 'ta',
73 | 'te',
74 | 'tes',
75 | 'toi',
76 | 'ton',
77 | 'tu',
78 | 'un',
79 | 'une',
80 | 'vos',
81 | 'votre',
82 | 'vous',
83 | 'c',
84 | 'd',
85 | 'j',
86 | 'l',
87 | 'à',
88 | 'm',
89 | 'n',
90 | 's',
91 | 't',
92 | 'y',
93 | 'été',
94 | 'étée',
95 | 'étées',
96 | 'étés',
97 | 'étant',
98 | 'suis',
99 | 'es',
100 | 'est',
101 | 'sommes',
102 | 'êtes',
103 | 'sont',
104 | 'serai',
105 | 'seras',
106 | 'sera',
107 | 'serons',
108 | 'serez',
109 | 'seront',
110 | 'serais',
111 | 'serait',
112 | 'serions',
113 | 'seriez',
114 | 'seraient',
115 | 'étais',
116 | 'était',
117 | 'étions',
118 | 'étiez',
119 | 'étaient',
120 | 'fus',
121 | 'fut',
122 | 'fûmes',
123 | 'fûtes',
124 | 'furent',
125 | 'sois',
126 | 'soit',
127 | 'soyons',
128 | 'soyez',
129 | 'soient',
130 | 'fusse',
131 | 'fusses',
132 | 'fût',
133 | 'fussions',
134 | 'fussiez',
135 | 'fussent',
136 | 'ayant',
137 | 'eu',
138 | 'eue',
139 | 'eues',
140 | 'eus',
141 | 'ai',
142 | 'as',
143 | 'avons',
144 | 'avez',
145 | 'ont',
146 | 'aurai',
147 | 'auras',
148 | 'aura',
149 | 'aurons',
150 | 'aurez',
151 | 'auront',
152 | 'aurais',
153 | 'aurait',
154 | 'aurions',
155 | 'auriez',
156 | 'auraient',
157 | 'avais',
158 | 'avait',
159 | 'avions',
160 | 'aviez',
161 | 'avaient',
162 | 'eut',
163 | 'eûmes',
164 | 'eûtes',
165 | 'eurent',
166 | 'aie',
167 | 'aies',
168 | 'ait',
169 | 'ayons',
170 | 'ayez',
171 | 'aient',
172 | 'eusse',
173 | 'eusses',
174 | 'eût',
175 | 'eussions',
176 | 'eussiez',
177 | 'eussent',
178 | 'ceci',
179 | 'cela',
180 | 'cet',
181 | 'cette',
182 | 'ici',
183 | 'ils',
184 | 'les',
185 | 'leurs',
186 | 'quel',
187 | 'quels',
188 | 'quelle',
189 | 'quelles',
190 | 'sans',
191 | 'soi'
192 | ]
193 | export { fra }
194 |
--------------------------------------------------------------------------------
/src/stopwords_dan.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2016 Gene Diaz
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 | Source: https://github.com/stopwords-iso/stopwords-da
25 | */
26 |
27 | const dan = [
28 | 'ad',
29 | 'af',
30 | 'aldrig',
31 | 'alle',
32 | 'alt',
33 | 'anden',
34 | 'andet',
35 | 'andre',
36 | 'at',
37 | 'bare',
38 | 'begge',
39 | 'blev',
40 | 'blive',
41 | 'bliver',
42 | 'da',
43 | 'de',
44 | 'dem',
45 | 'den',
46 | 'denne',
47 | 'der',
48 | 'deres',
49 | 'det',
50 | 'dette',
51 | 'dig',
52 | 'din',
53 | 'dine',
54 | 'disse',
55 | 'dit',
56 | 'dog',
57 | 'du',
58 | 'efter',
59 | 'ej',
60 | 'eller',
61 | 'en',
62 | 'end',
63 | 'ene',
64 | 'eneste',
65 | 'enhver',
66 | 'er',
67 | 'et',
68 | 'far',
69 | 'fem',
70 | 'fik',
71 | 'fire',
72 | 'flere',
73 | 'fleste',
74 | 'for',
75 | 'fordi',
76 | 'forrige',
77 | 'fra',
78 | 'få',
79 | 'får',
80 | 'før',
81 | 'god',
82 | 'godt',
83 | 'ham',
84 | 'han',
85 | 'hans',
86 | 'har',
87 | 'havde',
88 | 'have',
89 | 'hej',
90 | 'helt',
91 | 'hende',
92 | 'hendes',
93 | 'her',
94 | 'hos',
95 | 'hun',
96 | 'hvad',
97 | 'hvem',
98 | 'hver',
99 | 'hvilken',
100 | 'hvis',
101 | 'hvor',
102 | 'hvordan',
103 | 'hvorfor',
104 | 'hvornår',
105 | 'i',
106 | 'ikke',
107 | 'ind',
108 | 'ingen',
109 | 'intet',
110 | 'ja',
111 | 'jeg',
112 | 'jer',
113 | 'jeres',
114 | 'jo',
115 | 'kan',
116 | 'kom',
117 | 'komme',
118 | 'kommer',
119 | 'kun',
120 | 'kunne',
121 | 'lad',
122 | 'lav',
123 | 'lidt',
124 | 'lige',
125 | 'lille',
126 | 'man',
127 | 'mand',
128 | 'mange',
129 | 'med',
130 | 'meget',
131 | 'men',
132 | 'mens',
133 | 'mere',
134 | 'mig',
135 | 'min',
136 | 'mine',
137 | 'mit',
138 | 'mod',
139 | 'må',
140 | 'ned',
141 | 'nej',
142 | 'ni',
143 | 'nogen',
144 | 'noget',
145 | 'nogle',
146 | 'nu',
147 | 'ny',
148 | 'nyt',
149 | 'når',
150 | 'nær',
151 | 'næste',
152 | 'næsten',
153 | 'og',
154 | 'også',
155 | 'okay',
156 | 'om',
157 | 'op',
158 | 'os',
159 | 'otte',
160 | 'over',
161 | 'på',
162 | 'se',
163 | 'seks',
164 | 'selv',
165 | 'ser',
166 | 'ses',
167 | 'sig',
168 | 'sige',
169 | 'sin',
170 | 'sine',
171 | 'sit',
172 | 'skal',
173 | 'skulle',
174 | 'som',
175 | 'stor',
176 | 'store',
177 | 'syv',
178 | 'så',
179 | 'sådan',
180 | 'tag',
181 | 'tage',
182 | 'thi',
183 | 'ti',
184 | 'til',
185 | 'to',
186 | 'tre',
187 | 'ud',
188 | 'under',
189 | 'var',
190 | 'ved',
191 | 'vi',
192 | 'vil',
193 | 'ville',
194 | 'vor',
195 | 'vores',
196 | 'være',
197 | 'været'
198 | ]
199 | export { dan }
200 |
--------------------------------------------------------------------------------
/src/stopword.js:
--------------------------------------------------------------------------------
1 | import { _123 } from './stopwords__123.js'
2 | import { afr } from './stopwords_afr.js'
3 | import { ara } from './stopwords_ara.js'
4 | import { hye } from './stopwords_hye.js'
5 | import { eus } from './stopwords_eus.js'
6 | import { ben } from './stopwords_ben.js'
7 | import { bre } from './stopwords_bre.js'
8 | import { bul } from './stopwords_bul.js'
9 | import { cat } from './stopwords_cat.js'
10 | import { zho } from './stopwords_zho.js'
11 | import { hrv } from './stopwords_hrv.js'
12 | import { ces } from './stopwords_ces.js'
13 | import { dan } from './stopwords_dan.js'
14 | import { nld } from './stopwords_nld.js'
15 | import { eng } from './stopwords_eng.js'
16 | import { epo } from './stopwords_epo.js'
17 | import { est } from './stopwords_est.js'
18 | import { fin } from './stopwords_fin.js'
19 | import { fra } from './stopwords_fra.js'
20 | import { glg } from './stopwords_glg.js'
21 | import { deu } from './stopwords_deu.js'
22 | import { ell } from './stopwords_ell.js'
23 | import { guj } from './stopwords_guj.js'
24 | import { hau } from './stopwords_hau.js'
25 | import { heb } from './stopwords_heb.js'
26 | import { hin } from './stopwords_hin.js'
27 | import { gle } from './stopwords_gle.js'
28 | import { hun } from './stopwords_hun.js'
29 | import { ind } from './stopwords_ind.js'
30 | import { ita } from './stopwords_ita.js'
31 | import { jpn } from './stopwords_jpn.js'
32 | import { kor } from './stopwords_kor.js'
33 | import { kur } from './stopwords_kur.js'
34 | import { lat } from './stopwords_lat.js'
35 | import { lav } from './stopwords_lav.js'
36 | import { lit } from './stopwords_lit.js'
37 | import { lgg } from './stopwords_lgg.js'
38 | import { lggNd } from './stopwords_lggNd.js'
39 | import { msa } from './stopwords_msa.js'
40 | import { mar } from './stopwords_mar.js'
41 | import { mya } from './stopwords_mya.js'
42 | import { nob } from './stopwords_nob.js'
43 | import { panGu } from './stopwords_panGu.js'
44 | import { fas } from './stopwords_fas.js'
45 | import { pol } from './stopwords_pol.js'
46 | import { por } from './stopwords_por.js'
47 | import { porBr } from './stopwords_porBr.js'
48 | import { ron } from './stopwords_ron.js'
49 | import { rus } from './stopwords_rus.js'
50 | import { slk } from './stopwords_slk.js'
51 | import { slv } from './stopwords_slv.js'
52 | import { som } from './stopwords_som.js'
53 | import { sot } from './stopwords_sot.js'
54 | import { spa } from './stopwords_spa.js'
55 | import { swa } from './stopwords_swa.js'
56 | import { swe } from './stopwords_swe.js'
57 | import { tha } from './stopwords_tha.js'
58 | import { tgl } from './stopwords_tgl.js'
59 | import { tur } from './stopwords_tur.js'
60 | import { ukr } from './stopwords_ukr.js'
61 | import { urd } from './stopwords_urd.js'
62 | import { vie } from './stopwords_vie.js'
63 | import { yor } from './stopwords_yor.js'
64 | import { zul } from './stopwords_zul.js'
65 |
/**
 * Return a copy of `tokens` with every stopword removed.
 *
 * Each string token is lowercased before it is looked up, so matching is
 * case-insensitive as long as the stopword list itself is lowercase. Tokens
 * that are not strings (numbers, null, objects, ...) cannot be stopwords and
 * are passed through untouched instead of crashing on `.toLowerCase()`.
 *
 * @param {Array} tokens - tokens to filter; the array is not modified
 * @param {string[]} [stopwords=eng] - words to remove; defaults to the
 *   English stopword list
 * @returns {Array} the tokens that are not stopwords, in their original
 *   order and original casing
 * @throws {Error} when either argument is not an Array
 */
const removeStopwords = (tokens, stopwords = eng) => {
  if (!Array.isArray(tokens) || !Array.isArray(stopwords)) {
    throw new Error('expected Arrays try: removeStopwords(Array[, Array])')
  }
  // Build the lookup once per call: Set#has is O(1), whereas calling
  // Array#includes for every token rescans the whole stopword list each time.
  const stopwordSet = new Set(stopwords)
  return tokens.filter(
    token =>
      typeof token !== 'string' || !stopwordSet.has(token.toLowerCase())
  )
}
73 |
74 | export {
75 | removeStopwords,
76 | _123,
77 | afr,
78 | ara,
79 | ben,
80 | bre,
81 | bul,
82 | cat,
83 | ces,
84 | dan,
85 | deu,
86 | ell,
87 | eng,
88 | epo,
89 | est,
90 | eus,
91 | fas,
92 | fin,
93 | fra,
94 | gle,
95 | glg,
96 | guj,
97 | hau,
98 | heb,
99 | hin,
100 | hrv,
101 | hun,
102 | hye,
103 | ind,
104 | ita,
105 | jpn,
106 | kor,
107 | kur,
108 | lat,
109 | lav,
110 | lgg,
111 | lggNd,
112 | lit,
113 | mar,
114 | msa,
115 | mya,
116 | nld,
117 | nob,
118 | panGu,
119 | pol,
120 | por,
121 | porBr,
122 | ron,
123 | rus,
124 | slk,
125 | slv,
126 | som,
127 | sot,
128 | spa,
129 | swa,
130 | swe,
131 | tgl,
132 | tha,
133 | tur,
134 | ukr,
135 | urd,
136 | vie,
137 | yor,
138 | zho,
139 | zul
140 | }
141 |
--------------------------------------------------------------------------------
/src/stopwords_bul.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const bul = [
4 | 'а',
5 | 'автентичен',
6 | 'аз',
7 | 'ако',
8 | 'ала',
9 | 'бе',
10 | 'без',
11 | 'беше',
12 | 'би',
13 | 'бивш',
14 | 'бивша',
15 | 'бившо',
16 | 'бил',
17 | 'била',
18 | 'били',
19 | 'било',
20 | 'благодаря',
21 | 'близо',
22 | 'бъдат',
23 | 'бъде',
24 | 'бяха',
25 | 'в',
26 | 'вас',
27 | 'ваш',
28 | 'ваша',
29 | 'вероятно',
30 | 'вече',
31 | 'взема',
32 | 'ви',
33 | 'вие',
34 | 'винаги',
35 | 'внимава',
36 | 'време',
37 | 'все',
38 | 'всеки',
39 | 'всички',
40 | 'всичко',
41 | 'всяка',
42 | 'във',
43 | 'въпреки',
44 | 'върху',
45 | 'г',
46 | 'ги',
47 | 'главен',
48 | 'главна',
49 | 'главно',
50 | 'глас',
51 | 'го',
52 | 'година',
53 | 'години',
54 | 'годишен',
55 | 'д',
56 | 'да',
57 | 'дали',
58 | 'два',
59 | 'двама',
60 | 'двамата',
61 | 'две',
62 | 'двете',
63 | 'ден',
64 | 'днес',
65 | 'дни',
66 | 'до',
67 | 'добра',
68 | 'добре',
69 | 'добро',
70 | 'добър',
71 | 'докато',
72 | 'докога',
73 | 'дори',
74 | 'досега',
75 | 'доста',
76 | 'друг',
77 | 'друга',
78 | 'други',
79 | 'е',
80 | 'евтин',
81 | 'едва',
82 | 'един',
83 | 'една',
84 | 'еднаква',
85 | 'еднакви',
86 | 'еднакъв',
87 | 'едно',
88 | 'екип',
89 | 'ето',
90 | 'живот',
91 | 'за',
92 | 'забавям',
93 | 'зад',
94 | 'заедно',
95 | 'заради',
96 | 'засега',
97 | 'заспал',
98 | 'затова',
99 | 'защо',
100 | 'защото',
101 | 'и',
102 | 'из',
103 | 'или',
104 | 'им',
105 | 'има',
106 | 'имат',
107 | 'иска',
108 | 'й',
109 | 'каза',
110 | 'как',
111 | 'каква',
112 | 'какво',
113 | 'както',
114 | 'какъв',
115 | 'като',
116 | 'кога',
117 | 'когато',
118 | 'което',
119 | 'които',
120 | 'кой',
121 | 'който',
122 | 'колко',
123 | 'която',
124 | 'къде',
125 | 'където',
126 | 'към',
127 | 'лесен',
128 | 'лесно',
129 | 'ли',
130 | 'лош',
131 | 'м',
132 | 'май',
133 | 'малко',
134 | 'ме',
135 | 'между',
136 | 'мек',
137 | 'мен',
138 | 'месец',
139 | 'ми',
140 | 'много',
141 | 'мнозина',
142 | 'мога',
143 | 'могат',
144 | 'може',
145 | 'мокър',
146 | 'моля',
147 | 'момента',
148 | 'му',
149 | 'н',
150 | 'на',
151 | 'над',
152 | 'назад',
153 | 'най',
154 | 'направи',
155 | 'напред',
156 | 'например',
157 | 'нас',
158 | 'не',
159 | 'него',
160 | 'нещо',
161 | 'нея',
162 | 'ни',
163 | 'ние',
164 | 'никой',
165 | 'нито',
166 | 'нищо',
167 | 'но',
168 | 'нов',
169 | 'нова',
170 | 'нови',
171 | 'новина',
172 | 'някои',
173 | 'някой',
174 | 'няколко',
175 | 'няма',
176 | 'обаче',
177 | 'около',
178 | 'освен',
179 | 'особено',
180 | 'от',
181 | 'отгоре',
182 | 'отново',
183 | 'още',
184 | 'пак',
185 | 'по',
186 | 'повече',
187 | 'повечето',
188 | 'под',
189 | 'поне',
190 | 'поради',
191 | 'после',
192 | 'почти',
193 | 'прави',
194 | 'пред',
195 | 'преди',
196 | 'през',
197 | 'при',
198 | 'пък',
199 | 'първата',
200 | 'първи',
201 | 'първо',
202 | 'пъти',
203 | 'равен',
204 | 'равна',
205 | 'с',
206 | 'са',
207 | 'сам',
208 | 'само',
209 | 'се',
210 | 'сега',
211 | 'си',
212 | 'син',
213 | 'скоро',
214 | 'след',
215 | 'следващ',
216 | 'сме',
217 | 'смях',
218 | 'според',
219 | 'сред',
220 | 'срещу',
221 | 'сте',
222 | 'съм',
223 | 'със',
224 | 'също',
225 | 'т',
226 | 'т.н.',
227 | 'тази',
228 | 'така',
229 | 'такива',
230 | 'такъв',
231 | 'там',
232 | 'твой',
233 | 'те',
234 | 'тези',
235 | 'ти',
236 | 'то',
237 | 'това',
238 | 'тогава',
239 | 'този',
240 | 'той',
241 | 'толкова',
242 | 'точно',
243 | 'три',
244 | 'трябва',
245 | 'тук',
246 | 'тъй',
247 | 'тя',
248 | 'тях',
249 | 'у',
250 | 'утре',
251 | 'харесва',
252 | 'хиляди',
253 | 'ч',
254 | 'часа',
255 | 'че',
256 | 'често',
257 | 'чрез',
258 | 'ще',
259 | 'щом',
260 | 'юмрук',
261 | 'я',
262 | 'як'
263 | ]
264 | export { bul }
265 |
--------------------------------------------------------------------------------
/src/stopwords_guj.js:
--------------------------------------------------------------------------------
1 | /* MIT License
2 |
3 | Copyright (c) 2020 Stopwords ISO
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the 'Software'), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
23 | const guj = [
24 | 'અંગે',
25 | 'અંદર',
26 | 'અથવા',
27 | 'અને',
28 | 'અમને',
29 | 'અમારું',
30 | 'અમે',
31 | 'અહીં',
32 | 'આ',
33 | 'આગળ',
34 | 'આથી',
35 | 'આનું',
36 | 'આને',
37 | 'આપણને',
38 | 'આપણું',
39 | 'આપણે',
40 | 'આપી',
41 | 'આર',
42 | 'આવી',
43 | 'આવે',
44 | 'ઉપર',
45 | 'ઉભા',
46 | 'ઊંચે',
47 | 'ઊભું',
48 | 'એ',
49 | 'એક',
50 | 'એન',
51 | 'એના',
52 | 'એનાં',
53 | 'એની',
54 | 'એનું',
55 | 'એને',
56 | 'એનો',
57 | 'એમ',
58 | 'એવા',
59 | 'એવાં',
60 | 'એવી',
61 | 'એવું',
62 | 'એવો',
63 | 'ઓછું',
64 | 'કંઈક',
65 | 'કઈ',
66 | 'કયું',
67 | 'કયો',
68 | 'કરતાં',
69 | 'કરવું',
70 | 'કરી',
71 | 'કરીએ',
72 | 'કરું',
73 | 'કરે',
74 | 'કરેલું',
75 | 'કર્યા',
76 | 'કર્યાં',
77 | 'કર્યું',
78 | 'કર્યો',
79 | 'કાંઈ',
80 | 'કે',
81 | 'કેટલું',
82 | 'કેમ',
83 | 'કેવી',
84 | 'કેવું',
85 | 'કોઈ',
86 | 'કોઈક',
87 | 'કોણ',
88 | 'કોણે',
89 | 'કોને',
90 | 'ક્યાં',
91 | 'ક્યારે',
92 | 'ખૂબ',
93 | 'ગઈ',
94 | 'ગયા',
95 | 'ગયાં',
96 | 'ગયું',
97 | 'ગયો',
98 | 'ઘણું',
99 | 'છ',
100 | 'છતાં',
101 | 'છીએ',
102 | 'છું',
103 | 'છે',
104 | 'છેક',
105 | 'છો',
106 | 'જ',
107 | 'જાય',
108 | 'જી',
109 | 'જે',
110 | 'જેટલું',
111 | 'જેને',
112 | 'જેમ',
113 | 'જેવી',
114 | 'જેવું',
115 | 'જેવો',
116 | 'જો',
117 | 'જોઈએ',
118 | 'જ્યાં',
119 | 'જ્યારે',
120 | 'ઝાઝું',
121 | 'તને',
122 | 'તમને',
123 | 'તમારું',
124 | 'તમે',
125 | 'તા',
126 | 'તારાથી',
127 | 'તારામાં',
128 | 'તારું',
129 | 'તું',
130 | 'તે',
131 | 'તેં',
132 | 'તેઓ',
133 | 'તેણે',
134 | 'તેથી',
135 | 'તેના',
136 | 'તેની',
137 | 'તેનું',
138 | 'તેને',
139 | 'તેમ',
140 | 'તેમનું',
141 | 'તેમને',
142 | 'તેવી',
143 | 'તેવું',
144 | 'તો',
145 | 'ત્યાં',
146 | 'ત્યારે',
147 | 'થઇ',
148 | 'થઈ',
149 | 'થઈએ',
150 | 'થતા',
151 | 'થતાં',
152 | 'થતી',
153 | 'થતું',
154 | 'થતો',
155 | 'થયા',
156 | 'થયાં',
157 | 'થયું',
158 | 'થયેલું',
159 | 'થયો',
160 | 'થવું',
161 | 'થાઉં',
162 | 'થાઓ',
163 | 'થાય',
164 | 'થી',
165 | 'થોડું',
166 | 'દરેક',
167 | 'ન',
168 | 'નં',
169 | 'નં.',
170 | 'નથી',
171 | 'નહિ',
172 | 'નહી',
173 | 'નહીં',
174 | 'ના',
175 | 'ની',
176 | 'નીચે',
177 | 'નું',
178 | 'ને',
179 | 'નો',
180 | 'પછી',
181 | 'પણ',
182 | 'પર',
183 | 'પરંતુ',
184 | 'પહેલાં',
185 | 'પાછળ',
186 | 'પાસે',
187 | 'પોતાનું',
188 | 'પ્રત્યેક',
189 | 'ફક્ત',
190 | 'ફરી',
191 | 'ફરીથી',
192 | 'બંને',
193 | 'બધા',
194 | 'બધું',
195 | 'બની',
196 | 'બહાર',
197 | 'બહુ',
198 | 'બાદ',
199 | 'બે',
200 | 'મને',
201 | 'મા',
202 | 'માં',
203 | 'માટે',
204 | 'માત્ર',
205 | 'મારું',
206 | 'મી',
207 | 'મૂકવું',
208 | 'મૂકી',
209 | 'મૂક્યા',
210 | 'મૂક્યાં',
211 | 'મૂક્યું',
212 | 'મેં',
213 | 'રહી',
214 | 'રહે',
215 | 'રહેવું',
216 | 'રહ્યા',
217 | 'રહ્યાં',
218 | 'રહ્યો',
219 | 'રીતે',
220 | 'રૂ.',
221 | 'રૂા',
222 | 'લેતા',
223 | 'લેતું',
224 | 'લેવા',
225 | 'વગેરે',
226 | 'વધુ',
227 | 'શકે',
228 | 'શા',
229 | 'શું',
230 | 'સરખું',
231 | 'સામે',
232 | 'સુધી',
233 | 'હતા',
234 | 'હતાં',
235 | 'હતી',
236 | 'હતું',
237 | 'હવે',
238 | 'હશે',
239 | 'હશો',
240 | 'હા',
241 | 'હું',
242 | 'હો',
243 | 'હોઈ',
244 | 'હોઈશ',
245 | 'હોઈશું',
246 | 'હોય',
247 | 'હોવા'
248 | ]
249 | export { guj }
250 |
--------------------------------------------------------------------------------
/src/stopwords_ron.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
// Source list. Words containing ș/ț are spelled here with the legacy cedilla
// letters ş (U+015F) and ţ (U+0163).
const ronCedilla = [
  'acea',
  'aceasta',
  'această',
  'aceea',
  'acei',
  'aceia',
  'acel',
  'acela',
  'acele',
  'acelea',
  'acest',
  'acesta',
  'aceste',
  'acestea',
  'aceşti',
  'aceştia',
  'acolo',
  'acord',
  'acum',
  'ai',
  'aia',
  'aibă',
  'aici',
  'al',
  'ale',
  'alea',
  'altceva',
  'altcineva',
  'am',
  'ar',
  'are',
  'asemenea',
  'asta',
  'astea',
  'astăzi',
  'asupra',
  'au',
  'avea',
  'avem',
  'aveţi',
  'azi',
  'aş',
  'aşadar',
  'aţi',
  'bine',
  'bucur',
  'bună',
  'ca',
  'care',
  'caut',
  'ce',
  'cel',
  'ceva',
  'chiar',
  'cinci',
  'cine',
  'cineva',
  'contra',
  'cu',
  'cum',
  'cumva',
  'curând',
  'curînd',
  'când',
  'cât',
  'câte',
  'câtva',
  'câţi',
  'cînd',
  'cît',
  'cîte',
  'cîtva',
  'cîţi',
  'că',
  'căci',
  'cărei',
  'căror',
  'cărui',
  'către',
  'da',
  'dacă',
  'dar',
  'datorită',
  'dată',
  'dau',
  'de',
  'deci',
  'deja',
  'deoarece',
  'departe',
  'deşi',
  'din',
  'dinaintea',
  'dintr-',
  'dintre',
  'doi',
  'doilea',
  'două',
  'drept',
  'după',
  'dă',
  'ea',
  'ei',
  'el',
  'ele',
  'eram',
  'este',
  'eu',
  'eşti',
  'face',
  'fata',
  'fi',
  'fie',
  'fiecare',
  'fii',
  'fim',
  'fiu',
  'fiţi',
  'frumos',
  'fără',
  'graţie',
  'halbă',
  'iar',
  'ieri',
  'la',
  'le',
  'li',
  'lor',
  'lui',
  'lângă',
  'lîngă',
  'mai',
  'mea',
  'mei',
  'mele',
  'mereu',
  'meu',
  'mi',
  'mie',
  'mine',
  'mult',
  'multă',
  'mulţi',
  'mulţumesc',
  'mâine',
  'mîine',
  'mă',
  'ne',
  'nevoie',
  'nici',
  'nicăieri',
  'nimeni',
  'nimeri',
  'nimic',
  'nişte',
  'noastre',
  'noastră',
  'noi',
  'noroc',
  'nostru',
  'nouă',
  'noştri',
  'nu',
  'opt',
  'ori',
  'oricare',
  'orice',
  'oricine',
  'oricum',
  'oricând',
  'oricât',
  'oricînd',
  'oricît',
  'oriunde',
  'patra',
  'patru',
  'patrulea',
  'pe',
  'pentru',
  'peste',
  'pic',
  'poate',
  'pot',
  'prea',
  'prima',
  'primul',
  'prin',
  'printr-',
  'puţin',
  'puţina',
  'puţină',
  'până',
  'pînă',
  'rog',
  'sa',
  'sale',
  'sau',
  'se',
  'spate',
  'spre',
  'sub',
  'sunt',
  'suntem',
  'sunteţi',
  'sută',
  'sînt',
  'sîntem',
  'sînteţi',
  'să',
  'săi',
  'său',
  'ta',
  'tale',
  'te',
  'timp',
  'tine',
  'toate',
  'toată',
  'tot',
  'totuşi',
  'toţi',
  'trei',
  'treia',
  'treilea',
  'tu',
  'tăi',
  'tău',
  'un',
  'una',
  'unde',
  'undeva',
  'unei',
  'uneia',
  'unele',
  'uneori',
  'unii',
  'unor',
  'unora',
  'unu',
  'unui',
  'unuia',
  'unul',
  'vi',
  'voastre',
  'voastră',
  'voi',
  'vostru',
  'vouă',
  'voştri',
  'vreme',
  'vreo',
  'vreun',
  'vă',
  'zece',
  'zero',
  'zi',
  'zice',
  'îi',
  'îl',
  'îmi',
  'împotriva',
  'în',
  'înainte',
  'înaintea',
  'încotro',
  'încât',
  'încît',
  'între',
  'întrucât',
  'întrucît',
  'îţi',
  'ăla',
  'ălea',
  'ăsta',
  'ăstea',
  'ăştia',
  'şapte',
  'şase',
  'şi',
  'ştiu',
  'ţi',
  'ţie'
]

// Re-spell a word with the standard Romanian comma-below letters:
// U+015F (s with cedilla) -> U+0219, U+0163 (t with cedilla) -> U+021B.
const toCommaBelow = word =>
  word.replace(/\u015F/g, '\u0219').replace(/\u0163/g, '\u021B')

/**
 * Romanian stopwords.
 *
 * The source list spells ș/ț only with the legacy cedilla code points, so
 * text written with the standard comma-below letters (e.g. "și") would never
 * match. The comma-below spellings are therefore appended after the original
 * entries; the original entries keep their positions. This mirrors how the
 * list already carries both the â and î spellings of the same word.
 */
const ron = [
  ...ronCedilla,
  ...ronCedilla.filter(word => /[\u015F\u0163]/.test(word)).map(toCommaBelow)
]
287 | export { ron }
288 |
--------------------------------------------------------------------------------
/src/stopwords_hin.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2016 Gene Diaz
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 | Originates from: https://github.com/stopwords-iso/stopwords-hi/
25 | */
26 |
/**
 * Hindi stop words, exported under the three-letter code `hin`.
 *
 * The list keeps several spelling variants of the same word side by side
 * (short/long vowel signs, anusvara/chandrabindu, with/without nukta), so
 * entries that look alike are intentional and must not be merged.
 * Entries are laid out five per row; order matches the upstream list.
 */
// prettier-ignore
const hin = [
  'अंदर', 'अत', 'अदि', 'अप', 'अपना',
  'अपनि', 'अपनी', 'अपने', 'अभि', 'अभी',
  'आदि', 'आप', 'इंहिं', 'इंहें', 'इंहों',
  'इतयादि', 'इत्यादि', 'इन', 'इनका', 'इन्हीं',
  'इन्हें', 'इन्हों', 'इस', 'इसका', 'इसकि',
  'इसकी', 'इसके', 'इसमें', 'इसि', 'इसी',
  'इसे', 'उंहिं', 'उंहें', 'उंहों', 'उन',
  'उनका', 'उनकि', 'उनकी', 'उनके', 'उनको',
  'उन्हीं', 'उन्हें', 'उन्हों', 'उस', 'उसके',
  'उसि', 'उसी', 'उसे', 'एक', 'एवं',
  'एस', 'एसे', 'ऐसे', 'ओर', 'और',
  'कइ', 'कई', 'कर', 'करता', 'करते',
  'करना', 'करने', 'करें', 'कहते', 'कहा',
  'का', 'काफि', 'काफ़ी', 'कि', 'किंहें',
  'किंहों', 'कितना', 'किन्हें', 'किन्हों', 'किया',
  'किर', 'किस', 'किसि', 'किसी', 'किसे',
  'की', 'कुछ', 'कुल', 'के', 'को',
  'कोइ', 'कोई', 'कोन', 'कोनसा', 'कौन',
  'कौनसा', 'गया', 'घर', 'जब', 'जहाँ',
  'जहां', 'जा', 'जिंहें', 'जिंहों', 'जितना',
  'जिधर', 'जिन', 'जिन्हें', 'जिन्हों', 'जिस',
  'जिसे', 'जीधर', 'जेसा', 'जेसे', 'जैसा',
  'जैसे', 'जो', 'तक', 'तब', 'तरह',
  'तिंहें', 'तिंहों', 'तिन', 'तिन्हें', 'तिन्हों',
  'तिस', 'तिसे', 'तो', 'था', 'थि',
  'थी', 'थे', 'दबारा', 'दवारा', 'दिया',
  'दुसरा', 'दुसरे', 'दूसरे', 'दो', 'द्वारा',
  'न', 'नहिं', 'नहीं', 'ना', 'निचे',
  'निहायत', 'नीचे', 'ने', 'पर', 'पहले',
  'पुरा', 'पूरा', 'पे', 'फिर', 'बनि',
  'बनी', 'बहि', 'बही', 'बहुत', 'बाद',
  'बाला', 'बिलकुल', 'भि', 'भितर', 'भी',
  'भीतर', 'मगर', 'मानो', 'मे', 'में',
  'यदि', 'यह', 'यहाँ', 'यहां', 'यहि',
  'यही', 'या', 'यिह', 'ये', 'रखें',
  'रवासा', 'रहा', 'रहे', 'ऱ्वासा', 'लिए',
  'लिये', 'लेकिन', 'व', 'वगेरह', 'वरग',
  'वर्ग', 'वह', 'वहाँ', 'वहां', 'वहिं',
  'वहीं', 'वाले', 'वुह', 'वे', 'वग़ैरह',
  'संग', 'सकता', 'सकते', 'सबसे', 'सभि',
  'सभी', 'साथ', 'साबुत', 'साभ', 'सारा',
  'से', 'सो', 'हि', 'ही', 'हुअ',
  'हुआ', 'हुइ', 'हुई', 'हुए', 'हे',
  'हें', 'है', 'हैं', 'हो', 'होता',
  'होति', 'होती', 'होते', 'होना', 'होने'
]
export { hin }
255 |
--------------------------------------------------------------------------------
/src/stopwords_tur.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Turkish stop words, exported under the three-letter code `tur`.
 *
 * Every entry appears exactly once. Earlier revisions listed many words two
 * or three times (apparently the result of repairing mis-encoded variants
 * into the same spelling); those repeats carried no information and have
 * been removed.
 *
 * The legacy mis-encoded spellings that use `đ` in place of `ğ`
 * ('deđil', 'diđer', 'eđer', 'međer') and the undotted-i variant 'onlari'
 * are kept so that existing behaviour is unchanged; each now sits next to
 * its correctly spelled form.
 */
const tur = [
  'acaba',
  'acep',
  'adeta',
  'altmış',
  'altı',
  'ama',
  'ancak',
  'arada',
  'artık',
  'aslında',
  'aynen',
  'ayrıca',
  'az',
  'bana',
  'bari',
  'bazen',
  'bazı',
  'başka',
  'belki',
  'ben',
  'benden',
  'beni',
  'benim',
  'beri',
  'beş',
  'bile',
  'bin',
  'bir',
  'biraz',
  'biri',
  'birkaç',
  'birkez',
  'birçok',
  'birşey',
  'birşeyi',
  'biz',
  'bizden',
  'bize',
  'bizi',
  'bizim',
  'bu',
  'buna',
  'bunda',
  'bundan',
  'bunlar',
  'bunları',
  'bunların',
  'bunu',
  'bunun',
  'burada',
  'böyle',
  'böylece',
  'bütün',
  'da',
  'daha',
  'dahi',
  'dahil',
  'daima',
  'dair',
  'dayanarak',
  'de',
  'defa',
  'deđil',
  'değil',
  'diye',
  'diđer',
  'diğer',
  'doksan',
  'dokuz',
  'dolayı',
  'dolayısıyla',
  'dört',
  'edecek',
  'eden',
  'ederek',
  'edilecek',
  'ediliyor',
  'edilmesi',
  'ediyor',
  'elli',
  'en',
  'etmesi',
  'etti',
  'ettiği',
  'ettiğini',
  'eđer',
  'eğer',
  'fakat',
  'gibi',
  'göre',
  'halbuki',
  'halen',
  'hangi',
  'hani',
  'hariç',
  'hatta',
  'hele',
  'hem',
  'henüz',
  'hep',
  'hepsi',
  'her',
  'herhangi',
  'herkes',
  'herkesin',
  'hiç',
  'hiçbir',
  'iken',
  'iki',
  'ila',
  'ile',
  'ilgili',
  'ilk',
  'illa',
  'ise',
  'itibaren',
  'itibariyle',
  'iyi',
  'iyice',
  'için',
  'işte',
  'kadar',
  'kanımca',
  'karşın',
  'katrilyon',
  'kendi',
  'kendilerine',
  'kendini',
  'kendisi',
  'kendisine',
  'kendisini',
  'kere',
  'kez',
  'keşke',
  'ki',
  'kim',
  'kimden',
  'kime',
  'kimi',
  'kimse',
  'kırk',
  'kısaca',
  'lakin',
  'madem',
  'međer',
  // Correct spelling of the legacy mis-encoded entry above.
  'meğer',
  'milyar',
  'milyon',
  'mu',
  'mü',
  'mı',
  'nasıl',
  'ne',
  'neden',
  'nedenle',
  'nerde',
  'nere',
  'nerede',
  'nereye',
  'nitekim',
  'niye',
  'niçin',
  'o',
  'olan',
  'olarak',
  'oldu',
  'olduklarını',
  'olduğu',
  'olduğunu',
  'olmadı',
  'olmadığı',
  'olmak',
  'olması',
  'olmayan',
  'olmaz',
  'olsa',
  'olsun',
  'olup',
  'olur',
  'olursa',
  'oluyor',
  'on',
  'ona',
  'ondan',
  'onlar',
  'onlardan',
  'onlari',
  'onların',
  'onları',
  'onu',
  'onun',
  'otuz',
  'oysa',
  'pek',
  'rağmen',
  'sadece',
  'sanki',
  'sekiz',
  'seksen',
  'sen',
  'senden',
  'seni',
  'senin',
  'siz',
  'sizden',
  'sizi',
  'sizin',
  'sonra',
  'tarafından',
  'trilyon',
  'tüm',
  'var',
  'vardı',
  've',
  'veya',
  'veyahut',
  'ya',
  'yahut',
  'yani',
  'yapacak',
  'yapmak',
  'yaptı',
  'yaptıkları',
  'yaptığı',
  'yaptığını',
  'yapılan',
  'yapılması',
  'yapıyor',
  'yedi',
  'yerine',
  'yetmiş',
  'yine',
  'yirmi',
  'yoksa',
  'yüz',
  'zaten',
  'çok',
  'çünkü',
  'öyle',
  'üzere',
  'üç',
  'şey',
  'şeyden',
  'şeyi',
  'şeyler',
  'şu',
  'şuna',
  'şunda',
  'şundan',
  'şunu',
  'şunları',
  'şöyle',
  'şayet',
  'şimdi'
]
export { tur }
285 |
--------------------------------------------------------------------------------
/src/stopwords_ces.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Czech stop words, exported under the three-letter code `ces`.
 *
 * Two entries in the original list, `chut'` and `ted'`, are ASCII
 * transliterations (apostrophe instead of a caron) of 'chuť' and 'teď'.
 * Real Czech text uses the caron spellings, so those words were never
 * matched. The proper spellings are listed right after the legacy forms,
 * which are kept for backward compatibility.
 */
const ces = [
  'a',
  'aby',
  'ahoj',
  'aj',
  'ale',
  'anebo',
  'ani',
  'ano',
  'asi',
  'aspoň',
  'atd',
  'atp',
  'ačkoli',
  'až',
  'bez',
  'beze',
  'blízko',
  'bohužel',
  'brzo',
  'bude',
  'budem',
  'budeme',
  'budete',
  'budeš',
  'budou',
  'budu',
  'by',
  'byl',
  'byla',
  'byli',
  'bylo',
  'byly',
  'bys',
  'být',
  'během',
  'chce',
  'chceme',
  'chcete',
  'chceš',
  'chci',
  'chtít',
  'chtějí',
  'chut\'',
  // Proper spelling of the transliterated entry above.
  'chuť',
  'chuti',
  'co',
  'což',
  'cz',
  'daleko',
  'další',
  'den',
  'deset',
  'devatenáct',
  'devět',
  'dnes',
  'do',
  'dobrý',
  'docela',
  'dva',
  'dvacet',
  'dvanáct',
  'dvě',
  'dál',
  'dále',
  'děkovat',
  'děkujeme',
  'děkuji',
  'ho',
  'hodně',
  'i',
  'jak',
  'jakmile',
  'jako',
  'jakož',
  'jde',
  'je',
  'jeden',
  'jedenáct',
  'jedna',
  'jedno',
  'jednou',
  'jedou',
  'jeho',
  'jehož',
  'jej',
  'jejich',
  'její',
  'jelikož',
  'jemu',
  'jen',
  'jenom',
  'jestli',
  'jestliže',
  'ještě',
  'jež',
  'ji',
  'jich',
  'jimi',
  'jinak',
  'jiné',
  'již',
  'jsem',
  'jseš',
  'jsi',
  'jsme',
  'jsou',
  'jste',
  'já',
  'jí',
  'jím',
  'jíž',
  'k',
  'kam',
  'kde',
  'kdo',
  'kdy',
  'když',
  'ke',
  'kolik',
  'kromě',
  'kterou',
  'která',
  'které',
  'který',
  'kteří',
  'kvůli',
  'mají',
  'mezi',
  'mi',
  'mne',
  'mnou',
  'mně',
  'moc',
  'mohl',
  'mohou',
  'moje',
  'moji',
  'možná',
  'musí',
  'my',
  'má',
  'málo',
  'mám',
  'máme',
  'máte',
  'máš',
  'mé',
  'mí',
  'mít',
  'mě',
  'můj',
  'může',
  'na',
  'nad',
  'nade',
  'napište',
  'naproti',
  'načež',
  'naše',
  'naši',
  'ne',
  'nebo',
  'nebyl',
  'nebyla',
  'nebyli',
  'nebyly',
  'nedělají',
  'nedělá',
  'nedělám',
  'neděláme',
  'neděláte',
  'neděláš',
  'neg',
  'nejsi',
  'nejsou',
  'nemají',
  'nemáme',
  'nemáte',
  'neměl',
  'není',
  'nestačí',
  'nevadí',
  'než',
  'nic',
  'nich',
  'nimi',
  'nové',
  'nový',
  'nula',
  'nám',
  'námi',
  'nás',
  'náš',
  'ním',
  'ně',
  'něco',
  'nějak',
  'někde',
  'někdo',
  'němu',
  'němuž',
  'o',
  'od',
  'ode',
  'on',
  'ona',
  'oni',
  'ono',
  'ony',
  'osm',
  'osmnáct',
  'pak',
  'patnáct',
  'po',
  'pod',
  'podle',
  'pokud',
  'potom',
  'pouze',
  'pozdě',
  'pořád',
  'pravé',
  'pro',
  'prostě',
  'prosím',
  'proti',
  'proto',
  'protože',
  'proč',
  'první',
  'pta',
  'pět',
  'před',
  'přes',
  'přese',
  'při',
  'přičemž',
  're',
  'rovně',
  's',
  'se',
  'sedm',
  'sedmnáct',
  'si',
  'skoro',
  'smí',
  'smějí',
  'snad',
  'spolu',
  'sta',
  'sto',
  'strana',
  'sté',
  'své',
  'svých',
  'svým',
  'svými',
  'ta',
  'tady',
  'tak',
  'takhle',
  'taky',
  'také',
  'takže',
  'tam',
  'tamhle',
  'tamhleto',
  'tamto',
  'tato',
  'tebe',
  'tebou',
  'ted\'',
  // Proper spelling of the transliterated entry above.
  'teď',
  'tedy',
  'ten',
  'tento',
  'teto',
  'ti',
  'tipy',
  'tisíc',
  'tisíce',
  'to',
  'tobě',
  'tohle',
  'toho',
  'tohoto',
  'tom',
  'tomto',
  'tomu',
  'tomuto',
  'toto',
  'trošku',
  'tu',
  'tuto',
  'tvoje',
  'tvá',
  'tvé',
  'tvůj',
  'ty',
  'tyto',
  'téma',
  'tím',
  'tímto',
  'tě',
  'těm',
  'těmu',
  'třeba',
  'tři',
  'třináct',
  'u',
  'určitě',
  'už',
  'v',
  'vaše',
  'vaši',
  've',
  'vedle',
  'večer',
  'vlastně',
  'vy',
  'vám',
  'vámi',
  'vás',
  'váš',
  'více',
  'však',
  'všechno',
  'všichni',
  'vůbec',
  'vždy',
  'z',
  'za',
  'zatímco',
  'zač',
  'zda',
  'zde',
  'ze',
  'zprávy',
  'zpět',
  'čau',
  'či',
  'článku',
  'články',
  'čtrnáct',
  'čtyři',
  'šest',
  'šestnáct',
  'že'
]
export { ces }
352 |
--------------------------------------------------------------------------------
/src/stopwords_pol.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2013, Paweł Łaskarzewski
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
23 | // list based on: http://pl.wikipedia.org/wiki/Wikipedia:Stopwords
24 |
/**
 * Polish stop words, exported under the three-letter code `pol`.
 *
 * Every entry appears exactly once; the original list contained 'ją' twice
 * and the second occurrence has been dropped. The relative order of all
 * other entries is unchanged (including 'zaś' and 'tę', which sit outside
 * alphabetical order in the source list).
 */
const pol = [
  'a',
  'aby',
  'ach',
  'acz',
  'aczkolwiek',
  'aj',
  'albo',
  'ale',
  'ależ',
  'ani',
  'aż',
  'bardziej',
  'bardzo',
  'bo',
  'bowiem',
  'by',
  'byli',
  'bynajmniej',
  'być',
  'był',
  'była',
  'było',
  'były',
  'będzie',
  'będą',
  'cali',
  'cała',
  'cały',
  'ci',
  'cię',
  'ciebie',
  'co',
  'cokolwiek',
  'coś',
  'czasami',
  'czasem',
  'czemu',
  'czy',
  'czyli',
  'daleko',
  'dla',
  'dlaczego',
  'dlatego',
  'do',
  'dobrze',
  'dokąd',
  'dość',
  'dużo',
  'dwa',
  'dwaj',
  'dwie',
  'dwoje',
  'dziś',
  'dzisiaj',
  'gdy',
  'gdyby',
  'gdyż',
  'gdzie',
  'gdziekolwiek',
  'gdzieś',
  'i',
  'ich',
  'ile',
  'im',
  'inna',
  'inne',
  'inny',
  'innych',
  'iż',
  'ja',
  'ją',
  'jak',
  'jakaś',
  'jakby',
  'jaki',
  'jakichś',
  'jakie',
  'jakiś',
  'jakiż',
  'jakkolwiek',
  'jako',
  'jakoś',
  'je',
  'jeden',
  'jedna',
  'jedno',
  'jednak',
  'jednakże',
  'jego',
  'jej',
  'jemu',
  'jest',
  'jestem',
  'jeszcze',
  'jeśli',
  'jeżeli',
  'już',
  'każdy',
  'kiedy',
  'kilka',
  'kimś',
  'kto',
  'ktokolwiek',
  'ktoś',
  'która',
  'które',
  'którego',
  'której',
  'który',
  'których',
  'którym',
  'którzy',
  'ku',
  'lat',
  'lecz',
  'lub',
  'ma',
  'mają',
  'mało',
  'mam',
  'mi',
  'mimo',
  'między',
  'mną',
  'mnie',
  'mogą',
  'moi',
  'moim',
  'moja',
  'moje',
  'może',
  'możliwe',
  'można',
  'mój',
  'mu',
  'musi',
  'my',
  'na',
  'nad',
  'nam',
  'nami',
  'nas',
  'nasi',
  'nasz',
  'nasza',
  'nasze',
  'naszego',
  'naszych',
  'natomiast',
  'natychmiast',
  'nawet',
  'nią',
  'nic',
  'nich',
  'nie',
  'niech',
  'niego',
  'niej',
  'niemu',
  'nigdy',
  'nim',
  'nimi',
  'niż',
  'no',
  'o',
  'obok',
  'od',
  'około',
  'on',
  'ona',
  'one',
  'oni',
  'ono',
  'oraz',
  'oto',
  'owszem',
  'pan',
  'pana',
  'pani',
  'po',
  'pod',
  'podczas',
  'pomimo',
  'ponad',
  'ponieważ',
  'powinien',
  'powinna',
  'powinni',
  'powinno',
  'poza',
  'prawie',
  'przecież',
  'przed',
  'przede',
  'przedtem',
  'przez',
  'przy',
  'roku',
  'również',
  'sam',
  'sama',
  'są',
  'się',
  'skąd',
  'sobie',
  'sobą',
  'sposób',
  'swoje',
  'ta',
  'tak',
  'taka',
  'taki',
  'takie',
  'także',
  'tam',
  'te',
  'tego',
  'tej',
  'temu',
  'ten',
  'teraz',
  'też',
  'to',
  'tobą',
  'tobie',
  'toteż',
  'trzeba',
  'tu',
  'tutaj',
  'twoi',
  'twoim',
  'twoja',
  'twoje',
  'twym',
  'twój',
  'ty',
  'tych',
  'tylko',
  'tym',
  'u',
  'w',
  'wam',
  'wami',
  'was',
  'wasz',
  'zaś',
  'wasza',
  'wasze',
  'we',
  'według',
  'wiele',
  'wielu',
  'więc',
  'więcej',
  'tę',
  'wszyscy',
  'wszystkich',
  'wszystkie',
  'wszystkim',
  'wszystko',
  'wtedy',
  'wy',
  'właśnie',
  'z',
  'za',
  'zapewne',
  'zawsze',
  'ze',
  'zł',
  'znowu',
  'znów',
  'został',
  'żaden',
  'żadna',
  'żadne',
  'żadnych',
  'że',
  'żeby'
]
export { pol }
307 |
--------------------------------------------------------------------------------
/src/stopwords_ita.js:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2011, David Przybilla, Chris Umbel
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | */
22 |
/**
 * Italian stop words, exported under the three-letter code `ita`.
 *
 * The list is ordered by grammatical family rather than alphabetically:
 * prepositions with their article contractions, pronouns and possessives,
 * articles and function words, then the conjugations of the verbs
 * avere, essere, fare and stare. Contracted forms that normally carry an
 * apostrophe (e.g. `all`, `dell`, `dov`) are stored without it.
 */
// prettier-ignore
const ita = [
  // Prepositions and their contractions with the article
  'ad', 'al', 'allo', 'ai', 'agli', 'all', 'agl', 'alla', 'alle',
  'con', 'col', 'coi',
  'da', 'dal', 'dallo', 'dai', 'dagli', 'dall', 'dagl', 'dalla', 'dalle',
  'di', 'del', 'dello', 'dei', 'degli', 'dell', 'degl', 'della', 'delle',
  'in', 'nel', 'nello', 'nei', 'negli', 'nell', 'negl', 'nella', 'nelle',
  'su', 'sul', 'sullo', 'sui', 'sugli', 'sull', 'sugl', 'sulla', 'sulle',
  'per', 'tra', 'contro',

  // Personal pronouns and possessives
  'io', 'tu', 'lui', 'lei', 'noi', 'voi', 'loro',
  'mio', 'mia', 'miei', 'mie',
  'tuo', 'tua', 'tuoi', 'tue',
  'suo', 'sua', 'suoi', 'sue',
  'nostro', 'nostra', 'nostri', 'nostre',
  'vostro', 'vostra', 'vostri', 'vostre',

  // Clitics and articles
  'mi', 'ti', 'ci', 'vi', 'lo', 'la', 'li', 'le', 'gli', 'ne',
  'il', 'un', 'uno', 'una',

  // Conjunctions, relatives, interrogatives, quantifiers, demonstratives
  'ma', 'ed', 'se', 'perché', 'anche', 'come', 'dov', 'dove',
  'che', 'chi', 'cui', 'non', 'più', 'quale',
  'quanto', 'quanti', 'quanta', 'quante',
  'quello', 'quelli', 'quella', 'quelle',
  'questo', 'questi', 'questa', 'queste',
  'si', 'tutto', 'tutti',

  // Single-letter words and elision remnants
  'a', 'c', 'e', 'i', 'l', 'o',

  // avere
  'ho', 'hai', 'ha', 'abbiamo', 'avete', 'hanno',
  'abbia', 'abbiate', 'abbiano',
  'avrò', 'avrai', 'avrà', 'avremo', 'avrete', 'avranno',
  'avrei', 'avresti', 'avrebbe', 'avremmo', 'avreste', 'avrebbero',
  'avevo', 'avevi', 'aveva', 'avevamo', 'avevate', 'avevano',
  'ebbi', 'avesti', 'ebbe', 'avemmo', 'aveste', 'ebbero',
  'avessi', 'avesse', 'avessimo', 'avessero',
  'avendo', 'avuto', 'avuta', 'avuti', 'avute',

  // essere
  'sono', 'sei', 'è', 'siamo', 'siete',
  'sia', 'siate', 'siano',
  'sarò', 'sarai', 'sarà', 'saremo', 'sarete', 'saranno',
  'sarei', 'saresti', 'sarebbe', 'saremmo', 'sareste', 'sarebbero',
  'ero', 'eri', 'era', 'eravamo', 'eravate', 'erano',
  'fui', 'fosti', 'fu', 'fummo', 'foste', 'furono',
  'fossi', 'fosse', 'fossimo', 'fossero',
  'essendo',

  // fare
  'faccio', 'fai', 'facciamo', 'fanno',
  'faccia', 'facciate', 'facciano',
  'farò', 'farai', 'farà', 'faremo', 'farete', 'faranno',
  'farei', 'faresti', 'farebbe', 'faremmo', 'fareste', 'farebbero',
  'facevo', 'facevi', 'faceva', 'facevamo', 'facevate', 'facevano',
  'feci', 'facesti', 'fece', 'facemmo', 'faceste', 'fecero',
  'facessi', 'facesse', 'facessimo', 'facessero',
  'facendo',

  // stare
  'sto', 'stai', 'sta', 'stiamo', 'stanno',
  'stia', 'stiate', 'stiano',
  'starò', 'starai', 'starà', 'staremo', 'starete', 'staranno',
  'starei', 'staresti', 'starebbe', 'staremmo', 'stareste', 'starebbero',
  'stavo', 'stavi', 'stava', 'stavamo', 'stavate', 'stavano',
  'stetti', 'stesti', 'stette', 'stemmo', 'steste', 'stettero',
  'stessi', 'stesse', 'stessimo', 'stessero',
  'stando'
]
export { ita }
305 |
--------------------------------------------------------------------------------
/src/stopwords_mya.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2019 Kyaw-Zin-Thant
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the 'Software'), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | */
24 |
/**
 * Burmese (Myanmar) stop words, exported under the three-letter code `mya`.
 *
 * Every entry appears exactly once. The original list repeated eight words
 * ('နှင့်', 'ထိုအရာ', 'ဤအရာ', 'တစ်ခုခု', 'အရာရာတိုင်း', 'တစ်ခုစီ',
 * 'တစ်စုံတစ်ရာ', 'မည်သူမဆို'); the later occurrence of each has been
 * removed and the order of the remaining entries is unchanged.
 *
 * NOTE(review): near-identical spellings elsewhere in the list (for example
 * the two "by whatever means" variants) are kept as distinct entries; they
 * differ in at least one character and may be intentional — confirm with a
 * native speaker before merging any of them.
 */
const mya = [
  'အပေါ်',
  'အနက်',
  'အမြဲတမ်း',
  'အတွင်းတွင်',
  'မကြာမီ',
  'မတိုင်မီ',
  'ဒါ့အပြင်',
  'အောက်မှာ',
  'အထဲမှာ',
  'ဘယ်တော့မျှ',
  'မကြာခဏ',
  'တော်တော်လေး',
  'စဉ်တွင်',
  'နှင့်အတူ',
  'နှင့်',
  'နှင့်တကွ',
  'ကျွန်တော်',
  'ကျွန်မ',
  'ငါ',
  'ကျုပ်',
  'ကျွနု်ပ်',
  'ကျနော်',
  'ကျမ',
  'သူ',
  'သူမ',
  'ထိုဟာ',
  'ထိုအရာ',
  'ဤအရာ',
  'ထို',
  '၄င်း',
  'ကျွန်တော်တို့',
  'ကျွန်မတို့',
  'ငါတို့',
  'ကျုပ်တို့',
  'ကျွနု်ပ်တို့',
  'ကျနော်တို့',
  'ကျမတို့',
  'သင်',
  'သင်တို့',
  'နင်တို့',
  'မင်း',
  'မင်းတို့',
  'သူတို့',
  'ကျွန်တော်အား',
  'ကျွန်တော်ကို',
  'ကျွန်မကို',
  'ငါကို',
  'ကျုပ်ကို',
  'ကျွနု်ပ်ကို',
  'သူ့ကို',
  'သူမကို',
  'ထိုအရာကို',
  'သင့်ကို',
  'သင်တို့ကို',
  'နင်တို့ကို',
  'မင်းကို',
  'မင်းတို့ကို',
  'ငါတို့ကို',
  'ကျုပ်တို့ကို',
  'ကျွနု်ပ်တို့ကို',
  'မိမိကိုယ်တိုင်',
  'မိမိဘာသာ',
  'မင်းကိုယ်တိုင်',
  'မင်းဘာသာ',
  'မင်းတို့ကိုယ်တိုင်',
  'မင်းတို့ဘာသာ',
  'သူကိုယ်တိုင်',
  'ကိုယ်တိုင်',
  'သူမကိုယ်တိုင်',
  'သူ့ဘာသာ',
  'သူ့ကိုယ်ကို',
  'ကိုယ့်ကိုယ်ကို',
  'မိမိကိုယ်ကို',
  '၄င်းပင်',
  'ထိုအရာပင်',
  'သည့်',
  'မည့်',
  'တဲ့',
  'ကျွနု်ပ်၏',
  'ကျွန်တော်၏',
  'ကျွန်မ၏',
  'ကျနော်၏',
  'ကျမ၏',
  'သူ၏',
  'သူမ၏',
  'ထိုအရာ၏',
  'ထိုဟာ၏',
  'ကျွနု်ပ်တို့၏',
  'ငါတို့၏',
  'ကျွန်တော်တို့၏',
  'ကျွန်မတို့၏',
  'ကျနော်တို့၏',
  'ကျမတို့၏',
  'သင်၏',
  'သင်တို့၏',
  'မင်း၏',
  'မင်းတို့၏',
  'သူတို့၏',
  'ကျွန်တော့်ဟာ',
  'ကျွန်မဟာ',
  'ကျနော်၏ဟာ',
  'ကျမ၏ဟာ',
  'ကျမဟာ',
  'ကျနော်ဟာ',
  'သူဟာ',
  'သူမဟာ',
  'သူ့ဟာ',
  'ကျွနု်ပ်တို့ဟာ',
  'ကျွန်တော်တို့ဟာ',
  'ကျွန်မတို့ဟာ',
  'သင်တို့ဟာ',
  'မင်းတို့ဟာ',
  'သူတို့ဟာ',
  'သူမတို့ဟာ',
  'ဟောဒါ',
  'ဟောဒီ',
  'ဟောဒီဟာ',
  'ဒီဟာ',
  'ဒါ',
  '၄င်းအရာ',
  'ယင်းအရာ',
  'အဲဒါ',
  'ဟိုဟာ',
  'အချို့',
  'တစ်ခုခု',
  'အဘယ်မဆို',
  'ဘယ်အရာမဆို',
  'အဘယ်မည်သော',
  'အကြင်',
  'အရာရာတိုင်း',
  'စိုးစဉ်မျှ',
  'စိုးစဉ်းမျှ',
  'ဘယ်လောက်မဆို',
  'တစ်စုံတစ်ရာ',
  'တစုံတရာ',
  'အလျဉ်းမဟုတ်',
  'မည်သည့်နည်းနှင့်မျှမဟုတ်',
  'အလျဉ်းမရှိသော',
  'အခြားဖြစ်သော',
  'အခြားသော',
  'အခြားတစ်ခု',
  'အခြားတစ်ယောက်',
  'အားလုံး',
  'အကုန်လုံး',
  'အလုံးစုံ',
  'အရာခပ်သိမ်း',
  'တစ်ခုစီ',
  'အသီးသီး',
  'တစ်ဦးဦး',
  'ကိုယ်စီကိုယ်ငှ',
  'ကိုယ်စီ',
  'တစ်ဦးစီ',
  'တစ်ယောက်စီ',
  'အကုန်',
  'အပြည့်အစုံ',
  'လုံးလုံး',
  'နှစ်ခုလုံး',
  'နှစ်ယောက်လုံး',
  'နှစ်ဘက်လုံး',
  'တစ်စုံတစ်ခု',
  'တစုံတခု',
  'တစ်စုံတစ်ယောက်',
  'တစုံတယောက်',
  'တစ်ယောက်ယောက်',
  'မည်သူမဆို',
  'ဘာမျှမရှိ',
  'ဘာမှမရှိ',
  'အဘယ်အရာမျှမရှိ',
  'လူတိုင်း',
  'လူတကာ',
  'ပြီးလျှင်',
  '၄င်းနောက်',
  'သို့မဟုတ်',
  'သို့တည်းမဟုတ်',
  'သို့မဟုတ်လျှင်',
  'ဒါမှမဟုတ်',
  'ဖြစ်စေ',
  'သို့စေကာမူ',
  'ဒါပေမယ့်',
  'ဒါပေမဲ့',
  'မှတစ်ပါး',
  'မှလွဲလျှင်',
  'အဘယ်ကြောင့်ဆိုသော်',
  'သောကြောင့်',
  'သဖြင့်',
  '၍',
  'သည့်အတွက်ကြောင့်',
  'လျှင်',
  'ပါက',
  'အကယ်၍',
  'သော်ငြားလည်း',
  'စေကာမူ',
  'နည်းတူ',
  'ပေမယ့်',
  'ပေမဲ့',
  'ထိုနည်းတူစွာ',
  'ထိုနည်းတူ',
  'ကဲ့သို့',
  'သကဲ့သို့',
  'ယင်းကဲ့သို့',
  'ထိုကဲ့သို့',
  'နှင့်စပ်လျဉ်း၍',
  'ဤမျှ',
  'ဤမျှလောက်',
  'ဤကဲ့သို့',
  'အခုလောက်ထိ',
  'ဒါကတော့',
  'အဘယ်ကဲ့သလို့',
  'မည်ကဲ့သို့',
  'မည်သည့်နည်းနှင့်',
  'မည်သည့်နည်းဖြင့်',
  'မည်သည့်နည့်နှင့်မဆို',
  'မည်သည့်နည်းဖြင့်မဆို',
  'မည်သို့',
  'ဘယ်လိုလဲ',
  'သို့ပေတည့်',
  'သို့ပေမည့်',
  'ဘယ်နည်းနှင့်',
  'မည်ရွေ့မည်မျှ',
  'အဘယ်မျှလောက်',
  'ဘယ်လောက်',
  'မည်သူ',
  'ဘယ်သူ',
  'မည်သည့်အကြောင်းကြောင့်',
  'ဘာအတွက်ကြောင့်',
  'အဘယ်ကြောင့်',
  'မည်သည့်အတွက်ကြောင့်',
  'ဘာကြောင့်',
  'ဘာအတွက်နဲ့လဲ',
  'မည်သည်',
  'ဘာလဲ',
  'အဘယ်အရာနည်း',
  'မည်သည့်အရပ်မှာ',
  'ဘယ်နေရာတွင်',
  'မည်သည့်နေရာတွင်',
  'မည်သည့်နေရာသို့',
  'ဘယ်နေရာသို့',
  'ဘယ်နေရာမှာ',
  'ဘယ်သူ၏',
  'မည်သည့်အရာ၏',
  'မည်သည့်အခါ',
  'ဘယ်အချိန်',
  'ဘယ်အခါ',
  'မည်သည့်အချိန်',
  'ဘယ်တော့',
  'မည်သူကို',
  'မည်သူက',
  'ဘယ်သူ့ကို',
  'မည်သူမည်ဝါ',
  'မည်သည့်အရာ',
  'ဘယ်အရာ',
  'မည်သို့ပင်ဖြစ်စေ',
  'ဘယ်လိုပဲဖြစ်ဖြစ်',
  'မည်ရွေ့မည်မျှဖြစ်စေ',
  'မည်သည့်နည်းနှင့်မဆို',
  'ဘယ်နည်းနဲ့ဖြစ်ဖြစ်',
  'ဘယ်သူမဆို',
  'အဘယ်သူမဆို',
  'မည်သည့်အရာမဆို',
  'ဘာဖြစ်ဖြစ်',
  'မည်သည့်အရာဖြစ်ဖြစ်',
  'မည်သည့်အရပ်၌မဆို',
  'မည်သည့်နေရာမဆို',
  'ဘယ်အခါမဆို',
  'ဘယ်အချိန်မဆို',
  'ဘယ်အခါဖြစ်ဖြစ်',
  'အချိန်အခါမရွေး'
]
export { mya }
303 |
--------------------------------------------------------------------------------
/src/stopwords_slv.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
3 | const slv = [
4 | 'a',
5 | 'ali',
6 | 'april',
7 | 'avgust',
8 | 'b',
9 | 'bi',
10 | 'bil',
11 | 'bila',
12 | 'bile',
13 | 'bili',
14 | 'bilo',
15 | 'biti',
16 | 'blizu',
17 | 'bo',
18 | 'bodo',
19 | 'bojo',
20 | 'bolj',
21 | 'bom',
22 | 'bomo',
23 | 'boste',
24 | 'bova',
25 | 'boš',
26 | 'brez',
27 | 'c',
28 | 'cel',
29 | 'cela',
30 | 'celi',
31 | 'celo',
32 | 'd',
33 | 'da',
34 | 'daleč',
35 | 'dan',
36 | 'danes',
37 | 'datum',
38 | 'december',
39 | 'deset',
40 | 'deseta',
41 | 'deseti',
42 | 'deseto',
43 | 'devet',
44 | 'deveta',
45 | 'deveti',
46 | 'deveto',
47 | 'do',
48 | 'dober',
49 | 'dobra',
50 | 'dobri',
51 | 'dobro',
52 | 'dokler',
53 | 'dol',
54 | 'dolg',
55 | 'dolga',
56 | 'dolgi',
57 | 'dovolj',
58 | 'drug',
59 | 'druga',
60 | 'drugi',
61 | 'drugo',
62 | 'dva',
63 | 'dve',
64 | 'e',
65 | 'eden',
66 | 'en',
67 | 'ena',
68 | 'ene',
69 | 'eni',
70 | 'enkrat',
71 | 'eno',
72 | 'etc.',
73 | 'f',
74 | 'februar',
75 | 'g',
76 | 'g.',
77 | 'ga',
78 | 'ga.',
79 | 'gor',
80 | 'gospa',
81 | 'gospod',
82 | 'h',
83 | 'halo',
84 | 'i',
85 | 'idr.',
86 | 'ii',
87 | 'iii',
88 | 'in',
89 | 'iv',
90 | 'ix',
91 | 'iz',
92 | 'j',
93 | 'januar',
94 | 'jaz',
95 | 'je',
96 | 'ji',
97 | 'jih',
98 | 'jim',
99 | 'jo',
100 | 'julij',
101 | 'junij',
102 | 'jutri',
103 | 'k',
104 | 'kadarkoli',
105 | 'kaj',
106 | 'kajti',
107 | 'kako',
108 | 'kakor',
109 | 'kamor',
110 | 'kamorkoli',
111 | 'kar',
112 | 'karkoli',
113 | 'katerikoli',
114 | 'kdaj',
115 | 'kdo',
116 | 'kdorkoli',
117 | 'ker',
118 | 'ki',
119 | 'kje',
120 | 'kjer',
121 | 'kjerkoli',
122 | 'ko',
123 | 'koder',
124 | 'koderkoli',
125 | 'koga',
126 | 'komu',
127 | 'kot',
128 | 'kratek',
129 | 'kratka',
130 | 'kratke',
131 | 'kratki',
132 | 'l',
133 | 'lahka',
134 | 'lahke',
135 | 'lahki',
136 | 'lahko',
137 | 'le',
138 | 'lep',
139 | 'lepa',
140 | 'lepe',
141 | 'lepi',
142 | 'lepo',
143 | 'leto',
144 | 'm',
145 | 'maj',
146 | 'majhen',
147 | 'majhna',
148 | 'majhni',
149 | 'malce',
150 | 'malo',
151 | 'manj',
152 | 'marec',
153 | 'me',
154 | 'med',
155 | 'medtem',
156 | 'mene',
157 | 'mesec',
158 | 'mi',
159 | 'midva',
160 | 'midve',
161 | 'mnogo',
162 | 'moj',
163 | 'moja',
164 | 'moje',
165 | 'mora',
166 | 'morajo',
167 | 'moram',
168 | 'moramo',
169 | 'morate',
170 | 'moraš',
171 | 'morem',
172 | 'mu',
173 | 'n',
174 | 'na',
175 | 'nad',
176 | 'naj',
177 | 'najina',
178 | 'najino',
179 | 'najmanj',
180 | 'naju',
181 | 'največ',
182 | 'nam',
183 | 'narobe',
184 | 'nas',
185 | 'nato',
186 | 'nazaj',
187 | 'naš',
188 | 'naša',
189 | 'naše',
190 | 'ne',
191 | 'nedavno',
192 | 'nedelja',
193 | 'nek',
194 | 'neka',
195 | 'nekaj',
196 | 'nekatere',
197 | 'nekateri',
198 | 'nekatero',
199 | 'nekdo',
200 | 'neke',
201 | 'nekega',
202 | 'neki',
203 | 'nekje',
204 | 'neko',
205 | 'nekoga',
206 | 'nekoč',
207 | 'ni',
208 | 'nikamor',
209 | 'nikdar',
210 | 'nikjer',
211 | 'nikoli',
212 | 'nič',
213 | 'nje',
214 | 'njega',
215 | 'njegov',
216 | 'njegova',
217 | 'njegovo',
218 | 'njej',
219 | 'njemu',
220 | 'njen',
221 | 'njena',
222 | 'njeno',
223 | 'nji',
224 | 'njih',
225 | 'njihov',
226 | 'njihova',
227 | 'njihovo',
228 | 'njiju',
229 | 'njim',
230 | 'njo',
231 | 'njun',
232 | 'njuna',
233 | 'njuno',
234 | 'no',
235 | 'nocoj',
236 | 'november',
237 | 'npr.',
238 | 'o',
239 | 'ob',
240 | 'oba',
241 | 'obe',
242 | 'oboje',
243 | 'od',
244 | 'odprt',
245 | 'odprta',
246 | 'odprti',
247 | 'okoli',
248 | 'oktober',
249 | 'on',
250 | 'onadva',
251 | 'one',
252 | 'oni',
253 | 'onidve',
254 | 'osem',
255 | 'osma',
256 | 'osmi',
257 | 'osmo',
258 | 'oz.',
259 | 'p',
260 | 'pa',
261 | 'pet',
262 | 'peta',
263 | 'petek',
264 | 'peti',
265 | 'peto',
266 | 'po',
267 | 'pod',
268 | 'pogosto',
269 | 'poleg',
270 | 'poln',
271 | 'polna',
272 | 'polni',
273 | 'polno',
274 | 'ponavadi',
275 | 'ponedeljek',
276 | 'ponovno',
277 | 'potem',
278 | 'povsod',
279 | 'pozdravljen',
280 | 'pozdravljeni',
281 | 'prav',
282 | 'prava',
283 | 'prave',
284 | 'pravi',
285 | 'pravo',
286 | 'prazen',
287 | 'prazna',
288 | 'prazno',
289 | 'prbl.',
290 | 'precej',
291 | 'pred',
292 | 'prej',
293 | 'preko',
294 | 'pri',
295 | 'pribl.',
296 | 'približno',
297 | 'primer',
298 | 'pripravljen',
299 | 'pripravljena',
300 | 'pripravljeni',
301 | 'proti',
302 | 'prva',
303 | 'prvi',
304 | 'prvo',
305 | 'r',
306 | 'ravno',
307 | 'redko',
308 | 'res',
309 | 'reč',
310 | 's',
311 | 'saj',
312 | 'sam',
313 | 'sama',
314 | 'same',
315 | 'sami',
316 | 'samo',
317 | 'se',
318 | 'sebe',
319 | 'sebi',
320 | 'sedaj',
321 | 'sedem',
322 | 'sedma',
323 | 'sedmi',
324 | 'sedmo',
325 | 'sem',
326 | 'september',
327 | 'seveda',
328 | 'si',
329 | 'sicer',
330 | 'skoraj',
331 | 'skozi',
332 | 'slab',
333 | 'smo',
334 | 'so',
335 | 'sobota',
336 | 'spet',
337 | 'sreda',
338 | 'srednja',
339 | 'srednji',
340 | 'sta',
341 | 'ste',
342 | 'stran',
343 | 'stvar',
344 | 'sva',
345 | 't',
346 | 'ta',
347 | 'tak',
348 | 'taka',
349 | 'take',
350 | 'taki',
351 | 'tako',
352 | 'takoj',
353 | 'tam',
354 | 'te',
355 | 'tebe',
356 | 'tebi',
357 | 'tega',
358 | 'težak',
359 | 'težka',
360 | 'težki',
361 | 'težko',
362 | 'ti',
363 | 'tista',
364 | 'tiste',
365 | 'tisti',
366 | 'tisto',
367 | 'tj.',
368 | 'tja',
369 | 'to',
370 | 'toda',
371 | 'torek',
372 | 'tretja',
373 | 'tretje',
374 | 'tretji',
375 | 'tri',
376 | 'tu',
377 | 'tudi',
378 | 'tukaj',
379 | 'tvoj',
380 | 'tvoja',
381 | 'tvoje',
382 | 'u',
383 | 'v',
384 | 'vaju',
385 | 'vam',
386 | 'vas',
387 | 'vaš',
388 | 'vaša',
389 | 'vaše',
390 | 've',
391 | 'vedno',
392 | 'velik',
393 | 'velika',
394 | 'veliki',
395 | 'veliko',
396 | 'vendar',
397 | 'ves',
398 | 'več',
399 | 'vi',
400 | 'vidva',
401 | 'vii',
402 | 'viii',
403 | 'visok',
404 | 'visoka',
405 | 'visoke',
406 | 'visoki',
407 | 'vsa',
408 | 'vsaj',
409 | 'vsak',
410 | 'vsaka',
411 | 'vsakdo',
412 | 'vsake',
413 | 'vsaki',
414 | 'vsakomur',
415 | 'vse',
416 | 'vsega',
417 | 'vsi',
418 | 'vso',
419 | 'včasih',
420 | 'včeraj',
421 | 'x',
422 | 'z',
423 | 'za',
424 | 'zadaj',
425 | 'zadnji',
426 | 'zakaj',
427 | 'zaprta',
428 | 'zaprti',
429 | 'zaprto',
430 | 'zdaj',
431 | 'zelo',
432 | 'zunaj',
433 | 'č',
434 | 'če',
435 | 'često',
436 | 'četrta',
437 | 'četrtek',
438 | 'četrti',
439 | 'četrto',
440 | 'čez',
441 | 'čigav',
442 | 'š',
443 | 'šest',
444 | 'šesta',
445 | 'šesti',
446 | 'šesto',
447 | 'štiri',
448 | 'ž',
449 | 'že'
450 | ]
451 | export { slv }
452 |
--------------------------------------------------------------------------------
/src/stopwords_ben.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2016 Gene Diaz
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | */
24 |
25 | const ben = [
26 | 'অতএব',
27 | 'অথচ',
28 | 'অথবা',
29 | 'অনুযায়ী',
30 | 'অনেক',
31 | 'অনেকে',
32 | 'অনেকেই',
33 | 'অন্তত',
34 | 'অন্য',
35 | 'অবধি',
36 | 'অবশ্য',
37 | 'অর্থাত',
38 | 'আই',
39 | 'আগামী',
40 | 'আগে',
41 | 'আগেই',
42 | 'আছে',
43 | 'আজ',
44 | 'আদ্যভাগে',
45 | 'আপনার',
46 | 'আপনি',
47 | 'আবার',
48 | 'আমরা',
49 | 'আমাকে',
50 | 'আমাদের',
51 | 'আমার',
52 | 'আমি',
53 | 'আর',
54 | 'আরও',
55 | 'ই',
56 | 'ইত্যাদি',
57 | 'ইহা',
58 | 'উচিত',
59 | 'উত্তর',
60 | 'উনি',
61 | 'উপর',
62 | 'উপরে',
63 | 'এ',
64 | 'এঁদের',
65 | 'এঁরা',
66 | 'এই',
67 | 'একই',
68 | 'একটি',
69 | 'একবার',
70 | 'একে',
71 | 'এক্',
72 | 'এখন',
73 | 'এখনও',
74 | 'এখানে',
75 | 'এখানেই',
76 | 'এটা',
77 | 'এটাই',
78 | 'এটি',
79 | 'এত',
80 | 'এতটাই',
81 | 'এতে',
82 | 'এদের',
83 | 'এব',
84 | 'এবং',
85 | 'এবার',
86 | 'এমন',
87 | 'এমনকী',
88 | 'এমনি',
89 | 'এর',
90 | 'এরা',
91 | 'এল',
92 | 'এস',
93 | 'এসে',
94 | 'ঐ',
95 | 'ও',
96 | 'ওঁদের',
97 | 'ওঁর',
98 | 'ওঁরা',
99 | 'ওই',
100 | 'ওকে',
101 | 'ওখানে',
102 | 'ওদের',
103 | 'ওর',
104 | 'ওরা',
105 | 'কখনও',
106 | 'কত',
107 | 'কবে',
108 | 'কমনে',
109 | 'কয়েক',
110 | 'কয়েকটি',
111 | 'করছে',
112 | 'করছেন',
113 | 'করতে',
114 | 'করবে',
115 | 'করবেন',
116 | 'করলে',
117 | 'করলেন',
118 | 'করা',
119 | 'করাই',
120 | 'করায়',
121 | 'করার',
122 | 'করি',
123 | 'করিতে',
124 | 'করিয়া',
125 | 'করিয়ে',
126 | 'করে',
127 | 'করেই',
128 | 'করেছিলেন',
129 | 'করেছে',
130 | 'করেছেন',
131 | 'করেন',
132 | 'কাউকে',
133 | 'কাছ',
134 | 'কাছে',
135 | 'কাজ',
136 | 'কাজে',
137 | 'কারও',
138 | 'কারণ',
139 | 'কি',
140 | 'কিংবা',
141 | 'কিছু',
142 | 'কিছুই',
143 | 'কিন্তু',
144 | 'কী',
145 | 'কে',
146 | 'কেউ',
147 | 'কেউই',
148 | 'কেখা',
149 | 'কেন',
150 | 'কোটি',
151 | 'কোন',
152 | 'কোনও',
153 | 'কোনো',
154 | 'ক্ষেত্রে',
155 | 'কয়েক',
156 | 'খুব',
157 | 'গিয়ে',
158 | 'গিয়েছে',
159 | 'গিয়ে',
160 | 'গুলি',
161 | 'গেছে',
162 | 'গেল',
163 | 'গেলে',
164 | 'গোটা',
165 | 'চলে',
166 | 'চান',
167 | 'চায়',
168 | 'চার',
169 | 'চালু',
170 | 'চেয়ে',
171 | 'চেষ্টা',
172 | 'ছাড়া',
173 | 'ছাড়াও',
174 | 'ছিল',
175 | 'ছিলেন',
176 | 'জন',
177 | 'জনকে',
178 | 'জনের',
179 | 'জন্য',
180 | 'জন্যওজে',
181 | 'জানতে',
182 | 'জানা',
183 | 'জানানো',
184 | 'জানায়',
185 | 'জানিয়ে',
186 | 'জানিয়েছে',
187 | 'জে',
188 | 'জ্নজন',
189 | 'টি',
190 | 'ঠিক',
191 | 'তখন',
192 | 'তত',
193 | 'তথা',
194 | 'তবু',
195 | 'তবে',
196 | 'তা',
197 | 'তাঁকে',
198 | 'তাঁদের',
199 | 'তাঁর',
200 | 'তাঁরা',
201 | 'তাঁাহারা',
202 | 'তাই',
203 | 'তাও',
204 | 'তাকে',
205 | 'তাতে',
206 | 'তাদের',
207 | 'তার',
208 | 'তারপর',
209 | 'তারা',
210 | 'তারৈ',
211 | 'তাহলে',
212 | 'তাহা',
213 | 'তাহাতে',
214 | 'তাহার',
215 | 'তিনঐ',
216 | 'তিনি',
217 | 'তিনিও',
218 | 'তুমি',
219 | 'তুলে',
220 | 'তেমন',
221 | 'তো',
222 | 'তোমার',
223 | 'থাকবে',
224 | 'থাকবেন',
225 | 'থাকা',
226 | 'থাকায়',
227 | 'থাকে',
228 | 'থাকেন',
229 | 'থেকে',
230 | 'থেকেই',
231 | 'থেকেও',
232 | 'দিকে',
233 | 'দিতে',
234 | 'দিন',
235 | 'দিয়ে',
236 | 'দিয়েছে',
237 | 'দিয়েছেন',
238 | 'দিলেন',
239 | 'দু',
240 | 'দুই',
241 | 'দুটি',
242 | 'দুটো',
243 | 'দেওয়া',
244 | 'দেওয়ার',
245 | 'দেওয়া',
246 | 'দেখতে',
247 | 'দেখা',
248 | 'দেখে',
249 | 'দেন',
250 | 'দেয়',
251 | 'দ্বারা',
252 | 'ধরা',
253 | 'ধরে',
254 | 'ধামার',
255 | 'নতুন',
256 | 'নয়',
257 | 'না',
258 | 'নাই',
259 | 'নাকি',
260 | 'নাগাদ',
261 | 'নানা',
262 | 'নিজে',
263 | 'নিজেই',
264 | 'নিজেদের',
265 | 'নিজের',
266 | 'নিতে',
267 | 'নিয়ে',
268 | 'নিয়ে',
269 | 'নেই',
270 | 'নেওয়া',
271 | 'নেওয়ার',
272 | 'নেওয়া',
273 | 'নয়',
274 | 'পক্ষে',
275 | 'পর',
276 | 'পরে',
277 | 'পরেই',
278 | 'পরেও',
279 | 'পর্যন্ত',
280 | 'পাওয়া',
281 | 'পাচ',
282 | 'পারি',
283 | 'পারে',
284 | 'পারেন',
285 | 'পি',
286 | 'পেয়ে',
287 | 'পেয়্র্',
288 | 'প্রতি',
289 | 'প্রথম',
290 | 'প্রভৃতি',
291 | 'প্রযন্ত',
292 | 'প্রাথমিক',
293 | 'প্রায়',
294 | 'প্রায়',
295 | 'ফলে',
296 | 'ফিরে',
297 | 'ফের',
298 | 'বক্তব্য',
299 | 'বদলে',
300 | 'বন',
301 | 'বরং',
302 | 'বলতে',
303 | 'বলল',
304 | 'বললেন',
305 | 'বলা',
306 | 'বলে',
307 | 'বলেছেন',
308 | 'বলেন',
309 | 'বসে',
310 | 'বহু',
311 | 'বা',
312 | 'বাদে',
313 | 'বার',
314 | 'বি',
315 | 'বিনা',
316 | 'বিভিন্ন',
317 | 'বিশেষ',
318 | 'বিষয়টি',
319 | 'বেশ',
320 | 'বেশি',
321 | 'ব্যবহার',
322 | 'ব্যাপারে',
323 | 'ভাবে',
324 | 'ভাবেই',
325 | 'মতো',
326 | 'মতোই',
327 | 'মধ্যভাগে',
328 | 'মধ্যে',
329 | 'মধ্যেই',
330 | 'মধ্যেও',
331 | 'মনে',
332 | 'মাত্র',
333 | 'মাধ্যমে',
334 | 'মোট',
335 | 'মোটেই',
336 | 'যখন',
337 | 'যত',
338 | 'যতটা',
339 | 'যথেষ্ট',
340 | 'যদি',
341 | 'যদিও',
342 | 'যা',
343 | 'যাঁর',
344 | 'যাঁরা',
345 | 'যাওয়া',
346 | 'যাওয়ার',
347 | 'যাওয়া',
348 | 'যাকে',
349 | 'যাচ্ছে',
350 | 'যাতে',
351 | 'যাদের',
352 | 'যান',
353 | 'যাবে',
354 | 'যায়',
355 | 'যার',
356 | 'যারা',
357 | 'যিনি',
358 | 'যে',
359 | 'যেখানে',
360 | 'যেতে',
361 | 'যেন',
362 | 'যেমন',
363 | 'র',
364 | 'রকম',
365 | 'রয়েছে',
366 | 'রাখা',
367 | 'রেখে',
368 | 'লক্ষ',
369 | 'শুধু',
370 | 'শুরু',
371 | 'সঙ্গে',
372 | 'সঙ্গেও',
373 | 'সব',
374 | 'সবার',
375 | 'সমস্ত',
376 | 'সম্প্রতি',
377 | 'সহ',
378 | 'সহিত',
379 | 'সাধারণ',
380 | 'সামনে',
381 | 'সি',
382 | 'সুতরাং',
383 | 'সে',
384 | 'সেই',
385 | 'সেখান',
386 | 'সেখানে',
387 | 'সেটা',
388 | 'সেটাই',
389 | 'সেটাও',
390 | 'সেটি',
391 | 'স্পষ্ট',
392 | 'স্বয়ং',
393 | 'হইতে',
394 | 'হইবে',
395 | 'হইয়া',
396 | 'হওয়া',
397 | 'হওয়ায়',
398 | 'হওয়ার',
399 | 'হচ্ছে',
400 | 'হত',
401 | 'হতে',
402 | 'হতেই',
403 | 'হন',
404 | 'হবে',
405 | 'হবেন',
406 | 'হয়',
407 | 'হয়তো',
408 | 'হয়নি',
409 | 'হয়ে',
410 | 'হয়েই',
411 | 'হয়েছিল',
412 | 'হয়েছে',
413 | 'হয়েছেন',
414 | 'হল',
415 | 'হলে',
416 | 'হলেই',
417 | 'হলেও',
418 | 'হলো',
419 | 'হাজার',
420 | 'হিসাবে',
421 | 'হৈলে',
422 | 'হোক',
423 | 'হয়'
424 | ]
425 | export { ben }
426 |
--------------------------------------------------------------------------------
/src/stopwords_swe.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2016 Gene Diaz
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 | Source: https://github.com/stopwords-iso/stopwords-sv
25 | */
26 |
27 | const swe = [
28 | 'aderton',
29 | 'adertonde',
30 | 'adjö',
31 | 'aldrig',
32 | 'alla',
33 | 'allas',
34 | 'allt',
35 | 'alltid',
36 | 'alltså',
37 | 'andra',
38 | 'andras',
39 | 'annan',
40 | 'annat',
41 | 'artonde',
42 | 'artonn',
43 | 'att',
44 | 'av',
45 | 'bakom',
46 | 'bara',
47 | 'behöva',
48 | 'behövas',
49 | 'behövde',
50 | 'behövt',
51 | 'beslut',
52 | 'beslutat',
53 | 'beslutit',
54 | 'bland',
55 | 'blev',
56 | 'bli',
57 | 'blir',
58 | 'blivit',
59 | 'bort',
60 | 'borta',
61 | 'bra',
62 | 'bäst',
63 | 'bättre',
64 | 'båda',
65 | 'bådas',
66 | 'dag',
67 | 'dagar',
68 | 'dagarna',
69 | 'dagen',
70 | 'de',
71 | 'del',
72 | 'delen',
73 | 'dem',
74 | 'den',
75 | 'denna',
76 | 'deras',
77 | 'dess',
78 | 'dessa',
79 | 'det',
80 | 'detta',
81 | 'dig',
82 | 'din',
83 | 'dina',
84 | 'dit',
85 | 'ditt',
86 | 'dock',
87 | 'dom',
88 | 'du',
89 | 'där',
90 | 'därför',
91 | 'då',
92 | 'e',
93 | 'efter',
94 | 'eftersom',
95 | 'ej',
96 | 'elfte',
97 | 'eller',
98 | 'elva',
99 | 'emot',
100 | 'en',
101 | 'enkel',
102 | 'enkelt',
103 | 'enkla',
104 | 'enligt',
105 | 'ens',
106 | 'er',
107 | 'era',
108 | 'ers',
109 | 'ert',
110 | 'ett',
111 | 'ettusen',
112 | 'fanns',
113 | 'fem',
114 | 'femte',
115 | 'femtio',
116 | 'femtionde',
117 | 'femton',
118 | 'femtonde',
119 | 'fick',
120 | 'fin',
121 | 'finnas',
122 | 'finns',
123 | 'fjorton',
124 | 'fjortonde',
125 | 'fjärde',
126 | 'fler',
127 | 'flera',
128 | 'flesta',
129 | 'fram',
130 | 'framför',
131 | 'från',
132 | 'fyra',
133 | 'fyrtio',
134 | 'fyrtionde',
135 | 'få',
136 | 'får',
137 | 'fått',
138 | 'följande',
139 | 'för',
140 | 'före',
141 | 'förlåt',
142 | 'förra',
143 | 'första',
144 | 'genast',
145 | 'genom',
146 | 'gick',
147 | 'gjorde',
148 | 'gjort',
149 | 'god',
150 | 'goda',
151 | 'godare',
152 | 'godast',
153 | 'gott',
154 | 'gälla',
155 | 'gäller',
156 | 'gällt',
157 | 'gärna',
158 | 'gå',
159 | 'går',
160 | 'gått',
161 | 'gör',
162 | 'göra',
163 | 'ha',
164 | 'hade',
165 | 'haft',
166 | 'han',
167 | 'hans',
168 | 'har',
169 | 'heller',
170 | 'hellre',
171 | 'helst',
172 | 'helt',
173 | 'henne',
174 | 'hennes',
175 | 'hit',
176 | 'hon',
177 | 'honom',
178 | 'hundra',
179 | 'hundraen',
180 | 'hundraett',
181 | 'hur',
182 | 'här',
183 | 'hög',
184 | 'höger',
185 | 'högre',
186 | 'högst',
187 | 'i',
188 | 'ibland',
189 | 'icke',
190 | 'idag',
191 | 'igen',
192 | 'igår',
193 | 'imorgon',
194 | 'in',
195 | 'inför',
196 | 'inga',
197 | 'ingen',
198 | 'ingenting',
199 | 'inget',
200 | 'innan',
201 | 'inne',
202 | 'inom',
203 | 'inte',
204 | 'inuti',
205 | 'ja',
206 | 'jag',
207 | 'jo',
208 | 'ju',
209 | 'just',
210 | 'jämfört',
211 | 'kan',
212 | 'kanske',
213 | 'knappast',
214 | 'kom',
215 | 'komma',
216 | 'kommer',
217 | 'kommit',
218 | 'kr',
219 | 'kunde',
220 | 'kunna',
221 | 'kunnat',
222 | 'kvar',
223 | 'legat',
224 | 'ligga',
225 | 'ligger',
226 | 'lika',
227 | 'likställd',
228 | 'likställda',
229 | 'lilla',
230 | 'lite',
231 | 'liten',
232 | 'litet',
233 | 'länge',
234 | 'längre',
235 | 'längst',
236 | 'lätt',
237 | 'lättare',
238 | 'lättast',
239 | 'långsam',
240 | 'långsammare',
241 | 'långsammast',
242 | 'långsamt',
243 | 'långt',
244 | 'låt',
245 | 'man',
246 | 'med',
247 | 'mej',
248 | 'mellan',
249 | 'men',
250 | 'mer',
251 | 'mera',
252 | 'mest',
253 | 'mig',
254 | 'min',
255 | 'mina',
256 | 'mindre',
257 | 'minst',
258 | 'mitt',
259 | 'mittemot',
260 | 'mot',
261 | 'mycket',
262 | 'många',
263 | 'måste',
264 | 'möjlig',
265 | 'möjligen',
266 | 'möjligt',
267 | 'möjligtvis',
268 | 'ned',
269 | 'nederst',
270 | 'nedersta',
271 | 'nedre',
272 | 'nej',
273 | 'ner',
274 | 'ni',
275 | 'nio',
276 | 'nionde',
277 | 'nittio',
278 | 'nittionde',
279 | 'nitton',
280 | 'nittonde',
281 | 'nog',
282 | 'noll',
283 | 'nr',
284 | 'nu',
285 | 'nummer',
286 | 'när',
287 | 'nästa',
288 | 'någon',
289 | 'någonting',
290 | 'något',
291 | 'några',
292 | 'nån',
293 | 'nånting',
294 | 'nåt',
295 | 'nödvändig',
296 | 'nödvändiga',
297 | 'nödvändigt',
298 | 'nödvändigtvis',
299 | 'och',
300 | 'också',
301 | 'ofta',
302 | 'oftast',
303 | 'olika',
304 | 'olikt',
305 | 'om',
306 | 'oss',
307 | 'på',
308 | 'rakt',
309 | 'redan',
310 | 'rätt',
311 | 'sa',
312 | 'sade',
313 | 'sagt',
314 | 'samma',
315 | 'sedan',
316 | 'senare',
317 | 'senast',
318 | 'sent',
319 | 'sex',
320 | 'sextio',
321 | 'sextionde',
322 | 'sexton',
323 | 'sextonde',
324 | 'sig',
325 | 'sin',
326 | 'sina',
327 | 'sist',
328 | 'sista',
329 | 'siste',
330 | 'sitt',
331 | 'sitta',
332 | 'sju',
333 | 'sjunde',
334 | 'sjuttio',
335 | 'sjuttionde',
336 | 'sjutton',
337 | 'sjuttonde',
338 | 'själv',
339 | 'sjätte',
340 | 'ska',
341 | 'skall',
342 | 'skulle',
343 | 'slutligen',
344 | 'små',
345 | 'smått',
346 | 'snart',
347 | 'som',
348 | 'stor',
349 | 'stora',
350 | 'stort',
351 | 'större',
352 | 'störst',
353 | 'säga',
354 | 'säger',
355 | 'sämre',
356 | 'sämst',
357 | 'så',
358 | 'sådan',
359 | 'sådana',
360 | 'sådant',
361 | 'ta',
362 | 'tack',
363 | 'tar',
364 | 'tidig',
365 | 'tidigare',
366 | 'tidigast',
367 | 'tidigt',
368 | 'till',
369 | 'tills',
370 | 'tillsammans',
371 | 'tio',
372 | 'tionde',
373 | 'tjugo',
374 | 'tjugoen',
375 | 'tjugoett',
376 | 'tjugonde',
377 | 'tjugotre',
378 | 'tjugotvå',
379 | 'tjungo',
380 | 'tolfte',
381 | 'tolv',
382 | 'tre',
383 | 'tredje',
384 | 'trettio',
385 | 'trettionde',
386 | 'tretton',
387 | 'trettonde',
388 | 'två',
389 | 'tvåhundra',
390 | 'under',
391 | 'upp',
392 | 'ur',
393 | 'ursäkt',
394 | 'ut',
395 | 'utan',
396 | 'utanför',
397 | 'ute',
398 | 'va',
399 | 'vad',
400 | 'var',
401 | 'vara',
402 | 'varför',
403 | 'varifrån',
404 | 'varit',
405 | 'varje',
406 | 'varken',
407 | 'vars',
408 | 'varsågod',
409 | 'vart',
410 | 'vem',
411 | 'vems',
412 | 'verkligen',
413 | 'vi',
414 | 'vid',
415 | 'vidare',
416 | 'viktig',
417 | 'viktigare',
418 | 'viktigast',
419 | 'viktigt',
420 | 'vilka',
421 | 'vilkas',
422 | 'vilken',
423 | 'vilket',
424 | 'vill',
425 | 'väl',
426 | 'vänster',
427 | 'vänstra',
428 | 'värre',
429 | 'vår',
430 | 'våra',
431 | 'vårt',
432 | 'än',
433 | 'ännu',
434 | 'är',
435 | 'även',
436 | 'åt',
437 | 'åtminstone',
438 | 'åtta',
439 | 'åttio',
440 | 'åttionde',
441 | 'åttonde',
442 | 'över',
443 | 'övermorgon',
444 | 'överst',
445 | 'övre'
446 | ]
447 | export { swe }
448 |
--------------------------------------------------------------------------------
/src/stopwords_panGu.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 | Copyright (c) 2018-20 Espen Klem
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in all
12 | copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | SOFTWARE.
21 | */
22 |
23 | /* This list is frequency sorted. That means it can be sliced from the bottom
24 | to be less aggressive in excluding stopwords */
25 |
26 | const panGu = [
27 | 'ਦੇ',
28 | 'ਵਿੱਚ',
29 | 'ਦਾ',
30 | 'ਅਤੇ',
31 | 'ਦੀ',
32 | 'ਇੱਕ',
33 | 'ਨੂੰ',
34 | 'ਹੈ',
35 | 'ਤੋਂ',
36 | 'ਇਸ',
37 | 'ਇਹ',
38 | 'ਨੇ',
39 | 'ਤੇ',
40 | 'ਨਾਲ',
41 | 'ਲਈ',
42 | 'ਵੀ',
43 | 'ਸੀ',
44 | 'ਵਿਚ',
45 | 'ਕਿ',
46 | 'ਜੋ',
47 | 'ਉਹ',
48 | 'ਉਸ',
49 | 'ਹਨ',
50 | 'ਜਾਂਦਾ',
51 | 'ਕੀਤਾ',
52 | 'ਗਿਆ',
53 | 'ਹੀ',
54 | 'ਕੇ',
55 | 'ਜਾਂ',
56 | 'ਦੀਆਂ',
57 | 'ਜਿਸ',
58 | 'ਕਰਨ',
59 | 'ਹੋ',
60 | 'ਕਰ',
61 | 'ਆਪਣੇ',
62 | 'ਕੀਤੀ',
63 | 'ਤੌਰ',
64 | 'ਬਾਅਦ',
65 | 'ਨਹੀਂ',
66 | 'ਭਾਰਤੀ',
67 | 'ਪਿੰਡ',
68 | 'ਸਿੰਘ',
69 | 'ਉੱਤੇ',
70 | 'ਸਾਲ',
71 | '।',
72 | 'ਪੰਜਾਬ',
73 | 'ਸਭ',
74 | 'ਭਾਰਤ',
75 | 'ਉਨ੍ਹਾਂ',
76 | 'ਹੁੰਦਾ',
77 | 'ਤੱਕ',
78 | 'ਇਕ',
79 | 'ਹੋਇਆ',
80 | 'ਜਨਮ',
81 | 'ਬਹੁਤ',
82 | 'ਪਰ',
83 | 'ਦੁਆਰਾ',
84 | 'ਰੂਪ',
85 | 'ਹੋਰ',
86 | 'ਕੰਮ',
87 | 'ਆਪਣੀ',
88 | 'ਤਾਂ',
89 | 'ਸਮੇਂ',
90 | 'ਪੰਜਾਬੀ',
91 | 'ਗਈ',
92 | 'ਦਿੱਤਾ',
93 | 'ਦੋ',
94 | 'ਕਿਸੇ',
95 | 'ਕਈ',
96 | 'ਜਾ',
97 | 'ਵਾਲੇ',
98 | 'ਸ਼ੁਰੂ',
99 | 'ਉਸਨੇ',
100 | 'ਕਿਹਾ',
101 | 'ਹੋਣ',
102 | 'ਲੋਕ',
103 | 'ਜਾਂਦੀ',
104 | 'ਵਿੱਚੋਂ',
105 | 'ਨਾਮ',
106 | 'ਜਦੋਂ',
107 | 'ਪਹਿਲਾਂ',
108 | 'ਕਰਦਾ',
109 | 'ਹੁੰਦੀ',
110 | 'ਹੋਏ',
111 | 'ਸਨ',
112 | 'ਵਜੋਂ',
113 | 'ਰਾਜ',
114 | 'ਮੁੱਖ',
115 | 'ਕਰਦੇ',
116 | 'ਕੁਝ',
117 | 'ਸਾਰੇ',
118 | 'ਹੁੰਦੇ',
119 | 'ਸ਼ਹਿਰ',
120 | 'ਭਾਸ਼ਾ',
121 | 'ਹੋਈ',
122 | 'ਅਨੁਸਾਰ',
123 | 'ਸਕਦਾ',
124 | 'ਆਮ',
125 | 'ਵੱਖ',
126 | 'ਕੋਈ',
127 | 'ਵਾਰ',
128 | 'ਗਏ',
129 | 'ਖੇਤਰ',
130 | 'ਜੀ',
131 | 'ਕਾਰਨ',
132 | 'ਕਰਕੇ',
133 | 'ਜਿਵੇਂ',
134 | 'ਜ਼ਿਲ੍ਹੇ',
135 | 'ਲੋਕਾਂ',
136 | 'ਚ',
137 | 'ਸਾਹਿਤ',
138 | 'ਸਦੀ',
139 | 'ਬਾਰੇ',
140 | 'ਜਾਂਦੇ',
141 | 'ਵਾਲਾ',
142 | 'ਜਾਣ',
143 | 'ਪਹਿਲੀ',
144 | 'ਪ੍ਰਾਪਤ',
145 | 'ਰਿਹਾ',
146 | 'ਵਾਲੀ',
147 | 'ਨਾਂ',
148 | 'ਦੌਰਾਨ',
149 | 'ਤਰ੍ਹਾਂ',
150 | 'ਯੂਨੀਵਰਸਿਟੀ',
151 | 'ਨਾ',
152 | 'ਏ',
153 | 'ਤਿੰਨ',
154 | 'ਇਨ੍ਹਾਂ',
155 | 'ਗੁਰੂ',
156 | 'ਇਸਨੂੰ',
157 | 'ਇਹਨਾਂ',
158 | 'ਪਿਤਾ',
159 | 'ਲਿਆ',
160 | 'ਸ਼ਾਮਲ',
161 | 'ਸ਼ਬਦ',
162 | 'ਅੰਗਰੇਜ਼ੀ',
163 | 'ਉਸਨੂੰ',
164 | 'ਉਹਨਾਂ',
165 | 'ਸਥਿਤ',
166 | 'ਫਿਰ',
167 | 'ਜੀਵਨ',
168 | 'ਸਕੂਲ',
169 | 'ਹੁਣ',
170 | 'ਦਿਨ',
171 | 'ਕੀਤੇ',
172 | 'ਆਦਿ',
173 | 'ਵੱਧ',
174 | 'ਲੈ',
175 | 'ਘਰ',
176 | 'ਵੱਲ',
177 | 'ਦੇਸ਼',
178 | 'ਵਲੋਂ',
179 | 'ਬਣ',
180 | 'ਵੀਂ',
181 | 'ਫਿਲਮ',
182 | 'ਉਮਰ',
183 | 'ਬਲਾਕ',
184 | 'ਰਹੇ',
185 | 'ਸਾਹਿਬ',
186 | 'ਕਰਦੀ',
187 | 'ਹਰ',
188 | 'ਪੈਦਾ',
189 | 'ਘੱਟ',
190 | 'ਲੇਖਕ',
191 | 'ਹਿੱਸਾ',
192 | 'ਫ਼ਿਲਮ',
193 | 'ਮੌਤ',
194 | 'ਜਿੱਥੇ',
195 | 'ਵੱਡਾ',
196 | 'ਵਿਖੇ',
197 | 'ਆਪਣਾ',
198 | 'ਪਹਿਲਾ',
199 | 'ਵਰਤੋਂ',
200 | 'ਆਪ',
201 | 'ਕਰਨਾ',
202 | 'ਵਿਆਹ',
203 | 'ਰਹੀ',
204 | 'ਰਾਹੀਂ',
205 | 'ਦਿੱਤੀ',
206 | 'ਉਸਦੇ',
207 | 'ਪਰਿਵਾਰ',
208 | 'ਆ',
209 | 'ਦੂਜੇ',
210 | 'ਅਮਰੀਕਾ',
211 | 'ਮੰਨਿਆ',
212 | 'ਇਸਦੇ',
213 | 'ਈ',
214 | 'ਕਾਲਜ',
215 | 'ਸਰਕਾਰ',
216 | 'ਇੱਥੇ',
217 | 'ਪਾਕਿਸਤਾਨ',
218 | 'ਸ਼ਾਮਿਲ',
219 | 'ਵਿਗਿਆਨ',
220 | 'ਉਸਦੀ',
221 | 'ਪੇਸ਼',
222 | 'ਕਿਉਂਕਿ',
223 | 'ਪਹਿਲੇ',
224 | 'ਧਰਮ',
225 | 'ਮਸ਼ਹੂਰ',
226 | 'ਅੰਦਰ',
227 | 'ਵਿਚੋਂ',
228 | 'ਜਿਨ੍ਹਾਂ',
229 | 'ਜਾਣਿਆ',
230 | 'ਪਾਣੀ',
231 | 'ਇਲਾਵਾ',
232 | 'ਅਰਥ',
233 | 'ਚਾਰ',
234 | 'ਪ੍ਰਸਿੱਧ',
235 | 'ਨਾਵਲ',
236 | 'ਵੱਡੇ',
237 | 'ਵੱਲੋਂ',
238 | 'ਕਹਾਣੀ',
239 | 'ਵਿਸ਼ਵ',
240 | 'ਮੂਲ',
241 | 'ਅਮਰੀਕੀ',
242 | 'ਸਥਾਨ',
243 | 'ਇਤਿਹਾਸ',
244 | 'ਕੁੱਝ',
245 | 'ਵਿਕਾਸ',
246 | 'ਉੱਤਰ',
247 | 'ਸਿੱਖਿਆ',
248 | 'ਹਿੰਦੀ',
249 | 'ਪ੍ਰਮੁੱਖ',
250 | 'ਰਚਨਾ',
251 | 'ਬਣਾਇਆ',
252 | 'ਵਿਸ਼ੇਸ਼',
253 | 'ਡਾ',
254 | 'ਉੱਪਰ',
255 | 'ਪੱਛਮੀ',
256 | 'ਦੇਣ',
257 | 'ਇਸਦਾ',
258 | 'ਸਕਦੇ',
259 | 'ਰੱਖਿਆ',
260 | 'ਕਵੀ',
261 | 'ਦਿੱਲੀ',
262 | 'ਵੱਡੀ',
263 | 'ਭੂਮਿਕਾ',
264 | 'ਸਮਾਜ',
265 | 'ਕਾਵਿ',
266 | 'ਕੀ',
267 | 'ਕੋਲ',
268 | 'ਦ',
269 | 'ਗੱਲ',
270 | 'ਸੰਸਾਰ',
271 | 'ਭਾਗ',
272 | 'ਆਈ',
273 | 'ਦੱਖਣ',
274 | 'ਅੱਜ',
275 | 'ਸਿੱਖ',
276 | 'ਕਹਿੰਦੇ',
277 | 'ਸੰਗੀਤ',
278 | 'ਕਿਲੋਮੀਟਰ',
279 | 'ਜਿਹਨਾਂ',
280 | 'ਸਭਾ',
281 | 'ਜਿਸਦਾ',
282 | 'ਜਨਵਰੀ',
283 | 'ਕਵਿਤਾ',
284 | 'ਮੈਂਬਰ',
285 | 'ਲਿਖਿਆ',
286 | 'ਮਾਂ',
287 | 'ਕਲਾ',
288 | 'ਪੰਜ',
289 | 'ਥਾਂ',
290 | 'ਹੇਠ',
291 | 'ਜਿਆਦਾ',
292 | 'ਵਰਤਿਆ',
293 | 'ਮਾਰਚ',
294 | 'ਡੀ',
295 | 'ਅਕਤੂਬਰ',
296 | 'ਤਕ',
297 | 'ਨਾਟਕ',
298 | 'ਬੀ',
299 | 'ਖਾਸ',
300 | 'ਇਸੇ',
301 | 'ਆਧੁਨਿਕ',
302 | 'ਅਗਸਤ',
303 | 'ਤਿਆਰ',
304 | 'ਮਾਤਾ',
305 | 'ਬਣਾਉਣ',
306 | 'ਨਵੰਬਰ',
307 | 'ਵਿਅਕਤੀ',
308 | 'ਦੱਖਣੀ',
309 | 'ਦਸੰਬਰ',
310 | 'ਆਫ',
311 | 'ਗੀਤ',
312 | 'ਗਿਣਤੀ',
313 | 'ਕਾਲ',
314 | 'ਖੋਜ',
315 | 'ਸਾਲਾਂ',
316 | 'ਪੂਰੀ',
317 | 'ਸਮਾਂ',
318 | 'ਜ਼ਿਆਦਾ',
319 | 'ਇਸਦੀ',
320 | 'ਸਕਦੀ',
321 | 'ਵਿਚਕਾਰ',
322 | 'ਰਾਜਧਾਨੀ',
323 | 'ਉਸਦਾ',
324 | 'ਜੁਲਾਈ',
325 | 'ਜੂਨ',
326 | 'ਅਧੀਨ',
327 | 'ਸਥਾਪਨਾ',
328 | 'ਸੇਵਾ',
329 | 'ਭਾਵ',
330 | 'ਵਰਗ',
331 | 'ਛੋਟੇ',
332 | 'ਦਿੰਦਾ',
333 | 'ਸਮਾਜਿਕ',
334 | 'ਹੁੰਦੀਆਂ',
335 | 'ਟੀਮ',
336 | 'ਔਰਤਾਂ',
337 | 'ਅਕਸਰ',
338 | 'ਪ੍ਰਕਾਸ਼ਿਤ',
339 | 'ਉਰਦੂ',
340 | 'ਰੰਗ',
341 | 'ਪਾਰਟੀ',
342 | 'ਬਣਾ',
343 | 'ਪ੍ਰਭਾਵ',
344 | 'ਸ਼ੁਰੂਆਤ',
345 | 'ਲਗਭਗ',
346 | 'ਮਈ',
347 | 'ਸਿਰਫ',
348 | 'ਨੇੜੇ',
349 | 'ਜਿਸਨੂੰ',
350 | 'ਹਾਲਾਂਕਿ',
351 | 'ਦੂਰ',
352 | 'ਸਤੰਬਰ',
353 | 'ਕਿਤਾਬ',
354 | 'ਕਦੇ',
355 | 'ਉੱਤਰੀ',
356 | 'ਪ੍ਰਕਾਰ',
357 | 'ਇਸਨੇ',
358 | 'ਪ੍ਰਦੇਸ਼',
359 | 'ਅੱਗੇ',
360 | 'ਸੰਯੁਕਤ',
361 | 'ਪੜ੍ਹਾਈ',
362 | 'ਵਧੇਰੇ',
363 | 'ਨਾਲ਼',
364 | 'ਮਨੁੱਖ',
365 | 'ਬਾਕੀ',
366 | 'ਪ੍ਰਧਾਨ',
367 | 'ਦੂਜੀ',
368 | 'ਕੁੱਲ',
369 | 'ਆਫ਼',
370 | 'ਅਧਿਐਨ',
371 | 'ਰਾਸ਼ਟਰੀ',
372 | 'ਪੁੱਤਰ',
373 | 'ਅੰਤਰਰਾਸ਼ਟਰੀ',
374 | 'ਧਰਤੀ',
375 | 'ਕੇਂਦਰ',
376 | 'ਦੇਸ਼ਾਂ',
377 | 'ਮੱਧ',
378 | 'ਜ਼ਿਲ੍ਹਾ',
379 | 'ਸਾਰੀਆਂ',
380 | 'ਪੱਧਰ',
381 | 'ਹੋਵੇ',
382 | 'ਜੇ',
383 | 'ਭਾਈ',
384 | 'ਰਹਿਣ',
385 | 'ਪੁਰਸਕਾਰ',
386 | 'ਸਭਿਆਚਾਰ',
387 | 'ਪਤਾ',
388 | 'ਪਾਸੇ',
389 | 'ਨਵੇਂ',
390 | 'ਕੰਪਨੀ',
391 | 'ਬਾਹਰ',
392 | 'ਵੇਲੇ',
393 | 'ਸੰਨ',
394 | 'ਪੂਰਬੀ',
395 | 'ਵਿਚਾਰ',
396 | 'ਕਾਰਜ',
397 | 'ਪੀ',
398 | 'ਮਹੱਤਵਪੂਰਨ',
399 | 'ਦੁਨੀਆਂ',
400 | 'ਧਾਰਮਿਕ',
401 | 'ਮਨੁੱਖੀ',
402 | 'ਸਮੂਹ',
403 | 'ਅਜਿਹੇ',
404 | 'ਲਾਲ',
405 | 'ਦੂਜਾ',
406 | 'ਭਰਾ',
407 | 'ਸ੍ਰੀ',
408 | 'ਅੰਤ',
409 | 'ਜਾਂਦੀਆਂ',
410 | 'ਸ਼ਾਹ',
411 | 'ਰਹਿੰਦੇ',
412 | 'ਮਹਾਨ',
413 | 'ਚੀਨ',
414 | 'ਮੀਟਰ',
415 | 'ਵਰਗੇ',
416 | 'ਨਾਲੋਂ',
417 | 'ਹਾਸਲ',
418 | 'ਕਿਸਮ',
419 | 'ਅਜਿਹਾ',
420 | 'ਬਣਿਆ',
421 | 'ਭਰ',
422 | 'ਛੱਡ',
423 | 'ਲੈਣ',
424 | 'ਹਿੱਸੇ',
425 | 'ਟੀ',
426 | 'ਲਿਖੇ',
427 | 'ਮਿਲ',
428 | 'ਮੌਜੂਦ',
429 | 'ਦਿੱਤੇ',
430 | 'ਵਾਸਤੇ',
431 | 'ਵਾਲੀਆਂ',
432 | 'ਵਧੀਆ',
433 | 'ਰੂਸੀ',
434 | 'ਜਾਰੀ',
435 | 'ਸਰਕਾਰੀ',
436 | 'ਡਿਗਰੀ',
437 | 'ਪੱਛਮ',
438 | 'ਲੜਾਈ',
439 | 'ਭਾਸ਼ਾਵਾਂ',
440 | 'ਰਾਜਾ',
441 | 'ਜਲੰਧਰ',
442 | 'ਹਿੰਦੂ',
443 | 'ਔਰਤ',
444 | 'ਜੰਗ',
445 | 'ਬਾਬਾ',
446 | 'ਬੱਚਿਆਂ',
447 | 'ਮੰਤਰੀ',
448 | 'ਪਟਿਆਲਾ',
449 | 'ਵਾਂਗ',
450 | 'ਆਉਣ',
451 | 'ਭਾਵੇਂ',
452 | 'ਕੇਵਲ',
453 | 'ਐਸ',
454 | 'ਪ੍ਰਾਚੀਨ',
455 | 'ਰਹਿੰਦਾ',
456 | 'ਬੋਲੀ',
457 | 'ਅਵਾਰਡ',
458 | 'ਨਗਰ',
459 | 'ਖੇਡਾਂ',
460 | 'ਫਿਲਮਾਂ',
461 | 'ਬੱਚੇ',
462 | 'ਕੌਰ',
463 | 'ਤੋ',
464 | 'ਪ੍ਰਤੀ',
465 | 'ਕੁਆਂਟਮ',
466 | 'ਅਬਾਦੀ',
467 | 'ਪੁਸਤਕ',
468 | 'ਐਮ',
469 | 'ਰਾਮ',
470 | 'ਖੇਤਰਾਂ',
471 | 'ਫਰਵਰੀ',
472 | 'ਕ੍ਰਿਕਟ',
473 | 'ਪੈਂਦਾ',
474 | 'ਇਤਿਹਾਸਕ',
475 | 'ਲੱਗ',
476 | 'ਬ੍ਰਿਟਿਸ਼',
477 | 'ਆਇਆ',
478 | 'ਮਿਲਦਾ'
479 | ]
480 | export { panGu }
481 |
--------------------------------------------------------------------------------
/src/stopwords_ara.js:
--------------------------------------------------------------------------------
1 | /*
2 | The MIT License (MIT)
3 |
4 | Copyright (c) 2016 Gene Diaz
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the 'Software'), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | */
24 |
25 | const ara = [
26 | '،',
27 | 'ّآض',
28 | 'آمينَ',
29 | 'آه',
30 | 'آهاً',
31 | 'آي',
32 | 'أ',
33 | 'أب',
34 | 'أجل',
35 | 'أجمع',
36 | 'أخ',
37 | 'أخذ',
38 | 'أصبح',
39 | 'أضحى',
40 | 'أقبل',
41 | 'أقل',
42 | 'أكثر',
43 | 'ألا',
44 | 'أم',
45 | 'أما',
46 | 'أمامك',
47 | 'أمامكَ',
48 | 'أمسى',
49 | 'أمّا',
50 | 'أن',
51 | 'أنا',
52 | 'أنت',
53 | 'أنتم',
54 | 'أنتما',
55 | 'أنتن',
56 | 'أنتِ',
57 | 'أنشأ',
58 | 'أنّى',
59 | 'أو',
60 | 'أوشك',
61 | 'أولئك',
62 | 'أولئكم',
63 | 'أولاء',
64 | 'أولالك',
65 | 'أوّهْ',
66 | 'أي',
67 | 'أيا',
68 | 'أين',
69 | 'أينما',
70 | 'أيّ',
71 | 'أَنَّ',
72 | 'أََيُّ',
73 | 'أُفٍّ',
74 | 'إذ',
75 | 'إذا',
76 | 'إذاً',
77 | 'إذما',
78 | 'إذن',
79 | 'إلى',
80 | 'إليكم',
81 | 'إليكما',
82 | 'إليكنّ',
83 | 'إليكَ',
84 | 'إلَيْكَ',
85 | 'إلّا',
86 | 'إمّا',
87 | 'إن',
88 | 'إنّما',
89 | 'إي',
90 | 'إياك',
91 | 'إياكم',
92 | 'إياكما',
93 | 'إياكن',
94 | 'إيانا',
95 | 'إياه',
96 | 'إياها',
97 | 'إياهم',
98 | 'إياهما',
99 | 'إياهن',
100 | 'إياي',
101 | 'إيهٍ',
102 | 'إِنَّ',
103 | 'ا',
104 | 'ابتدأ',
105 | 'اثر',
106 | 'اجل',
107 | 'احد',
108 | 'اخرى',
109 | 'اخلولق',
110 | 'اذا',
111 | 'اربعة',
112 | 'ارتدّ',
113 | 'استحال',
114 | 'اطار',
115 | 'اعادة',
116 | 'اعلنت',
117 | 'اف',
118 | 'اكثر',
119 | 'اكد',
120 | 'الألاء',
121 | 'الألى',
122 | 'الا',
123 | 'الاخيرة',
124 | 'الان',
125 | 'الاول',
126 | 'الاولى',
127 | 'التى',
128 | 'التي',
129 | 'الثاني',
130 | 'الثانية',
131 | 'الذاتي',
132 | 'الذى',
133 | 'الذي',
134 | 'الذين',
135 | 'السابق',
136 | 'الف',
137 | 'اللائي',
138 | 'اللاتي',
139 | 'اللتان',
140 | 'اللتيا',
141 | 'اللتين',
142 | 'اللذان',
143 | 'اللذين',
144 | 'اللواتي',
145 | 'الماضي',
146 | 'المقبل',
147 | 'الوقت',
148 | 'الى',
149 | 'اليوم',
150 | 'اما',
151 | 'امام',
152 | 'امس',
153 | 'ان',
154 | 'انبرى',
155 | 'انقلب',
156 | 'انه',
157 | 'انها',
158 | 'او',
159 | 'اول',
160 | 'اي',
161 | 'ايار',
162 | 'ايام',
163 | 'ايضا',
164 | 'ب',
165 | 'بات',
166 | 'باسم',
167 | 'بان',
168 | 'بخٍ',
169 | 'برس',
170 | 'بسبب',
171 | 'بسّ',
172 | 'بشكل',
173 | 'بضع',
174 | 'بطآن',
175 | 'بعد',
176 | 'بعض',
177 | 'بك',
178 | 'بكم',
179 | 'بكما',
180 | 'بكن',
181 | 'بل',
182 | 'بلى',
183 | 'بما',
184 | 'بماذا',
185 | 'بمن',
186 | 'بن',
187 | 'بنا',
188 | 'به',
189 | 'بها',
190 | 'بي',
191 | 'بيد',
192 | 'بين',
193 | 'بَسْ',
194 | 'بَلْهَ',
195 | 'بِئْسَ',
196 | 'تانِ',
197 | 'تانِك',
198 | 'تبدّل',
199 | 'تجاه',
200 | 'تحوّل',
201 | 'تلقاء',
202 | 'تلك',
203 | 'تلكم',
204 | 'تلكما',
205 | 'تم',
206 | 'تينك',
207 | 'تَيْنِ',
208 | 'تِه',
209 | 'تِي',
210 | 'ثلاثة',
211 | 'ثم',
212 | 'ثمّ',
213 | 'ثمّة',
214 | 'ثُمَّ',
215 | 'جعل',
216 | 'جلل',
217 | 'جميع',
218 | 'جير',
219 | 'حار',
220 | 'حاشا',
221 | 'حاليا',
222 | 'حاي',
223 | 'حتى',
224 | 'حرى',
225 | 'حسب',
226 | 'حم',
227 | 'حوالى',
228 | 'حول',
229 | 'حيث',
230 | 'حيثما',
231 | 'حين',
232 | 'حيَّ',
233 | 'حَبَّذَا',
234 | 'حَتَّى',
235 | 'حَذارِ',
236 | 'خلا',
237 | 'خلال',
238 | 'دون',
239 | 'دونك',
240 | 'ذا',
241 | 'ذات',
242 | 'ذاك',
243 | 'ذانك',
244 | 'ذانِ',
245 | 'ذلك',
246 | 'ذلكم',
247 | 'ذلكما',
248 | 'ذلكن',
249 | 'ذو',
250 | 'ذوا',
251 | 'ذواتا',
252 | 'ذواتي',
253 | 'ذيت',
254 | 'ذينك',
255 | 'ذَيْنِ',
256 | 'ذِه',
257 | 'ذِي',
258 | 'راح',
259 | 'رجع',
260 | 'رويدك',
261 | 'ريث',
262 | 'رُبَّ',
263 | 'زيارة',
264 | 'سبحان',
265 | 'سرعان',
266 | 'سنة',
267 | 'سنوات',
268 | 'سوف',
269 | 'سوى',
270 | 'سَاءَ',
271 | 'سَاءَمَا',
272 | 'شبه',
273 | 'شخصا',
274 | 'شرع',
275 | 'شَتَّانَ',
276 | 'صار',
277 | 'صباح',
278 | 'صفر',
279 | 'صهٍ',
280 | 'صهْ',
281 | 'ضد',
282 | 'ضمن',
283 | 'طاق',
284 | 'طالما',
285 | 'طفق',
286 | 'طَق',
287 | 'ظلّ',
288 | 'عاد',
289 | 'عام',
290 | 'عاما',
291 | 'عامة',
292 | 'عدا',
293 | 'عدة',
294 | 'عدد',
295 | 'عدم',
296 | 'عسى',
297 | 'عشر',
298 | 'عشرة',
299 | 'علق',
300 | 'على',
301 | 'عليك',
302 | 'عليه',
303 | 'عليها',
304 | 'علًّ',
305 | 'عن',
306 | 'عند',
307 | 'عندما',
308 | 'عوض',
309 | 'عين',
310 | 'عَدَسْ',
311 | 'عَمَّا',
312 | 'غدا',
313 | 'غير',
314 | 'ـ',
315 | 'ف',
316 | 'فان',
317 | 'فلان',
318 | 'فو',
319 | 'فى',
320 | 'في',
321 | 'فيم',
322 | 'فيما',
323 | 'فيه',
324 | 'فيها',
325 | 'قال',
326 | 'قام',
327 | 'قبل',
328 | 'قد',
329 | 'قطّ',
330 | 'قلما',
331 | 'قوة',
332 | 'كأنّما',
333 | 'كأين',
334 | 'كأيّ',
335 | 'كأيّن',
336 | 'كاد',
337 | 'كان',
338 | 'كانت',
339 | 'كذا',
340 | 'كذلك',
341 | 'كرب',
342 | 'كل',
343 | 'كلا',
344 | 'كلاهما',
345 | 'كلتا',
346 | 'كلم',
347 | 'كليكما',
348 | 'كليهما',
349 | 'كلّما',
350 | 'كلَّا',
351 | 'كم',
352 | 'كما',
353 | 'كي',
354 | 'كيت',
355 | 'كيف',
356 | 'كيفما',
357 | 'كَأَنَّ',
358 | 'كِخ',
359 | 'لئن',
360 | 'لا',
361 | 'لات',
362 | 'لاسيما',
363 | 'لدن',
364 | 'لدى',
365 | 'لعمر',
366 | 'لقاء',
367 | 'لك',
368 | 'لكم',
369 | 'لكما',
370 | 'لكن',
371 | 'لكنَّما',
372 | 'لكي',
373 | 'لكيلا',
374 | 'للامم',
375 | 'لم',
376 | 'لما',
377 | 'لمّا',
378 | 'لن',
379 | 'لنا',
380 | 'له',
381 | 'لها',
382 | 'لو',
383 | 'لوكالة',
384 | 'لولا',
385 | 'لوما',
386 | 'لي',
387 | 'لَسْتَ',
388 | 'لَسْتُ',
389 | 'لَسْتُم',
390 | 'لَسْتُمَا',
391 | 'لَسْتُنَّ',
392 | 'لَسْتِ',
393 | 'لَسْنَ',
394 | 'لَعَلَّ',
395 | 'لَكِنَّ',
396 | 'لَيْتَ',
397 | 'لَيْسَ',
398 | 'لَيْسَا',
399 | 'لَيْسَتَا',
400 | 'لَيْسَتْ',
401 | 'لَيْسُوا',
402 | 'لَِسْنَا',
403 | 'ما',
404 | 'ماانفك',
405 | 'مابرح',
406 | 'مادام',
407 | 'ماذا',
408 | 'مازال',
409 | 'مافتئ',
410 | 'مايو',
411 | 'متى',
412 | 'مثل',
413 | 'مذ',
414 | 'مساء',
415 | 'مع',
416 | 'معاذ',
417 | 'مقابل',
418 | 'مكانكم',
419 | 'مكانكما',
420 | 'مكانكنّ',
421 | 'مكانَك',
422 | 'مليار',
423 | 'مليون',
424 | 'مما',
425 | 'ممن',
426 | 'من',
427 | 'منذ',
428 | 'منها',
429 | 'مه',
430 | 'مهما',
431 | 'مَنْ',
432 | 'مِن',
433 | 'نحن',
434 | 'نحو',
435 | 'نعم',
436 | 'نفس',
437 | 'نفسه',
438 | 'نهاية',
439 | 'نَخْ',
440 | 'نِعِمّا',
441 | 'نِعْمَ',
442 | 'ها',
443 | 'هاؤم',
444 | 'هاكَ',
445 | 'هاهنا',
446 | 'هبّ',
447 | 'هذا',
448 | 'هذه',
449 | 'هكذا',
450 | 'هل',
451 | 'هلمَّ',
452 | 'هلّا',
453 | 'هم',
454 | 'هما',
455 | 'هن',
456 | 'هنا',
457 | 'هناك',
458 | 'هنالك',
459 | 'هو',
460 | 'هي',
461 | 'هيا',
462 | 'هيت',
463 | 'هيّا',
464 | 'هَؤلاء',
465 | 'هَاتانِ',
466 | 'هَاتَيْنِ',
467 | 'هَاتِه',
468 | 'هَاتِي',
469 | 'هَجْ',
470 | 'هَذا',
471 | 'هَذانِ',
472 | 'هَذَيْنِ',
473 | 'هَذِه',
474 | 'هَذِي',
475 | 'هَيْهَاتَ',
476 | 'و',
477 | 'وا',
478 | 'واحد',
479 | 'واضاف',
480 | 'واضافت',
481 | 'واكد',
482 | 'وان',
483 | 'واهاً',
484 | 'واوضح',
485 | 'وراءَك',
486 | 'وفي',
487 | 'وقال',
488 | 'وقالت',
489 | 'وقد',
490 | 'وقف',
491 | 'وكان',
492 | 'وكانت',
493 | 'ولا',
494 | 'ولم',
495 | 'ومن',
496 | 'وهو',
497 | 'وهي',
498 | 'ويكأنّ',
499 | 'وَيْ',
500 | 'وُشْكَانََ',
501 | 'يكون',
502 | 'يمكن',
503 | 'يوم',
504 | 'ّأيّان'
505 | ]
506 | export { ara }
507 |
--------------------------------------------------------------------------------
/src/stopwords_kor.js:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2017 Peter Graham, contributors. Released under the Apache-2.0 license.
2 |
/**
 * Korean (`kor`) stopword list.
 *
 * The entries are kept one per line inside a template literal and split on
 * line breaks, so the exported value is a plain array of strings in the order
 * written below. A line is taken verbatim as one entry: some entries are
 * multi-word phrases containing internal spaces, and the last three entries
 * are single symbol characters rather than words.
 */
const kor = `
가
가까스로
가령
각
각각
각자
각종
갖고말하자면
같다
같이
개의치않고
거니와
거바
거의
것
것과 같이
것들
게다가
게우다
겨우
견지에서
결과에 이르다
결국
결론을 낼 수 있다
겸사겸사
고려하면
고로
곧
공동으로
과
과연
관계가 있다
관계없이
관련이 있다
관하여
관한
관해서는
구
구체적으로
구토하다
그
그들
그때
그래
그래도
그래서
그러나
그러니
그러니까
그러면
그러므로
그러한즉
그런 까닭에
그런데
그런즉
그럼
그럼에도 불구하고
그렇게 함으로써
그렇지
그렇지 않다면
그렇지 않으면
그렇지만
그렇지않으면
그리고
그리하여
그만이다
그에 따르는
그위에
그저
그중에서
그치지 않다
근거로
근거하여
기대여
기점으로
기준으로
기타
까닭으로
까악
까지
까지 미치다
까지도
꽈당
끙끙
끼익
나
나머지는
남들
남짓
너
너희
너희들
네
넷
년
논하지 않다
놀라다
누가 알겠는가
누구
다른
다른 방면으로
다만
다섯
다소
다수
다시 말하자면
다시말하면
다음
다음에
다음으로
단지
답다
당신
당장
대로 하다
대하면
대하여
대해 말하자면
대해서
댕그
더구나
더군다나
더라도
더불어
더욱더
더욱이는
도달하다
도착하다
동시에
동안
된바에야
된이상
두번째로
둘
둥둥
뒤따라
뒤이어
든간에
들
등
등등
딩동
따라
따라서
따위
따지지 않다
딱
때
때가 되어
때문에
또
또한
뚝뚝
라 해도
령
로
로 인하여
로부터
로써
륙
를
마음대로
마저
마저도
마치
막론하고
만 못하다
만약
만약에
만은 아니다
만이 아니다
만일
만큼
말하자면
말할것도 없고
매
매번
메쓰겁다
몇
모
모두
무렵
무릎쓰고
무슨
무엇
무엇때문에
물론
및
바꾸어말하면
바꾸어말하자면
바꾸어서 말하면
바꾸어서 한다면
바꿔 말하면
바로
바와같이
밖에 안된다
반대로
반대로 말하자면
반드시
버금
보는데서
보다더
보드득
본대로
봐
봐라
부류의 사람들
부터
불구하고
불문하고
붕붕
비걱거리다
비교적
비길수 없다
비로소
비록
비슷하다
비추어 보아
비하면
뿐만 아니라
뿐만아니라
뿐이다
삐걱
삐걱거리다
사
삼
상대적으로 말하자면
생각한대로
설령
설마
설사
셋
소생
소인
솨
쉿
습니까
습니다
시각
시간
시작하여
시초에
시키다
실로
심지어
아
아니
아니나다를가
아니라면
아니면
아니었다면
아래윗
아무거나
아무도
아야
아울러
아이
아이고
아이구
아이야
아이쿠
아하
아홉
안 그러면
않기 위하여
않기 위해서
알 수 있다
알았어
앗
앞에서
앞의것
야
약간
양자
어
어기여차
어느
어느 년도
어느것
어느곳
어느때
어느쪽
어느해
어디
어때
어떠한
어떤
어떤것
어떤것들
어떻게
어떻해
어이
어째서
어쨋든
어쩔수 없다
어찌
어찌됏든
어찌됏어
어찌하든지
어찌하여
언제
언젠가
얼마
얼마 안 되는 것
얼마간
얼마나
얼마든지
얼마만큼
얼마큼
엉엉
에
에 가서
에 달려 있다
에 대해
에 있다
에 한하다
에게
에서
여
여기
여덟
여러분
여보시오
여부
여섯
여전히
여차
연관되다
연이서
영
영차
옆사람
예
예를 들면
예를 들자면
예컨대
예하면
오
오로지
오르다
오자마자
오직
오호
오히려
와
와 같은 사람들
와르르
와아
왜
왜냐하면
외에도
요만큼
요만한 것
요만한걸
요컨대
우르르
우리
우리들
우선
우에 종합한것과같이
운운
월
위에서 서술한바와같이
위하여
위해서
윙윙
육
으로
으로 인하여
으로서
으로써
을
응
응당
의
의거하여
의지하여
의해
의해되다
의해서
이
이 되다
이 때문에
이 밖에
이 외에
이 정도의
이것
이곳
이때
이라면
이래
이러이러하다
이러한
이런
이럴정도로
이렇게 많은 것
이렇게되면
이렇게말하자면
이렇구나
이로 인하여
이르기까지
이리하여
이만큼
이번
이봐
이상
이어서
이었다
이와 같다
이와 같은
이와 반대로
이와같다면
이외에도
이용하여
이유만으로
이젠
이지만
이쪽
이천구
이천육
이천칠
이천팔
인 듯하다
인젠
일
일것이다
일곱
일단
일때
일반적으로
일지라도
임에 틀림없다
입각하여
입장에서
잇따라
있다
자
자기
자기집
자마자
자신
잠깐
잠시
저
저것
저것만큼
저기
저쪽
저희
전부
전자
전후
점에서 보아
정도에 이르다
제
제각기
제외하고
조금
조차
조차도
졸졸
좀
좋아
좍좍
주룩주룩
주저하지 않고
줄은 몰랏다
줄은모른다
중에서
중의하나
즈음하여
즉
즉시
지든지
지만
지말고
진짜로
쪽으로
차라리
참
참나
첫번째로
쳇
총적으로
총적으로 말하면
총적으로 보면
칠
콸콸
쾅쾅
쿵
타다
타인
탕탕
토하다
통하여
툭
퉤
틈타
팍
팔
퍽
펄렁
하
하게될것이다
하게하다
하겠는가
하고 있다
하고있었다
하곤하였다
하구나
하기 때문에
하기 위하여
하기는한데
하기만 하면
하기보다는
하기에
하나
하느니
하는 김에
하는 편이 낫다
하는것도
하는것만 못하다
하는것이 낫다
하는바
하더라도
하도다
하도록시키다
하도록하다
하든지
하려고하다
하마터면
하면 할수록
하면된다
하면서
하물며
하여금
하여야
하자마자
하지 않는다면
하지 않도록
하지마
하지마라
하지만
하하
한 까닭에
한 이유는
한 후
한다면
한다면 몰라도
한데
한마디
한적이있다
한켠으로는
한항목
할 따름이다
할 생각이다
할 줄 안다
할 지경이다
할 힘이 있다
할때
할만하다
할망정
할뿐
할수있다
할수있어
할줄알다
할지라도
할지언정
함께
해도된다
해도좋다
해봐요
해서는 안된다
해야한다
해요
했어요
향하다
향하여
향해서
허
허걱
허허
헉
헉헉
헐떡헐떡
형식으로 쓰여
혹시
혹은
혼자
훨씬
휘익
휴
흐흐
흥
힘입어
︿
~
¥
`
  // Drop the line breaks that pad the literal, then cut one entry per line.
  .trim()
  .split('\n')
export { kor }
604 |
--------------------------------------------------------------------------------
/src/stopwords_lit.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Gene Diaz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
/**
 * Lithuanian (`lit`) stopword list, exported as a plain array of strings.
 *
 * The accented entries were previously stored as mojibake: text encoded as
 * Windows-1257 had been decoded as Latin-1, so for example "aš" appeared as
 * "að", "už" as "uþ" and "jūsų" as "jûsø". Such strings can never equal
 * correctly encoded Lithuanian tokens. Each affected character has been mapped
 * back to the Lithuanian letter that shares its byte value in Windows-1257
 * (à→ą, á→į, æ→ę, è→č, ë→ė, ð→š, ø→ų, û→ū, þ→ž). The number of entries and
 * their positions are unchanged, so the order reflects the old byte order
 * rather than Lithuanian alphabetical order.
 */
const lit = [
  'abi',
  'abidvi',
  'abiejose',
  'abiejuose',
  'abiejų',
  'abiem',
  'abigaliai',
  'abipus',
  'abu',
  'abudu',
  'ai',
  'ana',
  'anaiptol',
  'anaisiais',
  'anajai',
  'anajam',
  'anajame',
  'anapus',
  'anas',
  'anasai',
  'anasis',
  'anei',
  'aniedvi',
  'anieji',
  'aniesiems',
  'anoji',
  'anojo',
  'anojoje',
  'anokia',
  'anoks',
  'anosiomis',
  'anosioms',
  'anosios',
  'anosiose',
  'anot',
  'ant',
  'antai',
  'anuodu',
  'anuoju',
  'anuosiuose',
  'anuosius',
  'anąja',
  'anąją',
  'anąjį',
  'anąsias',
  'anųjų',
  'apie',
  'aplink',
  'ar',
  'arba',
  'argi',
  'arti',
  'aukščiau',
  'aš',
  'be',
  'bei',
  'beje',
  'bemaž',
  'bent',
  'bet',
  'betgi',
  'beveik',
  'dar',
  'dargi',
  'daugmaž',
  'deja',
  'dėka',
  'dėl',
  'dėlei',
  'dėlto',
  'ech',
  'et',
  'gal',
  'galbūt',
  'galgi',
  'gan',
  'gana',
  'gi',
  'greta',
  'idant',
  'iki',
  'ir',
  'irgi',
  'it',
  'itin',
  'iš',
  'išilgai',
  'išvis',
  'jaisiais',
  'jajai',
  'jajam',
  'jajame',
  'jei',
  'jeigu',
  'ji',
  'jiedu',
  'jiedvi',
  'jieji',
  'jiesiems',
  'jinai',
  'jis',
  'jisai',
  'jog',
  'joji',
  'jojo',
  'jojoje',
  'jokia',
  'joks',
  'josiomis',
  'josioms',
  'josios',
  'josiose',
  'judu',
  'judvi',
  'juk',
  'jumis',
  'jums',
  'jumyse',
  'juodu',
  'juoju',
  'juosiuose',
  'juosius',
  'jus',
  'jąja',
  'jąją',
  'jąsias',
  'jįjį',
  'jųjų',
  'jūs',
  'jūsiškis',
  'jūsiškė',
  'jūsų',
  'kad',
  'kada',
  'kadangi',
  'kai',
  'kaip',
  'kaipgi',
  'kas',
  'katra',
  'katras',
  'katriedvi',
  'katruodu',
  'kažin',
  'kažkas',
  'kažkatra',
  'kažkatras',
  'kažkokia',
  'kažkoks',
  'kažkuri',
  'kažkuris',
  'kiaurai',
  'kiek',
  'kiekvienas',
  'kieno',
  'kita',
  'kitas',
  'kitokia',
  'kitoks',
  'kodėl',
  'kokia',
  'koks',
  'kol',
  'kolei',
  'kone',
  'kuomet',
  'kur',
  'kurgi',
  'kuri',
  'kuriedvi',
  'kuris',
  'kuriuodu',
  'lai',
  'lig',
  'ligi',
  'link',
  'lyg',
  'man',
  'manaisiais',
  'manajai',
  'manajam',
  'manajame',
  'manas',
  'manasai',
  'manasis',
  'mane',
  'manieji',
  'maniesiems',
  'manim',
  'manimi',
  'maniškis',
  'maniškė',
  'mano',
  'manoji',
  'manojo',
  'manojoje',
  'manosiomis',
  'manosioms',
  'manosios',
  'manosiose',
  'manuoju',
  'manuosiuose',
  'manuosius',
  'manyje',
  'manąja',
  'manąją',
  'manąjį',
  'manąsias',
  'manęs',
  'manųjų',
  'mat',
  'maždaug',
  'mažne',
  'mes',
  'mudu',
  'mudvi',
  'mumis',
  'mums',
  'mumyse',
  'mus',
  'mūsiškis',
  'mūsiškė',
  'mūsų',
  'na',
  'nagi',
  'ne',
  'nebe',
  'nebent',
  'negi',
  'negu',
  'nei',
  'nejau',
  'nejaugi',
  'nekaip',
  'nelyginant',
  'nes',
  'net',
  'netgi',
  'netoli',
  'neva',
  'nors',
  'nuo',
  'nė',
  'o',
  'ogi',
  'oi',
  'paeiliui',
  'pagal',
  'pakeliui',
  'palaipsniui',
  'palei',
  'pas',
  'pasak',
  'paskos',
  'paskui',
  'paskum',
  'pat',
  'pati',
  'patiems',
  'paties',
  'pats',
  'patys',
  'patį',
  'pačiais',
  'pačiam',
  'pačiame',
  'pačiu',
  'pačiuose',
  'pačius',
  'pačių',
  'per',
  'pernelyg',
  'pirm',
  'pirma',
  'pirmiau',
  'po',
  'prie',
  'prieš',
  'priešais',
  'pro',
  'pusiau',
  'rasi',
  'rodos',
  'sau',
  'savaisiais',
  'savajai',
  'savajam',
  'savajame',
  'savas',
  'savasai',
  'savasis',
  'save',
  'savieji',
  'saviesiems',
  'savimi',
  'saviškis',
  'saviškė',
  'savo',
  'savoji',
  'savojo',
  'savojoje',
  'savosiomis',
  'savosioms',
  'savosios',
  'savosiose',
  'savuoju',
  'savuosiuose',
  'savuosius',
  'savyje',
  'savąja',
  'savąją',
  'savąjį',
  'savąsias',
  'savęs',
  'savųjų',
  'skersai',
  'skradžiai',
  'stačiai',
  'su',
  'sulig',
  'ta',
  'tad',
  'tai',
  'taigi',
  'taip',
  'taipogi',
  'taisiais',
  'tajai',
  'tajam',
  'tajame',
  'tamsta',
  'tarp',
  'tarsi',
  'tartum',
  'tarytum',
  'tas',
  'tasai',
  'tau',
  'tavaisiais',
  'tavajai',
  'tavajam',
  'tavajame',
  'tavas',
  'tavasai',
  'tavasis',
  'tave',
  'tavieji',
  'taviesiems',
  'tavimi',
  'taviškis',
  'taviškė',
  'tavo',
  'tavoji',
  'tavojo',
  'tavojoje',
  'tavosiomis',
  'tavosioms',
  'tavosios',
  'tavosiose',
  'tavuoju',
  'tavuosiuose',
  'tavuosius',
  'tavyje',
  'tavąja',
  'tavąją',
  'tavąjį',
  'tavąsias',
  'tavęs',
  'tavųjų',
  'tačiau',
  'te',
  'tegu',
  'tegul',
  'tiedvi',
  'tieji',
  'ties',
  'tiesiems',
  'tiesiog',
  'tik',
  'tikriausiai',
  'tiktai',
  'toji',
  'tojo',
  'tojoje',
  'tokia',
  'toks',
  'tol',
  'tolei',
  'toliau',
  'tosiomis',
  'tosioms',
  'tosios',
  'tosiose',
  'tu',
  'tuodu',
  'tuoju',
  'tuosiuose',
  'tuosius',
  'turbūt',
  'tąja',
  'tąją',
  'tąjį',
  'tąsias',
  'tųjų',
  'tūlas',
  'už',
  'užtat',
  'užvis',
  'va',
  'vai',
  'viduj',
  'vidury',
  'vien',
  'vienas',
  'vienokia',
  'vienoks',
  'vietoj',
  'virš',
  'viršuj',
  'viršum',
  'vis',
  'vis dėlto',
  'visa',
  'visas',
  'visgi',
  'visokia',
  'visoks',
  'vos',
  'vėl',
  'vėlgi',
  'ypač',
  'į',
  'įkypai',
  'įstrižai',
  'šalia',
  'še',
  'ši',
  'šiaisiais',
  'šiajai',
  'šiajam',
  'šiajame',
  'šiapus',
  'šiedvi',
  'šieji',
  'šiesiems',
  'šioji',
  'šiojo',
  'šiojoje',
  'šiokia',
  'šioks',
  'šiosiomis',
  'šiosioms',
  'šiosios',
  'šiosiose',
  'šis',
  'šisai',
  'šit',
  'šita',
  'šitas',
  'šitiedvi',
  'šitokia',
  'šitoks',
  'šituodu',
  'šiuodu',
  'šiuoju',
  'šiuosiuose',
  'šiuosius',
  'šiąja',
  'šiąją',
  'šiąsias',
  'šiųjų',
  'štai',
  'šįjį',
  'žemiau'
]
export { lit }
500 |
--------------------------------------------------------------------------------
/src/stopwords_urd.js:
--------------------------------------------------------------------------------
1 | /* The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Gene Diaz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE. */
22 |
/**
 * Urdu (`urd`) stopword list, exported as a plain array of strings.
 *
 * NOTE(review): a number of entries start with an Arabic combining mark
 * (for example the run of entries beginning with U+064B near the end of the
 * list), one entry contains an internal space, and many entries look like
 * systematic letter substitutions of common Urdu words. This presumably comes
 * from a lossy text extraction in the upstream source — TODO confirm against a
 * reviewed Urdu list before correcting anything.
 *
 * The strings are deliberately left exactly as they are: changing any
 * character changes which tokens the list contains.
 */
const urd = [
  'آئی',
  'آئے',
  'آج',
  'آخر',
  'آخرکبر',
  'آدهی',
  'آًب',
  'آٹھ',
  'آیب',
  'اة',
  'اخبزت',
  'اختتبم',
  'ادھر',
  'ارد',
  'اردگرد',
  'ارکبى',
  'اش',
  'اضتعوبل',
  'اضتعوبلات',
  'اضطرذ',
  'اضکب',
  'اضکی',
  'اضکے',
  'اطراف',
  'اغیب',
  'افراد',
  'الگ',
  'اور',
  'اوًچب',
  'اوًچبئی',
  'اوًچی',
  'اوًچے',
  'اى',
  'اً',
  'اًذر',
  'اًہیں',
  'اٹھبًب',
  'اپٌب',
  'اپٌے',
  'اچھب',
  'اچھی',
  'اچھے',
  'اکثر',
  'اکٹھب',
  'اکٹھی',
  'اکٹھے',
  'اکیلا',
  'اکیلی',
  'اکیلے',
  'اگرچہ',
  'اہن',
  'ایطے',
  'ایک',
  'ب',
  'ت',
  'تبزٍ',
  'تت',
  'تر',
  'ترتیت',
  'تریي',
  'تعذاد',
  'تن',
  'تو',
  'توبم',
  'توہی',
  'توہیں',
  'تٌہب',
  'تک',
  'تھب',
  'تھوڑا',
  'تھوڑی',
  'تھوڑے',
  'تھی',
  'تھے',
  'تیي',
  'ثب',
  'ثبئیں',
  'ثبترتیت',
  'ثبری',
  'ثبرے',
  'ثبعث',
  'ثبلا',
  'ثبلترتیت',
  'ثبہر',
  'ثدبئے',
  'ثرآں',
  'ثراں',
  'ثرش',
  'ثعذ',
  'ثغیر',
  'ثلٌذ',
  'ثلٌذوثبلا',
  'ثلکہ',
  'ثي',
  'ثٌب',
  'ثٌبرہب',
  'ثٌبرہی',
  'ثٌبرہے',
  'ثٌبًب',
  'ثٌذ',
  'ثٌذکرو',
  'ثٌذکرًب',
  'ثٌذی',
  'ثڑا',
  'ثڑوں',
  'ثڑی',
  'ثڑے',
  'ثھر',
  'ثھرا',
  'ثھراہوا',
  'ثھرپور',
  'ثھی',
  'ثہت',
  'ثہتر',
  'ثہتری',
  'ثہتریي',
  'ثیچ',
  'ج',
  'خب',
  'خبرہب',
  'خبرہی',
  'خبرہے',
  'خبهوظ',
  'خبًب',
  'خبًتب',
  'خبًتی',
  'خبًتے',
  'خبًٌب',
  'خت',
  'ختن',
  'خجکہ',
  'خص',
  'خططرذ',
  'خلذی',
  'خو',
  'خواى',
  'خوًہی',
  'خوکہ',
  'خٌبة',
  'خگہ',
  'خگہوں',
  'خگہیں',
  'خیطب',
  'خیطبکہ',
  'در',
  'درخبت',
  'درخہ',
  'درخے',
  'درزقیقت',
  'درضت',
  'دش',
  'دفعہ',
  'دلچطپ',
  'دلچطپی',
  'دلچطپیبں',
  'دو',
  'دور',
  'دوراى',
  'دوضرا',
  'دوضروں',
  'دوضری',
  'دوضرے',
  'دوًوں',
  'دکھبئیں',
  'دکھبتب',
  'دکھبتی',
  'دکھبتے',
  'دکھبو',
  'دکھبًب',
  'دکھبیب',
  'دی',
  'دیب',
  'دیتب',
  'دیتی',
  'دیتے',
  'دیر',
  'دیٌب',
  'دیکھو',
  'دیکھٌب',
  'دیکھی',
  'دیکھیں',
  'دے',
  'ر',
  'راضتوں',
  'راضتہ',
  'راضتے',
  'رریعہ',
  'رریعے',
  'رکي',
  'رکھ',
  'رکھب',
  'رکھتب',
  'رکھتبہوں',
  'رکھتی',
  'رکھتے',
  'رکھی',
  'رکھے',
  'رہب',
  'رہی',
  'رہے',
  'ز',
  'زبصل',
  'زبضر',
  'زبل',
  'زبلات',
  'زبلیہ',
  'زصوں',
  'زصہ',
  'زصے',
  'زقبئق',
  'زقیتیں',
  'زقیقت',
  'زکن',
  'زکویہ',
  'زیبدٍ',
  'صبف',
  'صسیر',
  'صفر',
  'صورت',
  'صورتسبل',
  'صورتوں',
  'صورتیں',
  'ض',
  'ضبت',
  'ضبتھ',
  'ضبدٍ',
  'ضبرا',
  'ضبرے',
  'ضبل',
  'ضبلوں',
  'ضت',
  'ضرور',
  'ضرورت',
  'ضروری',
  'ضلطلہ',
  'ضوچ',
  'ضوچب',
  'ضوچتب',
  'ضوچتی',
  'ضوچتے',
  'ضوچو',
  'ضوچٌب',
  'ضوچی',
  'ضوچیں',
  'ضکب',
  'ضکتب',
  'ضکتی',
  'ضکتے',
  'ضکٌب',
  'ضکی',
  'ضکے',
  'ضیذھب',
  'ضیذھی',
  'ضیذھے',
  'ضیکٌڈ',
  'ضے',
  'طرف',
  'طریق',
  'طریقوں',
  'طریقہ',
  'طریقے',
  'طور',
  'طورپر',
  'ظبہر',
  'ع',
  'عذد',
  'عظین',
  'علاقوں',
  'علاقہ',
  'علاقے',
  'علاوٍ',
  'عووهی',
  'غبیذ',
  'غخص',
  'غذ',
  'غروع',
  'غروعبت',
  'غے',
  'فرد',
  'فی',
  'ق',
  'قجل',
  'قجیلہ',
  'قطن',
  'لئے',
  'لا',
  'لازهی',
  'لو',
  'لوجب',
  'لوجی',
  'لوجے',
  'لوسبت',
  'لوسہ',
  'لوگ',
  'لوگوں',
  'لڑکپي',
  'لگتب',
  'لگتی',
  'لگتے',
  'لگٌب',
  'لگی',
  'لگیں',
  'لگے',
  'لی',
  'لیب',
  'لیٌب',
  'لیں',
  'لے',
  'ه',
  'هتعلق',
  'هختلف',
  'هسترم',
  'هسترهہ',
  'هسطوش',
  'هسیذ',
  'هطئلہ',
  'هطئلے',
  'هطبئل',
  'هطتعول',
  'هطلق',
  'هعلوم',
  'هػتول',
  'هلا',
  'هوکي',
  'هوکٌبت',
  'هوکٌہ',
  'هٌبضت',
  'هڑا',
  'هڑًب',
  'هڑے',
  'هکول',
  'هگر',
  'هہرثبى',
  'هیرا',
  'هیری',
  'هیرے',
  'هیں',
  'و',
  'وار',
  'والے',
  'وٍ',
  'ًئی',
  'ًئے',
  'ًب',
  'ًبپطٌذ',
  'ًبگسیر',
  'ًطجت',
  'ًقطہ',
  'ًو',
  'ًوخواى',
  'ًکبلٌب',
  'ًکتہ',
  'ًہ',
  'ًہیں',
  'ًیب',
  'ًے',
  'ٓ آش',
  'ٹھیک',
  'پبئے',
  'پبش',
  'پبًب',
  'پبًچ',
  'پر',
  'پراًب',
  'پطٌذ',
  'پل',
  'پورا',
  'پوچھب',
  'پوچھتب',
  'پوچھتی',
  'پوچھتے',
  'پوچھو',
  'پوچھوں',
  'پوچھٌب',
  'پوچھیں',
  'پچھلا',
  'پھر',
  'پہلا',
  'پہلی',
  'پہلےضی',
  'پہلےضے',
  'پہلےضےہی',
  'پیع',
  'چبر',
  'چبہب',
  'چبہٌب',
  'چبہے',
  'چلا',
  'چلو',
  'چلیں',
  'چلے',
  'چکب',
  'چکی',
  'چکیں',
  'چکے',
  'چھوٹب',
  'چھوٹوں',
  'چھوٹی',
  'چھوٹے',
  'چھہ',
  'چیسیں',
  'ڈھوًڈا',
  'ڈھوًڈلیب',
  'ڈھوًڈو',
  'ڈھوًڈًب',
  'ڈھوًڈی',
  'ڈھوًڈیں',
  'ک',
  'کئی',
  'کئے',
  'کب',
  'کبفی',
  'کبم',
  'کت',
  'کجھی',
  'کرا',
  'کرتب',
  'کرتبہوں',
  'کرتی',
  'کرتے',
  'کرتےہو',
  'کررہب',
  'کررہی',
  'کررہے',
  'کرو',
  'کرًب',
  'کریں',
  'کرے',
  'کطی',
  'کل',
  'کن',
  'کوئی',
  'کوتر',
  'کورا',
  'کوروں',
  'کورٍ',
  'کورے',
  'کوطي',
  'کوى',
  'کوًطب',
  'کوًطی',
  'کوًطے',
  'کھولا',
  'کھولو',
  'کھولٌب',
  'کھولی',
  'کھولیں',
  'کھولے',
  'کہ',
  'کہب',
  'کہتب',
  'کہتی',
  'کہتے',
  'کہو',
  'کہوں',
  'کہٌب',
  'کہی',
  'کہیں',
  'کہے',
  'کی',
  'کیب',
  'کیطب',
  'کیطرف',
  'کیطے',
  'کیلئے',
  'کیوًکہ',
  'کیوں',
  'کیے',
  'کے',
  'کےثعذ',
  'کےرریعے',
  'گئی',
  'گئے',
  'گب',
  'گرد',
  'گروٍ',
  'گروپ',
  'گروہوں',
  'گٌتی',
  'گی',
  'گیب',
  'گے',
  'ہر',
  'ہن',
  'ہو',
  'ہوئی',
  'ہوئے',
  'ہوا',
  'ہوبرا',
  'ہوبری',
  'ہوبرے',
  'ہوتب',
  'ہوتی',
  'ہوتے',
  'ہورہب',
  'ہورہی',
  'ہورہے',
  'ہوضکتب',
  'ہوضکتی',
  'ہوضکتے',
  'ہوًب',
  'ہوًی',
  'ہوًے',
  'ہوچکب',
  'ہوچکی',
  'ہوچکے',
  'ہوگئی',
  'ہوگئے',
  'ہوگیب',
  'ہوں',
  'ہی',
  'ہیں',
  'ہے',
  'ی',
  'یقیٌی',
  'یہ',
  'یہبں'
]
export { urd }
543 |
--------------------------------------------------------------------------------