├── LICENSE ├── README.md ├── lib ├── export_file.js ├── languages_all.js ├── stopword.js ├── stopwords_af.js ├── stopwords_ar.js ├── stopwords_bg.js ├── stopwords_bn.js ├── stopwords_br.js ├── stopwords_ca.js ├── stopwords_cs.js ├── stopwords_da.js ├── stopwords_de.js ├── stopwords_el.js ├── stopwords_en.js ├── stopwords_eo.js ├── stopwords_es.js ├── stopwords_et.js ├── stopwords_eu.js ├── stopwords_fa.js ├── stopwords_fi.js ├── stopwords_fr.js ├── stopwords_ga.js ├── stopwords_gl.js ├── stopwords_ha.js ├── stopwords_he.js ├── stopwords_hi.js ├── stopwords_hr.js ├── stopwords_hu.js ├── stopwords_hy.js ├── stopwords_id.js ├── stopwords_it.js ├── stopwords_ja.js ├── stopwords_ko.js ├── stopwords_la.js ├── stopwords_lv.js ├── stopwords_mr.js ├── stopwords_nl.js ├── stopwords_no.js ├── stopwords_pl.js ├── stopwords_pt.js ├── stopwords_ro.js ├── stopwords_ru.js ├── stopwords_sk.js ├── stopwords_sl.js ├── stopwords_so.js ├── stopwords_st.js ├── stopwords_sv.js ├── stopwords_sw.js ├── stopwords_th.js ├── stopwords_tr.js ├── stopwords_yo.js ├── stopwords_zh.js └── stopwords_zu.js └── package.json /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Yager Anderson (github @swissums) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # remove-stopwords 2 | `remove-stopwords` is a node module that allows you to strip stopwords from an 3 | input text. [In natural language processing, "Stopwords" are words 4 | that are so frequent that they can safely be removed from a text 5 | without altering its 6 | meaning.](https://en.wikipedia.org/wiki/Stop_words) 7 | 8 | This library is specifically designed for WorldBrain's use case of stripping as many words as possible from every webpage, to make search-indexing faster across several thousand documents of varying content. 9 | 10 | **Credits:** 11 | 12 | This module was essentially copied directly from [@fergiemcdowall's stopword library](https://github.com/fergiemcdowall/stopword). 13 | The only difference is that support for more languages was added from this [stopwords json lib](https://github.com/6/stopwords-json). 14 | Also, there are minor tweaks to several languages specifically for WorldBrain's use case. 
15 | Unless otherwise specified all the stopwords came from [stopwords json lib](https://github.com/6/stopwords-json) 16 | 17 | [![MIT License][license-image]][license-url] 18 | 19 | ## Usage 20 | 21 | ### Default (English) 22 | By default, `stopword` will strip an array of "meaningless" English words 23 | 24 | ```javascript 25 | sw = require('stopword') 26 | const oldString = 'a really Interesting string with some words'.split(' ') 27 | const newString = sw.removeStopwords(oldString) 28 | // newString is now [ 'really', 'Interesting', 'string', 'words' ] 29 | 30 | ``` 31 | 32 | ### Other languages 33 | You can also specify a language other than English, as a string: 34 | ```javascript 35 | sw = require('stopword') 36 | const oldString = 'Trädgårdsägare är beredda att pröva vad som helst för att bli av med de hatade mördarsniglarna åäö'.split(' ') 37 | // sw.sv contains swedish stopwords 38 | const newString = sw.removeStopwords(oldString, 'sv') 39 | // newString is now [ 'Trädgårdsägare', 'beredda', 'pröva', 'helst', 'hatade', 'mördarsniglarna', 'åäö' ] 40 | ``` 41 | 42 | ### All languages 43 | You can also specify to remove stopwords from all languages by specifying `'all'`: 44 | ```javascript 45 | sw = require('stopword') 46 | const oldString = 'Trädgårdsägare är beredda att a really Interesting string with some words ciao'.split(' ') 47 | // 'all' iterates over every stopword list in the lib 48 | const newString = sw.removeStopwords(oldString, 'all') 49 | // newString is now [ 'Trädgårdsägare', 'beredda', 'really', 'Interesting', 'string', 'words' ] 50 | ``` 51 | 52 | ### Custom list of stopwords 53 | And last, but not least, it is possible to use your own, custom list of stopwords: 54 | ```javascript 55 | sw = require('stopword') 56 | const oldString = 'you can even roll your own custom stopword list'.split(' ') 57 | // Just add your own list/array of stopwords 58 | const newString = sw.removeStopwords(oldString, [ 'even', 'a', 'custom', 'stopword', 'list', 
'is', 'possible']) 59 | // newString is now [ 'you', 'can', 'roll', 'your', 'own'] 60 | ``` 61 | 62 | ## API 63 | 64 | ### Language List 65 | 66 | Arrays of stopwords for the following languages are supplied: 67 | 68 | * `af` - Afrikaans 69 | * `ar` - Modern Standard Arabic 70 | * `hy` - Armenian 71 | * `eu` - Basque 72 | * `bn` - Bengali 73 | * `br` - Brazilian Portuguese 74 | * `bg` - Bulgarian 75 | * `ca` - Catalan 76 | * `zh` - Chinese 77 | * `hr` - Croatian 78 | * `cs` - Czech 79 | * `da` - Danish 80 | * `nl` - Dutch 81 | * `en` - English 82 | * `eo` - Esperanto 83 | * `et` - Estonian 84 | * `fa` - Farsi 85 | * `fi` - Finnish 86 | * `fr` - French 87 | * `gl` - Galician 88 | * `de` - German 89 | * `el` - Greek 90 | * `ha` - Hausa 91 | * `he` - Hebrew 92 | * `hi` - Hindi 93 | * `hu` - Hungarian 94 | * `id` - Indonesian 95 | * `ga` - Irish 96 | * `it` - Italian 97 | * `ja` - Japanese 98 | * `ko` - Korean 99 | * `la` - Latin 100 | * `lv` - Latvian 101 | * `mr` - Marathi 102 | * `no` - Norwegian 103 | * `fa` - Persian 104 | * `pl` - Polish 105 | * `pt` - Portuguese 106 | * `ro` - Romanian 107 | * `ru` - Russian 108 | * `sk` - Slovak 109 | * `sl` - Slovenian 110 | * `so` - Somali 111 | * `st` - Southern Sotho 112 | * `es` - Spanish 113 | * `sw` - Swahili 114 | * `sv` - Swedish 115 | * `th` - Thai 116 | * `yo` - Yoruba 117 | * `zu` - Zulu 118 | 119 | ```javascript 120 | sw = require('stopword') 121 | norwegianStopwords = sw.no 122 | // norwegianStopwords now contains an Array of Norwegian stopwords 123 | ``` 124 | 125 | #### Languages with no space between words 126 | `ja` Japanese and `zh` Chinese Simplified have no space between words. For these languages you need to split the text into words before feeding it to the `stopword` module. You can check out [TinySegmenter](http://chasen.org/%7Etaku/software/TinySegmenter/) for Japanese and [chinese-tokenizer](https://github.com/yishn/chinese-tokenizer) for Chinese. 
127 | 128 | ### removeStopwords 129 | 130 | Returns an Array that represents the text with the specified stopwords removed. 131 | 132 | * `text` An array of words 133 | * `stopwords` An array of stopwords 134 | 135 | ```javascript 136 | sw = require('stopword') 137 | var text = sw.removeStopwords(text[, stopwords]) 138 | // text is now an array of given words minus specified stopwords 139 | ``` 140 | 141 | 142 | ## Release Notes: 143 | 144 | [license-image]: http://img.shields.io/badge/license-MIT-blue.svg?style=flat 145 | [license-url]: LICENSE 146 | -------------------------------------------------------------------------------- /lib/export_file.js: -------------------------------------------------------------------------------- 1 | 2 | exports.all = require('./languages_all.js').languages 3 | exports.af = require('./stopwords_af.js').words 4 | exports.ar = require('./stopwords_ar.js').words 5 | exports.bg = require('./stopwords_bg.js').words 6 | exports.bn = require('./stopwords_bn.js').words 7 | exports.br = require('./stopwords_br.js').words 8 | exports.ca = require('./stopwords_ca.js').words 9 | exports.cs = require('./stopwords_cs.js').words 10 | exports.da = require('./stopwords_da.js').words 11 | exports.de = require('./stopwords_de.js').words 12 | exports.el = require('./stopwords_el.js').words 13 | exports.en = require('./stopwords_en.js').words 14 | exports.eo = require('./stopwords_eo.js').words 15 | exports.es = require('./stopwords_es.js').words 16 | exports.et = require('./stopwords_et.js').words 17 | exports.eu = require('./stopwords_eu.js').words 18 | exports.fa = require('./stopwords_fa.js').words 19 | exports.fi = require('./stopwords_fi.js').words 20 | exports.fr = require('./stopwords_fr.js').words 21 | exports.ga = require('./stopwords_ga.js').words 22 | exports.gl = require('./stopwords_gl.js').words 23 | exports.ha = require('./stopwords_ha.js').words 24 | exports.he = require('./stopwords_he.js').words 25 | exports.hi = 
require('./stopwords_hi.js').words 26 | exports.hr = require('./stopwords_hr.js').words 27 | exports.hu = require('./stopwords_hu.js').words 28 | exports.hy = require('./stopwords_hy.js').words 29 | exports.js = require('./stopwords_id.js').words 30 | exports.it = require('./stopwords_it.js').words 31 | exports.ja = require('./stopwords_ja.js').words 32 | exports.ko = require('./stopwords_ko.js').words 33 | exports.la = require('./stopwords_la.js').words 34 | exports.lv = require('./stopwords_lv.js').words 35 | exports.mr = require('./stopwords_mr.js').words 36 | exports.nl = require('./stopwords_nl.js').words 37 | exports.no = require('./stopwords_no.js').words 38 | exports.pl = require('./stopwords_pl.js').words 39 | exports.pt = require('./stopwords_pt.js').words 40 | exports.ro = require('./stopwords_ro.js').words 41 | exports.ru = require('./stopwords_ru.js').words 42 | exports.sk = require('./stopwords_sk.js').words 43 | exports.sl = require('./stopwords_sl.js').words 44 | exports.so = require('./stopwords_so.js').words 45 | exports.st = require('./stopwords_st.js').words 46 | exports.sv = require('./stopwords_sv.js').words 47 | exports.sw = require('./stopwords_sw.js').words 48 | exports.th = require('./stopwords_th.js').words 49 | exports.tr = require('./stopwords_tr.js').words 50 | exports.yo = require('./stopwords_yo.js').words 51 | exports.zh = require('./stopwords_zh.js').words 52 | exports.zu = require('./stopwords_zu.js').words -------------------------------------------------------------------------------- /lib/languages_all.js: -------------------------------------------------------------------------------- 1 | var languages = [ 2 | 'af', 'ar', 'bg', 'bn', 'br', 'ca', 'cs', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'ga', 'gl', 'ha', 'he', 'hi', 'hr', 'hu', 'hy', 'js', 'it', 'ja', 'ko', 'la', 'lv', 'mr', 'nl', 'no', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'so', 'st', 'sv', 'sw', 'th', 'tr', 'yo', 'zh', 'zu' 3 | ] 4 | 
exports.languages = languages 5 | 6 | 7 | -------------------------------------------------------------------------------- /lib/stopword.js: -------------------------------------------------------------------------------- 1 | const defaultStopwords = require('./stopwords_en.js').words 2 | var lang = require('./export_file.js'); 3 | 4 | function filterTokens(tokens, stopwords) { 5 | return tokens.filter(function (value) { 6 | return stopwords.indexOf(value.toLowerCase()) === -1 7 | }) 8 | } 9 | 10 | exports.removeStopwords = function(tokens, stopwords) { 11 | 12 | stopwordsList = lang[stopwords] || defaultStopwords 13 | 14 | if (typeof tokens !== 'object' || typeof stopwordsList != 'object'){ 15 | throw new Error ('expected Arrays try: removeStopwords(Array[, Array])') 16 | } 17 | 18 | // If users specifies all it goes through all the languages and filters out any stopwords 19 | if (stopwords === 'all') { 20 | var wordsToKeep = tokens 21 | 22 | stopwordsList.map(function (value) { 23 | var wordsToMaybeKeep = filterTokens(wordsToKeep, lang[value]) 24 | 25 | wordsToKeep = wordsToMaybeKeep.length < wordsToKeep.length ? 
wordsToMaybeKeep : wordsToKeep 26 | 27 | }) 28 | return wordsToKeep 29 | } 30 | return filterTokens(tokens, stopwordsList) 31 | } -------------------------------------------------------------------------------- /lib/stopwords_af.js: -------------------------------------------------------------------------------- 1 | var words = ["'n","aan","af","al","as","baie","by","daar","dag", 2 | "dat","die","dit","een","ek","en","gaan","gesê","haar","het","hom", 3 | "hulle","hy","in","is","jou","jy","kan","kom","ma","maar","met","my", 4 | "na","nie","om","ons","op","saam","sal","se","sien","so","sy","te", 5 | "toe","uit","van","vir","was","wat","ʼn"] 6 | 7 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_ar.js: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Gene Diaz 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | // a list of commonly used words that have little meaning and can be excluded 26 | // from analysis. 27 | 28 | var words = ["،","آض","آمينَ","آه","آهاً","آي","أ","أب","أجل","أجمع","أخ","أخذ","أصبح","أضحى","أقبل","أقل","أكثر","ألا","أم","أما","أمامك","أمامكَ","أمسى","أمّا","أن","أنا","أنت","أنتم","أنتما","أنتن","أنتِ","أنشأ","أنّى","أو","أوشك","أولئك","أولئكم","أولاء","أولالك","أوّهْ","أي","أيا","أين","أينما","أيّ","أَنَّ","أََيُّ","أُفٍّ","إذ","إذا","إذاً","إذما","إذن","إلى","إليكم","إليكما","إليكنّ","إليكَ","إلَيْكَ","إلّا","إمّا","إن","إنّما","إي","إياك","إياكم","إياكما","إياكن","إيانا","إياه","إياها","إياهم","إياهما","إياهن","إياي","إيهٍ","إِنَّ","ا","ابتدأ","اثر","اجل","احد","اخرى","اخلولق","اذا","اربعة","ارتدّ","استحال","اطار","اعادة","اعلنت","اف","اكثر","اكد","الألاء","الألى","الا","الاخيرة","الان","الاول","الاولى","التى","التي","الثاني","الثانية","الذاتي","الذى","الذي","الذين","السابق","الف","اللائي","اللاتي","اللتان","اللتيا","اللتين","اللذان","اللذين","اللواتي","الماضي","المقبل","الوقت","الى","اليوم","اما","امام","امس","ان","انبرى","انقلب","انه","انها","او","اول","اي","ايار","ايام","ايضا","ب","بات","باسم","بان","بخٍ","برس","بسبب","بسّ","بشكل","بضع","بطآن","بعد","بعض","بك","بكم","بكما","بكن","بل","بلى","بما","بماذا","بمن","بن","بنا","به","بها","بي","بيد","بين","بَسْ","بَلْهَ","بِئْسَ","تانِ","تانِك","تبدّل","تجاه","تحوّل","تلقاء","تلك","تلكم","تلكما","تم","تينك","تَيْنِ","تِه","تِي","ثلاثة","ثم","ثمّ","ثمّة","ثُمَّ","جعل","جلل","جميع","جير","حار","حاشا","حاليا","حاي","حتى","حرى","حسب","حم","حوالى","حول","حيث","حيثما","حين","حيَّ","حَبَّذَا","حَتَّى","حَذارِ","خلا","خلال","دون","دونك","ذا","ذات","ذاك","ذانك","ذانِ","ذلك","ذلكم","ذلكما","ذلكن","ذو","ذوا","ذواتا","ذواتي
","ذيت","ذينك","ذَيْنِ","ذِه","ذِي","راح","رجع","رويدك","ريث","رُبَّ","زيارة","سبحان","سرعان","سنة","سنوات","سوف","سوى","سَاءَ","سَاءَمَا","شبه","شخصا","شرع","شَتَّانَ","صار","صباح","صفر","صهٍ","صهْ","ضد","ضمن","طاق","طالما","طفق","طَق","ظلّ","عاد","عام","عاما","عامة","عدا","عدة","عدد","عدم","عسى","عشر","عشرة","علق","على","عليك","عليه","عليها","علًّ","عن","عند","عندما","عوض","عين","عَدَسْ","عَمَّا","غدا","غير","ـ","ف","فان","فلان","فو","فى","في","فيم","فيما","فيه","فيها","قال","قام","قبل","قد","قطّ","قلما","قوة","كأنّما","كأين","كأيّ","كأيّن","كاد","كان","كانت","كذا","كذلك","كرب","كل","كلا","كلاهما","كلتا","كلم","كليكما","كليهما","كلّما","كلَّا","كم","كما","كي","كيت","كيف","كيفما","كَأَنَّ","كِخ","لئن","لا","لات","لاسيما","لدن","لدى","لعمر","لقاء","لك","لكم","لكما","لكن","لكنَّما","لكي","لكيلا","للامم","لم","لما","لمّا","لن","لنا","له","لها","لو","لوكالة","لولا","لوما","لي","لَسْتَ","لَسْتُ","لَسْتُم","لَسْتُمَا","لَسْتُنَّ","لَسْتِ","لَسْنَ","لَعَلَّ","لَكِنَّ","لَيْتَ","لَيْسَ","لَيْسَا","لَيْسَتَا","لَيْسَتْ","لَيْسُوا","لَِسْنَا","ما","ماانفك","مابرح","مادام","ماذا","مازال","مافتئ","مايو","متى","مثل","مذ","مساء","مع","معاذ","مقابل","مكانكم","مكانكما","مكانكنّ","مكانَك","مليار","مليون","مما","ممن","من","منذ","منها","مه","مهما","مَنْ","مِن","نحن","نحو","نعم","نفس","نفسه","نهاية","نَخْ","نِعِمّا","نِعْمَ","ها","هاؤم","هاكَ","هاهنا","هبّ","هذا","هذه","هكذا","هل","هلمَّ","هلّا","هم","هما","هن","هنا","هناك","هنالك","هو","هي","هيا","هيت","هيّا","هَؤلاء","هَاتانِ","هَاتَيْنِ","هَاتِه","هَاتِي","هَجْ","هَذا","هَذانِ","هَذَيْنِ","هَذِه","هَذِي","هَيْهَاتَ","و","و6","وا","واحد","واضاف","واضافت","واكد","وان","واهاً","واوضح","وراءَك","وفي","وقال","وقالت","وقد","وقف","وكان","وكانت","ولا","ولم","ومن","وهو","وهي","ويكأنّ","وَيْ","وُشْكَانََ","يكون","يمكن","يوم","ّأيّان"] 29 | 30 | // tell the world about the noise words. 
31 | exports.words = words 32 | -------------------------------------------------------------------------------- /lib/stopwords_bg.js: -------------------------------------------------------------------------------- 1 | var words = ["а","автентичен","аз","ако","ала","бе","без","беше","би","бивш","бивша", 2 | "бившо","бил","била","били","било","благодаря","близо","бъдат","бъде","бяха","в","вас", 3 | "ваш","ваша","вероятно","вече","взема","ви","вие","винаги","внимава","време","все","всеки", 4 | "всички","всичко","всяка","във","въпреки","върху","г","ги","главен","главна","главно","глас", 5 | "го","година","години","годишен","д","да","дали","два","двама","двамата","две","двете","ден", 6 | "днес","дни","до","добра","добре","добро","добър","докато","докога","дори","досега","доста", 7 | "друг","друга","други","е","евтин","едва","един","една","еднаква","еднакви","еднакъв","едно", 8 | "екип","ето","живот","за","забавям","зад","заедно","заради","засега","заспал","затова","защо", 9 | "защото","и","из","или","им","има","имат","иска","й","каза","как","каква","какво","както","какъв", 10 | "като","кога","когато","което","които","кой","който","колко","която","къде","където","към","лесен", 11 | "лесно","ли","лош","м","май","малко","ме","между","мек","мен","месец","ми","много","мнозина","мога", 12 | "могат","може","мокър","моля","момента","му","н","на","над","назад","най","направи","напред","например", 13 | "нас","не","него","нещо","нея","ни","ние","никой","нито","нищо","но","нов","нова","нови","новина","някои", 14 | "някой","няколко","няма","обаче","около","освен","особено","от","отгоре","отново","още","пак","по", 15 | "повече","повечето","под","поне","поради","после","почти","прави","пред","преди","през","при","пък", 16 | "първата","първи","първо","пъти","равен","равна","с","са","сам","само","се","сега","си","син","скоро", 17 | "след","следващ","сме","смях","според","сред","срещу","сте","съм","със","също","т","т.н.","тази","така", 18 | 
"такива","такъв","там","твой","те","тези","ти","то","това","тогава","този","той","толкова","точно","три", 19 | "трябва","тук","тъй","тя","тях","у","утре","харесва","хиляди","ч","часа","че","често","чрез","ще","щом", 20 | "юмрук","я","як"] 21 | 22 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_bn.js: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Gene Diaz 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | 25 | // a list of commonly used words that have little meaning and can be excluded 26 | // from analysis. 
27 | var words = ["অতএব","অথচ","অথবা","অনুযায়ী","অনেক","অনেকে","অনেকেই","অন্তত","অন্য","অবধি","অবশ্য","অর্থাত","আই","আগামী","আগে","আগেই","আছে","আজ","আদ্যভাগে","আপনার","আপনি","আবার","আমরা","আমাকে","আমাদের","আমার","আমি","আর","আরও","ই","ইত্যাদি","ইহা","উচিত","উত্তর","উনি","উপর","উপরে","এ","এঁদের","এঁরা","এই","একই","একটি","একবার","একে","এক্","এখন","এখনও","এখানে","এখানেই","এটা","এটাই","এটি","এত","এতটাই","এতে","এদের","এব","এবং","এবার","এমন","এমনকী","এমনি","এর","এরা","এল","এস","এসে","ঐ","ও","ওঁদের","ওঁর","ওঁরা","ওই","ওকে","ওখানে","ওদের","ওর","ওরা","কখনও","কত","কবে","কমনে","কয়েক","কয়েকটি","করছে","করছেন","করতে","করবে","করবেন","করলে","করলেন","করা","করাই","করায়","করার","করি","করিতে","করিয়া","করিয়ে","করে","করেই","করেছিলেন","করেছে","করেছেন","করেন","কাউকে","কাছ","কাছে","কাজ","কাজে","কারও","কারণ","কি","কিংবা","কিছু","কিছুই","কিন্তু","কী","কে","কেউ","কেউই","কেখা","কেন","কোটি","কোন","কোনও","কোনো","ক্ষেত্রে","কয়েক","খুব","গিয়ে","গিয়েছে","গিয়ে","গুলি","গেছে","গেল","গেলে","গোটা","চলে","চান","চায়","চার","চালু","চেয়ে","চেষ্টা","ছাড়া","ছাড়াও","ছিল","ছিলেন","জন","জনকে","জনের","জন্য","জন্যওজে","জানতে","জানা","জানানো","জানায়","জানিয়ে","জানিয়েছে","জে","জ্নজন","টি","ঠিক","তখন","তত","তথা","তবু","তবে","তা","তাঁকে","তাঁদের","তাঁর","তাঁরা","তাঁাহারা","তাই","তাও","তাকে","তাতে","তাদের","তার","তারপর","তারা","তারৈ","তাহলে","তাহা","তাহাতে","তাহার","তিনঐ","তিনি","তিনিও","তুমি","তুলে","তেমন","তো","তোমার","থাকবে","থাকবেন","থাকা","থাকায়","থাকে","থাকেন","থেকে","থেকেই","থেকেও","দিকে","দিতে","দিন","দিয়ে","দিয়েছে","দিয়েছেন","দিলেন","দু","দুই","দুটি","দুটো","দেওয়া","দেওয়ার","দেওয়া","দেখতে","দেখা","দেখে","দেন","দেয়","দ্বারা","ধরা","ধরে","ধামার","নতুন","নয়","না","নাই","নাকি","নাগাদ","নানা","নিজে","নিজেই","নিজেদের","নিজের","নিতে","নিয়ে","নিয়ে","নেই","নেওয়া","নেওয়ার","নেওয়া","নয়","পক্ষে","পর","পরে","পরেই","পরেও","পর্যন্ত","পাওয়া","পাচ","পারি","পারে","পারেন","পি","পেয়ে","পেয়্র্","প্রতি","প্রথম","প্রভৃতি","প্রযন্ত","প্রাথমিক","প্রায়","প্রায়","ফলে","ফিরে","ফের","বক্তব্য","বদলে","বন
","বরং","বলতে","বলল","বললেন","বলা","বলে","বলেছেন","বলেন","বসে","বহু","বা","বাদে","বার","বি","বিনা","বিভিন্ন","বিশেষ","বিষয়টি","বেশ","বেশি","ব্যবহার","ব্যাপারে","ভাবে","ভাবেই","মতো","মতোই","মধ্যভাগে","মধ্যে","মধ্যেই","মধ্যেও","মনে","মাত্র","মাধ্যমে","মোট","মোটেই","যখন","যত","যতটা","যথেষ্ট","যদি","যদিও","যা","যাঁর","যাঁরা","যাওয়া","যাওয়ার","যাওয়া","যাকে","যাচ্ছে","যাতে","যাদের","যান","যাবে","যায়","যার","যারা","যিনি","যে","যেখানে","যেতে","যেন","যেমন","র","রকম","রয়েছে","রাখা","রেখে","লক্ষ","শুধু","শুরু","সঙ্গে","সঙ্গেও","সব","সবার","সমস্ত","সম্প্রতি","সহ","সহিত","সাধারণ","সামনে","সি","সুতরাং","সে","সেই","সেখান","সেখানে","সেটা","সেটাই","সেটাও","সেটি","স্পষ্ট","স্বয়ং","হইতে","হইবে","হইয়া","হওয়া","হওয়ায়","হওয়ার","হচ্ছে","হত","হতে","হতেই","হন","হবে","হবেন","হয়","হয়তো","হয়নি","হয়ে","হয়েই","হয়েছিল","হয়েছে","হয়েছেন","হল","হলে","হলেই","হলেও","হলো","হাজার","হিসাবে","হৈলে","হোক","হয়"] 28 | // tell the world about the noise words. 29 | exports.words = words 30 | -------------------------------------------------------------------------------- /lib/stopwords_br.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017, Micael Levi 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | // a list of commonly used words that have little meaning and can be excluded 24 | // from analysis. 25 | var words = [ 'a', 'agora', 'ainda', 'alguém', 'algum', 'alguma', 'algumas', 'alguns', 'ampla', 'amplas', 'amplo', 26 | 'amplos', 'ante', 'antes', 'ao', 'aos', 'após', 'aquela', 'aquelas', 'aquele', 'aqueles', 'aquilo', 'as', 'até', 27 | 'através', 'cada', 'coisa', 'coisas', 'com', 'como', 'contra', 'contudo', 'da', 'daquele', 'daqueles', 'das', 'de', 28 | 'dela', 'delas', 'dele', 'deles', 'depois', 'dessa', 'dessas', 'desse', 'desses', 'desta', 'destas', 'deste', 'deste', 29 | 'destes', 'deve', 'devem', 'devendo', 'dever', 'deverá', 'deverão', 'deveria', 'deveriam', 'devia', 'deviam', 'disse', 30 | 'disso', 'disto', 'dito', 'diz', 'dizem', 'do', 'dos', 'e', 'é', 'ela', 'elas', 'ele', 'eles', 'em', 'enquanto', 31 | 'entre', 'era', 'essa', 'essas', 'esse', 'esses', 'esta', 'está', 'estamos', 'estão', 'estas', 'estava', 'estavam', 32 | 'estávamos', 'este', 'estes', 'estou', 'eu', 'fazendo', 'fazer', 'feita', 'feitas', 'feito', 'feitos', 'foi', 'for', 33 | 'foram', 'fosse', 'fossem', 'grande', 'grandes', 'há', 'isso', 'isto', 'já', 'la', 'lá', 'lhe', 'lhes', 'lo', 'mas', 34 | 'me', 'mesma', 'mesmas', 'mesmo', 'mesmos', 'meu', 'meus', 'minha', 'minhas', 'muita', 'muitas', 'muito', 'muitos', 35 | 'na', 'não', 'nas', 'nem', 'nenhum', 'nessa', 'nessas', 'nesta', 'nestas', 'ninguém', 'no', 'nos', 'nós', 'nossa', 36 | 'nossas', 'nosso', 'nossos', 'num', 'numa', 
'nunca', 'o', 'os', 'ou', 'outra', 'outras', 'outro', 'outros', 'para', 37 | 'pela', 'pelas', 'pelo', 'pelos', 'pequena', 'pequenas', 'pequeno', 'pequenos', 'per', 'perante', 'pode', 'pude', 38 | 'podendo', 'poder', 'poderia', 'poderiam', 'podia', 'podiam', 'pois', 'por', 'porém', 'porque', 'posso', 'pouca', 39 | 'poucas', 'pouco', 'poucos', 'primeiro', 'primeiros', 'própria', 'próprias', 'próprio', 'próprios', 'quais', 40 | 'qual', 'quando', 'quanto', 'quantos', 'que', 'quem', 'são', 'se', 'seja', 'sejam', 'sem', 'sempre', 'sendo', 41 | 'será', 'serão', 'seu', 'seus', 'si', 'sido', 'só', 'sob', 'sobre', 'sua', 'suas', 'talvez', 'também', 'tampouco', 42 | 'te', 'tem', 'tendo', 'tenha', 'ter', 'teu', 'teus', 'ti', 'tido', 'tinha', 'tinham', 'toda', 'todas', 'todavia', 43 | 'todo', 'todos', 'tu', 'tua', 'tuas', 'tudo', 'última', 'últimas', 'último', 'últimos', 'um', 'uma', 'umas', 44 | 'uns', 'vendo', 'ver', 'vez', 'vindo', 'vir', 'vos', 'vós' ] 45 | 46 | // tell the world about the noise words. 
47 | exports.words = words 48 | -------------------------------------------------------------------------------- /lib/stopwords_ca.js: -------------------------------------------------------------------------------- 1 | var words = ["a","abans","ací","ah","així","això","al","aleshores","algun","alguna","algunes","alguns","alhora", 2 | "allà","allí","allò","als","altra","altre","altres","amb","ambdues","ambdós","apa","aquell","aquella","aquelles", 3 | "aquells","aquest","aquesta","aquestes","aquests","aquí","baix","cada","cadascuna","cadascunes","cadascuns", 4 | "cadascú","com","contra","d'un","d'una","d'unes","d'uns","dalt","de","del","dels","des","després","dins","dintre", 5 | "donat","doncs","durant","e","eh","el","els","em","en","encara","ens","entre","eren","es","esta","estaven","esteu", 6 | "està","estàvem","estàveu","et","etc","ets","fins","fora","gairebé","ha","han","has","havia","he","hem","heu","hi", 7 | "ho","i","igual","iguals","ja","l'hi","la","les","li","li'n","llavors","m'he","ma","mal","malgrat","mateix","mateixa", 8 | "mateixes","mateixos","me","mentre","meu","meus","meva","meves","molt","molta","moltes","molts","mon","mons","més", 9 | "n'he","n'hi","ne","ni","no","nogensmenys","només","nosaltres","nostra","nostre","nostres","o","oh","oi","on","pas", 10 | "pel","pels","per","perquè","però","poc","poca","pocs","poques","potser","propi","qual","quals","quan","quant","que", 11 | "quelcom","qui","quin","quina","quines","quins","què","s'ha","s'han","sa","semblant","semblants","ses","seu","seus", 12 | "seva","seves","si","sobre","sobretot","solament","sols","son","sons","sota","sou","sóc","són","t'ha","t'han","t'he", 13 | "ta","tal","també","tampoc","tan","tant","tanta","tantes","teu","teus","teva","teves","ton","tons","tot","tota","totes", 14 | "tots","un","una","unes","uns","us","va","vaig","vam","van","vas","veu","vosaltres","vostra","vostre","vostres","érem", 15 | "éreu","és"] 16 | 17 | exports.words = words 
-------------------------------------------------------------------------------- /lib/stopwords_cs.js: -------------------------------------------------------------------------------- 1 | var words = ["a","aby","ahoj","aj","ale","anebo","ani","ano","asi","aspoň","atd","atp","ačkoli","až","bez","beze","blízko", 2 | "bohužel","brzo","bude","budem","budeme","budete","budeš","budou","budu","by","byl","byla","byli","bylo","byly","bys","být", 3 | "během","chce","chceme","chcete","chceš","chci","chtít","chtějí","chut'","chuti","co","což","cz","daleko","další","den", 4 | "deset","devatenáct","devět","dnes","do","dobrý","docela","dva","dvacet","dvanáct","dvě","dál","dále","děkovat","děkujeme", 5 | "děkuji","ho","hodně","i","jak","jakmile","jako","jakož","jde","je","jeden","jedenáct","jedna","jedno","jednou","jedou", 6 | "jeho","jehož","jej","jejich","její","jelikož","jemu","jen","jenom","jestli","jestliže","ještě","jež","ji","jich","jimi", 7 | "jinak","jiné","již","jsem","jseš","jsi","jsme","jsou","jste","já","jí","jím","jíž","k","kam","kde","kdo","kdy","když","ke", 8 | "kolik","kromě","kterou","která","které","který","kteří","kvůli","mají","mezi","mi","mne","mnou","mně","moc","mohl","mohou", 9 | "moje","moji","možná","musí","my","má","málo","mám","máme","máte","máš","mé","mí","mít","mě","můj","může","na","nad","nade", 10 | "napište","naproti","načež","naše","naši","ne","nebo","nebyl","nebyla","nebyli","nebyly","nedělají","nedělá","nedělám","neděláme", 11 | "neděláte","neděláš","neg","nejsi","nejsou","nemají","nemáme","nemáte","neměl","není","nestačí","nevadí","než","nic","nich", 12 | "nimi","nové","nový","nula","nám","námi","nás","náš","ním","ně","něco","nějak","někde","někdo","němu","němuž","o","od","ode", 13 | "on","ona","oni","ono","ony","osm","osmnáct","pak","patnáct","po","pod","podle","pokud","potom","pouze","pozdě","pořád", 14 | "pravé","pro","prostě","prosím","proti","proto","protože","proč","první","pta","pět","před","přes","přese","při","přičemž", 15 | 
"re","rovně","s","se","sedm","sedmnáct","si","skoro","smí","smějí","snad","spolu","sta","sto","strana","sté","své","svých", 16 | "svým","svými","ta","tady","tak","takhle","taky","také","takže","tam","tamhle","tamhleto","tamto","tato","tebe","tebou", 17 | "ted'","tedy","ten","tento","teto","ti","tipy","tisíc","tisíce","to","tobě","tohle","toho","tohoto","tom","tomto","tomu", 18 | "tomuto","toto","trošku","tu","tuto","tvoje","tvá","tvé","tvůj","ty","tyto","téma","tím","tímto","tě","těm","těmu","třeba", 19 | "tři","třináct","u","určitě","už","v","vaše","vaši","ve","vedle","večer","vlastně","vy","vám","vámi","vás","váš","více", 20 | "však","všechno","všichni","vůbec","vždy","z","za","zatímco","zač","zda","zde","ze","zprávy","zpět","čau","či","článku", 21 | "články","čtrnáct","čtyři","šest","šestnáct","že"] 22 | 23 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_da.js: -------------------------------------------------------------------------------- 1 | /* 2 | Creative Commons – Attribution / ShareAlike 3.0 license 3 | http://creativecommons.org/licenses/by-sa/3.0/ 4 | 5 | List based on frequently used words in subtitles in 2012. 6 | 7 | Thanks to 8 | opensubtitles.org 9 | https://invokeit.wordpress.com/frequency-word-lists/#comment-9707 10 | */ 11 | 12 | // a list of commonly used words that have little meaning and can be excluded 13 | // from analysis. 
14 | var words = [ 15 | 'er', 'jeg', 'det', 'du', 'ikke', 'i', 'at', 'en', 'og', 'har', 16 | 'vi', 'til', 'på', 'hvad', 'med', 'mig', 'så', 'for', 'de', 'dig', 17 | 'der', 'den', 'han', 'kan', 'af', 'vil', 'var', 'her', 'et', 'skal', 18 | 'ved', 'nu', 'men', 'om', 'ja', 'som', 'nej', 'min', 'noget', 'ham', 19 | 'hun', 'bare', 'kom', 'være', 'din', 'hvor', 'dem', 'ud', 'os', 'hvis', 20 | 'må', 'se', 'godt', 'have', 'fra', 'ville', 'okay', 'lige', 'op', 'alle', 21 | 'lad', 'hvorfor', 'sig', 'hvordan', 'få', 'kunne', 'eller', 'hvem', 'man', 'bliver', 22 | 'havde', 'da', 'ingen', 'efter', 'når', 'alt', 'jo', 'to', 'mit', 'ind', 23 | 'hej', 'aldrig', 'lidt', 'nogen', 'over', 'også', 'mand', 'far', 'skulle', 'selv', 24 | 'får', 'hans', 'ser', 'vores', 'jer', 'sådan', 'dit', 'kun', 'deres', 'ned', 25 | 'mine', 'komme', 'tage', 'denne', 'sige', 'dette', 'blive', 'helt', 'fordi', 26 | 'end', 'tag', 'før', 'fik', 'dine', 27 | '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '_'] 28 | 29 | // tell the world about the noise words. 30 | exports.words = words 31 | -------------------------------------------------------------------------------- /lib/stopwords_de.js: -------------------------------------------------------------------------------- 1 | /* 2 | */ 3 | 4 | // a list of commonly used words that have little meaning and can be excluded 5 | // from analysis. 
6 | var words = [ 7 | 'a', 'ab', 'aber', 'ach', 'acht', 'achte', 'achten', 'achter', 'achtes', 'ag', 'alle', 'allein', 'allem', 'allen', 'aller', 'allerdings', 'alles', 'allgemeinen', 'als', 'also', 'am', 'an', 'ander', 'andere', 'anderem', 'anderen', 'anderer', 'anderes', 'anderm', 'andern', 'anderr', 'anders', 'au', 'auch', 'auf', 'aus', 'ausser', 'ausserdem', 'außer', 'außerdem', 'b', 'bald', 'bei', 'beide', 'beiden', 'beim', 'beispiel', 'bekannt', 'bereits', 'besonders', 'besser', 'besten', 'bin', 'bis', 'bisher', 'bist', 'c', 'd', 'd.h', 'da', 'dabei', 'dadurch', 'dafür', 'dagegen', 'daher', 'dahin', 'dahinter', 'damals', 'damit', 'danach', 'daneben', 'dank', 'dann', 'daran', 'darauf', 'daraus', 'darf', 'darfst', 'darin', 'darum', 'darunter', 'darüber', 'das', 'dasein', 'daselbst', 'dass', 'dasselbe', 'davon', 'davor', 'dazu', 'dazwischen', 'daß', 'dein', 'deine', 'deinem', 'deinen', 'deiner', 'deines', 'dem', 'dementsprechend', 'demgegenüber', 'demgemäss', 'demgemäß', 'demselben', 'demzufolge', 'den', 'denen', 'denn', 'denselben', 'der', 'deren', 'derer', 'derjenige', 'derjenigen', 'dermassen', 'dermaßen', 'derselbe', 'derselben', 'des', 'deshalb', 'desselben', 'dessen', 'deswegen', 'dich', 'die', 'diejenige', 'diejenigen', 'dies', 'diese', 'dieselbe', 'dieselben', 'diesem', 'diesen', 'dieser', 'dieses', 'dir', 'doch', 'dort', 'drei', 'drin', 'dritte', 'dritten', 'dritter', 'drittes', 'du', 'durch', 'durchaus', 'durfte', 'durften', 'dürfen', 'dürft', 'e', 'eben', 'ebenso', 'ehrlich', 'ei', 'ei, ', 'eigen', 'eigene', 'eigenen', 'eigener', 'eigenes', 'ein', 'einander', 'eine', 'einem', 'einen', 'einer', 'eines', 'einig', 'einige', 'einigem', 'einigen', 'einiger', 'einiges', 'einmal', 'eins', 'elf', 'en', 'ende', 'endlich', 'entweder', 'er', 'ernst', 'erst', 'erste', 'ersten', 'erster', 'erstes', 'es', 'etwa', 'etwas', 'euch', 'euer', 'eure', 'eurem', 'euren', 'eurer', 'eures', 'f', 'folgende', 'früher', 'fünf', 'fünfte', 'fünften', 'fünfter', 'fünftes', 'für', 
'g', 'gab', 'ganz', 'ganze', 'ganzen', 'ganzer', 'ganzes', 'gar', 'gedurft', 'gegen', 'gegenüber', 'gehabt', 'gehen', 'geht', 'gekannt', 'gekonnt', 'gemacht', 'gemocht', 'gemusst', 'genug', 'gerade', 'gern', 'gesagt', 'geschweige', 'gewesen', 'gewollt', 'geworden', 'gibt', 'ging', 'gleich', 'gott', 'gross', 'grosse', 'grossen', 'grosser', 'grosses', 'groß', 'große', 'großen', 'großer', 'großes', 'gut', 'gute', 'guter', 'gutes', 'h', 'hab', 'habe', 'haben', 'habt', 'hast', 'hat', 'hatte', 'hatten', 'hattest', 'hattet', 'heisst', 'her', 'heute', 'hier', 'hin', 'hinter', 'hoch', 'hätte', 'hätten', 'i', 'ich', 'ihm', 'ihn', 'ihnen', 'ihr', 'ihre', 'ihrem', 'ihren', 'ihrer', 'ihres', 'im', 'immer', 'in', 'indem', 'infolgedessen', 'ins', 'irgend', 'ist', 'j', 'ja', 'jahr', 'jahre', 'jahren', 'je', 'jede', 'jedem', 'jeden', 'jeder', 'jedermann', 'jedermanns', 'jedes', 'jedoch', 'jemand', 'jemandem', 'jemanden', 'jene', 'jenem', 'jenen', 'jener', 'jenes', 'jetzt', 'k', 'kam', 'kann', 'kannst', 'kaum', 'kein', 'keine', 'keinem', 'keinen', 'keiner', 'keines', 'kleine', 'kleinen', 'kleiner', 'kleines', 'kommen', 'kommt', 'konnte', 'konnten', 'kurz', 'können', 'könnt', 'könnte', 'l', 'lang', 'lange', 'leicht', 'leide', 'lieber', 'los', 'm', 'machen', 'macht', 'machte', 'mag', 'magst', 'mahn', 'mal', 'man', 'manche', 'manchem', 'manchen', 'mancher', 'manches', 'mann', 'mehr', 'mein', 'meine', 'meinem', 'meinen', 'meiner', 'meines', 'mensch', 'menschen', 'mich', 'mir', 'mit', 'mittel', 'mochte', 'mochten', 'morgen', 'muss', 'musst', 'musste', 'mussten', 'muß', 'mußt', 'möchte', 'mögen', 'möglich', 'mögt', 'müssen', 'müsst', 'müßt', 'n', 'na', 'nach', 'nachdem', 'nahm', 'natürlich', 'neben', 'nein', 'neue', 'neuen', 'neun', 'neunte', 'neunten', 'neunter', 'neuntes', 'nicht', 'nichts', 'nie', 'niemand', 'niemandem', 'niemanden', 'noch', 'nun', 'nur', 'o', 'ob', 'oben', 'oder', 'offen', 'oft', 'ohne', 'ordnung', 'p', 'q', 'r', 'recht', 'rechte', 'rechten', 'rechter', 'rechtes', 
'richtig', 'rund', 's', 'sa', 'sache', 'sagt', 'sagte', 'sah', 'satt', 'schlecht', 'schluss', 'schon', 'sechs', 'sechste', 'sechsten', 'sechster', 'sechstes', 'sehr', 'sei', 'seid', 'seien', 'sein', 'seine', 'seinem', 'seinen', 'seiner', 'seines', 'seit', 'seitdem', 'selbst', 'sich', 'sie', 'sieben', 'siebente', 'siebenten', 'siebenter', 'siebentes', 'sind', 'so', 'solang', 'solche', 'solchem', 'solchen', 'solcher', 'solches', 'soll', 'sollen', 'sollst', 'sollt', 'sollte', 'sollten', 'sondern', 'sonst', 'soweit', 'sowie', 'später', 'startseite', 'statt', 'steht', 'suche', 't', 'tag', 'tage', 'tagen', 'tat', 'teil', 'tel', 'tritt', 'trotzdem', 'tun', 'u', 'uhr', 'um', 'und', 'und?', 'uns', 'unse', 'unsem', 'unsen', 'unser', 'unsere', 'unserer', 'unses', 'unter', 'v', 'vergangenen', 'viel', 'viele', 'vielem', 'vielen', 'vielleicht', 'vier', 'vierte', 'vierten', 'vierter', 'viertes', 'vom', 'von', 'vor', 'w', 'wahr?', 'wann', 'war', 'waren', 'warst', 'wart', 'warum', 'was', 'weg', 'wegen', 'weil', 'weit', 'weiter', 'weitere', 'weiteren', 'weiteres', 'welche', 'welchem', 'welchen', 'welcher', 'welches', 'wem', 'wen', 'wenig', 'wenige', 'weniger', 'weniges', 'wenigstens', 'wenn', 'wer', 'werde', 'werden', 'werdet', 'weshalb', 'wessen', 'wie', 'wieder', 'wieso', 'will', 'willst', 'wir', 'wird', 'wirklich', 'wirst', 'wissen', 'wo', 'woher', 'wohin', 'wohl', 'wollen', 'wollt', 'wollte', 'wollten', 'worden', 'wurde', 'wurden', 'während', 'währenddem', 'währenddessen', 'wäre', 'würde', 'würden', 'x', 'y', 'z', 'z.b', 'zehn', 'zehnte', 'zehnten', 'zehnter', 'zehntes', 'zeit', 'zu', 'zuerst', 'zugleich', 'zum', 'zunächst', 'zur', 'zurück', 'zusammen', 'zwanzig', 'zwar', 'zwei', 'zweite', 'zweiten', 'zweiter', 'zweites', 'zwischen', 'zwölf', 'über', 'überhaupt', 'übrigens' 8 | ] 9 | 10 | // tell the world about the noise words. 
11 | exports.words = words 12 | -------------------------------------------------------------------------------- /lib/stopwords_el.js: -------------------------------------------------------------------------------- 1 | var words = ["αλλα","αν","αντι","απο","αυτα","αυτεσ","αυτη","αυτο","αυτοι","αυτοσ","αυτουσ","αυτων","για","δε", 2 | "δεν","εαν","ειμαι","ειμαστε","ειναι","εισαι","ειστε","εκεινα","εκεινεσ","εκεινη","εκεινο","εκεινοι","εκεινοσ", 3 | "εκεινουσ","εκεινων","ενω","επι","η","θα","ισωσ","κ","και","κατα","κι","μα","με","μετα","μη","μην","να","ο","οι", 4 | "ομωσ","οπωσ","οσο","οτι","παρα","ποια","ποιεσ","ποιο","ποιοι","ποιοσ","ποιουσ","ποιων","που","προσ","πωσ","σε", 5 | "στη","στην","στο","στον","τα","την","τησ","το","τον","τοτε","του","των","ωσ"] 6 | 7 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_en.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2011, Chris Umbel 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | // a list of commonly used words that have little meaning and can be excluded 24 | // from analysis. 25 | 26 | var words = ["a","as","able","about","above","according","accordingly","across","actually","after","afterwards","again","against","aint", 27 | "all","allow","allows","almost","alone","along","already","also","although","always","am","among","amongst","an","and","another","any", 28 | "anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","arent","around", 29 | "as","aside","ask","asking","associated","at","available","away","awfully","b","be","became","because","become","becomes","becoming", 30 | "been","before","beforehand","behind","being","believe","below","beside","besides","best","better","between","beyond","both","brief", 31 | "but","by","c","cmon","cs","came","can","cant","cannot","cant","cause","causes","certain","certainly","changes","clearly","co","com", 32 | "come","comes","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldnt", 33 | "course","currently","d","definitely","described","despite","did","didnt","different","do","does","doesnt","doing","dont","done","down", 34 | "downwards","during","e","each","edu","eg","eight","either","else","elsewhere","enough","entirely","especially","et","etc","even","ever", 35 | "every","everybody","everyone","everything","everywhere","ex","exactly","example","except","f","far","few","fifth","first","five","followed", 36 | "following","follows","for","former","formerly","forth","four","from","further","furthermore","g","get","gets","getting","given","gives", 37 | 
"go","goes","going","gone","got","gotten","greetings","h","had","hadnt","happens","hardly","has","hasnt","have","havent","having","he", 38 | "hes","hello","help","hence","her","here","heres","hereafter","hereby","herein","hereupon","hers","herself","hi","him","himself","his", 39 | "hither","hopefully","how","howbeit","however","i","id","ill","im","ive","ie","if","ignored","immediate","in","inasmuch","inc","indeed", 40 | "indicate","indicated","indicates","inner","insofar","instead","into","inward","is","isnt","it","itd","itll","its","its","itself","j", 41 | "just","k","keep","keeps","kept","know","known","knows","l","last","lately","later","latter","latterly","least","less","lest","let","lets", 42 | "like","liked","likely","little","look","looking","looks","ltd","m","mainly","many","may","maybe","me","mean","meanwhile","merely","might", 43 | "more","moreover","most","mostly","much","must","my","myself","n","name","namely","nd","near","nearly","necessary","need","needs","neither", 44 | "never","nevertheless","new","next","nine","no","nobody","non","none","noone","nor","normally","not","nothing","novel","now","nowhere","o", 45 | "obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","only","onto","or","other","others","otherwise","ought","our", 46 | "ours","ourselves","out","outside","over","overall","own","p","particular","particularly","per","perhaps","placed","please","plus","possible", 47 | "presumably","probably","provides","q","que","quite","qv","r","rather","rd","re","really","reasonably","regarding","regardless","regards", 48 | "relatively","respectively","right","s","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming", 49 | "seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","she","should","shouldnt","since","six","so", 50 | 
"some","somebody","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify","specifying", 51 | "still","sub","such","sup","sure","t","ts","take","taken","tell","tends","th","than","thank","thanks","thanx","that","thats","thats","the", 52 | "their","theirs","them","themselves","then","thence","there","theres","thereafter","thereby","therefore","therein","theres","thereupon", 53 | "these","they","theyd","theyll","theyre","theyve","think","third","this","thorough","thoroughly","those","though","three","through", 54 | "throughout","thru","thus","to","together","too","took","toward","towards","tried","tries","truly","try","trying","twice","two","u","un", 55 | "under","unfortunately","unless","unlikely","until","unto","up","upon","us","use","used","useful","uses","using","usually","uucp","v", 56 | "value","various","very","via","viz","vs","w","want","wants","was","wasnt","way","we","wed","well","were","weve","welcome","well", 57 | "went","were","werent","what","whats","whatever","when","whence","whenever","where","wheres","whereafter","whereas","whereby","wherein", 58 | "whereupon","wherever","whether","which","while","whither","who","whos","whoever","whole","whom","whose","why","will","willing","wish", 59 | "with","within","without","wont","wonder","would","wouldnt","x","y","yes","yet","you","youd","youll","youre","youve","your","yours", 60 | "yourself","yourselves","youll","z","zero"] 61 | 62 | // tell the world about the noise words. 
63 | exports.words = words 64 | -------------------------------------------------------------------------------- /lib/stopwords_eo.js: -------------------------------------------------------------------------------- 1 | var words = ["adiaŭ","ajn","al","ankoraŭ","antaŭ","aŭ","bonan","bonvole","bonvolu","bv","ci","cia","cian","cin","d-ro", 2 | "da","de","dek","deka","do","doktor'","doktoro","du","dua","dum","eble","ekz","ekzemple","en","estas","estis","estos", 3 | "estu","estus","eĉ","f-no","feliĉan","for","fraŭlino","ha","havas","havis","havos","havu","havus","he","ho","hu","ili", 4 | "ilia","ilian","ilin","inter","io","ion","iu","iujn","iun","ja","jam","je","jes","k","kaj","ke","kio","kion","kiu", 5 | "kiujn","kiun","kvankam","kvar","kvara","kvazaŭ","kvin","kvina","la","li","lia","lian","lin","malantaŭ","male","malgraŭ", 6 | "mem","mi","mia","mian","min","minus","naŭ","naŭa","ne","nek","nenio","nenion","neniu","neniun","nepre","ni","nia","nian", 7 | "nin","nu","nun","nur","ok","oka","oni","onia","onian","onin","plej","pli","plu","plus","por","post","preter","s-no","s-ro", 8 | "se","sed","sep","sepa","ses","sesa","si","sia","sian","sin","sinjor'","sinjorino","sinjoro","sub","super","supren","sur", 9 | "tamen","tio","tion","tiu","tiujn","tiun","tra","tri","tria","tuj","tute","unu","unua","ve","verŝajne","vi","via","vian", 10 | "vin","ĉi","ĉio","ĉion","ĉiu","ĉiujn","ĉiun","ĉu","ĝi","ĝia","ĝian","ĝin","ĝis","ĵus","ŝi","ŝia","ŝin"] 11 | 12 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_es.js: -------------------------------------------------------------------------------- 1 | var words = ["a","actualmente","acuerdo","adelante","ademas","además","adrede","afirmó","agregó","ahi","ahora", 2 | "ahí","al","algo","alguna","algunas","alguno","algunos","algún","alli","allí","alrededor","ambos","ampleamos", 3 | 
"antano","antaño","ante","anterior","antes","apenas","aproximadamente","aquel","aquella","aquellas","aquello", 4 | "aquellos","aqui","aquél","aquélla","aquéllas","aquéllos","aquí","arriba","arribaabajo","aseguró","asi","así", 5 | "atras","aun","aunque","ayer","añadió","aún","b","bajo","bastante","bien","breve","buen","buena","buenas","bueno", 6 | "buenos","c","cada","casi","cerca","cierta","ciertas","cierto","ciertos","cinco","claro","comentó","como","con", 7 | "conmigo","conocer","conseguimos","conseguir","considera","consideró","consigo","consigue","consiguen","consigues", 8 | "contigo","contra","cosas","creo","cual","cuales","cualquier","cuando","cuanta","cuantas","cuanto","cuantos","cuatro", 9 | "cuenta","cuál","cuáles","cuándo","cuánta","cuántas","cuánto","cuántos","cómo","d","da","dado","dan","dar","de", 10 | "debajo","debe","deben","debido","decir","dejó","del","delante","demasiado","demás","dentro","deprisa","desde", 11 | "despacio","despues","después","detras","detrás","dia","dias","dice","dicen","dicho","dieron","diferente","diferentes", 12 | "dijeron","dijo","dio","donde","dos","durante","día","días","dónde","e","ejemplo","el","ella","ellas","ello","ellos", 13 | "embargo","empleais","emplean","emplear","empleas","empleo","en","encima","encuentra","enfrente","enseguida","entonces", 14 | "entre","era","eramos","eran","eras","eres","es","esa","esas","ese","eso","esos","esta","estaba","estaban","estado", 15 | "estados","estais","estamos","estan","estar","estará","estas","este","esto","estos","estoy","estuvo","está","están","ex", 16 | "excepto","existe","existen","explicó","expresó","f","fin","final","fue","fuera","fueron","fui","fuimos","g","general", 17 | "gran","grandes","gueno","h","ha","haber","habia","habla","hablan","habrá","había","habían","hace","haceis","hacemos", 18 | "hacen","hacer","hacerlo","haces","hacia","haciendo","hago","han","hasta","hay","haya","he","hecho","hemos","hicieron", 19 | 
"hizo","horas","hoy","hubo","i","igual","incluso","indicó","informo","informó","intenta","intentais","intentamos","intentan", 20 | "intentar","intentas","intento","ir","j","junto","k","l","la","lado","largo","las","le","lejos","les","llegó","lleva", 21 | "llevar","lo","los","luego","lugar","m","mal","manera","manifestó","mas","mayor","me","mediante","medio","mejor","mencionó", 22 | "menos","menudo","mi","mia","mias","mientras","mio","mios","mis","misma","mismas","mismo","mismos","modo","momento","mucha", 23 | "muchas","mucho","muchos","muy","más","mí","mía","mías","mío","míos","n","nada","nadie","ni","ninguna","ningunas","ninguno", 24 | "ningunos","ningún","no","nos","nosotras","nosotros","nuestra","nuestras","nuestro","nuestros","nueva","nuevas","nuevo", 25 | "nuevos","nunca","o","ocho","os","otra","otras","otro","otros","p","pais","para","parece","parte","partir","pasada", 26 | "pasado","paìs","peor","pero","pesar","poca","pocas","poco","pocos","podeis","podemos","poder","podria","podriais", 27 | "podriamos","podrian","podrias","podrá","podrán","podría","podrían","poner","por","porque","posible","primer","primera", 28 | "primero","primeros","principalmente","pronto","propia","propias","propio","propios","proximo","próximo","próximos","pudo", 29 | "pueda","puede","pueden","puedo","pues","q","qeu","que","quedó","queremos","quien","quienes","quiere","quiza","quizas", 30 | "quizá","quizás","quién","quiénes","qué","r","raras","realizado","realizar","realizó","repente","respecto","s","sabe", 31 | "sabeis","sabemos","saben","saber","sabes","salvo","se","sea","sean","segun","segunda","segundo","según","seis","ser", 32 | "sera","será","serán","sería","señaló","si","sido","siempre","siendo","siete","sigue","siguiente","sin","sino","sobre", 33 | "sois","sola","solamente","solas","solo","solos","somos","son","soy","soyos","su","supuesto","sus","suya","suyas","suyo", 34 | 
"sé","sí","sólo","t","tal","tambien","también","tampoco","tan","tanto","tarde","te","temprano","tendrá","tendrán","teneis", 35 | "tenemos","tener","tenga","tengo","tenido","tenía","tercera","ti","tiempo","tiene","tienen","toda","todas","todavia", 36 | "todavía","todo","todos","total","trabaja","trabajais","trabajamos","trabajan","trabajar","trabajas","trabajo","tras", 37 | "trata","través","tres","tu","tus","tuvo","tuya","tuyas","tuyo","tuyos","tú","u","ultimo","un","una","unas","uno","unos", 38 | "usa","usais","usamos","usan","usar","usas","uso","usted","ustedes","v","va","vais","valor","vamos","van","varias","varios", 39 | "vaya","veces","ver","verdad","verdadera","verdadero","vez","vosotras","vosotros","voy","vuestra","vuestras","vuestro", 40 | "vuestros","w","x","y","ya","yo","z","él","ésa","ésas","ése","ésos","ésta","éstas","éste","éstos","última","últimas", 41 | "último","últimos"] 42 | 43 | exports.words = words 44 | -------------------------------------------------------------------------------- /lib/stopwords_et.js: -------------------------------------------------------------------------------- 1 | var words = ["aga","ei","et","ja","jah","kas","kui","kõik","ma","me","mida","midagi","mind","minu","mis","mu","mul", 2 | "mulle","nad","nii","oled","olen","oli","oma","on","pole","sa","seda","see","selle","siin","siis","ta","te","ära"] 3 | 4 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_eu.js: -------------------------------------------------------------------------------- 1 | var words = ["al","anitz","arabera","asko","baina","bat","batean","batek","bati","batzuei","batzuek","batzuetan", 2 | "batzuk","bera","beraiek","berau","berauek","bere","berori","beroriek","beste","bezala","da","dago","dira","ditu", 3 | "du","dute","edo","egin","ere","eta","eurak","ez","gainera","gu","gutxi","guzti","haiei","haiek","haietan","hainbeste", 4 | 
"hala","han","handik","hango","hara","hari","hark","hartan","hau","hauei","hauek","hauetan","hemen","hemendik","hemengo", 5 | "hi","hona","honek","honela","honetan","honi","hor","hori","horiei","horiek","horietan","horko","horra","horrek","horrela", 6 | "horretan","horri","hortik","hura","izan","ni","noiz","nola","non","nondik","nongo","nor","nora","ze","zein","zen", 7 | "zenbait","zenbat","zer","zergatik","ziren","zituen","zu","zuek","zuen","zuten"] 8 | 9 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_fa.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2011, Chris Umbel 3 | Farsi Stop Words by Fardin Koochaki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | */ 23 | 24 | // a list of commonly used words that have little meaning and can be excluded 25 | // from analysis. 
26 | var words = [ 27 | 28 | // Words 29 | "آباد","آره","آری","آمد","آمده","آن","آنان","آنجا","آنكه","آنها","آنچه","آورد", 30 | "آورده","آيد","آیا","اثرِ","از","است","استفاده","اش","اكنون","البته","البتّه","ام", 31 | "اما","امروز","امسال","اند","انکه","او","اول","اي","ايشان","ايم","اين","اينكه","اگر", 32 | "با","بار","بارة","باره","باشد","باشند","باشيم","بالا","بالایِ","بايد","بدون","بر","برابرِ", 33 | "براساس","براي","برایِ","برخوردار","برخي","برداري","بروز","بسيار","بسياري","بعد","بعری","بعضي", 34 | "بلكه","بله","بلکه","بلی","بنابراين","بندي","به","بهترين","بود","بودن","بودند","بوده","بي","بيست", 35 | "بيش","بيشتر","بيشتري","بين","بی","بیرونِ","تا","تازه","تاكنون","تان","تحت","تر","ترين","تمام", 36 | "تمامي","تنها","تواند","توانند","توسط","تولِ","تویِ","جا","جاي","جايي","جدا","جديد","جريان","جز", 37 | "جلوگيري","جلویِ","حتي","حدودِ","حق","خارجِ","خدمات","خواست","خواهد","خواهند","خواهيم","خود","خويش", 38 | "خیاه","داد","دادن","دادند","داده","دارد","دارند","داريم","داشت","داشتن","داشتند","داشته","دانست", 39 | "دانند","در","درباره","دنبالِ","ده","دهد","دهند","دو","دوم","ديده","ديروز","ديگر","ديگران","ديگري", 40 | "دیگر","را","راه","رفت","رفته","روب","روزهاي","روي","رویِ","ريزي","زياد","زير","زيرا","زیرِ","سابق", 41 | "ساخته","سازي","سراسر","سریِ","سعي","سمتِ","سوم","سوي","سویِ","سپس","شان","شايد","شد","شدن","شدند", 42 | "شده","شش","شما","شناسي","شود","شوند","صورت","ضدِّ","ضمن","طبقِ","طريق","طور","طي","عقبِ","علّتِ", 43 | "عنوانِ","غير","فقط","فكر","فوق","قابل","قبل","قصدِ","كرد","كردم","كردن","كردند","كرده","كسي","كل", 44 | "كمتر","كند","كنم","كنند","كنيد","كنيم","كه","لطفاً","ما","مان","مانند","مانندِ","مثل","مثلِ","مختلف", 45 | "مدّتی","مردم","مرسی","مقابل","من","مورد","مي","ميليارد","ميليون","مگر","ناشي","نام","نبايد","نبود", 46 | "نخست","نخستين","نخواهد","ندارد","ندارند","نداشته","نزديك","نزدِ","نزدیکِ","نشان","نشده","نظير","نكرده", 47 | "نمايد","نمي","نه","نوعي","نيز","نيست","ها","هاي","هايي","هر","هرگز","هزار","هست","هستند","هستيم","هفت", 48 | 
"هم","همان","همه","همواره","همين","همچنان","همچنين","همچون","همین","هنوز","هنگام","هنگامِ","هنگامی","هيچ", 49 | "هیچ","و","وسطِ","وقتي","وقتیکه","ولی","وي","وگو","يا","يابد","يك","يكديگر","يكي","ّه","پاعینِ","پس","پنج", 50 | "پيش","پیش","پیشِ","چرا","چطور","چند","چندین","چنين","چه","چهار","چون","چيزي","چگونه","چیز","چیزی","چیست", 51 | "کجا","کجاست","کدام","کس","کسی","کنارِ","که","کَی","کی","گذاري","گذاشته","گردد","گرفت","گرفته","گروهي","گفت", 52 | "گفته","گويد","گويند","گيرد","گيري","یا","یک", 53 | 54 | // Symbols 55 | '؟', '!', '٪', '.', '،', '؛', ':', ';', ',', 56 | 57 | // Numbers 58 | '۱', '۲', '۳', '۴', '۵', '۶', '۷', '۸', '۹', '۰' 59 | ] 60 | 61 | // tell the world about the noise words. 62 | exports.words = words 63 | -------------------------------------------------------------------------------- /lib/stopwords_fi.js: -------------------------------------------------------------------------------- 1 | var words = ["aiemmin","aika","aikaa","aikaan","aikaisemmin","aikaisin","aikajen","aikana","aikoina","aikoo", 2 | "aikovat","aina","ainakaan","ainakin","ainoa","ainoat","aiomme","aion","aiotte","aist","aivan","ajan","alas", 3 | "alemmas","alkuisin","alkuun","alla","alle","aloitamme","aloitan","aloitat","aloitatte","aloitattivat","aloitettava", 4 | "aloitettevaksi","aloitettu","aloitimme","aloitin","aloitit","aloititte","aloittaa","aloittamatta","aloitti","aloittivat", 5 | "alta","aluksi","alussa","alusta","annettavaksi","annetteva","annettu","ansiosta","antaa","antamatta","antoi","aoua", 6 | "apu","asia","asiaa","asian","asiasta","asiat","asioiden","asioihin","asioita","asti","avuksi","avulla","avun","avutta", 7 | "edelle","edelleen","edellä","edeltä","edemmäs","edes","edessä","edestä","ehkä","ei","eikä","eilen","eivät","eli", 8 | "ellei","elleivät","ellemme","ellen","ellet","ellette","emme","en","enemmän","eniten","ennen","ensi","ensimmäinen", 9 | 
"ensimmäiseksi","ensimmäisen","ensimmäisenä","ensimmäiset","ensimmäisiksi","ensimmäisinä","ensimmäisiä","ensimmäistä", 10 | "ensin","entinen","entisen","entisiä","entisten","entistä","enää","eri","erittäin","erityisesti","eräiden","eräs","eräät", 11 | "esi","esiin","esillä","esimerkiksi","et","eteen","etenkin","etessa","ette","ettei","että","haikki","halua","haluaa", 12 | "haluamatta","haluamme","haluan","haluat","haluatte","haluavat","halunnut","halusi","halusimme","halusin","halusit", 13 | "halusitte","halusivat","halutessa","haluton","he","hei","heidän","heihin","heille","heiltä","heissä","heistä","heitä", 14 | "helposti","heti","hetkellä","hieman","hitaasti","hoikein","huolimatta","huomenna","hyvien","hyviin","hyviksi","hyville", 15 | "hyviltä","hyvin","hyvinä","hyvissä","hyvistä","hyviä","hyvä","hyvät","hyvää","hän","häneen","hänelle","hänellä","häneltä", 16 | "hänen","hänessä","hänestä","hänet","ihan","ilman","ilmeisesti","itse","itsensä","itseään","ja","jo","johon","joiden", 17 | "joihin","joiksi","joilla","joille","joilta","joissa","joista","joita","joka","jokainen","jokin","joko","joku","jolla", 18 | "jolle","jolloin","jolta","jompikumpi","jonka","jonkin","jonne","joo","jopa","jos","joskus","jossa","josta","jota","jotain", 19 | "joten","jotenkin","jotenkuten","jotka","jotta","jouduimme","jouduin","jouduit","jouduitte","joudumme","joudun","joudutte", 20 | "joukkoon","joukossa","joukosta","joutua","joutui","joutuivat","joutumaan","joutuu","joutuvat","juuri","jälkeen","jälleen", 21 | "jää","kahdeksan","kahdeksannen","kahdella","kahdelle","kahdelta","kahden","kahdessa","kahdesta","kahta","kahteen","kai", 22 | "kaiken","kaikille","kaikilta","kaikkea","kaikki","kaikkia","kaikkiaan","kaikkialla","kaikkialle","kaikkialta","kaikkien", 23 | "kaikkin","kaksi","kannalta","kannattaa","kanssa","kanssaan","kanssamme","kanssani","kanssanne","kanssasi","kauan","kauemmas", 24 | 
"kaukana","kautta","kehen","keiden","keihin","keiksi","keille","keillä","keiltä","keinä","keissä","keistä","keitten","keittä", 25 | "keitä","keneen","keneksi","kenelle","kenellä","keneltä","kenen","kenenä","kenessä","kenestä","kenet","kenettä","kennessästä", 26 | "kenties","kerran","kerta","kertaa","keskellä","kesken","keskimäärin","ketkä","ketä","kiitos","kohti","koko","kokonaan","kolmas", 27 | "kolme","kolmen","kolmesti","koska","koskaan","kovin","kuin","kuinka","kuinkan","kuitenkaan","kuitenkin","kuka","kukaan","kukin", 28 | "kukka","kumpainen","kumpainenkaan","kumpi","kumpikaan","kumpikin","kun","kuten","kuuden","kuusi","kuutta","kylliksi","kyllä", 29 | "kymmenen","kyse","liian","liki","lisäksi","lisää","lla","luo","luona","lähekkäin","lähelle","lähellä","läheltä","lähemmäs", 30 | "lähes","lähinnä","lähtien","läpi","mahdollisimman","mahdollista","me","meidän","meille","meillä","melkein","melko","menee", 31 | "meneet","menemme","menen","menet","menette","menevät","meni","menimme","menin","menit","menivät","mennessä","mennyt", 32 | "menossa","mihin","mikin","miksi","mikä","mikäli","mikään","milloin","milloinkan","minne","minun","minut","minä","missä", 33 | "mistä","miten","mitä","mitään","moi","molemmat","mones","monesti","monet","moni","moniaalla","moniaalle","moniaalta", 34 | "monta","muassa","muiden","muita","muka","mukaan","mukaansa","mukana","mutta","muu","muualla","muualle","muualta", 35 | "muuanne","muulloin","muun","muut","muuta","muutama","muutaman","muuten","myöhemmin","myös","myöskin","myöskään", 36 | "myötä","ne","neljä","neljän","neljää","niiden","niin","niistä","niitä","noin","nopeammin","nopeasti","nopeiten", 37 | "nro","nuo","nyt","näiden","näin","näissä","näissähin","näissälle","näissältä","näissästä","näitä","nämä","ohi","oikea", 38 | "oikealla","oikein","ole","olemme","olen","olet","olette","oleva","olevan","olevat","oli","olimme","olin","olisi", 39 | 
"olisimme","olisin","olisit","olisitte","olisivat","olit","olitte","olivat","olla","olleet","olli","ollut","oma", 40 | "omaa","omaan","omaksi","omalle","omalta","oman","omassa","omat","omia","omien","omiin","omiksi","omille","omilta", 41 | "omissa","omista","on","onkin","onko","ovat","paikoittain","paitsi","pakosti","paljon","paremmin","parempi","parhaillaan", 42 | "parhaiten","perusteella","peräti","pian","pieneen","pieneksi","pienelle","pienellä","pieneltä","pienempi","pienestä", 43 | "pieni","pienin","puolesta","puolestaan","päälle","runsaasti","saakka","sadam","sama","samaa","samaan","samalla", 44 | "samallalta","samallassa","samallasta","saman","samat","samoin","sata","sataa","satojen","se","seitsemän","sekä", 45 | "sen","seuraavat","siellä","sieltä","siihen","siinä","siis","siitä","sijaan","siksi","silloin","sillä","silti", 46 | "sinne","sinua","sinulle","sinulta","sinun","sinussa","sinusta","sinut","sinä","sisäkkäin","sisällä","siten", 47 | "sitten","sitä","ssa","sta","suoraan","suuntaan","suuren","suuret","suuri","suuria","suurin","suurten","taa", 48 | "taas","taemmas","tahansa","tai","takaa","takaisin","takana","takia","tapauksessa","tarpeeksi","tavalla", 49 | "tavoitteena","te","tietysti","todella","toinen","toisaalla","toisaalle","toisaalta","toiseen","toiseksi", 50 | "toisella","toiselle","toiselta","toisemme","toisen","toisensa","toisessa","toisesta","toista","toistaiseksi", 51 | "toki","tosin","tuhannen","tuhat","tule","tulee","tulemme","tulen","tulet","tulette","tulevat","tulimme","tulin", 52 | "tulisi","tulisimme","tulisin","tulisit","tulisitte","tulisivat","tulit","tulitte","tulivat","tulla","tulleet", 53 | "tullut","tuntuu","tuo","tuolla","tuolloin","tuolta","tuonne","tuskin","tykö","tähän","tällä","tällöin","tämä", 54 | "tämän","tänne","tänä","tänään","tässä","tästä","täten","tätä","täysin","täytyvät","täytyy","täällä","täältä", 55 | 
"ulkopuolella","usea","useasti","useimmiten","usein","useita","uudeksi","uudelleen","uuden","uudet","uusi","uusia", 56 | "uusien","uusinta","uuteen","uutta","vaan","vahemmän","vai","vaiheessa","vaikea","vaikean","vaikeat","vaikeilla", 57 | "vaikeille","vaikeilta","vaikeissa","vaikeista","vaikka","vain","varmasti","varsin","varsinkin","varten","vasen", 58 | "vasenmalla","vasta","vastaan","vastakkain","vastan","verran","vielä","vierekkäin","vieressä","vieri","viiden", 59 | "viime","viimeinen","viimeisen","viimeksi","viisi","voi","voidaan","voimme","voin","voisi","voit","voitte","voivat", 60 | "vuoden","vuoksi","vuosi","vuosien","vuosina","vuotta","vähemmän","vähintään","vähiten","vähän","välillä","yhdeksän", 61 | "yhden","yhdessä","yhteen","yhteensä","yhteydessä","yhteyteen","yhtä","yhtäälle","yhtäällä","yhtäältä","yhtään","yhä", 62 | "yksi","yksin","yksittäin","yleensä","ylemmäs","yli","ylös","ympäri","älköön","älä"] 63 | 64 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_fr.js: -------------------------------------------------------------------------------- 1 | var words = ["a","abord","absolument","afin","ah","ai","aie","ailleurs","ainsi","ait","allaient","allo","allons", 2 | "allô","alors","anterieur","anterieure","anterieures","apres","après","as","assez","attendu","au","aucun","aucune", 3 | "aujourd","aujourd'hui","aupres","auquel","aura","auraient","aurait","auront","aussi","autre","autrefois","autrement", 4 | "autres","autrui","aux","auxquelles","auxquels","avaient","avais","avait","avant","avec","avoir","avons","ayant","b", 5 | "bah","bas","basee","bat","beau","beaucoup","bien","bigre","boum","bravo","brrr","c","car","ce","ceci","cela","celle", 6 | "celle-ci","celle-là","celles","celles-ci","celles-là","celui","celui-ci","celui-là","cent","cependant","certain", 7 | "certaine","certaines","certains","certes","ces","cet","cette","ceux","ceux-ci","ceux-là","chacun","chacune","chaque", 8 | 
"cher","chers","chez","chiche","chut","chère","chères","ci","cinq","cinquantaine","cinquante","cinquantième","cinquième", 9 | "clac","clic","combien","comme","comment","comparable","comparables","compris","concernant","contre","couic","crac","d", 10 | "da","dans","de","debout","dedans","dehors","deja","delà","depuis","dernier","derniere","derriere","derrière","des", 11 | "desormais","desquelles","desquels","dessous","dessus","deux","deuxième","deuxièmement","devant","devers","devra", 12 | "different","differentes","differents","différent","différente","différentes","différents","dire","directe","directement", 13 | "dit","dite","dits","divers","diverse","diverses","dix","dix-huit","dix-neuf","dix-sept","dixième","doit","doivent","donc", 14 | "dont","douze","douzième","dring","du","duquel","durant","dès","désormais","e","effet","egale","egalement","egales","eh", 15 | "elle","elle-même","elles","elles-mêmes","en","encore","enfin","entre","envers","environ","es","est","et","etant","etc", 16 | "etre","eu","euh","eux","eux-mêmes","exactement","excepté","extenso","exterieur","f","fais","faisaient","faisant","fait", 17 | "façon","feront","fi","flac","floc","font","g","gens","h","ha","hein","hem","hep","hi","ho","holà","hop","hormis","hors", 18 | "hou","houp","hue","hui","huit","huitième","hum","hurrah","hé","hélas","i","il","ils","importe","j","je","jusqu","jusque", 19 | "juste","k","l","la","laisser","laquelle","las","le","lequel","les","lesquelles","lesquels","leur","leurs","longtemps", 20 | "lors","lorsque","lui","lui-meme","lui-même","là","lès","m","ma","maint","maintenant","mais","malgre","malgré","maximale", 21 | "me","meme","memes","merci","mes","mien","mienne","miennes","miens","mille","mince","minimale","moi","moi-meme","moi-même", 22 | "moindres","moins","mon","moyennant","multiple","multiples","même","mêmes","n","na","naturel","naturelle","naturelles","ne", 23 | 
"neanmoins","necessaire","necessairement","neuf","neuvième","ni","nombreuses","nombreux","non","nos","notamment","notre", 24 | "nous","nous-mêmes","nouveau","nul","néanmoins","nôtre","nôtres","o","oh","ohé","ollé","olé","on","ont","onze","onzième", 25 | "ore","ou","ouf","ouias","oust","ouste","outre","ouvert","ouverte","ouverts","o|","où","p","paf","pan","par","parce", 26 | "parfois","parle","parlent","parler","parmi","parseme","partant","particulier","particulière","particulièrement","pas", 27 | "passé","pendant","pense","permet","personne","peu","peut","peuvent","peux","pff","pfft","pfut","pif","pire","plein", 28 | "plouf","plus","plusieurs","plutôt","possessif","possessifs","possible","possibles","pouah","pour","pourquoi","pourrais", 29 | "pourrait","pouvait","prealable","precisement","premier","première","premièrement","pres","probable","probante", 30 | "procedant","proche","près","psitt","pu","puis","puisque","pur","pure","q","qu","quand","quant","quant-à-soi","quanta", 31 | "quarante","quatorze","quatre","quatre-vingt","quatrième","quatrièmement","que","quel","quelconque","quelle","quelles", 32 | "quelqu'un","quelque","quelques","quels","qui","quiconque","quinze","quoi","quoique","r","rare","rarement","rares", 33 | "relative","relativement","remarquable","rend","rendre","restant","reste","restent","restrictif","retour","revoici", 34 | "revoilà","rien","s","sa","sacrebleu","sait","sans","sapristi","sauf","se","sein","seize","selon","semblable","semblaient", 35 | "semble","semblent","sent","sept","septième","sera","seraient","serait","seront","ses","seul","seule","seulement","si", 36 | "sien","sienne","siennes","siens","sinon","six","sixième","soi","soi-même","soit","soixante","son","sont","sous","souvent", 37 | "specifique","specifiques","speculatif","stop","strictement","subtiles","suffisant","suffisante","suffit","suis","suit", 38 | "suivant","suivante","suivantes","suivants","suivre","superpose","sur","surtout","t","ta","tac","tant","tardive","te", 39 | 
"tel","telle","tellement","telles","tels","tenant","tend","tenir","tente","tes","tic","tien","tienne","tiennes","tiens", 40 | "toc","toi","toi-même","ton","touchant","toujours","tous","tout","toute","toutefois","toutes","treize","trente","tres", 41 | "trois","troisième","troisièmement","trop","très","tsoin","tsouin","tu","té","u","un","une","unes","uniformement","unique", 42 | "uniques","uns","v","va","vais","vas","vers","via","vif","vifs","vingt","vivat","vive","vives","vlan","voici","voilà", 43 | "vont","vos","votre","vous","vous-mêmes","vu","vé","vôtre","vôtres","w","x","y","z","zut","à","â","ça","ès","étaient", 44 | "étais","était","étant","été","être","ô"] 45 | 46 | exports.words = words 47 | -------------------------------------------------------------------------------- /lib/stopwords_ga.js: -------------------------------------------------------------------------------- 1 | var words = ["a","ach","ag","agus","an","aon","ar","arna","as","b'","ba","beirt","bhúr","caoga","ceathair", 2 | "ceathrar","chomh","chtó","chuig","chun","cois","céad","cúig","cúigear","d'","daichead","dar","de","deich", 3 | "deichniúr","den","dhá","do","don","dtí","dá","dár","dó","faoi","faoin","faoina","faoinár","fara","fiche", 4 | "gach","gan","go","gur","haon","hocht","i","iad","idir","in","ina","ins","inár","is","le","leis","lena", 5 | "lenár","m'","mar","mo","mé","na","nach","naoi","naonúr","ná","ní","níor","nó","nócha","ocht","ochtar", 6 | "os","roimh","sa","seacht","seachtar","seachtó","seasca","seisear","siad","sibh","sinn","sna","sé","sí", 7 | "tar","thar","thú","triúr","trí","trína","trínár","tríocha","tú","um","ár","é","éis","í","ó","ón","óna","ónár"] 8 | 9 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_gl.js: -------------------------------------------------------------------------------- 1 | var words = ["a","alí","ao","aos","aquel","aquela","aquelas","aqueles","aquilo","aquí","as","así","aínda","ben", 
2 | "cando","che","co","coa","coas","comigo","con","connosco","contigo","convosco","cos","cun","cunha","cunhas","cuns", 3 | "da","dalgunha","dalgunhas","dalgún","dalgúns","das","de","del","dela","delas","deles","desde","deste","do","dos", 4 | "dun","dunha","dunhas","duns","e","el","ela","elas","eles","en","era","eran","esa","esas","ese","eses","esta","estaba", 5 | "estar","este","estes","estiven","estou","está","están","eu","facer","foi","foron","fun","había","hai","iso","isto", 6 | "la","las","lle","lles","lo","los","mais","me","meu","meus","min","miña","miñas","moi","na","nas","neste","nin","no", 7 | "non","nos","nosa","nosas","noso","nosos","nun","nunha","nunhas","nuns","nós","o","os","ou","para","pero","pode","pois", 8 | "pola","polas","polo","polos","por","que","se","senón","ser","seu","seus","sexa","sido","sobre","súa","súas","tamén", 9 | "tan","te","ten","ter","teu","teus","teñen","teño","ti","tido","tiven","tiña","túa","túas","un","unha","unhas","uns", 10 | "vos","vosa","vosas","voso","vosos","vós","á","é","ó","ós"] 11 | 12 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_ha.js: -------------------------------------------------------------------------------- 1 | var words = ["a","amma","ba","ban","ce","cikin","da","don","ga","in","ina","ita","ji","ka","ko","kuma","lokacin", 2 | "ma","mai","na","ne","ni","sai","shi","su","suka","sun","ta","tafi","take","tana","wani","wannan","wata","ya","yake", 3 | "yana","yi","za"] 4 | 5 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_he.js: -------------------------------------------------------------------------------- 1 | var words = ["אבל","או","אולי","אותה","אותו","אותי","אותך","אותם","אותן","אותנו","אז","אחר","אחרות","אחרי","אחריכן", 2 | "אחרים","אחרת","אי","איזה","איך","אין","איפה","איתה","איתו","איתי","איתך","איתכם","איתכן","איתם","איתן","איתנו","אך", 3 | 
"אל","אלה","אלו","אם","אנחנו","אני","אס","אף","אצל","אשר","את","אתה","אתכם","אתכן","אתם","אתן","באיזומידה","באמצע", 4 | "באמצעות","בגלל","בין","בלי","במידה","במקוםשבו","ברם","בשביל","בשעהש","בתוך","גם","דרך","הוא","היא","היה","היכן", 5 | "היתה","היתי","הם","הן","הנה","הסיבהשבגללה","הרי","ואילו","ואת","זאת","זה","זות","יהיה","יוכל","יוכלו","יותרמדי", 6 | "יכול","יכולה","יכולות","יכולים","יכל","יכלה","יכלו","יש","כאן","כאשר","כולם","כולן","כזה","כי","כיצד","כך","ככה", 7 | "כל","כלל","כמו","כן","כפי","כש","לא","לאו","לאיזותכלית","לאן","לבין","לה","להיות","להם","להן","לו","לי","לכם","לכן", 8 | "למה","למטה","למעלה","למקוםשבו","למרות","לנו","לעבר","לעיכן","לפיכך","לפני","מאד","מאחורי","מאיזוסיבה","מאין","מאיפה", 9 | "מבלי","מבעד","מדוע","מה","מהיכן","מול","מחוץ","מי","מכאן","מכיוון","מלבד","מן","מנין","מסוגל","מעט","מעטים","מעל", 10 | "מצד","מקוםבו","מתחת","מתי","נגד","נגר","נו","עד","עז","על","עלי","עליה","עליהם","עליהן","עליו","עליך","עליכם","עלינו", 11 | "עם","עצמה","עצמהם","עצמהן","עצמו","עצמי","עצמם","עצמן","עצמנו","פה","רק","שוב","של","שלה","שלהם","שלהן","שלו","שלי", 12 | "שלך","שלכה","שלכם","שלכן","שלנו","שם","תהיה","תחת"] 13 | 14 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_hi.js: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Gene Diaz 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or 
substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | Originates from: https://github.com/stopwords-iso/stopwords-hi/ 25 | */ 26 | 27 | // a list of commonly used words that have little meaning and can be excluded 28 | // from analysis. 29 | var words = [ 30 | 'अंदर','अत','अदि','अप','अपना','अपनि','अपनी','अपने','अभि','अभी','आदि','आप','इंहिं', 31 | 'इंहें','इंहों','इतयादि','इत्यादि','इन','इनका','इन्हीं','इन्हें','इन्हों','इस','इसका','इसकि', 32 | 'इसकी','इसके','इसमें','इसि','इसी','इसे','उंहिं','उंहें','उंहों','उन','उनका','उनकि','उनकी', 33 | 'उनके','उनको','उन्हीं','उन्हें','उन्हों','उस','उसके','उसि','उसी','उसे','एक','एवं','एस', 34 | 'एसे','ऐसे','ओर','और','कइ','कई','कर','करता','करते','करना','करने','करें','कहते', 35 | 'कहा','का','काफि','काफ़ी','कि','किंहें','किंहों','कितना','किन्हें','किन्हों','किया','किर','किस', 36 | 'किसि','किसी','किसे','की','कुछ','कुल','के','को','कोइ','कोई','कोन','कोनसा','कौन', 37 | 'कौनसा','गया','घर','जब','जहाँ','जहां','जा','जिंहें','जिंहों','जितना','जिधर','जिन','जिन्हें', 38 | 'जिन्हों','जिस','जिसे','जीधर','जेसा','जेसे','जैसा','जैसे','जो','तक','तब','तरह','तिंहें', 39 | 'तिंहों','तिन','तिन्हें','तिन्हों','तिस','तिसे','तो','था','थि','थी','थे','दबारा','दवारा','दिया', 40 | 'दुसरा','दुसरे','दूसरे','दो','द्वारा','न','नहिं','नहीं','ना','निचे','निहायत','नीचे','ने','पर', 41 | 'पहले','पुरा','पूरा','पे','फिर','बनि','बनी','बहि','बही','बहुत','बाद','बाला','बिलकुल', 42 | 'भि','भितर','भी','भीतर','मगर','मानो','मे','में','यदि','यह','यहाँ','यहां','यहि','यही', 43 | 
'या','यिह','ये','रखें','रवासा','रहा','रहे','ऱ्वासा','लिए','लिये','लेकिन','व','वगेरह','वरग', 44 | 'वर्ग','वह','वहाँ','वहां','वहिं','वहीं','वाले','वुह','वे','वग़ैरह','संग','सकता','सकते','सबसे', 45 | 'सभि','सभी','साथ','साबुत','साभ','सारा','से','सो','हि','ही','हुअ','हुआ','हुइ','हुई', 46 | 'हुए','हे','हें','है','हैं','हो','होता','होति','होती','होते','होना','होने'] 47 | 48 | // tell the world about the noise words. 49 | exports.words = words 50 | -------------------------------------------------------------------------------- /lib/stopwords_hr.js: -------------------------------------------------------------------------------- 1 | var words = ["a","ako","ali","bi","bih","bila","bili","bilo","bio","bismo","biste","biti","bumo","da","do","duž","ga", 2 | "hoće","hoćemo","hoćete","hoćeš","hoću","i","iako","ih","ili","iz","ja","je","jedna","jedne","jedno","jer","jesam", 3 | "jesi","jesmo","jest","jeste","jesu","jim","joj","još","ju","kada","kako","kao","koja","koje","koji","kojima","koju", 4 | "kroz","li","me","mene","meni","mi","mimo","moj","moja","moje","mu","na","nad","nakon","nam","nama","nas","naš","naša", 5 | "naše","našeg","ne","nego","neka","neki","nekog","neku","nema","netko","neće","nećemo","nećete","nećeš","neću","nešto", 6 | "ni","nije","nikoga","nikoje","nikoju","nisam","nisi","nismo","niste","nisu","njega","njegov","njegova","njegovo","njemu", 7 | "njezin","njezina","njezino","njih","njihov","njihova","njihovo","njim","njima","njoj","nju","no","o","od","odmah","on", 8 | "ona","oni","ono","ova","pa","pak","po","pod","pored","prije","s","sa","sam","samo","se","sebe","sebi","si","smo","ste", 9 | "su","sve","svi","svog","svoj","svoja","svoje","svom","ta","tada","taj","tako","te","tebe","tebi","ti","to","toj","tome", 10 | "tu","tvoj","tvoja","tvoje","u","uz","vam","vama","vas","vaš","vaša","vaše","već","vi","vrlo","za","zar","će","ćemo", 11 | "ćete","ćeš","ću","što"] 12 | 13 | exports.words = words 
-------------------------------------------------------------------------------- /lib/stopwords_hu.js: -------------------------------------------------------------------------------- 1 | var words = ["a","abba","abban","abból","addig","ahhoz","ahogy","ahol","aki","akik","akkor","akár","alapján","alatt", 2 | "alatta","alattad","alattam","alattatok","alattuk","alattunk","alá","alád","alájuk","alám","alánk","alátok","alól", 3 | "alóla","alólad","alólam","alólatok","alóluk","alólunk","amely","amelybol","amelyek","amelyekben","amelyeket","amelyet", 4 | "amelyik","amelynek","ami","amikor","amit","amolyan","amott","amíg","annak","annál","arra","arról","attól","az","aznap", 5 | "azok","azokat","azokba","azokban","azokból","azokhoz","azokig","azokkal","azokká","azoknak","azoknál","azokon","azokra", 6 | "azokról","azoktól","azokért","azon","azonban","azonnal","azt","aztán","azután","azzal","azzá","azért","bal","balra", 7 | "ban","be","belé","beléd","beléjük","belém","belénk","belétek","belül","belőle","belőled","belőlem","belőletek", 8 | "belőlük","belőlünk","ben","benne","benned","bennem","bennetek","bennük","bennünk","bár","bárcsak","bármilyen", 9 | "búcsú","cikk","cikkek","cikkeket","csak","csakhogy","csupán","de","dehogy","e","ebbe","ebben","ebből","eddig", 10 | "egy","egyebek","egyebet","egyedül","egyelőre","egyes","egyet","egyetlen","egyik","egymás","egyre","egyszerre", 11 | "egyéb","együtt","egész","egészen","ehhez","ekkor","el","eleinte","ellen","ellenes","elleni","ellenére","elmondta", 12 | "első","elsők","elsősorban","elsőt","elé","eléd","elég","eléjük","elém","elénk","elétek","elő","előbb","elől","előle", 13 | "előled","előlem","előletek","előlük","előlünk","először","előtt","előtte","előtted","előttem","előttetek","előttük", 14 | "előttünk","előző","emilyen","engem","ennek","ennyi","ennél","enyém","erre","erről","esetben","ettől","ez","ezek", 15 | "ezekbe","ezekben","ezekből","ezeken","ezeket","ezekhez","ezekig","ezekkel","ezekké","ezeknek","ezeknél","ezekre", 
16 | "ezekről","ezektől","ezekért","ezen","ezentúl","ezer","ezret","ezt","ezután","ezzel","ezzé","ezért","fel","fele", 17 | "felek","felet","felett","felé","fent","fenti","fél","fölé","gyakran","ha","halló","hamar","hanem","harmadik", 18 | "harmadikat","harminc","hat","hatodik","hatodikat","hatot","hatvan","helyett","hetedik","hetediket","hetet", 19 | "hetven","hirtelen","hiszen","hiába","hogy","hogyan","hol","holnap","holnapot","honnan","hova","hozzá","hozzád", 20 | "hozzájuk","hozzám","hozzánk","hozzátok","hurrá","huszadik","hány","hányszor","hármat","három","hát","hátha", 21 | "hátulsó","hét","húsz","ide","ide-оda","idén","igazán","igen","ill","illetve","ilyen","ilyenkor","immár","inkább", 22 | "is","ismét","ison","itt","jelenleg","jobban","jobbra","jó","jól","jólesik","jóval","jövőre","kell","kellene","kellett", 23 | "kelljen","keressünk","keresztül","ketten","kettő","kettőt","kevés","ki","kiben","kiből","kicsit","kicsoda","kihez", 24 | "kik","kikbe","kikben","kikből","kiken","kiket","kikhez","kikkel","kikké","kiknek","kiknél","kikre","kikről","kiktől", 25 | "kikért","kilenc","kilencedik","kilencediket","kilencet","kilencven","kin","kinek","kinél","kire","kiről","kit","kitől", 26 | "kivel","kivé","kié","kiért","korábban","képest","kérem","kérlek","kész","késő","később","későn","két","kétszer","kívül", 27 | "körül","köszönhetően","köszönöm","közben","közel","közepesen","közepén","közé","között","közül","külön","különben", 28 | "különböző","különbözőbb","különbözőek","lassan","le","legalább","legyen","lehet","lehetetlen","lehetett","lehetőleg", 29 | "lehetőség","lenne","lenni","lennék","lennének","lesz","leszek","lesznek","leszünk","lett","lettek","lettem","lettünk", 30 | "lévő","ma","maga","magad","magam","magatokat","magukat","magunkat","magát","mai","majd","majdnem","manapság","meg", 31 | "megcsinál","megcsinálnak","megint","megvan","mellett","mellette","melletted","mellettem","mellettetek","mellettük", 32 | 
"mellettünk","mellé","melléd","melléjük","mellém","mellénk","mellétek","mellől","mellőle","mellőled","mellőlem", 33 | "mellőletek","mellőlük","mellőlünk","mely","melyek","melyik","mennyi","mert","mi","miatt","miatta","miattad","miattam", 34 | "miattatok","miattuk","miattunk","mibe","miben","miből","mihez","mik","mikbe","mikben","mikből","miken","miket","mikhez" 35 | ,"mikkel","mikké","miknek","miknél","mikor","mikre","mikről","miktől","mikért","milyen","min","mind","mindegyik", 36 | "mindegyiket","minden","mindenesetre","mindenki","mindent","mindenütt","mindig","mindketten","minek","minket","mint", 37 | "mintha","minél","mire","miről","mit","mitől","mivel","mivé","miért","mondta","most","mostanáig","már","más","másik", 38 | "másikat","másnap","második","másodszor","mások","másokat","mást","még","mégis","míg","mögé","mögéd","mögéjük","mögém", 39 | "mögénk","mögétek","mögött","mögötte","mögötted","mögöttem","mögöttetek","mögöttük","mögöttünk","mögül","mögüle","mögüled", 40 | "mögülem","mögületek","mögülük","mögülünk","múltkor","múlva","na","nagy","nagyobb","nagyon","naponta","napot","ne", 41 | "negyedik","negyediket","negyven","neked","nekem","neki","nekik","nektek","nekünk","nem","nemcsak","nemrég","nincs", 42 | "nyolc","nyolcadik","nyolcadikat","nyolcat","nyolcvan","nála","nálad","nálam","nálatok","náluk","nálunk","négy","négyet", 43 | "néha","néhány","nélkül","o","oda","ok","olyan","onnan","ott","pedig","persze","pár","például","rajta","rajtad","rajtam", 44 | "rajtatok","rajtuk","rajtunk","rendben","rosszul","rá","rád","rájuk","rám","ránk","rátok","régen","régóta","részére", 45 | "róla","rólad","rólam","rólatok","róluk","rólunk","rögtön","s","saját","se","sem","semmi","semmilyen","semmiség","senki", 46 | "soha","sok","sokan","sokat","sokkal","sokszor","sokáig","során","stb.","szemben","szerbusz","szerint","szerinte", 47 | "szerinted","szerintem","szerintetek","szerintük","szerintünk","szervusz","szinte","számára","száz","századik","százat", 48 | 
"szépen","szét","szíves","szívesen","szíveskedjék","sőt","talán","tavaly","te","tegnap","tegnapelőtt","tehát","tele", 49 | "teljes","tessék","ti","tied","titeket","tizedik","tizediket","tizenegy","tizenegyedik","tizenhat","tizenhárom","tizenhét", 50 | "tizenkettedik","tizenkettő","tizenkilenc","tizenkét","tizennyolc","tizennégy","tizenöt","tizet","tovább","további", 51 | "továbbá","távol","téged","tényleg","tíz","több","többi","többször","túl","tőle","tőled","tőlem","tőletek","tőlük", 52 | "tőlünk","ugyanakkor","ugyanez","ugyanis","ugye","urak","uram","urat","utoljára","utolsó","után","utána","vagy","vagyis", 53 | "vagyok","vagytok","vagyunk","vajon","valahol","valaki","valakit","valamelyik","valami","valamint","való","van","vannak", 54 | "vele","veled","velem","veletek","velük","velünk","vissza","viszlát","viszont","viszontlátásra","volna","volnának","volnék", 55 | "volt","voltak","voltam","voltunk","végre","végén","végül","által","általában","ám","át","éljen","én","éppen","érte", 56 | "érted","értem","értetek","értük","értünk","és","év","évben","éve","évek","éves","évi","évvel","így","óta","ön","önbe", 57 | "önben","önből","önhöz","önnek","önnel","önnél","önre","önről","önt","öntől","önért","önök","önökbe","önökben","önökből", 58 | "önöket","önökhöz","önökkel","önöknek","önöknél","önökre","önökről","önöktől","önökért","önökön","önön","össze","öt", 59 | "ötven","ötödik","ötödiket","ötöt","úgy","úgyis","úgynevezett","új","újabb","újra","úr","ő","ők","őket","őt"] 60 | 61 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_hy.js: -------------------------------------------------------------------------------- 1 | var words = ["այդ","այլ","այն","այս","դու","դուք","եմ","են","ենք","ես","եք","է","էի","էին","էինք","էիր","էիք","էր", 2 | "ըստ","թ","ի","ին","իսկ","իր","կամ","համար","հետ","հետո","մենք","մեջ","մի","ն","նա","նաև","նրա","նրանք","որ","որը", 3 | "որոնք","որպես","ու","ում","պիտի","վրա","և"] 4 | 5 | 
exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_id.js: -------------------------------------------------------------------------------- 1 | var words = ["ada","adalah","adanya","adapun","agak","agaknya","agar","akan","akankah","akhirnya","aku","akulah", 2 | "amat","amatlah","anda","andalah","antar","antara","antaranya","apa","apaan","apabila","apakah","apalagi","apatah", 3 | "atau","ataukah","ataupun","bagai","bagaikan","bagaimana","bagaimanakah","bagaimanapun","bagi","bahkan","bahwa", 4 | "bahwasanya","banyak","beberapa","begini","beginian","beginikah","beginilah","begitu","begitukah","begitulah", 5 | "begitupun","belum","belumlah","berapa","berapakah","berapalah","berapapun","bermacam","bersama","betulkah", 6 | "biasa","biasanya","bila","bilakah","bisa","bisakah","boleh","bolehkah","bolehlah","buat","bukan","bukankah", 7 | "bukanlah","bukannya","cuma","dahulu","dalam","dan","dapat","dari","daripada","dekat","demi","demikian","demikianlah", 8 | "dengan","depan","di","dia","dialah","diantara","diantaranya","dikarenakan","dini","diri","dirinya","disini","disinilah", 9 | "dong","dulu","enggak","enggaknya","entah","entahlah","hal","hampir","hanya","hanyalah","harus","haruslah","harusnya", 10 | "hendak","hendaklah","hendaknya","hingga","ia","ialah","ibarat","ingin","inginkah","inginkan","ini","inikah","inilah", 11 | "itu","itukah","itulah","jangan","jangankan","janganlah","jika","jikalau","juga","justru","kala","kalau","kalaulah", 12 | "kalaupun","kalian","kami","kamilah","kamu","kamulah","kan","kapan","kapankah","kapanpun","karena","karenanya","ke", 13 | "kecil","kemudian","kenapa","kepada","kepadanya","ketika","khususnya","kini","kinilah","kiranya","kita","kitalah", 14 | "kok","lagi","lagian","lah","lain","lainnya","lalu","lama","lamanya","lebih","macam","maka","makanya","makin","malah", 15 | 
"malahan","mampu","mampukah","mana","manakala","manalagi","masih","masihkah","masing","mau","maupun","melainkan","melalui", 16 | "memang","mengapa","mereka","merekalah","merupakan","meski","meskipun","mungkin","mungkinkah","nah","namun","nanti", 17 | "nantinya","nyaris","oleh","olehnya","pada","padahal","padanya","paling","pantas","para","pasti","pastilah","per", 18 | "percuma","pernah","pula","pun","rupanya","saat","saatnya","saja","sajalah","saling","sama","sambil","sampai","sana", 19 | "sangat","sangatlah","saya","sayalah","se","sebab","sebabnya","sebagai","sebagaimana","sebagainya","sebaliknya","sebanyak", 20 | "sebegini","sebegitu","sebelum","sebelumnya","sebenarnya","seberapa","sebetulnya","sebisanya","sebuah","sedang","sedangkan", 21 | "sedemikian","sedikit","sedikitnya","segala","segalanya","segera","seharusnya","sehingga","sejak","sejenak","sekali", 22 | "sekalian","sekaligus","sekalipun","sekarang","seketika","sekiranya","sekitar","sekitarnya","sela","selagi","selain", 23 | "selaku","selalu","selama","selamanya","seluruh","seluruhnya","semacam","semakin","semasih","semaunya","sementara", 24 | "sempat","semua","semuanya","semula","sendiri","sendirinya","seolah","seorang","sepanjang","sepantasnya","sepantasnyalah", 25 | "seperti","sepertinya","sering","seringnya","serta","serupa","sesaat","sesama","sesegera","sesekali","seseorang","sesuatu", 26 | "sesuatunya","sesudah","sesudahnya","setelah","seterusnya","setiap","setidaknya","sewaktu","siapa","siapakah","siapapun", 27 | "sini","sinilah","suatu","sudah","sudahkah","sudahlah","supaya","tadi","tadinya","tak","tanpa","tapi","telah","tentang", 28 | "tentu","tentulah","tentunya","terdiri","terhadap","terhadapnya","terlalu","terlebih","tersebut","tersebutlah","tertentu", 29 | "tetapi","tiap","tidak","tidakkah","tidaklah","toh","waduh","wah","wahai","walau","walaupun","wong","yaitu","yakni","yang"] 30 | 31 | exports.words = words -------------------------------------------------------------------------------- 
/lib/stopwords_it.js: -------------------------------------------------------------------------------- 1 | var words = ["IE","a","abbastanza","abbia","abbiamo","abbiano","abbiate","accidenti","ad","adesso","affinche","agl","agli", 2 | "ahime","ahimè","ai","al","alcuna","alcuni","alcuno","all","alla","alle","allo","allora","altri","altrimenti","altro", 3 | "altrove","altrui","anche","ancora","anni","anno","ansa","anticipo","assai","attesa","attraverso","avanti","avemmo", 4 | "avendo","avente","aver","avere","averlo","avesse","avessero","avessi","avessimo","aveste","avesti","avete","aveva", 5 | "avevamo","avevano","avevate","avevi","avevo","avrai","avranno","avrebbe","avrebbero","avrei","avremmo","avremo", 6 | "avreste","avresti","avrete","avrà","avrò","avuta","avute","avuti","avuto","basta","bene","benissimo","berlusconi", 7 | "brava","bravo","c","casa","caso","cento","certa","certe","certi","certo","che","chi","chicchessia","chiunque","ci", 8 | "ciascuna","ciascuno","cima","cio","cioe","cioè","circa","citta","città","ciò","co","codesta","codesti","codesto", 9 | "cogli","coi","col","colei","coll","coloro","colui","come","cominci","comunque","con","concernente","conciliarsi", 10 | "conclusione","consiglio","contro","cortesia","cos","cosa","cosi","così","cui","d","da","dagl","dagli","dai","dal", 11 | "dall","dalla","dalle","dallo","dappertutto","davanti","degl","degli","dei","del","dell","della","delle","dello", 12 | "dentro","detto","deve","di","dice","dietro","dire","dirimpetto","diventa","diventare","diventato","dopo","dov","dove", 13 | "dovra","dovrà","dovunque","due","dunque","durante","e","ebbe","ebbero","ebbi","ecc","ecco","ed","effettivamente","egli", 14 | "ella","entrambi","eppure","era","erano","eravamo","eravate","eri","ero","esempio","esse","essendo","esser","essere", 15 | "essi","ex","fa","faccia","facciamo","facciano","facciate","faccio","facemmo","facendo","facesse","facessero","facessi", 16 | 
"facessimo","faceste","facesti","faceva","facevamo","facevano","facevate","facevi","facevo","fai","fanno","farai", 17 | "faranno","fare","farebbe","farebbero","farei","faremmo","faremo","fareste","faresti","farete","farà","farò","fatto", 18 | "favore","fece","fecero","feci","fin","finalmente","finche","fine","fino","forse","forza","fosse","fossero","fossi", 19 | "fossimo","foste","fosti","fra","frattempo","fu","fui","fummo","fuori","furono","futuro","generale","gia","giacche", 20 | "giorni","giorno","già","gli","gliela","gliele","glieli","glielo","gliene","governo","grande","grazie","gruppo","ha", 21 | "haha","hai","hanno","ho","i","ieri","il","improvviso","in","inc","infatti","inoltre","insieme","intanto","intorno", 22 | "invece","io","l","la","lasciato","lato","lavoro","le","lei","li","lo","lontano","loro","lui","lungo","luogo","là", 23 | "ma","macche","magari","maggior","mai","male","malgrado","malissimo","mancanza","marche","me","medesimo","mediante", 24 | "meglio","meno","mentre","mesi","mezzo","mi","mia","mie","miei","mila","miliardi","milioni","minimi","ministro", 25 | "mio","modo","molti","moltissimo","molto","momento","mondo","mosto","nazionale","ne","negl","negli","nei","nel", 26 | "nell","nella","nelle","nello","nemmeno","neppure","nessun","nessuna","nessuno","niente","no","noi","non","nondimeno", 27 | "nonostante","nonsia","nostra","nostre","nostri","nostro","novanta","nove","nulla","nuovo","o","od","oggi","ogni", 28 | "ognuna","ognuno","oltre","oppure","ora","ore","osi","ossia","ottanta","otto","paese","parecchi","parecchie", 29 | "parecchio","parte","partendo","peccato","peggio","per","perche","perchè","perché","percio","perciò","perfino","pero", 30 | "persino","persone","però","piedi","pieno","piglia","piu","piuttosto","più","po","pochissimo","poco","poi","poiche", 31 | "possa","possedere","posteriore","posto","potrebbe","preferibilmente","presa","press","prima","primo","principalmente", 32 | 
"probabilmente","proprio","puo","pure","purtroppo","può","qualche","qualcosa","qualcuna","qualcuno","quale","quali", 33 | "qualunque","quando","quanta","quante","quanti","quanto","quantunque","quasi","quattro","quel","quella","quelle", 34 | "quelli","quello","quest","questa","queste","questi","questo","qui","quindi","realmente","recente","recentemente", 35 | "registrazione","relativo","riecco","salvo","sara","sarai","saranno","sarebbe","sarebbero","sarei","saremmo","saremo", 36 | "sareste","saresti","sarete","sarà","sarò","scola","scopo","scorso","se","secondo","seguente","seguito","sei","sembra", 37 | "sembrare","sembrato","sembri","sempre","senza","sette","si","sia","siamo","siano","siate","siete","sig","solito", 38 | "solo","soltanto","sono","sopra","sotto","spesso","srl","sta","stai","stando","stanno","starai","staranno","starebbe", 39 | "starebbero","starei","staremmo","staremo","stareste","staresti","starete","starà","starò","stata","state","stati", 40 | "stato","stava","stavamo","stavano","stavate","stavi","stavo","stemmo","stessa","stesse","stessero","stessi","stessimo", 41 | "stesso","steste","stesti","stette","stettero","stetti","stia","stiamo","stiano","stiate","sto","su","sua","subito", 42 | "successivamente","successivo","sue","sugl","sugli","sui","sul","sull","sulla","sulle","sullo","suo","suoi","tale", 43 | "tali","talvolta","tanto","te","tempo","ti","titolo","torino","tra","tranne","tre","trenta","troppo","trovato","tu", 44 | "tua","tue","tuo","tuoi","tutta","tuttavia","tutte","tutti","tutto","uguali","ulteriore","ultimo","un","una","uno", 45 | "uomo","va","vale","vari","varia","varie","vario","verso","vi","via","vicino","visto","vita","voi","volta","volte", 46 | "vostra","vostre","vostri","vostro","è"] 47 | 48 | exports.words = words 49 | -------------------------------------------------------------------------------- /lib/stopwords_ja.js: -------------------------------------------------------------------------------- 1 | // Original copyright: 2 
| /* 3 | Licensed to the Apache Software Foundation (ASF) under one or more 4 | contributor license agreements. See the NOTICE file distributed with 5 | this work for additional information regarding copyright ownership. 6 | The ASF licenses this file to You under the Apache License, Version 2.0 7 | the "License"); you may not use this file except in compliance with 8 | the License. You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | // This version: 20 | /* 21 | Copyright (c) 2012, Guillaume Marty 22 | 23 | Permission is hereby granted, free of charge, to any person obtaining a copy 24 | of this software and associated documentation files (the "Software"), to deal 25 | in the Software without restriction, including without limitation the rights 26 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 27 | copies of the Software, and to permit persons to whom the Software is 28 | furnished to do so, subject to the following conditions: 29 | 30 | The above copyright notice and this permission notice shall be included in 31 | all copies or substantial portions of the Software. 32 | 33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 34 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 35 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 36 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 37 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 38 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 39 | THE SOFTWARE. 40 | */ 41 | 42 | // a list of commonly used words that have little meaning and can be excluded 43 | // from analysis. 44 | // Original location: 45 | // http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/stopwords.txt 46 | var words = ['の', 'に', 'は', 'を', 'た', 'が', 'で', 'て', 'と', 'し', 'れ', 'さ', 47 | 'ある', 'いる', 'も', 'する', 'から', 'な', 'こと', 'として', 'い', 'や', 'れる', 48 | 'など', 'なっ', 'ない', 'この', 'ため', 'その', 'あっ', 'よう', 'また', 'もの', 49 | 'という', 'あり', 'まで', 'られ', 'なる', 'へ', 'か', 'だ', 'これ', 'によって', 50 | 'により', 'おり', 'より', 'による', 'ず', 'なり', 'られる', 'において', 'ば', 'なかっ', 51 | 'なく', 'しかし', 'について', 'せ', 'だっ', 'その後', 'できる', 'それ', 'う', 'ので', 52 | 'なお', 'のみ', 'でき', 'き', 'つ', 'における', 'および', 'いう', 'さらに', 'でも', 53 | 'ら', 'たり', 'その他', 'に関する', 'たち', 'ます', 'ん', 'なら', 'に対して', '特に', 54 | 'せる', '及び', 'これら', 'とき', 'では', 'にて', 'ほか', 'ながら', 'うち', 'そして', 55 | 'とともに', 'ただし', 'かつて', 'それぞれ', 'または', 'お', 'ほど', 'ものの', 'に対する', 56 | 'ほとんど', 'と共に', 'といった', 'です', 'とも', 'ところ', 'ここ'] 57 | 58 | // tell the world about the noise words. 
59 | exports.words = words 60 | -------------------------------------------------------------------------------- /lib/stopwords_ko.js: -------------------------------------------------------------------------------- 1 | var words = [ 2 | "《","》", 3 | "가","가까스로","가령","각","각각","각자","각종","갖고말하자면","같다","같이","개의치않고","거니와","거바","거의","것","것과 같이","것들","게다가", 4 | "게우다","겨우","견지에서","결과에 이르다","결국","결론을 낼 수 있다","겸사겸사","고려하면","고로","곧","공동으로","과","과연","관계가 있다","관계없이", 5 | "관련이 있다","관하여","관한","관해서는","구","구체적으로","구토하다","그","그들","그때","그래","그래도","그래서","그러나","그러니","그러니까","그러면", 6 | "그러므로","그러한즉","그런 까닭에","그런데","그런즉","그럼","그럼에도 불구하고","그렇게 함으로써","그렇지","그렇지 않다면","그렇지 않으면","그렇지만", 7 | "그렇지않으면","그리고","그리하여","그만이다","그에 따르는","그위에","그저","그중에서","그치지 않다","근거로","근거하여","기대여","기점으로","기준으로", 8 | "기타","까닭으로","까악","까지","까지 미치다","까지도","꽈당","끙끙","끼익","나","나머지는","남들","남짓","너","너희","너희들","네","넷","년", 9 | "논하지 않다","놀라다","누가 알겠는가","누구","다른","다른 방면으로","다만","다섯","다소","다수","다시 말하자면","다시말하면","다음","다음에", 10 | "다음으로","단지","답다","당신","당장","대로 하다","대하면","대하여","대해 말하자면","대해서","댕그","더구나","더군다나","더라도","더불어", 11 | "더욱더","더욱이는","도달하다","도착하다","동시에","동안","된바에야","된이상","두번째로","둘","둥둥","뒤따라","뒤이어","든간에","들","등","등등", 12 | "딩동","따라","따라서","따위","따지지 않다","딱","때","때가 되어","때문에","또","또한","뚝뚝","라 해도","령","로","로 인하여","로부터","로써", 13 | "륙","를","마음대로","마저","마저도","마치","막론하고","만 못하다","만약","만약에","만은 아니다","만이 아니다","만일","만큼","말하자면","말할것도 없고", 14 | "매","매번","메쓰겁다","몇","모","모두","무렵","무릎쓰고","무슨","무엇","무엇때문에","물론","및","바꾸어말하면","바꾸어말하자면","바꾸어서 말하면", 15 | "바꾸어서 한다면","바꿔 말하면","바로","바와같이","밖에 안된다","반대로","반대로 말하자면","반드시","버금","보는데서","보다더","보드득","본대로","봐" 16 | ,"봐라","부류의 사람들","부터","불구하고","불문하고","붕붕","비걱거리다","비교적","비길수 없다","비로소","비록","비슷하다","비추어 보아","비하면", 17 | "뿐만 아니라","뿐만아니라","뿐이다","삐걱","삐걱거리다","사","삼","상대적으로 말하자면","생각한대로","설령","설마","설사","셋","소생","소인","솨", 18 | "쉿","습니까","습니다","시각","시간","시작하여","시초에","시키다","실로","심지어","아","아니","아니나다를가","아니라면","아니면","아니었다면", 19 | "아래윗","아무거나","아무도","아야","아울러","아이","아이고","아이구","아이야","아이쿠","아하","아홉","안 그러면","않기 위하여","않기 
위해서", 20 | "알 수 있다","알았어","앗","앞에서","앞의것","야","약간","양자","어","어기여차","어느","어느 년도","어느것","어느곳","어느때","어느쪽","어느해", 21 | "어디","어때","어떠한","어떤","어떤것","어떤것들","어떻게","어떻해","어이","어째서","어쨋든","어쩔수 없다","어찌","어찌됏든","어찌됏어","어찌하든지", 22 | "어찌하여","언제","언젠가","얼마","얼마 안 되는 것","얼마간","얼마나","얼마든지","얼마만큼","얼마큼","엉엉","에","에 가서","에 달려 있다","에 대해", 23 | "에 있다","에 한하다","에게","에서","여","여기","여덟","여러분","여보시오","여부","여섯","여전히","여차","연관되다","연이서","영","영차","옆사람", 24 | "예","예를 들면","예를 들자면","예컨대","예하면","오","오로지","오르다","오자마자","오직","오호","오히려","와","와 같은 사람들","와르르","와아","왜", 25 | "왜냐하면","외에도","요만큼","요만한 것","요만한걸","요컨대","우르르","우리","우리들","우선","우에 종합한것과같이","운운","월","위에서 서술한바와같이", 26 | "위하여","위해서","윙윙","육","으로","으로 인하여","으로서","으로써","을","응","응당","의","의거하여","의지하여","의해","의해되다","의해서","이", 27 | "이 되다","이 때문에","이 밖에","이 외에","이 정도의","이것","이곳","이때","이라면","이래","이러이러하다","이러한","이런","이럴정도로","이렇게 많은 것", 28 | "이렇게되면","이렇게말하자면","이렇구나","이로 인하여","이르기까지","이리하여","이만큼","이번","이봐","이상","이어서","이었다","이와 같다","이와 같은", 29 | "이와 반대로","이와같다면","이외에도","이용하여","이유만으로","이젠","이지만","이쪽","이천구","이천육","이천칠","이천팔","인 듯하다","인젠","일", 30 | "일것이다","일곱","일단","일때","일반적으로","일지라도","임에 틀림없다","입각하여","입장에서","잇따라","있다","자","자기","자기집","자마자","자신", 31 | "잠깐","잠시","저","저것","저것만큼","저기","저쪽","저희","전부","전자","전후","점에서 보아","정도에 이르다","제","제각기","제외하고","조금","조차", 32 | "조차도","졸졸","좀","좋아","좍좍","주룩주룩","주저하지 않고","줄은 몰랏다","줄은모른다","중에서","중의하나","즈음하여","즉","즉시","지든지","지만", 33 | "지말고","진짜로","쪽으로","차라리","참","참나","첫번째로","쳇","총적으로","총적으로 말하면","총적으로 보면","칠","콸콸","쾅쾅","쿵","타다","타인", 34 | "탕탕","토하다","통하여","툭","퉤","틈타","팍","팔","퍽","펄렁","하","하게될것이다","하게하다","하겠는가","하고 있다","하고있었다","하곤하였다", 35 | "하구나","하기 때문에","하기 위하여","하기는한데","하기만 하면","하기보다는","하기에","하나","하느니","하는 김에","하는 편이 낫다","하는것도", 36 | "하는것만 못하다","하는것이 낫다","하는바","하더라도","하도다","하도록시키다","하도록하다","하든지","하려고하다","하마터면","하면 할수록","하면된다", 37 | "하면서","하물며","하여금","하여야","하자마자","하지 않는다면","하지 않도록","하지마","하지마라","하지만","하하","한 까닭에","한 이유는","한 후", 38 | "한다면","한다면 몰라도","한데","한마디","한적이있다","한켠으로는","한항목","할 따름이다","할 생각이다","할 줄 안다","할 지경이다","할 힘이 있다", 39 | 
"할때","할만하다","할망정","할뿐","할수있다","할수있어","할줄알다","할지라도","할지언정","함께","해도된다","해도좋다","해봐요","해서는 안된다", 40 | "해야한다","해요","했어요","향하다","향하여","향해서","허","허걱","허허","헉","헉헉","헐떡헐떡","형식으로 쓰여","혹시","혹은","혼자","훨씬", 41 | "휘익","휴","흐흐","흥","힘입어",] 42 | 43 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_la.js: -------------------------------------------------------------------------------- 1 | var words = ["a","ab","ac","ad","at","atque","aut","autem","cum","de","dum","e","erant","erat","est","et","etiam", 2 | "ex","haec","hic","hoc","in","ita","me","nec","neque","non","per","qua","quae","quam","qui","quibus","quidem","quo", 3 | "quod","re","rebus","rem","res","sed","si","sic","sunt","tamen","tandem","te","ut","vel"] 4 | 5 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_lv.js: -------------------------------------------------------------------------------- 1 | var words = ["aiz","ap","apakš","apakšpus","ar","arī","augšpus","bet","bez","bija","biji","biju","bijām","bijāt","būs", 2 | "būsi","būsiet","būsim","būt","būšu","caur","diemžēl","diezin","droši","dēļ","esam","esat","esi","esmu","gan","gar", 3 | "iekam","iekams","iekām","iekāms","iekš","iekšpus","ik","ir","it","itin","iz","ja","jau","jeb","jebšu","jel","jo","jā", 4 | "ka","kamēr","kaut","kolīdz","kopš","kā","kļuva","kļuvi","kļuvu","kļuvām","kļuvāt","kļūs","kļūsi","kļūsiet","kļūsim", 5 | "kļūst","kļūstam","kļūstat","kļūsti","kļūstu","kļūt","kļūšu","labad","lai","lejpus","līdz","līdzko","ne","nebūt","nedz", 6 | "nekā","nevis","nezin","no","nu","nē","otrpus","pa","par","pat","pie","pirms","pret","priekš","pār","pēc","starp","tad", 7 | "tak","tapi","taps","tapsi","tapsiet","tapsim","tapt","tapāt","tapšu","taču","te","tiec","tiek","tiekam","tiekat","tieku", 8 | "tik","tika","tikai","tiki","tikko","tiklab","tiklīdz","tiks","tiksiet","tiksim","tikt","tiku","tikvien","tikām","tikāt", 9 | 
"tikšu","tomēr","topat","turpretim","turpretī","tā","tādēļ","tālab","tāpēc","un","uz","vai","var","varat","varēja","varēji", 10 | "varēju","varējām","varējāt","varēs","varēsi","varēsiet","varēsim","varēt","varēšu","vien","virs","virspus","vis","viņpus", 11 | "zem","ārpus","šaipus"] 12 | 13 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_mr.js: -------------------------------------------------------------------------------- 1 | var words = ["अधिक","अनेक","अशी","असलयाचे","असलेल्या","असा","असून","असे","आज","आणि","आता","आपल्या","आला","आली","आले","आहे","आहेत", 2 | "एक","एका","कमी","करणयात","करून","का","काम","काय","काही","किवा","की","केला","केली","केले","कोटी","गेल्या","घेऊन","जात","झाला","झाली","झाले", 3 | "झालेल्या","टा","डॉ","तर","तरी","तसेच","ता","ती","तीन","ते","तो","त्या","त्याचा","त्याची","त्याच्या","त्याना","त्यानी","त्यामुळे","त्री","दिली","दोन","न","नाही", 4 | "निर्ण्य","पण","पम","परयतन","पाटील","म","मात्र","माहिती","मी","मुबी","म्हणजे","म्हणाले","म्हणून","या","याचा","याची","याच्या","याना","यानी","येणार","येत", 5 | "येथील","येथे","लाख","व","व्यकत","सर्व","सागित्ले","सुरू","हजार","हा","ही","हे","होणार","होत","होता","होती","होते"] 6 | 7 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_nl.js: -------------------------------------------------------------------------------- 1 | var words = ["aan","achte","achter","af","al","alle","alleen","alles","als","ander","anders","beetje" 2 | ,"behalve","beide","beiden","ben","beneden","bent","bij","bijna","bijv","blijkbaar","blijken","boven","bv", 3 | "daar","daardoor","daarin","daarna","daarom","daaruit","dan","dat","de","deden","deed","derde","derhalve","dertig", 4 | "deze","dhr","die","dit","doe","doen","doet","door","drie","duizend","echter","een","eens","eerst","eerste","eigen", 5 | 
"eigenlijk","elk","elke","en","enige","er","erg","ergens","etc","etcetera","even","geen","genoeg","geweest","haar", 6 | "haarzelf","had","hadden","heb","hebben","hebt","hedden","heeft","heel","hem","hemzelf","hen","het","hetzelfde", 7 | "hier","hierin","hierna","hierom","hij","hijzelf","hoe","honderd","hun","ieder","iedere","iedereen","iemand","iets", 8 | "ik","in","inderdaad","intussen","is","ja","je","jij","jijzelf","jou","jouw","jullie","kan","kon","konden","kun", 9 | "kunnen","kunt","laatst","later","lijken","lijkt","maak","maakt","maakte","maakten","maar","mag","maken","me","meer", 10 | "meest","meestal","men","met","mevr","mij","mijn","minder","miss","misschien","missen","mits","mocht","mochten", 11 | "moest","moesten","moet","moeten","mogen","mr","mrs","mw","na","naar","nam","namelijk","nee","neem","negen", 12 | "nemen","nergens","niemand","niet","niets","niks","noch","nochtans","nog","nooit","nu","nv","of","om","omdat", 13 | "ondanks","onder","ondertussen","ons","onze","onzeker","ooit","ook","op","over","overal","overige","paar","per", 14 | "recent","redelijk","samen","sinds","steeds","te","tegen","tegenover","thans","tien","tiende","tijdens","tja","toch", 15 | "toe","tot","totdat","tussen","twee","tweede","u","uit","uw","vaak","van","vanaf","veel","veertig","verder", 16 | "verscheidene","verschillende","via","vier","vierde","vijf","vijfde","vijftig","volgend","volgens","voor","voordat", 17 | "voorts","waar","waarom","waarschijnlijk","wanneer","waren","was","wat","we","wederom","weer","weinig","wel","welk", 18 | "welke","werd","werden","werder","whatever","wie","wij","wijzelf","wil","wilden","willen","word","worden","wordt","zal", 19 | "ze","zei","zeker","zelf","zelfde","zes","zeven","zich","zij","zijn","zijzelf","zo","zoals","zodat","zou","zouden", 20 | "zulk","zullen"] 21 | 22 | exports.words = words 23 | -------------------------------------------------------------------------------- /lib/stopwords_no.js: 
-------------------------------------------------------------------------------- 1 | 2 | var words = ["alle","at","av","bare","begge","ble","blei","bli","blir","blitt","både","båe","da", 3 | "de","deg","dei","deim","deira","deires","dem","den","denne","der","dere","deres","det","dette","di", 4 | "din","disse","ditt","du","dykk","dykkar","då","eg","ein","eit","eitt","eller","elles","en","enn","er", 5 | "et","ett","etter","for","fordi","fra","før","ha","hadde","han","hans","har","hennar","henne","hennes", 6 | "her","hjå","ho","hoe","honom","hoss","hossen","hun","hva","hvem","hver","hvilke","hvilken","hvis","hvor", 7 | "hvordan","hvorfor","i","ikke","ikkje","ingen","ingi","inkje","inn","inni","ja","jeg","kan","kom","korleis", 8 | "korso","kun","kunne","kva","kvar","kvarhelst","kven","kvi","kvifor","man","mange","me","med","medan","meg", 9 | "meget","mellom","men","mi","min","mine","mitt","mot","mykje","ned","no","noe","noen","noka","noko","nokon", 10 | "nokor","nokre","nå","når","og","også","om","opp","oss","over","på","samme","seg","selv","si","sia","sidan", 11 | "siden","sin","sine","sitt","sjøl","skal","skulle","slik","so","som","somme","somt","så","sånn","til","um", 12 | "upp","ut","uten","var","vart","varte","ved","vere","verte","vi","vil","ville","vore","vors","vort","vår", 13 | "være","vært","å"] 14 | 15 | 16 | exports.words = words 17 | -------------------------------------------------------------------------------- /lib/stopwords_pl.js: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2013, Paweł Łaskarzewski 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 
| furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | // a list of commonly used words that have little meaning and can be excluded 24 | // from analysis. 25 | // list based on: http://pl.wikipedia.org/wiki/Wikipedia:Stopwords 26 | var words = [ 27 | 'a', 'aby', 'ach', 'acz', 'aczkolwiek', 'aj', 'albo', 'ale', 'ależ', 'ani', 28 | 'aż', 'bardziej', 'bardzo', 'bo', 'bowiem', 'by', 'byli', 'bynajmniej', 29 | 'być', 'był', 'była', 'było', 'były', 'będzie', 'będą', 'cali', 'cała', 30 | 'cały', 'ci', 'cię', 'ciebie', 'co', 'cokolwiek', 'coś', 'czasami', 31 | 'czasem', 'czemu', 'czy', 'czyli', 'daleko', 'dla', 'dlaczego', 'dlatego', 32 | 'do', 'dobrze', 'dokąd', 'dość', 'dużo', 'dwa', 'dwaj', 'dwie', 'dwoje', 33 | 'dziś', 'dzisiaj', 'gdy', 'gdyby', 'gdyż', 'gdzie', 'gdziekolwiek', 34 | 'gdzieś', 'i', 'ich', 'ile', 'im', 'inna', 'inne', 'inny', 'innych', 'iż', 35 | 'ja', 'ją', 'jak', 'jakaś', 'jakby', 'jaki', 'jakichś', 'jakie', 'jakiś', 36 | 'jakiż', 'jakkolwiek', 'jako', 'jakoś', 'je', 'jeden', 'jedna', 'jedno', 37 | 'jednak', 'jednakże', 'jego', 'jej', 'jemu', 'jest', 'jestem', 'jeszcze', 38 | 'jeśli', 'jeżeli', 'już', 'ją', 'każdy', 'kiedy', 'kilka', 'kimś', 'kto', 39 | 'ktokolwiek', 'ktoś', 'która', 'które', 'którego', 'której', 'który', 40 | 'których', 'którym', 'którzy', 'ku', 'lat', 'lecz', 'lub', 'ma', 'mają', 
41 | 'mało', 'mam', 'mi', 'mimo', 'między', 'mną', 'mnie', 'mogą', 'moi', 'moim', 42 | 'moja', 'moje', 'może', 'możliwe', 'można', 'mój', 'mu', 'musi', 'my', 'na', 43 | 'nad', 'nam', 'nami', 'nas', 'nasi', 'nasz', 'nasza', 'nasze', 'naszego', 44 | 'naszych', 'natomiast', 'natychmiast', 'nawet', 'nią', 'nic', 'nich', 'nie', 45 | 'niech', 'niego', 'niej', 'niemu', 'nigdy', 'nim', 'nimi', 'niż', 'no', 'o', 46 | 'obok', 'od', 'około', 'on', 'ona', 'one', 'oni', 'ono', 'oraz', 'oto', 47 | 'owszem', 'pan', 'pana', 'pani', 'po', 'pod', 'podczas', 'pomimo', 'ponad', 48 | 'ponieważ', 'powinien', 'powinna', 'powinni', 'powinno', 'poza', 'prawie', 49 | 'przecież', 'przed', 'przede', 'przedtem', 'przez', 'przy', 'roku', 50 | 'również', 'sam', 'sama', 'są', 'się', 'skąd', 'sobie', 'sobą', 'sposób', 51 | 'swoje', 'ta', 'tak', 'taka', 'taki', 'takie', 'także', 'tam', 'te', 'tego', 52 | 'tej', 'temu', 'ten', 'teraz', 'też', 'to', 'tobą', 'tobie', 'toteż', 53 | 'trzeba', 'tu', 'tutaj', 'twoi', 'twoim', 'twoja', 'twoje', 'twym', 'twój', 54 | 'ty', 'tych', 'tylko', 'tym', 'u', 'w', 'wam', 'wami', 'was', 'wasz', 'zaś', 55 | 'wasza', 'wasze', 'we', 'według', 'wiele', 'wielu', 'więc', 'więcej', 'tę', 56 | 'wszyscy', 'wszystkich', 'wszystkie', 'wszystkim', 'wszystko', 'wtedy', 57 | 'wy', 'właśnie', 'z', 'za', 'zapewne', 'zawsze', 'ze', 'zł', 'znowu', 58 | 'znów', 'został', 'żaden', 'żadna', 'żadne', 'żadnych', 'że', 'żeby', 59 | '$', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '_'] 60 | 61 | // tell the world about the noise words. 
62 | exports.words = words 63 | -------------------------------------------------------------------------------- /lib/stopwords_pt.js: -------------------------------------------------------------------------------- 1 | var words = ["a","acerca","adeus","agora","ainda","algmas","algo","algumas","alguns","ali","além","ambos","ano","anos", 2 | "antes","ao","aos","apenas","apoio","apontar","após","aquela","aquelas","aquele","aqueles","aqui","aquilo","as","assim", 3 | "através","atrás","até","aí","baixo","bastante","bem","bom","breve","cada","caminho","catorze","cedo","cento","certamente", 4 | "certeza","cima","cinco","coisa","com","como","comprido","conhecido","conselho","contra","corrente","custa","cá","da", 5 | "daquela","daquele","dar","das","de","debaixo","demais","dentro","depois","desde","desligado","dessa","desse","desta", 6 | "deste","deve","devem","deverá","dez","dezanove","dezasseis","dezassete","dezoito","dia","diante","direita","diz","dizem", 7 | "dizer","do","dois","dos","doze","duas","dá","dão","dúvida","e","ela","elas","ele","eles","em","embora","enquanto","entre", 8 | "então","era","essa","essas","esse","esses","esta","estado","estar","estará","estas","estava","este","estes","esteve", 9 | "estive","estivemos","estiveram","estiveste","estivestes","estou","está","estás","estão","eu","exemplo","falta","fará", 10 | "favor","faz","fazeis","fazem","fazemos","fazer","fazes","fazia","faço","fez","fim","final","foi","fomos","for","fora", 11 | "foram","forma","foste","fostes","fui","geral","grande","grandes","grupo","hoje","horas","há","iniciar","inicio","ir", 12 | "irá","isso","ista","iste","isto","já","lado","ligado","local","logo","longe","lugar","lá","maior","maioria","maiorias", 13 | "mais","mal","mas","me","meio","menor","menos","meses","mesmo","meu","meus","mil","minha","minhas","momento","muito", 14 | "muitos","máximo","mês","na","nada","naquela","naquele","nas","nem","nenhuma","nessa","nesse","nesta","neste","no","noite", 15 | 
"nome","nos","nossa","nossas","nosso","nossos","nova","nove","novo","novos","num","numa","nunca","não","nível","nós", 16 | "número","o","obra","obrigada","obrigado","oitava","oitavo","oito","onde","ontem","onze","os","ou","outra","outras", 17 | "outro","outros","para","parece","parte","partir","pegar","pela","pelas","pelo","pelos","perto","pessoas","pode","podem", 18 | "poder","poderá","podia","ponto","pontos","por","porque","porquê","posição","possivelmente","posso","possível","pouca", 19 | "pouco","povo","primeira","primeiro","promeiro","próprio","próximo","puderam","pôde","põe","põem","qual","qualquer", 20 | "quando","quanto","quarta","quarto","quatro","que","quem","quer","quero","questão","quieto","quinta","quinto","quinze", 21 | "quê","relação","sabe","saber","se","segunda","segundo","sei","seis","sem","sempre","ser","seria","sete","seu","seus", 22 | "sexta","sexto","sim","sistema","sob","sobre","sois","somente","somos","sou","sua","suas","são","sétima","sétimo","tal", 23 | "talvez","também","tanto","tarde","te","tem","temos","tempo","tendes","tenho","tens","tentar","tentaram","tente","tentei", 24 | "ter","terceira","terceiro","teu","teus","teve","tipo","tive","tivemos","tiveram","tiveste","tivestes","toda","todas", 25 | "todo","todos","trabalhar","trabalho","treze","três","tu","tua","tuas","tudo","tão","têm","um","uma","umas","uns","usa", 26 | "usar","vai","vais","valor","veja","vem","vens","ver","verdade","verdadeiro","vez","vezes","viagem","vindo","vinte", 27 | "você","vocês","vos","vossa","vossas","vosso","vossos","vários","vão","vêm","vós","zero","à","às","área","é","és", 28 | "último"] 29 | 30 | exports.words = words 31 | -------------------------------------------------------------------------------- /lib/stopwords_ro.js: -------------------------------------------------------------------------------- 1 | var words = ["acea","aceasta","această","aceea","acei","aceia","acel","acela","acele","acelea","acest","acesta","aceste", 2 | 
"acestea","aceşti","aceştia","acolo","acord","acum","ai","aia","aibă","aici","al","ale","alea","altceva","altcineva", 3 | "am","ar","are","asemenea","asta","astea","astăzi","asupra","au","avea","avem","aveţi","azi","aş","aşadar","aţi","bine", 4 | "bucur","bună","ca","care","caut","ce","cel","ceva","chiar","cinci","cine","cineva","contra","cu","cum","cumva","curând", 5 | "curînd","când","cât","câte","câtva","câţi","cînd","cît","cîte","cîtva","cîţi","că","căci","cărei","căror","cărui","către", 6 | "da","dacă","dar","datorită","dată","dau","de","deci","deja","deoarece","departe","deşi","din","dinaintea","dintr-", 7 | "dintre","doi","doilea","două","drept","după","dă","ea","ei","el","ele","eram","este","eu","eşti","face","fata","fi", 8 | "fie","fiecare","fii","fim","fiu","fiţi","frumos","fără","graţie","halbă","iar","ieri","la","le","li","lor","lui","lângă", 9 | "lîngă","mai","mea","mei","mele","mereu","meu","mi","mie","mine","mult","multă","mulţi","mulţumesc","mâine","mîine","mă", 10 | "ne","nevoie","nici","nicăieri","nimeni","nimeri","nimic","nişte","noastre","noastră","noi","noroc","nostru","nouă", 11 | "noştri","nu","opt","ori","oricare","orice","oricine","oricum","oricând","oricât","oricînd","oricît","oriunde","patra", 12 | "patru","patrulea","pe","pentru","peste","pic","poate","pot","prea","prima","primul","prin","printr-","puţin","puţina", 13 | "puţină","până","pînă","rog","sa","sale","sau","se","spate","spre","sub","sunt","suntem","sunteţi","sută","sînt","sîntem", 14 | "sînteţi","să","săi","său","ta","tale","te","timp","tine","toate","toată","tot","totuşi","toţi","trei","treia","treilea", 15 | "tu","tăi","tău","un","una","unde","undeva","unei","uneia","unele","uneori","unii","unor","unora","unu","unui","unuia", 16 | "unul","vi","voastre","voastră","voi","vostru","vouă","voştri","vreme","vreo","vreun","vă","zece","zero","zi","zice","îi", 17 | "îl","îmi","împotriva","în","înainte","înaintea","încotro","încât","încît","între","întrucât","întrucît","îţi","ăla", 18 
| "ălea","ăsta","ăstea","ăştia","şapte","şase","şi","ştiu","ţi","ţie"] 19 | 20 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_ru.js: -------------------------------------------------------------------------------- 1 | 2 | var words = ["а","алло","без","белый","близко","более","больше","большой","будем","будет","будете","будешь","будто", 3 | "буду","будут","будь","бы","бывает","бывь","был","была","были","было","быть","в","важная","важное","важные","важный", 4 | "вам","вами","вас","ваш","ваша","ваше","ваши","вверх","вдали","вдруг","ведь","везде","вернуться","весь","вечер","взгляд", 5 | "взять","вид","видеть","вместе","вниз","внизу","во","вода","война","вокруг","вон","вообще","вопрос","восемнадцатый", 6 | "восемнадцать","восемь","восьмой","вот","впрочем","времени","время","все","всегда","всего","всем","всеми","всему", 7 | "всех","всею","всю","всюду","вся","всё","второй","вы","выйти","г","где","главный","глаз","говорил","говорит","говорить", 8 | "год","года","году","голова","голос","город","да","давать","давно","даже","далекий","далеко","дальше","даром","дать", 9 | "два","двадцатый","двадцать","две","двенадцатый","двенадцать","дверь","двух","девятнадцатый","девятнадцать","девятый", 10 | "девять","действительно","дел","делать","дело","день","деньги","десятый","десять","для","до","довольно","долго","должно", 11 | "должный","дом","дорога","друг","другая","другие","других","друго","другое","другой","думать","душа","е","его","ее","ей", 12 | "ему","если","есть","еще","ещё","ею","её","ж","ждать","же","жена","женщина","жизнь","жить","за","занят","занята","занято", 13 | "заняты","затем","зато","зачем","здесь","земля","знать","значит","значить","и","идти","из","или","им","именно","иметь", 14 | "ими","имя","иногда","их","к","каждая","каждое","каждые","каждый","кажется","казаться","как","какая","какой","кем","книга", 15 | 
"когда","кого","ком","комната","кому","конец","конечно","которая","которого","которой","которые","который","которых", 16 | "кроме","кругом","кто","куда","лежать","лет","ли","лицо","лишь","лучше","любить","люди","м","маленький","мало","мать", 17 | "машина","между","меля","менее","меньше","меня","место","миллионов","мимо","минута","мир","мира","мне","много", 18 | "многочисленная","многочисленное","многочисленные","многочисленный","мной","мною","мог","могут","мож","может","можно", 19 | "можхо","мои","мой","мор","москва","мочь","моя","моё","мы","на","наверху","над","надо","назад","наиболее","найти", 20 | "наконец","нам","нами","народ","нас","начала","начать","наш","наша","наше","наши","не","него","недавно","недалеко","нее", 21 | "ней","некоторый","нельзя","нем","немного","нему","непрерывно","нередко","несколько","нет","нею","неё","ни","нибудь", 22 | "ниже","низко","никакой","никогда","никто","никуда","ними","них","ничего","ничто","но","новый","нога","ночь","ну","нужно", 23 | "нужный","нх","о","об","оба","обычно","один","одиннадцатый","одиннадцать","однажды","однако","одного","одной","оказаться", 24 | "окно","около","он","она","они","оно","опять","особенно","остаться","от","ответить","отец","отовсюду","отсюда","очень", 25 | "первый","перед","писать","плечо","по","под","подумать","пожалуйста","позже","пойти","пока","пол","получить","помнить", 26 | "понимать","понять","пор","пора","после","последний","посмотреть","посреди","потом","потому","почему","почти","правда", 27 | "прекрасно","при","про","просто","против","процентов","пятнадцатый","пятнадцать","пятый","пять","работа","работать","раз", 28 | "разве","рано","раньше","ребенок","решить","россия","рука","русский","ряд","рядом","с","сам","сама","сами","самим","самими", 29 | "самих","само","самого","самой","самом","самому","саму","самый","свет","свое","своего","своей","свои","своих","свой","свою", 30 | 
"сделать","сеаой","себе","себя","сегодня","седьмой","сейчас","семнадцатый","семнадцать","семь","сидеть","сила","сих","сказал", 31 | "сказала","сказать","сколько","слишком","слово","случай","смотреть","сначала","снова","со","собой","собою","советский", 32 | "совсем","спасибо","спросить","сразу","стал","старый","стать","стол","сторона","стоять","страна","суть","считать","т","та", 33 | "так","такая","также","таки","такие","такое","такой","там","твой","твоя","твоё","те","тебе","тебя","тем","теми","теперь", 34 | "тех","то","тобой","тобою","товарищ","тогда","того","тоже","только","том","тому","тот","тою","третий","три","тринадцатый", 35 | "тринадцать","ту","туда","тут","ты","тысяч","у","увидеть","уж","уже","улица","уметь","утро","хороший","хорошо","хотеть", 36 | "хоть","хотя","хочешь","час","часто","часть","чаще","чего","человек","чем","чему","через","четвертый","четыре", 37 | "четырнадцатый","четырнадцать","что","чтоб","чтобы","чуть","шестнадцатый","шестнадцать","шестой","шесть","эта","эти", 38 | "этим","этими","этих","это","этого","этой","этом","этому","этот","эту","я"] 39 | 40 | exports.words = words 41 | -------------------------------------------------------------------------------- /lib/stopwords_sk.js: -------------------------------------------------------------------------------- 1 | var words = ["a","aby","aj","ako","aký","ale","alebo","ani","avšak","ba","bez","buï","cez","do","ho","hoci","i","ich", 2 | "im","ja","jeho","jej","jemu","ju","k","kam","kde","kedže","keï","kto","ktorý","ku","lebo","ma","mi","mne","mnou","mu", 3 | "my","mòa","môj","na","nad","nami","neho","nej","nemu","nich","nielen","nim","no","nám","nás","náš","ním","o","od","on", 4 | "ona","oni","ono","ony","po","pod","pre","pred","pri","s","sa","seba","sem","so","svoj","taký","tam","teba","tebe","tebou", 5 | "tej","ten","ti","tie","to","toho","tomu","tou","tvoj","ty","tá","tým","v","vami","veï","vo","vy","vám","vás","váš","však", 6 | "z","za","zo","a","èi","èo","èí","òom","òou","òu","že"] 7 | 8 
| exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_sl.js: -------------------------------------------------------------------------------- 1 | var words = ["a","ali","april","avgust","b","bi","bil","bila","bile","bili","bilo","biti","blizu","bo","bodo","bojo","bolj", 2 | "bom","bomo","boste","bova","boš","brez","c","cel","cela","celi","celo","d","da","daleč","dan","danes","datum","december", 3 | "deset","deseta","deseti","deseto","devet","deveta","deveti","deveto","do","dober","dobra","dobri","dobro","dokler","dol", 4 | "dolg","dolga","dolgi","dovolj","drug","druga","drugi","drugo","dva","dve","e","eden","en","ena","ene","eni","enkrat", 5 | "eno","etc.","f","februar","g","g.","ga","ga.","gor","gospa","gospod","h","halo","i","idr.","ii","iii","in","iv","ix","iz", 6 | "j","januar","jaz","je","ji","jih","jim","jo","julij","junij","jutri","k","kadarkoli","kaj","kajti","kako","kakor","kamor", 7 | "kamorkoli","kar","karkoli","katerikoli","kdaj","kdo","kdorkoli","ker","ki","kje","kjer","kjerkoli","ko","koder", 8 | "koderkoli","koga","komu","kot","kratek","kratka","kratke","kratki","l","lahka","lahke","lahki","lahko","le","lep","lepa", 9 | "lepe","lepi","lepo","leto","m","maj","majhen","majhna","majhni","malce","malo","manj","marec","me","med","medtem","mene", 10 | "mesec","mi","midva","midve","mnogo","moj","moja","moje","mora","morajo","moram","moramo","morate","moraš","morem","mu", 11 | "n","na","nad","naj","najina","najino","najmanj","naju","največ","nam","narobe","nas","nato","nazaj","naš","naša","naše", 12 | "ne","nedavno","nedelja","nek","neka","nekaj","nekatere","nekateri","nekatero","nekdo","neke","nekega","neki","nekje", 13 | "neko","nekoga","nekoč","ni","nikamor","nikdar","nikjer","nikoli","nič","nje","njega","njegov","njegova","njegovo","njej", 14 | "njemu","njen","njena","njeno","nji","njih","njihov","njihova","njihovo","njiju","njim","njo","njun","njuna","njuno","no", 15 | 
"nocoj","november","npr.","o","ob","oba","obe","oboje","od","odprt","odprta","odprti","okoli","oktober","on","onadva","one", 16 | "oni","onidve","osem","osma","osmi","osmo","oz.","p","pa","pet","peta","petek","peti","peto","po","pod","pogosto","poleg", 17 | "poln","polna","polni","polno","ponavadi","ponedeljek","ponovno","potem","povsod","pozdravljen","pozdravljeni","prav", 18 | "prava","prave","pravi","pravo","prazen","prazna","prazno","prbl.","precej","pred","prej","preko","pri","pribl.", 19 | "približno","primer","pripravljen","pripravljena","pripravljeni","proti","prva","prvi","prvo","r","ravno","redko","res", 20 | "reč","s","saj","sam","sama","same","sami","samo","se","sebe","sebi","sedaj","sedem","sedma","sedmi","sedmo","sem", 21 | "september","seveda","si","sicer","skoraj","skozi","slab","smo","so","sobota","spet","sreda","srednja","srednji","sta", 22 | "ste","stran","stvar","sva","t","ta","tak","taka","take","taki","tako","takoj","tam","te","tebe","tebi","tega","težak", 23 | "težka","težki","težko","ti","tista","tiste","tisti","tisto","tj.","tja","to","toda","torek","tretja","tretje","tretji", 24 | "tri","tu","tudi","tukaj","tvoj","tvoja","tvoje","u","v","vaju","vam","vas","vaš","vaša","vaše","ve","vedno","velik", 25 | "velika","veliki","veliko","vendar","ves","več","vi","vidva","vii","viii","visok","visoka","visoke","visoki","vsa","vsaj", 26 | "vsak","vsaka","vsakdo","vsake","vsaki","vsakomur","vse","vsega","vsi","vso","včasih","včeraj","x","z","za","zadaj", 27 | "zadnji","zakaj","zaprta","zaprti","zaprto","zdaj","zelo","zunaj","č","če","često","četrta","četrtek","četrti","četrto", 28 | "čez","čigav","š","šest","šesta","šesti","šesto","štiri","ž","že"] 29 | 30 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_so.js: -------------------------------------------------------------------------------- 1 | var words = 
["aad","albaabkii","atabo","ay","ayaa","ayee","ayuu","dhan","hadana","in","inuu","isku","jiray","jirtay","ka", 2 | "kale","kasoo","ku","kuu","lakin","markii","oo","si","soo","uga","ugu","uu","waa","waxa","waxuu"] 3 | 4 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_st.js: -------------------------------------------------------------------------------- 1 | var words = ["a","ba","bane","bona","e","ea","eaba","empa","ena","ha","hae","hape","ho","hore","ka","ke","la","le","li", 2 | "me","mo","moo","ne","o","oa","re","sa","se","tloha","tsa","tse"] 3 | 4 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_sv.js: -------------------------------------------------------------------------------- 1 | var words = ["aderton","adertonde","adjö","aldrig","alla","allas","allt","alltid","alltså","andra","andras","annan", 2 | "annat","artonde","artonn","att","av","bakom","bara","behöva","behövas","behövde","behövt","beslut","beslutat","beslutit", 3 | "bland","blev","bli","blir","blivit","bort","borta","bra","bäst","bättre","båda","bådas","dag","dagar","dagarna","dagen", 4 | "de","del","delen","dem","den","denna","deras","dess","dessa","det","detta","dig","din","dina","dit","ditt","dock","du", 5 | "där","därför","då","efter","eftersom","ej","elfte","eller","elva","en","enkel","enkelt","enkla","enligt","er","era","ert", 6 | "ett","ettusen","fanns","fem","femte","femtio","femtionde","femton","femtonde","fick","fin","finnas","finns","fjorton", 7 | "fjortonde","fjärde","fler","flera","flesta","fram","framför","från","fyra","fyrtio","fyrtionde","få","får","fått", 8 | "följande","för","före","förlåt","förra","första","genast","genom","gick","gjorde","gjort","god","goda","godare", 9 | "godast","gott","gälla","gäller","gällt","gärna","gå","går","gått","gör","göra","ha","hade","haft","han","hans","har", 10 | 
"heller","hellre","helst","helt","henne","hennes","hit","hon","honom","hundra","hundraen","hundraett","hur","här", 11 | "hög","höger","högre","högst","i","ibland","icke","idag","igen","igår","imorgon","in","inför","inga","ingen","ingenting", 12 | "inget","innan","inne","inom","inte","inuti","ja","jag","ju","jämfört","kan","kanske","knappast","kom","komma","kommer", 13 | "kommit","kr","kunde","kunna","kunnat","kvar","legat","ligga","ligger","lika","likställd","likställda","lilla","lite", 14 | "liten","litet","länge","längre","längst","lätt","lättare","lättast","långsam","långsammare","långsammast","långsamt", 15 | "långt","man","med","mellan","men","mer","mera","mest","mig","min","mina","mindre","minst","mitt","mittemot","mot", 16 | "mycket","många","måste","möjlig","möjligen","möjligt","möjligtvis","ned","nederst","nedersta","nedre","nej","ner","ni", 17 | "nio","nionde","nittio","nittionde","nitton","nittonde","nog","noll","nr","nu","nummer","när","nästa","någon","någonting", 18 | "något","några","nödvändig","nödvändiga","nödvändigt","nödvändigtvis","och","också","ofta","oftast","olika","olikt","om", 19 | "oss","på","rakt","redan","rätt","sade","sagt","samma","sedan","senare","senast","sent","sex","sextio","sextionde","sexton", 20 | "sextonde","sig","sin","sina","sist","sista","siste","sitt","sitta","sju","sjunde","sjuttio","sjuttionde","sjutton", 21 | "sjuttonde","själv","sjätte","ska","skall","skulle","slutligen","små","smått","snart","som","stor","stora","stort", 22 | "större","störst","säga","säger","sämre","sämst","så","sådan","sådana","sådant","tack","tidig","tidigare","tidigast", 23 | "tidigt","till","tills","tillsammans","tio","tionde","tjugo","tjugoen","tjugoett","tjugonde","tjugotre","tjugotvå", 24 | "tjungo","tolfte","tolv","tre","tredje","trettio","trettionde","tretton","trettonde","två","tvåhundra","under","upp", 25 | "ur","ursäkt","ut","utan","utanför","ute","vad","var","vara","varför","varifrån","varit","varje","varken","vars", 26 | 
"varsågod","vart","vem","vems","verkligen","vi","vid","vidare","viktig","viktigare","viktigast","viktigt","vilka", 27 | "vilkas","vilken","vilket","vill","vänster","vänstra","värre","vår","våra","vårt","än","ännu","är","även","åt", 28 | "åtminstone","åtta","åttio","åttionde","åttonde","över","övermorgon","överst","övre"] 29 | 30 | exports.words = words 31 | -------------------------------------------------------------------------------- /lib/stopwords_sw.js: -------------------------------------------------------------------------------- 1 | var words = ["akasema","alikuwa","alisema","baada","basi","bila","cha","chini","hadi","hapo","hata","hivyo","hiyo", 2 | "huku","huo","ili","ilikuwa","juu","kama","karibu","katika","kila","kima","kisha","kubwa","kutoka","kuwa","kwa","kwamba", 3 | "kwenda","kwenye","la","lakini","mara","mdogo","mimi","mkubwa","mmoja","moja","muda","mwenye","na","naye","ndani","ng", 4 | "ni","nini","nonkungu","pamoja","pia","sana","sasa","sauti","tafadhali","tena","tu","vile","wa","wakati","wake","walikuwa", 5 | "wao","watu","wengine","wote","ya","yake","yangu","yao","yeye","yule","za","zaidi","zake"] 6 | 7 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_th.js: -------------------------------------------------------------------------------- 1 | var words = ["กล่าว","กว่า","กัน","กับ","การ","ก็","ก่อน","ขณะ","ขอ","ของ","ขึ้น","คง","ครั้ง","ความ","คือ","จะ","จัด","จาก","จึง", 2 | "ช่วง","ซึ่ง","ดัง","ด้วย","ด้าน","ตั้ง","ตั้งแต่","ตาม","ต่อ","ต่าง","ต่างๆ","ต้อง","ถึง","ถูก","ถ้า","ทั้ง","ทั้งนี้","ทาง","ที่","ที่สุด","ทุก","ทํา", 3 | "ทําให้","นอกจาก","นัก","นั้น","นี้","น่า","นํา","บาง","ผล","ผ่าน","พบ","พร้อม","มา","มาก","มี","ยัง","รวม","ระหว่าง","รับ","ราย","ร่วม", 4 | "ลง","วัน","ว่า","สุด","ส่ง","ส่วน","สําหรับ","หนึ่ง","หรือ","หลัง","หลังจาก","หลาย","หาก","อยาก","อยู่","อย่าง","ออก","อะไร","อาจ","อีก", 5 | 
"เขา","เข้า","เคย","เฉพาะ","เช่น","เดียว","เดียวกัน","เนื่องจาก","เปิด","เปิดเผย","เป็น","เป็นการ","เพราะ","เพื่อ","เมื่อ","เรา","เริ่ม", 6 | "เลย","เห็น","เอง","แต่","แบบ","แรก","และ","แล้ว","แห่ง","โดย","ใน","ให้","ได้","ไป","ไม่","ไว้"] 7 | 8 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_tr.js: -------------------------------------------------------------------------------- 1 | var words = ["acaba","acep","adeta","altmýþ","altmış","altý","altı","ama","ancak","arada","artýk","aslında","aynen", 2 | "ayrıca","az","bana","bari","bazen","bazý","bazı","baţka","belki","ben","benden","beni","benim","beri","beþ","beş", 3 | "beţ","bile","bin","bir","biraz","biri","birkaç","birkez","birçok","birþey","birþeyi","birşey","birşeyi","birţey", 4 | "biz","bizden","bize","bizi","bizim","bu","buna","bunda","bundan","bunlar","bunları","bunların","bunu","bunun", 5 | "burada","böyle","böylece","bütün","da","daha","dahi","dahil","daima","dair","dayanarak","de","defa","deđil","değil", 6 | "diye","diđer","diğer","doksan","dokuz","dolayı","dolayısıyla","dört","edecek","eden","ederek","edilecek","ediliyor", 7 | "edilmesi","ediyor","elli","en","etmesi","etti","ettiği","ettiğini","eđer","eğer","fakat","gibi","göre","halbuki", 8 | "halen","hangi","hani","hariç","hatta","hele","hem","henüz","hep","hepsi","her","herhangi","herkes","herkesin","hiç", 9 | "hiçbir","iken","iki","ila","ile","ilgili","ilk","illa","ise","itibaren","itibariyle","iyi","iyice","için","işte","iţte", 10 | "kadar","kanýmca","karşın","katrilyon","kendi","kendilerine","kendini","kendisi","kendisine","kendisini","kere","kez", 11 | "keţke","ki","kim","kimden","kime","kimi","kimse","kýrk","kýsaca","kırk","lakin","madem","međer","milyar","milyon", 12 | "mu","mü","mý","mı","nasýl","nasıl","ne","neden","nedenle","nerde","nere","nerede","nereye","nitekim","niye","niçin", 13 | 
"o","olan","olarak","oldu","olduklarını","olduğu","olduğunu","olmadı","olmadığı","olmak","olması","olmayan","olmaz", 14 | "olsa","olsun","olup","olur","olursa","oluyor","on","ona","ondan","onlar","onlardan","onlari","onlarýn","onları","onların", 15 | "onu","onun","otuz","oysa","pek","rağmen","sadece","sanki","sekiz","seksen","sen","senden","seni","senin","siz","sizden", 16 | "sizi","sizin","sonra","tarafından","trilyon","tüm","var","vardı","ve","veya","veyahut","ya","yahut","yani","yapacak", 17 | "yapmak","yaptı","yaptıkları","yaptığı","yaptığını","yapılan","yapılması","yapıyor","yedi","yerine","yetmiþ","yetmiş", 18 | "yetmiţ","yine","yirmi","yoksa","yüz","zaten","çok","çünkü","öyle","üzere","üç","þey","þeyden","þeyi","þeyler","þu", 19 | "þuna","þunda","þundan","þunu","şey","şeyden","şeyi","şeyler","şu","şuna","şunda","şundan","şunları","şunu","şöyle", 20 | "ţayet","ţimdi","ţu","ţöyle"] 21 | 22 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_yo.js: -------------------------------------------------------------------------------- 1 | var words = ["a","an","bá","bí","bẹ̀rẹ̀","fún","fẹ́","gbogbo","inú","jù","jẹ","jẹ́","kan","kì","kí","kò","láti","lè","lọ","mi", 2 | "mo","máa","mọ̀","ni","náà","ní","nígbà","nítorí","nǹkan","o","padà","pé","púpọ̀","pẹ̀lú","rẹ̀","sì","sí","sínú","ṣ","ti","tí", 3 | "wà","wá","wọn","wọ́n","yìí","àti","àwọn","é","í","òun","ó","ń","ńlá","ṣe","ṣé","ṣùgbọ́n","ẹmọ́","ọjọ́","ọ̀pọ̀lọpọ̀"] 4 | 5 | exports.words = words -------------------------------------------------------------------------------- /lib/stopwords_zh.js: -------------------------------------------------------------------------------- 1 | 2 | var words = ["一切","一则","一方面","一旦","一来","一样","一般","七","万一", 3 | "三","上下","不仅","不但","不光","不单","不只","不如","不怕","不惟","不成","不拘","不比","不然","不特","不独","不管", 4 | "不论","不过","不问","与","与其","与否","与此同时","且","两者","个","临","为","为了","为什么","为何","为着","乃","乃至", 5 | 
"么","之","之一","之所以","之类","乌乎","乎","乘","九","也","也好","也罢","了","二","于","于是","于是乎","云云","五","人家", 6 | "什么","什么样","从","从而","他","他人","他们","以","以便","以免","以及","以至","以至于","以致","们","任","任何","任凭","似的", 7 | "但","但是","何","何况","何处","何时","作为","你","你们","使得","例如","依","依照","俺","俺们","倘","倘使","倘或","倘然","倘若", 8 | "借","假使","假如","假若","像","八","六","兮","关于","其","其一","其中","其二","其他","其余","其它","其次","具体地说","具体说来", 9 | "再者","再说","冒","冲","况且","几","几时","凭","凭借","则","别","别的","别说","到","前后","前者","加之","即","即令","即使","即便", 10 | "即或","即若","又","及","及其","及至","反之","反过来","反过来说","另","另一方面","另外","只是","只有","只要","只限","叫","叮咚","可", 11 | "可以","可是","可见","各","各个","各位","各种","各自","同","同时","向","向着","吓","吗","否则","吧","吧哒","吱","呀","呃","呕","呗", 12 | "呜","呜呼","呢","呵","呸","呼哧","咋","和","咚","咦","咱","咱们","咳","哇","哈","哈哈","哉","哎","哎呀","哎哟","哗","哟","哦","哩", 13 | "哪","哪个","哪些","哪儿","哪天","哪年","哪怕","哪样","哪边","哪里","哼","哼唷","唉","啊","啐","啥","啦","啪达","喂","喏","喔唷", 14 | "嗡嗡","嗬","嗯","嗳","嘎","嘎登","嘘","嘛","嘻","嘿","四","因","因为","因此","因而","固然","在","在下","地","多","多少","她","她们", 15 | "如","如上所述","如何","如其","如果","如此","如若","宁","宁可","宁愿","宁肯","它","它们","对","对于","将","尔后","尚且","就","就是", 16 | "就是说","尽","尽管","岂但","己","并","并且","开外","开始","归","当","当着","彼","彼此","往","待","得","怎","怎么","怎么办","怎么样", 17 | "怎样","总之","总的来看","总的来说","总的说来","总而言之","恰恰相反","您","慢说","我","我们","或","或是","或者","所","所以","打","把", 18 | "抑或","拿","按","按照","换句话说","换言之","据","接着","故","故此","旁人","无宁","无论","既","既是","既然","时候","是","是的","替", 19 | "有","有些","有关","有的","望","朝","朝着","本","本着","来","来着","极了","果然","果真","某","某个","某些","根据","正如","此","此外", 20 | "此间","毋宁","每","每当","比","比如","比方","沿","沿着","漫说","焉","然则","然后","然而","照","照着","甚么","甚而","甚至","用","由", 21 | "由于","由此可见","的","的话","相对而言","省得","着","着呢","矣","离","第","等","等等","管","紧接着","纵","纵令","纵使","纵然","经", 22 | "经过","结果","给","继而","综上所述","罢了","者","而","而且","而况","而外","而已","而是","而言","能","腾","自","自个儿","自从","自各儿", 23 | "自家","自己","自身","至","至于","若","若是","若非","莫若","虽","虽则","虽然","虽说","被","要","要不","要不是","要不然","要么","要是", 24 | 
"让","论","设使","设若","该","诸位","谁","谁知","赶","起","起见","趁","趁着","越是","跟","较","较之","边","过","还是","还有","这", 25 | "这个","这么","这么些","这么样","这么点儿","这些","这会儿","这儿","这就是说","这时","这样","这边","这里","进而","连","连同","通过", 26 | "遵照","那","那个","那么","那么些","那么样","那些","那会儿","那儿","那时","那样","那边","那里","鄙人","鉴于","阿","除","除了","除此之外", 27 | "除非","随","随着","零","非但","非徒","靠","顺","顺着","首先"] 28 | 29 | 30 | exports.words = words 31 | -------------------------------------------------------------------------------- /lib/stopwords_zu.js: -------------------------------------------------------------------------------- 1 | var words = ["futhi","kahle","kakhulu","kanye","khona","kodwa","kungani","kusho","la","lakhe","lapho","mina","ngesikhathi", 2 | "nje","phansi","phezulu","u","ukuba","ukuthi","ukuze","uma","wahamba","wakhe","wami","wase","wathi","yakhe","zakhe","zonke"] 3 | 4 | exports.words = words -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "remove-stopwords", 3 | "version": "1.0.3", 4 | "description": "A package to remove common stopwords from an array, it covers most languages and is optimized primarily for WorldBrain", 5 | "main": "lib/stopword.js", 6 | "directories": { 7 | "lib": "lib" 8 | }, 9 | "scripts": { 10 | "test": "echo \"Error: no test specified\" && exit 1" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/WorldBrain/remove-stopwords.git" 15 | }, 16 | "keywords": [ 17 | "stopwords", 18 | "nlp", 19 | "remove" 20 | ], 21 | "author": "Yager Anderson ", 22 | "license": "MIT", 23 | "bugs": { 24 | "url": "https://github.com/WorldBrain/remove-stopwords/issues" 25 | }, 26 | "homepage": "https://github.com/WorldBrain/remove-stopwords#readme" 27 | } 28 | --------------------------------------------------------------------------------