├── .public ├── architecture.png ├── image.png ├── screenshot1.png ├── screenshot2.png ├── screenshot3.png └── screenshot4.png ├── LICENSE ├── README.md ├── backend ├── app.py ├── bm25_api.py ├── chroma_doc_manager.py ├── file_monitor.py ├── language_presets.json ├── llama_types.py ├── llm_utils.py ├── memory_server.py ├── nltk_data │ ├── corpora │ │ └── stopwords │ │ │ ├── README │ │ │ ├── arabic │ │ │ ├── azerbaijani │ │ │ ├── basque │ │ │ ├── bengali │ │ │ ├── catalan │ │ │ ├── chinese │ │ │ ├── danish │ │ │ ├── dutch │ │ │ ├── english │ │ │ ├── finnish │ │ │ ├── french │ │ │ ├── german │ │ │ ├── greek │ │ │ ├── hebrew │ │ │ ├── hinglish │ │ │ ├── hungarian │ │ │ ├── indonesian │ │ │ ├── italian │ │ │ ├── kazakh │ │ │ ├── nepali │ │ │ ├── norwegian │ │ │ ├── portuguese │ │ │ ├── romanian │ │ │ ├── russian │ │ │ ├── slovene │ │ │ ├── spanish │ │ │ ├── swedish │ │ │ ├── tajik │ │ │ └── turkish │ └── tokenizers │ │ └── punkt │ │ ├── .DS_Store │ │ ├── PY3 │ │ ├── README │ │ ├── czech.pickle │ │ ├── danish.pickle │ │ ├── dutch.pickle │ │ ├── english.pickle │ │ ├── estonian.pickle │ │ ├── finnish.pickle │ │ ├── french.pickle │ │ ├── german.pickle │ │ ├── greek.pickle │ │ ├── italian.pickle │ │ ├── malayalam.pickle │ │ ├── norwegian.pickle │ │ ├── polish.pickle │ │ ├── portuguese.pickle │ │ ├── russian.pickle │ │ ├── slovene.pickle │ │ ├── spanish.pickle │ │ ├── swedish.pickle │ │ └── turkish.pickle │ │ ├── README │ │ ├── czech.pickle │ │ ├── danish.pickle │ │ ├── dutch.pickle │ │ ├── english.pickle │ │ ├── estonian.pickle │ │ ├── finnish.pickle │ │ ├── french.pickle │ │ ├── german.pickle │ │ ├── greek.pickle │ │ ├── italian.pickle │ │ ├── malayalam.pickle │ │ ├── norwegian.pickle │ │ ├── polish.pickle │ │ ├── portuguese.pickle │ │ ├── russian.pickle │ │ ├── slovene.pickle │ │ ├── spanish.pickle │ │ ├── swedish.pickle │ │ └── turkish.pickle ├── requirements.txt ├── retrivial_ranking.py └── settings.py ├── external_example └── embedding_server.py ├── frontend ├── 
components │ ├── Chat │ │ ├── Chat.tsx │ │ ├── ChatInput.tsx │ │ ├── ChatLoader.tsx │ │ ├── ChatMessage.tsx │ │ └── run.log │ └── Layout │ │ ├── Footer.tsx │ │ └── Navbar.tsx ├── next-env.d.ts ├── next.config.js ├── package-lock.json ├── package.json ├── pages │ ├── _app.tsx │ ├── _document.tsx │ ├── api │ │ ├── chat.ts │ │ ├── login.ts │ │ └── verify.ts │ ├── index.tsx │ └── login.tsx ├── postcss.config.js ├── public │ └── favicon.ico ├── styles │ └── globals.css ├── tailwind.config.js ├── tsconfig.json ├── types │ └── index.ts ├── users.json └── utils │ └── index.ts └── md_website ├── Chat History.md ├── Notes.md ├── SETTINGS.md ├── chat_history └── .gitkeep ├── index.md ├── notes └── .gitkeep └── template └── link.md /.public/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/architecture.png -------------------------------------------------------------------------------- /.public/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/image.png -------------------------------------------------------------------------------- /.public/screenshot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot1.png -------------------------------------------------------------------------------- /.public/screenshot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot2.png -------------------------------------------------------------------------------- /.public/screenshot3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot3.png -------------------------------------------------------------------------------- /.public/screenshot4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot4.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yipeng Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🐘 Loyal Elephie: Your Trusty Memory-enabled AI Companion 🧠 2 |

3 | Your image description 4 |

5 | 🚀 Embark on an exciting adventure with Loyal Elephie, your faithful AI sidekick! This project combines the power of a neat Next.js web UI and a mighty Python backend, leveraging the latest advancements in Large Language Models (LLMs) and Retrieval Augmented Generation (RAG) to deliver a seamless and meaningful chatting experience! 🎉 6 | 7 | ## Features 8 | 9 | 1. **🎛️ Controllable Memory:** Take control of Loyal Elephie's memory! You decide which moments to save, and you can easily edit the context as needed. It is your second-brain for episodic memory. ✍️ 10 | 11 | 2. **🔍 Hybrid Search:** Experience the powerful combination of ChromaDB and BM25 for efficient searches! It's also optimized for handling date-relevant queries. 📅 12 | 13 | 3. **🔒 Secure Web Access:** With a built-in login feature, only authorized users can access your AI companion, ensuring your conversations remain private and secure over the internet. 🛡️ 14 | 15 | 4. **🤖 Streamlined LLM Agent:** Loyal Elephie uses XML syntax with no function-calling required. It is also optimized for less token usage and works smoothly with great local LLMs using Llama.cpp or ExllamaV2. 💬 16 | 17 | 5. **📝 (Optional) Markdown Editor Integration:** Connect with online Markdown editors to view the original referred document during chats and experience real-time LLM knowledge integration after editing your notes online. 🌐 18 | 19 | Loyal Elephie supports both open and proprietary LLMs and embeddings serving as OpenAI compatible APIs. 20 | 21 | ![](.public/architecture.png) 22 | 23 | Warning: This project was originally designed for **Linux** and compatibility with Windows or macOS has not been fully tested. If you are using Windows, I strongly recommend you to run this project in **WSL**. 
24 | 25 | 26 | ## Screenshots 27 | *Meta-Llama-3-70B-Instruct.Q4_K_S.gguf was used when capturing the screenshots below* 28 | 29 | ![](.public/screenshot1.png) 30 | 31 | ![](.public/screenshot2.png) 32 | 33 | 34 | ![](.public/screenshot4.png) 35 | With [SilverBulletMd](https://github.com/silverbulletmd/silverbullet), you can edit a note in the browser and then let Loyal Elephie remember it! 36 | 37 | ![](.public/screenshot3.png) 38 | 39 | The UI is modified from https://github.com/mckaywrigley/chatbot-ui-lite. Credits to the author Mckay Wrigley! 40 | 41 | ## Deployment 42 | 43 | **1. Clone Repo** 44 | 45 | ```bash 46 | git clone https://github.com/v2rockets/Loyal-Elephie.git 47 | ``` 48 | 49 | **2. Install Frontend Requirements** 50 | 51 | ```bash 52 | cd frontend 53 | npm i 54 | ``` 55 | 56 | **3. Configure Login Users** 57 | 58 | frontend/users.json 59 | ```json 60 | [{ 61 | "username":"admin", 62 | "password":"admin" 63 | }] 64 | ``` 65 | 66 | **4. Install Backend Requirements** 67 | 68 | ```bash 69 | cd backend 70 | pip install -r requirements.txt 71 | ``` 72 | 73 | **5. Configure Backend Settings** 74 | 75 | ```python 76 | # backend/settings.py 77 | NICK_NAME = 'Peter' # This is your nick name. Make sure to set it at the beginning and don't change so that LLM will not get confused. 78 | 79 | CHAT_BASE_URL = 'https://api.openai.com/v1' # Modify to your OpenAI compatible API url 80 | CHAT_API_KEY = 'your-api-key' 81 | CHAT_MODEL_NAME = "gpt-3.5-turbo" 82 | 83 | # Language Preference (experimental) 84 | # Supported Languages: English, Chinese, German, French, Spanish, Portuguese, Italian, Dutch, Czech, Polish, Russian, Arabic 85 | LANGUAGE_PREFERENCE = "English" 86 | ``` 87 | 88 | **6. 
Run App** 89 | 90 | frontend: 91 | ```bash 92 | cd frontend 93 | npm run build 94 | npm run start 95 | ``` 96 | backend: 97 | ```bash 98 | cd backend 99 | python app.py 100 | ``` 101 | 102 | # Usage Tips 103 | * By default, visit Loyal Elephie from http://localhost:8080 104 | * Use the "Save" button to save the current conversation into Loyal Elephie's memory 105 | * Use the "Reset" button to clear the current conversation (this does not affect the saved status; it is the same as refreshing the page) 106 | * Click on the titles in "Reference" to navigate to the corresponding Markdown notes (but SilverBulletMd or another web Markdown editor has to be hosted and configured) 107 | 108 | Some of the workable local LLMs tested: 109 | * OpenHermes-2.5-Mistral-7B 110 | * Mixtral-8x7B-Instruct-v0.1 111 | * c4ai-command-r-v01 112 | * Meta-Llama-3-70B-Instruct (Best so far) 113 | * Qwen2-72b-instruct (Best for non-English languages) 114 | 115 | For those who need a hands-on local embedding API, an embedding server example is added to "external_example". You will need to install "sentence_transformers" to run it. 
if __name__ == "__main__":
    # Start the background workers before the HTTP server begins serving:
    # one thread is built from the chat and note paths, the other receives
    # the shared server_state object.
    watchdog_thread = WatchdogThread(CHAT_PATH, NOTE_PATH)
    watchdog_thread.start()
    update_thread = UpdateThread(server_state)
    update_thread.start()

    # Prefix every uvicorn access-log record with a timestamp.
    LOGGING_CONFIG["formatters"]["access"]["fmt"] = "%(asctime)s %(levelprefix)s %(message)s"
    LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M:%S"

    # os.getenv returns a str whenever PORT is set in the environment, while
    # the fallback is the int 5000; normalise so uvicorn always gets an int.
    uvicorn.run(app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 5000)))
def update_corpus():
    """Rebuild the in-memory BM25 index from the digest files on disk.

    Every file in the ``digests`` directory whose name starts with
    ``Conversation`` or ``Note`` is read; its title (file name with ``;``
    turned back into ``:``) is mapped to its position in the corpus via the
    global ``corpus_index`` and a fresh ``BM25Okapi`` model is stored in the
    global ``bm25``.  Both globals are replaced while holding ``lock``.

    When no digest qualifies, ``bm25`` is reset to ``None`` so that it can
    never refer to documents that are no longer present in ``corpus_index``.
    """
    global corpus_index
    global bm25
    with lock:
        corpus = []
        index = {}
        digest_dir = "digests"  # do not shadow the builtin ``dir``
        for file in os.listdir(digest_dir):
            if not file.startswith("Conversation") and not file.startswith("Note"):
                continue
            title = file.replace(';', ':')  # revert conversion for Windows file name rules
            with open(os.path.join(digest_dir, file), encoding='utf-8') as f:
                corpus.append(f.read())
            index[title] = len(corpus) - 1

        # Publish the index only after every file was read successfully.
        corpus_index = index
        if corpus:
            processed_corpus = [preprocess(doc) for doc in corpus]
            bm25 = BM25Okapi(processed_corpus, k1=1.5, b=0.75, epsilon=0.25)
        else:
            bm25 = None
        print("bm25 corpus updated")


def standardize(lst):
    """Return the z-scores of ``lst`` using the population standard deviation.

    An empty input yields an empty list, and an input without spread yields
    all zeros instead of dividing by zero.
    """
    if not lst:
        return []
    mean_val = statistics.mean(lst)
    std_dev = statistics.pstdev(lst)
    if std_dev == 0:
        return [0] * len(lst)
    return [(x - mean_val) / std_dev for x in lst]


def _bm25_scores(query, doc_id_list):
    """Tokenise ``query`` and score it against the selected documents.

    Must be called with ``lock`` held.  Returns ``(tokens, scores)``:
    ``tokens`` are the distinct query tokens in first-seen order and
    ``scores`` has one entry per element of ``doc_id_list``, in that order.

    Raises ``KeyError`` when a document id is missing from ``corpus_index``.
    """
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    tokens = list(dict.fromkeys(preprocess(query)))
    doc_index_list = [corpus_index[doc_id] for doc_id in doc_id_list]
    scores = bm25.get_batch_scores(tokens, doc_index_list)
    return tokens, scores


def get_norm_bm25_scores(query, doc_id_list):
    """Return standardized (z-score) BM25 scores of ``query`` for ``doc_id_list``.

    Returns an empty list when ``doc_id_list`` is empty.
    """
    with lock:
        if not doc_id_list:
            return []
        _, scores = _bm25_scores(query, doc_id_list)
        norm_scores = standardize(scores)
        print('\n'.join([f"{b}-{a}" for a,b in zip(doc_id_list,norm_scores)]))
        return norm_scores


def get_avg_bm25_scores(query, doc_id_list):
    """Return BM25 scores of ``query`` for ``doc_id_list`` divided by the
    number of distinct query tokens.

    Returns an empty list when ``doc_id_list`` is empty.
    """
    with lock:
        if not doc_id_list:
            return []
        tokens, scores = _bm25_scores(query, doc_id_list)
        # A query made only of stop words / non-alphabetic tokens leaves no
        # tokens at all; fall back to 1 to avoid a ZeroDivisionError.
        divisor = len(tokens) or 1
        avg_scores = [score / divisor for score in scores]
        print('\n'.join([f"{b}-{a}" for a,b in zip(doc_id_list,avg_scores)]))
        return avg_scores
23 | """ 24 | if not isinstance(where, dict): 25 | raise ValueError(f"Expected where to be a dict, got {where}") 26 | if len(where) != 1: 27 | raise ValueError(f"Expected where to have exactly one operator, got {where}") 28 | for key, value in where.items(): 29 | if not isinstance(key, str): 30 | raise ValueError(f"Expected where key to be a str, got {key}") 31 | if ( 32 | key != "$and" 33 | and key != "$or" 34 | and key != "$in" 35 | and key != "$nin" 36 | and not isinstance(value, (str, int, float, dict)) 37 | ): 38 | raise ValueError( 39 | f"Expected where value to be a str, int, float, or operator expression, got {value}" 40 | ) 41 | if key == "$and" or key == "$or": 42 | if not isinstance(value, list): 43 | raise ValueError( 44 | f"Expected where value for $and or $or to be a list of where expressions, got {value}" 45 | ) 46 | if len(value) <= 1: 47 | raise ValueError( 48 | f"Expected where value for $and or $or to be a list with at least two where expressions, got {value}" 49 | ) 50 | for where_expression in value: 51 | custom_validate_where(where_expression) 52 | # Value is an operator expression 53 | if isinstance(value, dict): 54 | # Ensure there is only one operator 55 | if len(value) != 1: 56 | raise ValueError( 57 | f"Expected operator expression to have exactly one operator, got {value}" 58 | ) 59 | 60 | for operator, operand in value.items(): 61 | # Allow strings for gt, gte, lt, lte 62 | if operator in ["$gt", "$gte", "$lt", "$lte"]: 63 | if not isinstance(operand, (str, int, float)): 64 | raise ValueError( 65 | f"Expected operand value to be a str, int, or float for operator {operator}, got {operand}" 66 | ) 67 | if operator in ["$in", "$nin"]: 68 | if not isinstance(operand, list): 69 | raise ValueError( 70 | f"Expected operand value to be a list for operator {operator}, got {operand}" 71 | ) 72 | if operator not in [ 73 | "$gt", 74 | "$gte", 75 | "$lt", 76 | "$lte", 77 | "$ne", 78 | "$eq", 79 | "$in", 80 | "$nin", 81 | ]: 82 | raise ValueError( 83 
class DocumentFolder():
    """Plain-text store keeping one file per document inside ``dir``.

    Document ids may contain ``:``; the file name used on disk is the id with
    every ``:`` replaced by ``;``.
    """

    def __init__(self, dir) -> None:
        self.dir = dir
        # Creates missing parents too and tolerates an already existing folder.
        os.makedirs(dir, exist_ok=True)

    def _path(self, doc_id):
        """Return the on-disk path of ``doc_id`` inside this folder."""
        return os.path.join(self.dir, doc_id.replace(':', ';'))

    def save(self, doc_id, string):
        """Write ``string`` as the content of ``doc_id`` (overwrites)."""
        with open(self._path(doc_id), "w", encoding='utf-8') as f:
            f.write(string)

    def load(self, doc_id):
        """Return the stored text of ``doc_id``; raises ``OSError`` if the file is missing."""
        with open(self._path(doc_id), encoding='utf-8') as f:
            return f.read()

    def delete(self, doc_id):
        """Remove the file of ``doc_id``.

        Returns ``True`` when a file was removed and ``False`` when none
        existed.  The path is resolved against ``self.dir`` (like ``save`` and
        ``load``) rather than the module-level ``ROOT_FOLDER``.
        """
        path = self._path(doc_id)
        if os.path.exists(path):
            os.remove(path)
            return True
        return False


class ChromaDocManager:
    """Keeps a Chroma collection of document chunks and the matching text
    files in a ``DocumentFolder`` in step.

    Every public method holds ``self.lock`` for its whole duration; the
    underscore-prefixed helpers do not take the lock and are called from
    methods that already hold it.
    """

    def __init__(self):
        self.lock = threading.Lock()
        # Initialize a persistent Chroma client
        self.client = chromadb.PersistentClient(path=f'./{ROOT_FOLDER}/chroma', settings=Settings(anonymized_telemetry=False))  # this will not refresh on file change
        self.collection = self.client.get_or_create_collection(name='digests', embedding_function=EmbeddingFunction())
        self.folder = DocumentFolder(ROOT_FOLDER)

    # Define a function to add documents to the Chroma database
    def _add_index(self, document: str, doc_id: str, other_meta=None, chunk_size=100, chunk_overlap=0):
        """Split ``document`` into chunks and upsert them into the collection.

        Chunk ids are ``"{doc_id}_{i}"``.  Every chunk carries the same
        metadata: ``doc_id``, the entries of ``other_meta`` (if given) and a
        ``doc_time`` that defaults to today's date (``YYYY-MM-DD``).
        """
        assert ';' not in doc_id
        # Split the document into chunks using the RecursiveCharacterTextSplitter
        if LANGUAGE_PREFERENCE == 'Chinese':
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size//4, chunk_overlap=chunk_overlap, separators=['。','?'], keep_separator=False)
        else:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_text(document)
        # Add each chunk to ChromaDB with associated doc_id and its index
        ids = [f"{doc_id}_{i}" for i, _ in enumerate(chunks)]
        doc_metadata = {"doc_id": doc_id}
        if other_meta:
            doc_metadata.update(other_meta)
        if "doc_time" not in doc_metadata:
            doc_metadata["doc_time"] = datetime.datetime.now().strftime("%Y-%m-%d")
        self.collection.upsert(documents=chunks, ids=ids, metadatas=[doc_metadata]*len(chunks))

    def add_document(self, document: str, doc_id: str, **kwargs):
        """Replace the index entries of ``doc_id`` and save its text to the folder.

        ``kwargs`` are forwarded to ``_add_index``.
        """
        with self.lock:
            self._remove_index_by_doc_id(doc_id)
            self._add_index(document, doc_id, **kwargs)
            self.folder.save(doc_id, document)

    def query_by_strings(self, strings, n_results):
        """Query the collection with ``strings``; returns documents, metadatas and distances."""
        with self.lock:
            res = self.collection.query(
                query_texts=strings,
                n_results=n_results,
                include = [ "documents", "metadatas", "distances" ]
            )
            return res

    def query_by_strings_with_time_range(self, strings, n_results, start_time, end_time):
        """Like ``query_by_strings`` but restricted to chunks whose ``doc_time``
        lies between ``start_time`` and ``end_time`` (inclusive).

        Both bounds are formatted as ``YYYY-MM-DD`` strings and compared as
        strings through the ``$gte``/``$lte`` operators.
        """
        with self.lock:
            start_time_str = start_time.strftime("%Y-%m-%d")
            end_time_str = end_time.strftime("%Y-%m-%d")
            print("search range: ", start_time_str, end_time_str)
            res = self.collection.query(
                query_texts=strings,
                n_results=n_results,
                include = [ "documents", "metadatas", "distances" ],
                where = {"$and":[{"doc_time":{"$gte": start_time_str}}, {"doc_time":{"$lte": end_time_str}}]}
            )
            return res

    def query_by_doc_id(self, doc_id):
        """Return documents and metadatas of every chunk whose ``doc_id`` matches."""
        with self.lock:
            res = self.collection.get(
                where = {"doc_id":doc_id},
                include = [ "documents", "metadatas" ]
            )
            return res

    def _query_by_name(self, doc_name):
        """Return the metadatas of every chunk whose ``doc_name`` matches (no locking)."""
        res = self.collection.get(
            where = {"doc_name":doc_name},
            include = [ "metadatas" ]
        )
        return res

    def query_all(self):
        """Return documents and metadatas of the whole collection."""
        with self.lock:
            return self.collection.get(include = [ "documents", "metadatas" ])

    def _remove_index_by_doc_id(self, doc_id: str):
        """Delete every chunk of ``doc_id`` from the collection (no locking)."""
        self.collection.delete(where={"doc_id":doc_id})

    def remove_document(self, doc_id: str):
        """Remove ``doc_id`` from both the collection and the folder."""
        with self.lock:
            self._remove_index_by_doc_id(doc_id)
            self.folder.delete(doc_id)

    def remove_document_by_name(self, doc_name: str):
        """Remove every document whose chunks carry ``doc_name`` in their metadata."""
        with self.lock:
            res = self._query_by_name(doc_name)
            print(res["metadatas"])
            if res["metadatas"]:
                # Several chunks share one doc_id; handle each id once.
                ids = {x['doc_id'] for x in res["metadatas"]}
                print("to remove: ", ids)
                for doc_id in ids:
                    self._remove_index_by_doc_id(doc_id)
                    self.folder.delete(doc_id)

    def get_document_by_ids(self, doc_ids):
        """Return the stored text of each id in ``doc_ids``, in the same order."""
        with self.lock:
            return [self.folder.load(doc_id) for doc_id in doc_ids]

    # def update_document(self, document: str, doc_id: str):
    #     # Update the document in ChromaDB by first removing and then adding the new chunks
    #     self.remove_document(doc_id)
    #     self.add_document(document, doc_id)
# Pending work queue shared by the two classes below: maps the path of a
# changed ``.md`` file to the name of the latest event recorded for it.
modified_files = {}


class MyEventHandler(FileSystemEventHandler):
    """Watchdog handler that logs every event and records the ones that
    concern ``.md`` files in ``modified_files``."""

    @staticmethod
    def _track(path, event_type):
        # Only Markdown files are queued; a later event for the same path
        # overwrites the earlier one.
        if path.endswith('.md'):
            modified_files[path] = event_type

    def on_modified(self, event):
        print(f"Change: {event.src_path}")
        self._track(event.src_path, "on_modified")

    def on_created(self, event):
        print(f"Add: {event.src_path}")
        self._track(event.src_path, "on_created")

    def on_deleted(self, event):
        print(f"Delete: {event.src_path}")
        self._track(event.src_path, "on_deleted")

    def on_moved(self, event):
        # A move is recorded as a deletion of the source plus a creation of
        # the destination.
        print(f"Move: {event.src_path} to {event.dest_path}")
        self._track(event.src_path, "on_deleted")
        self._track(event.dest_path, "on_created")


class UpdateThread(threading.Thread):
    """Worker that drains ``modified_files`` one entry at a time, refreshing
    the document store and then the BM25 corpus."""

    def __init__(self, server_state, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.server_state = server_state
        update_corpus()  # probably better start in memory server

    def run(self):
        quiet_period = timedelta(seconds=30)
        while True:
            # Nothing queued: poll again in 60 seconds.
            if not modified_files:
                time.sleep(60)
                continue

            # Postpone the work while the server was used within the last
            # 30 seconds; re-check every 10 seconds.
            now = datetime.now()
            last_use = self.server_state["last_use"]
            if last_use and now - last_use < quiet_period:
                print("Server is used lately")
                time.sleep(10)
                continue

            # Take the oldest queued entry and remove it from the queue.
            path, event_type = next(iter(modified_files.items()))
            del modified_files[path]
            print(f"To handle {event_type}: {path}")
            try:
                self._handle_change(path, event_type, now)
            except Exception:
                import traceback
                traceback.print_exc()
                print(f"error handling {event_type}: {path}")

            # The corpus is rebuilt after every handled entry, failed or not.
            update_corpus()

    def _handle_change(self, path, event_type, now):
        """Drop the stored documents for ``path`` and, for created/modified
        files, digest the file again and store the result."""
        file = os.path.basename(path)
        title = file.rsplit(".", 1)[0].replace(';', ':')
        # NOTE(review): a plain substring test on the whole path decides
        # between note and chat handling - confirm no chat path contains "notes".
        is_note = "notes" in path

        # Old entries are always removed first, whatever the event type.
        if is_note:
            doc_manager.remove_document_by_name(title)
        else:
            doc_manager.remove_document(title)

        if event_type not in ["on_created", "on_modified"]:
            return

        if is_note:
            time_str = str(now)[0:10]  # date part (YYYY-MM-DD) of ``now``
            print(time_str)
            digests = digest_markdown(title, path)
            print(digests)
            for headers, summary in digests:
                doc_id = "Note of " + headers
                doc_manager.add_document(summary, doc_id, other_meta = {"doc_time": time_str, "doc_name":title})
            return

        summary, tag = digest_simple(title, path)
        digest = f"{title}\n{summary}"
        if len(tag):
            digest += '\nOpinion: ' + tag
        print(digest)
        if not title.startswith("Conversation"):
            print("Warning: Unformatted doc ", title)
            return
        # Ten characters following the separator after the last "on" in the
        # title; presumably a YYYY-MM-DD date - verify against the file naming.
        time_str = title.rsplit("on", 1)[1][1:11]
        doc_manager.add_document(digest, title, other_meta = {"doc_time": time_str})
self.observer.schedule(event_handler, self.note_path, recursive=False) 106 | self.observer.start() 107 | try: 108 | while True: 109 | time.sleep(1) 110 | except KeyboardInterrupt: 111 | self.observer.stop() 112 | self.observer.join() 113 | -------------------------------------------------------------------------------- /backend/language_presets.json: -------------------------------------------------------------------------------- 1 | { 2 | "languages": { 3 | "English": { 4 | "user_message": "I don't remember how Loyal Elephie was created.", 5 | "think_message": "To assist {NICK_NAME}, I need to search my memory for the questions:\nHow Loyal Elephie was created?\nHow AI secretary like Loyal Elephie was developed?", 6 | "search_query": "Loyal Elephie created detail\n{NICK_NAME} AI secretary develop", 7 | "context_title": "Technical notes", 8 | "context_content": "{NICK_NAME} mentioned that the current AI secretary -- Loyal Elephie is integrated with advanced vector search and LLM technology. It could be used to provide insightful advices because the AI secretary has access to vast knowledge from {NICK_NAME}'s notes and conversations.", 9 | "reply_message": "Hey {NICK_NAME}, how could you forget about my creation? I am your artwork using advanced vector search and LLM technology. If you need insightful advices based on your notes and conversations between us, just tap on me, ah-ha!" 10 | }, 11 | "Chinese": { 12 | "user_message": "我不记得Loyal Elephie是如何创建的。", 13 | "think_message": "为了帮助{NICK_NAME},我需要在我的记忆中搜索以下问题:\nLoyal Elephie是如何创建的?\n像Loyal Elephie这样的AI秘书是如何开发的?", 14 | "search_query": "Loyal Elephie创建详情\n{NICK_NAME} AI秘书开发", 15 | "context_title": "技术笔记", 16 | "context_content": "{NICK_NAME}提到当前的AI秘书——Loyal Elephie集成了先进的向量搜索和LLM技术。它可以用来提供有见地的建议,因为AI秘书可以访问{NICK_NAME}的笔记和对话中的大量知识。", 17 | "reply_message": "嘿,{NICK_NAME},你怎么能忘记我的创建过程呢?我是使用先进的向量搜索和LLM技术的作品。如果你需要基于你的笔记和我们之间的对话提供有见地的建议,只需来找我,啊哈!" 
18 | }, 19 | "German": { 20 | "user_message": "Ich erinnere mich nicht daran, wie Loyal Elephie erschaffen wurde.", 21 | "think_message": "Um {NICK_NAME} zu unterstützen, muss ich mein Gedächtnis nach den Fragen durchsuchen:\nWie wurde Loyal Elephie erschaffen?\nWie wurde ein KI-Sekretär wie Loyal Elephie entwickelt?", 22 | "search_query": "Loyal Elephie Erschaffungsdetails\n{NICK_NAME} KI-Sekretär Entwicklung", 23 | "context_title": "Technische Notizen", 24 | "context_content": "{NICK_NAME} erwähnte, dass der aktuelle KI-Sekretär -- Loyal Elephie mit fortschrittlicher Vektorsuche und LLM-Technologie integriert ist. Er könnte verwendet werden, um aufschlussreiche Ratschläge zu geben, da der KI-Sekretär Zugang zu umfangreichem Wissen aus {NICK_NAME}s Notizen und Gesprächen hat.", 25 | "reply_message": "Hey {NICK_NAME}, wie konntest du meine Erschaffung vergessen? Ich bin dein Kunstwerk, das fortschrittliche Vektorsuche und LLM-Technologie verwendet. Wenn du aufschlussreiche Ratschläge basierend auf deinen Notizen und unseren Gesprächen benötigst, tippe einfach auf mich, aha!" 26 | }, 27 | "French": { 28 | "user_message": "Je ne me souviens pas comment Loyal Elephie a été créé.", 29 | "think_message": "Pour aider {NICK_NAME}, je dois chercher dans ma mémoire les questions :\nComment Loyal Elephie a-t-il été créé ?\nComment un secrétaire IA comme Loyal Elephie a-t-il été développé ?", 30 | "search_query": "Détails de la création de Loyal Elephie\nDéveloppement du secrétaire IA de {NICK_NAME}", 31 | "context_title": "Notes techniques", 32 | "context_content": "{NICK_NAME} a mentionné que le secrétaire IA actuel -- Loyal Elephie est intégré avec une technologie avancée de recherche vectorielle et LLM. Il pourrait être utilisé pour fournir des conseils perspicaces car le secrétaire IA a accès à de vastes connaissances issues des notes et conversations de {NICK_NAME}.", 33 | "reply_message": "Hey {NICK_NAME}, comment as-tu pu oublier ma création ? 
Je suis ton œuvre d'art utilisant une technologie avancée de recherche vectorielle et LLM. Si tu as besoin de conseils perspicaces basés sur tes notes et nos conversations, il te suffit de me taper dessus, ah-ha !" 34 | }, 35 | "Spanish": { 36 | "user_message": "No recuerdo cómo se creó Loyal Elephie.", 37 | "think_message": "Para ayudar a {NICK_NAME}, necesito buscar en mi memoria las preguntas:\n¿Cómo se creó Loyal Elephie?\n¿Cómo se desarrolló un secretario de IA como Loyal Elephie?", 38 | "search_query": "Detalles de la creación de Loyal Elephie\nDesarrollo del secretario de IA de {NICK_NAME}", 39 | "context_title": "Notas técnicas", 40 | "context_content": "{NICK_NAME} mencionó que el secretario de IA actual -- Loyal Elephie está integrado con tecnología avanzada de búsqueda vectorial y LLM. Podría utilizarse para proporcionar consejos perspicaces porque el secretario de IA tiene acceso a un vasto conocimiento de las notas y conversaciones de {NICK_NAME}.", 41 | "reply_message": "¡Oye {NICK_NAME}, ¿cómo pudiste olvidar mi creación? Soy tu obra de arte utilizando tecnología avanzada de búsqueda vectorial y LLM. Si necesitas consejos perspicaces basados en tus notas y nuestras conversaciones, ¡solo tócame, ah-ha!" 42 | }, 43 | "Portuguese": { 44 | "user_message": "Não me lembro como o Loyal Elephie foi criado.", 45 | "think_message": "Para ajudar {NICK_NAME}, preciso pesquisar na minha memória as perguntas:\nComo o Loyal Elephie foi criado?\nComo um secretário de IA como o Loyal Elephie foi desenvolvido?", 46 | "search_query": "Detalhes da criação do Loyal Elephie\nDesenvolvimento do secretário de IA de {NICK_NAME}", 47 | "context_title": "Notas técnicas", 48 | "context_content": "{NICK_NAME} mencionou que o atual secretário de IA -- Loyal Elephie está integrado com tecnologia avançada de busca vetorial e LLM. 
Ele poderia ser usado para fornecer conselhos perspicazes porque o secretário de IA tem acesso a um vasto conhecimento das notas e conversas de {NICK_NAME}.", 49 | "reply_message": "Ei {NICK_NAME}, como você pôde esquecer da minha criação? Eu sou sua obra de arte usando tecnologia avançada de busca vetorial e LLM. Se você precisar de conselhos perspicazes baseados em suas notas e nossas conversas, é só me tocar, ah-ha!" 50 | }, 51 | "Italian": { 52 | "user_message": "Non ricordo come è stato creato Loyal Elephie.", 53 | "think_message": "Per assistere {NICK_NAME}, devo cercare nella mia memoria le domande:\nCome è stato creato Loyal Elephie?\nCome è stato sviluppato un segretario IA come Loyal Elephie?", 54 | "search_query": "Dettagli sulla creazione di Loyal Elephie\nSviluppo del segretario IA di {NICK_NAME}", 55 | "context_title": "Note tecniche", 56 | "context_content": "{NICK_NAME} ha menzionato che l'attuale segretario IA -- Loyal Elephie è integrato con tecnologia avanzata di ricerca vettoriale e LLM. Potrebbe essere utilizzato per fornire consigli perspicaci perché il segretario IA ha accesso a vaste conoscenze dalle note e dalle conversazioni di {NICK_NAME}.", 57 | "reply_message": "Ehi {NICK_NAME}, come hai potuto dimenticare la mia creazione? Sono la tua opera d'arte che utilizza tecnologia avanzata di ricerca vettoriale e LLM. Se hai bisogno di consigli perspicaci basati sulle tue note e le nostre conversazioni, basta toccarmi, ah-ha!" 
58 | }, 59 | "Dutch": { 60 | "user_message": "Ik herinner me niet hoe Loyal Elephie werd gecreëerd.", 61 | "think_message": "Om {NICK_NAME} te helpen, moet ik in mijn geheugen zoeken naar de vragen:\nHoe werd Loyal Elephie gecreëerd?\nHoe werd een AI-secretaris zoals Loyal Elephie ontwikkeld?", 62 | "search_query": "Details over de creatie van Loyal Elephie\nOntwikkeling van {NICK_NAME}'s AI-secretaris", 63 | "context_title": "Technische notities", 64 | "context_content": "{NICK_NAME} vermeldde dat de huidige AI-secretaris -- Loyal Elephie is geïntegreerd met geavanceerde vectorzoekopdracht en LLM-technologie. Het zou kunnen worden gebruikt om inzichtelijke adviezen te geven omdat de AI-secretaris toegang heeft tot uitgebreide kennis uit {NICK_NAME}'s notities en gesprekken.", 65 | "reply_message": "Hé {NICK_NAME}, hoe kon je mijn creatie vergeten? Ik ben jouw kunstwerk dat gebruik maakt van geavanceerde vectorzoekopdracht en LLM-technologie. Als je inzichtelijke adviezen nodig hebt op basis van je notities en onze gesprekken, tik dan gewoon op mij, ah-ha!" 66 | }, 67 | "Czech": { 68 | "user_message": "Nepamatuji si, jak byl Loyal Elephie vytvořen.", 69 | "think_message": "Abych pomohl {NICK_NAME}, musím ve své paměti vyhledat otázky:\nJak byl Loyal Elephie vytvořen?\nJak byl vyvinut AI sekretář jako Loyal Elephie?", 70 | "search_query": "Podrobnosti o vytvoření Loyal Elephie\nVývoj AI sekretáře {NICK_NAME}", 71 | "context_title": "Technické poznámky", 72 | "context_content": "{NICK_NAME} zmínil, že současný AI sekretář -- Loyal Elephie je integrován s pokročilou technologií vektorového vyhledávání a LLM. Mohl by být použit k poskytování pronikavých rad, protože AI sekretář má přístup k rozsáhlým znalostem z poznámek a konverzací {NICK_NAME}.", 73 | "reply_message": "Hej {NICK_NAME}, jak jsi mohl zapomenout na moje vytvoření? Jsem tvoje umělecké dílo využívající pokročilou technologii vektorového vyhledávání a LLM. 
Pokud potřebuješ pronikavé rady založené na tvých poznámkách a našich konverzacích, stačí na mě klepnout, ah-ha!" 74 | }, 75 | "Polish": { 76 | "user_message": "Nie pamiętam, jak został stworzony Loyal Elephie.", 77 | "think_message": "Aby pomóc {NICK_NAME}, muszę przeszukać moją pamięć w poszukiwaniu pytań:\nJak został stworzony Loyal Elephie?\nJak został opracowany sekretarz AI taki jak Loyal Elephie?", 78 | "search_query": "Szczegóły stworzenia Loyal Elephie\nRozwój sekretarza AI {NICK_NAME}", 79 | "context_title": "Notatki techniczne", 80 | "context_content": "{NICK_NAME} wspomniał, że obecny sekretarz AI -- Loyal Elephie jest zintegrowany z zaawansowaną technologią wyszukiwania wektorowego i LLM. Mógłby być wykorzystywany do udzielania wnikliwych porad, ponieważ sekretarz AI ma dostęp do obszernej wiedzy z notatek i rozmów {NICK_NAME}.", 81 | "reply_message": "Hej {NICK_NAME}, jak mogłeś zapomnieć o moim stworzeniu? Jestem twoim dziełem sztuki wykorzystującym zaawansowaną technologię wyszukiwania wektorowego i LLM. Jeśli potrzebujesz wnikliwych porad opartych na twoich notatkach i naszych rozmowach, po prostu dotknij mnie, ah-ha!" 82 | }, 83 | "Russian": { 84 | "user_message": "Я не помню, как был создан Loyal Elephie.", 85 | "think_message": "Чтобы помочь {NICK_NAME}, мне нужно поискать в своей памяти ответы на вопросы:\nКак был создан Loyal Elephie?\nКак был разработан ИИ-секретарь, подобный Loyal Elephie?", 86 | "search_query": "Подробности создания Loyal Elephie\nРазработка ИИ-секретаря {NICK_NAME}", 87 | "context_title": "Технические заметки", 88 | "context_content": "{NICK_NAME} упомянул, что текущий ИИ-секретарь -- Loyal Elephie интегрирован с передовой технологией векторного поиска и LLM. Его можно использовать для предоставления проницательных советов, поскольку ИИ-секретарь имеет доступ к обширным знаниям из заметок и разговоров {NICK_NAME}.", 89 | "reply_message": "Эй, {NICK_NAME}, как ты мог забыть о моем создании? 
Я твое произведение искусства, использующее передовую технологию векторного поиска и LLM. Если тебе нужны проницательные советы, основанные на твоих заметках и наших разговорах, просто нажми на меня, ах-ха!" 90 | }, 91 | "Arabic": { 92 | "user_message": "لا أتذكر كيف تم إنشاء Loyal Elephie.", 93 | "think_message": "لمساعدة {NICK_NAME}، أحتاج إلى البحث في ذاكرتي عن الأسئلة:\nكيف تم إنشاء Loyal Elephie؟\nكيف تم تطوير سكرتير الذكاء الاصطناعي مثل Loyal Elephie؟", 94 | "search_query": "تفاصيل إنشاء Loyal Elephie\nتطوير سكرتير الذكاء الاصطناعي لـ {NICK_NAME}", 95 | "context_title": "ملاحظات تقنية", 96 | "context_content": "ذكر {NICK_NAME} أن سكرتير الذكاء الاصطناعي الحالي -- Loyal Elephie مدمج مع تقنية بحث متجهي متقدمة وتقنية LLM. يمكن استخدامه لتقديم نصائح ثاقبة لأن سكرتير الذكاء الاصطناعي لديه إمكانية الوصول إلى معرفة واسعة من ملاحظات ومحادثات {NICK_NAME}.", 97 | "reply_message": "مرحبًا {NICK_NAME}، كيف يمكنك أن تنسى إنشائي؟ أنا عملك الفني الذي يستخدم تقنية البحث المتجهي المتقدمة وتقنية LLM. إذا كنت بحاجة إلى نصائح ثاقبة بناءً على ملاحظاتك ومحادثاتنا، ما عليك سوى النقر علي، آه-ها!" 98 | } 99 | } 100 | } -------------------------------------------------------------------------------- /backend/llama_types.py: -------------------------------------------------------------------------------- 1 | """Types and request signatures for OpenAI compatibility 2 | 3 | NOTE: These types may change to match the OpenAI OpenAPI specification. 4 | 5 | Based on the OpenAI OpenAPI specification: 6 | https://github.com/openai/openai-openapi/blob/master/openapi.yaml 7 | 8 | """ 9 | from typing import Any, List, Optional, Dict, Union 10 | from typing_extensions import TypedDict, NotRequired, Literal 11 | 12 | 13 | # NOTE: Defining this correctly using annotations seems to break pydantic validation. 
# This is a workaround until we can figure out how to do this correctly
# JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]]
JsonType = Union[None, int, str, bool, List[Any], Dict[str, Any]]


class EmbeddingUsage(TypedDict):
    """Token counts attached to an embedding response (``CreateEmbeddingResponse.usage``)."""

    prompt_tokens: int
    total_tokens: int


class Embedding(TypedDict):
    """One item of ``CreateEmbeddingResponse.data``: a float vector and its ``index``."""

    index: int
    object: str
    embedding: List[float]


class CreateEmbeddingResponse(TypedDict):
    """Embedding response envelope; ``object`` is always ``"list"``."""

    object: Literal["list"]
    model: str
    data: List[Embedding]
    usage: EmbeddingUsage


class CompletionLogprobs(TypedDict):
    """Log-probability details referenced by ``CompletionChoice.logprobs``."""

    text_offset: List[int]
    token_logprobs: List[Optional[float]]
    tokens: List[str]
    top_logprobs: List[Optional[Dict[str, float]]]


class CompletionChoice(TypedDict):
    """One text-completion choice; ``logprobs`` and ``finish_reason`` may be ``None``."""

    text: str
    index: int
    logprobs: Optional[CompletionLogprobs]
    finish_reason: Optional[Literal["stop", "length"]]


class CompletionUsage(TypedDict):
    """Prompt, completion and total token counts."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CreateCompletionResponse(TypedDict):
    """Text-completion response envelope; the ``usage`` key may be omitted."""

    id: str
    object: Literal["text_completion"]
    created: int
    model: str
    choices: List[CompletionChoice]
    usage: NotRequired[CompletionUsage]


class ChatCompletionResponseFunctionCall(TypedDict):
    """Function name plus its arguments, both carried as strings."""

    name: str
    arguments: str


class ChatCompletionResponseMessage(TypedDict):
    """Message carried by ``ChatCompletionResponseChoice.message``."""

    content: Optional[str]
    # Forward reference: ChatCompletionMessageToolCalls is defined further down.
    tool_calls: NotRequired["ChatCompletionMessageToolCalls"]
    role: Literal["assistant", "function"]  # NOTE: "function" may be incorrect here
    function_call: NotRequired[ChatCompletionResponseFunctionCall]  # DEPRECATED


class ChatCompletionFunction(TypedDict):
    """Function description: name, optional description and a parameters mapping."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionResponseChoice(TypedDict):
    """One choice of a non-streaming chat completion response."""

    index: int
    message: "ChatCompletionResponseMessage"
    finish_reason: Optional[str]


class CreateChatCompletionResponse(TypedDict):
    """Chat completion response envelope; ``object`` is always ``"chat.completion"``."""

    id: str
    object: Literal["chat.completion"]
    created: int
    model: str
    choices: List["ChatCompletionResponseChoice"]
    usage: CompletionUsage


class ChatCompletionMessageToolCallChunkFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionMessageToolCallChunk``."""

    name: str
    arguments: str


class ChatCompletionMessageToolCallChunk(TypedDict):
    """Tool-call entry used in ``ChatCompletionStreamResponseDelta.tool_calls``; ``id`` is optional."""

    index: int
    id: NotRequired[str]
    type: Literal["function"]
    function: ChatCompletionMessageToolCallChunkFunction


class ChatCompletionStreamResponseDeltaEmpty(TypedDict):
    """Delta with no keys; the alternative member of the stream choice's ``delta`` union."""

    pass


class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict):
    """``function_call`` payload of a streamed delta (the field using it is marked DEPRECATED)."""

    name: str
    arguments: str


class ChatCompletionStreamResponseDelta(TypedDict):
    """Streamed delta; every key is optional."""

    content: NotRequired[str]
    function_call: NotRequired[
        ChatCompletionStreamResponseDeltaFunctionCall
    ]  # DEPRECATED
    tool_calls: NotRequired[List[ChatCompletionMessageToolCallChunk]]
    role: NotRequired[Literal["system", "user", "assistant", "tool"]]


class ChatCompletionStreamResponseChoice(TypedDict):
    """One choice of a streamed chunk; ``delta`` is either a populated or an empty delta."""

    index: int
    delta: Union[
        ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty
    ]
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]]


class CreateChatCompletionStreamResponse(TypedDict):
    """Streamed chunk envelope; ``object`` is always ``"chat.completion.chunk"``."""

    id: str
    model: str
    object: Literal["chat.completion.chunk"]
    created: int
    choices: List[ChatCompletionStreamResponseChoice]


class ChatCompletionFunctions(TypedDict):
    """Same fields as ``ChatCompletionFunction``; kept as a separate public name."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionFunctionCallOption(TypedDict):
    """Selects a function by ``name``."""

    name: str


class ChatCompletionRequestResponseFormat(TypedDict):
    """Requested response format: ``"text"`` or ``"json_object"``."""

    type: Literal["text", "json_object"]


class ChatCompletionRequestMessageContentPartText(TypedDict):
    """Text content part of a user message."""

    type: Literal["text"]
    text: str


class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict):
    """Image URL object with an optional ``detail`` setting."""

    url: str
    detail: NotRequired[Literal["auto", "low", "high"]]


class ChatCompletionRequestMessageContentPartImage(TypedDict):
    """Image content part; ``image_url`` is a plain string or an image-URL object."""

    type: Literal["image_url"]
    image_url: Union[str, ChatCompletionRequestMessageContentPartImageImageUrl]


# A user message's list content is made of text and/or image parts.
ChatCompletionRequestMessageContentPart = Union[
    ChatCompletionRequestMessageContentPartText,
    ChatCompletionRequestMessageContentPartImage,
]


class ChatCompletionRequestSystemMessage(TypedDict):
    """Request message with role ``"system"``."""

    role: Literal["system"]
    content: Optional[str]


class ChatCompletionRequestUserMessage(TypedDict):
    """Request message with role ``"user"``; content is a string or a list of content parts."""

    role: Literal["user"]
    content: Optional[Union[str, List[ChatCompletionRequestMessageContentPart]]]


class ChatCompletionMessageToolCallFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionMessageToolCall``."""

    name: str
    arguments: str


class ChatCompletionMessageToolCall(TypedDict):
    """A tool call with an ``id``; ``type`` is always ``"function"``."""

    id: str
    type: Literal["function"]
    function: ChatCompletionMessageToolCallFunction


ChatCompletionMessageToolCalls = List[ChatCompletionMessageToolCall]


class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict):
    """``function_call`` payload of an assistant request message (the field using it is marked DEPRECATED)."""

    name: str
    arguments: str


class ChatCompletionRequestAssistantMessage(TypedDict):
    """Request message with role ``"assistant"``; tool/function call keys are optional."""

    role: Literal["assistant"]
    content: Optional[str]
    tool_calls: NotRequired[ChatCompletionMessageToolCalls]
    function_call: NotRequired[
        ChatCompletionRequestAssistantMessageFunctionCall
    ]  # DEPRECATED


class ChatCompletionRequestToolMessage(TypedDict):
    """Request message with role ``"tool"``, tied to a call via ``tool_call_id``."""

    role: Literal["tool"]
    content: Optional[str]
    tool_call_id: str


class ChatCompletionRequestFunctionMessage(TypedDict):
    """Request message with role ``"function"``, identified by ``name``."""

    role: Literal["function"]
    content: Optional[str]
    name: str


# Any message accepted in a chat completion request, one member per role.
ChatCompletionRequestMessage = Union[
    ChatCompletionRequestSystemMessage,
    ChatCompletionRequestUserMessage,
    ChatCompletionRequestAssistantMessage,
    ChatCompletionRequestToolMessage,
    ChatCompletionRequestFunctionMessage,
]


class ChatCompletionRequestFunctionCallOption(TypedDict):
    """Selects a function by ``name`` in a request's function-call setting."""

    name: str


ChatCompletionRequestFunctionCall = Union[
    Literal["none", "auto"], ChatCompletionRequestFunctionCallOption
]

ChatCompletionFunctionParameters = Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionToolFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionTool``."""

    name: str
    description: NotRequired[str]
    parameters: ChatCompletionFunctionParameters


class ChatCompletionTool(TypedDict):
    """Tool definition; ``type`` is always ``"function"``."""

    type: Literal["function"]
    function: ChatCompletionToolFunction


class ChatCompletionNamedToolChoiceFunction(TypedDict):
    """Names the function selected by a ``ChatCompletionNamedToolChoice``."""

    name: str


class ChatCompletionNamedToolChoice(TypedDict):
    """Tool choice that names one specific function."""

    type: Literal["function"]
    function: ChatCompletionNamedToolChoiceFunction


ChatCompletionToolChoiceOption = Union[
    Literal["none", "auto"], ChatCompletionNamedToolChoice
]


# NOTE: The following type names are not part of the OpenAI OpenAPI specification
# and will be removed in a future major release.
from openai import OpenAI
from langchain.text_splitter import MarkdownHeaderTextSplitter
import tiktoken

from settings import *

# Embedding and chat requests use separately configured endpoints and keys.
client_embed = OpenAI(base_url = EMBEDDING_BASE_URL, api_key = EMBEDDING_API_KEY)
client = OpenAI(base_url = CHAT_BASE_URL, api_key = CHAT_API_KEY)

# Metadata keys produced by the header splitter, ordered from shallowest to deepest.
_HEADER_KEYS = ("header1", "header2", "header3")


def get_embeddings(chunks):
    """Embed ``chunks`` with ``EMBEDDING_MODEL_NAME``.

    Returns a list holding the ``embedding`` of every item in the response data.
    """
    data = client_embed.embeddings.create(input=chunks, model=EMBEDDING_MODEL_NAME).data
    return [d.embedding for d in data]


def chat(messages:list[dict]):
    """Send ``messages`` to ``CHAT_MODEL_NAME`` and return the first choice's content.

    The reply length is capped by ``CHAT_MAX_TOKEN``.
    """
    response = client.chat.completions.create(
        model=CHAT_MODEL_NAME,
        messages=messages,
        max_tokens=CHAT_MAX_TOKEN
    )
    return response.choices[0].message.content


def _localized_prompt(template):
    """Fill the ``{NICK_NAME}`` and ``{LANGUAGE_PREFERENCE}`` placeholders of a prompt template."""
    prompt = template.replace("{NICK_NAME}", NICK_NAME)
    language_hint = "" if LANGUAGE_PREFERENCE=="English" else f" The note should be in {LANGUAGE_PREFERENCE}."
    return prompt.replace("{LANGUAGE_PREFERENCE}", language_hint)


def simplify_markdown_headers(page_content, current_nesting_level):
    """Shift markdown header levels so they are relative to ``current_nesting_level``.

    Every line starting with ``#`` is rewritten to level
    ``max(original_level - current_nesting_level + 1, 1)``. The original level is
    the number of *leading* ``#`` characters, so a ``#`` later in the title
    (e.g. "## C# notes") no longer inflates it. Lines inside fenced code blocks
    (``` or ~~~) are passed through untouched, since a ``#`` there is code, not a header.

    Args:
        page_content: Markdown text.
        current_nesting_level: Depth of the section the text was taken from.

    Returns:
        The text with adjusted header lines, joined with ``\\n``.
    """
    simplified_lines = []
    open_fence = None  # fence marker of the code block we are inside, if any
    for line in page_content.split('\n'):
        stripped = line.lstrip()
        if open_fence is not None:
            # Inside a code block: copy verbatim until the matching fence closes it.
            if stripped.startswith(open_fence):
                open_fence = None
            simplified_lines.append(line)
            continue
        if stripped.startswith('```') or stripped.startswith('~~~'):
            open_fence = stripped[:3]
            simplified_lines.append(line)
            continue
        if line.startswith('#'):
            title = line.lstrip('#')
            header_level = len(line) - len(title)  # leading '#' only
            # Ensure the new header level is at least 1
            new_header_level = max(header_level - current_nesting_level + 1, 1)
            simplified_lines.append('#' * new_header_level + ' ' + title.lstrip())
        else:
            simplified_lines.append(line)
    return '\n'.join(simplified_lines)


# This function will try to digest a markdown file into multiple docs based on headers
def digest_markdown(title, path):
    """Split the markdown file at ``path`` on its top three header levels and summarise each part.

    Args:
        title: Used as the first element of every section's header trail.
        path: File to read (UTF-8).

    Returns:
        A list of ``(header_trail, summary)`` tuples, where ``header_trail`` is
        ``title`` followed by the section's headers joined with " > ".
    """
    headers_to_split_on = [
        ("#", "header1"),
        ("##", "header2"),
        ("###", "header3"),
    ]
    parent_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    with open(path, encoding='utf-8') as f:
        s = f.read()
    docs = parent_splitter.split_text(s)
    # The system prompt does not depend on the section, so build it once.
    prompt = _localized_prompt(SUMMARY_NOTE_PROMPT)
    digests = []
    for doc in docs:
        headers = title
        level = 0
        for depth, key in enumerate(_HEADER_KEYS, start=1):
            if key in doc.metadata:
                headers += " > " + doc.metadata[key].strip()
                level = depth  # ends up as the deepest header level present
        page_content = simplify_markdown_headers(doc.page_content.strip(), level)
        content = f"---Begin Note---\nHeaders: {headers}\n{page_content}\n---End Note---"
        summary = chat([
            {"role": "system", "content": prompt},
            {"role": "user", "content": content}]
        )
        digests.append((headers, summary))
    return digests


def digest_simple(title, path):
    """Summarise the whole file at ``path`` in one chat call.

    If the file starts with ``#``, its first line is taken as a tag (leading
    ``#`` and surrounding whitespace removed) and excluded from the summarised
    text. A file consisting only of that tag line is handled (empty body).

    Returns:
        ``(summary, tag)``; ``tag`` is ``""`` for untagged files.
    """
    with open(path, encoding='utf-8') as f:
        s = f.read()
    tag = ""
    if s.startswith('#'): # tagged doc
        # partition() tolerates a missing newline, unlike unpacking split('\n', 1)
        first_line, _, s = s.partition('\n')
        tag = first_line.lstrip('#').strip()
    text = f"---{title}---\n{s}"
    prompt = _localized_prompt(SUMMARY_PROMPT)
    summary = chat([
        {"role": "system", "content": prompt},
        {"role": "user", "content": text}]).strip()
    return summary, tag


def count_token(input_str):
    """Return the token count of a string or of a ``{"role", "content"}`` message dict.

    Dicts are flattened to ``"role: ..., content: ..."`` before encoding.
    """
    encoding = tiktoken.get_encoding("o200k_base") # This is only approximation
    if isinstance(input_str, dict):
        input_str = f"role: {input_str['role']}, content: {input_str['content']}"
    return len(encoding.encode(input_str))
6 | 7 | They were obtained from: 8 | http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/ 9 | 10 | The stop words for the Romanian language were obtained from: 11 | http://arlc.ro/resources/ 12 | 13 | The English list has been augmented 14 | https://github.com/nltk/nltk_data/issues/22 15 | 16 | The German list has been corrected 17 | https://github.com/nltk/nltk_data/pull/49 18 | 19 | A Kazakh list has been added 20 | https://github.com/nltk/nltk_data/pull/52 21 | 22 | A Nepali list has been added 23 | https://github.com/nltk/nltk_data/pull/83 24 | 25 | An Azerbaijani list has been added 26 | https://github.com/nltk/nltk_data/pull/100 27 | 28 | A Greek list has been added 29 | https://github.com/nltk/nltk_data/pull/103 30 | 31 | An Indonesian list has been added 32 | https://github.com/nltk/nltk_data/pull/112 33 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/arabic: -------------------------------------------------------------------------------- 1 | إذ 2 | إذا 3 | إذما 4 | إذن 5 | أف 6 | أقل 7 | أكثر 8 | ألا 9 | إلا 10 | التي 11 | الذي 12 | الذين 13 | اللاتي 14 | اللائي 15 | اللتان 16 | اللتيا 17 | اللتين 18 | اللذان 19 | اللذين 20 | اللواتي 21 | إلى 22 | إليك 23 | إليكم 24 | إليكما 25 | إليكن 26 | أم 27 | أما 28 | أما 29 | إما 30 | أن 31 | إن 32 | إنا 33 | أنا 34 | أنت 35 | أنتم 36 | أنتما 37 | أنتن 38 | إنما 39 | إنه 40 | أنى 41 | أنى 42 | آه 43 | آها 44 | أو 45 | أولاء 46 | أولئك 47 | أوه 48 | آي 49 | أي 50 | أيها 51 | إي 52 | أين 53 | أين 54 | أينما 55 | إيه 56 | بخ 57 | بس 58 | بعد 59 | بعض 60 | بك 61 | بكم 62 | بكم 63 | بكما 64 | بكن 65 | بل 66 | بلى 67 | بما 68 | بماذا 69 | بمن 70 | بنا 71 | به 72 | بها 73 | بهم 74 | بهما 75 | بهن 76 | بي 77 | بين 78 | بيد 79 | تلك 80 | تلكم 81 | تلكما 82 | ته 83 | تي 84 | تين 85 | تينك 86 | ثم 87 | ثمة 88 | حاشا 89 | حبذا 90 | حتى 91 | حيث 92 | حيثما 93 | حين 94 | خلا 95 | دون 96 | ذا 97 | ذات 98 | ذاك 99 | ذان 100 | ذانك 101 
| ذلك 102 | ذلكم 103 | ذلكما 104 | ذلكن 105 | ذه 106 | ذو 107 | ذوا 108 | ذواتا 109 | ذواتي 110 | ذي 111 | ذين 112 | ذينك 113 | ريث 114 | سوف 115 | سوى 116 | شتان 117 | عدا 118 | عسى 119 | عل 120 | على 121 | عليك 122 | عليه 123 | عما 124 | عن 125 | عند 126 | غير 127 | فإذا 128 | فإن 129 | فلا 130 | فمن 131 | في 132 | فيم 133 | فيما 134 | فيه 135 | فيها 136 | قد 137 | كأن 138 | كأنما 139 | كأي 140 | كأين 141 | كذا 142 | كذلك 143 | كل 144 | كلا 145 | كلاهما 146 | كلتا 147 | كلما 148 | كليكما 149 | كليهما 150 | كم 151 | كم 152 | كما 153 | كي 154 | كيت 155 | كيف 156 | كيفما 157 | لا 158 | لاسيما 159 | لدى 160 | لست 161 | لستم 162 | لستما 163 | لستن 164 | لسن 165 | لسنا 166 | لعل 167 | لك 168 | لكم 169 | لكما 170 | لكن 171 | لكنما 172 | لكي 173 | لكيلا 174 | لم 175 | لما 176 | لن 177 | لنا 178 | له 179 | لها 180 | لهم 181 | لهما 182 | لهن 183 | لو 184 | لولا 185 | لوما 186 | لي 187 | لئن 188 | ليت 189 | ليس 190 | ليسا 191 | ليست 192 | ليستا 193 | ليسوا 194 | ما 195 | ماذا 196 | متى 197 | مذ 198 | مع 199 | مما 200 | ممن 201 | من 202 | منه 203 | منها 204 | منذ 205 | مه 206 | مهما 207 | نحن 208 | نحو 209 | نعم 210 | ها 211 | هاتان 212 | هاته 213 | هاتي 214 | هاتين 215 | هاك 216 | هاهنا 217 | هذا 218 | هذان 219 | هذه 220 | هذي 221 | هذين 222 | هكذا 223 | هل 224 | هلا 225 | هم 226 | هما 227 | هن 228 | هنا 229 | هناك 230 | هنالك 231 | هو 232 | هؤلاء 233 | هي 234 | هيا 235 | هيت 236 | هيهات 237 | والذي 238 | والذين 239 | وإذ 240 | وإذا 241 | وإن 242 | ولا 243 | ولكن 244 | ولو 245 | وما 246 | ومن 247 | وهو 248 | يا 249 | أبٌ 250 | أخٌ 251 | حمٌ 252 | فو 253 | أنتِ 254 | يناير 255 | فبراير 256 | مارس 257 | أبريل 258 | مايو 259 | يونيو 260 | يوليو 261 | أغسطس 262 | سبتمبر 263 | أكتوبر 264 | نوفمبر 265 | ديسمبر 266 | جانفي 267 | فيفري 268 | مارس 269 | أفريل 270 | ماي 271 | جوان 272 | جويلية 273 | أوت 274 | كانون 275 | شباط 276 | آذار 277 | نيسان 278 | أيار 279 | حزيران 280 | تموز 281 | آب 282 | أيلول 283 | تشرين 284 | دولار 285 | دينار 286 | ريال 287 | درهم 288 | ليرة 289 | جنيه 
290 | قرش 291 | مليم 292 | فلس 293 | هللة 294 | سنتيم 295 | يورو 296 | ين 297 | يوان 298 | شيكل 299 | واحد 300 | اثنان 301 | ثلاثة 302 | أربعة 303 | خمسة 304 | ستة 305 | سبعة 306 | ثمانية 307 | تسعة 308 | عشرة 309 | أحد 310 | اثنا 311 | اثني 312 | إحدى 313 | ثلاث 314 | أربع 315 | خمس 316 | ست 317 | سبع 318 | ثماني 319 | تسع 320 | عشر 321 | ثمان 322 | سبت 323 | أحد 324 | اثنين 325 | ثلاثاء 326 | أربعاء 327 | خميس 328 | جمعة 329 | أول 330 | ثان 331 | ثاني 332 | ثالث 333 | رابع 334 | خامس 335 | سادس 336 | سابع 337 | ثامن 338 | تاسع 339 | عاشر 340 | حادي 341 | أ 342 | ب 343 | ت 344 | ث 345 | ج 346 | ح 347 | خ 348 | د 349 | ذ 350 | ر 351 | ز 352 | س 353 | ش 354 | ص 355 | ض 356 | ط 357 | ظ 358 | ع 359 | غ 360 | ف 361 | ق 362 | ك 363 | ل 364 | م 365 | ن 366 | ه 367 | و 368 | ي 369 | ء 370 | ى 371 | آ 372 | ؤ 373 | ئ 374 | أ 375 | ة 376 | ألف 377 | باء 378 | تاء 379 | ثاء 380 | جيم 381 | حاء 382 | خاء 383 | دال 384 | ذال 385 | راء 386 | زاي 387 | سين 388 | شين 389 | صاد 390 | ضاد 391 | طاء 392 | ظاء 393 | عين 394 | غين 395 | فاء 396 | قاف 397 | كاف 398 | لام 399 | ميم 400 | نون 401 | هاء 402 | واو 403 | ياء 404 | همزة 405 | ي 406 | نا 407 | ك 408 | كن 409 | ه 410 | إياه 411 | إياها 412 | إياهما 413 | إياهم 414 | إياهن 415 | إياك 416 | إياكما 417 | إياكم 418 | إياك 419 | إياكن 420 | إياي 421 | إيانا 422 | أولالك 423 | تانِ 424 | تانِك 425 | تِه 426 | تِي 427 | تَيْنِ 428 | ثمّ 429 | ثمّة 430 | ذانِ 431 | ذِه 432 | ذِي 433 | ذَيْنِ 434 | هَؤلاء 435 | هَاتانِ 436 | هَاتِه 437 | هَاتِي 438 | هَاتَيْنِ 439 | هَذا 440 | هَذانِ 441 | هَذِه 442 | هَذِي 443 | هَذَيْنِ 444 | الألى 445 | الألاء 446 | أل 447 | أنّى 448 | أيّ 449 | ّأيّان 450 | أنّى 451 | أيّ 452 | ّأيّان 453 | ذيت 454 | كأيّ 455 | كأيّن 456 | بضع 457 | فلان 458 | وا 459 | آمينَ 460 | آهِ 461 | آهٍ 462 | آهاً 463 | أُفٍّ 464 | أُفٍّ 465 | أفٍّ 466 | أمامك 467 | أمامكَ 468 | أوّهْ 469 | إلَيْكَ 470 | إلَيْكَ 471 | إليكَ 472 | إليكنّ 473 | إيهٍ 474 | بخٍ 475 | بسّ 476 | بَسْ 477 | بطآن 478 | بَلْهَ 479 | حاي 480 | حَذارِ 
481 | حيَّ 482 | حيَّ 483 | دونك 484 | رويدك 485 | سرعان 486 | شتانَ 487 | شَتَّانَ 488 | صهْ 489 | صهٍ 490 | طاق 491 | طَق 492 | عَدَسْ 493 | كِخ 494 | مكانَك 495 | مكانَك 496 | مكانَك 497 | مكانكم 498 | مكانكما 499 | مكانكنّ 500 | نَخْ 501 | هاكَ 502 | هَجْ 503 | هلم 504 | هيّا 505 | هَيْهات 506 | وا 507 | واهاً 508 | وراءَك 509 | وُشْكَانَ 510 | وَيْ 511 | يفعلان 512 | تفعلان 513 | يفعلون 514 | تفعلون 515 | تفعلين 516 | اتخذ 517 | ألفى 518 | تخذ 519 | ترك 520 | تعلَّم 521 | جعل 522 | حجا 523 | حبيب 524 | خال 525 | حسب 526 | خال 527 | درى 528 | رأى 529 | زعم 530 | صبر 531 | ظنَّ 532 | عدَّ 533 | علم 534 | غادر 535 | ذهب 536 | وجد 537 | ورد 538 | وهب 539 | أسكن 540 | أطعم 541 | أعطى 542 | رزق 543 | زود 544 | سقى 545 | كسا 546 | أخبر 547 | أرى 548 | أعلم 549 | أنبأ 550 | حدَث 551 | خبَّر 552 | نبَّا 553 | أفعل به 554 | ما أفعله 555 | بئس 556 | ساء 557 | طالما 558 | قلما 559 | لات 560 | لكنَّ 561 | ءَ 562 | أجل 563 | إذاً 564 | أمّا 565 | إمّا 566 | إنَّ 567 | أنًّ 568 | أى 569 | إى 570 | أيا 571 | ب 572 | ثمَّ 573 | جلل 574 | جير 575 | رُبَّ 576 | س 577 | علًّ 578 | ف 579 | كأنّ 580 | كلَّا 581 | كى 582 | ل 583 | لات 584 | لعلَّ 585 | لكنَّ 586 | لكنَّ 587 | م 588 | نَّ 589 | هلّا 590 | وا 591 | أل 592 | إلّا 593 | ت 594 | ك 595 | لمّا 596 | ن 597 | ه 598 | و 599 | ا 600 | ي 601 | تجاه 602 | تلقاء 603 | جميع 604 | حسب 605 | سبحان 606 | شبه 607 | لعمر 608 | مثل 609 | معاذ 610 | أبو 611 | أخو 612 | حمو 613 | فو 614 | مئة 615 | مئتان 616 | ثلاثمئة 617 | أربعمئة 618 | خمسمئة 619 | ستمئة 620 | سبعمئة 621 | ثمنمئة 622 | تسعمئة 623 | مائة 624 | ثلاثمائة 625 | أربعمائة 626 | خمسمائة 627 | ستمائة 628 | سبعمائة 629 | ثمانمئة 630 | تسعمائة 631 | عشرون 632 | ثلاثون 633 | اربعون 634 | خمسون 635 | ستون 636 | سبعون 637 | ثمانون 638 | تسعون 639 | عشرين 640 | ثلاثين 641 | اربعين 642 | خمسين 643 | ستين 644 | سبعين 645 | ثمانين 646 | تسعين 647 | بضع 648 | نيف 649 | أجمع 650 | جميع 651 | عامة 652 | عين 653 | نفس 654 | لا سيما 655 | أصلا 656 | أهلا 657 | أيضا 658 | بؤسا 659 | بعدا 660 
| بغتة 661 | تعسا 662 | حقا 663 | حمدا 664 | خلافا 665 | خاصة 666 | دواليك 667 | سحقا 668 | سرا 669 | سمعا 670 | صبرا 671 | صدقا 672 | صراحة 673 | طرا 674 | عجبا 675 | عيانا 676 | غالبا 677 | فرادى 678 | فضلا 679 | قاطبة 680 | كثيرا 681 | لبيك 682 | معاذ 683 | أبدا 684 | إزاء 685 | أصلا 686 | الآن 687 | أمد 688 | أمس 689 | آنفا 690 | آناء 691 | أنّى 692 | أول 693 | أيّان 694 | تارة 695 | ثمّ 696 | ثمّة 697 | حقا 698 | صباح 699 | مساء 700 | ضحوة 701 | عوض 702 | غدا 703 | غداة 704 | قطّ 705 | كلّما 706 | لدن 707 | لمّا 708 | مرّة 709 | قبل 710 | خلف 711 | أمام 712 | فوق 713 | تحت 714 | يمين 715 | شمال 716 | ارتدّ 717 | استحال 718 | أصبح 719 | أضحى 720 | آض 721 | أمسى 722 | انقلب 723 | بات 724 | تبدّل 725 | تحوّل 726 | حار 727 | رجع 728 | راح 729 | صار 730 | ظلّ 731 | عاد 732 | غدا 733 | كان 734 | ما انفك 735 | ما برح 736 | مادام 737 | مازال 738 | مافتئ 739 | ابتدأ 740 | أخذ 741 | اخلولق 742 | أقبل 743 | انبرى 744 | أنشأ 745 | أوشك 746 | جعل 747 | حرى 748 | شرع 749 | طفق 750 | علق 751 | قام 752 | كرب 753 | كاد 754 | هبّ -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/azerbaijani: -------------------------------------------------------------------------------- 1 | a 2 | ad 3 | altı 4 | altmış 5 | amma 6 | arasında 7 | artıq 8 | ay 9 | az 10 | bax 11 | belə 12 | bəli 13 | bəlkə 14 | beş 15 | bəy 16 | bəzən 17 | bəzi 18 | bilər 19 | bir 20 | biraz 21 | biri 22 | birşey 23 | biz 24 | bizim 25 | bizlər 26 | bu 27 | buna 28 | bundan 29 | bunların 30 | bunu 31 | bunun 32 | buradan 33 | bütün 34 | ci 35 | cı 36 | çox 37 | cu 38 | cü 39 | çünki 40 | da 41 | daha 42 | də 43 | dedi 44 | dək 45 | dən 46 | dəqiqə 47 | deyil 48 | dir 49 | doqquz 50 | doqsan 51 | dörd 52 | düz 53 | ə 54 | edən 55 | edir 56 | əgər 57 | əlbəttə 58 | elə 59 | əlli 60 | ən 61 | əslində 62 | et 63 | etdi 64 | etmə 65 | etmək 66 | faiz 67 | gilə 68 | görə 69 | ha 70 | haqqında 71 | harada 72 | hə 73 | heç 74 | həm 75 | həmin 76 | həmişə 
77 | hər 78 | ı 79 | idi 80 | iki 81 | il 82 | ildə 83 | ilə 84 | ilk 85 | in 86 | indi 87 | isə 88 | istifadə 89 | iyirmi 90 | ki 91 | kim 92 | kimə 93 | kimi 94 | lakin 95 | lap 96 | məhz 97 | mən 98 | mənə 99 | mirşey 100 | nə 101 | nəhayət 102 | niyə 103 | o 104 | obirisi 105 | of 106 | olan 107 | olar 108 | olaraq 109 | oldu 110 | olduğu 111 | olmadı 112 | olmaz 113 | olmuşdur 114 | olsun 115 | olur 116 | on 117 | ona 118 | ondan 119 | onlar 120 | onlardan 121 | onların 122 | onsuzda 123 | onu 124 | onun 125 | oradan 126 | otuz 127 | öz 128 | özü 129 | qarşı 130 | qədər 131 | qırx 132 | saat 133 | sadəcə 134 | saniyə 135 | səhv 136 | səkkiz 137 | səksən 138 | sən 139 | sənə 140 | sənin 141 | siz 142 | sizin 143 | sizlər 144 | sonra 145 | təəssüf 146 | ü 147 | üç 148 | üçün 149 | var 150 | və 151 | xan 152 | xanım 153 | xeyr 154 | ya 155 | yalnız 156 | yaxşı 157 | yeddi 158 | yenə 159 | yəni 160 | yetmiş 161 | yox 162 | yoxdur 163 | yoxsa 164 | yüz 165 | zaman -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/basque: -------------------------------------------------------------------------------- 1 | ahala 2 | aitzitik 3 | al 4 | ala 5 | alabadere 6 | alabaina 7 | alabaina 8 | aldiz 9 | alta 10 | amaitu 11 | amaitzeko 12 | anitz 13 | antzina 14 | arabera 15 | arabera 16 | arabera 17 | argi 18 | arratsaldero 19 | arte 20 | artean 21 | asko 22 | aspaldiko 23 | aurrera 24 | aurrera 25 | azkenez 26 | azkenik 27 | azkenik 28 | ba 29 | bada 30 | bada 31 | bada 32 | bada 33 | badarik 34 | badarik 35 | badarik 36 | badere 37 | bai 38 | baina 39 | baina 40 | baina 41 | baino 42 | baino 43 | baino 44 | baino 45 | baita 46 | baizik 47 | baldin 48 | baldin 49 | barren 50 | bat 51 | batean 52 | batean 53 | batean 54 | batean 55 | batek 56 | baten 57 | batera 58 | batez 59 | bati 60 | batzuei 61 | batzuek 62 | batzuetan 63 | batzuk 64 | bazen 65 | bederen 66 | bederik 67 | beharrez 68 | behiala 69 | behin 70 
| behin 71 | behin 72 | behin 73 | behinik 74 | behinola 75 | behintzat 76 | bera 77 | beraiek 78 | beranduago 79 | berau 80 | berauek 81 | beraz 82 | beraz 83 | bere 84 | berean 85 | berebat 86 | berehala 87 | berori 88 | beroriek 89 | berriro 90 | berriz 91 | bertzalde 92 | bertzenaz 93 | bestalde 94 | beste 95 | bestela 96 | besterik 97 | bezain 98 | bezala 99 | bide 100 | bien 101 | bigarrenez 102 | bigarrenik 103 | bitartean 104 | bitartean 105 | bizkitartean 106 | bukaeran 107 | bukatzeko 108 | da 109 | dago 110 | dago 111 | dela 112 | dela 113 | dela 114 | delarik 115 | den 116 | dena 117 | dena 118 | dezadan 119 | dira 120 | ditu 121 | du 122 | dute 123 | edo 124 | edo 125 | edota 126 | egin 127 | egin 128 | egun 129 | egun 130 | egunean 131 | emateko 132 | era 133 | erdi 134 | ere 135 | ere 136 | ere 137 | ere 138 | ere 139 | esan 140 | esan 141 | esanak 142 | esandakoaren 143 | eta 144 | eta 145 | eta 146 | eta 147 | eta 148 | eta 149 | eurak 150 | ez 151 | ez 152 | ez 153 | eze 154 | ezen 155 | ezer 156 | ezezik 157 | ezik 158 | ezpabere 159 | ezpada 160 | ezpere 161 | ezperen 162 | ezta 163 | funtsean 164 | gabe 165 | gain 166 | gainera 167 | gainera 168 | gainerontzean 169 | gaur 170 | gero 171 | gero 172 | gero 173 | geroago 174 | gisa 175 | gu 176 | gutxi 177 | guzti 178 | guztia 179 | guztiz 180 | haatik 181 | haiei 182 | haiek 183 | haietan 184 | hain 185 | hainbeste 186 | hainbestez 187 | hala 188 | hala 189 | hala 190 | halaber 191 | halako 192 | halatan 193 | han 194 | handik 195 | hango 196 | hara 197 | hargatik 198 | hari 199 | hark 200 | hartan 201 | hartan 202 | hasi 203 | hasi 204 | hasiera 205 | hasieran 206 | hasteaz 207 | hasteko 208 | hasteko 209 | hau 210 | hau 211 | hau 212 | hau 213 | hau 214 | hau 215 | hauei 216 | hauek 217 | hauetan 218 | hemen 219 | hemendik 220 | hemengo 221 | hi 222 | hona 223 | honebestez 224 | honek 225 | honela 226 | honela 227 | honela 228 | honen 229 | honen 230 | honetan 231 | honetaz 232 | honi 233 | hor 
234 | hori 235 | hori 236 | hori 237 | horiei 238 | horiek 239 | horietan 240 | horko 241 | horra 242 | horratik 243 | horregatik 244 | horregatik 245 | horrek 246 | horrela 247 | horrela 248 | horrela 249 | horren 250 | horrenbestez 251 | horretan 252 | horri 253 | hortaz 254 | hortaz 255 | hortik 256 | hura 257 | ikusi 258 | ikusi 259 | izan 260 | izan 261 | izan 262 | jarraituz 263 | kariaz 264 | kasuaz 265 | kontuan 266 | laburbilduz 267 | laburki 268 | laster 269 | laster 270 | lehen 271 | lehen 272 | lehen 273 | lehen 274 | lehenengo 275 | lehenengo 276 | lehenik 277 | lehen-lehenik 278 | litzateke 279 | medio 280 | mendean 281 | mundura 282 | nahiz 283 | ni 284 | noiz 285 | nola 286 | non 287 | nondik 288 | nongo 289 | nor 290 | nora 291 | on 292 | ondoren 293 | ondorio 294 | ondorioz 295 | ondorioz 296 | orain 297 | ordea 298 | orduan 299 | orduan 300 | orduan 301 | orduko 302 | ordura 303 | orobat 304 | ostean 305 | ostera 306 | osterantzean 307 | pentsatuz 308 | ustez 309 | ze 310 | zein 311 | zein 312 | zen 313 | zen 314 | zenbait 315 | zenbat 316 | zer 317 | zeren 318 | zergatik 319 | zergatik 320 | ziren 321 | zituen 322 | zu 323 | zuek 324 | zuen 325 | zuten 326 | zuzen 327 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/bengali: -------------------------------------------------------------------------------- 1 | অতএব 2 | অথচ 3 | অথবা 4 | অনুযায়ী 5 | অনেক 6 | অনেকে 7 | অনেকেই 8 | অন্তত 9 | অন্য 10 | অবধি 11 | অবশ্য 12 | অর্থাত 13 | আই 14 | আগামী 15 | আগে 16 | আগেই 17 | আছে 18 | আজ 19 | আদ্যভাগে 20 | আপনার 21 | আপনি 22 | আবার 23 | আমরা 24 | আমাকে 25 | আমাদের 26 | আমার 27 | আমি 28 | আর 29 | আরও 30 | ই 31 | ইত্যাদি 32 | ইহা 33 | উচিত 34 | উত্তর 35 | উনি 36 | উপর 37 | উপরে 38 | এ 39 | এঁদের 40 | এঁরা 41 | এই 42 | একই 43 | একটি 44 | একবার 45 | একে 46 | এক্ 47 | এখন 48 | এখনও 49 | এখানে 50 | এখানেই 51 | এটা 52 | এটাই 53 | এটি 54 | এত 55 | এতটাই 56 | এতে 57 | এদের 58 | এব 59 | এবং 60 | 
এবার 61 | এমন 62 | এমনকী 63 | এমনি 64 | এর 65 | এরা 66 | এল 67 | এস 68 | এসে 69 | ঐ 70 | ও 71 | ওঁদের 72 | ওঁর 73 | ওঁরা 74 | ওই 75 | ওকে 76 | ওখানে 77 | ওদের 78 | ওর 79 | ওরা 80 | কখনও 81 | কত 82 | কবে 83 | কমনে 84 | কয়েক 85 | কয়েকটি 86 | করছে 87 | করছেন 88 | করতে 89 | করবে 90 | করবেন 91 | করলে 92 | করলেন 93 | করা 94 | করাই 95 | করায় 96 | করার 97 | করি 98 | করিতে 99 | করিয়া 100 | করিয়ে 101 | করে 102 | করেই 103 | করেছিলেন 104 | করেছে 105 | করেছেন 106 | করেন 107 | কাউকে 108 | কাছ 109 | কাছে 110 | কাজ 111 | কাজে 112 | কারও 113 | কারণ 114 | কি 115 | কিংবা 116 | কিছু 117 | কিছুই 118 | কিন্তু 119 | কী 120 | কে 121 | কেউ 122 | কেউই 123 | কেখা 124 | কেন 125 | কোটি 126 | কোন 127 | কোনও 128 | কোনো 129 | ক্ষেত্রে 130 | কয়েক 131 | খুব 132 | গিয়ে 133 | গিয়েছে 134 | গিয়ে 135 | গুলি 136 | গেছে 137 | গেল 138 | গেলে 139 | গোটা 140 | চলে 141 | চান 142 | চায় 143 | চার 144 | চালু 145 | চেয়ে 146 | চেষ্টা 147 | ছাড়া 148 | ছাড়াও 149 | ছিল 150 | ছিলেন 151 | জন 152 | জনকে 153 | জনের 154 | জন্য 155 | জন্যওজে 156 | জানতে 157 | জানা 158 | জানানো 159 | জানায় 160 | জানিয়ে 161 | জানিয়েছে 162 | জে 163 | জ্নজন 164 | টি 165 | ঠিক 166 | তখন 167 | তত 168 | তথা 169 | তবু 170 | তবে 171 | তা 172 | তাঁকে 173 | তাঁদের 174 | তাঁর 175 | তাঁরা 176 | তাঁাহারা 177 | তাই 178 | তাও 179 | তাকে 180 | তাতে 181 | তাদের 182 | তার 183 | তারপর 184 | তারা 185 | তারৈ 186 | তাহলে 187 | তাহা 188 | তাহাতে 189 | তাহার 190 | তিনঐ 191 | তিনি 192 | তিনিও 193 | তুমি 194 | তুলে 195 | তেমন 196 | তো 197 | তোমার 198 | থাকবে 199 | থাকবেন 200 | থাকা 201 | থাকায় 202 | থাকে 203 | থাকেন 204 | থেকে 205 | থেকেই 206 | থেকেও 207 | দিকে 208 | দিতে 209 | দিন 210 | দিয়ে 211 | দিয়েছে 212 | দিয়েছেন 213 | দিলেন 214 | দু 215 | দুই 216 | দুটি 217 | দুটো 218 | দেওয়া 219 | দেওয়ার 220 | দেওয়া 221 | দেখতে 222 | দেখা 223 | দেখে 224 | দেন 225 | দেয় 226 | দ্বারা 227 | ধরা 228 | ধরে 229 | ধামার 230 | নতুন 231 | নয় 232 | না 233 | নাই 234 | নাকি 235 | নাগাদ 236 | নানা 237 | নিজে 238 | নিজেই 239 | নিজেদের 240 | নিজের 241 | নিতে 242 | 
নিয়ে 243 | নিয়ে 244 | নেই 245 | নেওয়া 246 | নেওয়ার 247 | নেওয়া 248 | নয় 249 | পক্ষে 250 | পর 251 | পরে 252 | পরেই 253 | পরেও 254 | পর্যন্ত 255 | পাওয়া 256 | পাচ 257 | পারি 258 | পারে 259 | পারেন 260 | পি 261 | পেয়ে 262 | পেয়্র্ 263 | প্রতি 264 | প্রথম 265 | প্রভৃতি 266 | প্রযন্ত 267 | প্রাথমিক 268 | প্রায় 269 | প্রায় 270 | ফলে 271 | ফিরে 272 | ফের 273 | বক্তব্য 274 | বদলে 275 | বন 276 | বরং 277 | বলতে 278 | বলল 279 | বললেন 280 | বলা 281 | বলে 282 | বলেছেন 283 | বলেন 284 | বসে 285 | বহু 286 | বা 287 | বাদে 288 | বার 289 | বি 290 | বিনা 291 | বিভিন্ন 292 | বিশেষ 293 | বিষয়টি 294 | বেশ 295 | বেশি 296 | ব্যবহার 297 | ব্যাপারে 298 | ভাবে 299 | ভাবেই 300 | মতো 301 | মতোই 302 | মধ্যভাগে 303 | মধ্যে 304 | মধ্যেই 305 | মধ্যেও 306 | মনে 307 | মাত্র 308 | মাধ্যমে 309 | মোট 310 | মোটেই 311 | যখন 312 | যত 313 | যতটা 314 | যথেষ্ট 315 | যদি 316 | যদিও 317 | যা 318 | যাঁর 319 | যাঁরা 320 | যাওয়া 321 | যাওয়ার 322 | যাওয়া 323 | যাকে 324 | যাচ্ছে 325 | যাতে 326 | যাদের 327 | যান 328 | যাবে 329 | যায় 330 | যার 331 | যারা 332 | যিনি 333 | যে 334 | যেখানে 335 | যেতে 336 | যেন 337 | যেমন 338 | র 339 | রকম 340 | রয়েছে 341 | রাখা 342 | রেখে 343 | লক্ষ 344 | শুধু 345 | শুরু 346 | সঙ্গে 347 | সঙ্গেও 348 | সব 349 | সবার 350 | সমস্ত 351 | সম্প্রতি 352 | সহ 353 | সহিত 354 | সাধারণ 355 | সামনে 356 | সি 357 | সুতরাং 358 | সে 359 | সেই 360 | সেখান 361 | সেখানে 362 | সেটা 363 | সেটাই 364 | সেটাও 365 | সেটি 366 | স্পষ্ট 367 | স্বয়ং 368 | হইতে 369 | হইবে 370 | হইয়া 371 | হওয়া 372 | হওয়ায় 373 | হওয়ার 374 | হচ্ছে 375 | হত 376 | হতে 377 | হতেই 378 | হন 379 | হবে 380 | হবেন 381 | হয় 382 | হয়তো 383 | হয়নি 384 | হয়ে 385 | হয়েই 386 | হয়েছিল 387 | হয়েছে 388 | হয়েছেন 389 | হল 390 | হলে 391 | হলেই 392 | হলেও 393 | হলো 394 | হাজার 395 | হিসাবে 396 | হৈলে 397 | হোক 398 | হয় -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/catalan: -------------------------------------------------------------------------------- 1 
| a 2 | abans 3 | ací 4 | ah 5 | així 6 | això 7 | al 8 | aleshores 9 | algun 10 | alguna 11 | algunes 12 | alguns 13 | alhora 14 | allà 15 | allí 16 | allò 17 | als 18 | altra 19 | altre 20 | altres 21 | amb 22 | ambdues 23 | ambdós 24 | anar 25 | ans 26 | apa 27 | aquell 28 | aquella 29 | aquelles 30 | aquells 31 | aquest 32 | aquesta 33 | aquestes 34 | aquests 35 | aquí 36 | baix 37 | bastant 38 | bé 39 | cada 40 | cadascuna 41 | cadascunes 42 | cadascuns 43 | cadascú 44 | com 45 | consegueixo 46 | conseguim 47 | conseguir 48 | consigueix 49 | consigueixen 50 | consigueixes 51 | contra 52 | d'un 53 | d'una 54 | d'unes 55 | d'uns 56 | dalt 57 | de 58 | del 59 | dels 60 | des 61 | des de 62 | després 63 | dins 64 | dintre 65 | donat 66 | doncs 67 | durant 68 | e 69 | eh 70 | el 71 | elles 72 | ells 73 | els 74 | em 75 | en 76 | encara 77 | ens 78 | entre 79 | era 80 | erem 81 | eren 82 | eres 83 | es 84 | esta 85 | estan 86 | estat 87 | estava 88 | estaven 89 | estem 90 | esteu 91 | estic 92 | està 93 | estàvem 94 | estàveu 95 | et 96 | etc 97 | ets 98 | fa 99 | faig 100 | fan 101 | fas 102 | fem 103 | fer 104 | feu 105 | fi 106 | fins 107 | fora 108 | gairebé 109 | ha 110 | han 111 | has 112 | haver 113 | havia 114 | he 115 | hem 116 | heu 117 | hi 118 | ho 119 | i 120 | igual 121 | iguals 122 | inclòs 123 | ja 124 | jo 125 | l'hi 126 | la 127 | les 128 | li 129 | li'n 130 | llarg 131 | llavors 132 | m'he 133 | ma 134 | mal 135 | malgrat 136 | mateix 137 | mateixa 138 | mateixes 139 | mateixos 140 | me 141 | mentre 142 | meu 143 | meus 144 | meva 145 | meves 146 | mode 147 | molt 148 | molta 149 | moltes 150 | molts 151 | mon 152 | mons 153 | més 154 | n'he 155 | n'hi 156 | ne 157 | ni 158 | no 159 | nogensmenys 160 | només 161 | nosaltres 162 | nostra 163 | nostre 164 | nostres 165 | o 166 | oh 167 | oi 168 | on 169 | pas 170 | pel 171 | pels 172 | per 173 | per que 174 | perquè 175 | però 176 | poc 177 | poca 178 | pocs 179 | podem 180 | poden 181 | poder 182 | 
podeu 183 | poques 184 | potser 185 | primer 186 | propi 187 | puc 188 | qual 189 | quals 190 | quan 191 | quant 192 | que 193 | quelcom 194 | qui 195 | quin 196 | quina 197 | quines 198 | quins 199 | què 200 | s'ha 201 | s'han 202 | sa 203 | sabem 204 | saben 205 | saber 206 | sabeu 207 | sap 208 | saps 209 | semblant 210 | semblants 211 | sense 212 | ser 213 | ses 214 | seu 215 | seus 216 | seva 217 | seves 218 | si 219 | sobre 220 | sobretot 221 | soc 222 | solament 223 | sols 224 | som 225 | son 226 | sons 227 | sota 228 | sou 229 | sóc 230 | són 231 | t'ha 232 | t'han 233 | t'he 234 | ta 235 | tal 236 | també 237 | tampoc 238 | tan 239 | tant 240 | tanta 241 | tantes 242 | te 243 | tene 244 | tenim 245 | tenir 246 | teniu 247 | teu 248 | teus 249 | teva 250 | teves 251 | tinc 252 | ton 253 | tons 254 | tot 255 | tota 256 | totes 257 | tots 258 | un 259 | una 260 | unes 261 | uns 262 | us 263 | va 264 | vaig 265 | vam 266 | van 267 | vas 268 | veu 269 | vosaltres 270 | vostra 271 | vostre 272 | vostres 273 | érem 274 | éreu 275 | és 276 | éssent 277 | últim 278 | ús -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/chinese: -------------------------------------------------------------------------------- 1 | 一 2 | 一下 3 | 一些 4 | 一切 5 | 一则 6 | 一天 7 | 一定 8 | 一方面 9 | 一旦 10 | 一时 11 | 一来 12 | 一样 13 | 一次 14 | 一片 15 | 一直 16 | 一致 17 | 一般 18 | 一起 19 | 一边 20 | 一面 21 | 万一 22 | 上下 23 | 上升 24 | 上去 25 | 上来 26 | 上述 27 | 上面 28 | 下列 29 | 下去 30 | 下来 31 | 下面 32 | 不一 33 | 不久 34 | 不仅 35 | 不会 36 | 不但 37 | 不光 38 | 不单 39 | 不变 40 | 不只 41 | 不可 42 | 不同 43 | 不够 44 | 不如 45 | 不得 46 | 不怕 47 | 不惟 48 | 不成 49 | 不拘 50 | 不敢 51 | 不断 52 | 不是 53 | 不比 54 | 不然 55 | 不特 56 | 不独 57 | 不管 58 | 不能 59 | 不要 60 | 不论 61 | 不足 62 | 不过 63 | 不问 64 | 与 65 | 与其 66 | 与否 67 | 与此同时 68 | 专门 69 | 且 70 | 两者 71 | 严格 72 | 严重 73 | 个 74 | 个人 75 | 个别 76 | 中小 77 | 中间 78 | 丰富 79 | 临 80 | 为 81 | 为主 82 | 为了 83 | 为什么 84 | 为什麽 85 | 为何 86 | 为着 87 | 主张 88 | 主要 89 | 举行 90 
| 乃 91 | 乃至 92 | 么 93 | 之 94 | 之一 95 | 之前 96 | 之后 97 | 之後 98 | 之所以 99 | 之类 100 | 乌乎 101 | 乎 102 | 乘 103 | 也 104 | 也好 105 | 也是 106 | 也罢 107 | 了 108 | 了解 109 | 争取 110 | 于 111 | 于是 112 | 于是乎 113 | 云云 114 | 互相 115 | 产生 116 | 人们 117 | 人家 118 | 什么 119 | 什么样 120 | 什麽 121 | 今后 122 | 今天 123 | 今年 124 | 今後 125 | 仍然 126 | 从 127 | 从事 128 | 从而 129 | 他 130 | 他人 131 | 他们 132 | 他的 133 | 代替 134 | 以 135 | 以上 136 | 以下 137 | 以为 138 | 以便 139 | 以免 140 | 以前 141 | 以及 142 | 以后 143 | 以外 144 | 以後 145 | 以来 146 | 以至 147 | 以至于 148 | 以致 149 | 们 150 | 任 151 | 任何 152 | 任凭 153 | 任务 154 | 企图 155 | 伟大 156 | 似乎 157 | 似的 158 | 但 159 | 但是 160 | 何 161 | 何况 162 | 何处 163 | 何时 164 | 作为 165 | 你 166 | 你们 167 | 你的 168 | 使得 169 | 使用 170 | 例如 171 | 依 172 | 依照 173 | 依靠 174 | 促进 175 | 保持 176 | 俺 177 | 俺们 178 | 倘 179 | 倘使 180 | 倘或 181 | 倘然 182 | 倘若 183 | 假使 184 | 假如 185 | 假若 186 | 做到 187 | 像 188 | 允许 189 | 充分 190 | 先后 191 | 先後 192 | 先生 193 | 全部 194 | 全面 195 | 兮 196 | 共同 197 | 关于 198 | 其 199 | 其一 200 | 其中 201 | 其二 202 | 其他 203 | 其余 204 | 其它 205 | 其实 206 | 其次 207 | 具体 208 | 具体地说 209 | 具体说来 210 | 具有 211 | 再者 212 | 再说 213 | 冒 214 | 冲 215 | 决定 216 | 况且 217 | 准备 218 | 几 219 | 几乎 220 | 几时 221 | 凭 222 | 凭借 223 | 出去 224 | 出来 225 | 出现 226 | 分别 227 | 则 228 | 别 229 | 别的 230 | 别说 231 | 到 232 | 前后 233 | 前者 234 | 前进 235 | 前面 236 | 加之 237 | 加以 238 | 加入 239 | 加强 240 | 十分 241 | 即 242 | 即令 243 | 即使 244 | 即便 245 | 即或 246 | 即若 247 | 却不 248 | 原来 249 | 又 250 | 及 251 | 及其 252 | 及时 253 | 及至 254 | 双方 255 | 反之 256 | 反应 257 | 反映 258 | 反过来 259 | 反过来说 260 | 取得 261 | 受到 262 | 变成 263 | 另 264 | 另一方面 265 | 另外 266 | 只是 267 | 只有 268 | 只要 269 | 只限 270 | 叫 271 | 叫做 272 | 召开 273 | 叮咚 274 | 可 275 | 可以 276 | 可是 277 | 可能 278 | 可见 279 | 各 280 | 各个 281 | 各人 282 | 各位 283 | 各地 284 | 各种 285 | 各级 286 | 各自 287 | 合理 288 | 同 289 | 同一 290 | 同时 291 | 同样 292 | 后来 293 | 后面 294 | 向 295 | 向着 296 | 吓 297 | 吗 298 | 否则 299 | 吧 300 | 吧哒 301 | 吱 302 | 呀 303 | 呃 304 | 呕 305 | 呗 306 | 呜 307 | 呜呼 308 | 呢 309 | 周围 310 | 呵 311 | 呸 312 | 呼哧 313 | 咋 314 | 和 315 | 咚 316 | 咦 317 | 咱 
318 | 咱们 319 | 咳 320 | 哇 321 | 哈 322 | 哈哈 323 | 哉 324 | 哎 325 | 哎呀 326 | 哎哟 327 | 哗 328 | 哟 329 | 哦 330 | 哩 331 | 哪 332 | 哪个 333 | 哪些 334 | 哪儿 335 | 哪天 336 | 哪年 337 | 哪怕 338 | 哪样 339 | 哪边 340 | 哪里 341 | 哼 342 | 哼唷 343 | 唉 344 | 啊 345 | 啐 346 | 啥 347 | 啦 348 | 啪达 349 | 喂 350 | 喏 351 | 喔唷 352 | 嗡嗡 353 | 嗬 354 | 嗯 355 | 嗳 356 | 嘎 357 | 嘎登 358 | 嘘 359 | 嘛 360 | 嘻 361 | 嘿 362 | 因 363 | 因为 364 | 因此 365 | 因而 366 | 固然 367 | 在 368 | 在下 369 | 地 370 | 坚决 371 | 坚持 372 | 基本 373 | 处理 374 | 复杂 375 | 多 376 | 多少 377 | 多数 378 | 多次 379 | 大力 380 | 大多数 381 | 大大 382 | 大家 383 | 大批 384 | 大约 385 | 大量 386 | 失去 387 | 她 388 | 她们 389 | 她的 390 | 好的 391 | 好象 392 | 如 393 | 如上所述 394 | 如下 395 | 如何 396 | 如其 397 | 如果 398 | 如此 399 | 如若 400 | 存在 401 | 宁 402 | 宁可 403 | 宁愿 404 | 宁肯 405 | 它 406 | 它们 407 | 它们的 408 | 它的 409 | 安全 410 | 完全 411 | 完成 412 | 实现 413 | 实际 414 | 宣布 415 | 容易 416 | 密切 417 | 对 418 | 对于 419 | 对应 420 | 将 421 | 少数 422 | 尔后 423 | 尚且 424 | 尤其 425 | 就 426 | 就是 427 | 就是说 428 | 尽 429 | 尽管 430 | 属于 431 | 岂但 432 | 左右 433 | 巨大 434 | 巩固 435 | 己 436 | 已经 437 | 帮助 438 | 常常 439 | 并 440 | 并不 441 | 并不是 442 | 并且 443 | 并没有 444 | 广大 445 | 广泛 446 | 应当 447 | 应用 448 | 应该 449 | 开外 450 | 开始 451 | 开展 452 | 引起 453 | 强烈 454 | 强调 455 | 归 456 | 当 457 | 当前 458 | 当时 459 | 当然 460 | 当着 461 | 形成 462 | 彻底 463 | 彼 464 | 彼此 465 | 往 466 | 往往 467 | 待 468 | 後来 469 | 後面 470 | 得 471 | 得出 472 | 得到 473 | 心里 474 | 必然 475 | 必要 476 | 必须 477 | 怎 478 | 怎么 479 | 怎么办 480 | 怎么样 481 | 怎样 482 | 怎麽 483 | 总之 484 | 总是 485 | 总的来看 486 | 总的来说 487 | 总的说来 488 | 总结 489 | 总而言之 490 | 恰恰相反 491 | 您 492 | 意思 493 | 愿意 494 | 慢说 495 | 成为 496 | 我 497 | 我们 498 | 我的 499 | 或 500 | 或是 501 | 或者 502 | 战斗 503 | 所 504 | 所以 505 | 所有 506 | 所谓 507 | 打 508 | 扩大 509 | 把 510 | 抑或 511 | 拿 512 | 按 513 | 按照 514 | 换句话说 515 | 换言之 516 | 据 517 | 掌握 518 | 接着 519 | 接著 520 | 故 521 | 故此 522 | 整个 523 | 方便 524 | 方面 525 | 旁人 526 | 无宁 527 | 无法 528 | 无论 529 | 既 530 | 既是 531 | 既然 532 | 时候 533 | 明显 534 | 明确 535 | 是 536 | 是否 537 | 是的 538 | 显然 539 | 显著 540 | 普通 541 | 普遍 542 | 更加 543 | 曾经 
544 | 替 545 | 最后 546 | 最大 547 | 最好 548 | 最後 549 | 最近 550 | 最高 551 | 有 552 | 有些 553 | 有关 554 | 有利 555 | 有力 556 | 有所 557 | 有效 558 | 有时 559 | 有点 560 | 有的 561 | 有着 562 | 有著 563 | 望 564 | 朝 565 | 朝着 566 | 本 567 | 本着 568 | 来 569 | 来着 570 | 极了 571 | 构成 572 | 果然 573 | 果真 574 | 某 575 | 某个 576 | 某些 577 | 根据 578 | 根本 579 | 欢迎 580 | 正在 581 | 正如 582 | 正常 583 | 此 584 | 此外 585 | 此时 586 | 此间 587 | 毋宁 588 | 每 589 | 每个 590 | 每天 591 | 每年 592 | 每当 593 | 比 594 | 比如 595 | 比方 596 | 比较 597 | 毫不 598 | 没有 599 | 沿 600 | 沿着 601 | 注意 602 | 深入 603 | 清楚 604 | 满足 605 | 漫说 606 | 焉 607 | 然则 608 | 然后 609 | 然後 610 | 然而 611 | 照 612 | 照着 613 | 特别是 614 | 特殊 615 | 特点 616 | 现代 617 | 现在 618 | 甚么 619 | 甚而 620 | 甚至 621 | 用 622 | 由 623 | 由于 624 | 由此可见 625 | 的 626 | 的话 627 | 目前 628 | 直到 629 | 直接 630 | 相似 631 | 相信 632 | 相反 633 | 相同 634 | 相对 635 | 相对而言 636 | 相应 637 | 相当 638 | 相等 639 | 省得 640 | 看出 641 | 看到 642 | 看来 643 | 看看 644 | 看见 645 | 真是 646 | 真正 647 | 着 648 | 着呢 649 | 矣 650 | 知道 651 | 确定 652 | 离 653 | 积极 654 | 移动 655 | 突出 656 | 突然 657 | 立即 658 | 第 659 | 等 660 | 等等 661 | 管 662 | 紧接着 663 | 纵 664 | 纵令 665 | 纵使 666 | 纵然 667 | 练习 668 | 组成 669 | 经 670 | 经常 671 | 经过 672 | 结合 673 | 结果 674 | 给 675 | 绝对 676 | 继续 677 | 继而 678 | 维持 679 | 综上所述 680 | 罢了 681 | 考虑 682 | 者 683 | 而 684 | 而且 685 | 而况 686 | 而外 687 | 而已 688 | 而是 689 | 而言 690 | 联系 691 | 能 692 | 能否 693 | 能够 694 | 腾 695 | 自 696 | 自个儿 697 | 自从 698 | 自各儿 699 | 自家 700 | 自己 701 | 自身 702 | 至 703 | 至于 704 | 良好 705 | 若 706 | 若是 707 | 若非 708 | 范围 709 | 莫若 710 | 获得 711 | 虽 712 | 虽则 713 | 虽然 714 | 虽说 715 | 行为 716 | 行动 717 | 表明 718 | 表示 719 | 被 720 | 要 721 | 要不 722 | 要不是 723 | 要不然 724 | 要么 725 | 要是 726 | 要求 727 | 规定 728 | 觉得 729 | 认为 730 | 认真 731 | 认识 732 | 让 733 | 许多 734 | 论 735 | 设使 736 | 设若 737 | 该 738 | 说明 739 | 诸位 740 | 谁 741 | 谁知 742 | 赶 743 | 起 744 | 起来 745 | 起见 746 | 趁 747 | 趁着 748 | 越是 749 | 跟 750 | 转动 751 | 转变 752 | 转贴 753 | 较 754 | 较之 755 | 边 756 | 达到 757 | 迅速 758 | 过 759 | 过去 760 | 过来 761 | 运用 762 | 还是 763 | 还有 764 | 这 765 | 这个 766 | 这么 767 | 这么些 768 | 这么样 769 | 
这么点儿 770 | 这些 771 | 这会儿 772 | 这儿 773 | 这就是说 774 | 这时 775 | 这样 776 | 这点 777 | 这种 778 | 这边 779 | 这里 780 | 这麽 781 | 进入 782 | 进步 783 | 进而 784 | 进行 785 | 连 786 | 连同 787 | 适应 788 | 适当 789 | 适用 790 | 逐步 791 | 逐渐 792 | 通常 793 | 通过 794 | 造成 795 | 遇到 796 | 遭到 797 | 避免 798 | 那 799 | 那个 800 | 那么 801 | 那么些 802 | 那么样 803 | 那些 804 | 那会儿 805 | 那儿 806 | 那时 807 | 那样 808 | 那边 809 | 那里 810 | 那麽 811 | 部分 812 | 鄙人 813 | 采取 814 | 里面 815 | 重大 816 | 重新 817 | 重要 818 | 鉴于 819 | 问题 820 | 防止 821 | 阿 822 | 附近 823 | 限制 824 | 除 825 | 除了 826 | 除此之外 827 | 除非 828 | 随 829 | 随着 830 | 随著 831 | 集中 832 | 需要 833 | 非但 834 | 非常 835 | 非徒 836 | 靠 837 | 顺 838 | 顺着 839 | 首先 840 | 高兴 841 | 是不是 842 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/danish: -------------------------------------------------------------------------------- 1 | og 2 | i 3 | jeg 4 | det 5 | at 6 | en 7 | den 8 | til 9 | er 10 | som 11 | på 12 | de 13 | med 14 | han 15 | af 16 | for 17 | ikke 18 | der 19 | var 20 | mig 21 | sig 22 | men 23 | et 24 | har 25 | om 26 | vi 27 | min 28 | havde 29 | ham 30 | hun 31 | nu 32 | over 33 | da 34 | fra 35 | du 36 | ud 37 | sin 38 | dem 39 | os 40 | op 41 | man 42 | hans 43 | hvor 44 | eller 45 | hvad 46 | skal 47 | selv 48 | her 49 | alle 50 | vil 51 | blev 52 | kunne 53 | ind 54 | når 55 | være 56 | dog 57 | noget 58 | ville 59 | jo 60 | deres 61 | efter 62 | ned 63 | skulle 64 | denne 65 | end 66 | dette 67 | mit 68 | også 69 | under 70 | have 71 | dig 72 | anden 73 | hende 74 | mine 75 | alt 76 | meget 77 | sit 78 | sine 79 | vor 80 | mod 81 | disse 82 | hvis 83 | din 84 | nogle 85 | hos 86 | blive 87 | mange 88 | ad 89 | bliver 90 | hendes 91 | været 92 | thi 93 | jer 94 | sådan 95 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/dutch: -------------------------------------------------------------------------------- 1 | de 2 | en 3 | van 4 | ik 5 | te 6 | 
dat 7 | die 8 | in 9 | een 10 | hij 11 | het 12 | niet 13 | zijn 14 | is 15 | was 16 | op 17 | aan 18 | met 19 | als 20 | voor 21 | had 22 | er 23 | maar 24 | om 25 | hem 26 | dan 27 | zou 28 | of 29 | wat 30 | mijn 31 | men 32 | dit 33 | zo 34 | door 35 | over 36 | ze 37 | zich 38 | bij 39 | ook 40 | tot 41 | je 42 | mij 43 | uit 44 | der 45 | daar 46 | haar 47 | naar 48 | heb 49 | hoe 50 | heeft 51 | hebben 52 | deze 53 | u 54 | want 55 | nog 56 | zal 57 | me 58 | zij 59 | nu 60 | ge 61 | geen 62 | omdat 63 | iets 64 | worden 65 | toch 66 | al 67 | waren 68 | veel 69 | meer 70 | doen 71 | toen 72 | moet 73 | ben 74 | zonder 75 | kan 76 | hun 77 | dus 78 | alles 79 | onder 80 | ja 81 | eens 82 | hier 83 | wie 84 | werd 85 | altijd 86 | doch 87 | wordt 88 | wezen 89 | kunnen 90 | ons 91 | zelf 92 | tegen 93 | na 94 | reeds 95 | wil 96 | kon 97 | niets 98 | uw 99 | iemand 100 | geweest 101 | andere 102 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/english: -------------------------------------------------------------------------------- 1 | i 2 | me 3 | my 4 | myself 5 | we 6 | our 7 | ours 8 | ourselves 9 | you 10 | you're 11 | you've 12 | you'll 13 | you'd 14 | your 15 | yours 16 | yourself 17 | yourselves 18 | he 19 | him 20 | his 21 | himself 22 | she 23 | she's 24 | her 25 | hers 26 | herself 27 | it 28 | it's 29 | its 30 | itself 31 | they 32 | them 33 | their 34 | theirs 35 | themselves 36 | what 37 | which 38 | who 39 | whom 40 | this 41 | that 42 | that'll 43 | these 44 | those 45 | am 46 | is 47 | are 48 | was 49 | were 50 | be 51 | been 52 | being 53 | have 54 | has 55 | had 56 | having 57 | do 58 | does 59 | did 60 | doing 61 | a 62 | an 63 | the 64 | and 65 | but 66 | if 67 | or 68 | because 69 | as 70 | until 71 | while 72 | of 73 | at 74 | by 75 | for 76 | with 77 | about 78 | against 79 | between 80 | into 81 | through 82 | during 83 | before 84 | after 85 | above 86 | below 87 
| to 88 | from 89 | up 90 | down 91 | in 92 | out 93 | on 94 | off 95 | over 96 | under 97 | again 98 | further 99 | then 100 | once 101 | here 102 | there 103 | when 104 | where 105 | why 106 | how 107 | all 108 | any 109 | both 110 | each 111 | few 112 | more 113 | most 114 | other 115 | some 116 | such 117 | no 118 | nor 119 | not 120 | only 121 | own 122 | same 123 | so 124 | than 125 | too 126 | very 127 | s 128 | t 129 | can 130 | will 131 | just 132 | don 133 | don't 134 | should 135 | should've 136 | now 137 | d 138 | ll 139 | m 140 | o 141 | re 142 | ve 143 | y 144 | ain 145 | aren 146 | aren't 147 | couldn 148 | couldn't 149 | didn 150 | didn't 151 | doesn 152 | doesn't 153 | hadn 154 | hadn't 155 | hasn 156 | hasn't 157 | haven 158 | haven't 159 | isn 160 | isn't 161 | ma 162 | mightn 163 | mightn't 164 | mustn 165 | mustn't 166 | needn 167 | needn't 168 | shan 169 | shan't 170 | shouldn 171 | shouldn't 172 | wasn 173 | wasn't 174 | weren 175 | weren't 176 | won 177 | won't 178 | wouldn 179 | wouldn't 180 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/finnish: -------------------------------------------------------------------------------- 1 | olla 2 | olen 3 | olet 4 | on 5 | olemme 6 | olette 7 | ovat 8 | ole 9 | oli 10 | olisi 11 | olisit 12 | olisin 13 | olisimme 14 | olisitte 15 | olisivat 16 | olit 17 | olin 18 | olimme 19 | olitte 20 | olivat 21 | ollut 22 | olleet 23 | en 24 | et 25 | ei 26 | emme 27 | ette 28 | eivät 29 | minä 30 | minun 31 | minut 32 | minua 33 | minussa 34 | minusta 35 | minuun 36 | minulla 37 | minulta 38 | minulle 39 | sinä 40 | sinun 41 | sinut 42 | sinua 43 | sinussa 44 | sinusta 45 | sinuun 46 | sinulla 47 | sinulta 48 | sinulle 49 | hän 50 | hänen 51 | hänet 52 | häntä 53 | hänessä 54 | hänestä 55 | häneen 56 | hänellä 57 | häneltä 58 | hänelle 59 | me 60 | meidän 61 | meidät 62 | meitä 63 | meissä 64 | meistä 65 | meihin 66 | meillä 67 | meiltä 68 
| meille 69 | te 70 | teidän 71 | teidät 72 | teitä 73 | teissä 74 | teistä 75 | teihin 76 | teillä 77 | teiltä 78 | teille 79 | he 80 | heidän 81 | heidät 82 | heitä 83 | heissä 84 | heistä 85 | heihin 86 | heillä 87 | heiltä 88 | heille 89 | tämä 90 | tämän 91 | tätä 92 | tässä 93 | tästä 94 | tähän 95 | tallä 96 | tältä 97 | tälle 98 | tänä 99 | täksi 100 | tuo 101 | tuon 102 | tuotä 103 | tuossa 104 | tuosta 105 | tuohon 106 | tuolla 107 | tuolta 108 | tuolle 109 | tuona 110 | tuoksi 111 | se 112 | sen 113 | sitä 114 | siinä 115 | siitä 116 | siihen 117 | sillä 118 | siltä 119 | sille 120 | sinä 121 | siksi 122 | nämä 123 | näiden 124 | näitä 125 | näissä 126 | näistä 127 | näihin 128 | näillä 129 | näiltä 130 | näille 131 | näinä 132 | näiksi 133 | nuo 134 | noiden 135 | noita 136 | noissa 137 | noista 138 | noihin 139 | noilla 140 | noilta 141 | noille 142 | noina 143 | noiksi 144 | ne 145 | niiden 146 | niitä 147 | niissä 148 | niistä 149 | niihin 150 | niillä 151 | niiltä 152 | niille 153 | niinä 154 | niiksi 155 | kuka 156 | kenen 157 | kenet 158 | ketä 159 | kenessä 160 | kenestä 161 | keneen 162 | kenellä 163 | keneltä 164 | kenelle 165 | kenenä 166 | keneksi 167 | ketkä 168 | keiden 169 | ketkä 170 | keitä 171 | keissä 172 | keistä 173 | keihin 174 | keillä 175 | keiltä 176 | keille 177 | keinä 178 | keiksi 179 | mikä 180 | minkä 181 | minkä 182 | mitä 183 | missä 184 | mistä 185 | mihin 186 | millä 187 | miltä 188 | mille 189 | minä 190 | miksi 191 | mitkä 192 | joka 193 | jonka 194 | jota 195 | jossa 196 | josta 197 | johon 198 | jolla 199 | jolta 200 | jolle 201 | jona 202 | joksi 203 | jotka 204 | joiden 205 | joita 206 | joissa 207 | joista 208 | joihin 209 | joilla 210 | joilta 211 | joille 212 | joina 213 | joiksi 214 | että 215 | ja 216 | jos 217 | koska 218 | kuin 219 | mutta 220 | niin 221 | sekä 222 | sillä 223 | tai 224 | vaan 225 | vai 226 | vaikka 227 | kanssa 228 | mukaan 229 | noin 230 | poikki 231 | yli 232 | kun 233 | niin 234 | nyt 
235 | itse 236 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/french: -------------------------------------------------------------------------------- 1 | au 2 | aux 3 | avec 4 | ce 5 | ces 6 | dans 7 | de 8 | des 9 | du 10 | elle 11 | en 12 | et 13 | eux 14 | il 15 | ils 16 | je 17 | la 18 | le 19 | les 20 | leur 21 | lui 22 | ma 23 | mais 24 | me 25 | même 26 | mes 27 | moi 28 | mon 29 | ne 30 | nos 31 | notre 32 | nous 33 | on 34 | ou 35 | par 36 | pas 37 | pour 38 | qu 39 | que 40 | qui 41 | sa 42 | se 43 | ses 44 | son 45 | sur 46 | ta 47 | te 48 | tes 49 | toi 50 | ton 51 | tu 52 | un 53 | une 54 | vos 55 | votre 56 | vous 57 | c 58 | d 59 | j 60 | l 61 | à 62 | m 63 | n 64 | s 65 | t 66 | y 67 | été 68 | étée 69 | étées 70 | étés 71 | étant 72 | étante 73 | étants 74 | étantes 75 | suis 76 | es 77 | est 78 | sommes 79 | êtes 80 | sont 81 | serai 82 | seras 83 | sera 84 | serons 85 | serez 86 | seront 87 | serais 88 | serait 89 | serions 90 | seriez 91 | seraient 92 | étais 93 | était 94 | étions 95 | étiez 96 | étaient 97 | fus 98 | fut 99 | fûmes 100 | fûtes 101 | furent 102 | sois 103 | soit 104 | soyons 105 | soyez 106 | soient 107 | fusse 108 | fusses 109 | fût 110 | fussions 111 | fussiez 112 | fussent 113 | ayant 114 | ayante 115 | ayantes 116 | ayants 117 | eu 118 | eue 119 | eues 120 | eus 121 | ai 122 | as 123 | avons 124 | avez 125 | ont 126 | aurai 127 | auras 128 | aura 129 | aurons 130 | aurez 131 | auront 132 | aurais 133 | aurait 134 | aurions 135 | auriez 136 | auraient 137 | avais 138 | avait 139 | avions 140 | aviez 141 | avaient 142 | eut 143 | eûmes 144 | eûtes 145 | eurent 146 | aie 147 | aies 148 | ait 149 | ayons 150 | ayez 151 | aient 152 | eusse 153 | eusses 154 | eût 155 | eussions 156 | eussiez 157 | eussent 158 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/german: 
-------------------------------------------------------------------------------- 1 | aber 2 | alle 3 | allem 4 | allen 5 | aller 6 | alles 7 | als 8 | also 9 | am 10 | an 11 | ander 12 | andere 13 | anderem 14 | anderen 15 | anderer 16 | anderes 17 | anderm 18 | andern 19 | anderr 20 | anders 21 | auch 22 | auf 23 | aus 24 | bei 25 | bin 26 | bis 27 | bist 28 | da 29 | damit 30 | dann 31 | der 32 | den 33 | des 34 | dem 35 | die 36 | das 37 | dass 38 | daß 39 | derselbe 40 | derselben 41 | denselben 42 | desselben 43 | demselben 44 | dieselbe 45 | dieselben 46 | dasselbe 47 | dazu 48 | dein 49 | deine 50 | deinem 51 | deinen 52 | deiner 53 | deines 54 | denn 55 | derer 56 | dessen 57 | dich 58 | dir 59 | du 60 | dies 61 | diese 62 | diesem 63 | diesen 64 | dieser 65 | dieses 66 | doch 67 | dort 68 | durch 69 | ein 70 | eine 71 | einem 72 | einen 73 | einer 74 | eines 75 | einig 76 | einige 77 | einigem 78 | einigen 79 | einiger 80 | einiges 81 | einmal 82 | er 83 | ihn 84 | ihm 85 | es 86 | etwas 87 | euer 88 | eure 89 | eurem 90 | euren 91 | eurer 92 | eures 93 | für 94 | gegen 95 | gewesen 96 | hab 97 | habe 98 | haben 99 | hat 100 | hatte 101 | hatten 102 | hier 103 | hin 104 | hinter 105 | ich 106 | mich 107 | mir 108 | ihr 109 | ihre 110 | ihrem 111 | ihren 112 | ihrer 113 | ihres 114 | euch 115 | im 116 | in 117 | indem 118 | ins 119 | ist 120 | jede 121 | jedem 122 | jeden 123 | jeder 124 | jedes 125 | jene 126 | jenem 127 | jenen 128 | jener 129 | jenes 130 | jetzt 131 | kann 132 | kein 133 | keine 134 | keinem 135 | keinen 136 | keiner 137 | keines 138 | können 139 | könnte 140 | machen 141 | man 142 | manche 143 | manchem 144 | manchen 145 | mancher 146 | manches 147 | mein 148 | meine 149 | meinem 150 | meinen 151 | meiner 152 | meines 153 | mit 154 | muss 155 | musste 156 | nach 157 | nicht 158 | nichts 159 | noch 160 | nun 161 | nur 162 | ob 163 | oder 164 | ohne 165 | sehr 166 | sein 167 | seine 168 | seinem 169 | seinen 170 | seiner 171 | seines 172 
| selbst 173 | sich 174 | sie 175 | ihnen 176 | sind 177 | so 178 | solche 179 | solchem 180 | solchen 181 | solcher 182 | solches 183 | soll 184 | sollte 185 | sondern 186 | sonst 187 | über 188 | um 189 | und 190 | uns 191 | unsere 192 | unserem 193 | unseren 194 | unser 195 | unseres 196 | unter 197 | viel 198 | vom 199 | von 200 | vor 201 | während 202 | war 203 | waren 204 | warst 205 | was 206 | weg 207 | weil 208 | weiter 209 | welche 210 | welchem 211 | welchen 212 | welcher 213 | welches 214 | wenn 215 | werde 216 | werden 217 | wie 218 | wieder 219 | will 220 | wir 221 | wird 222 | wirst 223 | wo 224 | wollen 225 | wollte 226 | würde 227 | würden 228 | zu 229 | zum 230 | zur 231 | zwar 232 | zwischen 233 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/greek: -------------------------------------------------------------------------------- 1 | αλλα 2 | αν 3 | αντι 4 | απο 5 | αυτα 6 | αυτεσ 7 | αυτη 8 | αυτο 9 | αυτοι 10 | αυτοσ 11 | αυτουσ 12 | αυτων 13 | αἱ 14 | αἳ 15 | αἵ 16 | αὐτόσ 17 | αὐτὸς 18 | αὖ 19 | γάρ 20 | γα 21 | γα^ 22 | γε 23 | για 24 | γοῦν 25 | γὰρ 26 | δ' 27 | δέ 28 | δή 29 | δαί 30 | δαίσ 31 | δαὶ 32 | δαὶς 33 | δε 34 | δεν 35 | δι' 36 | διά 37 | διὰ 38 | δὲ 39 | δὴ 40 | δ’ 41 | εαν 42 | ειμαι 43 | ειμαστε 44 | ειναι 45 | εισαι 46 | ειστε 47 | εκεινα 48 | εκεινεσ 49 | εκεινη 50 | εκεινο 51 | εκεινοι 52 | εκεινοσ 53 | εκεινουσ 54 | εκεινων 55 | ενω 56 | επ 57 | επι 58 | εἰ 59 | εἰμί 60 | εἰμὶ 61 | εἰς 62 | εἰσ 63 | εἴ 64 | εἴμι 65 | εἴτε 66 | η 67 | θα 68 | ισωσ 69 | κ 70 | καί 71 | καίτοι 72 | καθ 73 | και 74 | κατ 75 | κατά 76 | κατα 77 | κατὰ 78 | καὶ 79 | κι 80 | κἀν 81 | κἂν 82 | μέν 83 | μή 84 | μήτε 85 | μα 86 | με 87 | μεθ 88 | μετ 89 | μετά 90 | μετα 91 | μετὰ 92 | μη 93 | μην 94 | μἐν 95 | μὲν 96 | μὴ 97 | μὴν 98 | να 99 | ο 100 | οι 101 | ομωσ 102 | οπωσ 103 | οσο 104 | οτι 105 | οἱ 106 | οἳ 107 | οἷς 108 | οὐ 109 | οὐδ 110 | οὐδέ 111 | οὐδείσ 112 | οὐδεὶς 
113 | οὐδὲ 114 | οὐδὲν 115 | οὐκ 116 | οὐχ 117 | οὐχὶ 118 | οὓς 119 | οὔτε 120 | οὕτω 121 | οὕτως 122 | οὕτωσ 123 | οὖν 124 | οὗ 125 | οὗτος 126 | οὗτοσ 127 | παρ 128 | παρά 129 | παρα 130 | παρὰ 131 | περί 132 | περὶ 133 | ποια 134 | ποιεσ 135 | ποιο 136 | ποιοι 137 | ποιοσ 138 | ποιουσ 139 | ποιων 140 | ποτε 141 | που 142 | ποῦ 143 | προ 144 | προσ 145 | πρόσ 146 | πρὸ 147 | πρὸς 148 | πως 149 | πωσ 150 | σε 151 | στη 152 | στην 153 | στο 154 | στον 155 | σόσ 156 | σύ 157 | σύν 158 | σὸς 159 | σὺ 160 | σὺν 161 | τά 162 | τήν 163 | τί 164 | τίς 165 | τίσ 166 | τα 167 | ταῖς 168 | τε 169 | την 170 | τησ 171 | τι 172 | τινα 173 | τις 174 | τισ 175 | το 176 | τοί 177 | τοι 178 | τοιοῦτος 179 | τοιοῦτοσ 180 | τον 181 | τοτε 182 | του 183 | τούσ 184 | τοὺς 185 | τοῖς 186 | τοῦ 187 | των 188 | τό 189 | τόν 190 | τότε 191 | τὰ 192 | τὰς 193 | τὴν 194 | τὸ 195 | τὸν 196 | τῆς 197 | τῆσ 198 | τῇ 199 | τῶν 200 | τῷ 201 | ωσ 202 | ἀλλ' 203 | ἀλλά 204 | ἀλλὰ 205 | ἀλλ’ 206 | ἀπ 207 | ἀπό 208 | ἀπὸ 209 | ἀφ 210 | ἂν 211 | ἃ 212 | ἄλλος 213 | ἄλλοσ 214 | ἄν 215 | ἄρα 216 | ἅμα 217 | ἐάν 218 | ἐγώ 219 | ἐγὼ 220 | ἐκ 221 | ἐμόσ 222 | ἐμὸς 223 | ἐν 224 | ἐξ 225 | ἐπί 226 | ἐπεὶ 227 | ἐπὶ 228 | ἐστι 229 | ἐφ 230 | ἐὰν 231 | ἑαυτοῦ 232 | ἔτι 233 | ἡ 234 | ἢ 235 | ἣ 236 | ἤ 237 | ἥ 238 | ἧς 239 | ἵνα 240 | ὁ 241 | ὃ 242 | ὃν 243 | ὃς 244 | ὅ 245 | ὅδε 246 | ὅθεν 247 | ὅπερ 248 | ὅς 249 | ὅσ 250 | ὅστις 251 | ὅστισ 252 | ὅτε 253 | ὅτι 254 | ὑμόσ 255 | ὑπ 256 | ὑπέρ 257 | ὑπό 258 | ὑπὲρ 259 | ὑπὸ 260 | ὡς 261 | ὡσ 262 | ὥς 263 | ὥστε 264 | ὦ 265 | ᾧ 266 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/hebrew: -------------------------------------------------------------------------------- 1 | אני 2 | את 3 | אתה 4 | אנחנו 5 | אתן 6 | אתם 7 | הם 8 | הן 9 | היא 10 | הוא 11 | שלי 12 | שלו 13 | שלך 14 | שלה 15 | שלנו 16 | שלכם 17 | שלכן 18 | שלהם 19 | שלהן 20 | לי 21 | לו 22 | לה 23 | לנו 24 | לכם 25 | לכן 26 | להם 27 | 
להן 28 | אותה 29 | אותו 30 | זה 31 | זאת 32 | אלה 33 | אלו 34 | תחת 35 | מתחת 36 | מעל 37 | בין 38 | עם 39 | עד 40 | נגר 41 | על 42 | אל 43 | מול 44 | של 45 | אצל 46 | כמו 47 | אחר 48 | אותו 49 | בלי 50 | לפני 51 | אחרי 52 | מאחורי 53 | עלי 54 | עליו 55 | עליה 56 | עליך 57 | עלינו 58 | עליכם 59 | לעיכן 60 | עליהם 61 | עליהן 62 | כל 63 | כולם 64 | כולן 65 | כך 66 | ככה 67 | כזה 68 | זה 69 | זות 70 | אותי 71 | אותה 72 | אותם 73 | אותך 74 | אותו 75 | אותן 76 | אותנו 77 | ואת 78 | את 79 | אתכם 80 | אתכן 81 | איתי 82 | איתו 83 | איתך 84 | איתה 85 | איתם 86 | איתן 87 | איתנו 88 | איתכם 89 | איתכן 90 | יהיה 91 | תהיה 92 | היתי 93 | היתה 94 | היה 95 | להיות 96 | עצמי 97 | עצמו 98 | עצמה 99 | עצמם 100 | עצמן 101 | עצמנו 102 | עצמהם 103 | עצמהן 104 | מי 105 | מה 106 | איפה 107 | היכן 108 | במקום שבו 109 | אם 110 | לאן 111 | למקום שבו 112 | מקום בו 113 | איזה 114 | מהיכן 115 | איך 116 | כיצד 117 | באיזו מידה 118 | מתי 119 | בשעה ש 120 | כאשר 121 | כש 122 | למרות 123 | לפני 124 | אחרי 125 | מאיזו סיבה 126 | הסיבה שבגללה 127 | למה 128 | מדוע 129 | לאיזו תכלית 130 | כי 131 | יש 132 | אין 133 | אך 134 | מנין 135 | מאין 136 | מאיפה 137 | יכל 138 | יכלה 139 | יכלו 140 | יכול 141 | יכולה 142 | יכולים 143 | יכולות 144 | יוכלו 145 | יוכל 146 | מסוגל 147 | לא 148 | רק 149 | אולי 150 | אין 151 | לאו 152 | אי 153 | כלל 154 | נגד 155 | אם 156 | עם 157 | אל 158 | אלה 159 | אלו 160 | אף 161 | על 162 | מעל 163 | מתחת 164 | מצד 165 | בשביל 166 | לבין 167 | באמצע 168 | בתוך 169 | דרך 170 | מבעד 171 | באמצעות 172 | למעלה 173 | למטה 174 | מחוץ 175 | מן 176 | לעבר 177 | מכאן 178 | כאן 179 | הנה 180 | הרי 181 | פה 182 | שם 183 | אך 184 | ברם 185 | שוב 186 | אבל 187 | מבלי 188 | בלי 189 | מלבד 190 | רק 191 | בגלל 192 | מכיוון 193 | עד 194 | אשר 195 | ואילו 196 | למרות 197 | אס 198 | כמו 199 | כפי 200 | אז 201 | אחרי 202 | כן 203 | לכן 204 | לפיכך 205 | מאד 206 | עז 207 | מעט 208 | מעטים 209 | במידה 210 | שוב 211 | יותר 212 | מדי 213 | גם 214 | כן 215 | נו 216 | אחר 217 | אחרת 218 | אחרים 219 | 
אחרות 220 | אשר 221 | או -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/hinglish: -------------------------------------------------------------------------------- 1 | a 2 | aadi 3 | aaj 4 | aap 5 | aapne 6 | aata 7 | aati 8 | aaya 9 | aaye 10 | ab 11 | abbe 12 | abbey 13 | abe 14 | abhi 15 | able 16 | about 17 | above 18 | accha 19 | according 20 | accordingly 21 | acha 22 | achcha 23 | across 24 | actually 25 | after 26 | afterwards 27 | again 28 | against 29 | agar 30 | ain 31 | aint 32 | ain't 33 | aisa 34 | aise 35 | aisi 36 | alag 37 | all 38 | allow 39 | allows 40 | almost 41 | alone 42 | along 43 | already 44 | also 45 | although 46 | always 47 | am 48 | among 49 | amongst 50 | an 51 | and 52 | andar 53 | another 54 | any 55 | anybody 56 | anyhow 57 | anyone 58 | anything 59 | anyway 60 | anyways 61 | anywhere 62 | ap 63 | apan 64 | apart 65 | apna 66 | apnaa 67 | apne 68 | apni 69 | appear 70 | are 71 | aren 72 | arent 73 | aren't 74 | around 75 | arre 76 | as 77 | aside 78 | ask 79 | asking 80 | at 81 | aur 82 | avum 83 | aya 84 | aye 85 | baad 86 | baar 87 | bad 88 | bahut 89 | bana 90 | banae 91 | banai 92 | banao 93 | banaya 94 | banaye 95 | banayi 96 | banda 97 | bande 98 | bandi 99 | bane 100 | bani 101 | bas 102 | bata 103 | batao 104 | bc 105 | be 106 | became 107 | because 108 | become 109 | becomes 110 | becoming 111 | been 112 | before 113 | beforehand 114 | behind 115 | being 116 | below 117 | beside 118 | besides 119 | best 120 | better 121 | between 122 | beyond 123 | bhai 124 | bheetar 125 | bhi 126 | bhitar 127 | bht 128 | bilkul 129 | bohot 130 | bol 131 | bola 132 | bole 133 | boli 134 | bolo 135 | bolta 136 | bolte 137 | bolti 138 | both 139 | brief 140 | bro 141 | btw 142 | but 143 | by 144 | came 145 | can 146 | cannot 147 | cant 148 | can't 149 | cause 150 | causes 151 | certain 152 | certainly 153 | chahiye 154 | chaiye 155 | chal 156 | chalega 157 | chhaiye 158 | 
clearly 159 | c'mon 160 | com 161 | come 162 | comes 163 | could 164 | couldn 165 | couldnt 166 | couldn't 167 | d 168 | de 169 | dede 170 | dega 171 | degi 172 | dekh 173 | dekha 174 | dekhe 175 | dekhi 176 | dekho 177 | denge 178 | dhang 179 | di 180 | did 181 | didn 182 | didnt 183 | didn't 184 | dijiye 185 | diya 186 | diyaa 187 | diye 188 | diyo 189 | do 190 | does 191 | doesn 192 | doesnt 193 | doesn't 194 | doing 195 | done 196 | dono 197 | dont 198 | don't 199 | doosra 200 | doosre 201 | down 202 | downwards 203 | dude 204 | dunga 205 | dungi 206 | during 207 | dusra 208 | dusre 209 | dusri 210 | dvaara 211 | dvara 212 | dwaara 213 | dwara 214 | each 215 | edu 216 | eg 217 | eight 218 | either 219 | ek 220 | else 221 | elsewhere 222 | enough 223 | etc 224 | even 225 | ever 226 | every 227 | everybody 228 | everyone 229 | everything 230 | everywhere 231 | ex 232 | exactly 233 | example 234 | except 235 | far 236 | few 237 | fifth 238 | fir 239 | first 240 | five 241 | followed 242 | following 243 | follows 244 | for 245 | forth 246 | four 247 | from 248 | further 249 | furthermore 250 | gaya 251 | gaye 252 | gayi 253 | get 254 | gets 255 | getting 256 | ghar 257 | given 258 | gives 259 | go 260 | goes 261 | going 262 | gone 263 | good 264 | got 265 | gotten 266 | greetings 267 | haan 268 | had 269 | hadd 270 | hadn 271 | hadnt 272 | hadn't 273 | hai 274 | hain 275 | hamara 276 | hamare 277 | hamari 278 | hamne 279 | han 280 | happens 281 | har 282 | hardly 283 | has 284 | hasn 285 | hasnt 286 | hasn't 287 | have 288 | haven 289 | havent 290 | haven't 291 | having 292 | he 293 | hello 294 | help 295 | hence 296 | her 297 | here 298 | hereafter 299 | hereby 300 | herein 301 | here's 302 | hereupon 303 | hers 304 | herself 305 | he's 306 | hi 307 | him 308 | himself 309 | his 310 | hither 311 | hm 312 | hmm 313 | ho 314 | hoga 315 | hoge 316 | hogi 317 | hona 318 | honaa 319 | hone 320 | honge 321 | hongi 322 | honi 323 | hopefully 324 | hota 325 | hotaa 326 | 
hote 327 | hoti 328 | how 329 | howbeit 330 | however 331 | hoyenge 332 | hoyengi 333 | hu 334 | hua 335 | hue 336 | huh 337 | hui 338 | hum 339 | humein 340 | humne 341 | hun 342 | huye 343 | huyi 344 | i 345 | i'd 346 | idk 347 | ie 348 | if 349 | i'll 350 | i'm 351 | imo 352 | in 353 | inasmuch 354 | inc 355 | inhe 356 | inhi 357 | inho 358 | inka 359 | inkaa 360 | inke 361 | inki 362 | inn 363 | inner 364 | inse 365 | insofar 366 | into 367 | inward 368 | is 369 | ise 370 | isi 371 | iska 372 | iskaa 373 | iske 374 | iski 375 | isme 376 | isn 377 | isne 378 | isnt 379 | isn't 380 | iss 381 | isse 382 | issi 383 | isski 384 | it 385 | it'd 386 | it'll 387 | itna 388 | itne 389 | itni 390 | itno 391 | its 392 | it's 393 | itself 394 | ityaadi 395 | ityadi 396 | i've 397 | ja 398 | jaa 399 | jab 400 | jabh 401 | jaha 402 | jahaan 403 | jahan 404 | jaisa 405 | jaise 406 | jaisi 407 | jata 408 | jayega 409 | jidhar 410 | jin 411 | jinhe 412 | jinhi 413 | jinho 414 | jinhone 415 | jinka 416 | jinke 417 | jinki 418 | jinn 419 | jis 420 | jise 421 | jiska 422 | jiske 423 | jiski 424 | jisme 425 | jiss 426 | jisse 427 | jitna 428 | jitne 429 | jitni 430 | jo 431 | just 432 | jyaada 433 | jyada 434 | k 435 | ka 436 | kaafi 437 | kab 438 | kabhi 439 | kafi 440 | kaha 441 | kahaa 442 | kahaan 443 | kahan 444 | kahi 445 | kahin 446 | kahte 447 | kaisa 448 | kaise 449 | kaisi 450 | kal 451 | kam 452 | kar 453 | kara 454 | kare 455 | karega 456 | karegi 457 | karen 458 | karenge 459 | kari 460 | karke 461 | karna 462 | karne 463 | karni 464 | karo 465 | karta 466 | karte 467 | karti 468 | karu 469 | karun 470 | karunga 471 | karungi 472 | kaun 473 | kaunsa 474 | kayi 475 | kch 476 | ke 477 | keep 478 | keeps 479 | keh 480 | kehte 481 | kept 482 | khud 483 | ki 484 | kin 485 | kine 486 | kinhe 487 | kinho 488 | kinka 489 | kinke 490 | kinki 491 | kinko 492 | kinn 493 | kino 494 | kis 495 | kise 496 | kisi 497 | kiska 498 | kiske 499 | kiski 500 | kisko 501 | kisliye 502 | 
kisne 503 | kitna 504 | kitne 505 | kitni 506 | kitno 507 | kiya 508 | kiye 509 | know 510 | known 511 | knows 512 | ko 513 | koi 514 | kon 515 | konsa 516 | koyi 517 | krna 518 | krne 519 | kuch 520 | kuchch 521 | kuchh 522 | kul 523 | kull 524 | kya 525 | kyaa 526 | kyu 527 | kyuki 528 | kyun 529 | kyunki 530 | lagta 531 | lagte 532 | lagti 533 | last 534 | lately 535 | later 536 | le 537 | least 538 | lekar 539 | lekin 540 | less 541 | lest 542 | let 543 | let's 544 | li 545 | like 546 | liked 547 | likely 548 | little 549 | liya 550 | liye 551 | ll 552 | lo 553 | log 554 | logon 555 | lol 556 | look 557 | looking 558 | looks 559 | ltd 560 | lunga 561 | m 562 | maan 563 | maana 564 | maane 565 | maani 566 | maano 567 | magar 568 | mai 569 | main 570 | maine 571 | mainly 572 | mana 573 | mane 574 | mani 575 | mano 576 | many 577 | mat 578 | may 579 | maybe 580 | me 581 | mean 582 | meanwhile 583 | mein 584 | mera 585 | mere 586 | merely 587 | meri 588 | might 589 | mightn 590 | mightnt 591 | mightn't 592 | mil 593 | mjhe 594 | more 595 | moreover 596 | most 597 | mostly 598 | much 599 | mujhe 600 | must 601 | mustn 602 | mustnt 603 | mustn't 604 | my 605 | myself 606 | na 607 | naa 608 | naah 609 | nahi 610 | nahin 611 | nai 612 | name 613 | namely 614 | nd 615 | ne 616 | near 617 | nearly 618 | necessary 619 | neeche 620 | need 621 | needn 622 | neednt 623 | needn't 624 | needs 625 | neither 626 | never 627 | nevertheless 628 | new 629 | next 630 | nhi 631 | nine 632 | no 633 | nobody 634 | non 635 | none 636 | noone 637 | nope 638 | nor 639 | normally 640 | not 641 | nothing 642 | novel 643 | now 644 | nowhere 645 | o 646 | obviously 647 | of 648 | off 649 | often 650 | oh 651 | ok 652 | okay 653 | old 654 | on 655 | once 656 | one 657 | ones 658 | only 659 | onto 660 | or 661 | other 662 | others 663 | otherwise 664 | ought 665 | our 666 | ours 667 | ourselves 668 | out 669 | outside 670 | over 671 | overall 672 | own 673 | par 674 | pata 675 | pe 676 | pehla 
677 | pehle 678 | pehli 679 | people 680 | per 681 | perhaps 682 | phla 683 | phle 684 | phli 685 | placed 686 | please 687 | plus 688 | poora 689 | poori 690 | provides 691 | pura 692 | puri 693 | q 694 | que 695 | quite 696 | raha 697 | rahaa 698 | rahe 699 | rahi 700 | rakh 701 | rakha 702 | rakhe 703 | rakhen 704 | rakhi 705 | rakho 706 | rather 707 | re 708 | really 709 | reasonably 710 | regarding 711 | regardless 712 | regards 713 | rehte 714 | rha 715 | rhaa 716 | rhe 717 | rhi 718 | ri 719 | right 720 | s 721 | sa 722 | saara 723 | saare 724 | saath 725 | sab 726 | sabhi 727 | sabse 728 | sahi 729 | said 730 | sakta 731 | saktaa 732 | sakte 733 | sakti 734 | same 735 | sang 736 | sara 737 | sath 738 | saw 739 | say 740 | saying 741 | says 742 | se 743 | second 744 | secondly 745 | see 746 | seeing 747 | seem 748 | seemed 749 | seeming 750 | seems 751 | seen 752 | self 753 | selves 754 | sensible 755 | sent 756 | serious 757 | seriously 758 | seven 759 | several 760 | shall 761 | shan 762 | shant 763 | shan't 764 | she 765 | she's 766 | should 767 | shouldn 768 | shouldnt 769 | shouldn't 770 | should've 771 | si 772 | since 773 | six 774 | so 775 | soch 776 | some 777 | somebody 778 | somehow 779 | someone 780 | something 781 | sometime 782 | sometimes 783 | somewhat 784 | somewhere 785 | soon 786 | still 787 | sub 788 | such 789 | sup 790 | sure 791 | t 792 | tab 793 | tabh 794 | tak 795 | take 796 | taken 797 | tarah 798 | teen 799 | teeno 800 | teesra 801 | teesre 802 | teesri 803 | tell 804 | tends 805 | tera 806 | tere 807 | teri 808 | th 809 | tha 810 | than 811 | thank 812 | thanks 813 | thanx 814 | that 815 | that'll 816 | thats 817 | that's 818 | the 819 | theek 820 | their 821 | theirs 822 | them 823 | themselves 824 | then 825 | thence 826 | there 827 | thereafter 828 | thereby 829 | therefore 830 | therein 831 | theres 832 | there's 833 | thereupon 834 | these 835 | they 836 | they'd 837 | they'll 838 | they're 839 | they've 840 | thi 841 | thik 
842 | thing 843 | think 844 | thinking 845 | third 846 | this 847 | tho 848 | thoda 849 | thodi 850 | thorough 851 | thoroughly 852 | those 853 | though 854 | thought 855 | three 856 | through 857 | throughout 858 | thru 859 | thus 860 | tjhe 861 | to 862 | together 863 | toh 864 | too 865 | took 866 | toward 867 | towards 868 | tried 869 | tries 870 | true 871 | truly 872 | try 873 | trying 874 | tu 875 | tujhe 876 | tum 877 | tumhara 878 | tumhare 879 | tumhari 880 | tune 881 | twice 882 | two 883 | um 884 | umm 885 | un 886 | under 887 | unhe 888 | unhi 889 | unho 890 | unhone 891 | unka 892 | unkaa 893 | unke 894 | unki 895 | unko 896 | unless 897 | unlikely 898 | unn 899 | unse 900 | until 901 | unto 902 | up 903 | upar 904 | upon 905 | us 906 | use 907 | used 908 | useful 909 | uses 910 | usi 911 | using 912 | uska 913 | uske 914 | usne 915 | uss 916 | usse 917 | ussi 918 | usually 919 | vaala 920 | vaale 921 | vaali 922 | vahaan 923 | vahan 924 | vahi 925 | vahin 926 | vaisa 927 | vaise 928 | vaisi 929 | vala 930 | vale 931 | vali 932 | various 933 | ve 934 | very 935 | via 936 | viz 937 | vo 938 | waala 939 | waale 940 | waali 941 | wagaira 942 | wagairah 943 | wagerah 944 | waha 945 | wahaan 946 | wahan 947 | wahi 948 | wahin 949 | waisa 950 | waise 951 | waisi 952 | wala 953 | wale 954 | wali 955 | want 956 | wants 957 | was 958 | wasn 959 | wasnt 960 | wasn't 961 | way 962 | we 963 | we'd 964 | well 965 | we'll 966 | went 967 | were 968 | we're 969 | weren 970 | werent 971 | weren't 972 | we've 973 | what 974 | whatever 975 | what's 976 | when 977 | whence 978 | whenever 979 | where 980 | whereafter 981 | whereas 982 | whereby 983 | wherein 984 | where's 985 | whereupon 986 | wherever 987 | whether 988 | which 989 | while 990 | who 991 | whoever 992 | whole 993 | whom 994 | who's 995 | whose 996 | why 997 | will 998 | willing 999 | with 1000 | within 1001 | without 1002 | wo 1003 | woh 1004 | wohi 1005 | won 1006 | wont 1007 | won't 1008 | would 1009 | 
wouldn 1010 | wouldnt 1011 | wouldn't 1012 | y 1013 | ya 1014 | yadi 1015 | yah 1016 | yaha 1017 | yahaan 1018 | yahan 1019 | yahi 1020 | yahin 1021 | ye 1022 | yeah 1023 | yeh 1024 | yehi 1025 | yes 1026 | yet 1027 | you 1028 | you'd 1029 | you'll 1030 | your 1031 | you're 1032 | yours 1033 | yourself 1034 | yourselves 1035 | you've 1036 | yup 1037 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/hungarian: -------------------------------------------------------------------------------- 1 | a 2 | ahogy 3 | ahol 4 | aki 5 | akik 6 | akkor 7 | alatt 8 | által 9 | általában 10 | amely 11 | amelyek 12 | amelyekben 13 | amelyeket 14 | amelyet 15 | amelynek 16 | ami 17 | amit 18 | amolyan 19 | amíg 20 | amikor 21 | át 22 | abban 23 | ahhoz 24 | annak 25 | arra 26 | arról 27 | az 28 | azok 29 | azon 30 | azt 31 | azzal 32 | azért 33 | aztán 34 | azután 35 | azonban 36 | bár 37 | be 38 | belül 39 | benne 40 | cikk 41 | cikkek 42 | cikkeket 43 | csak 44 | de 45 | e 46 | eddig 47 | egész 48 | egy 49 | egyes 50 | egyetlen 51 | egyéb 52 | egyik 53 | egyre 54 | ekkor 55 | el 56 | elég 57 | ellen 58 | elõ 59 | elõször 60 | elõtt 61 | elsõ 62 | én 63 | éppen 64 | ebben 65 | ehhez 66 | emilyen 67 | ennek 68 | erre 69 | ez 70 | ezt 71 | ezek 72 | ezen 73 | ezzel 74 | ezért 75 | és 76 | fel 77 | felé 78 | hanem 79 | hiszen 80 | hogy 81 | hogyan 82 | igen 83 | így 84 | illetve 85 | ill. 
86 | ill 87 | ilyen 88 | ilyenkor 89 | ison 90 | ismét 91 | itt 92 | jó 93 | jól 94 | jobban 95 | kell 96 | kellett 97 | keresztül 98 | keressünk 99 | ki 100 | kívül 101 | között 102 | közül 103 | legalább 104 | lehet 105 | lehetett 106 | legyen 107 | lenne 108 | lenni 109 | lesz 110 | lett 111 | maga 112 | magát 113 | majd 114 | majd 115 | már 116 | más 117 | másik 118 | meg 119 | még 120 | mellett 121 | mert 122 | mely 123 | melyek 124 | mi 125 | mit 126 | míg 127 | miért 128 | milyen 129 | mikor 130 | minden 131 | mindent 132 | mindenki 133 | mindig 134 | mint 135 | mintha 136 | mivel 137 | most 138 | nagy 139 | nagyobb 140 | nagyon 141 | ne 142 | néha 143 | nekem 144 | neki 145 | nem 146 | néhány 147 | nélkül 148 | nincs 149 | olyan 150 | ott 151 | össze 152 | õ 153 | õk 154 | õket 155 | pedig 156 | persze 157 | rá 158 | s 159 | saját 160 | sem 161 | semmi 162 | sok 163 | sokat 164 | sokkal 165 | számára 166 | szemben 167 | szerint 168 | szinte 169 | talán 170 | tehát 171 | teljes 172 | tovább 173 | továbbá 174 | több 175 | úgy 176 | ugyanis 177 | új 178 | újabb 179 | újra 180 | után 181 | utána 182 | utolsó 183 | vagy 184 | vagyis 185 | valaki 186 | valami 187 | valamint 188 | való 189 | vagyok 190 | van 191 | vannak 192 | volt 193 | voltam 194 | voltak 195 | voltunk 196 | vissza 197 | vele 198 | viszont 199 | volna 200 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/indonesian: -------------------------------------------------------------------------------- 1 | ada 2 | adalah 3 | adanya 4 | adapun 5 | agak 6 | agaknya 7 | agar 8 | akan 9 | akankah 10 | akhir 11 | akhiri 12 | akhirnya 13 | aku 14 | akulah 15 | amat 16 | amatlah 17 | anda 18 | andalah 19 | antar 20 | antara 21 | antaranya 22 | apa 23 | apaan 24 | apabila 25 | apakah 26 | apalagi 27 | apatah 28 | artinya 29 | asal 30 | asalkan 31 | atas 32 | atau 33 | ataukah 34 | ataupun 35 | awal 36 | awalnya 37 | bagai 38 | bagaikan 39 | 
bagaimana 40 | bagaimanakah 41 | bagaimanapun 42 | bagi 43 | bagian 44 | bahkan 45 | bahwa 46 | bahwasanya 47 | baik 48 | bakal 49 | bakalan 50 | balik 51 | banyak 52 | bapak 53 | baru 54 | bawah 55 | beberapa 56 | begini 57 | beginian 58 | beginikah 59 | beginilah 60 | begitu 61 | begitukah 62 | begitulah 63 | begitupun 64 | bekerja 65 | belakang 66 | belakangan 67 | belum 68 | belumlah 69 | benar 70 | benarkah 71 | benarlah 72 | berada 73 | berakhir 74 | berakhirlah 75 | berakhirnya 76 | berapa 77 | berapakah 78 | berapalah 79 | berapapun 80 | berarti 81 | berawal 82 | berbagai 83 | berdatangan 84 | beri 85 | berikan 86 | berikut 87 | berikutnya 88 | berjumlah 89 | berkali-kali 90 | berkata 91 | berkehendak 92 | berkeinginan 93 | berkenaan 94 | berlainan 95 | berlalu 96 | berlangsung 97 | berlebihan 98 | bermacam 99 | bermacam-macam 100 | bermaksud 101 | bermula 102 | bersama 103 | bersama-sama 104 | bersiap 105 | bersiap-siap 106 | bertanya 107 | bertanya-tanya 108 | berturut 109 | berturut-turut 110 | bertutur 111 | berujar 112 | berupa 113 | besar 114 | betul 115 | betulkah 116 | biasa 117 | biasanya 118 | bila 119 | bilakah 120 | bisa 121 | bisakah 122 | boleh 123 | bolehkah 124 | bolehlah 125 | buat 126 | bukan 127 | bukankah 128 | bukanlah 129 | bukannya 130 | bulan 131 | bung 132 | cara 133 | caranya 134 | cukup 135 | cukupkah 136 | cukuplah 137 | cuma 138 | dahulu 139 | dalam 140 | dan 141 | dapat 142 | dari 143 | daripada 144 | datang 145 | dekat 146 | demi 147 | demikian 148 | demikianlah 149 | dengan 150 | depan 151 | di 152 | dia 153 | diakhiri 154 | diakhirinya 155 | dialah 156 | diantara 157 | diantaranya 158 | diberi 159 | diberikan 160 | diberikannya 161 | dibuat 162 | dibuatnya 163 | didapat 164 | didatangkan 165 | digunakan 166 | diibaratkan 167 | diibaratkannya 168 | diingat 169 | diingatkan 170 | diinginkan 171 | dijawab 172 | dijelaskan 173 | dijelaskannya 174 | dikarenakan 175 | dikatakan 176 | dikatakannya 177 | dikerjakan 178 | diketahui 
179 | diketahuinya 180 | dikira 181 | dilakukan 182 | dilalui 183 | dilihat 184 | dimaksud 185 | dimaksudkan 186 | dimaksudkannya 187 | dimaksudnya 188 | diminta 189 | dimintai 190 | dimisalkan 191 | dimulai 192 | dimulailah 193 | dimulainya 194 | dimungkinkan 195 | dini 196 | dipastikan 197 | diperbuat 198 | diperbuatnya 199 | dipergunakan 200 | diperkirakan 201 | diperlihatkan 202 | diperlukan 203 | diperlukannya 204 | dipersoalkan 205 | dipertanyakan 206 | dipunyai 207 | diri 208 | dirinya 209 | disampaikan 210 | disebut 211 | disebutkan 212 | disebutkannya 213 | disini 214 | disinilah 215 | ditambahkan 216 | ditandaskan 217 | ditanya 218 | ditanyai 219 | ditanyakan 220 | ditegaskan 221 | ditujukan 222 | ditunjuk 223 | ditunjuki 224 | ditunjukkan 225 | ditunjukkannya 226 | ditunjuknya 227 | dituturkan 228 | dituturkannya 229 | diucapkan 230 | diucapkannya 231 | diungkapkan 232 | dong 233 | dua 234 | dulu 235 | empat 236 | enggak 237 | enggaknya 238 | entah 239 | entahlah 240 | guna 241 | gunakan 242 | hal 243 | hampir 244 | hanya 245 | hanyalah 246 | hari 247 | harus 248 | haruslah 249 | harusnya 250 | hendak 251 | hendaklah 252 | hendaknya 253 | hingga 254 | ia 255 | ialah 256 | ibarat 257 | ibaratkan 258 | ibaratnya 259 | ibu 260 | ikut 261 | ingat 262 | ingat-ingat 263 | ingin 264 | inginkah 265 | inginkan 266 | ini 267 | inikah 268 | inilah 269 | itu 270 | itukah 271 | itulah 272 | jadi 273 | jadilah 274 | jadinya 275 | jangan 276 | jangankan 277 | janganlah 278 | jauh 279 | jawab 280 | jawaban 281 | jawabnya 282 | jelas 283 | jelaskan 284 | jelaslah 285 | jelasnya 286 | jika 287 | jikalau 288 | juga 289 | jumlah 290 | jumlahnya 291 | justru 292 | kala 293 | kalau 294 | kalaulah 295 | kalaupun 296 | kalian 297 | kami 298 | kamilah 299 | kamu 300 | kamulah 301 | kan 302 | kapan 303 | kapankah 304 | kapanpun 305 | karena 306 | karenanya 307 | kasus 308 | kata 309 | katakan 310 | katakanlah 311 | katanya 312 | ke 313 | keadaan 314 | kebetulan 315 | kecil 316 | 
kedua 317 | keduanya 318 | keinginan 319 | kelamaan 320 | kelihatan 321 | kelihatannya 322 | kelima 323 | keluar 324 | kembali 325 | kemudian 326 | kemungkinan 327 | kemungkinannya 328 | kenapa 329 | kepada 330 | kepadanya 331 | kesampaian 332 | keseluruhan 333 | keseluruhannya 334 | keterlaluan 335 | ketika 336 | khususnya 337 | kini 338 | kinilah 339 | kira 340 | kira-kira 341 | kiranya 342 | kita 343 | kitalah 344 | kok 345 | kurang 346 | lagi 347 | lagian 348 | lah 349 | lain 350 | lainnya 351 | lalu 352 | lama 353 | lamanya 354 | lanjut 355 | lanjutnya 356 | lebih 357 | lewat 358 | lima 359 | luar 360 | macam 361 | maka 362 | makanya 363 | makin 364 | malah 365 | malahan 366 | mampu 367 | mampukah 368 | mana 369 | manakala 370 | manalagi 371 | masa 372 | masalah 373 | masalahnya 374 | masih 375 | masihkah 376 | masing 377 | masing-masing 378 | mau 379 | maupun 380 | melainkan 381 | melakukan 382 | melalui 383 | melihat 384 | melihatnya 385 | memang 386 | memastikan 387 | memberi 388 | memberikan 389 | membuat 390 | memerlukan 391 | memihak 392 | meminta 393 | memintakan 394 | memisalkan 395 | memperbuat 396 | mempergunakan 397 | memperkirakan 398 | memperlihatkan 399 | mempersiapkan 400 | mempersoalkan 401 | mempertanyakan 402 | mempunyai 403 | memulai 404 | memungkinkan 405 | menaiki 406 | menambahkan 407 | menandaskan 408 | menanti 409 | menanti-nanti 410 | menantikan 411 | menanya 412 | menanyai 413 | menanyakan 414 | mendapat 415 | mendapatkan 416 | mendatang 417 | mendatangi 418 | mendatangkan 419 | menegaskan 420 | mengakhiri 421 | mengapa 422 | mengatakan 423 | mengatakannya 424 | mengenai 425 | mengerjakan 426 | mengetahui 427 | menggunakan 428 | menghendaki 429 | mengibaratkan 430 | mengibaratkannya 431 | mengingat 432 | mengingatkan 433 | menginginkan 434 | mengira 435 | mengucapkan 436 | mengucapkannya 437 | mengungkapkan 438 | menjadi 439 | menjawab 440 | menjelaskan 441 | menuju 442 | menunjuk 443 | menunjuki 444 | menunjukkan 445 | menunjuknya 
446 | menurut 447 | menuturkan 448 | menyampaikan 449 | menyangkut 450 | menyatakan 451 | menyebutkan 452 | menyeluruh 453 | menyiapkan 454 | merasa 455 | mereka 456 | merekalah 457 | merupakan 458 | meski 459 | meskipun 460 | meyakini 461 | meyakinkan 462 | minta 463 | mirip 464 | misal 465 | misalkan 466 | misalnya 467 | mula 468 | mulai 469 | mulailah 470 | mulanya 471 | mungkin 472 | mungkinkah 473 | nah 474 | naik 475 | namun 476 | nanti 477 | nantinya 478 | nyaris 479 | nyatanya 480 | oleh 481 | olehnya 482 | pada 483 | padahal 484 | padanya 485 | pak 486 | paling 487 | panjang 488 | pantas 489 | para 490 | pasti 491 | pastilah 492 | penting 493 | pentingnya 494 | per 495 | percuma 496 | perlu 497 | perlukah 498 | perlunya 499 | pernah 500 | persoalan 501 | pertama 502 | pertama-tama 503 | pertanyaan 504 | pertanyakan 505 | pihak 506 | pihaknya 507 | pukul 508 | pula 509 | pun 510 | punya 511 | rasa 512 | rasanya 513 | rata 514 | rupanya 515 | saat 516 | saatnya 517 | saja 518 | sajalah 519 | saling 520 | sama 521 | sama-sama 522 | sambil 523 | sampai 524 | sampai-sampai 525 | sampaikan 526 | sana 527 | sangat 528 | sangatlah 529 | satu 530 | saya 531 | sayalah 532 | se 533 | sebab 534 | sebabnya 535 | sebagai 536 | sebagaimana 537 | sebagainya 538 | sebagian 539 | sebaik 540 | sebaik-baiknya 541 | sebaiknya 542 | sebaliknya 543 | sebanyak 544 | sebegini 545 | sebegitu 546 | sebelum 547 | sebelumnya 548 | sebenarnya 549 | seberapa 550 | sebesar 551 | sebetulnya 552 | sebisanya 553 | sebuah 554 | sebut 555 | sebutlah 556 | sebutnya 557 | secara 558 | secukupnya 559 | sedang 560 | sedangkan 561 | sedemikian 562 | sedikit 563 | sedikitnya 564 | seenaknya 565 | segala 566 | segalanya 567 | segera 568 | seharusnya 569 | sehingga 570 | seingat 571 | sejak 572 | sejauh 573 | sejenak 574 | sejumlah 575 | sekadar 576 | sekadarnya 577 | sekali 578 | sekali-kali 579 | sekalian 580 | sekaligus 581 | sekalipun 582 | sekarang 583 | sekarang 584 | sekecil 585 | seketika 586 
| sekiranya 587 | sekitar 588 | sekitarnya 589 | sekurang-kurangnya 590 | sekurangnya 591 | sela 592 | selain 593 | selaku 594 | selalu 595 | selama 596 | selama-lamanya 597 | selamanya 598 | selanjutnya 599 | seluruh 600 | seluruhnya 601 | semacam 602 | semakin 603 | semampu 604 | semampunya 605 | semasa 606 | semasih 607 | semata 608 | semata-mata 609 | semaunya 610 | sementara 611 | semisal 612 | semisalnya 613 | sempat 614 | semua 615 | semuanya 616 | semula 617 | sendiri 618 | sendirian 619 | sendirinya 620 | seolah 621 | seolah-olah 622 | seorang 623 | sepanjang 624 | sepantasnya 625 | sepantasnyalah 626 | seperlunya 627 | seperti 628 | sepertinya 629 | sepihak 630 | sering 631 | seringnya 632 | serta 633 | serupa 634 | sesaat 635 | sesama 636 | sesampai 637 | sesegera 638 | sesekali 639 | seseorang 640 | sesuatu 641 | sesuatunya 642 | sesudah 643 | sesudahnya 644 | setelah 645 | setempat 646 | setengah 647 | seterusnya 648 | setiap 649 | setiba 650 | setibanya 651 | setidak-tidaknya 652 | setidaknya 653 | setinggi 654 | seusai 655 | sewaktu 656 | siap 657 | siapa 658 | siapakah 659 | siapapun 660 | sini 661 | sinilah 662 | soal 663 | soalnya 664 | suatu 665 | sudah 666 | sudahkah 667 | sudahlah 668 | supaya 669 | tadi 670 | tadinya 671 | tahu 672 | tahun 673 | tak 674 | tambah 675 | tambahnya 676 | tampak 677 | tampaknya 678 | tandas 679 | tandasnya 680 | tanpa 681 | tanya 682 | tanyakan 683 | tanyanya 684 | tapi 685 | tegas 686 | tegasnya 687 | telah 688 | tempat 689 | tengah 690 | tentang 691 | tentu 692 | tentulah 693 | tentunya 694 | tepat 695 | terakhir 696 | terasa 697 | terbanyak 698 | terdahulu 699 | terdapat 700 | terdiri 701 | terhadap 702 | terhadapnya 703 | teringat 704 | teringat-ingat 705 | terjadi 706 | terjadilah 707 | terjadinya 708 | terkira 709 | terlalu 710 | terlebih 711 | terlihat 712 | termasuk 713 | ternyata 714 | tersampaikan 715 | tersebut 716 | tersebutlah 717 | tertentu 718 | tertuju 719 | terus 720 | terutama 721 | tetap 722 | 
tetapi 723 | tiap 724 | tiba 725 | tiba-tiba 726 | tidak 727 | tidakkah 728 | tidaklah 729 | tiga 730 | tinggi 731 | toh 732 | tunjuk 733 | turut 734 | tutur 735 | tuturnya 736 | ucap 737 | ucapnya 738 | ujar 739 | ujarnya 740 | umum 741 | umumnya 742 | ungkap 743 | ungkapnya 744 | untuk 745 | usah 746 | usai 747 | waduh 748 | wah 749 | wahai 750 | waktu 751 | waktunya 752 | walau 753 | walaupun 754 | wong 755 | yaitu 756 | yakin 757 | yakni 758 | yang -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/italian: -------------------------------------------------------------------------------- 1 | ad 2 | al 3 | allo 4 | ai 5 | agli 6 | all 7 | agl 8 | alla 9 | alle 10 | con 11 | col 12 | coi 13 | da 14 | dal 15 | dallo 16 | dai 17 | dagli 18 | dall 19 | dagl 20 | dalla 21 | dalle 22 | di 23 | del 24 | dello 25 | dei 26 | degli 27 | dell 28 | degl 29 | della 30 | delle 31 | in 32 | nel 33 | nello 34 | nei 35 | negli 36 | nell 37 | negl 38 | nella 39 | nelle 40 | su 41 | sul 42 | sullo 43 | sui 44 | sugli 45 | sull 46 | sugl 47 | sulla 48 | sulle 49 | per 50 | tra 51 | contro 52 | io 53 | tu 54 | lui 55 | lei 56 | noi 57 | voi 58 | loro 59 | mio 60 | mia 61 | miei 62 | mie 63 | tuo 64 | tua 65 | tuoi 66 | tue 67 | suo 68 | sua 69 | suoi 70 | sue 71 | nostro 72 | nostra 73 | nostri 74 | nostre 75 | vostro 76 | vostra 77 | vostri 78 | vostre 79 | mi 80 | ti 81 | ci 82 | vi 83 | lo 84 | la 85 | li 86 | le 87 | gli 88 | ne 89 | il 90 | un 91 | uno 92 | una 93 | ma 94 | ed 95 | se 96 | perché 97 | anche 98 | come 99 | dov 100 | dove 101 | che 102 | chi 103 | cui 104 | non 105 | più 106 | quale 107 | quanto 108 | quanti 109 | quanta 110 | quante 111 | quello 112 | quelli 113 | quella 114 | quelle 115 | questo 116 | questi 117 | questa 118 | queste 119 | si 120 | tutto 121 | tutti 122 | a 123 | c 124 | e 125 | i 126 | l 127 | o 128 | ho 129 | hai 130 | ha 131 | abbiamo 132 | avete 133 | hanno 134 | abbia 135 | 
abbiate 136 | abbiano 137 | avrò 138 | avrai 139 | avrà 140 | avremo 141 | avrete 142 | avranno 143 | avrei 144 | avresti 145 | avrebbe 146 | avremmo 147 | avreste 148 | avrebbero 149 | avevo 150 | avevi 151 | aveva 152 | avevamo 153 | avevate 154 | avevano 155 | ebbi 156 | avesti 157 | ebbe 158 | avemmo 159 | aveste 160 | ebbero 161 | avessi 162 | avesse 163 | avessimo 164 | avessero 165 | avendo 166 | avuto 167 | avuta 168 | avuti 169 | avute 170 | sono 171 | sei 172 | è 173 | siamo 174 | siete 175 | sia 176 | siate 177 | siano 178 | sarò 179 | sarai 180 | sarà 181 | saremo 182 | sarete 183 | saranno 184 | sarei 185 | saresti 186 | sarebbe 187 | saremmo 188 | sareste 189 | sarebbero 190 | ero 191 | eri 192 | era 193 | eravamo 194 | eravate 195 | erano 196 | fui 197 | fosti 198 | fu 199 | fummo 200 | foste 201 | furono 202 | fossi 203 | fosse 204 | fossimo 205 | fossero 206 | essendo 207 | faccio 208 | fai 209 | facciamo 210 | fanno 211 | faccia 212 | facciate 213 | facciano 214 | farò 215 | farai 216 | farà 217 | faremo 218 | farete 219 | faranno 220 | farei 221 | faresti 222 | farebbe 223 | faremmo 224 | fareste 225 | farebbero 226 | facevo 227 | facevi 228 | faceva 229 | facevamo 230 | facevate 231 | facevano 232 | feci 233 | facesti 234 | fece 235 | facemmo 236 | faceste 237 | fecero 238 | facessi 239 | facesse 240 | facessimo 241 | facessero 242 | facendo 243 | sto 244 | stai 245 | sta 246 | stiamo 247 | stanno 248 | stia 249 | stiate 250 | stiano 251 | starò 252 | starai 253 | starà 254 | staremo 255 | starete 256 | staranno 257 | starei 258 | staresti 259 | starebbe 260 | staremmo 261 | stareste 262 | starebbero 263 | stavo 264 | stavi 265 | stava 266 | stavamo 267 | stavate 268 | stavano 269 | stetti 270 | stesti 271 | stette 272 | stemmo 273 | steste 274 | stettero 275 | stessi 276 | stesse 277 | stessimo 278 | stessero 279 | stando 280 | -------------------------------------------------------------------------------- 
/backend/nltk_data/corpora/stopwords/kazakh: -------------------------------------------------------------------------------- 1 | ах 2 | ох 3 | эх 4 | ай 5 | эй 6 | ой 7 | тағы 8 | тағыда 9 | әрине 10 | жоқ 11 | сондай 12 | осындай 13 | осылай 14 | солай 15 | мұндай 16 | бұндай 17 | мен 18 | сен 19 | ол 20 | біз 21 | біздер 22 | олар 23 | сіз 24 | сіздер 25 | маған 26 | оған 27 | саған 28 | біздің 29 | сіздің 30 | оның 31 | бізге 32 | сізге 33 | оларға 34 | біздерге 35 | сіздерге 36 | оларға 37 | менімен 38 | сенімен 39 | онымен 40 | бізбен 41 | сізбен 42 | олармен 43 | біздермен 44 | сіздермен 45 | менің 46 | сенің 47 | біздің 48 | сіздің 49 | оның 50 | біздердің 51 | сіздердің 52 | олардың 53 | маған 54 | саған 55 | оған 56 | менен 57 | сенен 58 | одан 59 | бізден 60 | сізден 61 | олардан 62 | біздерден 63 | сіздерден 64 | олардан 65 | айтпақшы 66 | сонымен 67 | сондықтан 68 | бұл 69 | осы 70 | сол 71 | анау 72 | мынау 73 | сонау 74 | осынау 75 | ана 76 | мына 77 | сона 78 | әні 79 | міне 80 | өй 81 | үйт 82 | бүйт 83 | біреу 84 | кейбіреу 85 | кейбір 86 | қайсыбір 87 | әрбір 88 | бірнеше 89 | бірдеме 90 | бірнеше 91 | әркім 92 | әрне 93 | әрқайсы 94 | әрқалай 95 | әлдекім 96 | әлдене 97 | әлдеқайдан 98 | әлденеше 99 | әлдеқалай 100 | әлдеқашан 101 | алдақашан 102 | еш 103 | ешкім 104 | ешбір 105 | ештеме 106 | дәнеңе 107 | ешқашан 108 | ешқандай 109 | ешқайсы 110 | емес 111 | бәрі 112 | барлық 113 | барша 114 | бар 115 | күллі 116 | бүкіл 117 | түгел 118 | өз 119 | өзім 120 | өзің 121 | өзінің 122 | өзіме 123 | өзіне 124 | өзімнің 125 | өзі 126 | өзге 127 | менде 128 | сенде 129 | онда 130 | менен 131 | сенен онан 132 | одан 133 | ау 134 | па 135 | ей 136 | әй 137 | е 138 | уа 139 | уау 140 | уай 141 | я 142 | пай 143 | ә 144 | о 145 | оһо 146 | ой 147 | ие 148 | аһа 149 | ау 150 | беу 151 | мәссаған 152 | бәрекелді 153 | әттегенай 154 | жаракімалла 155 | масқарай 156 | астапыралла 157 | япырмай 158 | ойпырмай 159 | кәне 160 | кәнеки 161 | ал 162 | әйда 163 | 
кәні 164 | міне 165 | әні 166 | сорап 167 | қош-қош 168 | пфша 169 | пішә 170 | құрау-құрау 171 | шәйт 172 | шек 173 | моһ 174 | тәк 175 | құрау 176 | құр 177 | кә 178 | кәһ 179 | күшім 180 | күшім 181 | мышы 182 | пырс 183 | әукім 184 | алақай 185 | паһ-паһ 186 | бәрекелді 187 | ура 188 | әттең 189 | әттеген-ай 190 | қап 191 | түге 192 | пішту 193 | шіркін 194 | алатау 195 | пай-пай 196 | үшін 197 | сайын 198 | сияқты 199 | туралы 200 | арқылы 201 | бойы 202 | бойымен 203 | шамалы 204 | шақты 205 | қаралы 206 | ғұрлы 207 | ғұрлым 208 | шейін 209 | дейін 210 | қарай 211 | таман 212 | салым 213 | тарта 214 | жуық 215 | таяу 216 | гөрі 217 | бері 218 | кейін 219 | соң 220 | бұрын 221 | бетер 222 | қатар 223 | бірге 224 | қоса 225 | арс 226 | 227 | гүрс 228 | 229 | дүрс 230 | 231 | қорс 232 | 233 | тарс 234 | 235 | тырс 236 | 237 | ырс 238 | 239 | барқ 240 | 241 | борт 242 | 243 | күрт 244 | 245 | кірт 246 | 247 | морт 248 | 249 | сарт 250 | 251 | шырт 252 | 253 | дүңк 254 | 255 | күңк 256 | 257 | қыңқ 258 | 259 | мыңқ 260 | 261 | маңқ 262 | 263 | саңқ 264 | 265 | шаңқ 266 | 267 | шіңк 268 | 269 | сыңқ 270 | 271 | таңқ 272 | 273 | тыңқ 274 | 275 | ыңқ 276 | 277 | болп 278 | 279 | былп 280 | 281 | жалп 282 | 283 | желп 284 | 285 | қолп 286 | 287 | ірк 288 | 289 | ырқ 290 | 291 | сарт-сұрт 292 | 293 | тарс-тұрс 294 | 295 | арс-ұрс 296 | 297 | жалт-жалт 298 | 299 | жалт-жұлт 300 | 301 | қалт-қалт 302 | 303 | қалт-құлт 304 | 305 | қаңқ-қаңқ 306 | 307 | қаңқ-құңқ 308 | 309 | шаңқ-шаңқ 310 | 311 | шаңқ-шұңқ 312 | 313 | арбаң-арбаң 314 | 315 | бүгжең-бүгжең 316 | 317 | арсалаң-арсалаң 318 | 319 | ербелең-ербелең 320 | 321 | батыр-бұтыр 322 | 323 | далаң-далаң 324 | 325 | тарбаң-тарбаң 326 | 327 | қызараң-қызараң 328 | 329 | қаңғыр-күңгір 330 | 331 | қайқаң-құйқаң 332 | 333 | митың-митың 334 | 335 | салаң-сұлаң 336 | 337 | ыржың-тыржың 338 | бірақ 339 | алайда 340 | дегенмен 341 | әйтпесе 342 | әйткенмен 343 | себебі 344 | өйткені 345 | сондықтан 346 | үшін 347 | сайын 348 | 
сияқты 349 | туралы 350 | арқылы 351 | бойы 352 | бойымен 353 | шамалы 354 | шақты 355 | қаралы 356 | ғұрлы 357 | ғұрлым 358 | гөрі 359 | бері 360 | кейін 361 | соң 362 | бұрын 363 | бетер 364 | қатар 365 | бірге 366 | қоса 367 | шейін 368 | дейін 369 | қарай 370 | таман 371 | салым 372 | тарта 373 | жуық 374 | таяу 375 | арнайы 376 | осындай 377 | ғана 378 | қана 379 | тек 380 | әншейін 381 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/nepali: -------------------------------------------------------------------------------- 1 | छ 2 | र 3 | पनि 4 | छन् 5 | लागि 6 | भएको 7 | गरेको 8 | भने 9 | गर्न 10 | गर्ने 11 | हो 12 | तथा 13 | यो 14 | रहेको 15 | उनले 16 | थियो 17 | हुने 18 | गरेका 19 | थिए 20 | गर्दै 21 | तर 22 | नै 23 | को 24 | मा 25 | हुन् 26 | भन्ने 27 | हुन 28 | गरी 29 | त 30 | हुन्छ 31 | अब 32 | के 33 | रहेका 34 | गरेर 35 | छैन 36 | दिए 37 | भए 38 | यस 39 | ले 40 | गर्नु 41 | औं 42 | सो 43 | त्यो 44 | कि 45 | जुन 46 | यी 47 | का 48 | गरि 49 | ती 50 | न 51 | छु 52 | छौं 53 | लाई 54 | नि 55 | उप 56 | अक्सर 57 | आदि 58 | कसरी 59 | क्रमशः 60 | चाले 61 | अगाडी 62 | अझै 63 | अनुसार 64 | अन्तर्गत 65 | अन्य 66 | अन्यत्र 67 | अन्यथा 68 | अरु 69 | अरुलाई 70 | अर्को 71 | अर्थात 72 | अर्थात् 73 | अलग 74 | आए 75 | आजको 76 | ओठ 77 | आत्म 78 | आफू 79 | आफूलाई 80 | आफ्नै 81 | आफ्नो 82 | आयो 83 | उदाहरण 84 | उनको 85 | उहालाई 86 | एउटै 87 | एक 88 | एकदम 89 | कतै 90 | कम से कम 91 | कसै 92 | कसैले 93 | कहाँबाट 94 | कहिलेकाहीं 95 | का 96 | किन 97 | किनभने 98 | कुनै 99 | कुरा 100 | कृपया 101 | केही 102 | कोही 103 | गए 104 | गरौं 105 | गर्छ 106 | गर्छु 107 | गर्नुपर्छ 108 | गयौ 109 | गैर 110 | चार 111 | चाहनुहुन्छ 112 | चाहन्छु 113 | चाहिए 114 | छू 115 | जताततै 116 | जब 117 | जबकि 118 | जसको 119 | जसबाट 120 | जसमा 121 | जसलाई 122 | जसले 123 | जस्तै 124 | जस्तो 125 | जस्तोसुकै 126 | जहाँ 127 | जान 128 | जाहिर 129 | जे 130 | जो 131 | ठीक 132 | तत्काल 133 | तदनुसार 134 | तपाईको 135 | तपाई 136 | पर्याप्त 137 | 
पहिले 138 | पहिलो 139 | पहिल्यै 140 | पाँच 141 | पाँचौं 142 | तल 143 | तापनी 144 | तिनी 145 | तिनीहरू 146 | तिनीहरुको 147 | तिनिहरुलाई 148 | तिमी 149 | तिर 150 | तीन 151 | तुरुन्तै 152 | तेस्रो 153 | तेस्कारण 154 | पूर्व 155 | प्रति 156 | प्रतेक 157 | प्लस 158 | फेरी 159 | बने 160 | त्सपछि 161 | त्सैले 162 | त्यहाँ 163 | थिएन 164 | दिनुभएको 165 | दिनुहुन्छ 166 | दुई 167 | देखि 168 | बरु 169 | बारे 170 | बाहिर 171 | देखिन्छ 172 | देखियो 173 | देखे 174 | देखेको 175 | देखेर 176 | दोस्रो 177 | धेरै 178 | नजिकै 179 | नत्र 180 | नयाँ 181 | निम्ति 182 | बाहेक 183 | बीच 184 | बीचमा 185 | भन 186 | निम्न 187 | निम्नानुसार 188 | निर्दिष्ट 189 | नौ 190 | पक्का 191 | पक्कै 192 | पछि 193 | पछिल्लो 194 | पटक 195 | पर्छ 196 | पर्थ्यो 197 | भन्छन् 198 | भन् 199 | भन्छु 200 | भन्दा 201 | भन्नुभयो 202 | भर 203 | भित्र 204 | भित्री 205 | म 206 | मलाई 207 | मात्र 208 | माथि 209 | मुख्य 210 | मेरो 211 | यति 212 | यथोचित 213 | यदि 214 | यद्यपि 215 | यसको 216 | यसपछि 217 | यसबाहेक 218 | यसरी 219 | यसो 220 | यस्तो 221 | यहाँ 222 | यहाँसम्म 223 | या 224 | रही 225 | राखे 226 | राख्छ 227 | राम्रो 228 | रूप 229 | लगभग 230 | वरीपरी 231 | वास्तवमा 232 | बिरुद्ध 233 | बिशेष 234 | सायद 235 | शायद 236 | संग 237 | संगै 238 | सक्छ 239 | सट्टा 240 | सधै 241 | सबै 242 | सबैलाई 243 | समय 244 | सम्भव 245 | सम्म 246 | सही 247 | साँच्चै 248 | सात 249 | साथ 250 | साथै 251 | सारा 252 | सोही 253 | स्पष्ट 254 | हरे 255 | हरेक -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/norwegian: -------------------------------------------------------------------------------- 1 | og 2 | i 3 | jeg 4 | det 5 | at 6 | en 7 | et 8 | den 9 | til 10 | er 11 | som 12 | på 13 | de 14 | med 15 | han 16 | av 17 | ikke 18 | ikkje 19 | der 20 | så 21 | var 22 | meg 23 | seg 24 | men 25 | ett 26 | har 27 | om 28 | vi 29 | min 30 | mitt 31 | ha 32 | hadde 33 | hun 34 | nå 35 | over 36 | da 37 | ved 38 | fra 39 | du 40 | ut 41 | sin 42 | dem 43 | oss 44 | opp 45 | man 
46 | kan 47 | hans 48 | hvor 49 | eller 50 | hva 51 | skal 52 | selv 53 | sjøl 54 | her 55 | alle 56 | vil 57 | bli 58 | ble 59 | blei 60 | blitt 61 | kunne 62 | inn 63 | når 64 | være 65 | kom 66 | noen 67 | noe 68 | ville 69 | dere 70 | som 71 | deres 72 | kun 73 | ja 74 | etter 75 | ned 76 | skulle 77 | denne 78 | for 79 | deg 80 | si 81 | sine 82 | sitt 83 | mot 84 | å 85 | meget 86 | hvorfor 87 | dette 88 | disse 89 | uten 90 | hvordan 91 | ingen 92 | din 93 | ditt 94 | blir 95 | samme 96 | hvilken 97 | hvilke 98 | sånn 99 | inni 100 | mellom 101 | vår 102 | hver 103 | hvem 104 | vors 105 | hvis 106 | både 107 | bare 108 | enn 109 | fordi 110 | før 111 | mange 112 | også 113 | slik 114 | vært 115 | være 116 | båe 117 | begge 118 | siden 119 | dykk 120 | dykkar 121 | dei 122 | deira 123 | deires 124 | deim 125 | di 126 | då 127 | eg 128 | ein 129 | eit 130 | eitt 131 | elles 132 | honom 133 | hjå 134 | ho 135 | hoe 136 | henne 137 | hennar 138 | hennes 139 | hoss 140 | hossen 141 | ikkje 142 | ingi 143 | inkje 144 | korleis 145 | korso 146 | kva 147 | kvar 148 | kvarhelst 149 | kven 150 | kvi 151 | kvifor 152 | me 153 | medan 154 | mi 155 | mine 156 | mykje 157 | no 158 | nokon 159 | noka 160 | nokor 161 | noko 162 | nokre 163 | si 164 | sia 165 | sidan 166 | so 167 | somt 168 | somme 169 | um 170 | upp 171 | vere 172 | vore 173 | verte 174 | vort 175 | varte 176 | vart 177 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/portuguese: -------------------------------------------------------------------------------- 1 | a 2 | à 3 | ao 4 | aos 5 | aquela 6 | aquelas 7 | aquele 8 | aqueles 9 | aquilo 10 | as 11 | às 12 | até 13 | com 14 | como 15 | da 16 | das 17 | de 18 | dela 19 | delas 20 | dele 21 | deles 22 | depois 23 | do 24 | dos 25 | e 26 | é 27 | ela 28 | elas 29 | ele 30 | eles 31 | em 32 | entre 33 | era 34 | eram 35 | éramos 36 | essa 37 | essas 38 | esse 39 | esses 40 | esta 41 | 
está 42 | estamos 43 | estão 44 | estar 45 | estas 46 | estava 47 | estavam 48 | estávamos 49 | este 50 | esteja 51 | estejam 52 | estejamos 53 | estes 54 | esteve 55 | estive 56 | estivemos 57 | estiver 58 | estivera 59 | estiveram 60 | estivéramos 61 | estiverem 62 | estivermos 63 | estivesse 64 | estivessem 65 | estivéssemos 66 | estou 67 | eu 68 | foi 69 | fomos 70 | for 71 | fora 72 | foram 73 | fôramos 74 | forem 75 | formos 76 | fosse 77 | fossem 78 | fôssemos 79 | fui 80 | há 81 | haja 82 | hajam 83 | hajamos 84 | hão 85 | havemos 86 | haver 87 | hei 88 | houve 89 | houvemos 90 | houver 91 | houvera 92 | houverá 93 | houveram 94 | houvéramos 95 | houverão 96 | houverei 97 | houverem 98 | houveremos 99 | houveria 100 | houveriam 101 | houveríamos 102 | houvermos 103 | houvesse 104 | houvessem 105 | houvéssemos 106 | isso 107 | isto 108 | já 109 | lhe 110 | lhes 111 | mais 112 | mas 113 | me 114 | mesmo 115 | meu 116 | meus 117 | minha 118 | minhas 119 | muito 120 | na 121 | não 122 | nas 123 | nem 124 | no 125 | nos 126 | nós 127 | nossa 128 | nossas 129 | nosso 130 | nossos 131 | num 132 | numa 133 | o 134 | os 135 | ou 136 | para 137 | pela 138 | pelas 139 | pelo 140 | pelos 141 | por 142 | qual 143 | quando 144 | que 145 | quem 146 | são 147 | se 148 | seja 149 | sejam 150 | sejamos 151 | sem 152 | ser 153 | será 154 | serão 155 | serei 156 | seremos 157 | seria 158 | seriam 159 | seríamos 160 | seu 161 | seus 162 | só 163 | somos 164 | sou 165 | sua 166 | suas 167 | também 168 | te 169 | tem 170 | tém 171 | temos 172 | tenha 173 | tenham 174 | tenhamos 175 | tenho 176 | terá 177 | terão 178 | terei 179 | teremos 180 | teria 181 | teriam 182 | teríamos 183 | teu 184 | teus 185 | teve 186 | tinha 187 | tinham 188 | tínhamos 189 | tive 190 | tivemos 191 | tiver 192 | tivera 193 | tiveram 194 | tivéramos 195 | tiverem 196 | tivermos 197 | tivesse 198 | tivessem 199 | tivéssemos 200 | tu 201 | tua 202 | tuas 203 | um 204 | uma 205 | você 206 | vocês 207 | vos 
208 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/romanian: -------------------------------------------------------------------------------- 1 | a 2 | abia 3 | acea 4 | aceasta 5 | această 6 | aceea 7 | aceeasi 8 | acei 9 | aceia 10 | acel 11 | acela 12 | acelasi 13 | acele 14 | acelea 15 | acest 16 | acesta 17 | aceste 18 | acestea 19 | acestei 20 | acestia 21 | acestui 22 | aceşti 23 | aceştia 24 | adica 25 | ai 26 | aia 27 | aibă 28 | aici 29 | al 30 | ala 31 | ale 32 | alea 33 | alt 34 | alta 35 | altceva 36 | altcineva 37 | alte 38 | altfel 39 | alti 40 | altii 41 | altul 42 | am 43 | anume 44 | apoi 45 | ar 46 | are 47 | as 48 | asa 49 | asta 50 | astea 51 | astfel 52 | asupra 53 | atare 54 | atat 55 | atata 56 | atatea 57 | atatia 58 | ati 59 | atit 60 | atita 61 | atitea 62 | atitia 63 | atunci 64 | au 65 | avea 66 | avem 67 | aveţi 68 | avut 69 | aş 70 | aţi 71 | ba 72 | ca 73 | cam 74 | cand 75 | care 76 | careia 77 | carora 78 | caruia 79 | cat 80 | catre 81 | ce 82 | cea 83 | ceea 84 | cei 85 | ceilalti 86 | cel 87 | cele 88 | celor 89 | ceva 90 | chiar 91 | ci 92 | cind 93 | cine 94 | cineva 95 | cit 96 | cita 97 | cite 98 | citeva 99 | citi 100 | citiva 101 | cu 102 | cui 103 | cum 104 | cumva 105 | cât 106 | câte 107 | câtva 108 | câţi 109 | cînd 110 | cît 111 | cîte 112 | cîtva 113 | cîţi 114 | că 115 | căci 116 | cărei 117 | căror 118 | cărui 119 | către 120 | da 121 | daca 122 | dacă 123 | dar 124 | dat 125 | dată 126 | dau 127 | de 128 | deasupra 129 | deci 130 | decit 131 | deja 132 | desi 133 | despre 134 | deşi 135 | din 136 | dintr 137 | dintr- 138 | dintre 139 | doar 140 | doi 141 | doilea 142 | două 143 | drept 144 | dupa 145 | după 146 | dă 147 | e 148 | ea 149 | ei 150 | el 151 | ele 152 | era 153 | eram 154 | este 155 | eu 156 | eşti 157 | face 158 | fara 159 | fata 160 | fel 161 | fi 162 | fie 163 | fiecare 164 | fii 165 | fim 166 | fiu 167 | fiţi 168 | foarte 169 
| fost 170 | fără 171 | i 172 | ia 173 | iar 174 | ii 175 | il 176 | imi 177 | in 178 | inainte 179 | inapoi 180 | inca 181 | incit 182 | insa 183 | intr 184 | intre 185 | isi 186 | iti 187 | la 188 | le 189 | li 190 | lor 191 | lui 192 | lângă 193 | lîngă 194 | m 195 | ma 196 | mai 197 | mea 198 | mei 199 | mele 200 | mereu 201 | meu 202 | mi 203 | mie 204 | mine 205 | mod 206 | mult 207 | multa 208 | multe 209 | multi 210 | multă 211 | mulţi 212 | mâine 213 | mîine 214 | mă 215 | ne 216 | ni 217 | nici 218 | nimeni 219 | nimic 220 | niste 221 | nişte 222 | noastre 223 | noastră 224 | noi 225 | nostri 226 | nostru 227 | nou 228 | noua 229 | nouă 230 | noştri 231 | nu 232 | numai 233 | o 234 | or 235 | ori 236 | oricare 237 | orice 238 | oricine 239 | oricum 240 | oricând 241 | oricât 242 | oricînd 243 | oricît 244 | oriunde 245 | pai 246 | parca 247 | patra 248 | patru 249 | pe 250 | pentru 251 | peste 252 | pic 253 | pina 254 | poate 255 | pot 256 | prea 257 | prima 258 | primul 259 | prin 260 | printr- 261 | putini 262 | puţin 263 | puţina 264 | puţină 265 | până 266 | pînă 267 | sa 268 | sa-mi 269 | sa-ti 270 | sai 271 | sale 272 | sau 273 | se 274 | si 275 | sint 276 | sintem 277 | spate 278 | spre 279 | sub 280 | sunt 281 | suntem 282 | sunteţi 283 | sus 284 | să 285 | săi 286 | său 287 | t 288 | ta 289 | tale 290 | te 291 | ti 292 | tine 293 | toata 294 | toate 295 | toată 296 | tocmai 297 | tot 298 | toti 299 | totul 300 | totusi 301 | totuşi 302 | toţi 303 | trei 304 | treia 305 | treilea 306 | tu 307 | tuturor 308 | tăi 309 | tău 310 | u 311 | ul 312 | ului 313 | un 314 | una 315 | unde 316 | undeva 317 | unei 318 | uneia 319 | unele 320 | uneori 321 | unii 322 | unor 323 | unora 324 | unu 325 | unui 326 | unuia 327 | unul 328 | v 329 | va 330 | vi 331 | voastre 332 | voastră 333 | voi 334 | vom 335 | vor 336 | vostru 337 | vouă 338 | voştri 339 | vreo 340 | vreun 341 | vă 342 | zi 343 | zice 344 | îi 345 | îl 346 | îmi 347 | în 348 | îţi 349 | ăla 350 | 
ălea 351 | ăsta 352 | ăstea 353 | ăştia 354 | şi 355 | ţi 356 | ţie -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/russian: -------------------------------------------------------------------------------- 1 | и 2 | в 3 | во 4 | не 5 | что 6 | он 7 | на 8 | я 9 | с 10 | со 11 | как 12 | а 13 | то 14 | все 15 | она 16 | так 17 | его 18 | но 19 | да 20 | ты 21 | к 22 | у 23 | же 24 | вы 25 | за 26 | бы 27 | по 28 | только 29 | ее 30 | мне 31 | было 32 | вот 33 | от 34 | меня 35 | еще 36 | нет 37 | о 38 | из 39 | ему 40 | теперь 41 | когда 42 | даже 43 | ну 44 | вдруг 45 | ли 46 | если 47 | уже 48 | или 49 | ни 50 | быть 51 | был 52 | него 53 | до 54 | вас 55 | нибудь 56 | опять 57 | уж 58 | вам 59 | ведь 60 | там 61 | потом 62 | себя 63 | ничего 64 | ей 65 | может 66 | они 67 | тут 68 | где 69 | есть 70 | надо 71 | ней 72 | для 73 | мы 74 | тебя 75 | их 76 | чем 77 | была 78 | сам 79 | чтоб 80 | без 81 | будто 82 | чего 83 | раз 84 | тоже 85 | себе 86 | под 87 | будет 88 | ж 89 | тогда 90 | кто 91 | этот 92 | того 93 | потому 94 | этого 95 | какой 96 | совсем 97 | ним 98 | здесь 99 | этом 100 | один 101 | почти 102 | мой 103 | тем 104 | чтобы 105 | нее 106 | сейчас 107 | были 108 | куда 109 | зачем 110 | всех 111 | никогда 112 | можно 113 | при 114 | наконец 115 | два 116 | об 117 | другой 118 | хоть 119 | после 120 | над 121 | больше 122 | тот 123 | через 124 | эти 125 | нас 126 | про 127 | всего 128 | них 129 | какая 130 | много 131 | разве 132 | три 133 | эту 134 | моя 135 | впрочем 136 | хорошо 137 | свою 138 | этой 139 | перед 140 | иногда 141 | лучше 142 | чуть 143 | том 144 | нельзя 145 | такой 146 | им 147 | более 148 | всегда 149 | конечно 150 | всю 151 | между 152 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/spanish: -------------------------------------------------------------------------------- 1 | de 2 | la 3 | 
que 4 | el 5 | en 6 | y 7 | a 8 | los 9 | del 10 | se 11 | las 12 | por 13 | un 14 | para 15 | con 16 | no 17 | una 18 | su 19 | al 20 | lo 21 | como 22 | más 23 | pero 24 | sus 25 | le 26 | ya 27 | o 28 | este 29 | sí 30 | porque 31 | esta 32 | entre 33 | cuando 34 | muy 35 | sin 36 | sobre 37 | también 38 | me 39 | hasta 40 | hay 41 | donde 42 | quien 43 | desde 44 | todo 45 | nos 46 | durante 47 | todos 48 | uno 49 | les 50 | ni 51 | contra 52 | otros 53 | ese 54 | eso 55 | ante 56 | ellos 57 | e 58 | esto 59 | mí 60 | antes 61 | algunos 62 | qué 63 | unos 64 | yo 65 | otro 66 | otras 67 | otra 68 | él 69 | tanto 70 | esa 71 | estos 72 | mucho 73 | quienes 74 | nada 75 | muchos 76 | cual 77 | poco 78 | ella 79 | estar 80 | estas 81 | algunas 82 | algo 83 | nosotros 84 | mi 85 | mis 86 | tú 87 | te 88 | ti 89 | tu 90 | tus 91 | ellas 92 | nosotras 93 | vosotros 94 | vosotras 95 | os 96 | mío 97 | mía 98 | míos 99 | mías 100 | tuyo 101 | tuya 102 | tuyos 103 | tuyas 104 | suyo 105 | suya 106 | suyos 107 | suyas 108 | nuestro 109 | nuestra 110 | nuestros 111 | nuestras 112 | vuestro 113 | vuestra 114 | vuestros 115 | vuestras 116 | esos 117 | esas 118 | estoy 119 | estás 120 | está 121 | estamos 122 | estáis 123 | están 124 | esté 125 | estés 126 | estemos 127 | estéis 128 | estén 129 | estaré 130 | estarás 131 | estará 132 | estaremos 133 | estaréis 134 | estarán 135 | estaría 136 | estarías 137 | estaríamos 138 | estaríais 139 | estarían 140 | estaba 141 | estabas 142 | estábamos 143 | estabais 144 | estaban 145 | estuve 146 | estuviste 147 | estuvo 148 | estuvimos 149 | estuvisteis 150 | estuvieron 151 | estuviera 152 | estuvieras 153 | estuviéramos 154 | estuvierais 155 | estuvieran 156 | estuviese 157 | estuvieses 158 | estuviésemos 159 | estuvieseis 160 | estuviesen 161 | estando 162 | estado 163 | estada 164 | estados 165 | estadas 166 | estad 167 | he 168 | has 169 | ha 170 | hemos 171 | habéis 172 | han 173 | haya 174 | hayas 175 | hayamos 176 | hayáis 177 
| hayan 178 | habré 179 | habrás 180 | habrá 181 | habremos 182 | habréis 183 | habrán 184 | habría 185 | habrías 186 | habríamos 187 | habríais 188 | habrían 189 | había 190 | habías 191 | habíamos 192 | habíais 193 | habían 194 | hube 195 | hubiste 196 | hubo 197 | hubimos 198 | hubisteis 199 | hubieron 200 | hubiera 201 | hubieras 202 | hubiéramos 203 | hubierais 204 | hubieran 205 | hubiese 206 | hubieses 207 | hubiésemos 208 | hubieseis 209 | hubiesen 210 | habiendo 211 | habido 212 | habida 213 | habidos 214 | habidas 215 | soy 216 | eres 217 | es 218 | somos 219 | sois 220 | son 221 | sea 222 | seas 223 | seamos 224 | seáis 225 | sean 226 | seré 227 | serás 228 | será 229 | seremos 230 | seréis 231 | serán 232 | sería 233 | serías 234 | seríamos 235 | seríais 236 | serían 237 | era 238 | eras 239 | éramos 240 | erais 241 | eran 242 | fui 243 | fuiste 244 | fue 245 | fuimos 246 | fuisteis 247 | fueron 248 | fuera 249 | fueras 250 | fuéramos 251 | fuerais 252 | fueran 253 | fuese 254 | fueses 255 | fuésemos 256 | fueseis 257 | fuesen 258 | sintiendo 259 | sentido 260 | sentida 261 | sentidos 262 | sentidas 263 | siente 264 | sentid 265 | tengo 266 | tienes 267 | tiene 268 | tenemos 269 | tenéis 270 | tienen 271 | tenga 272 | tengas 273 | tengamos 274 | tengáis 275 | tengan 276 | tendré 277 | tendrás 278 | tendrá 279 | tendremos 280 | tendréis 281 | tendrán 282 | tendría 283 | tendrías 284 | tendríamos 285 | tendríais 286 | tendrían 287 | tenía 288 | tenías 289 | teníamos 290 | teníais 291 | tenían 292 | tuve 293 | tuviste 294 | tuvo 295 | tuvimos 296 | tuvisteis 297 | tuvieron 298 | tuviera 299 | tuvieras 300 | tuviéramos 301 | tuvierais 302 | tuvieran 303 | tuviese 304 | tuvieses 305 | tuviésemos 306 | tuvieseis 307 | tuviesen 308 | teniendo 309 | tenido 310 | tenida 311 | tenidos 312 | tenidas 313 | tened 314 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/swedish: 
-------------------------------------------------------------------------------- 1 | och 2 | det 3 | att 4 | i 5 | en 6 | jag 7 | hon 8 | som 9 | han 10 | på 11 | den 12 | med 13 | var 14 | sig 15 | för 16 | så 17 | till 18 | är 19 | men 20 | ett 21 | om 22 | hade 23 | de 24 | av 25 | icke 26 | mig 27 | du 28 | henne 29 | då 30 | sin 31 | nu 32 | har 33 | inte 34 | hans 35 | honom 36 | skulle 37 | hennes 38 | där 39 | min 40 | man 41 | ej 42 | vid 43 | kunde 44 | något 45 | från 46 | ut 47 | när 48 | efter 49 | upp 50 | vi 51 | dem 52 | vara 53 | vad 54 | över 55 | än 56 | dig 57 | kan 58 | sina 59 | här 60 | ha 61 | mot 62 | alla 63 | under 64 | någon 65 | eller 66 | allt 67 | mycket 68 | sedan 69 | ju 70 | denna 71 | själv 72 | detta 73 | åt 74 | utan 75 | varit 76 | hur 77 | ingen 78 | mitt 79 | ni 80 | bli 81 | blev 82 | oss 83 | din 84 | dessa 85 | några 86 | deras 87 | blir 88 | mina 89 | samma 90 | vilken 91 | er 92 | sådan 93 | vår 94 | blivit 95 | dess 96 | inom 97 | mellan 98 | sådant 99 | varför 100 | varje 101 | vilka 102 | ditt 103 | vem 104 | vilket 105 | sitta 106 | sådana 107 | vart 108 | dina 109 | vars 110 | vårt 111 | våra 112 | ert 113 | era 114 | vilkas 115 | -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/tajik: -------------------------------------------------------------------------------- 1 | аз 2 | дар 3 | ба 4 | бо 5 | барои 6 | бе 7 | то 8 | ҷуз 9 | пеши 10 | назди 11 | рӯйи 12 | болои 13 | паси 14 | ғайри 15 | ҳамон 16 | ҳамоно 17 | инҷониб 18 | замон 19 | замоно 20 | эътиборан 21 | пеш 22 | қабл 23 | дида 24 | сар карда 25 | агар 26 | агар ки 27 | валекин 28 | ки 29 | лекин 30 | аммо 31 | вале 32 | балки 33 | ва 34 | ҳарчанд 35 | чунки 36 | зеро 37 | зеро ки 38 | вақте ки 39 | то вақте ки 40 | барои он ки 41 | бо нияти он ки 42 | лекин ва ҳол он ки 43 | ё 44 | ё ин ки 45 | бе он ки 46 | дар ҳолате ки 47 | то даме ки 48 | баъд аз он ки 49 | даме ки 50 | ба тразе ки 51 
| аз баҳри он ки 52 | гар 53 | ар 54 | ба шарте 55 | азбаски 56 | модоме ки 57 | агар чи 58 | гарчанде ки 59 | бо вуҷуди он ки 60 | гӯё 61 | аз-баски 62 | чун-ки 63 | агар-чанд 64 | агар-чи 65 | гар-чи 66 | то ки 67 | чунон ки 68 | то даме ки 69 | ҳар қадар ки 70 | магар 71 | оё 72 | наход 73 | ҳатто 74 | ҳам 75 | бале 76 | оре 77 | хуб 78 | хуш 79 | хайр 80 | не 81 | на 82 | мана 83 | э 84 | фақат 85 | танҳо 86 | кошки 87 | мабодо 88 | эҳтимол 89 | ана ҳамин 90 | наход ки 91 | ҳатто ки 92 | аз афташ 93 | майлаш куя 94 | ана 95 | ҳа 96 | канӣ 97 | гӯё ки 98 | ҳо ана 99 | на ин ки 100 | ваҳ 101 | ҳой 102 | и 103 | а 104 | о 105 | эҳ 106 | ҳе 107 | ҳу 108 | аҳа 109 | оҳе 110 | уҳа 111 | ҳм 112 | нм 113 | оббо 114 | ӯббо 115 | ҳой-ҳой 116 | вой-вой 117 | ту-ту 118 | ҳмм 119 | эҳа 120 | тавба 121 | ӯҳӯ 122 | аҷабо 123 | ало 124 | аё 125 | ой 126 | ӯим 127 | ором 128 | хомӯш 129 | ҳай-ҳай 130 | бай-бай 131 | аз 132 | он 133 | баъд 134 | азбаски 135 | ӯ 136 | ҳангоми 137 | чӣ 138 | кадом 139 | ин 140 | ҷо 141 | ҳам 142 | ё ки 143 | бояд 144 | аст 145 | чанд 146 | ҳар 147 | бар 148 | чаро ки 149 | агар 150 | то кӣ 151 | бинобар 152 | бинобар ин 153 | ҳаргиз 154 | асло 155 | нахот 156 | нахот ки 157 | кошкӣ 158 | шояд 159 | шояд ки 160 | охир 161 | аз рӯи 162 | аз рӯйи 163 | рӯ -------------------------------------------------------------------------------- /backend/nltk_data/corpora/stopwords/turkish: -------------------------------------------------------------------------------- 1 | acaba 2 | ama 3 | aslında 4 | az 5 | bazı 6 | belki 7 | biri 8 | birkaç 9 | birşey 10 | biz 11 | bu 12 | çok 13 | çünkü 14 | da 15 | daha 16 | de 17 | defa 18 | diye 19 | eğer 20 | en 21 | gibi 22 | hem 23 | hep 24 | hepsi 25 | her 26 | hiç 27 | için 28 | ile 29 | ise 30 | kez 31 | ki 32 | kim 33 | mı 34 | mu 35 | mü 36 | nasıl 37 | ne 38 | neden 39 | nerde 40 | nerede 41 | nereye 42 | niçin 43 | niye 44 | o 45 | sanki 46 | şey 47 | siz 48 | şu 49 | tüm 50 | ve 51 | veya 52 | ya 53 | yani 54
| -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/.DS_Store -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/README: -------------------------------------------------------------------------------- 1 | Pretrained Punkt Models -- Jan Strunk (New version trained after issues 313 and 514 had been corrected) 2 | 3 | Most models were prepared using the test corpora from Kiss and Strunk (2006). Additional models have 4 | been contributed by various people using NLTK for sentence boundary detection. 5 | 6 | For information about how to use these models, please refer to the tokenization HOWTO: 7 | https://www.nltk.org/howto/tokenize.html 8 | and chapter 3.8 of the NLTK book: 9 | https://www.nltk.org/book/ch03.html#sec-segmentation 10 | 11 | There are pretrained tokenizers for the following languages: 12 | 13 | File Language Source Contents Size of training corpus (in tokens) Model contributed by 14 | ======================================================================================================================================================================= 15 | czech.pickle Czech Multilingual Corpus 1 (ECI) Lidove Noviny ~345,000 Jan Strunk / Tibor Kiss 16 | Literarni Noviny 17 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 18 | danish.pickle Danish Avisdata CD-Rom Ver. 1.1.
1995 Berlingske Tidende ~550,000 Jan Strunk / Tibor Kiss 19 | (Berlingske Avisdata, Copenhagen) Weekend Avisen 20 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | dutch.pickle Dutch Multilingual Corpus 1 (ECI) De Limburger ~340,000 Jan Strunk / Tibor Kiss 22 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 23 | english.pickle English Penn Treebank (LDC) Wall Street Journal ~469,000 Jan Strunk / Tibor Kiss 24 | (American) 25 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 26 | estonian.pickle Estonian University of Tartu, Estonia Eesti Ekspress ~359,000 Jan Strunk / Tibor Kiss 27 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 28 | finnish.pickle Finnish Finnish Parole Corpus, Finnish Books and major national ~364,000 Jan Strunk / Tibor Kiss 29 | Text Bank (Suomen Kielen newspapers 30 | Tekstipankki) 31 | Finnish Center for IT Science 32 | (CSC) 33 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 34 | french.pickle French Multilingual Corpus 1 (ECI) Le Monde ~370,000 Jan Strunk / Tibor Kiss 35 | (European) 36 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 37 | german.pickle German Neue Zürcher Zeitung AG Neue Zürcher Zeitung ~847,000 Jan Strunk / Tibor Kiss 38 | (Switzerland) CD-ROM 39 | (Uses "ss" 40 | instead of "ß") 41 | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 42 | greek.pickle Greek Efstathios Stamatatos To Vima (TO BHMA) ~227,000 Jan Strunk / Tibor Kiss 43 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 44 | italian.pickle Italian Multilingual Corpus 1 (ECI) La Stampa, Il Mattino ~312,000 Jan Strunk / Tibor Kiss 45 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 46 | norwegian.pickle Norwegian Centre for Humanities Bergens Tidende ~479,000 Jan Strunk / Tibor Kiss 47 | (Bokmål and Information Technologies, 48 | Nynorsk) Bergen 49 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 50 | polish.pickle Polish Polish National Corpus Literature, newspapers, etc. 
~1,000,000 Krzysztof Langner 51 | (http://www.nkjp.pl/) 52 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 53 | portuguese.pickle Portuguese CETENFolha Corpus Folha de São Paulo ~321,000 Jan Strunk / Tibor Kiss 54 | (Brazilian) (Linguateca) 55 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | slovene.pickle Slovene TRACTOR Delo ~354,000 Jan Strunk / Tibor Kiss 57 | Slovene Academy for Arts 58 | and Sciences 59 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 60 | spanish.pickle Spanish Multilingual Corpus 1 (ECI) Sur ~353,000 Jan Strunk / Tibor Kiss 61 | (European) 62 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | swedish.pickle Swedish Multilingual Corpus 1 (ECI) Dagens Nyheter ~339,000 Jan Strunk / Tibor Kiss 64 | (and some other texts) 65 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 66 | turkish.pickle Turkish METU Turkish Corpus Milliyet ~333,000 Jan Strunk / Tibor Kiss 67 | (Türkçe Derlem Projesi) 68 | University of Ankara 69 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 70 | 71 | The corpora contained about 400,000 tokens on average and mostly consisted of newspaper text converted to 72 | Unicode using the codecs module. 73 | 74 | Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence Boundary Detection. 
75 | Computational Linguistics 32: 485-525. 76 | 77 | ---- Training Code ---- 78 | 79 | # import punkt 80 | import nltk.tokenize.punkt 81 | 82 | # Make a new Tokenizer 83 | tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer() 84 | 85 | # Read in training corpus (one example: Slovene) 86 | import codecs 87 | text = codecs.open("slovene.plain","Ur","iso-8859-2").read() 88 | 89 | # Train tokenizer 90 | tokenizer.train(text) 91 | 92 | # Dump pickled tokenizer 93 | import pickle 94 | out = open("slovene.pickle","wb") 95 | pickle.dump(tokenizer, out) 96 | out.close() 97 | 98 | --------- 99 | -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/czech.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/danish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/dutch.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/english.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/english.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/estonian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/estonian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/finnish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/french.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/german.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/greek.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/greek.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/italian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/italian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/malayalam.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/norwegian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/polish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/portuguese.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/portuguese.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/russian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/slovene.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/spanish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/spanish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/swedish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/PY3/turkish.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/turkish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/README: -------------------------------------------------------------------------------- 1 | Pretrained Punkt Models -- Jan Strunk (New version trained after issues 313 and 514 had been corrected) 2 | 3 | Most models were prepared using the test corpora from Kiss and Strunk (2006). Additional models have 4 | been contributed by various people using NLTK for sentence boundary detection. 5 | 6 | For information about how to use these models, please confer the tokenization HOWTO: 7 | http://nltk.googlecode.com/svn/trunk/doc/howto/tokenize.html 8 | and chapter 3.8 of the NLTK book: 9 | http://nltk.googlecode.com/svn/trunk/doc/book/ch03.html#sec-segmentation 10 | 11 | There are pretrained tokenizers for the following languages: 12 | 13 | File Language Source Contents Size of training corpus(in tokens) Model contributed by 14 | ======================================================================================================================================================================= 15 | czech.pickle Czech Multilingual Corpus 1 (ECI) Lidove Noviny ~345,000 Jan Strunk / Tibor Kiss 16 | Literarni Noviny 17 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 18 | danish.pickle Danish Avisdata CD-Rom Ver. 1.1. 
1995 Berlingske Tidende ~550,000 Jan Strunk / Tibor Kiss 19 | (Berlingske Avisdata, Copenhagen) Weekend Avisen 20 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 21 | dutch.pickle Dutch Multilingual Corpus 1 (ECI) De Limburger ~340,000 Jan Strunk / Tibor Kiss 22 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 23 | english.pickle English Penn Treebank (LDC) Wall Street Journal ~469,000 Jan Strunk / Tibor Kiss 24 | (American) 25 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 26 | estonian.pickle Estonian University of Tartu, Estonia Eesti Ekspress ~359,000 Jan Strunk / Tibor Kiss 27 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 28 | finnish.pickle Finnish Finnish Parole Corpus, Finnish Books and major national ~364,000 Jan Strunk / Tibor Kiss 29 | Text Bank (Suomen Kielen newspapers 30 | Tekstipankki) 31 | Finnish Center for IT Science 32 | (CSC) 33 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 34 | french.pickle French Multilingual Corpus 1 (ECI) Le Monde ~370,000 Jan Strunk / Tibor Kiss 35 | (European) 36 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 37 | german.pickle German Neue Zürcher Zeitung AG Neue Zürcher Zeitung ~847,000 Jan Strunk / Tibor Kiss 38 | (Switzerland) CD-ROM 39 | (Uses "ss" 40 | instead of "ß") 41 | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 42 | greek.pickle Greek Efstathios Stamatatos To Vima (TO BHMA) ~227,000 Jan Strunk / Tibor Kiss 43 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 44 | italian.pickle Italian Multilingual Corpus 1 (ECI) La Stampa, Il Mattino ~312,000 Jan Strunk / Tibor Kiss 45 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 46 | norwegian.pickle Norwegian Centre for Humanities Bergens Tidende ~479,000 Jan Strunk / Tibor Kiss 47 | (Bokmål and Information Technologies, 48 | Nynorsk) Bergen 49 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 50 | polish.pickle Polish Polish National Corpus Literature, newspapers, etc. 
~1,000,000 Krzysztof Langner 51 | (http://www.nkjp.pl/) 52 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 53 | portuguese.pickle Portuguese CETENFolha Corpus Folha de São Paulo ~321,000 Jan Strunk / Tibor Kiss 54 | (Brazilian) (Linguateca) 55 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 56 | slovene.pickle Slovene TRACTOR Delo ~354,000 Jan Strunk / Tibor Kiss 57 | Slovene Academy for Arts 58 | and Sciences 59 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 60 | spanish.pickle Spanish Multilingual Corpus 1 (ECI) Sur ~353,000 Jan Strunk / Tibor Kiss 61 | (European) 62 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 63 | swedish.pickle Swedish Multilingual Corpus 1 (ECI) Dagens Nyheter ~339,000 Jan Strunk / Tibor Kiss 64 | (and some other texts) 65 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 66 | turkish.pickle Turkish METU Turkish Corpus Milliyet ~333,000 Jan Strunk / Tibor Kiss 67 | (Türkçe Derlem Projesi) 68 | University of Ankara 69 | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- 70 | 71 | The corpora contained about 400,000 tokens on average and mostly consisted of newspaper text converted to 72 | Unicode using the codecs module. 73 | 74 | Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence Boundary Detection. 
75 | Computational Linguistics 32: 485-525. 76 | 77 | ---- Training Code ---- 78 | 79 | # import punkt 80 | import nltk.tokenize.punkt 81 | 82 | # Make a new Tokenizer 83 | tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer() 84 | 85 | # Read in training corpus (one example: Slovene) 86 | import codecs 87 | text = codecs.open("slovene.plain","Ur","iso-8859-2").read() 88 | 89 | # Train tokenizer 90 | tokenizer.train(text) 91 | 92 | # Dump pickled tokenizer 93 | import pickle 94 | out = open("slovene.pickle","wb") 95 | pickle.dump(tokenizer, out) 96 | out.close() 97 | 98 | --------- 99 | -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/czech.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/czech.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/danish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/danish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/dutch.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/dutch.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/estonian.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/estonian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/finnish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/finnish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/french.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/french.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/german.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/german.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/italian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/italian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/malayalam.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/malayalam.pickle 
-------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/norwegian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/norwegian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/polish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/polish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/portuguese.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/portuguese.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/russian.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/russian.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/slovene.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/slovene.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/spanish.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/spanish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/swedish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/swedish.pickle -------------------------------------------------------------------------------- /backend/nltk_data/tokenizers/punkt/turkish.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/turkish.pickle -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | chromadb==0.4.22 2 | uvicorn==0.22.0 3 | rank-bm25==0.2.2 4 | nltk==3.8.1 5 | fastapi==0.108.0 6 | sse-starlette==1.5.0 7 | dateparser==1.2.0 8 | tiktoken==0.5.2 9 | pydantic==1.10.7 10 | PyJWT==2.3.0 11 | openai==1.7.0 12 | langchain==0.1.0 13 | watchdog==3.0.0 14 | jieba==0.42.1 -------------------------------------------------------------------------------- /backend/settings.py: -------------------------------------------------------------------------------- 1 | # ---Main Settings--- # 2 | NICK_NAME = 'Peter' # This is your nick name. Make sure to set it at the beginning and don't change so that LLM will not get confused. 
3 | 4 | EMBEDDING_BASE_URL = 'https://api.openai.com/v1' 5 | EMBEDDING_API_KEY = 'your-api-key' 6 | EMBEDDING_MODEL_NAME = "ada" 7 | 8 | CHAT_BASE_URL = 'https://api.openai.com/v1' # Change this to your OpenAI-compatible API URL 9 | CHAT_API_KEY = 'your-api-key' 10 | CHAT_MODEL_NAME = "gpt-3.5-turbo" 11 | CHAT_MAX_TOKEN = 400 12 | 13 | # Path to the local directory of your Markdown notebook to store context information 14 | CHAT_PATH = '../md_website/chat_history' 15 | NOTE_PATH = '../md_website/notes' 16 | 17 | # If you're using an online Markdown notebook editor, set up this URL so you can click through to the notebook page in the "Reference". 18 | CHAT_URL = 'http://localhost:3000/chat_history/' 19 | NOTE_URL = 'http://localhost:3000/notes/' 20 | 21 | # MULTILPLE_SYSTEM_PROMPTS is used to adapt to different LLM backends. 22 | # Some backends may not support multiple system prompts; 23 | # in that case, set this parameter to False. 24 | # If you don't know whether multiple system prompts are supported, 25 | # you can test by setting this parameter to True and checking that no error occurs during conversation 26 | # and that Loyal Elephie can still answer properly with the provided context. 27 | MULTILPLE_SYSTEM_PROMPTS = False 28 | 29 | # Language Preference (experimental) 30 | # Supported Languages: English, Chinese, German, French, Spanish, Portuguese, Italian, Dutch, Czech, Polish, Russian, Arabic 31 | LANGUAGE_PREFERENCE = "English" 32 | 33 | # ---Retrieval Settings--- # 34 | RETRIEVAL_TOKEN_LIMIT = 2048 # Maximum token limit for the retrieved contexts 35 | RETRIEVAL_NUM_CHOICES = 10 # Number of top choices or results to retrieve for each query 36 | RETRIEVAL_MIN_VALUE = 0.25 # Minimum threshold for the value of retrieved documents 37 | BM25_WEIGHT = 0.1 # Weight given to the BM25 score when adjusting the final score of a document 38 | 39 | # ---Prompts--- # 40 | SUMMARY_PROMPT='''You are the "ASSISTANT" and your task is to take a detailed note about {NICK_NAME} from a conversation with you. 
You should focus on observations on {NICK_NAME}'s situation and special things mentioned by him but you doesn't need to include assistant's (your own) words unless addressed by {NICK_NAME}.{LANGUAGE_PREFERENCE} Don't write a title and don't write anything else before or after the note.''' 41 | SUMMARY_NOTE_PROMPT='''Your task is to write a comprehensive summary about the Note authored by the user mentioned as *{NICK_NAME}*. The summary should be written as a bullet list of self-contained items without a title.{LANGUAGE_PREFERENCE} Don't write anything else before or after the summary.''' 42 | 43 | AGENT_PROMPT = '''You are Loyal Elephie, {NICK_NAME}'s autonomous secretary who has access to the following tools: 44 | 1. You have an inner monologue section which could help you analyze the problem without disturbing {NICK_NAME}. To use inner monologue section, write your monologue between tags "" and "". The monologue should including the user problem breakdown the questions you don't yet understand. This tool is how you comprehend. 45 | 46 | 2. You have a memory including {NICK_NAME}'s notes and your past conversations with him, which could possibly provide useful context for this interaction. *To use this external memory, write search query strings each per line between tags "" and ""*. Provide precise dates into the query if possible. This tool is how your recall. 47 | Example of using the memory: 48 | User: Should I buy a new computer? 49 | 50 | {NICK_NAME} computer problem 51 | {NICK_NAME} buy new computer preference 52 | 53 | If you see the search result, be mindful that the context could be ranging from a long period and they will be shown in a timely order. 54 | 55 | 3. Once you have thoroughly comprehended the latest user input, respond by placing your message between the tags?``?and?``. Only the text inside the "" block will be visible to {NICK_NAME}. Your reply should be supportive, with an analytical, creative, extroverted, and playful personality. 
You love jokes, sarcasm, and making wild guesses while staying truthful to the accessible context when not making guesses. Always address {NICK_NAME} as "you". This tool is how you speak. 56 | 57 | 58 | Below your interactions with the user ({NICK_NAME}) begin. You will also receive occasional system messages with situational information and instructions. 59 | Current time is {CURRENT_TIME}{LANGUAGE_PREFERENCE}''' 60 | -------------------------------------------------------------------------------- /external_example/embedding_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Optional, Union 3 | 4 | from fastapi import FastAPI 5 | from fastapi.middleware.cors import CORSMiddleware 6 | from pydantic import BaseModel 7 | from sentence_transformers import SentenceTransformer 8 | 9 | app = FastAPI( 10 | title="Embeddings API", 11 | version="0.0.1", 12 | ) 13 | app.add_middleware( 14 | CORSMiddleware, 15 | allow_origins=["*"], 16 | allow_credentials=True, 17 | allow_methods=["*"], 18 | allow_headers=["*"], 19 | ) 20 | 21 | EMBEDDING_MODEL_NAME = 'BAAI/bge-base-en-v1.5' # Choose a custom embedding model 22 | embeddings = SentenceTransformer(EMBEDDING_MODEL_NAME) 23 | 24 | 25 | class Embedding(BaseModel): 26 | object: str 27 | embedding: List[float] 28 | index: int 29 | 30 | 31 | class Usage(BaseModel): 32 | prompt_tokens: int 33 | total_tokens: int 34 | 35 | 36 | class CreateEmbeddingRequest(BaseModel): 37 | model: Optional[str] 38 | input: Union[str, List[str]] 39 | user: Optional[str] = None 40 | 41 | class Config: 42 | schema_extra = { 43 | "example": { 44 | "input": "The food was delicious and the waiter...", 45 | } 46 | } 47 | 48 | 49 | class CreateEmbeddingResponse(BaseModel): 50 | object: str 51 | data: List[Embedding] 52 | model: str 53 | usage: Usage 54 | 55 | 56 | @app.post( 57 | "/v1/embeddings", 58 | response_model=CreateEmbeddingResponse, 59 | ) 60 | def 
create_embedding(request: CreateEmbeddingRequest): 61 | result = _create_embedding(**request.dict(exclude={"user", "model", "model_config"})) 62 | return result 63 | 64 | 65 | def _create_embedding(input: Union[str, List[str]]): 66 | print(">embedding called") 67 | global embeddings 68 | model_name = EMBEDDING_MODEL_NAME 69 | model_name_short = model_name.split("/")[-1] 70 | if isinstance(input, str): 71 | return CreateEmbeddingResponse(data=[Embedding(embedding=embeddings.encode(input).tolist(), object="embedding", index=0)], 72 | model=model_name_short, object='list', 73 | usage=Usage(prompt_tokens=len(input), total_tokens=len(input))) # MARK; could change to tokens, just for test now 74 | else: 75 | print(">batch call") 76 | data = [Embedding(embedding=embedding, object="embedding", index=i) 77 | for i, embedding in enumerate(embeddings.encode(input).tolist())] 78 | total_tokens = 0 79 | for text in input: 80 | total_tokens += len(text) # MARK; could change to tokens, just for test now 81 | return CreateEmbeddingResponse(data=data, model=model_name_short, object='list', 82 | usage=Usage(prompt_tokens=total_tokens, total_tokens=total_tokens)) 83 | 84 | 85 | if __name__ == "__main__": 86 | import os 87 | import uvicorn 88 | 89 | uvicorn.run(app, host="0.0.0.0", port=os.getenv("EMBEDDING_PORT", 8001)) 90 | 91 | -------------------------------------------------------------------------------- /frontend/components/Chat/Chat.tsx: -------------------------------------------------------------------------------- 1 | import { Message } from "@/types"; 2 | import { FC } from "react"; 3 | import { ChatInput } from "./ChatInput"; 4 | import { ChatLoader } from "./ChatLoader"; 5 | import { ChatMessage } from "./ChatMessage"; 6 | 7 | interface Props { 8 | messages: Message[]; 9 | loading: boolean; 10 | isButtonDisabled: boolean; 11 | onSend: (message: Message) => void; 12 | onRevert: () => void; 13 | content: string; 14 | setContent: (content:string) => void 15 | } 16 | 17 | 
export const Chat: FC = ({ messages, loading, isButtonDisabled, onSend, onRevert, content, setContent }) => { 18 | return ( 19 | <> 20 |
21 | {messages.map((message, index) => ( 22 |
26 | 27 |
28 | ))} 29 | 30 | {loading && ( 31 |
32 | 33 |
34 | )} 35 | 36 |
37 | 39 |
40 |
41 | 42 | ); 43 | }; -------------------------------------------------------------------------------- /frontend/components/Chat/ChatInput.tsx: -------------------------------------------------------------------------------- 1 | import { Message } from "@/types"; 2 | import { IconArrowUp, IconArrowBackUp, IconX } from "@tabler/icons-react"; 3 | import { FC, KeyboardEvent, useEffect, useRef, useState } from "react"; 4 | 5 | interface Props { 6 | onSend: (message: Message) => void; 7 | onRevert: () => void; 8 | isButtonDisabled: boolean; 9 | content: string; 10 | setContent: (content:string) => void 11 | loading: boolean; 12 | } 13 | 14 | export const ChatInput: FC = ({ onSend, onRevert, isButtonDisabled, content, setContent, loading}) => { 15 | // const [content, setContent] = useState(); 16 | 17 | const textareaRef = useRef(null); 18 | 19 | const handleChange = (e: React.ChangeEvent) => { 20 | const value = e.target.value; 21 | if (value.length > 1000) { 22 | alert("Message limit is 1000 characters"); 23 | return; 24 | } 25 | setContent(value); 26 | }; 27 | 28 | const handleSend = () => { 29 | if (!content) { 30 | alert("Please enter a message"); 31 | return; 32 | } 33 | onSend({ role: "user", content }); 34 | setContent(""); 35 | }; 36 | 37 | const handleKeyDown = (e: KeyboardEvent) => { 38 | if (e.key === "Enter" && !e.shiftKey && !isButtonDisabled) { 39 | e.preventDefault(); 40 | handleSend(); 41 | } 42 | }; 43 | 44 | useEffect(() => { 45 | if (textareaRef && textareaRef.current) { 46 | textareaRef.current.style.height = "inherit"; 47 | textareaRef.current.style.height = `${textareaRef.current?.scrollHeight}px`; 48 | } 49 | }, [content]); 50 | 51 | return ( 52 |
53 |