├── .public
├── architecture.png
├── image.png
├── screenshot1.png
├── screenshot2.png
├── screenshot3.png
└── screenshot4.png
├── LICENSE
├── README.md
├── backend
├── app.py
├── bm25_api.py
├── chroma_doc_manager.py
├── file_monitor.py
├── language_presets.json
├── llama_types.py
├── llm_utils.py
├── memory_server.py
├── nltk_data
│ ├── corpora
│ │ └── stopwords
│ │ │ ├── README
│ │ │ ├── arabic
│ │ │ ├── azerbaijani
│ │ │ ├── basque
│ │ │ ├── bengali
│ │ │ ├── catalan
│ │ │ ├── chinese
│ │ │ ├── danish
│ │ │ ├── dutch
│ │ │ ├── english
│ │ │ ├── finnish
│ │ │ ├── french
│ │ │ ├── german
│ │ │ ├── greek
│ │ │ ├── hebrew
│ │ │ ├── hinglish
│ │ │ ├── hungarian
│ │ │ ├── indonesian
│ │ │ ├── italian
│ │ │ ├── kazakh
│ │ │ ├── nepali
│ │ │ ├── norwegian
│ │ │ ├── portuguese
│ │ │ ├── romanian
│ │ │ ├── russian
│ │ │ ├── slovene
│ │ │ ├── spanish
│ │ │ ├── swedish
│ │ │ ├── tajik
│ │ │ └── turkish
│ └── tokenizers
│ │ └── punkt
│ │ ├── .DS_Store
│ │ ├── PY3
│ │ ├── README
│ │ ├── czech.pickle
│ │ ├── danish.pickle
│ │ ├── dutch.pickle
│ │ ├── english.pickle
│ │ ├── estonian.pickle
│ │ ├── finnish.pickle
│ │ ├── french.pickle
│ │ ├── german.pickle
│ │ ├── greek.pickle
│ │ ├── italian.pickle
│ │ ├── malayalam.pickle
│ │ ├── norwegian.pickle
│ │ ├── polish.pickle
│ │ ├── portuguese.pickle
│ │ ├── russian.pickle
│ │ ├── slovene.pickle
│ │ ├── spanish.pickle
│ │ ├── swedish.pickle
│ │ └── turkish.pickle
│ │ ├── README
│ │ ├── czech.pickle
│ │ ├── danish.pickle
│ │ ├── dutch.pickle
│ │ ├── english.pickle
│ │ ├── estonian.pickle
│ │ ├── finnish.pickle
│ │ ├── french.pickle
│ │ ├── german.pickle
│ │ ├── greek.pickle
│ │ ├── italian.pickle
│ │ ├── malayalam.pickle
│ │ ├── norwegian.pickle
│ │ ├── polish.pickle
│ │ ├── portuguese.pickle
│ │ ├── russian.pickle
│ │ ├── slovene.pickle
│ │ ├── spanish.pickle
│ │ ├── swedish.pickle
│ │ └── turkish.pickle
├── requirements.txt
├── retrivial_ranking.py
└── settings.py
├── external_example
└── embedding_server.py
├── frontend
├── components
│ ├── Chat
│ │ ├── Chat.tsx
│ │ ├── ChatInput.tsx
│ │ ├── ChatLoader.tsx
│ │ ├── ChatMessage.tsx
│ │ └── run.log
│ └── Layout
│ │ ├── Footer.tsx
│ │ └── Navbar.tsx
├── next-env.d.ts
├── next.config.js
├── package-lock.json
├── package.json
├── pages
│ ├── _app.tsx
│ ├── _document.tsx
│ ├── api
│ │ ├── chat.ts
│ │ ├── login.ts
│ │ └── verify.ts
│ ├── index.tsx
│ └── login.tsx
├── postcss.config.js
├── public
│ └── favicon.ico
├── styles
│ └── globals.css
├── tailwind.config.js
├── tsconfig.json
├── types
│ └── index.ts
├── users.json
└── utils
│ └── index.ts
└── md_website
├── Chat History.md
├── Notes.md
├── SETTINGS.md
├── chat_history
└── .gitkeep
├── index.md
├── notes
└── .gitkeep
└── template
└── link.md
/.public/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/architecture.png
--------------------------------------------------------------------------------
/.public/image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/image.png
--------------------------------------------------------------------------------
/.public/screenshot1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot1.png
--------------------------------------------------------------------------------
/.public/screenshot2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot2.png
--------------------------------------------------------------------------------
/.public/screenshot3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot3.png
--------------------------------------------------------------------------------
/.public/screenshot4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/.public/screenshot4.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Yipeng Zhang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🐘 Loyal Elephie: Your Trusty Memory-enabled AI Companion 🧠
2 |
3 |
4 |
5 | 🚀 Embark on an exciting adventure with Loyal Elephie, your faithful AI sidekick! This project combines the power of a neat Next.js web UI and a mighty Python backend, leveraging the latest advancements in Large Language Models (LLMs) and Retrieval Augmented Generation (RAG) to deliver a seamless and meaningful chatting experience! 🎉
6 |
7 | ## Features
8 |
9 | 1. **🎛️ Controllable Memory:** Take control of Loyal Elephie's memory! You decide which moments to save, and you can easily edit the context as needed. It is your second-brain for episodic memory. ✍️
10 |
11 | 2. **🔍 Hybrid Search:** Experience the powerful combination of ChromaDB and BM25 for efficient searches! It's also optimized for handling date-relevant queries. 📅
12 |
13 | 3. **🔒 Secure Web Access:** With a built-in login feature, only authorized users can access your AI companion, ensuring your conversations remain private and secure over the internet. 🛡️
14 |
15 | 4. **🤖 Streamlined LLM Agent:** Loyal Elephie uses XML syntax with no function-calling required. It is also optimized for less token usage and works smoothly with great local LLMs using Llama.cpp or ExllamaV2. 💬
16 |
17 | 5. **📝 (Optional) Markdown Editor Integration:** Connect with online Markdown editors to view the original referred document during chats and experience real-time LLM knowledge integration after editing your notes online. 🌐
18 |
19 | Loyal Elephie supports both open and proprietary LLMs and embeddings serving as OpenAI compatible APIs.
20 |
21 | 
22 |
23 | Warning: This project was originally designed for **Linux**, and compatibility with Windows or macOS has not been fully tested. If you are using Windows, I strongly recommend running this project in **WSL**.
24 |
25 |
26 | ## Screenshots
27 | *Meta-Llama-3-70B-Instruct.Q4_K_S.gguf was used when capturing the below screenshots*
28 |
29 | 
30 |
31 | 
32 |
33 |
34 | 
35 | With [SilverBulletMd](https://github.com/silverbulletmd/silverbullet), you can edit a note in the browser and then let Loyal Elephie remember it!
36 |
37 | 
38 |
39 | The UI is modified from https://github.com/mckaywrigley/chatbot-ui-lite. Credits to the author Mckay Wrigley!
40 |
41 | ## Deployment
42 |
43 | **1. Clone Repo**
44 |
45 | ```bash
46 | git clone https://github.com/v2rockets/Loyal-Elephie.git
47 | ```
48 |
49 | **2. Install Frontend Requirements**
50 |
51 | ```bash
52 | cd frontend
53 | npm i
54 | ```
55 |
56 | **3. Configure Login Users**
57 |
58 | frontend/users.json
59 | ```json
60 | [{
61 | "username":"admin",
62 | "password":"admin"
63 | }]
64 | ```
65 |
66 | **4. Install Backend Requirements**
67 |
68 | ```bash
69 | cd backend
70 | pip install -r requirements.txt
71 | ```
72 |
73 | **5. Configure Backend Settings**
74 |
75 | ```python
76 | # backend/settings.py
77 | NICK_NAME = 'Peter' # This is your nick name. Make sure to set it at the beginning and don't change so that LLM will not get confused.
78 |
79 | CHAT_BASE_URL = 'https://api.openai.com/v1' # Modify to your OpenAI compatible API url
80 | CHAT_API_KEY = 'your-api-key'
81 | CHAT_MODEL_NAME = "gpt-3.5-turbo"
82 |
83 | # Language Preference (experimental)
84 | # Supported Languages: English, Chinese, German, French, Spanish, Portuguese, Italian, Dutch, Czech, Polish, Russian, Arabic
85 | LANGUAGE_PREFERENCE = "English"
86 | ```
87 |
88 | **6. Run App**
89 |
90 | frontend:
91 | ```bash
92 | cd frontend
93 | npm run build
94 | npm run start
95 | ```
96 | backend:
97 | ```bash
98 | cd backend
99 | python app.py
100 | ```
101 |
102 | # Usage Tips
103 | * By default, visit Loyal Elephie from http://localhost:8080
104 | * Use the "Save" button to save the current conversation into Loyal Elephie's memory
105 | * Use the "Reset" button to clear the current conversation (this does not affect what has been saved; it is the same as refreshing the page)
106 | * Click on the titles in "Reference" to navigate to the corresponding Markdown notes (SilverBulletMd or another web Markdown editor has to be hosted and configured for this to work)
107 |
108 | Some of the workable local LLMs tested:
109 | * OpenHermes-2.5-Mistral-7B
110 | * Mixtral-8x7B-Instruct-v0.1
111 | * c4ai-command-r-v01
112 | * Meta-Llama-3-70B-Instruct (Best so far)
113 | * Qwen2-72b-instruct (Best for non-English languages)
114 |
115 | For those who need a hands-on local embedding API, an example embedding server is provided in "external_example". You will need to install "sentence_transformers" to run it. After deployment, modify "settings.py" to finish the configuration:
116 |
117 | ```python
118 | EMBEDDING_BASE_URL = 'http://localhost:8001/v1' # local embedding deployment URL
119 | ```
120 |
--------------------------------------------------------------------------------
/backend/app.py:
--------------------------------------------------------------------------------
1 | import os
2 | import uvicorn
3 | from uvicorn.config import LOGGING_CONFIG
4 |
5 | from file_monitor import WatchdogThread, UpdateThread
6 | from memory_server import app, server_state
7 | from settings import *
8 |
if __name__ == "__main__":
    # Start the background workers before serving requests:
    #   * WatchdogThread watches CHAT_PATH and NOTE_PATH for file changes.
    #   * UpdateThread processes the recorded changes, using server_state to
    #     see when the server was last used.
    watchdog_thread = WatchdogThread(CHAT_PATH, NOTE_PATH)
    watchdog_thread.start()
    update_thread = UpdateThread(server_state)
    update_thread.start()

    # Prefix every access-log line with a timestamp.
    LOGGING_CONFIG["formatters"]["access"]["fmt"] = "%(asctime)s %(levelprefix)s %(message)s"
    LOGGING_CONFIG["formatters"]["access"]["datefmt"] = "%Y-%m-%d %H:%M:%S"

    # Environment variables are always strings, so the port must be converted
    # explicitly; otherwise a PORT override reaches uvicorn as a str instead of
    # the integer it expects.
    uvicorn.run(
        app,
        host=os.getenv("HOST", "localhost"),
        port=int(os.getenv("PORT", "5000")),
    )
--------------------------------------------------------------------------------
/backend/bm25_api.py:
--------------------------------------------------------------------------------
1 | import os
2 | import statistics
3 |
4 | import nltk
5 | nltk.data.path.append("nltk_data")
6 | from nltk.corpus import stopwords
7 | from nltk.tokenize import word_tokenize
8 | # from nltk.stem import PorterStemmer
9 | from rank_bm25 import BM25Okapi
10 | from settings import LANGUAGE_PREFERENCE
11 | from threading import Lock
12 |
# Guards corpus_index and bm25, which are rebuilt by update_corpus() and read
# by the scoring functions.
lock = Lock()

# Maps a document title to its position in the BM25 corpus (None until built).
corpus_index = None
# Initialize BM25
bm25 = None

# Initialize stemmer and stopwords
# stemmer = PorterStemmer()
try:
    stop_words = set(stopwords.words(LANGUAGE_PREFERENCE.lower()))
except (OSError, LookupError):
    # Not every selectable language has a stop-word list available (for example
    # Czech and Polish are absent from the bundled nltk_data/corpora/stopwords).
    # Degrade to "no stop-word filtering" instead of failing at import time.
    print(f"Warning: no stopword list found for {LANGUAGE_PREFERENCE}; stopword filtering disabled")
    stop_words = set()

# jieba is only needed (and only imported) for Chinese word segmentation.
if LANGUAGE_PREFERENCE == 'Chinese':
    import jieba
25 |
26 | # Pre-processing function
def preprocess(text):
    """Tokenize ``text`` for BM25.

    The text is lower-cased, split into tokens (jieba when the language
    preference is Chinese, NLTK's ``word_tokenize`` otherwise) and reduced to
    the alphabetic tokens that are not stop words.

    Returns:
        list of token strings.
    """
    lowered = text.lower()
    if LANGUAGE_PREFERENCE == 'Chinese':
        raw_tokens = jieba.lcut(lowered)
    else:
        raw_tokens = word_tokenize(lowered, language=LANGUAGE_PREFERENCE.lower())
    # raw_tokens = [stemmer.stem(tok) for tok in raw_tokens ...]  (stemming is disabled)
    kept = []
    for tok in raw_tokens:
        if tok not in stop_words and tok.isalpha():
            kept.append(tok)
    return kept
35 |
def update_corpus():
    """Rebuild the BM25 model and title index from the files in ``digests``.

    Only files whose name starts with "Conversation" or "Note" are indexed.
    The new index and model are built in local variables and published
    together at the end, so a failure part-way through (e.g. an unreadable
    file) leaves the previous index/model pair intact instead of an emptied
    index next to a stale model. When no document qualifies, ``bm25`` is reset
    to ``None`` so it never refers to documents that no longer exist.
    """
    global corpus_index
    global bm25
    digest_dir = "digests"  # avoid shadowing the builtin ``dir``
    with lock:
        corpus = []
        new_index = {}
        for file_name in os.listdir(digest_dir):
            if not file_name.startswith(("Conversation", "Note")):
                continue
            title = file_name.replace(';', ':')  # revert conversion for Windows file name rules
            with open(os.path.join(digest_dir, file_name), encoding='utf-8') as f:
                new_index[title] = len(corpus)  # position of this document in the corpus
                corpus.append(f.read())

        # Preprocess the corpus
        if corpus:
            processed_corpus = [preprocess(doc) for doc in corpus]
            new_bm25 = BM25Okapi(processed_corpus, k1=1.5, b=0.75, epsilon=0.25)
        else:
            new_bm25 = None

        # Publish index and model together so they always describe the same corpus.
        corpus_index = new_index
        bm25 = new_bm25
        print("bm25 corpus updated")
62 |
def standardize(lst):
    """Return the z-scores of ``lst`` (population standard deviation).

    Args:
        lst: sequence of numbers.

    Returns:
        A list the same length as ``lst``. An empty input yields ``[]``; when
        every value is identical (standard deviation 0) the result is all
        zeros, because the z-score is undefined in that case.
    """
    # statistics.mean() raises StatisticsError on empty data; treat "nothing to
    # standardize" as an empty result instead.
    if len(lst) == 0:
        return []
    mean_val = statistics.mean(lst)
    std_dev = statistics.pstdev(lst)
    if std_dev == 0:
        return [0]*len(lst)
    return [(x - mean_val) / std_dev for x in lst]
69 |
def get_norm_bm25_scores(query, doc_id_list):
    """Score the given documents against ``query`` and standardize the scores.

    Args:
        query: raw query text; it is preprocessed and de-duplicated into terms.
        doc_id_list: titles of the documents to score; each must be a key of
            ``corpus_index``.

    Returns:
        The standardized BM25 scores in the order of ``doc_id_list``, or ``[]``
        when ``doc_id_list`` is empty.
    """
    with lock:
        if not doc_id_list:
            return []
        terms = list(set(preprocess(query)[::-1]))
        # Translate titles into corpus positions, then score only those documents.
        positions = [corpus_index[doc_id] for doc_id in doc_id_list]
        norm_scores = standardize(bm25.get_batch_scores(terms, positions))
        report_lines = [f"{score}-{doc_id}" for doc_id, score in zip(doc_id_list, norm_scores)]
        print('\n'.join(report_lines))
        return norm_scores
82 |
def get_avg_bm25_scores(query, doc_id_list):
    """Score the given documents against ``query``, averaged per query term.

    Args:
        query: raw query text; it is preprocessed and de-duplicated into terms.
        doc_id_list: titles of the documents to score; each must be a key of
            ``corpus_index``.

    Returns:
        One score per entry of ``doc_id_list`` (BM25 score divided by the
        number of distinct query terms), or ``[]`` when ``doc_id_list`` is
        empty. If preprocessing leaves no query terms (e.g. the query consists
        only of stop words or punctuation) every score is ``0.0``.
    """
    with lock:
        if not doc_id_list:
            return []
        # De-duplicate the terms; order is irrelevant once they are in a set.
        query = list(set(preprocess(query)))
        # Get scores
        doc_index_list = [corpus_index[doc_id] for doc_id in doc_id_list]
        scores = bm25.get_batch_scores(query, doc_index_list)
        if query:
            avg_scores = [score/len(query) for score in scores]
        else:
            # No usable terms: avoid dividing by zero and report "no match".
            avg_scores = [0.0] * len(doc_index_list)
        print('\n'.join([f"{b}-{a}" for a,b in zip(doc_id_list,avg_scores)]))
        return avg_scores
95 |
96 |
--------------------------------------------------------------------------------
/backend/chroma_doc_manager.py:
--------------------------------------------------------------------------------
1 | import os
2 | import chromadb
3 | import datetime
4 | import threading
5 | from chromadb import EmbeddingFunction
6 | from chromadb.config import Settings
7 | from langchain.text_splitter import RecursiveCharacterTextSplitter
8 |
9 | from llm_utils import get_embeddings
10 | from settings import LANGUAGE_PREFERENCE
11 |
12 | ROOT_FOLDER = 'digests'
13 |
14 | # ---------------------------------------------------------------------------
15 | # Monkey patch ChromaDB's validate_where function to support string comparison
16 | # ---------------------------------------------------------------------------
17 | # ChromaDB doesn't support string comparison for the $gte/$lte operators by default.
18 | # This code overrides the default validate_where function to add this functionality
19 | # without modifying the original ChromaDB source code.
def custom_validate_where(where: dict) -> dict:
    """Validate a ``where`` filter, additionally accepting string operands for
    the range operators ``$gt``, ``$gte``, ``$lt`` and ``$lte``.

    Args:
        where: a dict with exactly one entry, either ``{field: value}``,
            ``{field: {operator: operand}}`` or ``{"$and"/"$or": [where, ...]}``.

    Returns:
        ``where`` itself, unchanged, when it is valid.

    Raises:
        ValueError: when the expression is malformed.
    """
    if not isinstance(where, dict):
        raise ValueError(f"Expected where to be a dict, got {where}")
    if len(where) != 1:
        raise ValueError(f"Expected where to have exactly one operator, got {where}")

    logical_ops = ("$and", "$or")
    range_ops = ("$gt", "$gte", "$lt", "$lte")
    membership_ops = ("$in", "$nin")
    known_ops = range_ops + ("$ne", "$eq") + membership_ops

    # Exactly one entry is present at this point.
    key, value = next(iter(where.items()))

    if not isinstance(key, str):
        raise ValueError(f"Expected where key to be a str, got {key}")

    is_special_key = key in logical_ops or key in membership_ops
    if not is_special_key and not isinstance(value, (str, int, float, dict)):
        raise ValueError(
            f"Expected where value to be a str, int, float, or operator expression, got {value}"
        )

    if key in logical_ops:
        if not isinstance(value, list):
            raise ValueError(
                f"Expected where value for $and or $or to be a list of where expressions, got {value}"
            )
        if len(value) <= 1:
            raise ValueError(
                f"Expected where value for $and or $or to be a list with at least two where expressions, got {value}"
            )
        # Each member of a logical combination is itself a where expression.
        for sub_expression in value:
            custom_validate_where(sub_expression)

    # A plain (non-dict) value needs no further checks.
    if not isinstance(value, dict):
        return where

    # Value is an operator expression: ensure there is only one operator.
    if len(value) != 1:
        raise ValueError(
            f"Expected operator expression to have exactly one operator, got {value}"
        )
    operator, operand = next(iter(value.items()))

    # Range comparisons accept strings as well as numbers.
    if operator in range_ops and not isinstance(operand, (str, int, float)):
        raise ValueError(
            f"Expected operand value to be a str, int, or float for operator {operator}, got {operand}"
        )
    if operator in membership_ops and not isinstance(operand, list):
        raise ValueError(
            f"Expected operand value to be a list for operator {operator}, got {operand}"
        )
    if operator not in known_ops:
        raise ValueError(
            f"Expected where operator to be one of $gt, $gte, $lt, $lte, $ne, $eq, $in, $nin, "
            f"got {operator}"
        )

    if not isinstance(operand, (str, int, float, list)):
        raise ValueError(
            f"Expected where operand value to be a str, int, float, or list of those types, got {operand}"
        )
    if isinstance(operand, list):
        if len(operand) == 0 or not all(isinstance(item, type(operand[0])) for item in operand):
            raise ValueError(
                f"Expected where operand value to be a non-empty list, and all values to be of the same type "
                f"got {operand}"
            )
    return where
100 |
101 | chromadb.api.types.validate_where = custom_validate_where
102 | # ---------------------------------------------------------------------------
103 |
class EmbeddingFunction(EmbeddingFunction):
    """Embedding function that delegates to ``get_embeddings`` from ``llm_utils``.

    Note: this class re-uses, and from this point on shadows, the name of the
    ``chromadb.EmbeddingFunction`` base class imported at the top of the module.
    """
    def __call__(self, input):
        # Hand the input straight to the project's embedding helper and return its result.
        return get_embeddings(input)
107 |
class DocumentFolder():
    """Stores each document as one UTF-8 text file inside a directory.

    A document id is mapped to a file name by replacing ':' with ';', because
    ':' is not allowed in Windows file names.
    """

    def __init__(self, dir) -> None:
        """Remember the storage directory and create it if it is missing.

        Args:
            dir: path of the directory that holds the documents.
        """
        self.dir = dir
        # makedirs also creates missing parent directories and tolerates the
        # directory already existing.
        os.makedirs(dir, exist_ok=True)

    def _path_for(self, doc_id):
        """Return the file path used to store the document ``doc_id``."""
        return os.path.join(self.dir, doc_id.replace(':', ';'))

    def save(self, doc_id, string):
        """Write ``string`` as the content of ``doc_id``, replacing any previous content."""
        with open(self._path_for(doc_id), "w", encoding='utf-8') as f:
            f.write(string)

    def load(self, doc_id):
        """Return the stored content of ``doc_id``.

        Raises:
            FileNotFoundError: if the document has not been saved.
        """
        with open(self._path_for(doc_id), encoding='utf-8') as f:
            return f.read()

    def delete(self, doc_id):
        """Delete the stored file of ``doc_id``.

        Returns:
            True if a file was removed, False if there was nothing to remove.
        """
        # Resolve against this folder's own directory (not a module-level
        # constant) so that save/load/delete always address the same file.
        path = self._path_for(doc_id)
        if os.path.exists(path):
            os.remove(path)
            return True
        return False
132 |
133 |
class ChromaDocManager:
    """Keeps a Chroma collection of document chunks in sync with a folder of
    full-text documents.

    Every public method takes ``self.lock``; methods whose name starts with an
    underscore do not lock and are called with the lock already held.
    """

    def __init__(self):
        self.lock = threading.Lock()
        # Initialize a persistent Chroma client
        self.client = chromadb.PersistentClient(path=f'./{ROOT_FOLDER}/chroma', settings=Settings(anonymized_telemetry=False)) # this will not refresh on file change
        self.collection = self.client.get_or_create_collection(name='digests', embedding_function=EmbeddingFunction())
        self.folder = DocumentFolder(ROOT_FOLDER)

    def _add_index(self, document: str, doc_id: str, other_meta=None, chunk_size=100, chunk_overlap=0):
        """Split ``document`` into chunks and upsert them into the collection.

        Args:
            document: full text to index.
            doc_id: document id; must not contain ';'.
            other_meta: optional extra metadata merged into every chunk's metadata.
            chunk_size: maximum chunk size handed to the text splitter
                (a quarter of it is used for Chinese).
            chunk_overlap: overlap between consecutive chunks.
        """
        assert ';' not in doc_id
        # Split the document into chunks using the RecursiveCharacterTextSplitter
        if LANGUAGE_PREFERENCE == 'Chinese':
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size//4, chunk_overlap=chunk_overlap, separators=['。','?'], keep_separator=False)
        else:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks = text_splitter.split_text(document)
        if not chunks:
            # Nothing to index (e.g. an empty document); do not hand empty
            # lists to the collection.
            return
        # Add each chunk to ChromaDB with associated doc_id and its index
        ids = [f"{doc_id}_{i}" for i in range(len(chunks))]
        doc_metadata = {"doc_id": doc_id}
        if other_meta:
            doc_metadata.update(other_meta)
        if "doc_time" not in doc_metadata:
            # Default the document date to today when the caller did not supply one.
            doc_metadata["doc_time"] = datetime.datetime.now().strftime("%Y-%m-%d")
        self.collection.upsert(documents=chunks, ids=ids, metadatas=[doc_metadata]*len(chunks))

    def add_document(self, document: str, doc_id: str, **kwargs):
        """Index ``document`` under ``doc_id`` (replacing any previous index
        entries for that id) and save its full text to the folder.

        ``kwargs`` are forwarded to ``_add_index``.
        """
        with self.lock:
            self._remove_index_by_doc_id(doc_id)
            self._add_index(document, doc_id, **kwargs)
            self.folder.save(doc_id, document)

    def query_by_strings(self, strings, n_results):
        """Query the collection with ``strings`` and return the raw result,
        including documents, metadatas and distances."""
        with self.lock:
            res = self.collection.query(
                query_texts=strings,
                n_results=n_results,
                include = [ "documents", "metadatas", "distances" ]
            )
            return res

    def query_by_strings_with_time_range(self, strings, n_results, start_time, end_time):
        """Like ``query_by_strings`` but restricted to chunks whose ``doc_time``
        lies between ``start_time`` and ``end_time`` (inclusive).

        ``start_time`` and ``end_time`` must provide ``strftime``; they are
        compared as "YYYY-MM-DD" strings.
        """
        with self.lock:
            start_time_str = start_time.strftime("%Y-%m-%d")
            end_time_str = end_time.strftime("%Y-%m-%d")
            print("search range: ", start_time_str, end_time_str)
            res = self.collection.query(
                query_texts=strings,
                n_results=n_results,
                include = [ "documents", "metadatas", "distances" ],
                where = {"$and":[{"doc_time":{"$gte": start_time_str}}, {"doc_time":{"$lte": end_time_str}}]}
            )
            return res

    def query_by_doc_id(self, doc_id):
        """Return the stored chunks (documents and metadatas) of ``doc_id``."""
        with self.lock:
            res = self.collection.get(
                where = {"doc_id":doc_id},
                include = [ "documents", "metadatas" ]
            )
            return res

    def _query_by_name(self, doc_name):
        """Return the metadatas of all chunks whose ``doc_name`` metadata equals ``doc_name``."""
        res = self.collection.get(
            where = {"doc_name":doc_name},
            include = [ "metadatas" ]
        )
        return res

    def query_all(self):
        """Return every stored chunk with its document text and metadata."""
        with self.lock:
            return self.collection.get(include = [ "documents", "metadatas" ])

    def _remove_index_by_doc_id(self, doc_id: str):
        """Delete all chunks whose ``doc_id`` metadata equals ``doc_id``."""
        self.collection.delete(where={"doc_id":doc_id})

    def remove_document(self, doc_id: str):
        """Remove ``doc_id`` from both the collection and the folder."""
        with self.lock:
            self._remove_index_by_doc_id(doc_id)
            self.folder.delete(doc_id)

    def remove_document_by_name(self, doc_name: str):
        """Remove every document whose chunks carry ``doc_name`` as their
        ``doc_name`` metadata, from both the collection and the folder."""
        with self.lock:
            res = self._query_by_name(doc_name)
            print(res["metadatas"])
            if res["metadatas"]:
                # Several chunks share one doc_id; collapse them to the distinct ids.
                ids = set([x['doc_id'] for x in res["metadatas"]])
                print("to remove: ", ids)
                for doc_id in ids:
                    self._remove_index_by_doc_id(doc_id)
                    self.folder.delete(doc_id)

    def get_document_by_ids(self, doc_ids):
        """Return the full texts of ``doc_ids`` as loaded from the folder, in order."""
        with self.lock:
            return [self.folder.load(doc_id) for doc_id in doc_ids]

    # def update_document(self, document: str, doc_id: str):
    #     # Update the document in ChromaDB by first removing and then adding the new chunks
    #     self.remove_document(doc_id)
    #     self.add_document(document, doc_id)
237 |
238 | doc_manager = ChromaDocManager()
239 |
--------------------------------------------------------------------------------
/backend/file_monitor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import threading
3 | import time
4 | from watchdog.observers import Observer
5 | from watchdog.events import FileSystemEventHandler
6 | from datetime import datetime, timedelta
7 |
8 | from llm_utils import digest_simple, digest_markdown
9 | from chroma_doc_manager import doc_manager
10 | from bm25_api import update_corpus
11 |
12 | modified_files = {}
13 |
class MyEventHandler(FileSystemEventHandler):
    """Logs file-system events and records the latest pending action for every
    Markdown (``.md``) path in the module-level ``modified_files`` dict."""

    def _queue_markdown_event(self, path, action):
        # Only Markdown files are tracked; a newer action replaces an older one.
        if path.endswith('.md'):
            modified_files[path] = action

    def on_modified(self, event):
        print(f"Change: {event.src_path}")
        self._queue_markdown_event(event.src_path, "on_modified")

    def on_created(self, event):
        print(f"Add: {event.src_path}")
        self._queue_markdown_event(event.src_path, "on_created")

    def on_deleted(self, event):
        print(f"Delete: {event.src_path}")
        self._queue_markdown_event(event.src_path, "on_deleted")

    def on_moved(self, event):
        # A move is recorded as a deletion of the source plus a creation of the destination.
        print(f"Move: {event.src_path} to {event.dest_path}")
        self._queue_markdown_event(event.src_path, "on_deleted")
        self._queue_markdown_event(event.dest_path, "on_created")
36 |
class UpdateThread(threading.Thread):
    """Background thread that processes the paths recorded in ``modified_files``
    one at a time, refreshing the document index and the BM25 corpus."""

    def __init__(self, server_state, *args, **kwargs):
        """
        Args:
            server_state: mapping read for its "last_use" entry, which is
                compared against ``datetime.now()`` in ``run``.
        """
        super().__init__(*args, **kwargs)
        self.server_state = server_state
        update_corpus() # probably better start in memory server

    def run(self):
        """Loop forever, handling one recorded file event per iteration."""
        while True:
            if not modified_files: # If there are no modified files
                time.sleep(60) # Wait for a minute before checking again
                continue # Skip the rest of the loop and start the next iteration

            now = datetime.now()
            diff = timedelta(seconds=30)
            if self.server_state["last_use"] and now - self.server_state["last_use"] < diff: # If the server was used within the last 30 seconds
                print("Server is used lately")
                time.sleep(10) # Wait 10 seconds before checking again
                continue # Skip the rest of the loop and start the next iteration

            path, event_type = next(iter(modified_files.items())) # Get the first item
            del modified_files[path] # Remove the handled file
            print(f"To handle {event_type}: {path}")
            try:
                file = os.path.basename(path)
                # Strip the extension and revert ';' to ':' to obtain the document title.
                title = file.rsplit(".", 1)[0].replace(';', ':')
                # Whatever the event, first drop what is currently indexed for this file.
                # NOTE(review): notes are recognised by the substring "notes" anywhere
                # in the path, so any other path containing "notes" takes this branch too.
                if "notes" in path:
                    doc_manager.remove_document_by_name(title)
                else:
                    doc_manager.remove_document(title)

                # Deleted files need nothing more; created/modified files are re-digested.
                if event_type in ["on_created", "on_modified"]:
                    if "notes" in path:
                        time_str = str(now)[0:10]  # first 10 characters of the timestamp, i.e. the date part
                        print(time_str)
                        digests = digest_markdown(title, path)
                        print(digests)
                        # One document per (headers, summary) pair, all tagged with the note's title.
                        for headers, summary in digests:
                            doc_id = "Note of " + headers
                            doc_manager.add_document(summary, doc_id, other_meta = {"doc_time": time_str, "doc_name":title})
                    else:
                        summary, tag = digest_simple(title, path)
                        digest = f"{title}\n{summary}"
                        if len(tag):
                            digest += '\nOpinion: ' + tag
                        print(digest)
                        if title.startswith("Conversation"):
                            # Take the 10 characters that follow the last "on" and its
                            # separator — presumably a YYYY-MM-DD date; verify against
                            # how conversation titles are generated.
                            time_str = title.rsplit("on", 1)[1][1:11]
                            doc_manager.add_document(digest, title, other_meta = {"doc_time": time_str})
                        else:
                            print("Warning: Unformatted doc ", title)
            except Exception:
                # Best effort: report the failure and carry on with the next file.
                import traceback
                traceback.print_exc()
                print(f"error handling {event_type}: {path}")

            # Rebuild the BM25 corpus after each handled file.
            update_corpus()
93 |
94 |
class WatchdogThread(threading.Thread):
    """Thread that runs a watchdog ``Observer`` over the chat and note directories."""

    def __init__(self, chat_path, note_path, *args, **kwargs):
        """
        Args:
            chat_path: directory to watch (non-recursively) for chat files.
            note_path: directory to watch (non-recursively) for note files.
        """
        super().__init__(*args, **kwargs)
        self.observer = Observer()
        self.chat_path = chat_path
        self.note_path = note_path

    def run(self):
        """Schedule one shared handler on both directories, start the observer
        and idle until interrupted."""
        handler = MyEventHandler()
        for watched_dir in (self.chat_path, self.note_path):
            self.observer.schedule(handler, watched_dir, recursive=False)
        self.observer.start()
        try:
            # Keep this thread alive; the observer does its work in the background.
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            self.observer.stop()
        self.observer.join()
113 |
--------------------------------------------------------------------------------
/backend/language_presets.json:
--------------------------------------------------------------------------------
1 | {
2 | "languages": {
3 | "English": {
4 | "user_message": "I don't remember how Loyal Elephie was created.",
5 | "think_message": "To assist {NICK_NAME}, I need to search my memory for the questions:\nHow Loyal Elephie was created?\nHow AI secretary like Loyal Elephie was developed?",
6 | "search_query": "Loyal Elephie created detail\n{NICK_NAME} AI secretary develop",
7 | "context_title": "Technical notes",
8 | "context_content": "{NICK_NAME} mentioned that the current AI secretary -- Loyal Elephie is integrated with advanced vector search and LLM technology. It could be used to provide insightful advice because the AI secretary has access to vast knowledge from {NICK_NAME}'s notes and conversations.",
9 | "reply_message": "Hey {NICK_NAME}, how could you forget about my creation? I am your artwork using advanced vector search and LLM technology. If you need insightful advice based on your notes and conversations between us, just tap on me, ah-ha!"
10 | },
11 | "Chinese": {
12 | "user_message": "我不记得Loyal Elephie是如何创建的。",
13 | "think_message": "为了帮助{NICK_NAME},我需要在我的记忆中搜索以下问题:\nLoyal Elephie是如何创建的?\n像Loyal Elephie这样的AI秘书是如何开发的?",
14 | "search_query": "Loyal Elephie创建详情\n{NICK_NAME} AI秘书开发",
15 | "context_title": "技术笔记",
16 | "context_content": "{NICK_NAME}提到当前的AI秘书——Loyal Elephie集成了先进的向量搜索和LLM技术。它可以用来提供有见地的建议,因为AI秘书可以访问{NICK_NAME}的笔记和对话中的大量知识。",
17 | "reply_message": "嘿,{NICK_NAME},你怎么能忘记我的创建过程呢?我是使用先进的向量搜索和LLM技术的作品。如果你需要基于你的笔记和我们之间的对话提供有见地的建议,只需来找我,啊哈!"
18 | },
19 | "German": {
20 | "user_message": "Ich erinnere mich nicht daran, wie Loyal Elephie erschaffen wurde.",
21 | "think_message": "Um {NICK_NAME} zu unterstützen, muss ich mein Gedächtnis nach den Fragen durchsuchen:\nWie wurde Loyal Elephie erschaffen?\nWie wurde ein KI-Sekretär wie Loyal Elephie entwickelt?",
22 | "search_query": "Loyal Elephie Erschaffungsdetails\n{NICK_NAME} KI-Sekretär Entwicklung",
23 | "context_title": "Technische Notizen",
24 | "context_content": "{NICK_NAME} erwähnte, dass der aktuelle KI-Sekretär -- Loyal Elephie mit fortschrittlicher Vektorsuche und LLM-Technologie integriert ist. Er könnte verwendet werden, um aufschlussreiche Ratschläge zu geben, da der KI-Sekretär Zugang zu umfangreichem Wissen aus {NICK_NAME}s Notizen und Gesprächen hat.",
25 | "reply_message": "Hey {NICK_NAME}, wie konntest du meine Erschaffung vergessen? Ich bin dein Kunstwerk, das fortschrittliche Vektorsuche und LLM-Technologie verwendet. Wenn du aufschlussreiche Ratschläge basierend auf deinen Notizen und unseren Gesprächen benötigst, tippe einfach auf mich, aha!"
26 | },
27 | "French": {
28 | "user_message": "Je ne me souviens pas comment Loyal Elephie a été créé.",
29 | "think_message": "Pour aider {NICK_NAME}, je dois chercher dans ma mémoire les questions :\nComment Loyal Elephie a-t-il été créé ?\nComment un secrétaire IA comme Loyal Elephie a-t-il été développé ?",
30 | "search_query": "Détails de la création de Loyal Elephie\nDéveloppement du secrétaire IA de {NICK_NAME}",
31 | "context_title": "Notes techniques",
32 | "context_content": "{NICK_NAME} a mentionné que le secrétaire IA actuel -- Loyal Elephie est intégré avec une technologie avancée de recherche vectorielle et LLM. Il pourrait être utilisé pour fournir des conseils perspicaces car le secrétaire IA a accès à de vastes connaissances issues des notes et conversations de {NICK_NAME}.",
33 | "reply_message": "Hey {NICK_NAME}, comment as-tu pu oublier ma création ? Je suis ton œuvre d'art utilisant une technologie avancée de recherche vectorielle et LLM. Si tu as besoin de conseils perspicaces basés sur tes notes et nos conversations, il te suffit de me taper dessus, ah-ha !"
34 | },
35 | "Spanish": {
36 | "user_message": "No recuerdo cómo se creó Loyal Elephie.",
37 | "think_message": "Para ayudar a {NICK_NAME}, necesito buscar en mi memoria las preguntas:\n¿Cómo se creó Loyal Elephie?\n¿Cómo se desarrolló un secretario de IA como Loyal Elephie?",
38 | "search_query": "Detalles de la creación de Loyal Elephie\nDesarrollo del secretario de IA de {NICK_NAME}",
39 | "context_title": "Notas técnicas",
40 | "context_content": "{NICK_NAME} mencionó que el secretario de IA actual -- Loyal Elephie está integrado con tecnología avanzada de búsqueda vectorial y LLM. Podría utilizarse para proporcionar consejos perspicaces porque el secretario de IA tiene acceso a un vasto conocimiento de las notas y conversaciones de {NICK_NAME}.",
41 | "reply_message": "¡Oye, {NICK_NAME}! ¿Cómo pudiste olvidar mi creación? Soy tu obra de arte utilizando tecnología avanzada de búsqueda vectorial y LLM. Si necesitas consejos perspicaces basados en tus notas y nuestras conversaciones, ¡solo tócame, ah-ha!"
42 | },
43 | "Portuguese": {
44 | "user_message": "Não me lembro como o Loyal Elephie foi criado.",
45 | "think_message": "Para ajudar {NICK_NAME}, preciso pesquisar na minha memória as perguntas:\nComo o Loyal Elephie foi criado?\nComo um secretário de IA como o Loyal Elephie foi desenvolvido?",
46 | "search_query": "Detalhes da criação do Loyal Elephie\nDesenvolvimento do secretário de IA de {NICK_NAME}",
47 | "context_title": "Notas técnicas",
48 | "context_content": "{NICK_NAME} mencionou que o atual secretário de IA -- Loyal Elephie está integrado com tecnologia avançada de busca vetorial e LLM. Ele poderia ser usado para fornecer conselhos perspicazes porque o secretário de IA tem acesso a um vasto conhecimento das notas e conversas de {NICK_NAME}.",
49 | "reply_message": "Ei {NICK_NAME}, como você pôde esquecer da minha criação? Eu sou sua obra de arte usando tecnologia avançada de busca vetorial e LLM. Se você precisar de conselhos perspicazes baseados em suas notas e nossas conversas, é só me tocar, ah-ha!"
50 | },
51 | "Italian": {
52 | "user_message": "Non ricordo come è stato creato Loyal Elephie.",
53 | "think_message": "Per assistere {NICK_NAME}, devo cercare nella mia memoria le domande:\nCome è stato creato Loyal Elephie?\nCome è stato sviluppato un segretario IA come Loyal Elephie?",
54 | "search_query": "Dettagli sulla creazione di Loyal Elephie\nSviluppo del segretario IA di {NICK_NAME}",
55 | "context_title": "Note tecniche",
56 | "context_content": "{NICK_NAME} ha menzionato che l'attuale segretario IA -- Loyal Elephie è integrato con tecnologia avanzata di ricerca vettoriale e LLM. Potrebbe essere utilizzato per fornire consigli perspicaci perché il segretario IA ha accesso a vaste conoscenze dalle note e dalle conversazioni di {NICK_NAME}.",
57 | "reply_message": "Ehi {NICK_NAME}, come hai potuto dimenticare la mia creazione? Sono la tua opera d'arte che utilizza tecnologia avanzata di ricerca vettoriale e LLM. Se hai bisogno di consigli perspicaci basati sulle tue note e le nostre conversazioni, basta toccarmi, ah-ha!"
58 | },
59 | "Dutch": {
60 | "user_message": "Ik herinner me niet hoe Loyal Elephie werd gecreëerd.",
61 | "think_message": "Om {NICK_NAME} te helpen, moet ik in mijn geheugen zoeken naar de vragen:\nHoe werd Loyal Elephie gecreëerd?\nHoe werd een AI-secretaris zoals Loyal Elephie ontwikkeld?",
62 | "search_query": "Details over de creatie van Loyal Elephie\nOntwikkeling van {NICK_NAME}'s AI-secretaris",
63 | "context_title": "Technische notities",
64 | "context_content": "{NICK_NAME} vermeldde dat de huidige AI-secretaris -- Loyal Elephie is geïntegreerd met geavanceerde vectorzoekopdracht en LLM-technologie. Het zou kunnen worden gebruikt om inzichtelijke adviezen te geven omdat de AI-secretaris toegang heeft tot uitgebreide kennis uit {NICK_NAME}'s notities en gesprekken.",
65 | "reply_message": "Hé {NICK_NAME}, hoe kon je mijn creatie vergeten? Ik ben jouw kunstwerk dat gebruik maakt van geavanceerde vectorzoekopdracht en LLM-technologie. Als je inzichtelijke adviezen nodig hebt op basis van je notities en onze gesprekken, tik dan gewoon op mij, ah-ha!"
66 | },
67 | "Czech": {
68 | "user_message": "Nepamatuji si, jak byl Loyal Elephie vytvořen.",
69 | "think_message": "Abych pomohl {NICK_NAME}, musím ve své paměti vyhledat otázky:\nJak byl Loyal Elephie vytvořen?\nJak byl vyvinut AI sekretář jako Loyal Elephie?",
70 | "search_query": "Podrobnosti o vytvoření Loyal Elephie\nVývoj AI sekretáře {NICK_NAME}",
71 | "context_title": "Technické poznámky",
72 | "context_content": "{NICK_NAME} zmínil, že současný AI sekretář -- Loyal Elephie je integrován s pokročilou technologií vektorového vyhledávání a LLM. Mohl by být použit k poskytování pronikavých rad, protože AI sekretář má přístup k rozsáhlým znalostem z poznámek a konverzací {NICK_NAME}.",
73 | "reply_message": "Hej {NICK_NAME}, jak jsi mohl zapomenout na moje vytvoření? Jsem tvoje umělecké dílo využívající pokročilou technologii vektorového vyhledávání a LLM. Pokud potřebuješ pronikavé rady založené na tvých poznámkách a našich konverzacích, stačí na mě klepnout, ah-ha!"
74 | },
75 | "Polish": {
76 | "user_message": "Nie pamiętam, jak został stworzony Loyal Elephie.",
77 | "think_message": "Aby pomóc {NICK_NAME}, muszę przeszukać moją pamięć w poszukiwaniu pytań:\nJak został stworzony Loyal Elephie?\nJak został opracowany sekretarz AI taki jak Loyal Elephie?",
78 | "search_query": "Szczegóły stworzenia Loyal Elephie\nRozwój sekretarza AI {NICK_NAME}",
79 | "context_title": "Notatki techniczne",
80 | "context_content": "{NICK_NAME} wspomniał, że obecny sekretarz AI -- Loyal Elephie jest zintegrowany z zaawansowaną technologią wyszukiwania wektorowego i LLM. Mógłby być wykorzystywany do udzielania wnikliwych porad, ponieważ sekretarz AI ma dostęp do obszernej wiedzy z notatek i rozmów {NICK_NAME}.",
81 | "reply_message": "Hej {NICK_NAME}, jak mogłeś zapomnieć o moim stworzeniu? Jestem twoim dziełem sztuki wykorzystującym zaawansowaną technologię wyszukiwania wektorowego i LLM. Jeśli potrzebujesz wnikliwych porad opartych na twoich notatkach i naszych rozmowach, po prostu dotknij mnie, ah-ha!"
82 | },
83 | "Russian": {
84 | "user_message": "Я не помню, как был создан Loyal Elephie.",
85 | "think_message": "Чтобы помочь {NICK_NAME}, мне нужно поискать в своей памяти ответы на вопросы:\nКак был создан Loyal Elephie?\nКак был разработан ИИ-секретарь, подобный Loyal Elephie?",
86 | "search_query": "Подробности создания Loyal Elephie\nРазработка ИИ-секретаря {NICK_NAME}",
87 | "context_title": "Технические заметки",
88 | "context_content": "{NICK_NAME} упомянул, что текущий ИИ-секретарь -- Loyal Elephie интегрирован с передовой технологией векторного поиска и LLM. Его можно использовать для предоставления проницательных советов, поскольку ИИ-секретарь имеет доступ к обширным знаниям из заметок и разговоров {NICK_NAME}.",
89 | "reply_message": "Эй, {NICK_NAME}, как ты мог забыть о моем создании? Я твое произведение искусства, использующее передовую технологию векторного поиска и LLM. Если тебе нужны проницательные советы, основанные на твоих заметках и наших разговорах, просто нажми на меня, ах-ха!"
90 | },
91 | "Arabic": {
92 | "user_message": "لا أتذكر كيف تم إنشاء Loyal Elephie.",
93 | "think_message": "لمساعدة {NICK_NAME}، أحتاج إلى البحث في ذاكرتي عن الأسئلة:\nكيف تم إنشاء Loyal Elephie؟\nكيف تم تطوير سكرتير الذكاء الاصطناعي مثل Loyal Elephie؟",
94 | "search_query": "تفاصيل إنشاء Loyal Elephie\nتطوير سكرتير الذكاء الاصطناعي لـ {NICK_NAME}",
95 | "context_title": "ملاحظات تقنية",
96 | "context_content": "ذكر {NICK_NAME} أن سكرتير الذكاء الاصطناعي الحالي -- Loyal Elephie مدمج مع تقنية بحث متجهي متقدمة وتقنية LLM. يمكن استخدامه لتقديم نصائح ثاقبة لأن سكرتير الذكاء الاصطناعي لديه إمكانية الوصول إلى معرفة واسعة من ملاحظات ومحادثات {NICK_NAME}.",
97 | "reply_message": "مرحبًا {NICK_NAME}، كيف يمكنك أن تنسى إنشائي؟ أنا عملك الفني الذي يستخدم تقنية البحث المتجهي المتقدمة وتقنية LLM. إذا كنت بحاجة إلى نصائح ثاقبة بناءً على ملاحظاتك ومحادثاتنا، ما عليك سوى النقر علي، آه-ها!"
98 | }
99 | }
100 | }
--------------------------------------------------------------------------------
/backend/llama_types.py:
--------------------------------------------------------------------------------
1 | """Types and request signatures for OpenAI compatibility
2 |
3 | NOTE: These types may change to match the OpenAI OpenAPI specification.
4 |
5 | Based on the OpenAI OpenAPI specification:
6 | https://github.com/openai/openai-openapi/blob/master/openapi.yaml
7 |
8 | """
9 | from typing import Any, List, Optional, Dict, Union
10 | from typing_extensions import TypedDict, NotRequired, Literal
11 |
12 |
# NOTE: Defining this correctly using annotations seems to break pydantic validation.
#       This is a workaround until we can figure out how to do this correctly.
#       The intended recursive definition is kept below for reference:
# JsonType = Union[None, int, str, bool, List["JsonType"], Dict[str, "JsonType"]]
# Non-recursive stand-in: nested list items / dict values are typed as ``Any``.
JsonType = Union[None, int, str, bool, List[Any], Dict[str, Any]]
17 |
18 |
class EmbeddingUsage(TypedDict):
    """Token counts reported in ``CreateEmbeddingResponse.usage``."""

    prompt_tokens: int
    total_tokens: int


class Embedding(TypedDict):
    """One element of ``CreateEmbeddingResponse.data``."""

    index: int
    object: str
    # The embedding vector itself, as a list of floats.
    embedding: List[float]


class CreateEmbeddingResponse(TypedDict):
    """Top-level dict shape of an embeddings response."""

    # Always the literal string "list".
    object: Literal["list"]
    model: str
    data: List[Embedding]
    usage: EmbeddingUsage
35 |
36 |
class CompletionLogprobs(TypedDict):
    """Log-probability details referenced by ``CompletionChoice.logprobs``.

    All four fields are lists; presumably they are parallel (one entry per
    token) -- TODO confirm against the code that produces them.
    """

    text_offset: List[int]
    # Individual entries may be ``None``.
    token_logprobs: List[Optional[float]]
    tokens: List[str]
    # Each entry is either ``None`` or a mapping of str -> float.
    top_logprobs: List[Optional[Dict[str, float]]]


class CompletionChoice(TypedDict):
    """One element of ``CreateCompletionResponse.choices``."""

    text: str
    index: int
    logprobs: Optional[CompletionLogprobs]
    # Either ``None`` or one of the two literals "stop" / "length".
    finish_reason: Optional[Literal["stop", "length"]]


class CompletionUsage(TypedDict):
    """Token counts; used as the ``usage`` value of both the text completion
    and the chat completion response types in this module."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class CreateCompletionResponse(TypedDict):
    """Top-level dict shape of a text completion response."""

    id: str
    # Always the literal string "text_completion".
    object: Literal["text_completion"]
    created: int
    model: str
    choices: List[CompletionChoice]
    # Optional key: the dict is valid without it.
    usage: NotRequired[CompletionUsage]
64 |
65 |
class ChatCompletionResponseFunctionCall(TypedDict):
    """Value of the deprecated ``function_call`` key of a response message."""

    name: str
    # Typed as a plain string; presumably JSON-encoded arguments -- TODO confirm.
    arguments: str


class ChatCompletionResponseMessage(TypedDict):
    """Message carried by a ``ChatCompletionResponseChoice``."""

    # May be ``None``.
    content: Optional[str]
    # Optional key; the annotation is a forward reference (string) because
    # ``ChatCompletionMessageToolCalls`` is defined further down the module.
    tool_calls: NotRequired["ChatCompletionMessageToolCalls"]
    role: Literal["assistant", "function"]  # NOTE: "function" may be incorrect here
    function_call: NotRequired[ChatCompletionResponseFunctionCall]  # DEPRECATED


class ChatCompletionFunction(TypedDict):
    """Function description: a name, an optional description and parameters."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionResponseChoice(TypedDict):
    """One element of ``CreateChatCompletionResponse.choices``."""

    index: int
    message: "ChatCompletionResponseMessage"
    # Unconstrained string (or ``None``), unlike the Literal used for
    # ``CompletionChoice.finish_reason``.
    finish_reason: Optional[str]


class CreateChatCompletionResponse(TypedDict):
    """Top-level dict shape of a non-streaming chat completion response."""

    id: str
    # Always the literal string "chat.completion".
    object: Literal["chat.completion"]
    created: int
    model: str
    choices: List["ChatCompletionResponseChoice"]
    # Required here, whereas ``CreateCompletionResponse.usage`` is NotRequired.
    usage: CompletionUsage
97 |
98 |
class ChatCompletionMessageToolCallChunkFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionMessageToolCallChunk``."""

    name: str
    arguments: str


class ChatCompletionMessageToolCallChunk(TypedDict):
    """Element of ``ChatCompletionStreamResponseDelta.tool_calls``."""

    index: int
    # Optional key: a chunk may omit the id.
    id: NotRequired[str]
    # Always the literal string "function".
    type: Literal["function"]
    function: ChatCompletionMessageToolCallChunkFunction


class ChatCompletionStreamResponseDeltaEmpty(TypedDict):
    """Delta with no keys at all (the empty-dict alternative of ``delta``)."""

    pass


class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict):
    """Value of the deprecated ``function_call`` key of a stream delta."""

    name: str
    arguments: str


class ChatCompletionStreamResponseDelta(TypedDict):
    """Delta of a streamed chat chunk; every key is optional."""

    content: NotRequired[str]
    function_call: NotRequired[
        ChatCompletionStreamResponseDeltaFunctionCall
    ]  # DEPRECATED
    tool_calls: NotRequired[List[ChatCompletionMessageToolCallChunk]]
    role: NotRequired[Literal["system", "user", "assistant", "tool"]]


class ChatCompletionStreamResponseChoice(TypedDict):
    """One element of ``CreateChatCompletionStreamResponse.choices``."""

    index: int
    # Either a populated delta or the empty-delta type.
    delta: Union[
        ChatCompletionStreamResponseDelta, ChatCompletionStreamResponseDeltaEmpty
    ]
    # ``None`` or one of the four listed literals.
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "function_call"]]


class CreateChatCompletionStreamResponse(TypedDict):
    """Top-level dict shape of one streamed chat completion chunk."""

    id: str
    model: str
    # Always the literal string "chat.completion.chunk".
    object: Literal["chat.completion.chunk"]
    created: int
    choices: List[ChatCompletionStreamResponseChoice]
143 |
144 |
class ChatCompletionFunctions(TypedDict):
    """Function description; declares the same three fields as
    ``ChatCompletionFunction`` earlier in this module."""

    name: str
    description: NotRequired[str]
    parameters: Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionFunctionCallOption(TypedDict):
    """Dict holding only a function ``name``."""

    name: str


class ChatCompletionRequestResponseFormat(TypedDict):
    """Response-format selector; ``type`` is "text" or "json_object"."""

    type: Literal["text", "json_object"]
157 |
158 |
class ChatCompletionRequestMessageContentPartText(TypedDict):
    """Text content part (``type`` is the literal "text")."""

    type: Literal["text"]
    text: str


class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict):
    """Structured form of ``image_url`` for an image content part."""

    url: str
    # Optional key; when present, one of "auto" / "low" / "high".
    detail: NotRequired[Literal["auto", "low", "high"]]


class ChatCompletionRequestMessageContentPartImage(TypedDict):
    """Image content part (``type`` is the literal "image_url")."""

    type: Literal["image_url"]
    # Accepts either a bare string or the structured dict above.
    image_url: Union[str, ChatCompletionRequestMessageContentPartImageImageUrl]


# A content part is either a text part or an image part.
ChatCompletionRequestMessageContentPart = Union[
    ChatCompletionRequestMessageContentPartText,
    ChatCompletionRequestMessageContentPartImage,
]
178 |
179 |
class ChatCompletionRequestSystemMessage(TypedDict):
    """Request message whose ``role`` is the literal "system"."""

    role: Literal["system"]
    content: Optional[str]


class ChatCompletionRequestUserMessage(TypedDict):
    """Request message whose ``role`` is the literal "user"."""

    role: Literal["user"]
    # ``None``, a plain string, or a list of text/image content parts.
    content: Optional[Union[str, List[ChatCompletionRequestMessageContentPart]]]


class ChatCompletionMessageToolCallFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionMessageToolCall``."""

    name: str
    arguments: str


class ChatCompletionMessageToolCall(TypedDict):
    """A single tool call; ``type`` is always the literal "function"."""

    id: str
    type: Literal["function"]
    function: ChatCompletionMessageToolCallFunction


# List alias; referenced by name (forward reference) from
# ``ChatCompletionResponseMessage.tool_calls`` earlier in the module.
ChatCompletionMessageToolCalls = List[ChatCompletionMessageToolCall]


class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict):
    """Value of the deprecated ``function_call`` key of an assistant message."""

    name: str
    arguments: str


class ChatCompletionRequestAssistantMessage(TypedDict):
    """Request message whose ``role`` is the literal "assistant"."""

    role: Literal["assistant"]
    content: Optional[str]
    tool_calls: NotRequired[ChatCompletionMessageToolCalls]
    function_call: NotRequired[
        ChatCompletionRequestAssistantMessageFunctionCall
    ]  # DEPRECATED


class ChatCompletionRequestToolMessage(TypedDict):
    """Request message whose ``role`` is the literal "tool"."""

    role: Literal["tool"]
    content: Optional[str]
    # Presumably matches the ``id`` of the tool call being answered -- TODO confirm.
    tool_call_id: str


class ChatCompletionRequestFunctionMessage(TypedDict):
    """Request message whose ``role`` is the literal "function"."""

    role: Literal["function"]
    content: Optional[str]
    name: str
228 |
229 |
# Any message accepted in a chat completion request; the member types are
# told apart by their ``role`` literal. Each member is listed exactly once
# (a duplicate entry is redundant because ``typing.Union`` collapses it).
ChatCompletionRequestMessage = Union[
    ChatCompletionRequestSystemMessage,
    ChatCompletionRequestUserMessage,
    ChatCompletionRequestAssistantMessage,
    ChatCompletionRequestToolMessage,
    ChatCompletionRequestFunctionMessage,
]
238 |
239 |
class ChatCompletionRequestFunctionCallOption(TypedDict):
    """Dict holding only a function ``name``; same single field as
    ``ChatCompletionFunctionCallOption`` earlier in this module."""

    name: str


# Either one of the literals "none" / "auto", or a dict naming a function.
ChatCompletionRequestFunctionCall = Union[
    Literal["none", "auto"], ChatCompletionRequestFunctionCallOption
]

ChatCompletionFunctionParameters = Dict[str, JsonType]  # TODO: make this more specific


class ChatCompletionToolFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionTool``."""

    name: str
    description: NotRequired[str]
    parameters: ChatCompletionFunctionParameters


class ChatCompletionTool(TypedDict):
    """Tool definition; ``type`` is always the literal "function"."""

    type: Literal["function"]
    function: ChatCompletionToolFunction


class ChatCompletionNamedToolChoiceFunction(TypedDict):
    """``function`` payload of a ``ChatCompletionNamedToolChoice``."""

    name: str


class ChatCompletionNamedToolChoice(TypedDict):
    """Tool choice that names a function; ``type`` is the literal "function"."""

    type: Literal["function"]
    function: ChatCompletionNamedToolChoiceFunction


# Either one of the literals "none" / "auto", or a named tool choice.
ChatCompletionToolChoiceOption = Union[
    Literal["none", "auto"], ChatCompletionNamedToolChoice
]
274 |
275 |
# NOTE: The following type names are not part of the OpenAI OpenAPI specification
#       and will be removed in a future major release.
# Each name below is a plain alias: it is bound to the very same class object
# as the name on the right-hand side, not a new type.

EmbeddingData = Embedding
# The three completion aliases below all point at the same response type.
CompletionChunk = CreateCompletionResponse
Completion = CreateCompletionResponse
CreateCompletionStreamResponse = CreateCompletionResponse
ChatCompletionMessage = ChatCompletionResponseMessage
ChatCompletionChoice = ChatCompletionResponseChoice
ChatCompletion = CreateChatCompletionResponse
ChatCompletionChunkDeltaEmpty = ChatCompletionStreamResponseDeltaEmpty
ChatCompletionChunkChoice = ChatCompletionStreamResponseChoice
ChatCompletionChunkDelta = ChatCompletionStreamResponseDelta
# Both chunk/stream aliases point at the streaming response type.
ChatCompletionChunk = CreateChatCompletionStreamResponse
ChatCompletionStreamResponse = CreateChatCompletionStreamResponse
ChatCompletionResponseFunction = ChatCompletionFunction
ChatCompletionFunctionCall = ChatCompletionResponseFunctionCall
--------------------------------------------------------------------------------
/backend/llm_utils.py:
--------------------------------------------------------------------------------
1 | from openai import OpenAI
2 | from langchain.text_splitter import MarkdownHeaderTextSplitter
3 | import tiktoken
4 |
5 | from settings import *
6 |
# Two separate OpenAI clients so embeddings and chat can target different
# endpoints/keys. The *_BASE_URL / *_API_KEY names are not defined in this
# file; presumably they come from the `from settings import *` above.
# Client used by get_embeddings().
client_embed = OpenAI(base_url = EMBEDDING_BASE_URL, api_key = EMBEDDING_API_KEY)
# Client used by chat().
client = OpenAI(base_url = CHAT_BASE_URL, api_key = CHAT_API_KEY)
9 |
def get_embeddings(chunks):
    """Embed ``chunks`` with the configured embedding model.

    Args:
        chunks: Input passed through as ``input`` to the embeddings endpoint
            (a list of strings in the usual case).

    Returns:
        A list with one embedding per item of the response's ``data``.
        Empty input yields ``[]`` without contacting the server, since there
        is nothing to embed.
    """
    if not chunks:
        return []
    response = client_embed.embeddings.create(input=chunks, model=EMBEDDING_MODEL_NAME)
    return [item.embedding for item in response.data]
13 |
def chat(messages:list[dict]):
    """Run one chat completion and return the text of its first choice.

    Args:
        messages: Chat messages forwarded unchanged to the completion call.

    Returns:
        ``message.content`` of the first returned choice.
    """
    completion = client.chat.completions.create(
        messages=messages,
        model=CHAT_MODEL_NAME,
        max_tokens=CHAT_MAX_TOKEN,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
21 |
def simplify_markdown_headers(page_content, current_nesting_level):
    """Shift markdown header levels relative to the section they live in.

    Every line that starts with ``#`` is rewritten to level
    ``original_level - current_nesting_level + 1``, clamped to at least 1.
    All other lines are kept unchanged.

    Args:
        page_content: Markdown text; lines are separated by ``\\n``.
        current_nesting_level: Header depth of the enclosing section.

    Returns:
        The text with adjusted header lines, joined back with ``\\n``.
    """
    simplified_lines = []
    for line in page_content.split('\n'):
        if not line.startswith('#'):
            # Not a header: keep the line as is.
            simplified_lines.append(line)
            continue

        # Only the leading run of '#' defines the level; a '#' later in the
        # title (e.g. "Issue #42", "C#") must not be counted.
        title = line.lstrip('#')
        header_level = len(line) - len(title)

        # Re-base the level and never go below a level-1 header.
        new_header_level = max(header_level - current_nesting_level + 1, 1)
        simplified_lines.append('#' * new_header_level + ' ' + title.lstrip())

    return '\n'.join(simplified_lines)
50 |
51 |
# This function will try to digest a markdown file into multiple docs based on headers
def digest_markdown(title, path):
    """Split a markdown file on its #/##/### headers and summarise each part.

    Args:
        title: Document title; used as the first element of every breadcrumb.
        path: Path of the markdown file (read as UTF-8).

    Returns:
        A list of ``(breadcrumb, summary)`` tuples, one per split section.
        The breadcrumb is ``title`` followed by the section's header texts
        joined with ``" > "``; the summary is whatever ``chat`` returns.
    """
    headers_to_split_on = [
        ("#", "header1"),
        ("##", "header2"),
        ("###", "header3"),
    ]
    parent_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    with open(path, encoding='utf-8') as f:
        s = f.read()
    docs = parent_splitter.split_text(s)

    # The system prompt is identical for every section, so build it once
    # instead of on each loop iteration.
    prompt = SUMMARY_NOTE_PROMPT.replace("{NICK_NAME}", NICK_NAME)
    prompt = prompt.replace("{LANGUAGE_PREFERENCE}", "" if LANGUAGE_PREFERENCE=="English" else f" The note should be in {LANGUAGE_PREFERENCE}.")

    digests = []
    for doc in docs:
        breadcrumb = title
        level = 0
        # Walk header1..header3 in order; the deepest header present in the
        # metadata determines the section's nesting level.
        for depth, (_, key) in enumerate(headers_to_split_on, start=1):
            if key in doc.metadata:
                breadcrumb += " > " + doc.metadata[key].strip()
                level = depth
        page_content = simplify_markdown_headers(doc.page_content.strip(), level)
        content = f"---Begin Note---\nHeaders: {breadcrumb}\n{page_content}\n---End Note---"
        summary = chat([
            {"role": "system", "content": prompt},
            {"role": "user", "content": content}]
        )
        digests.append((breadcrumb, summary))
    return digests
88 |
def digest_simple(title, path):
    """Summarise a whole file with one chat call.

    If the file starts with ``#``, its first line is treated as a tag line:
    it is removed from the text and returned (without leading ``#`` and
    surrounding whitespace) as the tag.

    Args:
        title: Title placed in the ``---{title}---`` banner sent to the model.
        path: Path of the file (read as UTF-8).

    Returns:
        ``(summary, tag)`` where ``summary`` is the stripped model reply and
        ``tag`` is ``""`` for untagged files.
    """
    with open(path, encoding='utf-8') as f:
        s = f.read()
    tag = ""
    if s.startswith('#'): # tagged doc
        # partition() never fails: a file consisting of just the tag line
        # (no newline) yields the tag and an empty body, where unpacking
        # split('\n', 1) would raise ValueError.
        first_line, _, s = s.partition('\n')
        tag = first_line.lstrip('#').strip()
    text = f"---{title}---\n{s}"
    prompt = SUMMARY_PROMPT.replace("{NICK_NAME}", NICK_NAME)
    prompt = prompt.replace("{LANGUAGE_PREFERENCE}", "" if LANGUAGE_PREFERENCE=="English" else f" The note should be in {LANGUAGE_PREFERENCE}.")
    summary = chat([
        {"role": "system", "content": prompt},
        {"role": "user", "content": text}]).strip()
    return summary, tag
103 |
def count_token(input_str):
    """Return the number of ``o200k_base`` tokens in a string or chat message.

    Args:
        input_str: Either a string, or a dict with ``role`` and ``content``
            keys, which is first rendered as
            ``"role: <role>, content: <content>"``.

    Returns:
        Token count as an int. The encoding is fixed, so per the original
        note this is only an approximation.
    """
    encoding = tiktoken.get_encoding("o200k_base") # This is only approximation
    # isinstance (rather than an exact type comparison) also accepts dict
    # subclasses such as OrderedDict.
    if isinstance(input_str, dict):
        input_str = f"role: {input_str['role']}, content: {input_str['content']}"
    return len(encoding.encode(input_str))
110 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/README:
--------------------------------------------------------------------------------
1 | Stopwords Corpus
2 |
3 | This corpus contains lists of stop words for several languages. These
4 | are high-frequency grammatical words which are usually ignored in text
5 | retrieval applications.
6 |
7 | They were obtained from:
8 | http://anoncvs.postgresql.org/cvsweb.cgi/pgsql/src/backend/snowball/stopwords/
9 |
10 | The stop words for the Romanian language were obtained from:
11 | http://arlc.ro/resources/
12 |
13 | The English list has been augmented
14 | https://github.com/nltk/nltk_data/issues/22
15 |
16 | The German list has been corrected
17 | https://github.com/nltk/nltk_data/pull/49
18 |
19 | A Kazakh list has been added
20 | https://github.com/nltk/nltk_data/pull/52
21 |
22 | A Nepali list has been added
23 | https://github.com/nltk/nltk_data/pull/83
24 |
25 | An Azerbaijani list has been added
26 | https://github.com/nltk/nltk_data/pull/100
27 |
28 | A Greek list has been added
29 | https://github.com/nltk/nltk_data/pull/103
30 |
31 | An Indonesian list has been added
32 | https://github.com/nltk/nltk_data/pull/112
33 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/arabic:
--------------------------------------------------------------------------------
1 | إذ
2 | إذا
3 | إذما
4 | إذن
5 | أف
6 | أقل
7 | أكثر
8 | ألا
9 | إلا
10 | التي
11 | الذي
12 | الذين
13 | اللاتي
14 | اللائي
15 | اللتان
16 | اللتيا
17 | اللتين
18 | اللذان
19 | اللذين
20 | اللواتي
21 | إلى
22 | إليك
23 | إليكم
24 | إليكما
25 | إليكن
26 | أم
27 | أما
28 | أما
29 | إما
30 | أن
31 | إن
32 | إنا
33 | أنا
34 | أنت
35 | أنتم
36 | أنتما
37 | أنتن
38 | إنما
39 | إنه
40 | أنى
41 | أنى
42 | آه
43 | آها
44 | أو
45 | أولاء
46 | أولئك
47 | أوه
48 | آي
49 | أي
50 | أيها
51 | إي
52 | أين
53 | أين
54 | أينما
55 | إيه
56 | بخ
57 | بس
58 | بعد
59 | بعض
60 | بك
61 | بكم
62 | بكم
63 | بكما
64 | بكن
65 | بل
66 | بلى
67 | بما
68 | بماذا
69 | بمن
70 | بنا
71 | به
72 | بها
73 | بهم
74 | بهما
75 | بهن
76 | بي
77 | بين
78 | بيد
79 | تلك
80 | تلكم
81 | تلكما
82 | ته
83 | تي
84 | تين
85 | تينك
86 | ثم
87 | ثمة
88 | حاشا
89 | حبذا
90 | حتى
91 | حيث
92 | حيثما
93 | حين
94 | خلا
95 | دون
96 | ذا
97 | ذات
98 | ذاك
99 | ذان
100 | ذانك
101 | ذلك
102 | ذلكم
103 | ذلكما
104 | ذلكن
105 | ذه
106 | ذو
107 | ذوا
108 | ذواتا
109 | ذواتي
110 | ذي
111 | ذين
112 | ذينك
113 | ريث
114 | سوف
115 | سوى
116 | شتان
117 | عدا
118 | عسى
119 | عل
120 | على
121 | عليك
122 | عليه
123 | عما
124 | عن
125 | عند
126 | غير
127 | فإذا
128 | فإن
129 | فلا
130 | فمن
131 | في
132 | فيم
133 | فيما
134 | فيه
135 | فيها
136 | قد
137 | كأن
138 | كأنما
139 | كأي
140 | كأين
141 | كذا
142 | كذلك
143 | كل
144 | كلا
145 | كلاهما
146 | كلتا
147 | كلما
148 | كليكما
149 | كليهما
150 | كم
151 | كم
152 | كما
153 | كي
154 | كيت
155 | كيف
156 | كيفما
157 | لا
158 | لاسيما
159 | لدى
160 | لست
161 | لستم
162 | لستما
163 | لستن
164 | لسن
165 | لسنا
166 | لعل
167 | لك
168 | لكم
169 | لكما
170 | لكن
171 | لكنما
172 | لكي
173 | لكيلا
174 | لم
175 | لما
176 | لن
177 | لنا
178 | له
179 | لها
180 | لهم
181 | لهما
182 | لهن
183 | لو
184 | لولا
185 | لوما
186 | لي
187 | لئن
188 | ليت
189 | ليس
190 | ليسا
191 | ليست
192 | ليستا
193 | ليسوا
194 | ما
195 | ماذا
196 | متى
197 | مذ
198 | مع
199 | مما
200 | ممن
201 | من
202 | منه
203 | منها
204 | منذ
205 | مه
206 | مهما
207 | نحن
208 | نحو
209 | نعم
210 | ها
211 | هاتان
212 | هاته
213 | هاتي
214 | هاتين
215 | هاك
216 | هاهنا
217 | هذا
218 | هذان
219 | هذه
220 | هذي
221 | هذين
222 | هكذا
223 | هل
224 | هلا
225 | هم
226 | هما
227 | هن
228 | هنا
229 | هناك
230 | هنالك
231 | هو
232 | هؤلاء
233 | هي
234 | هيا
235 | هيت
236 | هيهات
237 | والذي
238 | والذين
239 | وإذ
240 | وإذا
241 | وإن
242 | ولا
243 | ولكن
244 | ولو
245 | وما
246 | ومن
247 | وهو
248 | يا
249 | أبٌ
250 | أخٌ
251 | حمٌ
252 | فو
253 | أنتِ
254 | يناير
255 | فبراير
256 | مارس
257 | أبريل
258 | مايو
259 | يونيو
260 | يوليو
261 | أغسطس
262 | سبتمبر
263 | أكتوبر
264 | نوفمبر
265 | ديسمبر
266 | جانفي
267 | فيفري
268 | مارس
269 | أفريل
270 | ماي
271 | جوان
272 | جويلية
273 | أوت
274 | كانون
275 | شباط
276 | آذار
277 | نيسان
278 | أيار
279 | حزيران
280 | تموز
281 | آب
282 | أيلول
283 | تشرين
284 | دولار
285 | دينار
286 | ريال
287 | درهم
288 | ليرة
289 | جنيه
290 | قرش
291 | مليم
292 | فلس
293 | هللة
294 | سنتيم
295 | يورو
296 | ين
297 | يوان
298 | شيكل
299 | واحد
300 | اثنان
301 | ثلاثة
302 | أربعة
303 | خمسة
304 | ستة
305 | سبعة
306 | ثمانية
307 | تسعة
308 | عشرة
309 | أحد
310 | اثنا
311 | اثني
312 | إحدى
313 | ثلاث
314 | أربع
315 | خمس
316 | ست
317 | سبع
318 | ثماني
319 | تسع
320 | عشر
321 | ثمان
322 | سبت
323 | أحد
324 | اثنين
325 | ثلاثاء
326 | أربعاء
327 | خميس
328 | جمعة
329 | أول
330 | ثان
331 | ثاني
332 | ثالث
333 | رابع
334 | خامس
335 | سادس
336 | سابع
337 | ثامن
338 | تاسع
339 | عاشر
340 | حادي
341 | أ
342 | ب
343 | ت
344 | ث
345 | ج
346 | ح
347 | خ
348 | د
349 | ذ
350 | ر
351 | ز
352 | س
353 | ش
354 | ص
355 | ض
356 | ط
357 | ظ
358 | ع
359 | غ
360 | ف
361 | ق
362 | ك
363 | ل
364 | م
365 | ن
366 | ه
367 | و
368 | ي
369 | ء
370 | ى
371 | آ
372 | ؤ
373 | ئ
374 | أ
375 | ة
376 | ألف
377 | باء
378 | تاء
379 | ثاء
380 | جيم
381 | حاء
382 | خاء
383 | دال
384 | ذال
385 | راء
386 | زاي
387 | سين
388 | شين
389 | صاد
390 | ضاد
391 | طاء
392 | ظاء
393 | عين
394 | غين
395 | فاء
396 | قاف
397 | كاف
398 | لام
399 | ميم
400 | نون
401 | هاء
402 | واو
403 | ياء
404 | همزة
405 | ي
406 | نا
407 | ك
408 | كن
409 | ه
410 | إياه
411 | إياها
412 | إياهما
413 | إياهم
414 | إياهن
415 | إياك
416 | إياكما
417 | إياكم
418 | إياك
419 | إياكن
420 | إياي
421 | إيانا
422 | أولالك
423 | تانِ
424 | تانِك
425 | تِه
426 | تِي
427 | تَيْنِ
428 | ثمّ
429 | ثمّة
430 | ذانِ
431 | ذِه
432 | ذِي
433 | ذَيْنِ
434 | هَؤلاء
435 | هَاتانِ
436 | هَاتِه
437 | هَاتِي
438 | هَاتَيْنِ
439 | هَذا
440 | هَذانِ
441 | هَذِه
442 | هَذِي
443 | هَذَيْنِ
444 | الألى
445 | الألاء
446 | أل
447 | أنّى
448 | أيّ
449 | ّأيّان
450 | أنّى
451 | أيّ
452 | ّأيّان
453 | ذيت
454 | كأيّ
455 | كأيّن
456 | بضع
457 | فلان
458 | وا
459 | آمينَ
460 | آهِ
461 | آهٍ
462 | آهاً
463 | أُفٍّ
464 | أُفٍّ
465 | أفٍّ
466 | أمامك
467 | أمامكَ
468 | أوّهْ
469 | إلَيْكَ
470 | إلَيْكَ
471 | إليكَ
472 | إليكنّ
473 | إيهٍ
474 | بخٍ
475 | بسّ
476 | بَسْ
477 | بطآن
478 | بَلْهَ
479 | حاي
480 | حَذارِ
481 | حيَّ
482 | حيَّ
483 | دونك
484 | رويدك
485 | سرعان
486 | شتانَ
487 | شَتَّانَ
488 | صهْ
489 | صهٍ
490 | طاق
491 | طَق
492 | عَدَسْ
493 | كِخ
494 | مكانَك
495 | مكانَك
496 | مكانَك
497 | مكانكم
498 | مكانكما
499 | مكانكنّ
500 | نَخْ
501 | هاكَ
502 | هَجْ
503 | هلم
504 | هيّا
505 | هَيْهات
506 | وا
507 | واهاً
508 | وراءَك
509 | وُشْكَانَ
510 | وَيْ
511 | يفعلان
512 | تفعلان
513 | يفعلون
514 | تفعلون
515 | تفعلين
516 | اتخذ
517 | ألفى
518 | تخذ
519 | ترك
520 | تعلَّم
521 | جعل
522 | حجا
523 | حبيب
524 | خال
525 | حسب
526 | خال
527 | درى
528 | رأى
529 | زعم
530 | صبر
531 | ظنَّ
532 | عدَّ
533 | علم
534 | غادر
535 | ذهب
536 | وجد
537 | ورد
538 | وهب
539 | أسكن
540 | أطعم
541 | أعطى
542 | رزق
543 | زود
544 | سقى
545 | كسا
546 | أخبر
547 | أرى
548 | أعلم
549 | أنبأ
550 | حدَث
551 | خبَّر
552 | نبَّا
553 | أفعل به
554 | ما أفعله
555 | بئس
556 | ساء
557 | طالما
558 | قلما
559 | لات
560 | لكنَّ
561 | ءَ
562 | أجل
563 | إذاً
564 | أمّا
565 | إمّا
566 | إنَّ
567 | أنًّ
568 | أى
569 | إى
570 | أيا
571 | ب
572 | ثمَّ
573 | جلل
574 | جير
575 | رُبَّ
576 | س
577 | علًّ
578 | ف
579 | كأنّ
580 | كلَّا
581 | كى
582 | ل
583 | لات
584 | لعلَّ
585 | لكنَّ
586 | لكنَّ
587 | م
588 | نَّ
589 | هلّا
590 | وا
591 | أل
592 | إلّا
593 | ت
594 | ك
595 | لمّا
596 | ن
597 | ه
598 | و
599 | ا
600 | ي
601 | تجاه
602 | تلقاء
603 | جميع
604 | حسب
605 | سبحان
606 | شبه
607 | لعمر
608 | مثل
609 | معاذ
610 | أبو
611 | أخو
612 | حمو
613 | فو
614 | مئة
615 | مئتان
616 | ثلاثمئة
617 | أربعمئة
618 | خمسمئة
619 | ستمئة
620 | سبعمئة
621 | ثمنمئة
622 | تسعمئة
623 | مائة
624 | ثلاثمائة
625 | أربعمائة
626 | خمسمائة
627 | ستمائة
628 | سبعمائة
629 | ثمانمئة
630 | تسعمائة
631 | عشرون
632 | ثلاثون
633 | اربعون
634 | خمسون
635 | ستون
636 | سبعون
637 | ثمانون
638 | تسعون
639 | عشرين
640 | ثلاثين
641 | اربعين
642 | خمسين
643 | ستين
644 | سبعين
645 | ثمانين
646 | تسعين
647 | بضع
648 | نيف
649 | أجمع
650 | جميع
651 | عامة
652 | عين
653 | نفس
654 | لا سيما
655 | أصلا
656 | أهلا
657 | أيضا
658 | بؤسا
659 | بعدا
660 | بغتة
661 | تعسا
662 | حقا
663 | حمدا
664 | خلافا
665 | خاصة
666 | دواليك
667 | سحقا
668 | سرا
669 | سمعا
670 | صبرا
671 | صدقا
672 | صراحة
673 | طرا
674 | عجبا
675 | عيانا
676 | غالبا
677 | فرادى
678 | فضلا
679 | قاطبة
680 | كثيرا
681 | لبيك
682 | معاذ
683 | أبدا
684 | إزاء
685 | أصلا
686 | الآن
687 | أمد
688 | أمس
689 | آنفا
690 | آناء
691 | أنّى
692 | أول
693 | أيّان
694 | تارة
695 | ثمّ
696 | ثمّة
697 | حقا
698 | صباح
699 | مساء
700 | ضحوة
701 | عوض
702 | غدا
703 | غداة
704 | قطّ
705 | كلّما
706 | لدن
707 | لمّا
708 | مرّة
709 | قبل
710 | خلف
711 | أمام
712 | فوق
713 | تحت
714 | يمين
715 | شمال
716 | ارتدّ
717 | استحال
718 | أصبح
719 | أضحى
720 | آض
721 | أمسى
722 | انقلب
723 | بات
724 | تبدّل
725 | تحوّل
726 | حار
727 | رجع
728 | راح
729 | صار
730 | ظلّ
731 | عاد
732 | غدا
733 | كان
734 | ما انفك
735 | ما برح
736 | مادام
737 | مازال
738 | مافتئ
739 | ابتدأ
740 | أخذ
741 | اخلولق
742 | أقبل
743 | انبرى
744 | أنشأ
745 | أوشك
746 | جعل
747 | حرى
748 | شرع
749 | طفق
750 | علق
751 | قام
752 | كرب
753 | كاد
754 | هبّ
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/azerbaijani:
--------------------------------------------------------------------------------
1 | a
2 | ad
3 | altı
4 | altmış
5 | amma
6 | arasında
7 | artıq
8 | ay
9 | az
10 | bax
11 | belə
12 | bəli
13 | bəlkə
14 | beş
15 | bəy
16 | bəzən
17 | bəzi
18 | bilər
19 | bir
20 | biraz
21 | biri
22 | birşey
23 | biz
24 | bizim
25 | bizlər
26 | bu
27 | buna
28 | bundan
29 | bunların
30 | bunu
31 | bunun
32 | buradan
33 | bütün
34 | ci
35 | cı
36 | çox
37 | cu
38 | cü
39 | çünki
40 | da
41 | daha
42 | də
43 | dedi
44 | dək
45 | dən
46 | dəqiqə
47 | deyil
48 | dir
49 | doqquz
50 | doqsan
51 | dörd
52 | düz
53 | ə
54 | edən
55 | edir
56 | əgər
57 | əlbəttə
58 | elə
59 | əlli
60 | ən
61 | əslində
62 | et
63 | etdi
64 | etmə
65 | etmək
66 | faiz
67 | gilə
68 | görə
69 | ha
70 | haqqında
71 | harada
72 | hə
73 | heç
74 | həm
75 | həmin
76 | həmişə
77 | hər
78 | ı
79 | idi
80 | iki
81 | il
82 | ildə
83 | ilə
84 | ilk
85 | in
86 | indi
87 | isə
88 | istifadə
89 | iyirmi
90 | ki
91 | kim
92 | kimə
93 | kimi
94 | lakin
95 | lap
96 | məhz
97 | mən
98 | mənə
99 | mirşey
100 | nə
101 | nəhayət
102 | niyə
103 | o
104 | obirisi
105 | of
106 | olan
107 | olar
108 | olaraq
109 | oldu
110 | olduğu
111 | olmadı
112 | olmaz
113 | olmuşdur
114 | olsun
115 | olur
116 | on
117 | ona
118 | ondan
119 | onlar
120 | onlardan
121 | onların
122 | onsuzda
123 | onu
124 | onun
125 | oradan
126 | otuz
127 | öz
128 | özü
129 | qarşı
130 | qədər
131 | qırx
132 | saat
133 | sadəcə
134 | saniyə
135 | səhv
136 | səkkiz
137 | səksən
138 | sən
139 | sənə
140 | sənin
141 | siz
142 | sizin
143 | sizlər
144 | sonra
145 | təəssüf
146 | ü
147 | üç
148 | üçün
149 | var
150 | və
151 | xan
152 | xanım
153 | xeyr
154 | ya
155 | yalnız
156 | yaxşı
157 | yeddi
158 | yenə
159 | yəni
160 | yetmiş
161 | yox
162 | yoxdur
163 | yoxsa
164 | yüz
165 | zaman
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/basque:
--------------------------------------------------------------------------------
1 | ahala
2 | aitzitik
3 | al
4 | ala
5 | alabadere
6 | alabaina
7 | alabaina
8 | aldiz
9 | alta
10 | amaitu
11 | amaitzeko
12 | anitz
13 | antzina
14 | arabera
15 | arabera
16 | arabera
17 | argi
18 | arratsaldero
19 | arte
20 | artean
21 | asko
22 | aspaldiko
23 | aurrera
24 | aurrera
25 | azkenez
26 | azkenik
27 | azkenik
28 | ba
29 | bada
30 | bada
31 | bada
32 | bada
33 | badarik
34 | badarik
35 | badarik
36 | badere
37 | bai
38 | baina
39 | baina
40 | baina
41 | baino
42 | baino
43 | baino
44 | baino
45 | baita
46 | baizik
47 | baldin
48 | baldin
49 | barren
50 | bat
51 | batean
52 | batean
53 | batean
54 | batean
55 | batek
56 | baten
57 | batera
58 | batez
59 | bati
60 | batzuei
61 | batzuek
62 | batzuetan
63 | batzuk
64 | bazen
65 | bederen
66 | bederik
67 | beharrez
68 | behiala
69 | behin
70 | behin
71 | behin
72 | behin
73 | behinik
74 | behinola
75 | behintzat
76 | bera
77 | beraiek
78 | beranduago
79 | berau
80 | berauek
81 | beraz
82 | beraz
83 | bere
84 | berean
85 | berebat
86 | berehala
87 | berori
88 | beroriek
89 | berriro
90 | berriz
91 | bertzalde
92 | bertzenaz
93 | bestalde
94 | beste
95 | bestela
96 | besterik
97 | bezain
98 | bezala
99 | bide
100 | bien
101 | bigarrenez
102 | bigarrenik
103 | bitartean
104 | bitartean
105 | bizkitartean
106 | bukaeran
107 | bukatzeko
108 | da
109 | dago
110 | dago
111 | dela
112 | dela
113 | dela
114 | delarik
115 | den
116 | dena
117 | dena
118 | dezadan
119 | dira
120 | ditu
121 | du
122 | dute
123 | edo
124 | edo
125 | edota
126 | egin
127 | egin
128 | egun
129 | egun
130 | egunean
131 | emateko
132 | era
133 | erdi
134 | ere
135 | ere
136 | ere
137 | ere
138 | ere
139 | esan
140 | esan
141 | esanak
142 | esandakoaren
143 | eta
144 | eta
145 | eta
146 | eta
147 | eta
148 | eta
149 | eurak
150 | ez
151 | ez
152 | ez
153 | eze
154 | ezen
155 | ezer
156 | ezezik
157 | ezik
158 | ezpabere
159 | ezpada
160 | ezpere
161 | ezperen
162 | ezta
163 | funtsean
164 | gabe
165 | gain
166 | gainera
167 | gainera
168 | gainerontzean
169 | gaur
170 | gero
171 | gero
172 | gero
173 | geroago
174 | gisa
175 | gu
176 | gutxi
177 | guzti
178 | guztia
179 | guztiz
180 | haatik
181 | haiei
182 | haiek
183 | haietan
184 | hain
185 | hainbeste
186 | hainbestez
187 | hala
188 | hala
189 | hala
190 | halaber
191 | halako
192 | halatan
193 | han
194 | handik
195 | hango
196 | hara
197 | hargatik
198 | hari
199 | hark
200 | hartan
201 | hartan
202 | hasi
203 | hasi
204 | hasiera
205 | hasieran
206 | hasteaz
207 | hasteko
208 | hasteko
209 | hau
210 | hau
211 | hau
212 | hau
213 | hau
214 | hau
215 | hauei
216 | hauek
217 | hauetan
218 | hemen
219 | hemendik
220 | hemengo
221 | hi
222 | hona
223 | honebestez
224 | honek
225 | honela
226 | honela
227 | honela
228 | honen
229 | honen
230 | honetan
231 | honetaz
232 | honi
233 | hor
234 | hori
235 | hori
236 | hori
237 | horiei
238 | horiek
239 | horietan
240 | horko
241 | horra
242 | horratik
243 | horregatik
244 | horregatik
245 | horrek
246 | horrela
247 | horrela
248 | horrela
249 | horren
250 | horrenbestez
251 | horretan
252 | horri
253 | hortaz
254 | hortaz
255 | hortik
256 | hura
257 | ikusi
258 | ikusi
259 | izan
260 | izan
261 | izan
262 | jarraituz
263 | kariaz
264 | kasuaz
265 | kontuan
266 | laburbilduz
267 | laburki
268 | laster
269 | laster
270 | lehen
271 | lehen
272 | lehen
273 | lehen
274 | lehenengo
275 | lehenengo
276 | lehenik
277 | lehen-lehenik
278 | litzateke
279 | medio
280 | mendean
281 | mundura
282 | nahiz
283 | ni
284 | noiz
285 | nola
286 | non
287 | nondik
288 | nongo
289 | nor
290 | nora
291 | on
292 | ondoren
293 | ondorio
294 | ondorioz
295 | ondorioz
296 | orain
297 | ordea
298 | orduan
299 | orduan
300 | orduan
301 | orduko
302 | ordura
303 | orobat
304 | ostean
305 | ostera
306 | osterantzean
307 | pentsatuz
308 | ustez
309 | ze
310 | zein
311 | zein
312 | zen
313 | zen
314 | zenbait
315 | zenbat
316 | zer
317 | zeren
318 | zergatik
319 | zergatik
320 | ziren
321 | zituen
322 | zu
323 | zuek
324 | zuen
325 | zuten
326 | zuzen
327 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/bengali:
--------------------------------------------------------------------------------
1 | অতএব
2 | অথচ
3 | অথবা
4 | অনুযায়ী
5 | অনেক
6 | অনেকে
7 | অনেকেই
8 | অন্তত
9 | অন্য
10 | অবধি
11 | অবশ্য
12 | অর্থাত
13 | আই
14 | আগামী
15 | আগে
16 | আগেই
17 | আছে
18 | আজ
19 | আদ্যভাগে
20 | আপনার
21 | আপনি
22 | আবার
23 | আমরা
24 | আমাকে
25 | আমাদের
26 | আমার
27 | আমি
28 | আর
29 | আরও
30 | ই
31 | ইত্যাদি
32 | ইহা
33 | উচিত
34 | উত্তর
35 | উনি
36 | উপর
37 | উপরে
38 | এ
39 | এঁদের
40 | এঁরা
41 | এই
42 | একই
43 | একটি
44 | একবার
45 | একে
46 | এক্
47 | এখন
48 | এখনও
49 | এখানে
50 | এখানেই
51 | এটা
52 | এটাই
53 | এটি
54 | এত
55 | এতটাই
56 | এতে
57 | এদের
58 | এব
59 | এবং
60 | এবার
61 | এমন
62 | এমনকী
63 | এমনি
64 | এর
65 | এরা
66 | এল
67 | এস
68 | এসে
69 | ঐ
70 | ও
71 | ওঁদের
72 | ওঁর
73 | ওঁরা
74 | ওই
75 | ওকে
76 | ওখানে
77 | ওদের
78 | ওর
79 | ওরা
80 | কখনও
81 | কত
82 | কবে
83 | কমনে
84 | কয়েক
85 | কয়েকটি
86 | করছে
87 | করছেন
88 | করতে
89 | করবে
90 | করবেন
91 | করলে
92 | করলেন
93 | করা
94 | করাই
95 | করায়
96 | করার
97 | করি
98 | করিতে
99 | করিয়া
100 | করিয়ে
101 | করে
102 | করেই
103 | করেছিলেন
104 | করেছে
105 | করেছেন
106 | করেন
107 | কাউকে
108 | কাছ
109 | কাছে
110 | কাজ
111 | কাজে
112 | কারও
113 | কারণ
114 | কি
115 | কিংবা
116 | কিছু
117 | কিছুই
118 | কিন্তু
119 | কী
120 | কে
121 | কেউ
122 | কেউই
123 | কেখা
124 | কেন
125 | কোটি
126 | কোন
127 | কোনও
128 | কোনো
129 | ক্ষেত্রে
130 | কয়েক
131 | খুব
132 | গিয়ে
133 | গিয়েছে
134 | গিয়ে
135 | গুলি
136 | গেছে
137 | গেল
138 | গেলে
139 | গোটা
140 | চলে
141 | চান
142 | চায়
143 | চার
144 | চালু
145 | চেয়ে
146 | চেষ্টা
147 | ছাড়া
148 | ছাড়াও
149 | ছিল
150 | ছিলেন
151 | জন
152 | জনকে
153 | জনের
154 | জন্য
155 | জন্যওজে
156 | জানতে
157 | জানা
158 | জানানো
159 | জানায়
160 | জানিয়ে
161 | জানিয়েছে
162 | জে
163 | জ্নজন
164 | টি
165 | ঠিক
166 | তখন
167 | তত
168 | তথা
169 | তবু
170 | তবে
171 | তা
172 | তাঁকে
173 | তাঁদের
174 | তাঁর
175 | তাঁরা
176 | তাঁাহারা
177 | তাই
178 | তাও
179 | তাকে
180 | তাতে
181 | তাদের
182 | তার
183 | তারপর
184 | তারা
185 | তারৈ
186 | তাহলে
187 | তাহা
188 | তাহাতে
189 | তাহার
190 | তিনঐ
191 | তিনি
192 | তিনিও
193 | তুমি
194 | তুলে
195 | তেমন
196 | তো
197 | তোমার
198 | থাকবে
199 | থাকবেন
200 | থাকা
201 | থাকায়
202 | থাকে
203 | থাকেন
204 | থেকে
205 | থেকেই
206 | থেকেও
207 | দিকে
208 | দিতে
209 | দিন
210 | দিয়ে
211 | দিয়েছে
212 | দিয়েছেন
213 | দিলেন
214 | দু
215 | দুই
216 | দুটি
217 | দুটো
218 | দেওয়া
219 | দেওয়ার
220 | দেওয়া
221 | দেখতে
222 | দেখা
223 | দেখে
224 | দেন
225 | দেয়
226 | দ্বারা
227 | ধরা
228 | ধরে
229 | ধামার
230 | নতুন
231 | নয়
232 | না
233 | নাই
234 | নাকি
235 | নাগাদ
236 | নানা
237 | নিজে
238 | নিজেই
239 | নিজেদের
240 | নিজের
241 | নিতে
242 | নিয়ে
243 | নিয়ে
244 | নেই
245 | নেওয়া
246 | নেওয়ার
247 | নেওয়া
248 | নয়
249 | পক্ষে
250 | পর
251 | পরে
252 | পরেই
253 | পরেও
254 | পর্যন্ত
255 | পাওয়া
256 | পাচ
257 | পারি
258 | পারে
259 | পারেন
260 | পি
261 | পেয়ে
262 | পেয়্র্
263 | প্রতি
264 | প্রথম
265 | প্রভৃতি
266 | প্রযন্ত
267 | প্রাথমিক
268 | প্রায়
269 | প্রায়
270 | ফলে
271 | ফিরে
272 | ফের
273 | বক্তব্য
274 | বদলে
275 | বন
276 | বরং
277 | বলতে
278 | বলল
279 | বললেন
280 | বলা
281 | বলে
282 | বলেছেন
283 | বলেন
284 | বসে
285 | বহু
286 | বা
287 | বাদে
288 | বার
289 | বি
290 | বিনা
291 | বিভিন্ন
292 | বিশেষ
293 | বিষয়টি
294 | বেশ
295 | বেশি
296 | ব্যবহার
297 | ব্যাপারে
298 | ভাবে
299 | ভাবেই
300 | মতো
301 | মতোই
302 | মধ্যভাগে
303 | মধ্যে
304 | মধ্যেই
305 | মধ্যেও
306 | মনে
307 | মাত্র
308 | মাধ্যমে
309 | মোট
310 | মোটেই
311 | যখন
312 | যত
313 | যতটা
314 | যথেষ্ট
315 | যদি
316 | যদিও
317 | যা
318 | যাঁর
319 | যাঁরা
320 | যাওয়া
321 | যাওয়ার
322 | যাওয়া
323 | যাকে
324 | যাচ্ছে
325 | যাতে
326 | যাদের
327 | যান
328 | যাবে
329 | যায়
330 | যার
331 | যারা
332 | যিনি
333 | যে
334 | যেখানে
335 | যেতে
336 | যেন
337 | যেমন
338 | র
339 | রকম
340 | রয়েছে
341 | রাখা
342 | রেখে
343 | লক্ষ
344 | শুধু
345 | শুরু
346 | সঙ্গে
347 | সঙ্গেও
348 | সব
349 | সবার
350 | সমস্ত
351 | সম্প্রতি
352 | সহ
353 | সহিত
354 | সাধারণ
355 | সামনে
356 | সি
357 | সুতরাং
358 | সে
359 | সেই
360 | সেখান
361 | সেখানে
362 | সেটা
363 | সেটাই
364 | সেটাও
365 | সেটি
366 | স্পষ্ট
367 | স্বয়ং
368 | হইতে
369 | হইবে
370 | হইয়া
371 | হওয়া
372 | হওয়ায়
373 | হওয়ার
374 | হচ্ছে
375 | হত
376 | হতে
377 | হতেই
378 | হন
379 | হবে
380 | হবেন
381 | হয়
382 | হয়তো
383 | হয়নি
384 | হয়ে
385 | হয়েই
386 | হয়েছিল
387 | হয়েছে
388 | হয়েছেন
389 | হল
390 | হলে
391 | হলেই
392 | হলেও
393 | হলো
394 | হাজার
395 | হিসাবে
396 | হৈলে
397 | হোক
398 | হয়
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/catalan:
--------------------------------------------------------------------------------
1 | a
2 | abans
3 | ací
4 | ah
5 | així
6 | això
7 | al
8 | aleshores
9 | algun
10 | alguna
11 | algunes
12 | alguns
13 | alhora
14 | allà
15 | allí
16 | allò
17 | als
18 | altra
19 | altre
20 | altres
21 | amb
22 | ambdues
23 | ambdós
24 | anar
25 | ans
26 | apa
27 | aquell
28 | aquella
29 | aquelles
30 | aquells
31 | aquest
32 | aquesta
33 | aquestes
34 | aquests
35 | aquí
36 | baix
37 | bastant
38 | bé
39 | cada
40 | cadascuna
41 | cadascunes
42 | cadascuns
43 | cadascú
44 | com
45 | consegueixo
46 | conseguim
47 | conseguir
48 | consigueix
49 | consigueixen
50 | consigueixes
51 | contra
52 | d'un
53 | d'una
54 | d'unes
55 | d'uns
56 | dalt
57 | de
58 | del
59 | dels
60 | des
61 | des de
62 | després
63 | dins
64 | dintre
65 | donat
66 | doncs
67 | durant
68 | e
69 | eh
70 | el
71 | elles
72 | ells
73 | els
74 | em
75 | en
76 | encara
77 | ens
78 | entre
79 | era
80 | erem
81 | eren
82 | eres
83 | es
84 | esta
85 | estan
86 | estat
87 | estava
88 | estaven
89 | estem
90 | esteu
91 | estic
92 | està
93 | estàvem
94 | estàveu
95 | et
96 | etc
97 | ets
98 | fa
99 | faig
100 | fan
101 | fas
102 | fem
103 | fer
104 | feu
105 | fi
106 | fins
107 | fora
108 | gairebé
109 | ha
110 | han
111 | has
112 | haver
113 | havia
114 | he
115 | hem
116 | heu
117 | hi
118 | ho
119 | i
120 | igual
121 | iguals
122 | inclòs
123 | ja
124 | jo
125 | l'hi
126 | la
127 | les
128 | li
129 | li'n
130 | llarg
131 | llavors
132 | m'he
133 | ma
134 | mal
135 | malgrat
136 | mateix
137 | mateixa
138 | mateixes
139 | mateixos
140 | me
141 | mentre
142 | meu
143 | meus
144 | meva
145 | meves
146 | mode
147 | molt
148 | molta
149 | moltes
150 | molts
151 | mon
152 | mons
153 | més
154 | n'he
155 | n'hi
156 | ne
157 | ni
158 | no
159 | nogensmenys
160 | només
161 | nosaltres
162 | nostra
163 | nostre
164 | nostres
165 | o
166 | oh
167 | oi
168 | on
169 | pas
170 | pel
171 | pels
172 | per
173 | per que
174 | perquè
175 | però
176 | poc
177 | poca
178 | pocs
179 | podem
180 | poden
181 | poder
182 | podeu
183 | poques
184 | potser
185 | primer
186 | propi
187 | puc
188 | qual
189 | quals
190 | quan
191 | quant
192 | que
193 | quelcom
194 | qui
195 | quin
196 | quina
197 | quines
198 | quins
199 | què
200 | s'ha
201 | s'han
202 | sa
203 | sabem
204 | saben
205 | saber
206 | sabeu
207 | sap
208 | saps
209 | semblant
210 | semblants
211 | sense
212 | ser
213 | ses
214 | seu
215 | seus
216 | seva
217 | seves
218 | si
219 | sobre
220 | sobretot
221 | soc
222 | solament
223 | sols
224 | som
225 | son
226 | sons
227 | sota
228 | sou
229 | sóc
230 | són
231 | t'ha
232 | t'han
233 | t'he
234 | ta
235 | tal
236 | també
237 | tampoc
238 | tan
239 | tant
240 | tanta
241 | tantes
242 | te
243 | tene
244 | tenim
245 | tenir
246 | teniu
247 | teu
248 | teus
249 | teva
250 | teves
251 | tinc
252 | ton
253 | tons
254 | tot
255 | tota
256 | totes
257 | tots
258 | un
259 | una
260 | unes
261 | uns
262 | us
263 | va
264 | vaig
265 | vam
266 | van
267 | vas
268 | veu
269 | vosaltres
270 | vostra
271 | vostre
272 | vostres
273 | érem
274 | éreu
275 | és
276 | éssent
277 | últim
278 | ús
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/chinese:
--------------------------------------------------------------------------------
1 | 一
2 | 一下
3 | 一些
4 | 一切
5 | 一则
6 | 一天
7 | 一定
8 | 一方面
9 | 一旦
10 | 一时
11 | 一来
12 | 一样
13 | 一次
14 | 一片
15 | 一直
16 | 一致
17 | 一般
18 | 一起
19 | 一边
20 | 一面
21 | 万一
22 | 上下
23 | 上升
24 | 上去
25 | 上来
26 | 上述
27 | 上面
28 | 下列
29 | 下去
30 | 下来
31 | 下面
32 | 不一
33 | 不久
34 | 不仅
35 | 不会
36 | 不但
37 | 不光
38 | 不单
39 | 不变
40 | 不只
41 | 不可
42 | 不同
43 | 不够
44 | 不如
45 | 不得
46 | 不怕
47 | 不惟
48 | 不成
49 | 不拘
50 | 不敢
51 | 不断
52 | 不是
53 | 不比
54 | 不然
55 | 不特
56 | 不独
57 | 不管
58 | 不能
59 | 不要
60 | 不论
61 | 不足
62 | 不过
63 | 不问
64 | 与
65 | 与其
66 | 与否
67 | 与此同时
68 | 专门
69 | 且
70 | 两者
71 | 严格
72 | 严重
73 | 个
74 | 个人
75 | 个别
76 | 中小
77 | 中间
78 | 丰富
79 | 临
80 | 为
81 | 为主
82 | 为了
83 | 为什么
84 | 为什麽
85 | 为何
86 | 为着
87 | 主张
88 | 主要
89 | 举行
90 | 乃
91 | 乃至
92 | 么
93 | 之
94 | 之一
95 | 之前
96 | 之后
97 | 之後
98 | 之所以
99 | 之类
100 | 乌乎
101 | 乎
102 | 乘
103 | 也
104 | 也好
105 | 也是
106 | 也罢
107 | 了
108 | 了解
109 | 争取
110 | 于
111 | 于是
112 | 于是乎
113 | 云云
114 | 互相
115 | 产生
116 | 人们
117 | 人家
118 | 什么
119 | 什么样
120 | 什麽
121 | 今后
122 | 今天
123 | 今年
124 | 今後
125 | 仍然
126 | 从
127 | 从事
128 | 从而
129 | 他
130 | 他人
131 | 他们
132 | 他的
133 | 代替
134 | 以
135 | 以上
136 | 以下
137 | 以为
138 | 以便
139 | 以免
140 | 以前
141 | 以及
142 | 以后
143 | 以外
144 | 以後
145 | 以来
146 | 以至
147 | 以至于
148 | 以致
149 | 们
150 | 任
151 | 任何
152 | 任凭
153 | 任务
154 | 企图
155 | 伟大
156 | 似乎
157 | 似的
158 | 但
159 | 但是
160 | 何
161 | 何况
162 | 何处
163 | 何时
164 | 作为
165 | 你
166 | 你们
167 | 你的
168 | 使得
169 | 使用
170 | 例如
171 | 依
172 | 依照
173 | 依靠
174 | 促进
175 | 保持
176 | 俺
177 | 俺们
178 | 倘
179 | 倘使
180 | 倘或
181 | 倘然
182 | 倘若
183 | 假使
184 | 假如
185 | 假若
186 | 做到
187 | 像
188 | 允许
189 | 充分
190 | 先后
191 | 先後
192 | 先生
193 | 全部
194 | 全面
195 | 兮
196 | 共同
197 | 关于
198 | 其
199 | 其一
200 | 其中
201 | 其二
202 | 其他
203 | 其余
204 | 其它
205 | 其实
206 | 其次
207 | 具体
208 | 具体地说
209 | 具体说来
210 | 具有
211 | 再者
212 | 再说
213 | 冒
214 | 冲
215 | 决定
216 | 况且
217 | 准备
218 | 几
219 | 几乎
220 | 几时
221 | 凭
222 | 凭借
223 | 出去
224 | 出来
225 | 出现
226 | 分别
227 | 则
228 | 别
229 | 别的
230 | 别说
231 | 到
232 | 前后
233 | 前者
234 | 前进
235 | 前面
236 | 加之
237 | 加以
238 | 加入
239 | 加强
240 | 十分
241 | 即
242 | 即令
243 | 即使
244 | 即便
245 | 即或
246 | 即若
247 | 却不
248 | 原来
249 | 又
250 | 及
251 | 及其
252 | 及时
253 | 及至
254 | 双方
255 | 反之
256 | 反应
257 | 反映
258 | 反过来
259 | 反过来说
260 | 取得
261 | 受到
262 | 变成
263 | 另
264 | 另一方面
265 | 另外
266 | 只是
267 | 只有
268 | 只要
269 | 只限
270 | 叫
271 | 叫做
272 | 召开
273 | 叮咚
274 | 可
275 | 可以
276 | 可是
277 | 可能
278 | 可见
279 | 各
280 | 各个
281 | 各人
282 | 各位
283 | 各地
284 | 各种
285 | 各级
286 | 各自
287 | 合理
288 | 同
289 | 同一
290 | 同时
291 | 同样
292 | 后来
293 | 后面
294 | 向
295 | 向着
296 | 吓
297 | 吗
298 | 否则
299 | 吧
300 | 吧哒
301 | 吱
302 | 呀
303 | 呃
304 | 呕
305 | 呗
306 | 呜
307 | 呜呼
308 | 呢
309 | 周围
310 | 呵
311 | 呸
312 | 呼哧
313 | 咋
314 | 和
315 | 咚
316 | 咦
317 | 咱
318 | 咱们
319 | 咳
320 | 哇
321 | 哈
322 | 哈哈
323 | 哉
324 | 哎
325 | 哎呀
326 | 哎哟
327 | 哗
328 | 哟
329 | 哦
330 | 哩
331 | 哪
332 | 哪个
333 | 哪些
334 | 哪儿
335 | 哪天
336 | 哪年
337 | 哪怕
338 | 哪样
339 | 哪边
340 | 哪里
341 | 哼
342 | 哼唷
343 | 唉
344 | 啊
345 | 啐
346 | 啥
347 | 啦
348 | 啪达
349 | 喂
350 | 喏
351 | 喔唷
352 | 嗡嗡
353 | 嗬
354 | 嗯
355 | 嗳
356 | 嘎
357 | 嘎登
358 | 嘘
359 | 嘛
360 | 嘻
361 | 嘿
362 | 因
363 | 因为
364 | 因此
365 | 因而
366 | 固然
367 | 在
368 | 在下
369 | 地
370 | 坚决
371 | 坚持
372 | 基本
373 | 处理
374 | 复杂
375 | 多
376 | 多少
377 | 多数
378 | 多次
379 | 大力
380 | 大多数
381 | 大大
382 | 大家
383 | 大批
384 | 大约
385 | 大量
386 | 失去
387 | 她
388 | 她们
389 | 她的
390 | 好的
391 | 好象
392 | 如
393 | 如上所述
394 | 如下
395 | 如何
396 | 如其
397 | 如果
398 | 如此
399 | 如若
400 | 存在
401 | 宁
402 | 宁可
403 | 宁愿
404 | 宁肯
405 | 它
406 | 它们
407 | 它们的
408 | 它的
409 | 安全
410 | 完全
411 | 完成
412 | 实现
413 | 实际
414 | 宣布
415 | 容易
416 | 密切
417 | 对
418 | 对于
419 | 对应
420 | 将
421 | 少数
422 | 尔后
423 | 尚且
424 | 尤其
425 | 就
426 | 就是
427 | 就是说
428 | 尽
429 | 尽管
430 | 属于
431 | 岂但
432 | 左右
433 | 巨大
434 | 巩固
435 | 己
436 | 已经
437 | 帮助
438 | 常常
439 | 并
440 | 并不
441 | 并不是
442 | 并且
443 | 并没有
444 | 广大
445 | 广泛
446 | 应当
447 | 应用
448 | 应该
449 | 开外
450 | 开始
451 | 开展
452 | 引起
453 | 强烈
454 | 强调
455 | 归
456 | 当
457 | 当前
458 | 当时
459 | 当然
460 | 当着
461 | 形成
462 | 彻底
463 | 彼
464 | 彼此
465 | 往
466 | 往往
467 | 待
468 | 後来
469 | 後面
470 | 得
471 | 得出
472 | 得到
473 | 心里
474 | 必然
475 | 必要
476 | 必须
477 | 怎
478 | 怎么
479 | 怎么办
480 | 怎么样
481 | 怎样
482 | 怎麽
483 | 总之
484 | 总是
485 | 总的来看
486 | 总的来说
487 | 总的说来
488 | 总结
489 | 总而言之
490 | 恰恰相反
491 | 您
492 | 意思
493 | 愿意
494 | 慢说
495 | 成为
496 | 我
497 | 我们
498 | 我的
499 | 或
500 | 或是
501 | 或者
502 | 战斗
503 | 所
504 | 所以
505 | 所有
506 | 所谓
507 | 打
508 | 扩大
509 | 把
510 | 抑或
511 | 拿
512 | 按
513 | 按照
514 | 换句话说
515 | 换言之
516 | 据
517 | 掌握
518 | 接着
519 | 接著
520 | 故
521 | 故此
522 | 整个
523 | 方便
524 | 方面
525 | 旁人
526 | 无宁
527 | 无法
528 | 无论
529 | 既
530 | 既是
531 | 既然
532 | 时候
533 | 明显
534 | 明确
535 | 是
536 | 是否
537 | 是的
538 | 显然
539 | 显著
540 | 普通
541 | 普遍
542 | 更加
543 | 曾经
544 | 替
545 | 最后
546 | 最大
547 | 最好
548 | 最後
549 | 最近
550 | 最高
551 | 有
552 | 有些
553 | 有关
554 | 有利
555 | 有力
556 | 有所
557 | 有效
558 | 有时
559 | 有点
560 | 有的
561 | 有着
562 | 有著
563 | 望
564 | 朝
565 | 朝着
566 | 本
567 | 本着
568 | 来
569 | 来着
570 | 极了
571 | 构成
572 | 果然
573 | 果真
574 | 某
575 | 某个
576 | 某些
577 | 根据
578 | 根本
579 | 欢迎
580 | 正在
581 | 正如
582 | 正常
583 | 此
584 | 此外
585 | 此时
586 | 此间
587 | 毋宁
588 | 每
589 | 每个
590 | 每天
591 | 每年
592 | 每当
593 | 比
594 | 比如
595 | 比方
596 | 比较
597 | 毫不
598 | 没有
599 | 沿
600 | 沿着
601 | 注意
602 | 深入
603 | 清楚
604 | 满足
605 | 漫说
606 | 焉
607 | 然则
608 | 然后
609 | 然後
610 | 然而
611 | 照
612 | 照着
613 | 特别是
614 | 特殊
615 | 特点
616 | 现代
617 | 现在
618 | 甚么
619 | 甚而
620 | 甚至
621 | 用
622 | 由
623 | 由于
624 | 由此可见
625 | 的
626 | 的话
627 | 目前
628 | 直到
629 | 直接
630 | 相似
631 | 相信
632 | 相反
633 | 相同
634 | 相对
635 | 相对而言
636 | 相应
637 | 相当
638 | 相等
639 | 省得
640 | 看出
641 | 看到
642 | 看来
643 | 看看
644 | 看见
645 | 真是
646 | 真正
647 | 着
648 | 着呢
649 | 矣
650 | 知道
651 | 确定
652 | 离
653 | 积极
654 | 移动
655 | 突出
656 | 突然
657 | 立即
658 | 第
659 | 等
660 | 等等
661 | 管
662 | 紧接着
663 | 纵
664 | 纵令
665 | 纵使
666 | 纵然
667 | 练习
668 | 组成
669 | 经
670 | 经常
671 | 经过
672 | 结合
673 | 结果
674 | 给
675 | 绝对
676 | 继续
677 | 继而
678 | 维持
679 | 综上所述
680 | 罢了
681 | 考虑
682 | 者
683 | 而
684 | 而且
685 | 而况
686 | 而外
687 | 而已
688 | 而是
689 | 而言
690 | 联系
691 | 能
692 | 能否
693 | 能够
694 | 腾
695 | 自
696 | 自个儿
697 | 自从
698 | 自各儿
699 | 自家
700 | 自己
701 | 自身
702 | 至
703 | 至于
704 | 良好
705 | 若
706 | 若是
707 | 若非
708 | 范围
709 | 莫若
710 | 获得
711 | 虽
712 | 虽则
713 | 虽然
714 | 虽说
715 | 行为
716 | 行动
717 | 表明
718 | 表示
719 | 被
720 | 要
721 | 要不
722 | 要不是
723 | 要不然
724 | 要么
725 | 要是
726 | 要求
727 | 规定
728 | 觉得
729 | 认为
730 | 认真
731 | 认识
732 | 让
733 | 许多
734 | 论
735 | 设使
736 | 设若
737 | 该
738 | 说明
739 | 诸位
740 | 谁
741 | 谁知
742 | 赶
743 | 起
744 | 起来
745 | 起见
746 | 趁
747 | 趁着
748 | 越是
749 | 跟
750 | 转动
751 | 转变
752 | 转贴
753 | 较
754 | 较之
755 | 边
756 | 达到
757 | 迅速
758 | 过
759 | 过去
760 | 过来
761 | 运用
762 | 还是
763 | 还有
764 | 这
765 | 这个
766 | 这么
767 | 这么些
768 | 这么样
769 | 这么点儿
770 | 这些
771 | 这会儿
772 | 这儿
773 | 这就是说
774 | 这时
775 | 这样
776 | 这点
777 | 这种
778 | 这边
779 | 这里
780 | 这麽
781 | 进入
782 | 进步
783 | 进而
784 | 进行
785 | 连
786 | 连同
787 | 适应
788 | 适当
789 | 适用
790 | 逐步
791 | 逐渐
792 | 通常
793 | 通过
794 | 造成
795 | 遇到
796 | 遭到
797 | 避免
798 | 那
799 | 那个
800 | 那么
801 | 那么些
802 | 那么样
803 | 那些
804 | 那会儿
805 | 那儿
806 | 那时
807 | 那样
808 | 那边
809 | 那里
810 | 那麽
811 | 部分
812 | 鄙人
813 | 采取
814 | 里面
815 | 重大
816 | 重新
817 | 重要
818 | 鉴于
819 | 问题
820 | 防止
821 | 阿
822 | 附近
823 | 限制
824 | 除
825 | 除了
826 | 除此之外
827 | 除非
828 | 随
829 | 随着
830 | 随著
831 | 集中
832 | 需要
833 | 非但
834 | 非常
835 | 非徒
836 | 靠
837 | 顺
838 | 顺着
839 | 首先
840 | 高兴
841 | 是不是
842 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/danish:
--------------------------------------------------------------------------------
1 | og
2 | i
3 | jeg
4 | det
5 | at
6 | en
7 | den
8 | til
9 | er
10 | som
11 | på
12 | de
13 | med
14 | han
15 | af
16 | for
17 | ikke
18 | der
19 | var
20 | mig
21 | sig
22 | men
23 | et
24 | har
25 | om
26 | vi
27 | min
28 | havde
29 | ham
30 | hun
31 | nu
32 | over
33 | da
34 | fra
35 | du
36 | ud
37 | sin
38 | dem
39 | os
40 | op
41 | man
42 | hans
43 | hvor
44 | eller
45 | hvad
46 | skal
47 | selv
48 | her
49 | alle
50 | vil
51 | blev
52 | kunne
53 | ind
54 | når
55 | være
56 | dog
57 | noget
58 | ville
59 | jo
60 | deres
61 | efter
62 | ned
63 | skulle
64 | denne
65 | end
66 | dette
67 | mit
68 | også
69 | under
70 | have
71 | dig
72 | anden
73 | hende
74 | mine
75 | alt
76 | meget
77 | sit
78 | sine
79 | vor
80 | mod
81 | disse
82 | hvis
83 | din
84 | nogle
85 | hos
86 | blive
87 | mange
88 | ad
89 | bliver
90 | hendes
91 | været
92 | thi
93 | jer
94 | sådan
95 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/dutch:
--------------------------------------------------------------------------------
1 | de
2 | en
3 | van
4 | ik
5 | te
6 | dat
7 | die
8 | in
9 | een
10 | hij
11 | het
12 | niet
13 | zijn
14 | is
15 | was
16 | op
17 | aan
18 | met
19 | als
20 | voor
21 | had
22 | er
23 | maar
24 | om
25 | hem
26 | dan
27 | zou
28 | of
29 | wat
30 | mijn
31 | men
32 | dit
33 | zo
34 | door
35 | over
36 | ze
37 | zich
38 | bij
39 | ook
40 | tot
41 | je
42 | mij
43 | uit
44 | der
45 | daar
46 | haar
47 | naar
48 | heb
49 | hoe
50 | heeft
51 | hebben
52 | deze
53 | u
54 | want
55 | nog
56 | zal
57 | me
58 | zij
59 | nu
60 | ge
61 | geen
62 | omdat
63 | iets
64 | worden
65 | toch
66 | al
67 | waren
68 | veel
69 | meer
70 | doen
71 | toen
72 | moet
73 | ben
74 | zonder
75 | kan
76 | hun
77 | dus
78 | alles
79 | onder
80 | ja
81 | eens
82 | hier
83 | wie
84 | werd
85 | altijd
86 | doch
87 | wordt
88 | wezen
89 | kunnen
90 | ons
91 | zelf
92 | tegen
93 | na
94 | reeds
95 | wil
96 | kon
97 | niets
98 | uw
99 | iemand
100 | geweest
101 | andere
102 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/english:
--------------------------------------------------------------------------------
1 | i
2 | me
3 | my
4 | myself
5 | we
6 | our
7 | ours
8 | ourselves
9 | you
10 | you're
11 | you've
12 | you'll
13 | you'd
14 | your
15 | yours
16 | yourself
17 | yourselves
18 | he
19 | him
20 | his
21 | himself
22 | she
23 | she's
24 | her
25 | hers
26 | herself
27 | it
28 | it's
29 | its
30 | itself
31 | they
32 | them
33 | their
34 | theirs
35 | themselves
36 | what
37 | which
38 | who
39 | whom
40 | this
41 | that
42 | that'll
43 | these
44 | those
45 | am
46 | is
47 | are
48 | was
49 | were
50 | be
51 | been
52 | being
53 | have
54 | has
55 | had
56 | having
57 | do
58 | does
59 | did
60 | doing
61 | a
62 | an
63 | the
64 | and
65 | but
66 | if
67 | or
68 | because
69 | as
70 | until
71 | while
72 | of
73 | at
74 | by
75 | for
76 | with
77 | about
78 | against
79 | between
80 | into
81 | through
82 | during
83 | before
84 | after
85 | above
86 | below
87 | to
88 | from
89 | up
90 | down
91 | in
92 | out
93 | on
94 | off
95 | over
96 | under
97 | again
98 | further
99 | then
100 | once
101 | here
102 | there
103 | when
104 | where
105 | why
106 | how
107 | all
108 | any
109 | both
110 | each
111 | few
112 | more
113 | most
114 | other
115 | some
116 | such
117 | no
118 | nor
119 | not
120 | only
121 | own
122 | same
123 | so
124 | than
125 | too
126 | very
127 | s
128 | t
129 | can
130 | will
131 | just
132 | don
133 | don't
134 | should
135 | should've
136 | now
137 | d
138 | ll
139 | m
140 | o
141 | re
142 | ve
143 | y
144 | ain
145 | aren
146 | aren't
147 | couldn
148 | couldn't
149 | didn
150 | didn't
151 | doesn
152 | doesn't
153 | hadn
154 | hadn't
155 | hasn
156 | hasn't
157 | haven
158 | haven't
159 | isn
160 | isn't
161 | ma
162 | mightn
163 | mightn't
164 | mustn
165 | mustn't
166 | needn
167 | needn't
168 | shan
169 | shan't
170 | shouldn
171 | shouldn't
172 | wasn
173 | wasn't
174 | weren
175 | weren't
176 | won
177 | won't
178 | wouldn
179 | wouldn't
180 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/finnish:
--------------------------------------------------------------------------------
1 | olla
2 | olen
3 | olet
4 | on
5 | olemme
6 | olette
7 | ovat
8 | ole
9 | oli
10 | olisi
11 | olisit
12 | olisin
13 | olisimme
14 | olisitte
15 | olisivat
16 | olit
17 | olin
18 | olimme
19 | olitte
20 | olivat
21 | ollut
22 | olleet
23 | en
24 | et
25 | ei
26 | emme
27 | ette
28 | eivät
29 | minä
30 | minun
31 | minut
32 | minua
33 | minussa
34 | minusta
35 | minuun
36 | minulla
37 | minulta
38 | minulle
39 | sinä
40 | sinun
41 | sinut
42 | sinua
43 | sinussa
44 | sinusta
45 | sinuun
46 | sinulla
47 | sinulta
48 | sinulle
49 | hän
50 | hänen
51 | hänet
52 | häntä
53 | hänessä
54 | hänestä
55 | häneen
56 | hänellä
57 | häneltä
58 | hänelle
59 | me
60 | meidän
61 | meidät
62 | meitä
63 | meissä
64 | meistä
65 | meihin
66 | meillä
67 | meiltä
68 | meille
69 | te
70 | teidän
71 | teidät
72 | teitä
73 | teissä
74 | teistä
75 | teihin
76 | teillä
77 | teiltä
78 | teille
79 | he
80 | heidän
81 | heidät
82 | heitä
83 | heissä
84 | heistä
85 | heihin
86 | heillä
87 | heiltä
88 | heille
89 | tämä
90 | tämän
91 | tätä
92 | tässä
93 | tästä
94 | tähän
95 | tallä
96 | tältä
97 | tälle
98 | tänä
99 | täksi
100 | tuo
101 | tuon
102 | tuotä
103 | tuossa
104 | tuosta
105 | tuohon
106 | tuolla
107 | tuolta
108 | tuolle
109 | tuona
110 | tuoksi
111 | se
112 | sen
113 | sitä
114 | siinä
115 | siitä
116 | siihen
117 | sillä
118 | siltä
119 | sille
120 | sinä
121 | siksi
122 | nämä
123 | näiden
124 | näitä
125 | näissä
126 | näistä
127 | näihin
128 | näillä
129 | näiltä
130 | näille
131 | näinä
132 | näiksi
133 | nuo
134 | noiden
135 | noita
136 | noissa
137 | noista
138 | noihin
139 | noilla
140 | noilta
141 | noille
142 | noina
143 | noiksi
144 | ne
145 | niiden
146 | niitä
147 | niissä
148 | niistä
149 | niihin
150 | niillä
151 | niiltä
152 | niille
153 | niinä
154 | niiksi
155 | kuka
156 | kenen
157 | kenet
158 | ketä
159 | kenessä
160 | kenestä
161 | keneen
162 | kenellä
163 | keneltä
164 | kenelle
165 | kenenä
166 | keneksi
167 | ketkä
168 | keiden
169 | ketkä
170 | keitä
171 | keissä
172 | keistä
173 | keihin
174 | keillä
175 | keiltä
176 | keille
177 | keinä
178 | keiksi
179 | mikä
180 | minkä
181 | minkä
182 | mitä
183 | missä
184 | mistä
185 | mihin
186 | millä
187 | miltä
188 | mille
189 | minä
190 | miksi
191 | mitkä
192 | joka
193 | jonka
194 | jota
195 | jossa
196 | josta
197 | johon
198 | jolla
199 | jolta
200 | jolle
201 | jona
202 | joksi
203 | jotka
204 | joiden
205 | joita
206 | joissa
207 | joista
208 | joihin
209 | joilla
210 | joilta
211 | joille
212 | joina
213 | joiksi
214 | että
215 | ja
216 | jos
217 | koska
218 | kuin
219 | mutta
220 | niin
221 | sekä
222 | sillä
223 | tai
224 | vaan
225 | vai
226 | vaikka
227 | kanssa
228 | mukaan
229 | noin
230 | poikki
231 | yli
232 | kun
233 | niin
234 | nyt
235 | itse
236 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/french:
--------------------------------------------------------------------------------
1 | au
2 | aux
3 | avec
4 | ce
5 | ces
6 | dans
7 | de
8 | des
9 | du
10 | elle
11 | en
12 | et
13 | eux
14 | il
15 | ils
16 | je
17 | la
18 | le
19 | les
20 | leur
21 | lui
22 | ma
23 | mais
24 | me
25 | même
26 | mes
27 | moi
28 | mon
29 | ne
30 | nos
31 | notre
32 | nous
33 | on
34 | ou
35 | par
36 | pas
37 | pour
38 | qu
39 | que
40 | qui
41 | sa
42 | se
43 | ses
44 | son
45 | sur
46 | ta
47 | te
48 | tes
49 | toi
50 | ton
51 | tu
52 | un
53 | une
54 | vos
55 | votre
56 | vous
57 | c
58 | d
59 | j
60 | l
61 | à
62 | m
63 | n
64 | s
65 | t
66 | y
67 | été
68 | étée
69 | étées
70 | étés
71 | étant
72 | étante
73 | étants
74 | étantes
75 | suis
76 | es
77 | est
78 | sommes
79 | êtes
80 | sont
81 | serai
82 | seras
83 | sera
84 | serons
85 | serez
86 | seront
87 | serais
88 | serait
89 | serions
90 | seriez
91 | seraient
92 | étais
93 | était
94 | étions
95 | étiez
96 | étaient
97 | fus
98 | fut
99 | fûmes
100 | fûtes
101 | furent
102 | sois
103 | soit
104 | soyons
105 | soyez
106 | soient
107 | fusse
108 | fusses
109 | fût
110 | fussions
111 | fussiez
112 | fussent
113 | ayant
114 | ayante
115 | ayantes
116 | ayants
117 | eu
118 | eue
119 | eues
120 | eus
121 | ai
122 | as
123 | avons
124 | avez
125 | ont
126 | aurai
127 | auras
128 | aura
129 | aurons
130 | aurez
131 | auront
132 | aurais
133 | aurait
134 | aurions
135 | auriez
136 | auraient
137 | avais
138 | avait
139 | avions
140 | aviez
141 | avaient
142 | eut
143 | eûmes
144 | eûtes
145 | eurent
146 | aie
147 | aies
148 | ait
149 | ayons
150 | ayez
151 | aient
152 | eusse
153 | eusses
154 | eût
155 | eussions
156 | eussiez
157 | eussent
158 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/german:
--------------------------------------------------------------------------------
1 | aber
2 | alle
3 | allem
4 | allen
5 | aller
6 | alles
7 | als
8 | also
9 | am
10 | an
11 | ander
12 | andere
13 | anderem
14 | anderen
15 | anderer
16 | anderes
17 | anderm
18 | andern
19 | anderr
20 | anders
21 | auch
22 | auf
23 | aus
24 | bei
25 | bin
26 | bis
27 | bist
28 | da
29 | damit
30 | dann
31 | der
32 | den
33 | des
34 | dem
35 | die
36 | das
37 | dass
38 | daß
39 | derselbe
40 | derselben
41 | denselben
42 | desselben
43 | demselben
44 | dieselbe
45 | dieselben
46 | dasselbe
47 | dazu
48 | dein
49 | deine
50 | deinem
51 | deinen
52 | deiner
53 | deines
54 | denn
55 | derer
56 | dessen
57 | dich
58 | dir
59 | du
60 | dies
61 | diese
62 | diesem
63 | diesen
64 | dieser
65 | dieses
66 | doch
67 | dort
68 | durch
69 | ein
70 | eine
71 | einem
72 | einen
73 | einer
74 | eines
75 | einig
76 | einige
77 | einigem
78 | einigen
79 | einiger
80 | einiges
81 | einmal
82 | er
83 | ihn
84 | ihm
85 | es
86 | etwas
87 | euer
88 | eure
89 | eurem
90 | euren
91 | eurer
92 | eures
93 | für
94 | gegen
95 | gewesen
96 | hab
97 | habe
98 | haben
99 | hat
100 | hatte
101 | hatten
102 | hier
103 | hin
104 | hinter
105 | ich
106 | mich
107 | mir
108 | ihr
109 | ihre
110 | ihrem
111 | ihren
112 | ihrer
113 | ihres
114 | euch
115 | im
116 | in
117 | indem
118 | ins
119 | ist
120 | jede
121 | jedem
122 | jeden
123 | jeder
124 | jedes
125 | jene
126 | jenem
127 | jenen
128 | jener
129 | jenes
130 | jetzt
131 | kann
132 | kein
133 | keine
134 | keinem
135 | keinen
136 | keiner
137 | keines
138 | können
139 | könnte
140 | machen
141 | man
142 | manche
143 | manchem
144 | manchen
145 | mancher
146 | manches
147 | mein
148 | meine
149 | meinem
150 | meinen
151 | meiner
152 | meines
153 | mit
154 | muss
155 | musste
156 | nach
157 | nicht
158 | nichts
159 | noch
160 | nun
161 | nur
162 | ob
163 | oder
164 | ohne
165 | sehr
166 | sein
167 | seine
168 | seinem
169 | seinen
170 | seiner
171 | seines
172 | selbst
173 | sich
174 | sie
175 | ihnen
176 | sind
177 | so
178 | solche
179 | solchem
180 | solchen
181 | solcher
182 | solches
183 | soll
184 | sollte
185 | sondern
186 | sonst
187 | über
188 | um
189 | und
190 | uns
191 | unsere
192 | unserem
193 | unseren
194 | unser
195 | unseres
196 | unter
197 | viel
198 | vom
199 | von
200 | vor
201 | während
202 | war
203 | waren
204 | warst
205 | was
206 | weg
207 | weil
208 | weiter
209 | welche
210 | welchem
211 | welchen
212 | welcher
213 | welches
214 | wenn
215 | werde
216 | werden
217 | wie
218 | wieder
219 | will
220 | wir
221 | wird
222 | wirst
223 | wo
224 | wollen
225 | wollte
226 | würde
227 | würden
228 | zu
229 | zum
230 | zur
231 | zwar
232 | zwischen
233 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/greek:
--------------------------------------------------------------------------------
1 | αλλα
2 | αν
3 | αντι
4 | απο
5 | αυτα
6 | αυτεσ
7 | αυτη
8 | αυτο
9 | αυτοι
10 | αυτοσ
11 | αυτουσ
12 | αυτων
13 | αἱ
14 | αἳ
15 | αἵ
16 | αὐτόσ
17 | αὐτὸς
18 | αὖ
19 | γάρ
20 | γα
21 | γα^
22 | γε
23 | για
24 | γοῦν
25 | γὰρ
26 | δ'
27 | δέ
28 | δή
29 | δαί
30 | δαίσ
31 | δαὶ
32 | δαὶς
33 | δε
34 | δεν
35 | δι'
36 | διά
37 | διὰ
38 | δὲ
39 | δὴ
40 | δ’
41 | εαν
42 | ειμαι
43 | ειμαστε
44 | ειναι
45 | εισαι
46 | ειστε
47 | εκεινα
48 | εκεινεσ
49 | εκεινη
50 | εκεινο
51 | εκεινοι
52 | εκεινοσ
53 | εκεινουσ
54 | εκεινων
55 | ενω
56 | επ
57 | επι
58 | εἰ
59 | εἰμί
60 | εἰμὶ
61 | εἰς
62 | εἰσ
63 | εἴ
64 | εἴμι
65 | εἴτε
66 | η
67 | θα
68 | ισωσ
69 | κ
70 | καί
71 | καίτοι
72 | καθ
73 | και
74 | κατ
75 | κατά
76 | κατα
77 | κατὰ
78 | καὶ
79 | κι
80 | κἀν
81 | κἂν
82 | μέν
83 | μή
84 | μήτε
85 | μα
86 | με
87 | μεθ
88 | μετ
89 | μετά
90 | μετα
91 | μετὰ
92 | μη
93 | μην
94 | μἐν
95 | μὲν
96 | μὴ
97 | μὴν
98 | να
99 | ο
100 | οι
101 | ομωσ
102 | οπωσ
103 | οσο
104 | οτι
105 | οἱ
106 | οἳ
107 | οἷς
108 | οὐ
109 | οὐδ
110 | οὐδέ
111 | οὐδείσ
112 | οὐδεὶς
113 | οὐδὲ
114 | οὐδὲν
115 | οὐκ
116 | οὐχ
117 | οὐχὶ
118 | οὓς
119 | οὔτε
120 | οὕτω
121 | οὕτως
122 | οὕτωσ
123 | οὖν
124 | οὗ
125 | οὗτος
126 | οὗτοσ
127 | παρ
128 | παρά
129 | παρα
130 | παρὰ
131 | περί
132 | περὶ
133 | ποια
134 | ποιεσ
135 | ποιο
136 | ποιοι
137 | ποιοσ
138 | ποιουσ
139 | ποιων
140 | ποτε
141 | που
142 | ποῦ
143 | προ
144 | προσ
145 | πρόσ
146 | πρὸ
147 | πρὸς
148 | πως
149 | πωσ
150 | σε
151 | στη
152 | στην
153 | στο
154 | στον
155 | σόσ
156 | σύ
157 | σύν
158 | σὸς
159 | σὺ
160 | σὺν
161 | τά
162 | τήν
163 | τί
164 | τίς
165 | τίσ
166 | τα
167 | ταῖς
168 | τε
169 | την
170 | τησ
171 | τι
172 | τινα
173 | τις
174 | τισ
175 | το
176 | τοί
177 | τοι
178 | τοιοῦτος
179 | τοιοῦτοσ
180 | τον
181 | τοτε
182 | του
183 | τούσ
184 | τοὺς
185 | τοῖς
186 | τοῦ
187 | των
188 | τό
189 | τόν
190 | τότε
191 | τὰ
192 | τὰς
193 | τὴν
194 | τὸ
195 | τὸν
196 | τῆς
197 | τῆσ
198 | τῇ
199 | τῶν
200 | τῷ
201 | ωσ
202 | ἀλλ'
203 | ἀλλά
204 | ἀλλὰ
205 | ἀλλ’
206 | ἀπ
207 | ἀπό
208 | ἀπὸ
209 | ἀφ
210 | ἂν
211 | ἃ
212 | ἄλλος
213 | ἄλλοσ
214 | ἄν
215 | ἄρα
216 | ἅμα
217 | ἐάν
218 | ἐγώ
219 | ἐγὼ
220 | ἐκ
221 | ἐμόσ
222 | ἐμὸς
223 | ἐν
224 | ἐξ
225 | ἐπί
226 | ἐπεὶ
227 | ἐπὶ
228 | ἐστι
229 | ἐφ
230 | ἐὰν
231 | ἑαυτοῦ
232 | ἔτι
233 | ἡ
234 | ἢ
235 | ἣ
236 | ἤ
237 | ἥ
238 | ἧς
239 | ἵνα
240 | ὁ
241 | ὃ
242 | ὃν
243 | ὃς
244 | ὅ
245 | ὅδε
246 | ὅθεν
247 | ὅπερ
248 | ὅς
249 | ὅσ
250 | ὅστις
251 | ὅστισ
252 | ὅτε
253 | ὅτι
254 | ὑμόσ
255 | ὑπ
256 | ὑπέρ
257 | ὑπό
258 | ὑπὲρ
259 | ὑπὸ
260 | ὡς
261 | ὡσ
262 | ὥς
263 | ὥστε
264 | ὦ
265 | ᾧ
266 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/hebrew:
--------------------------------------------------------------------------------
1 | אני
2 | את
3 | אתה
4 | אנחנו
5 | אתן
6 | אתם
7 | הם
8 | הן
9 | היא
10 | הוא
11 | שלי
12 | שלו
13 | שלך
14 | שלה
15 | שלנו
16 | שלכם
17 | שלכן
18 | שלהם
19 | שלהן
20 | לי
21 | לו
22 | לה
23 | לנו
24 | לכם
25 | לכן
26 | להם
27 | להן
28 | אותה
29 | אותו
30 | זה
31 | זאת
32 | אלה
33 | אלו
34 | תחת
35 | מתחת
36 | מעל
37 | בין
38 | עם
39 | עד
40 | נגר
41 | על
42 | אל
43 | מול
44 | של
45 | אצל
46 | כמו
47 | אחר
48 | אותו
49 | בלי
50 | לפני
51 | אחרי
52 | מאחורי
53 | עלי
54 | עליו
55 | עליה
56 | עליך
57 | עלינו
58 | עליכם
59 | לעיכן
60 | עליהם
61 | עליהן
62 | כל
63 | כולם
64 | כולן
65 | כך
66 | ככה
67 | כזה
68 | זה
69 | זות
70 | אותי
71 | אותה
72 | אותם
73 | אותך
74 | אותו
75 | אותן
76 | אותנו
77 | ואת
78 | את
79 | אתכם
80 | אתכן
81 | איתי
82 | איתו
83 | איתך
84 | איתה
85 | איתם
86 | איתן
87 | איתנו
88 | איתכם
89 | איתכן
90 | יהיה
91 | תהיה
92 | היתי
93 | היתה
94 | היה
95 | להיות
96 | עצמי
97 | עצמו
98 | עצמה
99 | עצמם
100 | עצמן
101 | עצמנו
102 | עצמהם
103 | עצמהן
104 | מי
105 | מה
106 | איפה
107 | היכן
108 | במקום שבו
109 | אם
110 | לאן
111 | למקום שבו
112 | מקום בו
113 | איזה
114 | מהיכן
115 | איך
116 | כיצד
117 | באיזו מידה
118 | מתי
119 | בשעה ש
120 | כאשר
121 | כש
122 | למרות
123 | לפני
124 | אחרי
125 | מאיזו סיבה
126 | הסיבה שבגללה
127 | למה
128 | מדוע
129 | לאיזו תכלית
130 | כי
131 | יש
132 | אין
133 | אך
134 | מנין
135 | מאין
136 | מאיפה
137 | יכל
138 | יכלה
139 | יכלו
140 | יכול
141 | יכולה
142 | יכולים
143 | יכולות
144 | יוכלו
145 | יוכל
146 | מסוגל
147 | לא
148 | רק
149 | אולי
150 | אין
151 | לאו
152 | אי
153 | כלל
154 | נגד
155 | אם
156 | עם
157 | אל
158 | אלה
159 | אלו
160 | אף
161 | על
162 | מעל
163 | מתחת
164 | מצד
165 | בשביל
166 | לבין
167 | באמצע
168 | בתוך
169 | דרך
170 | מבעד
171 | באמצעות
172 | למעלה
173 | למטה
174 | מחוץ
175 | מן
176 | לעבר
177 | מכאן
178 | כאן
179 | הנה
180 | הרי
181 | פה
182 | שם
183 | אך
184 | ברם
185 | שוב
186 | אבל
187 | מבלי
188 | בלי
189 | מלבד
190 | רק
191 | בגלל
192 | מכיוון
193 | עד
194 | אשר
195 | ואילו
196 | למרות
197 | אס
198 | כמו
199 | כפי
200 | אז
201 | אחרי
202 | כן
203 | לכן
204 | לפיכך
205 | מאד
206 | עז
207 | מעט
208 | מעטים
209 | במידה
210 | שוב
211 | יותר
212 | מדי
213 | גם
214 | כן
215 | נו
216 | אחר
217 | אחרת
218 | אחרים
219 | אחרות
220 | אשר
221 | או
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/hinglish:
--------------------------------------------------------------------------------
1 | a
2 | aadi
3 | aaj
4 | aap
5 | aapne
6 | aata
7 | aati
8 | aaya
9 | aaye
10 | ab
11 | abbe
12 | abbey
13 | abe
14 | abhi
15 | able
16 | about
17 | above
18 | accha
19 | according
20 | accordingly
21 | acha
22 | achcha
23 | across
24 | actually
25 | after
26 | afterwards
27 | again
28 | against
29 | agar
30 | ain
31 | aint
32 | ain't
33 | aisa
34 | aise
35 | aisi
36 | alag
37 | all
38 | allow
39 | allows
40 | almost
41 | alone
42 | along
43 | already
44 | also
45 | although
46 | always
47 | am
48 | among
49 | amongst
50 | an
51 | and
52 | andar
53 | another
54 | any
55 | anybody
56 | anyhow
57 | anyone
58 | anything
59 | anyway
60 | anyways
61 | anywhere
62 | ap
63 | apan
64 | apart
65 | apna
66 | apnaa
67 | apne
68 | apni
69 | appear
70 | are
71 | aren
72 | arent
73 | aren't
74 | around
75 | arre
76 | as
77 | aside
78 | ask
79 | asking
80 | at
81 | aur
82 | avum
83 | aya
84 | aye
85 | baad
86 | baar
87 | bad
88 | bahut
89 | bana
90 | banae
91 | banai
92 | banao
93 | banaya
94 | banaye
95 | banayi
96 | banda
97 | bande
98 | bandi
99 | bane
100 | bani
101 | bas
102 | bata
103 | batao
104 | bc
105 | be
106 | became
107 | because
108 | become
109 | becomes
110 | becoming
111 | been
112 | before
113 | beforehand
114 | behind
115 | being
116 | below
117 | beside
118 | besides
119 | best
120 | better
121 | between
122 | beyond
123 | bhai
124 | bheetar
125 | bhi
126 | bhitar
127 | bht
128 | bilkul
129 | bohot
130 | bol
131 | bola
132 | bole
133 | boli
134 | bolo
135 | bolta
136 | bolte
137 | bolti
138 | both
139 | brief
140 | bro
141 | btw
142 | but
143 | by
144 | came
145 | can
146 | cannot
147 | cant
148 | can't
149 | cause
150 | causes
151 | certain
152 | certainly
153 | chahiye
154 | chaiye
155 | chal
156 | chalega
157 | chhaiye
158 | clearly
159 | c'mon
160 | com
161 | come
162 | comes
163 | could
164 | couldn
165 | couldnt
166 | couldn't
167 | d
168 | de
169 | dede
170 | dega
171 | degi
172 | dekh
173 | dekha
174 | dekhe
175 | dekhi
176 | dekho
177 | denge
178 | dhang
179 | di
180 | did
181 | didn
182 | didnt
183 | didn't
184 | dijiye
185 | diya
186 | diyaa
187 | diye
188 | diyo
189 | do
190 | does
191 | doesn
192 | doesnt
193 | doesn't
194 | doing
195 | done
196 | dono
197 | dont
198 | don't
199 | doosra
200 | doosre
201 | down
202 | downwards
203 | dude
204 | dunga
205 | dungi
206 | during
207 | dusra
208 | dusre
209 | dusri
210 | dvaara
211 | dvara
212 | dwaara
213 | dwara
214 | each
215 | edu
216 | eg
217 | eight
218 | either
219 | ek
220 | else
221 | elsewhere
222 | enough
223 | etc
224 | even
225 | ever
226 | every
227 | everybody
228 | everyone
229 | everything
230 | everywhere
231 | ex
232 | exactly
233 | example
234 | except
235 | far
236 | few
237 | fifth
238 | fir
239 | first
240 | five
241 | followed
242 | following
243 | follows
244 | for
245 | forth
246 | four
247 | from
248 | further
249 | furthermore
250 | gaya
251 | gaye
252 | gayi
253 | get
254 | gets
255 | getting
256 | ghar
257 | given
258 | gives
259 | go
260 | goes
261 | going
262 | gone
263 | good
264 | got
265 | gotten
266 | greetings
267 | haan
268 | had
269 | hadd
270 | hadn
271 | hadnt
272 | hadn't
273 | hai
274 | hain
275 | hamara
276 | hamare
277 | hamari
278 | hamne
279 | han
280 | happens
281 | har
282 | hardly
283 | has
284 | hasn
285 | hasnt
286 | hasn't
287 | have
288 | haven
289 | havent
290 | haven't
291 | having
292 | he
293 | hello
294 | help
295 | hence
296 | her
297 | here
298 | hereafter
299 | hereby
300 | herein
301 | here's
302 | hereupon
303 | hers
304 | herself
305 | he's
306 | hi
307 | him
308 | himself
309 | his
310 | hither
311 | hm
312 | hmm
313 | ho
314 | hoga
315 | hoge
316 | hogi
317 | hona
318 | honaa
319 | hone
320 | honge
321 | hongi
322 | honi
323 | hopefully
324 | hota
325 | hotaa
326 | hote
327 | hoti
328 | how
329 | howbeit
330 | however
331 | hoyenge
332 | hoyengi
333 | hu
334 | hua
335 | hue
336 | huh
337 | hui
338 | hum
339 | humein
340 | humne
341 | hun
342 | huye
343 | huyi
344 | i
345 | i'd
346 | idk
347 | ie
348 | if
349 | i'll
350 | i'm
351 | imo
352 | in
353 | inasmuch
354 | inc
355 | inhe
356 | inhi
357 | inho
358 | inka
359 | inkaa
360 | inke
361 | inki
362 | inn
363 | inner
364 | inse
365 | insofar
366 | into
367 | inward
368 | is
369 | ise
370 | isi
371 | iska
372 | iskaa
373 | iske
374 | iski
375 | isme
376 | isn
377 | isne
378 | isnt
379 | isn't
380 | iss
381 | isse
382 | issi
383 | isski
384 | it
385 | it'd
386 | it'll
387 | itna
388 | itne
389 | itni
390 | itno
391 | its
392 | it's
393 | itself
394 | ityaadi
395 | ityadi
396 | i've
397 | ja
398 | jaa
399 | jab
400 | jabh
401 | jaha
402 | jahaan
403 | jahan
404 | jaisa
405 | jaise
406 | jaisi
407 | jata
408 | jayega
409 | jidhar
410 | jin
411 | jinhe
412 | jinhi
413 | jinho
414 | jinhone
415 | jinka
416 | jinke
417 | jinki
418 | jinn
419 | jis
420 | jise
421 | jiska
422 | jiske
423 | jiski
424 | jisme
425 | jiss
426 | jisse
427 | jitna
428 | jitne
429 | jitni
430 | jo
431 | just
432 | jyaada
433 | jyada
434 | k
435 | ka
436 | kaafi
437 | kab
438 | kabhi
439 | kafi
440 | kaha
441 | kahaa
442 | kahaan
443 | kahan
444 | kahi
445 | kahin
446 | kahte
447 | kaisa
448 | kaise
449 | kaisi
450 | kal
451 | kam
452 | kar
453 | kara
454 | kare
455 | karega
456 | karegi
457 | karen
458 | karenge
459 | kari
460 | karke
461 | karna
462 | karne
463 | karni
464 | karo
465 | karta
466 | karte
467 | karti
468 | karu
469 | karun
470 | karunga
471 | karungi
472 | kaun
473 | kaunsa
474 | kayi
475 | kch
476 | ke
477 | keep
478 | keeps
479 | keh
480 | kehte
481 | kept
482 | khud
483 | ki
484 | kin
485 | kine
486 | kinhe
487 | kinho
488 | kinka
489 | kinke
490 | kinki
491 | kinko
492 | kinn
493 | kino
494 | kis
495 | kise
496 | kisi
497 | kiska
498 | kiske
499 | kiski
500 | kisko
501 | kisliye
502 | kisne
503 | kitna
504 | kitne
505 | kitni
506 | kitno
507 | kiya
508 | kiye
509 | know
510 | known
511 | knows
512 | ko
513 | koi
514 | kon
515 | konsa
516 | koyi
517 | krna
518 | krne
519 | kuch
520 | kuchch
521 | kuchh
522 | kul
523 | kull
524 | kya
525 | kyaa
526 | kyu
527 | kyuki
528 | kyun
529 | kyunki
530 | lagta
531 | lagte
532 | lagti
533 | last
534 | lately
535 | later
536 | le
537 | least
538 | lekar
539 | lekin
540 | less
541 | lest
542 | let
543 | let's
544 | li
545 | like
546 | liked
547 | likely
548 | little
549 | liya
550 | liye
551 | ll
552 | lo
553 | log
554 | logon
555 | lol
556 | look
557 | looking
558 | looks
559 | ltd
560 | lunga
561 | m
562 | maan
563 | maana
564 | maane
565 | maani
566 | maano
567 | magar
568 | mai
569 | main
570 | maine
571 | mainly
572 | mana
573 | mane
574 | mani
575 | mano
576 | many
577 | mat
578 | may
579 | maybe
580 | me
581 | mean
582 | meanwhile
583 | mein
584 | mera
585 | mere
586 | merely
587 | meri
588 | might
589 | mightn
590 | mightnt
591 | mightn't
592 | mil
593 | mjhe
594 | more
595 | moreover
596 | most
597 | mostly
598 | much
599 | mujhe
600 | must
601 | mustn
602 | mustnt
603 | mustn't
604 | my
605 | myself
606 | na
607 | naa
608 | naah
609 | nahi
610 | nahin
611 | nai
612 | name
613 | namely
614 | nd
615 | ne
616 | near
617 | nearly
618 | necessary
619 | neeche
620 | need
621 | needn
622 | neednt
623 | needn't
624 | needs
625 | neither
626 | never
627 | nevertheless
628 | new
629 | next
630 | nhi
631 | nine
632 | no
633 | nobody
634 | non
635 | none
636 | noone
637 | nope
638 | nor
639 | normally
640 | not
641 | nothing
642 | novel
643 | now
644 | nowhere
645 | o
646 | obviously
647 | of
648 | off
649 | often
650 | oh
651 | ok
652 | okay
653 | old
654 | on
655 | once
656 | one
657 | ones
658 | only
659 | onto
660 | or
661 | other
662 | others
663 | otherwise
664 | ought
665 | our
666 | ours
667 | ourselves
668 | out
669 | outside
670 | over
671 | overall
672 | own
673 | par
674 | pata
675 | pe
676 | pehla
677 | pehle
678 | pehli
679 | people
680 | per
681 | perhaps
682 | phla
683 | phle
684 | phli
685 | placed
686 | please
687 | plus
688 | poora
689 | poori
690 | provides
691 | pura
692 | puri
693 | q
694 | que
695 | quite
696 | raha
697 | rahaa
698 | rahe
699 | rahi
700 | rakh
701 | rakha
702 | rakhe
703 | rakhen
704 | rakhi
705 | rakho
706 | rather
707 | re
708 | really
709 | reasonably
710 | regarding
711 | regardless
712 | regards
713 | rehte
714 | rha
715 | rhaa
716 | rhe
717 | rhi
718 | ri
719 | right
720 | s
721 | sa
722 | saara
723 | saare
724 | saath
725 | sab
726 | sabhi
727 | sabse
728 | sahi
729 | said
730 | sakta
731 | saktaa
732 | sakte
733 | sakti
734 | same
735 | sang
736 | sara
737 | sath
738 | saw
739 | say
740 | saying
741 | says
742 | se
743 | second
744 | secondly
745 | see
746 | seeing
747 | seem
748 | seemed
749 | seeming
750 | seems
751 | seen
752 | self
753 | selves
754 | sensible
755 | sent
756 | serious
757 | seriously
758 | seven
759 | several
760 | shall
761 | shan
762 | shant
763 | shan't
764 | she
765 | she's
766 | should
767 | shouldn
768 | shouldnt
769 | shouldn't
770 | should've
771 | si
772 | since
773 | six
774 | so
775 | soch
776 | some
777 | somebody
778 | somehow
779 | someone
780 | something
781 | sometime
782 | sometimes
783 | somewhat
784 | somewhere
785 | soon
786 | still
787 | sub
788 | such
789 | sup
790 | sure
791 | t
792 | tab
793 | tabh
794 | tak
795 | take
796 | taken
797 | tarah
798 | teen
799 | teeno
800 | teesra
801 | teesre
802 | teesri
803 | tell
804 | tends
805 | tera
806 | tere
807 | teri
808 | th
809 | tha
810 | than
811 | thank
812 | thanks
813 | thanx
814 | that
815 | that'll
816 | thats
817 | that's
818 | the
819 | theek
820 | their
821 | theirs
822 | them
823 | themselves
824 | then
825 | thence
826 | there
827 | thereafter
828 | thereby
829 | therefore
830 | therein
831 | theres
832 | there's
833 | thereupon
834 | these
835 | they
836 | they'd
837 | they'll
838 | they're
839 | they've
840 | thi
841 | thik
842 | thing
843 | think
844 | thinking
845 | third
846 | this
847 | tho
848 | thoda
849 | thodi
850 | thorough
851 | thoroughly
852 | those
853 | though
854 | thought
855 | three
856 | through
857 | throughout
858 | thru
859 | thus
860 | tjhe
861 | to
862 | together
863 | toh
864 | too
865 | took
866 | toward
867 | towards
868 | tried
869 | tries
870 | true
871 | truly
872 | try
873 | trying
874 | tu
875 | tujhe
876 | tum
877 | tumhara
878 | tumhare
879 | tumhari
880 | tune
881 | twice
882 | two
883 | um
884 | umm
885 | un
886 | under
887 | unhe
888 | unhi
889 | unho
890 | unhone
891 | unka
892 | unkaa
893 | unke
894 | unki
895 | unko
896 | unless
897 | unlikely
898 | unn
899 | unse
900 | until
901 | unto
902 | up
903 | upar
904 | upon
905 | us
906 | use
907 | used
908 | useful
909 | uses
910 | usi
911 | using
912 | uska
913 | uske
914 | usne
915 | uss
916 | usse
917 | ussi
918 | usually
919 | vaala
920 | vaale
921 | vaali
922 | vahaan
923 | vahan
924 | vahi
925 | vahin
926 | vaisa
927 | vaise
928 | vaisi
929 | vala
930 | vale
931 | vali
932 | various
933 | ve
934 | very
935 | via
936 | viz
937 | vo
938 | waala
939 | waale
940 | waali
941 | wagaira
942 | wagairah
943 | wagerah
944 | waha
945 | wahaan
946 | wahan
947 | wahi
948 | wahin
949 | waisa
950 | waise
951 | waisi
952 | wala
953 | wale
954 | wali
955 | want
956 | wants
957 | was
958 | wasn
959 | wasnt
960 | wasn't
961 | way
962 | we
963 | we'd
964 | well
965 | we'll
966 | went
967 | were
968 | we're
969 | weren
970 | werent
971 | weren't
972 | we've
973 | what
974 | whatever
975 | what's
976 | when
977 | whence
978 | whenever
979 | where
980 | whereafter
981 | whereas
982 | whereby
983 | wherein
984 | where's
985 | whereupon
986 | wherever
987 | whether
988 | which
989 | while
990 | who
991 | whoever
992 | whole
993 | whom
994 | who's
995 | whose
996 | why
997 | will
998 | willing
999 | with
1000 | within
1001 | without
1002 | wo
1003 | woh
1004 | wohi
1005 | won
1006 | wont
1007 | won't
1008 | would
1009 | wouldn
1010 | wouldnt
1011 | wouldn't
1012 | y
1013 | ya
1014 | yadi
1015 | yah
1016 | yaha
1017 | yahaan
1018 | yahan
1019 | yahi
1020 | yahin
1021 | ye
1022 | yeah
1023 | yeh
1024 | yehi
1025 | yes
1026 | yet
1027 | you
1028 | you'd
1029 | you'll
1030 | your
1031 | you're
1032 | yours
1033 | yourself
1034 | yourselves
1035 | you've
1036 | yup
1037 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/hungarian:
--------------------------------------------------------------------------------
1 | a
2 | ahogy
3 | ahol
4 | aki
5 | akik
6 | akkor
7 | alatt
8 | által
9 | általában
10 | amely
11 | amelyek
12 | amelyekben
13 | amelyeket
14 | amelyet
15 | amelynek
16 | ami
17 | amit
18 | amolyan
19 | amíg
20 | amikor
21 | át
22 | abban
23 | ahhoz
24 | annak
25 | arra
26 | arról
27 | az
28 | azok
29 | azon
30 | azt
31 | azzal
32 | azért
33 | aztán
34 | azután
35 | azonban
36 | bár
37 | be
38 | belül
39 | benne
40 | cikk
41 | cikkek
42 | cikkeket
43 | csak
44 | de
45 | e
46 | eddig
47 | egész
48 | egy
49 | egyes
50 | egyetlen
51 | egyéb
52 | egyik
53 | egyre
54 | ekkor
55 | el
56 | elég
57 | ellen
58 | elõ
59 | elõször
60 | elõtt
61 | elsõ
62 | én
63 | éppen
64 | ebben
65 | ehhez
66 | emilyen
67 | ennek
68 | erre
69 | ez
70 | ezt
71 | ezek
72 | ezen
73 | ezzel
74 | ezért
75 | és
76 | fel
77 | felé
78 | hanem
79 | hiszen
80 | hogy
81 | hogyan
82 | igen
83 | így
84 | illetve
85 | ill.
86 | ill
87 | ilyen
88 | ilyenkor
89 | ison
90 | ismét
91 | itt
92 | jó
93 | jól
94 | jobban
95 | kell
96 | kellett
97 | keresztül
98 | keressünk
99 | ki
100 | kívül
101 | között
102 | közül
103 | legalább
104 | lehet
105 | lehetett
106 | legyen
107 | lenne
108 | lenni
109 | lesz
110 | lett
111 | maga
112 | magát
113 | majd
114 | majd
115 | már
116 | más
117 | másik
118 | meg
119 | még
120 | mellett
121 | mert
122 | mely
123 | melyek
124 | mi
125 | mit
126 | míg
127 | miért
128 | milyen
129 | mikor
130 | minden
131 | mindent
132 | mindenki
133 | mindig
134 | mint
135 | mintha
136 | mivel
137 | most
138 | nagy
139 | nagyobb
140 | nagyon
141 | ne
142 | néha
143 | nekem
144 | neki
145 | nem
146 | néhány
147 | nélkül
148 | nincs
149 | olyan
150 | ott
151 | össze
152 | õ
153 | õk
154 | õket
155 | pedig
156 | persze
157 | rá
158 | s
159 | saját
160 | sem
161 | semmi
162 | sok
163 | sokat
164 | sokkal
165 | számára
166 | szemben
167 | szerint
168 | szinte
169 | talán
170 | tehát
171 | teljes
172 | tovább
173 | továbbá
174 | több
175 | úgy
176 | ugyanis
177 | új
178 | újabb
179 | újra
180 | után
181 | utána
182 | utolsó
183 | vagy
184 | vagyis
185 | valaki
186 | valami
187 | valamint
188 | való
189 | vagyok
190 | van
191 | vannak
192 | volt
193 | voltam
194 | voltak
195 | voltunk
196 | vissza
197 | vele
198 | viszont
199 | volna
200 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/indonesian:
--------------------------------------------------------------------------------
1 | ada
2 | adalah
3 | adanya
4 | adapun
5 | agak
6 | agaknya
7 | agar
8 | akan
9 | akankah
10 | akhir
11 | akhiri
12 | akhirnya
13 | aku
14 | akulah
15 | amat
16 | amatlah
17 | anda
18 | andalah
19 | antar
20 | antara
21 | antaranya
22 | apa
23 | apaan
24 | apabila
25 | apakah
26 | apalagi
27 | apatah
28 | artinya
29 | asal
30 | asalkan
31 | atas
32 | atau
33 | ataukah
34 | ataupun
35 | awal
36 | awalnya
37 | bagai
38 | bagaikan
39 | bagaimana
40 | bagaimanakah
41 | bagaimanapun
42 | bagi
43 | bagian
44 | bahkan
45 | bahwa
46 | bahwasanya
47 | baik
48 | bakal
49 | bakalan
50 | balik
51 | banyak
52 | bapak
53 | baru
54 | bawah
55 | beberapa
56 | begini
57 | beginian
58 | beginikah
59 | beginilah
60 | begitu
61 | begitukah
62 | begitulah
63 | begitupun
64 | bekerja
65 | belakang
66 | belakangan
67 | belum
68 | belumlah
69 | benar
70 | benarkah
71 | benarlah
72 | berada
73 | berakhir
74 | berakhirlah
75 | berakhirnya
76 | berapa
77 | berapakah
78 | berapalah
79 | berapapun
80 | berarti
81 | berawal
82 | berbagai
83 | berdatangan
84 | beri
85 | berikan
86 | berikut
87 | berikutnya
88 | berjumlah
89 | berkali-kali
90 | berkata
91 | berkehendak
92 | berkeinginan
93 | berkenaan
94 | berlainan
95 | berlalu
96 | berlangsung
97 | berlebihan
98 | bermacam
99 | bermacam-macam
100 | bermaksud
101 | bermula
102 | bersama
103 | bersama-sama
104 | bersiap
105 | bersiap-siap
106 | bertanya
107 | bertanya-tanya
108 | berturut
109 | berturut-turut
110 | bertutur
111 | berujar
112 | berupa
113 | besar
114 | betul
115 | betulkah
116 | biasa
117 | biasanya
118 | bila
119 | bilakah
120 | bisa
121 | bisakah
122 | boleh
123 | bolehkah
124 | bolehlah
125 | buat
126 | bukan
127 | bukankah
128 | bukanlah
129 | bukannya
130 | bulan
131 | bung
132 | cara
133 | caranya
134 | cukup
135 | cukupkah
136 | cukuplah
137 | cuma
138 | dahulu
139 | dalam
140 | dan
141 | dapat
142 | dari
143 | daripada
144 | datang
145 | dekat
146 | demi
147 | demikian
148 | demikianlah
149 | dengan
150 | depan
151 | di
152 | dia
153 | diakhiri
154 | diakhirinya
155 | dialah
156 | diantara
157 | diantaranya
158 | diberi
159 | diberikan
160 | diberikannya
161 | dibuat
162 | dibuatnya
163 | didapat
164 | didatangkan
165 | digunakan
166 | diibaratkan
167 | diibaratkannya
168 | diingat
169 | diingatkan
170 | diinginkan
171 | dijawab
172 | dijelaskan
173 | dijelaskannya
174 | dikarenakan
175 | dikatakan
176 | dikatakannya
177 | dikerjakan
178 | diketahui
179 | diketahuinya
180 | dikira
181 | dilakukan
182 | dilalui
183 | dilihat
184 | dimaksud
185 | dimaksudkan
186 | dimaksudkannya
187 | dimaksudnya
188 | diminta
189 | dimintai
190 | dimisalkan
191 | dimulai
192 | dimulailah
193 | dimulainya
194 | dimungkinkan
195 | dini
196 | dipastikan
197 | diperbuat
198 | diperbuatnya
199 | dipergunakan
200 | diperkirakan
201 | diperlihatkan
202 | diperlukan
203 | diperlukannya
204 | dipersoalkan
205 | dipertanyakan
206 | dipunyai
207 | diri
208 | dirinya
209 | disampaikan
210 | disebut
211 | disebutkan
212 | disebutkannya
213 | disini
214 | disinilah
215 | ditambahkan
216 | ditandaskan
217 | ditanya
218 | ditanyai
219 | ditanyakan
220 | ditegaskan
221 | ditujukan
222 | ditunjuk
223 | ditunjuki
224 | ditunjukkan
225 | ditunjukkannya
226 | ditunjuknya
227 | dituturkan
228 | dituturkannya
229 | diucapkan
230 | diucapkannya
231 | diungkapkan
232 | dong
233 | dua
234 | dulu
235 | empat
236 | enggak
237 | enggaknya
238 | entah
239 | entahlah
240 | guna
241 | gunakan
242 | hal
243 | hampir
244 | hanya
245 | hanyalah
246 | hari
247 | harus
248 | haruslah
249 | harusnya
250 | hendak
251 | hendaklah
252 | hendaknya
253 | hingga
254 | ia
255 | ialah
256 | ibarat
257 | ibaratkan
258 | ibaratnya
259 | ibu
260 | ikut
261 | ingat
262 | ingat-ingat
263 | ingin
264 | inginkah
265 | inginkan
266 | ini
267 | inikah
268 | inilah
269 | itu
270 | itukah
271 | itulah
272 | jadi
273 | jadilah
274 | jadinya
275 | jangan
276 | jangankan
277 | janganlah
278 | jauh
279 | jawab
280 | jawaban
281 | jawabnya
282 | jelas
283 | jelaskan
284 | jelaslah
285 | jelasnya
286 | jika
287 | jikalau
288 | juga
289 | jumlah
290 | jumlahnya
291 | justru
292 | kala
293 | kalau
294 | kalaulah
295 | kalaupun
296 | kalian
297 | kami
298 | kamilah
299 | kamu
300 | kamulah
301 | kan
302 | kapan
303 | kapankah
304 | kapanpun
305 | karena
306 | karenanya
307 | kasus
308 | kata
309 | katakan
310 | katakanlah
311 | katanya
312 | ke
313 | keadaan
314 | kebetulan
315 | kecil
316 | kedua
317 | keduanya
318 | keinginan
319 | kelamaan
320 | kelihatan
321 | kelihatannya
322 | kelima
323 | keluar
324 | kembali
325 | kemudian
326 | kemungkinan
327 | kemungkinannya
328 | kenapa
329 | kepada
330 | kepadanya
331 | kesampaian
332 | keseluruhan
333 | keseluruhannya
334 | keterlaluan
335 | ketika
336 | khususnya
337 | kini
338 | kinilah
339 | kira
340 | kira-kira
341 | kiranya
342 | kita
343 | kitalah
344 | kok
345 | kurang
346 | lagi
347 | lagian
348 | lah
349 | lain
350 | lainnya
351 | lalu
352 | lama
353 | lamanya
354 | lanjut
355 | lanjutnya
356 | lebih
357 | lewat
358 | lima
359 | luar
360 | macam
361 | maka
362 | makanya
363 | makin
364 | malah
365 | malahan
366 | mampu
367 | mampukah
368 | mana
369 | manakala
370 | manalagi
371 | masa
372 | masalah
373 | masalahnya
374 | masih
375 | masihkah
376 | masing
377 | masing-masing
378 | mau
379 | maupun
380 | melainkan
381 | melakukan
382 | melalui
383 | melihat
384 | melihatnya
385 | memang
386 | memastikan
387 | memberi
388 | memberikan
389 | membuat
390 | memerlukan
391 | memihak
392 | meminta
393 | memintakan
394 | memisalkan
395 | memperbuat
396 | mempergunakan
397 | memperkirakan
398 | memperlihatkan
399 | mempersiapkan
400 | mempersoalkan
401 | mempertanyakan
402 | mempunyai
403 | memulai
404 | memungkinkan
405 | menaiki
406 | menambahkan
407 | menandaskan
408 | menanti
409 | menanti-nanti
410 | menantikan
411 | menanya
412 | menanyai
413 | menanyakan
414 | mendapat
415 | mendapatkan
416 | mendatang
417 | mendatangi
418 | mendatangkan
419 | menegaskan
420 | mengakhiri
421 | mengapa
422 | mengatakan
423 | mengatakannya
424 | mengenai
425 | mengerjakan
426 | mengetahui
427 | menggunakan
428 | menghendaki
429 | mengibaratkan
430 | mengibaratkannya
431 | mengingat
432 | mengingatkan
433 | menginginkan
434 | mengira
435 | mengucapkan
436 | mengucapkannya
437 | mengungkapkan
438 | menjadi
439 | menjawab
440 | menjelaskan
441 | menuju
442 | menunjuk
443 | menunjuki
444 | menunjukkan
445 | menunjuknya
446 | menurut
447 | menuturkan
448 | menyampaikan
449 | menyangkut
450 | menyatakan
451 | menyebutkan
452 | menyeluruh
453 | menyiapkan
454 | merasa
455 | mereka
456 | merekalah
457 | merupakan
458 | meski
459 | meskipun
460 | meyakini
461 | meyakinkan
462 | minta
463 | mirip
464 | misal
465 | misalkan
466 | misalnya
467 | mula
468 | mulai
469 | mulailah
470 | mulanya
471 | mungkin
472 | mungkinkah
473 | nah
474 | naik
475 | namun
476 | nanti
477 | nantinya
478 | nyaris
479 | nyatanya
480 | oleh
481 | olehnya
482 | pada
483 | padahal
484 | padanya
485 | pak
486 | paling
487 | panjang
488 | pantas
489 | para
490 | pasti
491 | pastilah
492 | penting
493 | pentingnya
494 | per
495 | percuma
496 | perlu
497 | perlukah
498 | perlunya
499 | pernah
500 | persoalan
501 | pertama
502 | pertama-tama
503 | pertanyaan
504 | pertanyakan
505 | pihak
506 | pihaknya
507 | pukul
508 | pula
509 | pun
510 | punya
511 | rasa
512 | rasanya
513 | rata
514 | rupanya
515 | saat
516 | saatnya
517 | saja
518 | sajalah
519 | saling
520 | sama
521 | sama-sama
522 | sambil
523 | sampai
524 | sampai-sampai
525 | sampaikan
526 | sana
527 | sangat
528 | sangatlah
529 | satu
530 | saya
531 | sayalah
532 | se
533 | sebab
534 | sebabnya
535 | sebagai
536 | sebagaimana
537 | sebagainya
538 | sebagian
539 | sebaik
540 | sebaik-baiknya
541 | sebaiknya
542 | sebaliknya
543 | sebanyak
544 | sebegini
545 | sebegitu
546 | sebelum
547 | sebelumnya
548 | sebenarnya
549 | seberapa
550 | sebesar
551 | sebetulnya
552 | sebisanya
553 | sebuah
554 | sebut
555 | sebutlah
556 | sebutnya
557 | secara
558 | secukupnya
559 | sedang
560 | sedangkan
561 | sedemikian
562 | sedikit
563 | sedikitnya
564 | seenaknya
565 | segala
566 | segalanya
567 | segera
568 | seharusnya
569 | sehingga
570 | seingat
571 | sejak
572 | sejauh
573 | sejenak
574 | sejumlah
575 | sekadar
576 | sekadarnya
577 | sekali
578 | sekali-kali
579 | sekalian
580 | sekaligus
581 | sekalipun
582 | sekarang
583 | sekarang
584 | sekecil
585 | seketika
586 | sekiranya
587 | sekitar
588 | sekitarnya
589 | sekurang-kurangnya
590 | sekurangnya
591 | sela
592 | selain
593 | selaku
594 | selalu
595 | selama
596 | selama-lamanya
597 | selamanya
598 | selanjutnya
599 | seluruh
600 | seluruhnya
601 | semacam
602 | semakin
603 | semampu
604 | semampunya
605 | semasa
606 | semasih
607 | semata
608 | semata-mata
609 | semaunya
610 | sementara
611 | semisal
612 | semisalnya
613 | sempat
614 | semua
615 | semuanya
616 | semula
617 | sendiri
618 | sendirian
619 | sendirinya
620 | seolah
621 | seolah-olah
622 | seorang
623 | sepanjang
624 | sepantasnya
625 | sepantasnyalah
626 | seperlunya
627 | seperti
628 | sepertinya
629 | sepihak
630 | sering
631 | seringnya
632 | serta
633 | serupa
634 | sesaat
635 | sesama
636 | sesampai
637 | sesegera
638 | sesekali
639 | seseorang
640 | sesuatu
641 | sesuatunya
642 | sesudah
643 | sesudahnya
644 | setelah
645 | setempat
646 | setengah
647 | seterusnya
648 | setiap
649 | setiba
650 | setibanya
651 | setidak-tidaknya
652 | setidaknya
653 | setinggi
654 | seusai
655 | sewaktu
656 | siap
657 | siapa
658 | siapakah
659 | siapapun
660 | sini
661 | sinilah
662 | soal
663 | soalnya
664 | suatu
665 | sudah
666 | sudahkah
667 | sudahlah
668 | supaya
669 | tadi
670 | tadinya
671 | tahu
672 | tahun
673 | tak
674 | tambah
675 | tambahnya
676 | tampak
677 | tampaknya
678 | tandas
679 | tandasnya
680 | tanpa
681 | tanya
682 | tanyakan
683 | tanyanya
684 | tapi
685 | tegas
686 | tegasnya
687 | telah
688 | tempat
689 | tengah
690 | tentang
691 | tentu
692 | tentulah
693 | tentunya
694 | tepat
695 | terakhir
696 | terasa
697 | terbanyak
698 | terdahulu
699 | terdapat
700 | terdiri
701 | terhadap
702 | terhadapnya
703 | teringat
704 | teringat-ingat
705 | terjadi
706 | terjadilah
707 | terjadinya
708 | terkira
709 | terlalu
710 | terlebih
711 | terlihat
712 | termasuk
713 | ternyata
714 | tersampaikan
715 | tersebut
716 | tersebutlah
717 | tertentu
718 | tertuju
719 | terus
720 | terutama
721 | tetap
722 | tetapi
723 | tiap
724 | tiba
725 | tiba-tiba
726 | tidak
727 | tidakkah
728 | tidaklah
729 | tiga
730 | tinggi
731 | toh
732 | tunjuk
733 | turut
734 | tutur
735 | tuturnya
736 | ucap
737 | ucapnya
738 | ujar
739 | ujarnya
740 | umum
741 | umumnya
742 | ungkap
743 | ungkapnya
744 | untuk
745 | usah
746 | usai
747 | waduh
748 | wah
749 | wahai
750 | waktu
751 | waktunya
752 | walau
753 | walaupun
754 | wong
755 | yaitu
756 | yakin
757 | yakni
758 | yang
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/italian:
--------------------------------------------------------------------------------
1 | ad
2 | al
3 | allo
4 | ai
5 | agli
6 | all
7 | agl
8 | alla
9 | alle
10 | con
11 | col
12 | coi
13 | da
14 | dal
15 | dallo
16 | dai
17 | dagli
18 | dall
19 | dagl
20 | dalla
21 | dalle
22 | di
23 | del
24 | dello
25 | dei
26 | degli
27 | dell
28 | degl
29 | della
30 | delle
31 | in
32 | nel
33 | nello
34 | nei
35 | negli
36 | nell
37 | negl
38 | nella
39 | nelle
40 | su
41 | sul
42 | sullo
43 | sui
44 | sugli
45 | sull
46 | sugl
47 | sulla
48 | sulle
49 | per
50 | tra
51 | contro
52 | io
53 | tu
54 | lui
55 | lei
56 | noi
57 | voi
58 | loro
59 | mio
60 | mia
61 | miei
62 | mie
63 | tuo
64 | tua
65 | tuoi
66 | tue
67 | suo
68 | sua
69 | suoi
70 | sue
71 | nostro
72 | nostra
73 | nostri
74 | nostre
75 | vostro
76 | vostra
77 | vostri
78 | vostre
79 | mi
80 | ti
81 | ci
82 | vi
83 | lo
84 | la
85 | li
86 | le
87 | gli
88 | ne
89 | il
90 | un
91 | uno
92 | una
93 | ma
94 | ed
95 | se
96 | perché
97 | anche
98 | come
99 | dov
100 | dove
101 | che
102 | chi
103 | cui
104 | non
105 | più
106 | quale
107 | quanto
108 | quanti
109 | quanta
110 | quante
111 | quello
112 | quelli
113 | quella
114 | quelle
115 | questo
116 | questi
117 | questa
118 | queste
119 | si
120 | tutto
121 | tutti
122 | a
123 | c
124 | e
125 | i
126 | l
127 | o
128 | ho
129 | hai
130 | ha
131 | abbiamo
132 | avete
133 | hanno
134 | abbia
135 | abbiate
136 | abbiano
137 | avrò
138 | avrai
139 | avrà
140 | avremo
141 | avrete
142 | avranno
143 | avrei
144 | avresti
145 | avrebbe
146 | avremmo
147 | avreste
148 | avrebbero
149 | avevo
150 | avevi
151 | aveva
152 | avevamo
153 | avevate
154 | avevano
155 | ebbi
156 | avesti
157 | ebbe
158 | avemmo
159 | aveste
160 | ebbero
161 | avessi
162 | avesse
163 | avessimo
164 | avessero
165 | avendo
166 | avuto
167 | avuta
168 | avuti
169 | avute
170 | sono
171 | sei
172 | è
173 | siamo
174 | siete
175 | sia
176 | siate
177 | siano
178 | sarò
179 | sarai
180 | sarà
181 | saremo
182 | sarete
183 | saranno
184 | sarei
185 | saresti
186 | sarebbe
187 | saremmo
188 | sareste
189 | sarebbero
190 | ero
191 | eri
192 | era
193 | eravamo
194 | eravate
195 | erano
196 | fui
197 | fosti
198 | fu
199 | fummo
200 | foste
201 | furono
202 | fossi
203 | fosse
204 | fossimo
205 | fossero
206 | essendo
207 | faccio
208 | fai
209 | facciamo
210 | fanno
211 | faccia
212 | facciate
213 | facciano
214 | farò
215 | farai
216 | farà
217 | faremo
218 | farete
219 | faranno
220 | farei
221 | faresti
222 | farebbe
223 | faremmo
224 | fareste
225 | farebbero
226 | facevo
227 | facevi
228 | faceva
229 | facevamo
230 | facevate
231 | facevano
232 | feci
233 | facesti
234 | fece
235 | facemmo
236 | faceste
237 | fecero
238 | facessi
239 | facesse
240 | facessimo
241 | facessero
242 | facendo
243 | sto
244 | stai
245 | sta
246 | stiamo
247 | stanno
248 | stia
249 | stiate
250 | stiano
251 | starò
252 | starai
253 | starà
254 | staremo
255 | starete
256 | staranno
257 | starei
258 | staresti
259 | starebbe
260 | staremmo
261 | stareste
262 | starebbero
263 | stavo
264 | stavi
265 | stava
266 | stavamo
267 | stavate
268 | stavano
269 | stetti
270 | stesti
271 | stette
272 | stemmo
273 | steste
274 | stettero
275 | stessi
276 | stesse
277 | stessimo
278 | stessero
279 | stando
280 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/kazakh:
--------------------------------------------------------------------------------
1 | ах
2 | ох
3 | эх
4 | ай
5 | эй
6 | ой
7 | тағы
8 | тағыда
9 | әрине
10 | жоқ
11 | сондай
12 | осындай
13 | осылай
14 | солай
15 | мұндай
16 | бұндай
17 | мен
18 | сен
19 | ол
20 | біз
21 | біздер
22 | олар
23 | сіз
24 | сіздер
25 | маған
26 | оған
27 | саған
28 | біздің
29 | сіздің
30 | оның
31 | бізге
32 | сізге
33 | оларға
34 | біздерге
35 | сіздерге
36 | оларға
37 | менімен
38 | сенімен
39 | онымен
40 | бізбен
41 | сізбен
42 | олармен
43 | біздермен
44 | сіздермен
45 | менің
46 | сенің
47 | біздің
48 | сіздің
49 | оның
50 | біздердің
51 | сіздердің
52 | олардың
53 | маған
54 | саған
55 | оған
56 | менен
57 | сенен
58 | одан
59 | бізден
60 | сізден
61 | олардан
62 | біздерден
63 | сіздерден
64 | олардан
65 | айтпақшы
66 | сонымен
67 | сондықтан
68 | бұл
69 | осы
70 | сол
71 | анау
72 | мынау
73 | сонау
74 | осынау
75 | ана
76 | мына
77 | сона
78 | әні
79 | міне
80 | өй
81 | үйт
82 | бүйт
83 | біреу
84 | кейбіреу
85 | кейбір
86 | қайсыбір
87 | әрбір
88 | бірнеше
89 | бірдеме
90 | бірнеше
91 | әркім
92 | әрне
93 | әрқайсы
94 | әрқалай
95 | әлдекім
96 | әлдене
97 | әлдеқайдан
98 | әлденеше
99 | әлдеқалай
100 | әлдеқашан
101 | алдақашан
102 | еш
103 | ешкім
104 | ешбір
105 | ештеме
106 | дәнеңе
107 | ешқашан
108 | ешқандай
109 | ешқайсы
110 | емес
111 | бәрі
112 | барлық
113 | барша
114 | бар
115 | күллі
116 | бүкіл
117 | түгел
118 | өз
119 | өзім
120 | өзің
121 | өзінің
122 | өзіме
123 | өзіне
124 | өзімнің
125 | өзі
126 | өзге
127 | менде
128 | сенде
129 | онда
130 | менен
131 | сенен онан
132 | одан
133 | ау
134 | па
135 | ей
136 | әй
137 | е
138 | уа
139 | уау
140 | уай
141 | я
142 | пай
143 | ә
144 | о
145 | оһо
146 | ой
147 | ие
148 | аһа
149 | ау
150 | беу
151 | мәссаған
152 | бәрекелді
153 | әттегенай
154 | жаракімалла
155 | масқарай
156 | астапыралла
157 | япырмай
158 | ойпырмай
159 | кәне
160 | кәнеки
161 | ал
162 | әйда
163 | кәні
164 | міне
165 | әні
166 | сорап
167 | қош-қош
168 | пфша
169 | пішә
170 | құрау-құрау
171 | шәйт
172 | шек
173 | моһ
174 | тәк
175 | құрау
176 | құр
177 | кә
178 | кәһ
179 | күшім
180 | күшім
181 | мышы
182 | пырс
183 | әукім
184 | алақай
185 | паһ-паһ
186 | бәрекелді
187 | ура
188 | әттең
189 | әттеген-ай
190 | қап
191 | түге
192 | пішту
193 | шіркін
194 | алатау
195 | пай-пай
196 | үшін
197 | сайын
198 | сияқты
199 | туралы
200 | арқылы
201 | бойы
202 | бойымен
203 | шамалы
204 | шақты
205 | қаралы
206 | ғұрлы
207 | ғұрлым
208 | шейін
209 | дейін
210 | қарай
211 | таман
212 | салым
213 | тарта
214 | жуық
215 | таяу
216 | гөрі
217 | бері
218 | кейін
219 | соң
220 | бұрын
221 | бетер
222 | қатар
223 | бірге
224 | қоса
225 | арс
226 |
227 | гүрс
228 |
229 | дүрс
230 |
231 | қорс
232 |
233 | тарс
234 |
235 | тырс
236 |
237 | ырс
238 |
239 | барқ
240 |
241 | борт
242 |
243 | күрт
244 |
245 | кірт
246 |
247 | морт
248 |
249 | сарт
250 |
251 | шырт
252 |
253 | дүңк
254 |
255 | күңк
256 |
257 | қыңқ
258 |
259 | мыңқ
260 |
261 | маңқ
262 |
263 | саңқ
264 |
265 | шаңқ
266 |
267 | шіңк
268 |
269 | сыңқ
270 |
271 | таңқ
272 |
273 | тыңқ
274 |
275 | ыңқ
276 |
277 | болп
278 |
279 | былп
280 |
281 | жалп
282 |
283 | желп
284 |
285 | қолп
286 |
287 | ірк
288 |
289 | ырқ
290 |
291 | сарт-сұрт
292 |
293 | тарс-тұрс
294 |
295 | арс-ұрс
296 |
297 | жалт-жалт
298 |
299 | жалт-жұлт
300 |
301 | қалт-қалт
302 |
303 | қалт-құлт
304 |
305 | қаңқ-қаңқ
306 |
307 | қаңқ-құңқ
308 |
309 | шаңқ-шаңқ
310 |
311 | шаңқ-шұңқ
312 |
313 | арбаң-арбаң
314 |
315 | бүгжең-бүгжең
316 |
317 | арсалаң-арсалаң
318 |
319 | ербелең-ербелең
320 |
321 | батыр-бұтыр
322 |
323 | далаң-далаң
324 |
325 | тарбаң-тарбаң
326 |
327 | қызараң-қызараң
328 |
329 | қаңғыр-күңгір
330 |
331 | қайқаң-құйқаң
332 |
333 | митың-митың
334 |
335 | салаң-сұлаң
336 |
337 | ыржың-тыржың
338 | бірақ
339 | алайда
340 | дегенмен
341 | әйтпесе
342 | әйткенмен
343 | себебі
344 | өйткені
345 | сондықтан
346 | үшін
347 | сайын
348 | сияқты
349 | туралы
350 | арқылы
351 | бойы
352 | бойымен
353 | шамалы
354 | шақты
355 | қаралы
356 | ғұрлы
357 | ғұрлым
358 | гөрі
359 | бері
360 | кейін
361 | соң
362 | бұрын
363 | бетер
364 | қатар
365 | бірге
366 | қоса
367 | шейін
368 | дейін
369 | қарай
370 | таман
371 | салым
372 | тарта
373 | жуық
374 | таяу
375 | арнайы
376 | осындай
377 | ғана
378 | қана
379 | тек
380 | әншейін
381 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/nepali:
--------------------------------------------------------------------------------
1 | छ
2 | र
3 | पनि
4 | छन्
5 | लागि
6 | भएको
7 | गरेको
8 | भने
9 | गर्न
10 | गर्ने
11 | हो
12 | तथा
13 | यो
14 | रहेको
15 | उनले
16 | थियो
17 | हुने
18 | गरेका
19 | थिए
20 | गर्दै
21 | तर
22 | नै
23 | को
24 | मा
25 | हुन्
26 | भन्ने
27 | हुन
28 | गरी
29 | त
30 | हुन्छ
31 | अब
32 | के
33 | रहेका
34 | गरेर
35 | छैन
36 | दिए
37 | भए
38 | यस
39 | ले
40 | गर्नु
41 | औं
42 | सो
43 | त्यो
44 | कि
45 | जुन
46 | यी
47 | का
48 | गरि
49 | ती
50 | न
51 | छु
52 | छौं
53 | लाई
54 | नि
55 | उप
56 | अक्सर
57 | आदि
58 | कसरी
59 | क्रमशः
60 | चाले
61 | अगाडी
62 | अझै
63 | अनुसार
64 | अन्तर्गत
65 | अन्य
66 | अन्यत्र
67 | अन्यथा
68 | अरु
69 | अरुलाई
70 | अर्को
71 | अर्थात
72 | अर्थात्
73 | अलग
74 | आए
75 | आजको
76 | ओठ
77 | आत्म
78 | आफू
79 | आफूलाई
80 | आफ्नै
81 | आफ्नो
82 | आयो
83 | उदाहरण
84 | उनको
85 | उहालाई
86 | एउटै
87 | एक
88 | एकदम
89 | कतै
90 | कम से कम
91 | कसै
92 | कसैले
93 | कहाँबाट
94 | कहिलेकाहीं
95 | का
96 | किन
97 | किनभने
98 | कुनै
99 | कुरा
100 | कृपया
101 | केही
102 | कोही
103 | गए
104 | गरौं
105 | गर्छ
106 | गर्छु
107 | गर्नुपर्छ
108 | गयौ
109 | गैर
110 | चार
111 | चाहनुहुन्छ
112 | चाहन्छु
113 | चाहिए
114 | छू
115 | जताततै
116 | जब
117 | जबकि
118 | जसको
119 | जसबाट
120 | जसमा
121 | जसलाई
122 | जसले
123 | जस्तै
124 | जस्तो
125 | जस्तोसुकै
126 | जहाँ
127 | जान
128 | जाहिर
129 | जे
130 | जो
131 | ठीक
132 | तत्काल
133 | तदनुसार
134 | तपाईको
135 | तपाई
136 | पर्याप्त
137 | पहिले
138 | पहिलो
139 | पहिल्यै
140 | पाँच
141 | पाँचौं
142 | तल
143 | तापनी
144 | तिनी
145 | तिनीहरू
146 | तिनीहरुको
147 | तिनिहरुलाई
148 | तिमी
149 | तिर
150 | तीन
151 | तुरुन्तै
152 | तेस्रो
153 | तेस्कारण
154 | पूर्व
155 | प्रति
156 | प्रतेक
157 | प्लस
158 | फेरी
159 | बने
160 | त्सपछि
161 | त्सैले
162 | त्यहाँ
163 | थिएन
164 | दिनुभएको
165 | दिनुहुन्छ
166 | दुई
167 | देखि
168 | बरु
169 | बारे
170 | बाहिर
171 | देखिन्छ
172 | देखियो
173 | देखे
174 | देखेको
175 | देखेर
176 | दोस्रो
177 | धेरै
178 | नजिकै
179 | नत्र
180 | नयाँ
181 | निम्ति
182 | बाहेक
183 | बीच
184 | बीचमा
185 | भन
186 | निम्न
187 | निम्नानुसार
188 | निर्दिष्ट
189 | नौ
190 | पक्का
191 | पक्कै
192 | पछि
193 | पछिल्लो
194 | पटक
195 | पर्छ
196 | पर्थ्यो
197 | भन्छन्
198 | भन्
199 | भन्छु
200 | भन्दा
201 | भन्नुभयो
202 | भर
203 | भित्र
204 | भित्री
205 | म
206 | मलाई
207 | मात्र
208 | माथि
209 | मुख्य
210 | मेरो
211 | यति
212 | यथोचित
213 | यदि
214 | यद्यपि
215 | यसको
216 | यसपछि
217 | यसबाहेक
218 | यसरी
219 | यसो
220 | यस्तो
221 | यहाँ
222 | यहाँसम्म
223 | या
224 | रही
225 | राखे
226 | राख्छ
227 | राम्रो
228 | रूप
229 | लगभग
230 | वरीपरी
231 | वास्तवमा
232 | बिरुद्ध
233 | बिशेष
234 | सायद
235 | शायद
236 | संग
237 | संगै
238 | सक्छ
239 | सट्टा
240 | सधै
241 | सबै
242 | सबैलाई
243 | समय
244 | सम्भव
245 | सम्म
246 | सही
247 | साँच्चै
248 | सात
249 | साथ
250 | साथै
251 | सारा
252 | सोही
253 | स्पष्ट
254 | हरे
255 | हरेक
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/norwegian:
--------------------------------------------------------------------------------
1 | og
2 | i
3 | jeg
4 | det
5 | at
6 | en
7 | et
8 | den
9 | til
10 | er
11 | som
12 | på
13 | de
14 | med
15 | han
16 | av
17 | ikke
18 | ikkje
19 | der
20 | så
21 | var
22 | meg
23 | seg
24 | men
25 | ett
26 | har
27 | om
28 | vi
29 | min
30 | mitt
31 | ha
32 | hadde
33 | hun
34 | nå
35 | over
36 | da
37 | ved
38 | fra
39 | du
40 | ut
41 | sin
42 | dem
43 | oss
44 | opp
45 | man
46 | kan
47 | hans
48 | hvor
49 | eller
50 | hva
51 | skal
52 | selv
53 | sjøl
54 | her
55 | alle
56 | vil
57 | bli
58 | ble
59 | blei
60 | blitt
61 | kunne
62 | inn
63 | når
64 | være
65 | kom
66 | noen
67 | noe
68 | ville
69 | dere
70 | som
71 | deres
72 | kun
73 | ja
74 | etter
75 | ned
76 | skulle
77 | denne
78 | for
79 | deg
80 | si
81 | sine
82 | sitt
83 | mot
84 | å
85 | meget
86 | hvorfor
87 | dette
88 | disse
89 | uten
90 | hvordan
91 | ingen
92 | din
93 | ditt
94 | blir
95 | samme
96 | hvilken
97 | hvilke
98 | sånn
99 | inni
100 | mellom
101 | vår
102 | hver
103 | hvem
104 | vors
105 | hvis
106 | både
107 | bare
108 | enn
109 | fordi
110 | før
111 | mange
112 | også
113 | slik
114 | vært
115 | være
116 | båe
117 | begge
118 | siden
119 | dykk
120 | dykkar
121 | dei
122 | deira
123 | deires
124 | deim
125 | di
126 | då
127 | eg
128 | ein
129 | eit
130 | eitt
131 | elles
132 | honom
133 | hjå
134 | ho
135 | hoe
136 | henne
137 | hennar
138 | hennes
139 | hoss
140 | hossen
141 | ikkje
142 | ingi
143 | inkje
144 | korleis
145 | korso
146 | kva
147 | kvar
148 | kvarhelst
149 | kven
150 | kvi
151 | kvifor
152 | me
153 | medan
154 | mi
155 | mine
156 | mykje
157 | no
158 | nokon
159 | noka
160 | nokor
161 | noko
162 | nokre
163 | si
164 | sia
165 | sidan
166 | so
167 | somt
168 | somme
169 | um
170 | upp
171 | vere
172 | vore
173 | verte
174 | vort
175 | varte
176 | vart
177 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/portuguese:
--------------------------------------------------------------------------------
1 | a
2 | à
3 | ao
4 | aos
5 | aquela
6 | aquelas
7 | aquele
8 | aqueles
9 | aquilo
10 | as
11 | às
12 | até
13 | com
14 | como
15 | da
16 | das
17 | de
18 | dela
19 | delas
20 | dele
21 | deles
22 | depois
23 | do
24 | dos
25 | e
26 | é
27 | ela
28 | elas
29 | ele
30 | eles
31 | em
32 | entre
33 | era
34 | eram
35 | éramos
36 | essa
37 | essas
38 | esse
39 | esses
40 | esta
41 | está
42 | estamos
43 | estão
44 | estar
45 | estas
46 | estava
47 | estavam
48 | estávamos
49 | este
50 | esteja
51 | estejam
52 | estejamos
53 | estes
54 | esteve
55 | estive
56 | estivemos
57 | estiver
58 | estivera
59 | estiveram
60 | estivéramos
61 | estiverem
62 | estivermos
63 | estivesse
64 | estivessem
65 | estivéssemos
66 | estou
67 | eu
68 | foi
69 | fomos
70 | for
71 | fora
72 | foram
73 | fôramos
74 | forem
75 | formos
76 | fosse
77 | fossem
78 | fôssemos
79 | fui
80 | há
81 | haja
82 | hajam
83 | hajamos
84 | hão
85 | havemos
86 | haver
87 | hei
88 | houve
89 | houvemos
90 | houver
91 | houvera
92 | houverá
93 | houveram
94 | houvéramos
95 | houverão
96 | houverei
97 | houverem
98 | houveremos
99 | houveria
100 | houveriam
101 | houveríamos
102 | houvermos
103 | houvesse
104 | houvessem
105 | houvéssemos
106 | isso
107 | isto
108 | já
109 | lhe
110 | lhes
111 | mais
112 | mas
113 | me
114 | mesmo
115 | meu
116 | meus
117 | minha
118 | minhas
119 | muito
120 | na
121 | não
122 | nas
123 | nem
124 | no
125 | nos
126 | nós
127 | nossa
128 | nossas
129 | nosso
130 | nossos
131 | num
132 | numa
133 | o
134 | os
135 | ou
136 | para
137 | pela
138 | pelas
139 | pelo
140 | pelos
141 | por
142 | qual
143 | quando
144 | que
145 | quem
146 | são
147 | se
148 | seja
149 | sejam
150 | sejamos
151 | sem
152 | ser
153 | será
154 | serão
155 | serei
156 | seremos
157 | seria
158 | seriam
159 | seríamos
160 | seu
161 | seus
162 | só
163 | somos
164 | sou
165 | sua
166 | suas
167 | também
168 | te
169 | tem
170 | tém
171 | temos
172 | tenha
173 | tenham
174 | tenhamos
175 | tenho
176 | terá
177 | terão
178 | terei
179 | teremos
180 | teria
181 | teriam
182 | teríamos
183 | teu
184 | teus
185 | teve
186 | tinha
187 | tinham
188 | tínhamos
189 | tive
190 | tivemos
191 | tiver
192 | tivera
193 | tiveram
194 | tivéramos
195 | tiverem
196 | tivermos
197 | tivesse
198 | tivessem
199 | tivéssemos
200 | tu
201 | tua
202 | tuas
203 | um
204 | uma
205 | você
206 | vocês
207 | vos
208 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/romanian:
--------------------------------------------------------------------------------
1 | a
2 | abia
3 | acea
4 | aceasta
5 | această
6 | aceea
7 | aceeasi
8 | acei
9 | aceia
10 | acel
11 | acela
12 | acelasi
13 | acele
14 | acelea
15 | acest
16 | acesta
17 | aceste
18 | acestea
19 | acestei
20 | acestia
21 | acestui
22 | aceşti
23 | aceştia
24 | adica
25 | ai
26 | aia
27 | aibă
28 | aici
29 | al
30 | ala
31 | ale
32 | alea
33 | alt
34 | alta
35 | altceva
36 | altcineva
37 | alte
38 | altfel
39 | alti
40 | altii
41 | altul
42 | am
43 | anume
44 | apoi
45 | ar
46 | are
47 | as
48 | asa
49 | asta
50 | astea
51 | astfel
52 | asupra
53 | atare
54 | atat
55 | atata
56 | atatea
57 | atatia
58 | ati
59 | atit
60 | atita
61 | atitea
62 | atitia
63 | atunci
64 | au
65 | avea
66 | avem
67 | aveţi
68 | avut
69 | aş
70 | aţi
71 | ba
72 | ca
73 | cam
74 | cand
75 | care
76 | careia
77 | carora
78 | caruia
79 | cat
80 | catre
81 | ce
82 | cea
83 | ceea
84 | cei
85 | ceilalti
86 | cel
87 | cele
88 | celor
89 | ceva
90 | chiar
91 | ci
92 | cind
93 | cine
94 | cineva
95 | cit
96 | cita
97 | cite
98 | citeva
99 | citi
100 | citiva
101 | cu
102 | cui
103 | cum
104 | cumva
105 | cât
106 | câte
107 | câtva
108 | câţi
109 | cînd
110 | cît
111 | cîte
112 | cîtva
113 | cîţi
114 | că
115 | căci
116 | cărei
117 | căror
118 | cărui
119 | către
120 | da
121 | daca
122 | dacă
123 | dar
124 | dat
125 | dată
126 | dau
127 | de
128 | deasupra
129 | deci
130 | decit
131 | deja
132 | desi
133 | despre
134 | deşi
135 | din
136 | dintr
137 | dintr-
138 | dintre
139 | doar
140 | doi
141 | doilea
142 | două
143 | drept
144 | dupa
145 | după
146 | dă
147 | e
148 | ea
149 | ei
150 | el
151 | ele
152 | era
153 | eram
154 | este
155 | eu
156 | eşti
157 | face
158 | fara
159 | fata
160 | fel
161 | fi
162 | fie
163 | fiecare
164 | fii
165 | fim
166 | fiu
167 | fiţi
168 | foarte
169 | fost
170 | fără
171 | i
172 | ia
173 | iar
174 | ii
175 | il
176 | imi
177 | in
178 | inainte
179 | inapoi
180 | inca
181 | incit
182 | insa
183 | intr
184 | intre
185 | isi
186 | iti
187 | la
188 | le
189 | li
190 | lor
191 | lui
192 | lângă
193 | lîngă
194 | m
195 | ma
196 | mai
197 | mea
198 | mei
199 | mele
200 | mereu
201 | meu
202 | mi
203 | mie
204 | mine
205 | mod
206 | mult
207 | multa
208 | multe
209 | multi
210 | multă
211 | mulţi
212 | mâine
213 | mîine
214 | mă
215 | ne
216 | ni
217 | nici
218 | nimeni
219 | nimic
220 | niste
221 | nişte
222 | noastre
223 | noastră
224 | noi
225 | nostri
226 | nostru
227 | nou
228 | noua
229 | nouă
230 | noştri
231 | nu
232 | numai
233 | o
234 | or
235 | ori
236 | oricare
237 | orice
238 | oricine
239 | oricum
240 | oricând
241 | oricât
242 | oricînd
243 | oricît
244 | oriunde
245 | pai
246 | parca
247 | patra
248 | patru
249 | pe
250 | pentru
251 | peste
252 | pic
253 | pina
254 | poate
255 | pot
256 | prea
257 | prima
258 | primul
259 | prin
260 | printr-
261 | putini
262 | puţin
263 | puţina
264 | puţină
265 | până
266 | pînă
267 | sa
268 | sa-mi
269 | sa-ti
270 | sai
271 | sale
272 | sau
273 | se
274 | si
275 | sint
276 | sintem
277 | spate
278 | spre
279 | sub
280 | sunt
281 | suntem
282 | sunteţi
283 | sus
284 | să
285 | săi
286 | său
287 | t
288 | ta
289 | tale
290 | te
291 | ti
292 | tine
293 | toata
294 | toate
295 | toată
296 | tocmai
297 | tot
298 | toti
299 | totul
300 | totusi
301 | totuşi
302 | toţi
303 | trei
304 | treia
305 | treilea
306 | tu
307 | tuturor
308 | tăi
309 | tău
310 | u
311 | ul
312 | ului
313 | un
314 | una
315 | unde
316 | undeva
317 | unei
318 | uneia
319 | unele
320 | uneori
321 | unii
322 | unor
323 | unora
324 | unu
325 | unui
326 | unuia
327 | unul
328 | v
329 | va
330 | vi
331 | voastre
332 | voastră
333 | voi
334 | vom
335 | vor
336 | vostru
337 | vouă
338 | voştri
339 | vreo
340 | vreun
341 | vă
342 | zi
343 | zice
344 | îi
345 | îl
346 | îmi
347 | în
348 | îţi
349 | ăla
350 | ălea
351 | ăsta
352 | ăstea
353 | ăştia
354 | şi
355 | ţi
356 | ţie
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/russian:
--------------------------------------------------------------------------------
1 | и
2 | в
3 | во
4 | не
5 | что
6 | он
7 | на
8 | я
9 | с
10 | со
11 | как
12 | а
13 | то
14 | все
15 | она
16 | так
17 | его
18 | но
19 | да
20 | ты
21 | к
22 | у
23 | же
24 | вы
25 | за
26 | бы
27 | по
28 | только
29 | ее
30 | мне
31 | было
32 | вот
33 | от
34 | меня
35 | еще
36 | нет
37 | о
38 | из
39 | ему
40 | теперь
41 | когда
42 | даже
43 | ну
44 | вдруг
45 | ли
46 | если
47 | уже
48 | или
49 | ни
50 | быть
51 | был
52 | него
53 | до
54 | вас
55 | нибудь
56 | опять
57 | уж
58 | вам
59 | ведь
60 | там
61 | потом
62 | себя
63 | ничего
64 | ей
65 | может
66 | они
67 | тут
68 | где
69 | есть
70 | надо
71 | ней
72 | для
73 | мы
74 | тебя
75 | их
76 | чем
77 | была
78 | сам
79 | чтоб
80 | без
81 | будто
82 | чего
83 | раз
84 | тоже
85 | себе
86 | под
87 | будет
88 | ж
89 | тогда
90 | кто
91 | этот
92 | того
93 | потому
94 | этого
95 | какой
96 | совсем
97 | ним
98 | здесь
99 | этом
100 | один
101 | почти
102 | мой
103 | тем
104 | чтобы
105 | нее
106 | сейчас
107 | были
108 | куда
109 | зачем
110 | всех
111 | никогда
112 | можно
113 | при
114 | наконец
115 | два
116 | об
117 | другой
118 | хоть
119 | после
120 | над
121 | больше
122 | тот
123 | через
124 | эти
125 | нас
126 | про
127 | всего
128 | них
129 | какая
130 | много
131 | разве
132 | три
133 | эту
134 | моя
135 | впрочем
136 | хорошо
137 | свою
138 | этой
139 | перед
140 | иногда
141 | лучше
142 | чуть
143 | том
144 | нельзя
145 | такой
146 | им
147 | более
148 | всегда
149 | конечно
150 | всю
151 | между
152 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/spanish:
--------------------------------------------------------------------------------
1 | de
2 | la
3 | que
4 | el
5 | en
6 | y
7 | a
8 | los
9 | del
10 | se
11 | las
12 | por
13 | un
14 | para
15 | con
16 | no
17 | una
18 | su
19 | al
20 | lo
21 | como
22 | más
23 | pero
24 | sus
25 | le
26 | ya
27 | o
28 | este
29 | sí
30 | porque
31 | esta
32 | entre
33 | cuando
34 | muy
35 | sin
36 | sobre
37 | también
38 | me
39 | hasta
40 | hay
41 | donde
42 | quien
43 | desde
44 | todo
45 | nos
46 | durante
47 | todos
48 | uno
49 | les
50 | ni
51 | contra
52 | otros
53 | ese
54 | eso
55 | ante
56 | ellos
57 | e
58 | esto
59 | mí
60 | antes
61 | algunos
62 | qué
63 | unos
64 | yo
65 | otro
66 | otras
67 | otra
68 | él
69 | tanto
70 | esa
71 | estos
72 | mucho
73 | quienes
74 | nada
75 | muchos
76 | cual
77 | poco
78 | ella
79 | estar
80 | estas
81 | algunas
82 | algo
83 | nosotros
84 | mi
85 | mis
86 | tú
87 | te
88 | ti
89 | tu
90 | tus
91 | ellas
92 | nosotras
93 | vosotros
94 | vosotras
95 | os
96 | mío
97 | mía
98 | míos
99 | mías
100 | tuyo
101 | tuya
102 | tuyos
103 | tuyas
104 | suyo
105 | suya
106 | suyos
107 | suyas
108 | nuestro
109 | nuestra
110 | nuestros
111 | nuestras
112 | vuestro
113 | vuestra
114 | vuestros
115 | vuestras
116 | esos
117 | esas
118 | estoy
119 | estás
120 | está
121 | estamos
122 | estáis
123 | están
124 | esté
125 | estés
126 | estemos
127 | estéis
128 | estén
129 | estaré
130 | estarás
131 | estará
132 | estaremos
133 | estaréis
134 | estarán
135 | estaría
136 | estarías
137 | estaríamos
138 | estaríais
139 | estarían
140 | estaba
141 | estabas
142 | estábamos
143 | estabais
144 | estaban
145 | estuve
146 | estuviste
147 | estuvo
148 | estuvimos
149 | estuvisteis
150 | estuvieron
151 | estuviera
152 | estuvieras
153 | estuviéramos
154 | estuvierais
155 | estuvieran
156 | estuviese
157 | estuvieses
158 | estuviésemos
159 | estuvieseis
160 | estuviesen
161 | estando
162 | estado
163 | estada
164 | estados
165 | estadas
166 | estad
167 | he
168 | has
169 | ha
170 | hemos
171 | habéis
172 | han
173 | haya
174 | hayas
175 | hayamos
176 | hayáis
177 | hayan
178 | habré
179 | habrás
180 | habrá
181 | habremos
182 | habréis
183 | habrán
184 | habría
185 | habrías
186 | habríamos
187 | habríais
188 | habrían
189 | había
190 | habías
191 | habíamos
192 | habíais
193 | habían
194 | hube
195 | hubiste
196 | hubo
197 | hubimos
198 | hubisteis
199 | hubieron
200 | hubiera
201 | hubieras
202 | hubiéramos
203 | hubierais
204 | hubieran
205 | hubiese
206 | hubieses
207 | hubiésemos
208 | hubieseis
209 | hubiesen
210 | habiendo
211 | habido
212 | habida
213 | habidos
214 | habidas
215 | soy
216 | eres
217 | es
218 | somos
219 | sois
220 | son
221 | sea
222 | seas
223 | seamos
224 | seáis
225 | sean
226 | seré
227 | serás
228 | será
229 | seremos
230 | seréis
231 | serán
232 | sería
233 | serías
234 | seríamos
235 | seríais
236 | serían
237 | era
238 | eras
239 | éramos
240 | erais
241 | eran
242 | fui
243 | fuiste
244 | fue
245 | fuimos
246 | fuisteis
247 | fueron
248 | fuera
249 | fueras
250 | fuéramos
251 | fuerais
252 | fueran
253 | fuese
254 | fueses
255 | fuésemos
256 | fueseis
257 | fuesen
258 | sintiendo
259 | sentido
260 | sentida
261 | sentidos
262 | sentidas
263 | siente
264 | sentid
265 | tengo
266 | tienes
267 | tiene
268 | tenemos
269 | tenéis
270 | tienen
271 | tenga
272 | tengas
273 | tengamos
274 | tengáis
275 | tengan
276 | tendré
277 | tendrás
278 | tendrá
279 | tendremos
280 | tendréis
281 | tendrán
282 | tendría
283 | tendrías
284 | tendríamos
285 | tendríais
286 | tendrían
287 | tenía
288 | tenías
289 | teníamos
290 | teníais
291 | tenían
292 | tuve
293 | tuviste
294 | tuvo
295 | tuvimos
296 | tuvisteis
297 | tuvieron
298 | tuviera
299 | tuvieras
300 | tuviéramos
301 | tuvierais
302 | tuvieran
303 | tuviese
304 | tuvieses
305 | tuviésemos
306 | tuvieseis
307 | tuviesen
308 | teniendo
309 | tenido
310 | tenida
311 | tenidos
312 | tenidas
313 | tened
314 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/swedish:
--------------------------------------------------------------------------------
1 | och
2 | det
3 | att
4 | i
5 | en
6 | jag
7 | hon
8 | som
9 | han
10 | på
11 | den
12 | med
13 | var
14 | sig
15 | för
16 | så
17 | till
18 | är
19 | men
20 | ett
21 | om
22 | hade
23 | de
24 | av
25 | icke
26 | mig
27 | du
28 | henne
29 | då
30 | sin
31 | nu
32 | har
33 | inte
34 | hans
35 | honom
36 | skulle
37 | hennes
38 | där
39 | min
40 | man
41 | ej
42 | vid
43 | kunde
44 | något
45 | från
46 | ut
47 | när
48 | efter
49 | upp
50 | vi
51 | dem
52 | vara
53 | vad
54 | över
55 | än
56 | dig
57 | kan
58 | sina
59 | här
60 | ha
61 | mot
62 | alla
63 | under
64 | någon
65 | eller
66 | allt
67 | mycket
68 | sedan
69 | ju
70 | denna
71 | själv
72 | detta
73 | åt
74 | utan
75 | varit
76 | hur
77 | ingen
78 | mitt
79 | ni
80 | bli
81 | blev
82 | oss
83 | din
84 | dessa
85 | några
86 | deras
87 | blir
88 | mina
89 | samma
90 | vilken
91 | er
92 | sådan
93 | vår
94 | blivit
95 | dess
96 | inom
97 | mellan
98 | sådant
99 | varför
100 | varje
101 | vilka
102 | ditt
103 | vem
104 | vilket
105 | sitta
106 | sådana
107 | vart
108 | dina
109 | vars
110 | vårt
111 | våra
112 | ert
113 | era
114 | vilkas
115 |
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/tajik:
--------------------------------------------------------------------------------
1 | аз
2 | дар
3 | ба
4 | бо
5 | барои
6 | бе
7 | то
8 | ҷуз
9 | пеши
10 | назди
11 | рӯйи
12 | болои
13 | паси
14 | ғайри
15 | ҳамон
16 | ҳамоно
17 | инҷониб
18 | замон
19 | замоно
20 | эътиборан
21 | пеш
22 | қабл
23 | дида
24 | сар карда
25 | агар
26 | агар ки
27 | валекин
28 | ки
29 | лекин
30 | аммо
31 | вале
32 | балки
33 | ва
34 | ҳарчанд
35 | чунки
36 | зеро
37 | зеро ки
38 | вақте ки
39 | то вақте ки
40 | барои он ки
41 | бо нияти он ки
42 | лекин ва ҳол он ки
43 | ё
44 | ё ин ки
45 | бе он ки
46 | дар ҳолате ки
47 | то даме ки
48 | баъд аз он ки
49 | даме ки
50 | ба тразе ки
51 | аз баҳри он ки
52 | гар
53 | ар
54 | ба шарте
55 | азбаски
56 | модоме ки
57 | агар чи
58 | гарчанде ки
59 | бо вуҷуди он ки
60 | гӯё
61 | аз-баски
62 | чун-ки
63 | агар-чанд
64 | агар-чи
65 | гар-чи
66 | то ки
67 | чунон ки
68 | то даме ки
69 | ҳар қадар ки
70 | магар
71 | оё
72 | наход
73 | ҳатто
74 | ҳам
75 | бале
76 | оре
77 | хуб
78 | хуш
79 | хайр
80 | не
81 | на
82 | мана
83 | э
84 | фақат
85 | танҳо
86 | кошки
87 | мабодо
88 | эҳтимол
89 | ана ҳамин
90 | наход ки
91 | ҳатто ки
92 | аз афташ
93 | майлаш куя
94 | ана
95 | ҳа
96 | канӣ
97 | гӯё ки
98 | ҳо ана
99 | на ин ки
100 | ваҳ
101 | ҳой
102 | и
103 | а
104 | о
105 | эҳ
106 | ҳе
107 | ҳу
108 | аҳа
109 | оҳе
110 | уҳа
111 | ҳм
112 | нм
113 | оббо
114 | ӯббо
115 | ҳой-ҳой
116 | вой-вой
117 | ту-ту
118 | ҳмм
119 | эҳа
120 | тавба
121 | ӯҳӯ
122 | аҷабо
123 | ало
124 | аё
125 | ой
126 | ӯим
127 | ором
128 | хомӯш
129 | ҳай-ҳай
130 | бай-бай
131 | аз
132 | он
133 | баъд
134 | азбаски
135 | ӯ
136 | ҳангоми
137 | чӣ
138 | кадом
139 | ин
140 | ҷо
141 | ҳам
142 | ё ки
143 | бояд
144 | аст
145 | чанд
146 | ҳар
147 | бар
148 | чаро ки
149 | агар
150 | то кӣ
151 | бинобар
152 | бинобар ин
153 | ҳаргиз
154 | асло
155 | нахот
156 | нахот ки
157 | кошкӣ
158 | шояд
159 | шояд ки
160 | охир
161 | аз рӯи
162 | аз рӯйи
163 | рӯ
--------------------------------------------------------------------------------
/backend/nltk_data/corpora/stopwords/turkish:
--------------------------------------------------------------------------------
1 | acaba
2 | ama
3 | aslında
4 | az
5 | bazı
6 | belki
7 | biri
8 | birkaç
9 | birşey
10 | biz
11 | bu
12 | çok
13 | çünkü
14 | da
15 | daha
16 | de
17 | defa
18 | diye
19 | eğer
20 | en
21 | gibi
22 | hem
23 | hep
24 | hepsi
25 | her
26 | hiç
27 | için
28 | ile
29 | ise
30 | kez
31 | ki
32 | kim
33 | mı
34 | mu
35 | mü
36 | nasıl
37 | ne
38 | neden
39 | nerde
40 | nerede
41 | nereye
42 | niçin
43 | niye
44 | o
45 | sanki
46 | şey
47 | siz
48 | şu
49 | tüm
50 | ve
51 | veya
52 | ya
53 | yani
54 |
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/.DS_Store
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/README:
--------------------------------------------------------------------------------
1 | Pretrained Punkt Models -- Jan Strunk (New version trained after issues 313 and 514 had been corrected)
2 |
3 | Most models were prepared using the test corpora from Kiss and Strunk (2006). Additional models have
4 | been contributed by various people using NLTK for sentence boundary detection.
5 |
6 | For information about how to use these models, please refer to the tokenization HOWTO:
7 | https://www.nltk.org/howto/tokenize.html
8 | and section 3.8 of chapter 3 of the NLTK book:
9 | https://www.nltk.org/book/ch03.html#sec-segmentation
10 |
11 | There are pretrained tokenizers for the following languages:
12 |
13 | File Language Source Contents Size of training corpus(in tokens) Model contributed by
14 | =======================================================================================================================================================================
15 | czech.pickle Czech Multilingual Corpus 1 (ECI) Lidove Noviny ~345,000 Jan Strunk / Tibor Kiss
16 | Literarni Noviny
17 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
18 | danish.pickle Danish Avisdata CD-Rom Ver. 1.1. 1995 Berlingske Tidende ~550,000 Jan Strunk / Tibor Kiss
19 | (Berlingske Avisdata, Copenhagen) Weekend Avisen
20 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
21 | dutch.pickle Dutch Multilingual Corpus 1 (ECI) De Limburger ~340,000 Jan Strunk / Tibor Kiss
22 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
23 | english.pickle English Penn Treebank (LDC) Wall Street Journal ~469,000 Jan Strunk / Tibor Kiss
24 | (American)
25 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
26 | estonian.pickle Estonian University of Tartu, Estonia Eesti Ekspress ~359,000 Jan Strunk / Tibor Kiss
27 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
28 | finnish.pickle Finnish Finnish Parole Corpus, Finnish Books and major national ~364,000 Jan Strunk / Tibor Kiss
29 | Text Bank (Suomen Kielen newspapers
30 | Tekstipankki)
31 | Finnish Center for IT Science
32 | (CSC)
33 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
34 | french.pickle French Multilingual Corpus 1 (ECI) Le Monde ~370,000 Jan Strunk / Tibor Kiss
35 | (European)
36 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
37 | german.pickle German Neue Zürcher Zeitung AG Neue Zürcher Zeitung ~847,000 Jan Strunk / Tibor Kiss
38 | (Switzerland) CD-ROM
39 | (Uses "ss"
40 | instead of "ß")
41 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
42 | greek.pickle Greek Efstathios Stamatatos To Vima (TO BHMA) ~227,000 Jan Strunk / Tibor Kiss
43 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
44 | italian.pickle Italian Multilingual Corpus 1 (ECI) La Stampa, Il Mattino ~312,000 Jan Strunk / Tibor Kiss
45 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
46 | norwegian.pickle Norwegian Centre for Humanities Bergens Tidende ~479,000 Jan Strunk / Tibor Kiss
47 | (Bokmål and Information Technologies,
48 | Nynorsk) Bergen
49 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
50 | polish.pickle Polish Polish National Corpus Literature, newspapers, etc. ~1,000,000 Krzysztof Langner
51 | (http://www.nkjp.pl/)
52 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
53 | portuguese.pickle Portuguese CETENFolha Corpus Folha de São Paulo ~321,000 Jan Strunk / Tibor Kiss
54 | (Brazilian) (Linguateca)
55 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
56 | slovene.pickle Slovene TRACTOR Delo ~354,000 Jan Strunk / Tibor Kiss
57 | Slovene Academy for Arts
58 | and Sciences
59 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
60 | spanish.pickle Spanish Multilingual Corpus 1 (ECI) Sur ~353,000 Jan Strunk / Tibor Kiss
61 | (European)
62 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
63 | swedish.pickle Swedish Multilingual Corpus 1 (ECI) Dagens Nyheter ~339,000 Jan Strunk / Tibor Kiss
64 | (and some other texts)
65 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
66 | turkish.pickle Turkish METU Turkish Corpus Milliyet ~333,000 Jan Strunk / Tibor Kiss
67 | (Türkçe Derlem Projesi)
68 | University of Ankara
69 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
70 |
71 | The corpora contained about 400,000 tokens on average and mostly consisted of newspaper text converted to
72 | Unicode using the codecs module.
73 |
74 | Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence Boundary Detection.
75 | Computational Linguistics 32: 485-525.
76 |
77 | ---- Training Code ----
78 |
79 | # import punkt
80 | import nltk.tokenize.punkt
81 |
82 | # Make a new Tokenizer
83 | tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer()
84 |
85 | # Read in training corpus (one example: Slovene)
86 | import codecs
87 | text = codecs.open("slovene.plain","Ur","iso-8859-2").read()
88 |
89 | # Train tokenizer
90 | tokenizer.train(text)
91 |
92 | # Dump pickled tokenizer
93 | import pickle
94 | out = open("slovene.pickle","wb")
95 | pickle.dump(tokenizer, out)
96 | out.close()
97 |
98 | ---------
99 |
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/czech.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/czech.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/danish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/danish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/dutch.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/dutch.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/english.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/english.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/estonian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/estonian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/finnish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/finnish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/french.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/french.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/german.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/german.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/greek.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/greek.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/italian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/italian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/malayalam.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/malayalam.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/norwegian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/norwegian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/polish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/polish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/portuguese.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/portuguese.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/russian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/russian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/slovene.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/slovene.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/spanish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/spanish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/swedish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/swedish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/PY3/turkish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/PY3/turkish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/README:
--------------------------------------------------------------------------------
1 | Pretrained Punkt Models -- Jan Strunk (New version trained after issues 313 and 514 had been corrected)
2 |
3 | Most models were prepared using the test corpora from Kiss and Strunk (2006). Additional models have
4 | been contributed by various people using NLTK for sentence boundary detection.
5 |
6 | For information about how to use these models, please confer the tokenization HOWTO:
7 | http://nltk.googlecode.com/svn/trunk/doc/howto/tokenize.html
8 | and chapter 3.8 of the NLTK book:
9 | http://nltk.googlecode.com/svn/trunk/doc/book/ch03.html#sec-segmentation
10 |
11 | There are pretrained tokenizers for the following languages:
12 |
13 | File Language Source Contents Size of training corpus(in tokens) Model contributed by
14 | =======================================================================================================================================================================
15 | czech.pickle Czech Multilingual Corpus 1 (ECI) Lidove Noviny ~345,000 Jan Strunk / Tibor Kiss
16 | Literarni Noviny
17 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
18 | danish.pickle Danish Avisdata CD-Rom Ver. 1.1. 1995 Berlingske Tidende ~550,000 Jan Strunk / Tibor Kiss
19 | (Berlingske Avisdata, Copenhagen) Weekend Avisen
20 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
21 | dutch.pickle Dutch Multilingual Corpus 1 (ECI) De Limburger ~340,000 Jan Strunk / Tibor Kiss
22 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
23 | english.pickle English Penn Treebank (LDC) Wall Street Journal ~469,000 Jan Strunk / Tibor Kiss
24 | (American)
25 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
26 | estonian.pickle Estonian University of Tartu, Estonia Eesti Ekspress ~359,000 Jan Strunk / Tibor Kiss
27 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
28 | finnish.pickle Finnish Finnish Parole Corpus, Finnish Books and major national ~364,000 Jan Strunk / Tibor Kiss
29 | Text Bank (Suomen Kielen newspapers
30 | Tekstipankki)
31 | Finnish Center for IT Science
32 | (CSC)
33 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
34 | french.pickle French Multilingual Corpus 1 (ECI) Le Monde ~370,000 Jan Strunk / Tibor Kiss
35 | (European)
36 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
37 | german.pickle German Neue Zürcher Zeitung AG Neue Zürcher Zeitung ~847,000 Jan Strunk / Tibor Kiss
38 | (Switzerland) CD-ROM
39 | (Uses "ss"
40 | instead of "ß")
41 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
42 | greek.pickle Greek Efstathios Stamatatos To Vima (TO BHMA) ~227,000 Jan Strunk / Tibor Kiss
43 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
44 | italian.pickle Italian Multilingual Corpus 1 (ECI) La Stampa, Il Mattino ~312,000 Jan Strunk / Tibor Kiss
45 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
46 | norwegian.pickle Norwegian Centre for Humanities Bergens Tidende ~479,000 Jan Strunk / Tibor Kiss
47 | (Bokmål and Information Technologies,
48 | Nynorsk) Bergen
49 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
50 | polish.pickle Polish Polish National Corpus Literature, newspapers, etc. ~1,000,000 Krzysztof Langner
51 | (http://www.nkjp.pl/)
52 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
53 | portuguese.pickle Portuguese CETENFolha Corpus Folha de São Paulo ~321,000 Jan Strunk / Tibor Kiss
54 | (Brazilian) (Linguateca)
55 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
56 | slovene.pickle Slovene TRACTOR Delo ~354,000 Jan Strunk / Tibor Kiss
57 | Slovene Academy for Arts
58 | and Sciences
59 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
60 | spanish.pickle Spanish Multilingual Corpus 1 (ECI) Sur ~353,000 Jan Strunk / Tibor Kiss
61 | (European)
62 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
63 | swedish.pickle Swedish Multilingual Corpus 1 (ECI) Dagens Nyheter ~339,000 Jan Strunk / Tibor Kiss
64 | (and some other texts)
65 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
66 | turkish.pickle Turkish METU Turkish Corpus Milliyet ~333,000 Jan Strunk / Tibor Kiss
67 | (Türkçe Derlem Projesi)
68 | University of Ankara
69 | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
70 |
71 | The corpora contained about 400,000 tokens on average and mostly consisted of newspaper text converted to
72 | Unicode using the codecs module.
73 |
74 | Kiss, Tibor and Strunk, Jan (2006): Unsupervised Multilingual Sentence Boundary Detection.
75 | Computational Linguistics 32: 485-525.
76 |
77 | ---- Training Code ----
78 |
79 | # import punkt
80 | import nltk.tokenize.punkt
81 |
82 | # Make a new Tokenizer
83 | tokenizer = nltk.tokenize.punkt.PunktSentenceTokenizer()
84 |
85 | # Read in training corpus (one example: Slovene)
86 | import codecs
87 | text = codecs.open("slovene.plain","Ur","iso-8859-2").read()
88 |
89 | # Train tokenizer
90 | tokenizer.train(text)
91 |
92 | # Dump pickled tokenizer
93 | import pickle
94 | out = open("slovene.pickle","wb")
95 | pickle.dump(tokenizer, out)
96 | out.close()
97 |
98 | ---------
99 |
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/czech.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/czech.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/danish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/danish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/dutch.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/dutch.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/estonian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/estonian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/finnish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/finnish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/french.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/french.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/german.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/german.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/italian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/italian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/malayalam.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/malayalam.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/norwegian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/norwegian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/polish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/polish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/portuguese.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/portuguese.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/russian.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/russian.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/slovene.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/slovene.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/spanish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/spanish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/swedish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/swedish.pickle
--------------------------------------------------------------------------------
/backend/nltk_data/tokenizers/punkt/turkish.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/backend/nltk_data/tokenizers/punkt/turkish.pickle
--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
1 | chromadb==0.4.22
2 | uvicorn==0.22.0
3 | rank-bm25==0.2.2
4 | nltk==3.8.1
5 | fastapi==0.108.0
6 | sse-starlette==1.5.0
7 | dateparser==1.2.0
8 | tiktoken==0.5.2
9 | pydantic==1.10.7
10 | PyJWT==2.3.0
11 | openai==1.7.0
12 | langchain==0.1.0
13 | watchdog==3.0.0
14 | jieba==0.42.1
--------------------------------------------------------------------------------
/backend/settings.py:
--------------------------------------------------------------------------------
1 | # ---Main Settings--- #
2 | NICK_NAME = 'Peter' # This is your nick name. Make sure to set it at the beginning and don't change so that LLM will not get confused.
3 |
4 | EMBEDDING_BASE_URL = 'https://api.openai.com/v1'
5 | EMBEDDING_API_KEY = 'your-api-key'
6 | EMBEDDING_MODEL_NAME = "ada"
7 |
8 | CHAT_BASE_URL = 'https://api.openai.com/v1' # Modify to your OpenAI compatible API url
9 | CHAT_API_KEY = 'your-api-key'
10 | CHAT_MODEL_NAME = "gpt-3.5-turbo"
11 | CHAT_MAX_TOKEN = 400
12 |
13 | # Path to the local directory of your Markdown notebook to store context information
14 | CHAT_PATH = '../md_website/chat_history'
15 | NOTE_PATH = '../md_website/notes'
16 |
17 | # If you're using an online Markdown notebook editor, set up this URL so you can click through to the notebook page in the "Reference".
18 | CHAT_URL = 'http://localhost:3000/chat_history/'
19 | NOTE_URL = 'http://localhost:3000/notes/'
20 |
21 | # MULTILPLE_SYSTEM_PROMPTS adapts the prompt layout to the LLM backend in use.
22 | # Some backends do not support multiple system prompts;
23 | # in that case, set this parameter to False.
24 | # If you don't know whether multiple system prompts are supported,
25 | # you can test by setting this parameter to True and checking that no error occurs during conversation
26 | # and that Loyal Elephie can still answer properly with the provided context.
27 | MULTILPLE_SYSTEM_PROMPTS = False
28 |
29 | # Language Preference (experimental)
30 | # Supported Languages: English, Chinese, German, French, Spanish, Portuguese, Italian, Dutch, Czech, Polish, Russian, Arabic
31 | LANGUAGE_PREFERENCE = "English"
32 |
33 | # ---Retrieval Settings--- #
34 | RETRIEVAL_TOKEN_LIMIT = 2048 # Maximum token limit for the retrieved contexts
35 | RETRIEVAL_NUM_CHOICES = 10 # Number of top choices or results to retrieve for each query
36 | RETRIEVAL_MIN_VALUE = 0.25 # Minimum threshold for the value of retrieved documents
37 | BM25_WEIGHT = 0.1 # Weight given to the BM25 score when adjusting the final score of a document
38 |
39 | # ---Prompts--- #
40 | SUMMARY_PROMPT='''You are the "ASSISTANT" and your task is to take a detailed note about {NICK_NAME} from a conversation with you. You should focus on observations on {NICK_NAME}'s situation and special things mentioned by him but you doesn't need to include assistant's (your own) words unless addressed by {NICK_NAME}.{LANGUAGE_PREFERENCE} Don't write a title and don't write anything else before or after the note.'''
41 | SUMMARY_NOTE_PROMPT='''Your task is to write a comprehensive summary about the Note authored by the user mentioned as *{NICK_NAME}*. The summary should be written as a bullet list of self-contained items without a title.{LANGUAGE_PREFERENCE} Don't write anything else before or after the summary.'''
42 |
43 | AGENT_PROMPT = '''You are Loyal Elephie, {NICK_NAME}'s autonomous secretary who has access to the following tools:
44 | 1. You have an inner monologue section which could help you analyze the problem without disturbing {NICK_NAME}. To use inner monologue section, write your monologue between tags "" and " ". The monologue should including the user problem breakdown the questions you don't yet understand. This tool is how you comprehend.
45 |
46 | 2. You have a memory including {NICK_NAME}'s notes and your past conversations with him, which could possibly provide useful context for this interaction. *To use this external memory, write search query strings each per line between tags "" and " "*. Provide precise dates into the query if possible. This tool is how your recall.
47 | Example of using the memory:
48 | User: Should I buy a new computer?
49 |
50 | {NICK_NAME} computer problem
51 | {NICK_NAME} buy new computer preference
52 |
53 | If you see the search result, be mindful that the context could be ranging from a long period and they will be shown in a timely order.
54 |
55 | 3. Once you have thoroughly comprehended the latest user input, respond by placing your message between the tags?``?and?` `. Only the text inside the "" block will be visible to {NICK_NAME}. Your reply should be supportive, with an analytical, creative, extroverted, and playful personality. You love jokes, sarcasm, and making wild guesses while staying truthful to the accessible context when not making guesses. Always address {NICK_NAME} as "you". This tool is how you speak.
56 |
57 |
58 | Below your interactions with the user ({NICK_NAME}) begin. You will also receive occasional system messages with situational information and instructions.
59 | Current time is {CURRENT_TIME}{LANGUAGE_PREFERENCE}'''
60 |
--------------------------------------------------------------------------------
/external_example/embedding_server.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import List, Optional, Union
3 |
4 | from fastapi import FastAPI
5 | from fastapi.middleware.cors import CORSMiddleware
6 | from pydantic import BaseModel
7 | from sentence_transformers import SentenceTransformer
8 |
9 | app = FastAPI(
10 | title="Embeddings API",
11 | version="0.0.1",
12 | )
13 | app.add_middleware(
14 | CORSMiddleware,
15 | allow_origins=["*"],
16 | allow_credentials=True,
17 | allow_methods=["*"],
18 | allow_headers=["*"],
19 | )
20 |
21 | EMBEDDING_MODEL_NAME = 'BAAI/bge-base-en-v1.5' # Choose a custom embedding model
22 | embeddings = SentenceTransformer(EMBEDDING_MODEL_NAME)
23 |
24 |
25 | class Embedding(BaseModel):
26 | object: str
27 | embedding: List[float]
28 | index: int
29 |
30 |
31 | class Usage(BaseModel):
32 | prompt_tokens: int
33 | total_tokens: int
34 |
35 |
36 | class CreateEmbeddingRequest(BaseModel):
37 | model: Optional[str]
38 | input: Union[str, List[str]]
39 | user: Optional[str] = None
40 |
41 | class Config:
42 | schema_extra = {
43 | "example": {
44 | "input": "The food was delicious and the waiter...",
45 | }
46 | }
47 |
48 |
49 | class CreateEmbeddingResponse(BaseModel):
50 | object: str
51 | data: List[Embedding]
52 | model: str
53 | usage: Usage
54 |
55 |
56 | @app.post(
57 | "/v1/embeddings",
58 | response_model=CreateEmbeddingResponse,
59 | )
60 | def create_embedding(request: CreateEmbeddingRequest):
61 | result = _create_embedding(**request.dict(exclude={"user", "model", "model_config"}))
62 | return result
63 |
64 |
65 | def _create_embedding(input: Union[str, List[str]]):
66 | print(">embedding called")
67 | global embeddings
68 | model_name = EMBEDDING_MODEL_NAME
69 | model_name_short = model_name.split("/")[-1]
70 | if isinstance(input, str):
71 | return CreateEmbeddingResponse(data=[Embedding(embedding=embeddings.encode(input).tolist(), object="embedding", index=0)],
72 | model=model_name_short, object='list',
73 | usage=Usage(prompt_tokens=len(input), total_tokens=len(input))) # MARK; could change to tokens, just for test now
74 | else:
75 | print(">batch call")
76 | data = [Embedding(embedding=embedding, object="embedding", index=i)
77 | for i, embedding in enumerate(embeddings.encode(input).tolist())]
78 | total_tokens = 0
79 | for text in input:
80 | total_tokens += len(text) # MARK; could change to tokens, just for test now
81 | return CreateEmbeddingResponse(data=data, model=model_name_short, object='list',
82 | usage=Usage(prompt_tokens=total_tokens, total_tokens=total_tokens))
83 |
84 |
85 | if __name__ == "__main__":
86 | import os
87 | import uvicorn
88 |
89 | uvicorn.run(app, host="0.0.0.0", port=os.getenv("EMBEDDING_PORT", 8001))
90 |
91 |
--------------------------------------------------------------------------------
/frontend/components/Chat/Chat.tsx:
--------------------------------------------------------------------------------
1 | import { Message } from "@/types";
2 | import { FC } from "react";
3 | import { ChatInput } from "./ChatInput";
4 | import { ChatLoader } from "./ChatLoader";
5 | import { ChatMessage } from "./ChatMessage";
6 |
7 | interface Props {
8 | messages: Message[];
9 | loading: boolean;
10 | isButtonDisabled: boolean;
11 | onSend: (message: Message) => void;
12 | onRevert: () => void;
13 | content: string;
14 | setContent: (content:string) => void
15 | }
16 |
17 | export const Chat: FC = ({ messages, loading, isButtonDisabled, onSend, onRevert, content, setContent }) => {
18 | return (
19 | <>
20 |
21 | {messages.map((message, index) => (
22 |
26 |
27 |
28 | ))}
29 |
30 | {loading && (
31 |
32 |
33 |
34 | )}
35 |
36 |
37 |
39 |
40 |
41 | >
42 | );
43 | };
--------------------------------------------------------------------------------
/frontend/components/Chat/ChatInput.tsx:
--------------------------------------------------------------------------------
1 | import { Message } from "@/types";
2 | import { IconArrowUp, IconArrowBackUp, IconX } from "@tabler/icons-react";
3 | import { FC, KeyboardEvent, useEffect, useRef, useState } from "react";
4 |
5 | interface Props {
6 | onSend: (message: Message) => void;
7 | onRevert: () => void;
8 | isButtonDisabled: boolean;
9 | content: string;
10 | setContent: (content:string) => void
11 | loading: boolean;
12 | }
13 |
14 | export const ChatInput: FC = ({ onSend, onRevert, isButtonDisabled, content, setContent, loading}) => {
15 | // const [content, setContent] = useState();
16 |
17 | const textareaRef = useRef(null);
18 |
19 | // Copy the textarea value into `content`, rejecting anything over 1000 characters.
20 | const handleChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
21 |   if (e.target.value.length > 1000) {
22 |     alert("Message limit is 1000 characters");
23 |     return;
24 |   }
25 |   setContent(e.target.value);
26 | };
27 |
28 | const handleSend = () => {
29 |   if (!content.trim()) { // refuse empty and whitespace-only drafts
30 |     alert("Please enter a message");
31 |     return;
32 |   }
33 |   onSend({ role: "user", content }); // hand the draft to the parent as a user message
34 |   setContent(""); // then clear the input box
35 | };
36 |
37 | const handleKeyDown = (e: KeyboardEvent<HTMLTextAreaElement>) => {
38 |   // Plain Enter sends; Shift+Enter and an Enter that confirms IME text do not.
39 |   if (e.key !== "Enter" || e.shiftKey || isButtonDisabled || e.nativeEvent.isComposing) return;
40 |   e.preventDefault();
41 |   handleSend();
42 | };
43 |
44 | useEffect(() => {
45 |   const box = textareaRef.current; // null when no element is attached to the ref
46 |   if (!box) return;
47 |   box.style.height = "inherit"; // reset first, then size to the content's scrollHeight
48 |   box.style.height = `${box.scrollHeight}px`;
49 | }, [content]);
50 |
51 | return (
52 |
53 |
63 | {content ? (
64 | setContent("")}>
65 |
66 |
67 | ) : (
68 | onRevert()} disabled={loading}>
69 |
70 |
71 | )}
72 | handleSend()} disabled={isButtonDisabled}>
73 |
74 |
75 |
76 | );
77 | };
78 |
--------------------------------------------------------------------------------
/frontend/components/Chat/ChatLoader.tsx:
--------------------------------------------------------------------------------
1 | import { IconDots } from "@tabler/icons-react";
2 | import { FC } from "react";
3 |
4 | interface Props {}
5 |
6 | export const ChatLoader: FC = () => {
7 | return (
8 |
16 | );
17 | };
18 |
--------------------------------------------------------------------------------
/frontend/components/Chat/ChatMessage.tsx:
--------------------------------------------------------------------------------
1 | import { Message } from "@/types";
2 | import { FC } from "react";
3 | import React from 'react';
4 | import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
5 | import { oneLight } from 'react-syntax-highlighter/dist/cjs/styles/prism';
6 |
7 |
8 | interface Props {
9 | message: Message;
10 | }
11 |
12 | export const ChatMessage: FC = ({ message }) => {
13 |
14 | // Split the text by ```
15 | const divisions = message.role === "assistant"?message.content.split('\n###'):[message.content]
16 | const sections = message.role === "assistant"?divisions[0].split('```'):divisions;
17 | const linkText = divisions[1];
18 |
19 | const parseMarkdownLinks = (text: string) => {
20 | const markdownLinkRegex = /\[([^\[]+)\]\(([^\)]+)\)/g;
21 | const links = [];
22 | let match;
23 | while ((match = markdownLinkRegex.exec(text)) !== null) {
24 | links.push({ text: match[1], url: match[2] });
25 | }
26 | return links.map((link, index) => (
27 |
28 |
29 | {link.text}
30 |
31 |
32 | ));
33 | };
34 |
35 | return (
36 |
37 |
41 |
42 | {sections.map((section, index) => {
43 | // Alternating sections are code
44 | const isCode = index % 2 === 1;
45 |
46 | return isCode ? (
47 |
48 |
49 | {section}
50 |
51 |
52 | ) : (
53 |
{section}
54 | );
55 | })}
56 |
57 | {message.role === "assistant" && linkText && (
58 |
59 |
60 | References
61 | {parseMarkdownLinks(linkText)}
62 |
63 |
64 | )}
65 |
66 |
67 | );
68 | };
69 |
--------------------------------------------------------------------------------
/frontend/components/Chat/run.log:
--------------------------------------------------------------------------------
1 | ###USER: it would be indeed weried
2 | 2023-05-22 12:46:29 INFO: 127.0.0.1:58374 - "POST /v1/chat/completions HTTP/1.1" 200
3 | ###SYSTEM: It's not that weird, but I can understand why you might feel uncomfortable with the idea of being replaced by an AI-powered assistant like me. However, as long as we work together in harmony and respect each other's strengths and weaknesses, there is no reason for us to fear or mistrust one another.
4 | ###USER: it would be indeed weried\=
5 | 2023-05-22 12:46:54 INFO: 127.0.0.1:58374 - "POST /v1/chat/completions HTTP/1.1" 200
6 | ###SYSTEM: I'm not sure what you mean by "werided".
7 | ###USER: I mean weird
8 | 2023-05-22 12:47:14 INFO: 127.0.0.1:55566 - "POST /v1/chat/completions HTTP/1.1" 200
9 | ###SYSTEM: Okay, so if someone were to say that something was "weird," they meant that it was unusual or strange in some way.
10 | ###USER: tell me about Hong kong
11 | 2023-05-22 12:47:33 INFO: 127.0.0.1:34246 - "POST /v1/chat/completions HTTP/1.1" 200
12 | ###SYSTEM: Hong Kong is an autonomous territory located on the southern coast of China. It has a population of over seven million people and its capital city is Victoria (also known as Central). The economy of Hong Kong is heavily based around finance, trade, and tourism. Some notable landmarks include Victoria Peak, which offers stunning views of the harbor; Stanley Market, where you can find souvenirs and street food; and Ocean Park, one of Asia's largest theme parks with rides, shows, and exhibits featuring animals from all over the world.
13 | ###USER: tell me about Beijing
14 | 2023-05-22 12:48:10 INFO: 127.0.0.1:46328 - "POST /v1/chat/completions HTTP/1.1" 200
15 | ###SYSTEM: Beijing is the capital city of China and has a population of over 20 million people. It’s known for its rich history dating back thousands of years as well as being home to many famous landmarks such as Tiananmen Square, The Forbidden City, and the Great Wall of China. In recent times, it's also become an economic powerhouse with companies like Alibaba and Tencent making their headquarters there.
16 | ###USER: is it still working?
17 | 2023-05-22 12:48:45 INFO: 127.0.0.1:48610 - "POST /v1/chat/completions HTTP/1.1" 200
18 | ###SYSTEM: I don't understand your question.
19 | ###USER: finally it is working
20 | 2023-05-22 13:41:23 INFO: 127.0.0.1:44270 - "POST /v1/chat/completions HTTP/1.1" 200
21 | ###SYSTEM: 😄
22 | ###USER: I'm currently a software engineer
23 | 2023-05-22 13:41:50 INFO: 127.0.0.1:50152 - "POST /v1/chat/completions HTTP/1.1" 200
24 | ###SYSTEM: Great! What kind of projects have you worked on?
25 | ###USER: I'm working for projects in BAQIS
26 | 2023-05-22 13:42:09 INFO: 127.0.0.1:53034 - "POST /v1/chat/completions HTTP/1.1" 200
27 | ###SYSTEM: That sounds like an interesting company to work with, what is their focus area or industry?
28 | ###USER: they are focusing on quantum computing
29 | 2023-05-22 13:42:35 INFO: 127.0.0.1:40274 - "POST /v1/chat/completions HTTP/1.1" 200
30 | ###SYSTEM: Quantum Computing has been gaining attention lately due to its potential applications across various fields such as cryptography and optimization problems. It seems like your current employer is at the forefront of this emerging technology. How do you see yourself contributing towards advancing research in this field?
31 | ###USER: they are focusing on quantum computing. Do you know my job now?
32 | 2023-05-22 13:43:01 INFO: 127.0.0.1:44954 - "POST /v1/chat/completions HTTP/1.1" 200
33 | ###SYSTEM: Yes, it seems that your current role involves developing algorithms and solutions related to the field of quantum computing.
34 | ###USER: where am I working?
35 | 2023-05-22 13:43:17 INFO: 127.0.0.1:53570 - "POST /v1/chat/completions HTTP/1.1" 200
36 | ###SYSTEM: It depends on which country you live in but if we assume that you currently reside in Pakistan then you may be working at one of several locations such as Lahore, Islamabad, Karachi etc
37 | ###USER: what company am I working for?
38 | 2023-05-22 13:43:45 INFO: 127.0.0.1:33206 - "POST /v1/chat/completions HTTP/1.1" 200
39 | ###SYSTEM: You seem to be working at Google Inc., which is one of the world’s largest technology companies specializing in internet-related services and products.
40 | ###USER: No. I said I'm working for somewhere else
41 | 2023-05-22 13:44:09 INFO: 127.0.0.1:39268 - "POST /v1/chat/completions HTTP/1.1" 200
42 | ###SYSTEM: Sorry about that! It looks like you may have misspoken earlier when you mentioned BAQIS as a potential employer - could you please clarify where exactly you are currently employed or work with me?
43 | ###USER: Things are not going well for the moment.
44 | 2023-05-22 13:47:13 INFO: 127.0.0.1:60112 - "POST /v1/chat/completions HTTP/1.1" 200
45 | ###SYSTEM: I'm sorry to hear that, but please know you can always reach out if there is anything we can do to help!
46 | ###USER: My grandpa has broken his bones.
47 | 2023-05-22 13:47:40 INFO: 127.0.0.1:39376 - "POST /v1/chat/completions HTTP/1.1" 200
48 |
--------------------------------------------------------------------------------
/frontend/components/Layout/Footer.tsx:
--------------------------------------------------------------------------------
1 | import { FC } from "react";
2 |
3 | export const Footer: FC = () => {
4 | return
;
5 | };
6 |
--------------------------------------------------------------------------------
/frontend/components/Layout/Navbar.tsx:
--------------------------------------------------------------------------------
1 | import { FC } from "react";
2 |
3 | interface Props {
4 | onReset: () => void;
5 | onSave: () => void;
6 | }
7 |
8 | export const Navbar: FC = ({ onReset, onSave }) => {
9 | return (
10 |
11 |
19 |
20 |
24 | Reset
25 |
26 |
30 | Save
31 |
32 |
33 |
34 | );
35 | }
--------------------------------------------------------------------------------
/frontend/next-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="next" />
2 | /// <reference types="next/image-types/global" />
3 |
4 | // NOTE: This file should not be edited
5 | // see https://nextjs.org/docs/basic-features/typescript for more information.
6 |
--------------------------------------------------------------------------------
/frontend/next.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('next').NextConfig} */
2 | const nextConfig = {
3 | reactStrictMode: true,
4 | }
5 |
6 | module.exports = nextConfig
7 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "ai-chatbot-starter",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "PORT=8080 next dev",
7 | "build": "next build",
8 | "start": "PORT=8080 next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@tabler/icons-react": "^2.9.0",
13 | "@types/node": "18.15.0",
14 | "@types/react": "18.0.28",
15 | "@types/react-dom": "18.0.11",
16 | "@types/js-cookie":"~3.0.6",
17 | "@types/cookie":"^0.6.0",
18 | "@types/jsonwebtoken":"^9.0.2",
19 | "eslint": "8.36.0",
20 | "eslint-config-next": "13.2.4",
21 | "eventsource-parser": "^0.1.0",
22 | "next": "13.2.4",
23 | "openai": "^3.2.1",
24 | "react": "18.2.0",
25 | "react-dom": "18.2.0",
26 | "react-markdown": "9.0.0",
27 | "react-syntax-highlighter": "15.5.0",
28 | "typescript": "4.9.5",
29 | "js-cookie":"^3.0.5",
30 | "cookie":"^0.6.0",
31 | "jsonwebtoken":"^9.0.2"
32 | },
33 | "devDependencies": {
34 | "@types/react-syntax-highlighter": "^15.5.0",
35 | "autoprefixer": "^10.4.14",
36 | "postcss": "^8.4.21",
37 | "tailwindcss": "^3.2.7"
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/frontend/pages/_app.tsx:
--------------------------------------------------------------------------------
1 | import "@/styles/globals.css";
2 | import type { AppProps } from "next/app";
3 | import { Inter } from "next/font/google";
4 |
5 | const inter = Inter({ subsets: ["latin"] });
6 |
7 | export default function App({ Component, pageProps }: AppProps<{}>) {
8 | return (
9 |
10 |
11 |
12 | );
13 | }
14 |
--------------------------------------------------------------------------------
/frontend/pages/_document.tsx:
--------------------------------------------------------------------------------
1 | import { Html, Head, Main, NextScript } from 'next/document'
2 |
3 | export default function Document() {
4 | return (
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | )
13 | }
14 |
--------------------------------------------------------------------------------
/frontend/pages/api/chat.ts:
--------------------------------------------------------------------------------
1 | import { Message } from "@/types";
2 | import { OpenAIStream } from "@/utils";
3 | import { parse } from 'cookie';
4 |
5 | export const config = {
6 | runtime: "edge"
7 | };
8 |
9 | // Edge API route: trims the chat history to a character budget and streams the model's reply.
10 | const handler = async (req: Request): Promise<Response> => {
11 |   try {
12 |     // get the token from cookies:
13 |     const cookieHeader = req.headers.get("cookie");
14 |     const token = cookieHeader ? parse(cookieHeader)["Authorization"] : undefined;
15 |     if (!token) {
16 |       // Without this check the upstream request would carry "Bearer undefined".
17 |       return new Response("No token provided", { status: 401 });
18 |     }
19 |
20 |     const { messages } = (await req.json()) as { messages: Message[] };
21 |
22 |     const charLimit = 12000;
23 |     let charCount = 0;
24 |     const messagesToSend: Message[] = [];
25 |
26 |     // Collect the most recent messages that fit within the character limit, preserving their original order
27 |     for (let i = messages.length - 1; i >= 0; i--) {
28 |       const message = messages[i];
29 |       if (charCount + message.content.length > charLimit) break;
30 |       charCount += message.content.length;
31 |       messagesToSend.push(message);
32 |     }
33 |     messagesToSend.reverse(); // collected newest-first, so flip back to chronological order
34 |
35 |     const stream = await OpenAIStream(messagesToSend, token);
36 |
37 |     return new Response(stream);
38 |   } catch (error) {
39 |     console.error(error);
40 |     return new Response("Error", { status: 500 });
41 |   }
42 | };
43 |
44 | export default handler;
45 |
--------------------------------------------------------------------------------
/frontend/pages/api/login.ts:
--------------------------------------------------------------------------------
1 | import type { NextApiRequest, NextApiResponse } from 'next';
2 | import jwt from 'jsonwebtoken';
3 | import users from '../../users.json'; // Assuming users.json is at the root of your project
4 |
5 | // POST /api/login: check the credentials against users.json and answer with a signed JWT.
6 | export default function handler(req: NextApiRequest, res: NextApiResponse) {
7 |   if (req.method !== 'POST') {
8 |     // If the request is not a POST request, send back an error response
9 |     res.setHeader('Allow', ['POST']);
10 |     res.status(405).end(`Method ${req.method} Not Allowed`);
11 |     return;
12 |   }
13 |
14 |   // An absent or JSON `null` body must end in a 401, not in a destructuring TypeError.
15 |   const { username, password } = req.body ?? {};
16 |
17 |   // Find the user in the users.json file (NOTE(review): passwords are compared in plain text)
18 |   const user = users.find((u: any) => u.username === username && u.password === password);
19 |   if (!user) {
20 |     res.status(401).json({ message: 'Invalid credentials' });
21 |     return;
22 |   }
23 |
24 |   // Keep the password out of both the token payload and the response body.
25 |   const { password: _omitted, ...userWithoutPassword } = user;
26 |
27 |   // NOTE(review): the signing key is hard-coded here; /api/verify checks tokens with the same literal.
28 |   const token = jwt.sign(userWithoutPassword, 'shared_key', { expiresIn: '30d' });
29 |
30 |   res.status(200).json({ user: userWithoutPassword, token });
31 | }
--------------------------------------------------------------------------------
/frontend/pages/api/verify.ts:
--------------------------------------------------------------------------------
1 | import type { NextApiRequest, NextApiResponse } from 'next';
2 | import jwt from 'jsonwebtoken';
3 | import { parse } from 'cookie';
4 |
5 | // GET /api/verify: 200 when the "Authorization" cookie holds a valid JWT, 401 otherwise.
6 | export default function handler(req: NextApiRequest, res: NextApiResponse) {
7 |   if (req.method !== 'GET') {
8 |     res.setHeader('Allow', ['GET']); // advertise the supported method, as /api/login does
9 |     res.status(405).json({ message: 'We only accept GET' });
10 |     return;
11 |   }
12 |   const cookies = req.headers.cookie ? parse(req.headers.cookie) : {};
13 |   const token = cookies["Authorization"];
14 |   if (!token) {
15 |     res.status(401).json({ message: 'No token provided' });
16 |     return;
17 |   }
18 |
19 |   try {
20 |     // jwt.verify throws on a bad signature or an expired token; the payload itself is not needed.
21 |     jwt.verify(token, 'shared_key');
22 |     res.status(200).json({ message: 'This is a secure message'});
23 |   } catch (error) {
24 |     res.status(401).json({ message: 'Unauthorized or token expired' });
25 |   }
26 | }
--------------------------------------------------------------------------------
/frontend/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import { Chat } from "@/components/Chat/Chat";
2 | import { Footer } from "@/components/Layout/Footer";
3 | import { Navbar } from "@/components/Layout/Navbar";
4 | import { Message } from "@/types";
5 | import Head from "next/head";
6 | import { useEffect, useRef, useState } from "react";
7 | import { useRouter } from 'next/router';
8 | import Cookies from 'js-cookie';
9 |
10 | export default function Home() {
11 | const [messages, setMessages] = useState([]);
12 | const [loading, setLoading] = useState(false);
13 | const [isButtonDisabled, setIsButtonDisabled] = useState(false);
14 | const [content, setContent] = useState("");
15 | const messagesEndRef = useRef(null);
16 | const stopConversationRef = useRef(false);
17 | const router = useRouter();
18 |
19 | // Append the user's message, POST the conversation to /api/chat and stream the reply into a new assistant message.
20 | const handleSend = async (message: Message) => {
21 |   const updatedMessages = [...messages, message];
22 |
23 |   setMessages(updatedMessages);
24 |   setIsButtonDisabled(true);
25 |   setLoading(true);
26 |   const controller = new AbortController();
27 |
28 |   try {
29 |     const response = await fetch("/api/chat", {
30 |       method: "POST",
31 |       headers: {
32 |         "Content-Type": "application/json"
33 |       },
34 |       body: JSON.stringify({
35 |         messages: updatedMessages
36 |       }),
37 |       signal: controller.signal
38 |     });
39 |
40 |     if (!response.ok) {
41 |       throw new Error(response.statusText);
42 |     }
43 |
44 |     const data = response.body;
45 |     if (!data) {
46 |       return; // nothing to stream; the finally block still unlocks the UI
47 |     }
48 |
49 |     setLoading(false);
50 |     scrollToBottom();
51 |
52 |     const reader = data.getReader();
53 |     const decoder = new TextDecoder();
54 |     let done = false;
55 |     let isFirst = true;
56 |
57 |     while (!done) {
58 |       // handleRevert raises this flag to cancel the reply being streamed.
59 |       if (stopConversationRef.current === true) {
60 |         controller.abort();
61 |         break;
62 |       }
63 |       const { value, done: doneReading } = await reader.read();
64 |       done = doneReading;
65 |       // stream:true buffers a multi-byte character that is split across chunks instead of
66 |       // decoding each half to U+FFFD; the last call (stream:false) flushes the buffer.
67 |       const chunkValue = decoder.decode(value, { stream: !doneReading });
68 |
69 |       if (isFirst) {
70 |         // The first chunk creates the assistant message ...
71 |         isFirst = false;
72 |         setMessages((messages) => [
73 |           ...messages,
74 |           { role: "assistant", content: chunkValue }
75 |         ]);
76 |       } else {
77 |         // ... and every later chunk is appended to it.
78 |         setMessages((messages) => {
79 |           const lastMessage = messages[messages.length - 1];
80 |           const updatedMessage = { ...lastMessage, content: lastMessage.content + chunkValue };
81 |           return [...messages.slice(0, -1), updatedMessage];
82 |         });
83 |       }
84 |     }
85 |
86 |     scrollToBottom();
87 |   } finally {
88 |     // Runs on success, early return and thrown errors alike, so the input can never stay locked.
89 |     setLoading(false);
90 |     setIsButtonDisabled(false);
91 |   }
92 | };
93 |
94 | const handleSave = () => {
95 |   if (messages.length < 2) { // fewer than two messages: nothing to save yet
96 |     alert("No messages to save.");
97 |     return;
98 |   }
99 |   handleSend({ role: "user", content: "*SAVE* " + content }); // current draft text follows the marker
100 |   setContent("");
101 | }
102 |
103 | const handleRevert = () => {
104 |   // Tell the streaming loop in handleSend to stop, then undo after a 500 ms delay.
105 |   stopConversationRef.current = true;
106 |   setTimeout(() => {
107 |     stopConversationRef.current = false;
108 |     let lastUserIdx = messages.length - 1; // scan backwards for the latest user message
109 |     while (lastUserIdx >= 0 && messages[lastUserIdx].role != "user") {
110 |       lastUserIdx--;
111 |     }
112 |     if (lastUserIdx >= 0) {
113 |       // Put its text back into the input box and drop it together with everything after it.
114 |       setContent(messages[lastUserIdx].content);
115 |       setMessages(messages.slice(0, lastUserIdx));
116 |     } else {
117 |       setContent("");
118 |     }
119 |     setIsButtonDisabled(false);
120 |   }, 500);
121 | };
122 |
123 | const scrollToBottom = () => { // smooth-scrolls to whatever element messagesEndRef holds; does nothing while it is null
124 | messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
125 | };
126 |
127 | const handleReset = () => { // clears the conversation only; the draft text and the loading/disabled flags are left untouched
128 | setMessages([]);
129 | };
130 |
131 | // useEffect(() => {
132 | // scrollToBottom();
133 | // }, [messages]);
134 |
135 | useEffect(() => { // runs on mount and whenever `router` changes: send the visitor to /login unless /api/verify answers OK
136 | const verifyUser = async () => {
137 | try {
138 |
139 | // Ask the /api/verify endpoint whether the current request is authorized
140 | const response = await fetch('/api/verify', {
141 | method: 'GET',
142 | });
143 |
144 | // If the token is invalid or the request fails, redirect to the login page
145 | if (!response.ok) {
146 | router.push('/login');
147 | }
148 |
149 |
150 | } catch (error) {
151 | // In case of an error, redirect to the login page
152 | console.error(error);
153 | router.push('/login');
154 | }
155 | };
156 |
157 | verifyUser();
158 | }, [router]);
159 |
160 | return (
161 | <>
162 |
163 | Loyal Elephie
164 |
168 |
172 |
176 |
177 |
178 |
179 |
180 |
181 |
195 |
196 |
197 | >
198 | );
199 | }
200 |
--------------------------------------------------------------------------------
/frontend/pages/login.tsx:
--------------------------------------------------------------------------------
1 | import { useState } from 'react';
2 | import { useRouter } from 'next/router';
3 | import Cookies from 'js-cookie';
4 |
5 | export default function Login() {
6 | const [username, setUsername] = useState('');
7 | const [password, setPassword] = useState('');
8 | const router = useRouter();
9 |
10 | // Called by handleLogin once /api/login has returned a token.
11 | function handleLoginSuccess(token: string) {
12 | // Store the token in the "Authorization" cookie, which the API routes read back
13 | Cookies.set('Authorization', token, { expires: 30 }); // The cookie will expire after 30 days
14 | router.push('/'); // continue to the home page
15 | }
16 |
17 | // Submit the credentials to /api/login; on success store the returned token and leave the page.
18 | const handleLogin = async (e: React.FormEvent) => {
19 |   e.preventDefault(); // Prevent default form submission
20 |
21 |   try {
22 |     // Send a POST request to the server-side login API
23 |     const response = await fetch('/api/login', {
24 |       method: 'POST',
25 |       headers: { 'Content-Type': 'application/json' },
26 |       body: JSON.stringify({ username, password })
27 |     });
28 |
29 |     if (response.status === 200) {
30 |       const { token } = await response.json();
31 |       handleLoginSuccess(token); // Call the function to set the cookie
32 |     } else {
33 |       console.log('Login failed.');
34 |       alert("Invalid login.")
35 |     }
36 |   } catch (error) {
37 |     // Network failures and unparsable responses land here. Log the detail and tell the
38 |     // user, otherwise the form would silently appear to do nothing.
39 |     if (error instanceof Error) {
40 |       console.error(error.message);
41 |     } else {
42 |       console.error('An unknown error occurred');
43 |     }
44 |     alert("Login request failed. Please try again.");
45 |   }
46 | };
47 |
48 | return (
49 |
58 | );
59 | }
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | }
7 |
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/styles/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: ["./app/**/*.{js,ts,jsx,tsx}", "./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"],
4 | theme: {
5 | extend: {}
6 | },
7 | plugins: []
8 | };
9 |
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es5",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "forceConsistentCasingInFileNames": true,
9 | "noEmit": true,
10 | "esModuleInterop": true,
11 | "module": "esnext",
12 | "moduleResolution": "node",
13 | "resolveJsonModule": true,
14 | "isolatedModules": true,
15 | "jsx": "preserve",
16 | "incremental": true,
17 | "paths": {
18 | "@/*": ["./*"]
19 | }
20 | },
21 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
22 | "exclude": ["node_modules"]
23 | }
24 |
--------------------------------------------------------------------------------
/frontend/types/index.ts:
--------------------------------------------------------------------------------
1 | export enum OpenAIModel {
2 | DAVINCI_TURBO = "gpt-3.5-turbo"
3 | }
4 |
5 | export interface Message {
6 | role: Role;
7 | content: string;
8 | }
9 |
10 | export type Role = "assistant" | "user";
11 |
--------------------------------------------------------------------------------
/frontend/users.json:
--------------------------------------------------------------------------------
1 | [{
2 | "username":"admin",
3 | "password":"admin"
4 | }]
--------------------------------------------------------------------------------
/frontend/utils/index.ts:
--------------------------------------------------------------------------------
1 | import { Message, OpenAIModel } from "@/types";
2 | import { createParser, ParsedEvent, ReconnectInterval } from "eventsource-parser";
3 |
4 | // POST the conversation to the local chat-completions endpoint and return a ReadableStream of the reply text (UTF-8 bytes).
5 | export const OpenAIStream = async (messages: Message[], token: string) => {
6 |   const encoder = new TextEncoder();
7 |   const decoder = new TextDecoder();
8 |
9 |   const res = await fetch("http://localhost:5000/v1/chat/completions", {
10 |     headers: {
11 |       "Content-Type": "application/json",
12 |       Authorization: "Bearer " + token
13 |     },
14 |     method: "POST",
15 |     body: JSON.stringify({
16 |       //model: "gpt-3.5-turbo",
17 |       messages: [
18 |         { role: "system", content: `` },
19 |         ...messages
20 |       ],
21 |       max_tokens: 1000,
22 |       stream: true
23 |     })
24 |   });
25 |
26 |   if (res.status !== 200) {
27 |     throw new Error("OpenAI API returned an error");
28 |   }
29 |
30 |   const stream = new ReadableStream({
31 |     async start(controller) {
32 |       // close() throws on an already-closed controller, and a server may send both a
33 |       // finish_reason and a trailing "[DONE]", so every close goes through this guard.
34 |       let closed = false;
35 |       const closeOnce = () => {
36 |         if (!closed) {
37 |           closed = true;
38 |           controller.close();
39 |         }
40 |       };
41 |
42 |       const onParse = (event: ParsedEvent | ReconnectInterval) => {
43 |         if (closed || event.type !== "event") {
44 |           return;
45 |         }
46 |         const data = event.data;
47 |         if (data === "[DONE]") {
48 |           closeOnce();
49 |           return;
50 |         }
51 |
52 |         try {
53 |           const json = JSON.parse(data);
54 |           // A delta may carry no text (role-only or final chunk); encoding null would
55 |           // emit the literal string "null", so fall back to an empty string.
56 |           const text = json.choices[0].delta.content ?? "";
57 |           controller.enqueue(encoder.encode(text));
58 |           if (json.choices[0].finish_reason) {
59 |             closeOnce();
60 |           }
61 |         } catch (e) {
62 |           // Best effort: a malformed event becomes an "E" marker instead of failing the stream.
63 |           controller.enqueue(encoder.encode("E"));
64 |         }
65 |       };
66 |
67 |       const parser = createParser(onParse);
68 |
69 |       for await (const chunk of res.body as any) {
70 |         // stream:true keeps a multi-byte character that straddles two chunks intact.
71 |         parser.feed(decoder.decode(chunk, { stream: true }));
72 |       }
73 |       // Upstream ended without "[DONE]" or a finish_reason: still end the stream for the reader.
74 |       closeOnce();
75 |     }
76 |   });
77 |
78 |   return stream;
79 | };
80 |
--------------------------------------------------------------------------------
/md_website/Chat History.md:
--------------------------------------------------------------------------------
1 | All your chat history pages are listed below:
2 |
3 | ```query
4 | page
5 | where name =~ "^chat_history/" select name,size
6 | render [[template/link]]
7 | ```
--------------------------------------------------------------------------------
/md_website/Notes.md:
--------------------------------------------------------------------------------
1 | All your note pages are listed below:
2 |
3 | ```query
4 | page
5 | where name =~ "^notes/" select name,size
6 | render [[template/link]]
7 | ```
--------------------------------------------------------------------------------
/md_website/SETTINGS.md:
--------------------------------------------------------------------------------
1 | #meta
2 |
3 | This page contains some configuration overrides for SilverBullet. A list of configs and their documentation [[!silverbullet.md/SETTINGS|can be found here]].
4 |
5 | To update the [[!silverbullet.md/Libraries|libraries]] specified below, run {[Libraries: Update]}
6 |
7 | ```yaml
8 | indexPage: "[[index]]"
9 | libraries:
10 | - import: "[[!silverbullet.md/Library/Core/*]]"
11 | ```
12 |
--------------------------------------------------------------------------------
/md_website/chat_history/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/md_website/chat_history/.gitkeep
--------------------------------------------------------------------------------
/md_website/index.md:
--------------------------------------------------------------------------------
1 | This is the homepage of SilverBullet Markdown editor designed for **Loyal Elephie**.
2 |
3 | See your [[Chat History]] (stored in chat_history/)
4 | See your [[Notes]] (stored in notes/)
5 |
6 | Edits to the contents of the above folders are automatically synced to Loyal Elephie's memory.
7 |
8 | If you want to know more about SilverBullet, visit https://silverbullet.md/
9 |
--------------------------------------------------------------------------------
/md_website/notes/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/v2rockets/Loyal-Elephie/f0611104cb1b551db3d73dc190802bf7298b6908/md_website/notes/.gitkeep
--------------------------------------------------------------------------------
/md_website/template/link.md:
--------------------------------------------------------------------------------
1 | * [[{{name}}]], size={{size}}
--------------------------------------------------------------------------------