├── .gitignore
├── .vscode
│   └── settings.json
├── LICENCE
├── docker-compose.yml
├── fastapi
│   ├── .langchain.db
│   ├── Dockerfile
│   ├── app
│   │   ├── .env
│   │   ├── __pycache__
│   │   │   ├── main.cpython-310.pyc
│   │   │   ├── main.cpython-311.pyc
│   │   │   └── main.cpython-39.pyc
│   │   └── main.py
│   ├── gunicorn_conf.py
│   ├── requirements.txt
│   ├── start-reload.sh
│   ├── start.sh
│   └── submodules
│       ├── __pycache__
│       │   └── prompts.cpython-310.pyc
│       └── prompts.py
├── qdrant
│   └── Dockerfile
├── readme.md
└── sveltekit
    ├── .env
    ├── .gitignore
    ├── .npmrc
    ├── .prettierignore
    ├── .prettierrc
    ├── Dockerfile
    ├── README.md
    ├── package.json
    ├── playwright.config.ts
    ├── postcss.config.cjs
    ├── src
    │   ├── app.css
    │   ├── app.d.ts
    │   ├── app.html
    │   ├── hooks.server.ts
    │   ├── index.test.ts
    │   ├── lib
    │   │   ├── components
    │   │   │   ├── PrismJS.svelte
    │   │   │   ├── chatbot.svelte
    │   │   │   └── chatbotstream.svelte
    │   │   ├── conversationStore.ts
    │   │   └── streamStore.ts
    │   └── routes
    │       ├── +layout.server.ts
    │       ├── +layout.svelte
    │       ├── +page.server.ts
    │       ├── +page.svelte
    │       ├── api
    │       │   └── queryGPT
    │       │       └── +server.ts
    │       ├── collection
    │       │   └── [collection]
    │       │       ├── +page.server.ts
    │       │       └── +page.svelte
    │       ├── document
    │       │   └── [collection]
    │       │       ├── +page.server.ts
    │       │       └── +page.svelte
    │       └── robots.txt
    │           └── +server.ts
    ├── static
    │   ├── favicon.png
    │   └── profile-image.png
    ├── svelte.config.js
    ├── tailwind.config.cjs
    ├── tests
    │   └── test.ts
    ├── tsconfig.json
    └── vite.config.ts

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
fastapi/__pycache__
sveltekit/.svelte-kit
sveltekit/node_modules
azure-devops-pipeline.yml
docker-pipeline.yml
fastapi/.env
fastapi/app/.env

--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
{
	"python.formatting.provider": "black"
}

--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Dag Thomas Olsen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
version: "3.9"
networks:
  app-tier:
    driver: bridge
services:
  fastapi:
    build: ./fastapi
    expose:
      - "5000"
    ports:
      - "5000:5000"
    environment:
      - QDRANT_HOST=qdrant
    depends_on:
      - qdrant
    networks:
      - app-tier
    volumes:
      - ./fastapi:/app:Z
  sveltekit:
    build: ./sveltekit
    ports:
      - 3000:3000
    networks:
      - app-tier
    depends_on:
      - fastapi
    volumes:
      - ./sveltekit:/app:Z
    environment:
      - VITE_BACKEND_URL=http://localhost:5000
  qdrant:
    build: ./qdrant
    ports:
      - 6333:6333
    volumes:
      - ./data/qdrant_storage:/qdrant/storage
    networks:
      - app-tier

volumes:
  app-qdrant-data:

--------------------------------------------------------------------------------
/fastapi/.langchain.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dagthomas/LangchainComposeChatYourDocs/b13915a6e395b006ad67668c822ddef71b570b16/fastapi/.langchain.db
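--------------------------------------------------------------------------------
Example: smoke-testing the stack (editor's sketch, not a repository file)
--------------------------------------------------------------------------------
Once `docker compose up` is running, a quick check that all three services answer on
their published ports can save debugging time. A minimal sketch; it assumes the
default port mappings from docker-compose.yml above and that the `requests` package
is available on the host (it is not part of either service's dependencies):

import requests

# Host-side ports published in docker-compose.yml (assumed unchanged).
SERVICES = {
    "fastapi": "http://localhost:5000/",
    "sveltekit": "http://localhost:3000/",
    "qdrant": "http://localhost:6333/",
}

for name, url in SERVICES.items():
    try:
        response = requests.get(url, timeout=5)
        print(f"{name}: HTTP {response.status_code}")
    except requests.ConnectionError:
        print(f"{name}: not reachable at {url}")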
--------------------------------------------------------------------------------
/fastapi/Dockerfile:
--------------------------------------------------------------------------------
FROM python:3.10-slim

COPY requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir -r /tmp/requirements.txt
RUN apt-get update && apt-get install -y --no-install-recommends libmagic1 && rm -rf /var/lib/apt/lists/*
COPY ./start.sh /start.sh
RUN chmod +x /start.sh

COPY ./gunicorn_conf.py /gunicorn_conf.py

COPY ./start-reload.sh /start-reload.sh
RUN chmod +x /start-reload.sh

COPY ./app /app
WORKDIR /app/

ENV PYTHONPATH=/app

EXPOSE 5000

# Run the start script; it will check for an /app/prestart.sh script (e.g.
# for migrations) and then start Gunicorn with Uvicorn.
CMD ["/start.sh"]

--------------------------------------------------------------------------------
/fastapi/app/.env:
--------------------------------------------------------------------------------
OPENAI_API_KEY = sk-xxx
AUTHORIZED_API_KEY = 5pBHDjr4bkNFc1xdqIMR6INLItKuPvZrf8zNdc6enlXqhy8qVO8YCYKRcdd
APIFY_API_TOKEN = apify_api_xxx

--------------------------------------------------------------------------------
/fastapi/app/__pycache__/main.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dagthomas/LangchainComposeChatYourDocs/b13915a6e395b006ad67668c822ddef71b570b16/fastapi/app/__pycache__/main.cpython-310.pyc

--------------------------------------------------------------------------------
/fastapi/app/__pycache__/main.cpython-311.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dagthomas/LangchainComposeChatYourDocs/b13915a6e395b006ad67668c822ddef71b570b16/fastapi/app/__pycache__/main.cpython-311.pyc

--------------------------------------------------------------------------------
/fastapi/app/__pycache__/main.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dagthomas/LangchainComposeChatYourDocs/b13915a6e395b006ad67668c822ddef71b570b16/fastapi/app/__pycache__/main.cpython-39.pyc
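--------------------------------------------------------------------------------
Example: minting a fresh AUTHORIZED_API_KEY (editor's sketch, not a repository file)
--------------------------------------------------------------------------------
The AUTHORIZED_API_KEY in fastapi/app/.env above is the bearer token that every
protected endpoint in main.py checks, and this copy is committed to the repository,
so anyone deploying the project should generate their own. One way to do that
(the token length is an arbitrary choice):

import secrets

# Print a URL-safe random token to paste into fastapi/app/.env
print(f"AUTHORIZED_API_KEY={secrets.token_urlsafe(48)}")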
--------------------------------------------------------------------------------
/fastapi/app/main.py:
--------------------------------------------------------------------------------
import threading
import queue
import openai
import langchain
from dotenv import load_dotenv
from pydantic import BaseModel
from qdrant_client import QdrantClient
import logging
from typing import List
import urllib3
import os
import tempfile
import sys
import pandas as pd
import typing as t
from slugify import slugify

# custom
import submodules.prompts as prompts
from langchain.schema import HumanMessage, SystemMessage

from langchain.document_loaders.base import Document
from langchain.document_loaders import ApifyDatasetLoader
from langchain.cache import InMemoryCache
from langchain.document_loaders import WebBaseLoader
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders import SRTLoader
from langchain.document_loaders import UnstructuredWordDocumentLoader
from langchain.document_loaders import UnstructuredEPubLoader
from langchain.document_loaders import UnstructuredPowerPointLoader
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Qdrant
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from fastapi import FastAPI, Depends, HTTPException, status, UploadFile, File
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.responses import StreamingResponse
from fastapi.security import OAuth2PasswordBearer
from fastapi.middleware.cors import CORSMiddleware


langchain.llm_cache = InMemoryCache()


load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
AUTHORIZED_API_KEY = os.getenv("AUTHORIZED_API_KEY")
# Default to an empty string so a missing token doesn't raise a TypeError
os.environ["APIFY_API_TOKEN"] = os.getenv("APIFY_API_TOKEN", "")
openai.api_key = OPENAI_API_KEY

# Inside Docker Compose the Qdrant service is reachable by its service name
host = os.getenv("QDRANT_HOST", "qdrant")
client = QdrantClient(host=host, prefer_grpc=True)

http = urllib3.PoolManager(cert_reqs="CERT_NONE", retries=False)
logging.captureWarnings(True)
get_bearer_token = HTTPBearer(auto_error=False)
# Bearer-token auth
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")


class UnauthorizedMessage(BaseModel):
    detail: str = "Bearer token missing or unknown"


known_tokens = set([AUTHORIZED_API_KEY])


async def get_token(
    auth: t.Optional[HTTPAuthorizationCredentials] = Depends(get_bearer_token),
) -> str:
    # Simulate a database query to find a known token
    if auth is None or (token := auth.credentials) not in known_tokens:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=UnauthorizedMessage().detail,
        )
    return token


# start app
app = FastAPI(
    title="LangChain Starter API",
)

origins = [
    "*",
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.on_event("startup")
async def startup():
    print("Server Startup!")


class GPTQuery(BaseModel):
    prompt: str
    system_intel: str
    temperature: float


class Query(BaseModel):
    query: str
    collection: str


class Apify(BaseModel):
    dataset: str
    collection: str


class Collection(BaseModel):
    collection: str
    prompt: str
    temperature: float


class Webpage(BaseModel):
    url: str


class Webpages(BaseModel):
    urls: List[str]
    collection_name: str


@app.get("/")
async def read_root():
    message = (
        "Hello world! From FastAPI running on Uvicorn with Gunicorn. "
        f"Using Python {sys.version_info.major}.{sys.version_info.minor}"
    )
    return {"message": message}
# FastAPI endpoint for returning a list of collections
@app.get("/collections")
async def read_collections(token: str = Depends(get_token)):
    data = client.get_collections()
    return data.collections


@app.post("/documents")
async def similarity_search(item: Query, token: str = Depends(get_token)):
    qdrant = Qdrant(
        client, item.collection, embedding_function=OpenAIEmbeddings().embed_query
    )
    docs = qdrant.similarity_search_with_score(item.query)
    return docs


@app.post("/collections")
async def query_collection(item: Collection, token: str = Depends(get_token)):
    qdrant = Qdrant(
        client, item.collection, embedding_function=OpenAIEmbeddings().embed_query
    )
    docs = qdrant.similarity_search(item.prompt)
    llm = ChatOpenAI(temperature=item.temperature, model_name="gpt-3.5-turbo")
    # , metadata_keys=['source']
    chain = load_qa_with_sources_chain(llm, chain_type="stuff")
    result = chain(
        {"input_documents": docs, "question": item.prompt}, return_only_outputs=True
    )
    return result


loader_classes = {
    ".pdf": PyPDFLoader,
    ".xls": CSVLoader,
    ".xlsx": CSVLoader,
    ".csv": CSVLoader,
    ".epub": UnstructuredEPubLoader,
    ".pptx": UnstructuredPowerPointLoader,
    ".docx": UnstructuredWordDocumentLoader,
    ".txt": SRTLoader,
    ".srt": SRTLoader,
}


async def ingest_data(tmp_file, slug, file_type, chunk_size, chunk_overlap):
    if file_type not in loader_classes:
        return "Filetype not supported"
    loader_class = loader_classes[file_type]
    if loader_class == CSVLoader and file_type in (".xls", ".xlsx"):
        # Excel files are converted to CSV first so CSVLoader can read them
        excel = pd.read_excel(tmp_file)
        os.makedirs(f"./files/{AUTHORIZED_API_KEY}", exist_ok=True)
        csv_path = f"./files/{AUTHORIZED_API_KEY}/{slug}.csv"
        excel.to_csv(csv_path, index=None, header=True)
        loader = loader_class(csv_path)
    else:
        loader = loader_class(tmp_file)
    documents = loader.load()
    # cache the embeddings client across calls
    if not hasattr(ingest_data, "embeddings"):
        ingest_data.embeddings = OpenAIEmbeddings()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    docs = text_splitter.split_documents(documents)
    Qdrant.from_documents(
        docs, ingest_data.embeddings, host=host, collection_name=slug, prefer_grpc=True
    )
    return slug


@app.post("/upload")
async def upload_file(file: UploadFile = File(...), token: str = Depends(get_token)):
    with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
        tmp_file.write(await file.read())
        tmp_file_path = tmp_file.name
    file_extension = os.path.splitext(file.filename)[1].lower()
    response = await ingest_data(
        tmp_file_path,
        slugify(os.path.splitext(file.filename)[0]),
        file_extension,
        chunk_size=256,
        chunk_overlap=40,
    )
    os.unlink(tmp_file_path)
    return response


@app.post("/webpage")
async def create_webpage(item: Webpage, token: str = Depends(get_token)):
    collection_name = slugify(item.url.split("/")[-1])
    loader = WebBaseLoader(item.url)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=40)
    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    Qdrant.from_documents(
        docs, embeddings, host=host, collection_name=collection_name, prefer_grpc=True
    )
    return collection_name
@app.post("/webpages")
async def create_webpages(item: Webpages, token: str = Depends(get_token)):
    loader = WebBaseLoader(item.urls)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=40)
    docs = text_splitter.split_documents(documents)
    embeddings = OpenAIEmbeddings()
    Qdrant.from_documents(
        docs,
        embeddings,
        host=host,
        collection_name=item.collection_name,
        prefer_grpc=True,
    )

    return item.collection_name


@app.post("/openai")
async def openai_query(item: GPTQuery, token: str = Depends(get_token)):
    system_intel = item.system_intel
    prompt = item.prompt
    result = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=item.temperature,
        messages=[
            {"role": "system", "content": system_intel},
            {"role": "user", "content": prompt},
        ],
    )

    return result.choices[0].message.content


@app.post("/apify")
async def ingest_apify_dataset(item: Apify, token: str = Depends(get_token)):
    loader = ApifyDatasetLoader(
        dataset_id=item.dataset,
        dataset_mapping_function=lambda dataset_item: Document(
            page_content=dataset_item["aml_text"],
            metadata={
                "source": dataset_item["url"],
                "title": dataset_item["aml_title"],
                "paragraph": dataset_item["paragraph"],
            },
        ),
    )
    embeddings = OpenAIEmbeddings()
    documents = loader.load()
    if not documents:
        return item.collection

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, length_function=len
    )
    # Index the dataset in batches of 50 documents to keep requests small
    n = 50
    batches = [
        documents[i * n : (i + 1) * n] for i in range((len(documents) + n - 1) // n)
    ]
    docs = text_splitter.split_documents(batches[0])
    qdrant = Qdrant.from_documents(
        docs, embeddings, host=host, collection_name=item.collection
    )
    for batch in batches[1:]:
        split_batch = text_splitter.split_documents(batch)
        if split_batch:
            qdrant.add_documents(split_batch)

    return item.collection


class ThreadedGenerator:
    """Queue-backed generator: the LLM thread feeds tokens in,
    and the StreamingResponse iterates them out."""

    def __init__(self):
        self.queue = queue.Queue()

    def __iter__(self):
        return self

    def __next__(self):
        item = self.queue.get()
        if item is StopIteration:
            raise item
        return item

    def send(self, data):
        self.queue.put(data)

    def close(self):
        self.queue.put(StopIteration)


class ChainStreamHandler(StreamingStdOutCallbackHandler):
    def __init__(self, gen):
        super().__init__()
        self.gen = gen

    def on_llm_new_token(self, token: str, **kwargs):
        self.gen.send(token)


def llm_thread(g, prompt, system_intel, temperature):
    try:
        chat = ChatOpenAI(
            model_name="gpt-3.5-turbo",
            verbose=True,
            streaming=True,
            callback_manager=CallbackManager([ChainStreamHandler(g)]),
            temperature=temperature,
        )

        chat([SystemMessage(content=system_intel), HumanMessage(content=prompt)])

    finally:
        g.close()
def chat(prompt, system_intel, temperature):
    g = ThreadedGenerator()
    threading.Thread(
        target=llm_thread, args=(g, prompt, system_intel, temperature)
    ).start()
    return g


@app.post("/openai/stream")
async def openai_stream(item: GPTQuery, token: str = Depends(get_token)):
    return StreamingResponse(
        chat(item.prompt, item.system_intel, item.temperature),
        media_type="text/event-stream",
    )


@app.post("/collections/stream")
async def collections_stream(item: Collection, token: str = Depends(get_token)):
    qdrant = Qdrant(
        client, item.collection, embedding_function=OpenAIEmbeddings().embed_query
    )
    retriever = qdrant.as_retriever(search_type="similarity")
    relevant_docs = retriever.get_relevant_documents(item.prompt)
    template = prompts.documentSearch(item.prompt, relevant_docs)
    return StreamingResponse(
        chat(item.prompt, template, item.temperature), media_type="text/event-stream"
    )

--------------------------------------------------------------------------------
/fastapi/gunicorn_conf.py:
--------------------------------------------------------------------------------
import json
import multiprocessing
import os

workers_per_core_str = os.getenv("WORKERS_PER_CORE", "1")
max_workers_str = os.getenv("MAX_WORKERS")
use_max_workers = None
if max_workers_str:
    use_max_workers = int(max_workers_str)
web_concurrency_str = os.getenv("WEB_CONCURRENCY", None)

host = os.getenv("HOST", "0.0.0.0")
port = os.getenv("PORT", "5000")
bind_env = os.getenv("BIND", None)
use_loglevel = os.getenv("LOG_LEVEL", "info")
if bind_env:
    use_bind = bind_env
else:
    use_bind = f"{host}:{port}"

cores = multiprocessing.cpu_count()
workers_per_core = float(workers_per_core_str)
default_web_concurrency = workers_per_core * cores
if web_concurrency_str:
    web_concurrency = int(web_concurrency_str)
    assert web_concurrency > 0
else:
    web_concurrency = max(int(default_web_concurrency), 2)
    if use_max_workers:
        web_concurrency = min(web_concurrency, use_max_workers)
accesslog_var = os.getenv("ACCESS_LOG", "-")
use_accesslog = accesslog_var or None
errorlog_var = os.getenv("ERROR_LOG", "-")
use_errorlog = errorlog_var or None
graceful_timeout_str = os.getenv("GRACEFUL_TIMEOUT", "120")
timeout_str = os.getenv("TIMEOUT", "120")
keepalive_str = os.getenv("KEEP_ALIVE", "5")

# Gunicorn config variables
loglevel = use_loglevel
# workers = web_concurrency
# threads = web_concurrency
workers = 4
threads = 4
worker_connections = 1000
bind = use_bind
errorlog = use_errorlog
worker_tmp_dir = "/dev/shm"
accesslog = use_accesslog
graceful_timeout = int(graceful_timeout_str)
timeout = int(timeout_str)
keepalive = int(keepalive_str)

# For debugging and testing
log_data = {
    "loglevel": loglevel,
    "workers": workers,
    "threads": threads,
    "worker_connections": worker_connections,
    "bind": bind,
    "graceful_timeout": graceful_timeout,
    "timeout": timeout,
    "keepalive": keepalive,
    "errorlog": errorlog,
    "accesslog": accesslog,
    # Additional, non-gunicorn variables
    "workers_per_core": workers_per_core,
    "use_max_workers": use_max_workers,
    "host": host,
    "port": port,
}
print(json.dumps(log_data))
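--------------------------------------------------------------------------------
Example: consuming a streaming endpoint (editor's sketch, not a repository file)
--------------------------------------------------------------------------------
The /openai/stream and /collections/stream endpoints above return chunked
responses labelled text/event-stream, but the body is the raw token stream fed
through the ThreadedGenerator, not framed SSE events. A minimal client sketch; it
assumes the API is reachable on localhost:5000 (the docker-compose.yml mapping),
that `requests` is installed, and that the token matches AUTHORIZED_API_KEY from
fastapi/app/.env:

import requests

API_URL = "http://localhost:5000/openai/stream"
TOKEN = "5pBHDjr4bkNFc1xdqIMR6INLItKuPvZrf8zNdc6enlXqhy8qVO8YCYKRcdd"

payload = {
    "prompt": "Summarize what a vector database does.",
    "system_intel": "You are a helpful assistant.",
    "temperature": 0.2,
}

with requests.post(
    API_URL,
    json=payload,
    headers={"Authorization": f"Bearer {TOKEN}"},
    stream=True,
) as response:
    response.raise_for_status()
    # Tokens arrive as bare chunks, so print them as they come in.
    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)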
--------------------------------------------------------------------------------
/fastapi/requirements.txt:
--------------------------------------------------------------------------------
uvicorn[standard]==0.20.0
gunicorn==20.1.0
fastapi>=0.89.1
langchain==0.0.153
openai>=0.27.1
qdrant_client>=0.1.0
python-multipart>=0.0.6
pypdf>=1.26.0
pyodc>=0.0.1
python-slugify>=5.0.2
pandas>=1.3.3
python-magic>=0.4.24
bs4>=0.0.1
transformers>=4.11.3
openpyxl>=3.0.9
aiofiles>=0.7.0
asyncio>=3.4.3
unstructured>=0.0.1
aiohttp>=3.7.4.post0
pysrt>=1.1.2
sseclient>=0.0.27
pycryptodome>=3.15.0
apify-client>=0.5.20
tiktoken>=0.0.1
nltk>=3.6.5
spacy>=3.1.4

--------------------------------------------------------------------------------
/fastapi/start-reload.sh:
--------------------------------------------------------------------------------
#! /usr/bin/env sh
set -e

if [ -f /app/app/main.py ]; then
    DEFAULT_MODULE_NAME=app.main
elif [ -f /app/main.py ]; then
    DEFAULT_MODULE_NAME=main
fi
MODULE_NAME=${MODULE_NAME:-$DEFAULT_MODULE_NAME}
VARIABLE_NAME=${VARIABLE_NAME:-app}
export APP_MODULE=${APP_MODULE:-"$MODULE_NAME:$VARIABLE_NAME"}

HOST=${HOST:-0.0.0.0}
PORT=${PORT:-5000}
LOG_LEVEL=${LOG_LEVEL:-info}

exec uvicorn --reload --host $HOST --port $PORT --log-level $LOG_LEVEL "$APP_MODULE"

--------------------------------------------------------------------------------
/fastapi/start.sh:
--------------------------------------------------------------------------------
#! /usr/bin/env sh
set -e

if [ -f /app/app/main.py ]; then
    DEFAULT_MODULE_NAME=app.main
elif [ -f /app/main.py ]; then
    DEFAULT_MODULE_NAME=main
fi
MODULE_NAME=${MODULE_NAME:-$DEFAULT_MODULE_NAME}
VARIABLE_NAME=${VARIABLE_NAME:-app}
export APP_MODULE=${APP_MODULE:-"$MODULE_NAME:$VARIABLE_NAME"}

if [ -f /app/gunicorn_conf.py ]; then
    DEFAULT_GUNICORN_CONF=/app/gunicorn_conf.py
elif [ -f /app/app/gunicorn_conf.py ]; then
    DEFAULT_GUNICORN_CONF=/app/app/gunicorn_conf.py
else
    DEFAULT_GUNICORN_CONF=/gunicorn_conf.py
fi
export GUNICORN_CONF=${GUNICORN_CONF:-$DEFAULT_GUNICORN_CONF}
export WORKER_CLASS=${WORKER_CLASS:-"uvicorn.workers.UvicornWorker"}

# Start Gunicorn
exec gunicorn -k "$WORKER_CLASS" --timeout 360 -c "$GUNICORN_CONF" "$APP_MODULE"

# exec gunicorn -k gevent --timeout 120 --workers=3 --threads=3 --worker-connections=1000 "$APP_MODULE" --log-level debug

--------------------------------------------------------------------------------
/fastapi/submodules/__pycache__/prompts.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dagthomas/LangchainComposeChatYourDocs/b13915a6e395b006ad67668c822ddef71b570b16/fastapi/submodules/__pycache__/prompts.cpython-310.pyc

--------------------------------------------------------------------------------
/fastapi/submodules/prompts.py:
--------------------------------------------------------------------------------
def documentSearch(prompt, docs):
    template = f"""You are given extracted parts of a long document and a question. Create a final answer with references.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
If you know the answer, ALWAYS return the sources in the answer.
Identify the language of the question and reply in the identified language. Do not output the identified language.
=========
QUESTION: {prompt}
=========
CONTENT: {docs}
=========
FINAL ANSWER:"""
    return template
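--------------------------------------------------------------------------------
Example: what documentSearch renders (editor's sketch, not a repository file)
--------------------------------------------------------------------------------
documentSearch is called from /collections/stream in main.py with the raw list of
retrieved Document objects, so the CONTENT block of the template contains their
Python repr (page_content plus metadata), which is how the model sees the source
URLs it is told to cite. A sketch with an invented document standing in for the
retriever output:

from langchain.document_loaders.base import Document

from submodules.prompts import documentSearch

# A hypothetical retrieved chunk, standing in for retriever.get_relevant_documents(...)
docs = [
    Document(
        page_content="Qdrant is a vector similarity search engine.",
        metadata={"source": "https://qdrant.tech"},
    )
]
print(documentSearch("What is Qdrant?", docs))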
--------------------------------------------------------------------------------
/qdrant/Dockerfile:
--------------------------------------------------------------------------------
FROM qdrant/qdrant

--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# LangchainComposeChatYourDocs

A FastAPI, Qdrant, LangChain and SvelteKit starter project with some cool features.

## Project Description

The purpose is to learn, and to help others along the way.

## Installation and Setup

1. Rename /fastapi/app/.env.example to .env and fill in your OpenAI API key.
2. Make sure Docker Desktop is installed.
3. Run `docker compose up`.
4. Go to http://localhost:3000.

## Usage

From the start page, upload a file, a single webpage, or a list of webpages to index, then chat with the indexed content.

https://www.youtube.com/watch?v=tnB5jOdsPqM

--------------------------------------------------------------------------------
/sveltekit/.env:
--------------------------------------------------------------------------------
BODY_SIZE_LIMIT = 0
VITE_BASE_URL=http://localhost:5000
VITE_BASE_FASTAPI_URL=http://fastapi:5000
VITE_BEARER_TOKEN=5pBHDjr4bkNFc1xdqIMR6INLItKuPvZrf8zNdc6enlXqhy8qVO8YCYKRcdd

--------------------------------------------------------------------------------
/sveltekit/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
node_modules
/build
/.svelte-kit
/package
!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*

--------------------------------------------------------------------------------
/sveltekit/.npmrc:
--------------------------------------------------------------------------------
engine-strict=true

--------------------------------------------------------------------------------
/sveltekit/.prettierignore:
--------------------------------------------------------------------------------
.DS_Store
node_modules
/build
/.svelte-kit
/package
.env
.env.*
!.env.example

# Ignore files for PNPM, NPM and YARN
pnpm-lock.yaml
package-lock.json
yarn.lock

--------------------------------------------------------------------------------
/sveltekit/.prettierrc:
--------------------------------------------------------------------------------
{
	"useTabs": true,
	"singleQuote": true,
	"trailingComma": "none",
	"printWidth": 100,
	"plugins": ["prettier-plugin-svelte"],
	"pluginSearchDirs": ["."],
	"overrides": [{ "files": "*.svelte", "options": { "parser": "svelte" } }]
}
--------------------------------------------------------------------------------
/sveltekit/Dockerfile:
--------------------------------------------------------------------------------
FROM node:16-bullseye-slim AS BUILDER
LABEL dockerfile.baseimage="node:16-bullseye-slim" dockerfile.description="LearningLibrary Build Container" dockerfile.stage="BUILDER"
ENV NODE_ENV development
ENV BODY_SIZE_LIMIT=0
ENV ORIGIN=http://localhost:3000
WORKDIR /usr/src/app
COPY package.json tsconfig.json ./
RUN npm i
COPY . /usr/src/app
RUN npm run build && npm prune --omit=dev

FROM gcr.io/distroless/nodejs:16

LABEL dockerfile.baseimage="gcr.io/distroless/nodejs:16" dockerfile.description="LearningLibrary Production Container" dockerfile.stage="PRODUCTION"
ENV NODE_ENV production
ENV BODY_SIZE_LIMIT=0
ENV ORIGIN=http://localhost:3000
WORKDIR /usr/src/app
COPY --from=BUILDER /usr/src/app/build ./build
COPY --from=BUILDER /usr/src/app/node_modules ./node_modules
COPY package.json .

EXPOSE 3000
CMD ["build"]

--------------------------------------------------------------------------------
/sveltekit/README.md:
--------------------------------------------------------------------------------
# create-svelte

Everything you need to build a Svelte project, powered by [`create-svelte`](https://github.com/sveltejs/kit/tree/master/packages/create-svelte).

## Creating a project

If you're seeing this, you've probably already done this step. Congrats!

```bash
# create a new project in the current directory
npm create svelte@latest

# create a new project in my-app
npm create svelte@latest my-app
```

## Developing

Once you've created a project and installed dependencies with `npm install` (or `pnpm install` or `yarn`), start a development server:

```bash
npm run dev

# or start the server and open the app in a new browser tab
npm run dev -- --open
```

## Building

To create a production version of your app:

```bash
npm run build
```

You can preview the production build with `npm run preview`.

> To deploy your app, you may need to install an [adapter](https://kit.svelte.dev/docs/adapters) for your target environment.

--------------------------------------------------------------------------------
/sveltekit/package.json:
--------------------------------------------------------------------------------
{
	"name": "sveltekit",
	"version": "0.0.1",
	"private": true,
	"scripts": {
		"dev": "vite dev",
		"build": "BODY_SIZE_LIMIT=20000000 vite build",
		"preview": "vite preview",
		"test": "playwright test",
		"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
		"check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
		"test:unit": "vitest",
		"lint": "prettier --plugin-search-dir . --check .",
		"format": "prettier --plugin-search-dir . --write ."
	},
	"devDependencies": {
		"@playwright/test": "^1.28.1",
		"@sveltejs/adapter-node": "^1.2.4",
		"@sveltejs/kit": "^1.15.10",
		"autoprefixer": "^10.4.13",
		"postcss": "^8.4.21",
		"prettier": "^2.8.0",
		"prettier-plugin-svelte": "^2.8.1",
		"svelte": "^3.58.0",
		"svelte-check": "^3.0.1",
		"tailwindcss": "^3.3.1",
		"tslib": "^2.4.1",
		"typescript": "^4.9.3",
		"vite": "^4.3.4",
		"vitest": "^0.25.3"
	},
	"type": "module",
	"dependencies": {
		"@auth/core": "^0.5.1",
		"@auth/sveltekit": "^0.3.0",
		"@fontsource/poppins": "^4.5.10",
		"daisyui": "^2.51.5",
		"dotenv": "^16.0.3",
		"openai": "^3.2.1"
	}
}
--------------------------------------------------------------------------------
/sveltekit/playwright.config.ts:
--------------------------------------------------------------------------------
import type { PlaywrightTestConfig } from '@playwright/test';

const config: PlaywrightTestConfig = {
	webServer: {
		command: 'npm run build && npm run preview',
		port: 4173
	},
	testDir: 'tests'
};

export default config;

--------------------------------------------------------------------------------
/sveltekit/postcss.config.cjs:
--------------------------------------------------------------------------------
module.exports = {
	plugins: {
		tailwindcss: {},
		autoprefixer: {}
	}
};

--------------------------------------------------------------------------------
/sveltekit/src/app.css:
--------------------------------------------------------------------------------
@tailwind base;
@tailwind components;
@tailwind utilities;

* {
	scrollbar-color: rgb(195, 60, 102) #061E30;
}

*::-webkit-scrollbar {
	width: 5px;
}

/* Track */
*::-webkit-scrollbar-track {
	background: #061E30;
}

*::-webkit-scrollbar-thumb {
	background: rgb(195, 60, 102);
}

--------------------------------------------------------------------------------
/sveltekit/src/app.d.ts:
--------------------------------------------------------------------------------
// See https://kit.svelte.dev/docs/types#app
// for information about these interfaces
declare global {
	namespace App {
		// interface Error {}
		// interface Locals {}
		// interface PageData {}
		// interface Platform {}
	}
}

export {};
--------------------------------------------------------------------------------
/sveltekit/src/app.html (and remaining SvelteKit sources):
--------------------------------------------------------------------------------
[The rest of the SvelteKit sources (app.html, hooks.server.ts, index.test.ts, the
lib/ components and stores, the routes/ tree, svelte.config.js, tailwind.config.cjs,
tests/test.ts, tsconfig.json and vite.config.ts) were garbled in this export: their
markup was stripped, leaving only scattered line numbers and bare text nodes. The
surviving fragments are the %sveltekit.head% placeholder from app.html, the {code}
slot from PrismJS.svelte, the supported-upload list ".PDF, .CSV, .EPUB, .PPTX,
.DOCX, .XLSX, .SRT", and the input labels "Enter URL to index body text" /
"Enter URLs to index body text" from the chatbot components.]
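--------------------------------------------------------------------------------
Example: upload-and-ask flow (editor's sketch, not a repository file)
--------------------------------------------------------------------------------
The chatbot components listed above drove the /upload and /collections/stream
endpoints from fastapi/app/main.py. The same flow can be scripted; this sketch
assumes the stack is running on localhost:5000, that `requests` is installed,
that a file named example.pdf exists (any supported type works), and that the
bearer token is the one from sveltekit/.env:

import requests

BASE_URL = "http://localhost:5000"
HEADERS = {
    "Authorization": "Bearer 5pBHDjr4bkNFc1xdqIMR6INLItKuPvZrf8zNdc6enlXqhy8qVO8YCYKRcdd"
}

# 1. Upload a document; /upload answers with the slugified collection name.
with open("example.pdf", "rb") as f:  # hypothetical file, standing in for a real upload
    collection = requests.post(
        f"{BASE_URL}/upload", headers=HEADERS, files={"file": f}
    ).json()

# 2. Ask a question against that collection and stream the answer.
with requests.post(
    f"{BASE_URL}/collections/stream",
    headers=HEADERS,
    json={
        "collection": collection,
        "prompt": "What is this document about?",
        "temperature": 0.0,
    },
    stream=True,
) as response:
    for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)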