├── tests └── __init__.py ├── pautobot ├── __init__.py ├── routers │ ├── __init__.py │ ├── bot.py │ ├── contexts.py │ └── documents.py ├── engine │ ├── __init__.py │ ├── bot_enums.py │ ├── llm_factory.py │ ├── qa_factory.py │ ├── chatbot_factory.py │ ├── context_manager.py │ ├── bot_context.py │ ├── ingest.py │ └── engine.py ├── models.py ├── app_info.py ├── config.py ├── database.py ├── globals.py ├── db_models.py ├── app.py └── utils.py ├── MANIFEST.in ├── frontend ├── .prettierignore ├── styles │ └── globals.css ├── public │ ├── favicon.ico │ ├── pautobot.png │ └── loading.svg ├── jsconfig.json ├── postcss.config.js ├── README.md ├── tailwind.config.js ├── components │ ├── RightSidebar.js │ ├── icons │ │ ├── UploadIcon.js │ │ └── LoadingIcon.js │ ├── ModelSelector.js │ ├── Sidebar.js │ ├── ContextManager.js │ ├── SidebarBottomMenu.js │ ├── SidebarMenu.js │ ├── SidebarTopMenu.js │ ├── NewMessage.js │ ├── Main.js │ └── QADBManager.js ├── next.config.js ├── lib │ └── requests │ │ ├── history.js │ │ ├── bot.js │ │ └── documents.js ├── pages │ ├── _app.js │ └── index.js └── package.json ├── docs ├── pautobot.png ├── screenshot.png └── python3.11.3_lite.zip ├── pyproject.toml ├── requirements.txt ├── .pre-commit-config.yaml ├── .gitignore ├── setup.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pautobot/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pautobot/routers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pautobot/frontend-dist * 
-------------------------------------------------------------------------------- /frontend/.prettierignore: -------------------------------------------------------------------------------- 1 | .next 2 | dist 3 | node_modules -------------------------------------------------------------------------------- /pautobot/engine/__init__.py: -------------------------------------------------------------------------------- 1 | from pautobot.engine.engine import * 2 | -------------------------------------------------------------------------------- /docs/pautobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/pautobot.png -------------------------------------------------------------------------------- /docs/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/screenshot.png -------------------------------------------------------------------------------- /frontend/styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /docs/python3.11.3_lite.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/python3.11.3_lite.zip -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/pautobot.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/frontend/public/pautobot.png -------------------------------------------------------------------------------- /frontend/jsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "paths": { 4 | "@/*": ["./*"] 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /pautobot/models.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | 4 | class Query(BaseModel): 5 | mode: str 6 | query: str 7 | -------------------------------------------------------------------------------- /frontend/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /pautobot/app_info.py: -------------------------------------------------------------------------------- 1 | __appname__ = "PautoBot" 2 | __description__ = ( 3 | "Private AutoGPT Robot - Your private task assistant with GPT!" 4 | ) 5 | __version__ = "0.0.27" 6 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 |

2 | PAutoBot 3 |

🔥 PⒶutoBot 🔥

4 |

Your private task assistant with GPT

5 |

6 | -------------------------------------------------------------------------------- /pautobot/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathlib 3 | 4 | DATA_ROOT = os.path.abspath( 5 | os.path.join(os.path.expanduser("~"), "pautobot-data") 6 | ) 7 | pathlib.Path(DATA_ROOT).mkdir(parents=True, exist_ok=True) 8 | 9 | DATABASE_PATH = os.path.abspath(os.path.join(DATA_ROOT, "pautobot.db")) 10 | -------------------------------------------------------------------------------- /pautobot/engine/bot_enums.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class BotStatus(str, Enum): 5 | """Bot status.""" 6 | 7 | READY = "READY" 8 | THINKING = "THINKING" 9 | ERROR = "ERROR" 10 | 11 | 12 | class BotMode(str, Enum): 13 | """Bot mode.""" 14 | 15 | QA = "QA" 16 | CHAT = "CHAT" 17 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 79 3 | #include = '\.pyi?$' 4 | exclude = ''' 5 | /( 6 | \.git 7 | | \.hg 8 | | \.tox 9 | | \.venv 10 | | _build 11 | | buck-out 12 | | build 13 | | dist 14 | )/ 15 | ''' 16 | 17 | [build-system] 18 | requires = ["setuptools", "wheel"] 19 | build-backend = "setuptools.build_meta" 20 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.194 2 | gpt4all==0.3.0 3 | chromadb==0.3.23 4 | urllib3==2.0.2 5 | pdfminer.six==20221105 6 | unstructured==0.6.6 7 | extract-msg==0.41.1 8 | tabulate==0.9.0 9 | pandoc==2.3 10 | pypandoc==1.11 11 | tqdm==4.65.0 12 | python-multipart==0.0.6 13 | fastapi==0.96.0 14 | SQLAlchemy==2.0.15 15 | alembic==1.11.1 16 | sentence_transformers==2.2.2 17 | requests 18 | 
-------------------------------------------------------------------------------- /frontend/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: [ 4 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 5 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 6 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 7 | ], 8 | theme: { 9 | extend: { 10 | rotate: { 11 | logo: "-10deg", 12 | }, 13 | }, 14 | }, 15 | plugins: [], 16 | }; 17 | -------------------------------------------------------------------------------- /frontend/components/RightSidebar.js: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import React from "react"; 3 | 4 | import ModelSelector from "./ModelSelector"; 5 | import QADBManager from "./QADBManager"; 6 | import ContextManager from "./ContextManager"; 7 | 8 | export default function SidebarTools() { 9 | return ( 10 | <> 11 | 12 | 13 | 14 | 15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /frontend/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | let nextConfig = { 3 | distDir: "dist", 4 | }; 5 | 6 | if (process.env.NODE_ENV === "development") { 7 | nextConfig.rewrites = async () => { 8 | return [ 9 | { 10 | source: "/api/:path*", 11 | destination: "http://127.0.0.1:5678/api/:path*", 12 | }, 13 | ]; 14 | }; 15 | } else { 16 | nextConfig.output = "export"; 17 | } 18 | 19 | module.exports = nextConfig; 20 | -------------------------------------------------------------------------------- /pautobot/database.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy import create_engine 2 | from sqlalchemy.ext.declarative import declarative_base 3 | from sqlalchemy.orm import sessionmaker 4 | 5 | from pautobot.config 
import DATABASE_PATH 6 | 7 | DATABASE_URL = "sqlite:///{}".format(DATABASE_PATH) 8 | engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False}) 9 | SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) 10 | session = SessionLocal() 11 | 12 | Base = declarative_base() 13 | -------------------------------------------------------------------------------- /pautobot/globals.py: -------------------------------------------------------------------------------- 1 | from pautobot.engine import PautoBotEngine 2 | from pautobot.engine.bot_enums import BotMode 3 | from pautobot.engine.context_manager import ContextManager 4 | 5 | engine = None 6 | context_manager = None 7 | 8 | 9 | def init(): 10 | """Initialize the global engine.""" 11 | global context_manager 12 | global engine 13 | 14 | context_manager = ContextManager() 15 | context_manager.load_contexts() 16 | 17 | engine = PautoBotEngine(mode=BotMode.QA, context_manager=context_manager) 18 | -------------------------------------------------------------------------------- /frontend/lib/requests/history.js: -------------------------------------------------------------------------------- 1 | export const getChatHistory = (contextId) => { 2 | const response = fetch(`/api/${contextId}/chat_history`, { 3 | method: "GET", 4 | headers: { 5 | "Content-Type": "application/json", 6 | }, 7 | }); 8 | return response; 9 | }; 10 | 11 | export const clearChatHistory = (contextId) => { 12 | const response = fetch(`/api/${contextId}/chat_history`, { 13 | method: "DELETE", 14 | headers: { 15 | "Content-Type": "application/json", 16 | }, 17 | }); 18 | return response; 19 | }; 20 | -------------------------------------------------------------------------------- /frontend/components/icons/UploadIcon.js: -------------------------------------------------------------------------------- 1 | export default function Icon() { 2 | return ( 3 | 18 | ); 19 | } 20 | 
-------------------------------------------------------------------------------- /frontend/pages/_app.js: -------------------------------------------------------------------------------- 1 | import "@/styles/globals.css"; 2 | import "react-toastify/dist/ReactToastify.css"; 3 | 4 | import { Bai_Jamjuree } from "next/font/google"; 5 | import Head from "next/head"; 6 | 7 | const bai_jam = Bai_Jamjuree({ 8 | subsets: ["latin", "vietnamese"], 9 | weight: ["200", "300", "400", "500", "600", "700"], 10 | }); 11 | 12 | export default function RootLayout({ Component, pageProps }) { 13 | return ( 14 | <> 15 | 16 | PAutoBot - Your Private GPT Assistant 17 | 18 |
19 | 20 |
21 | 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pauto-frontend", 3 | "version": "0.0.27", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "export": "next export", 9 | "start": "next start", 10 | "lint": "next lint", 11 | "pr": "prettier --write ." 12 | }, 13 | "dependencies": { 14 | "next": "13.4.3", 15 | "react": "18.2.0", 16 | "react-dom": "18.2.0", 17 | "react-toastify": "^9.1.3" 18 | }, 19 | "devDependencies": { 20 | "autoprefixer": "^10.4.14", 21 | "postcss": "^8.4.23", 22 | "tailwindcss": "^3.3.2", 23 | "prettier": "^2.8.8", 24 | "eslint": "8.41.0", 25 | "eslint-config-next": "13.4.3" 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /frontend/pages/index.js: -------------------------------------------------------------------------------- 1 | import { ToastContainer } from "react-toastify"; 2 | 3 | import Sidebar from "@/components/Sidebar"; 4 | import Main from "@/components/Main"; 5 | import SidebarTools from "@/components/RightSidebar"; 6 | 7 | export default function Home() { 8 | return ( 9 |
10 |
11 | 12 |
13 |
14 |
15 | 16 |
17 |
18 | 19 |
20 |
21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /pautobot/engine/llm_factory.py: -------------------------------------------------------------------------------- 1 | from langchain.llms import GPT4All 2 | 3 | from pautobot.utils import download_model 4 | 5 | 6 | class LLMFactory: 7 | """Factory for instantiating LLMs.""" 8 | 9 | @staticmethod 10 | def create_llm( 11 | model_type, model_path, model_n_ctx, streaming=False, verbose=False 12 | ): 13 | # Download the model 14 | download_model(model_type, model_path) 15 | 16 | # Prepare the LLM 17 | if model_type == "GPT4All": 18 | return GPT4All( 19 | model=model_path, 20 | n_ctx=model_n_ctx, 21 | backend="gptj", 22 | streaming=streaming, 23 | verbose=verbose, 24 | ) 25 | else: 26 | raise ValueError(f"Invalid model type: {model_type}") 27 | -------------------------------------------------------------------------------- /frontend/components/ModelSelector.js: -------------------------------------------------------------------------------- 1 | export default function ModelSelector() { 2 | return ( 3 | <> 4 |
Model
5 |
6 | 12 |
13 |
14 |
15 | Source:{" "} 16 | 21 | gpt4all.io 22 | 23 |
24 |
25 | License: Apache 2.0 26 |
27 |
28 | 29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /frontend/components/Sidebar.js: -------------------------------------------------------------------------------- 1 | import SidebarBottomMenu from "./SidebarBottomMenu"; 2 | import SidebarTopMenu from "./SidebarTopMenu"; 3 | 4 | export default function Sidebar() { 5 | return ( 6 | <> 7 |
8 |
9 |
window.open("https://pautobot.com/", "_blank")} 14 | > 15 | PAutoBot 20 |

21 | PAuto 22 |

23 |
24 |
25 |
26 | 27 |
28 |
29 | 30 |
31 |
32 |
33 |
34 | 35 | ); 36 | } 37 | -------------------------------------------------------------------------------- /pautobot/engine/qa_factory.py: -------------------------------------------------------------------------------- 1 | from chromadb.config import Settings 2 | from langchain.chains import RetrievalQA 3 | from langchain.embeddings import HuggingFaceEmbeddings 4 | from langchain.vectorstores import Chroma 5 | 6 | from pautobot.engine.bot_context import BotContext 7 | 8 | 9 | class QAFactory: 10 | """Factory for instantiating QAs.""" 11 | 12 | @staticmethod 13 | def create_qa( 14 | context: BotContext, 15 | llm, 16 | ): 17 | chroma_settings = Settings( 18 | chroma_db_impl="duckdb+parquet", 19 | persist_directory=context.search_db_directory, 20 | anonymized_telemetry=False, 21 | ) 22 | embeddings = HuggingFaceEmbeddings( 23 | model_name=context.embeddings_model_name 24 | ) 25 | database = Chroma( 26 | persist_directory=context.search_db_directory, 27 | embedding_function=embeddings, 28 | client_settings=chroma_settings, 29 | ) 30 | retriever = database.as_retriever(search_kwargs={"k": 4}) 31 | qa_instance = RetrievalQA.from_chain_type( 32 | llm=llm, 33 | chain_type="stuff", 34 | retriever=retriever, 35 | return_source_documents=True, 36 | ) 37 | return qa_instance 38 | -------------------------------------------------------------------------------- /frontend/components/ContextManager.js: -------------------------------------------------------------------------------- 1 | import { toast } from "react-toastify"; 2 | 3 | import { clearChatHistory } from "@/lib/requests/history"; 4 | import { ingestData } from "@/lib/requests/documents"; 5 | 6 | export default function ModelSelector() { 7 | return ( 8 | <> 9 |
This Context
10 |
11 | 24 | 35 |
36 | 37 | ); 38 | } 39 | -------------------------------------------------------------------------------- /frontend/components/icons/LoadingIcon.js: -------------------------------------------------------------------------------- 1 | export default function () { 2 | return ( 3 | 20 | ); 21 | } 22 | -------------------------------------------------------------------------------- /pautobot/db_models.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from sqlalchemy import Column, DateTime, ForeignKey, Integer, String 4 | from sqlalchemy.orm import relationship 5 | 6 | from pautobot.database import Base, engine 7 | 8 | 9 | class BotContext(Base): 10 | __tablename__ = "contexts" 11 | 12 | id = Column(Integer, primary_key=True, index=True) 13 | name = Column(String, index=True) 14 | created_at = Column(DateTime, default=datetime.datetime.utcnow) 15 | documents = relationship("Document", back_populates="bot_context") 16 | chat_chunks = relationship("ChatChunk", back_populates="bot_context") 17 | 18 | 19 | class Document(Base): 20 | __tablename__ = "documents" 21 | 22 | id = Column(Integer, primary_key=True, index=True) 23 | name = Column(String) 24 | storage_name = Column(String) 25 | created_at = Column(DateTime, default=datetime.datetime.utcnow) 26 | bot_context_id = Column(Integer, ForeignKey("contexts.id")) 27 | bot_context = relationship("BotContext", back_populates="documents") 28 | 29 | 30 | class ChatChunk(Base): 31 | __tablename__ = "chat_chunks" 32 | 33 | id = Column(Integer, primary_key=True, index=True) 34 | created_at = Column(DateTime, default=datetime.datetime.utcnow) 35 | text = Column(String) 36 | bot_context_id = Column(Integer, ForeignKey("contexts.id")) 37 | bot_context = relationship("BotContext", back_populates="chat_chunks") 38 | 39 | 40 | Base.metadata.create_all(engine) 41 | -------------------------------------------------------------------------------- /frontend/lib/requests/bot.js: 
-------------------------------------------------------------------------------- 1 | export const getBotInfo = () => { 2 | return fetch("/api/bot_info", { 3 | method: "GET", 4 | headers: { 5 | "Content-Type": "application/json", 6 | }, 7 | }).then(async (response) => { 8 | let data = await response.json(); 9 | if (!response.ok) { 10 | const error = (data && data.message) || response.status; 11 | return Promise.reject(error); 12 | } 13 | return Promise.resolve(data); 14 | }); 15 | }; 16 | 17 | export const ask = (contextId, mode, message) => { 18 | return fetch(`/api/${contextId}/ask`, { 19 | method: "POST", 20 | headers: { 21 | "Content-Type": "application/json", 22 | }, 23 | body: JSON.stringify({ mode: mode, query: message }), 24 | }).then(async (response) => { 25 | let data = await response.json(); 26 | if (!response.ok) { 27 | const error = (data && data.message) || response.status; 28 | return Promise.reject(error); 29 | } 30 | return Promise.resolve(data); 31 | }); 32 | }; 33 | 34 | export const queryBotResponse = (contextId) => { 35 | return fetch(`/api/${contextId}/get_answer`, { 36 | method: "GET", 37 | headers: { 38 | "Content-Type": "application/json", 39 | }, 40 | }).then(async (response) => { 41 | let data = await response.json(); 42 | if (!response.ok) { 43 | const error = (data && data.message) || response.status; 44 | return Promise.reject(error); 45 | } 46 | return Promise.resolve(data); 47 | }); 48 | }; 49 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | files: ^(.*\.(py|json|md|sh|yaml|cfg|txt))$ 3 | exclude: ^(\.[^/]*cache/.*|.*/_user.py|source_documents/)$ 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: check-yaml 9 | args: [--unsafe] 10 | - id: end-of-file-fixer 11 | - id: trailing-whitespace 12 | exclude-files: \.md$ 13 | - id: 
check-json 14 | - id: mixed-line-ending 15 | - id: check-merge-conflict 16 | - id: check-docstring-first 17 | - id: fix-byte-order-marker 18 | - id: check-case-conflict 19 | - repo: https://github.com/adrienverge/yamllint.git 20 | rev: v1.29.0 21 | hooks: 22 | - id: yamllint 23 | args: 24 | - --no-warnings 25 | - -d 26 | - '{extends: relaxed, rules: {line-length: {max: 90}}}' 27 | - repo: https://github.com/myint/autoflake 28 | rev: v1.4 29 | hooks: 30 | - id: autoflake 31 | exclude: .*/__init__.py 32 | args: 33 | - --in-place 34 | - --remove-all-unused-imports 35 | - --expand-star-imports 36 | - --remove-duplicate-keys 37 | - --remove-unused-variables 38 | - repo: https://github.com/pre-commit/mirrors-isort 39 | rev: v5.4.2 40 | hooks: 41 | - id: isort 42 | args: ["--profile", "black"] 43 | - repo: https://github.com/pre-commit/pre-commit-hooks 44 | rev: v3.3.0 45 | hooks: 46 | - id: trailing-whitespace 47 | - id: end-of-file-fixer 48 | -------------------------------------------------------------------------------- /pautobot/routers/bot.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import traceback 3 | 4 | from fastapi import APIRouter, BackgroundTasks, status 5 | from fastapi.responses import JSONResponse 6 | 7 | from pautobot import globals 8 | from pautobot.engine.bot_enums import BotStatus 9 | from pautobot.models import Query 10 | 11 | router = APIRouter( 12 | prefix="/api", 13 | tags=["Ask Bot"], 14 | ) 15 | 16 | 17 | @router.get("/bot_info") 18 | async def get_bot_info(): 19 | return globals.engine.get_bot_info() 20 | 21 | 22 | @router.post("/{context_id}/ask") 23 | async def ask( 24 | context_id: int, query: Query, background_tasks: BackgroundTasks 25 | ): 26 | try: 27 | globals.engine.check_query( 28 | query.mode, query.query, context_id=context_id 29 | ) 30 | except ValueError as e: 31 | logging.error(traceback.format_exc()) 32 | return JSONResponse( 33 | 
status_code=status.HTTP_400_BAD_REQUEST, 34 | content={"message": str(e)}, 35 | ) 36 | if globals.engine.context.current_answer["status"] == BotStatus.THINKING: 37 | return JSONResponse( 38 | status_code=status.HTTP_400_BAD_REQUEST, 39 | content={"message": "Bot is already thinking"}, 40 | ) 41 | globals.engine.context.current_answer = { 42 | "answer": "", 43 | "docs": [], 44 | } 45 | background_tasks.add_task(globals.engine.query, query.mode, query.query) 46 | return {"message": "Query received"} 47 | 48 | 49 | @router.get("/{context_id}/get_answer") 50 | async def get_answer(context_id: int): 51 | return globals.engine.get_answer(context_id=context_id) 52 | -------------------------------------------------------------------------------- /pautobot/engine/chatbot_factory.py: -------------------------------------------------------------------------------- 1 | from langchain import LLMChain, PromptTemplate 2 | from langchain.memory import ConversationBufferWindowMemory 3 | 4 | 5 | class ChatbotFactory: 6 | """Factory for instantiating chatbots.""" 7 | 8 | @staticmethod 9 | def create_chatbot( 10 | llm, 11 | ): 12 | template = """Assistant is a large language model train by human. 13 | 14 | Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand. 15 | 16 | Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. 
Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics. 17 | 18 | Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist. 19 | 20 | {history} 21 | Human: {human_input} 22 | Assistant:""" 23 | 24 | prompt = PromptTemplate( 25 | input_variables=["history", "human_input"], template=template 26 | ) 27 | chatbot_instance = LLMChain( 28 | llm=llm, 29 | prompt=prompt, 30 | verbose=True, 31 | memory=ConversationBufferWindowMemory(k=2), 32 | ) 33 | return chatbot_instance 34 | -------------------------------------------------------------------------------- /frontend/lib/requests/documents.js: -------------------------------------------------------------------------------- 1 | export const ingestData = (contextId) => { 2 | return fetch(`/api/${contextId}/documents/ingest`, { 3 | method: "POST", 4 | headers: { 5 | "Content-Type": "application/json", 6 | }, 7 | }).then(async (response) => { 8 | let data = await response.json(); 9 | if (!response.ok) { 10 | const error = (data && data.message) || response.status; 11 | return Promise.reject(error); 12 | } 13 | return Promise.resolve(data); 14 | }); 15 | }; 16 | 17 | export const uploadDocument = (contextId, file) => { 18 | const formData = new FormData(); 19 | formData.append("file", file); 20 | return fetch(`/api/${contextId}/documents`, { 21 | method: "POST", 22 | body: formData, 23 | }).then(async (response) => { 24 | let data = await response.json(); 25 | if (!response.ok) { 26 | const error = (data && data.message) || response.status; 27 | console.log(error); 28 | return Promise.reject(error); 29 | } 30 | return Promise.resolve(data); 31 | }); 32 | }; 33 | 
34 | export const openDocument = (contextId, documentId) => { 35 | return fetch( 36 | `/api/${contextId}/documents/${documentId}/open_in_file_explorer`, 37 | { 38 | method: "POST", 39 | } 40 | ); 41 | }; 42 | 43 | export const getDocuments = (contextId) => { 44 | return fetch(`/api/${contextId}/documents`, { 45 | method: "GET", 46 | headers: { 47 | "Content-Type": "application/json", 48 | }, 49 | }).then(async (response) => { 50 | let data = await response.json(); 51 | if (!response.ok) { 52 | const error = (data && data.message) || response.status; 53 | return Promise.reject(error); 54 | } 55 | return Promise.resolve(data); 56 | }); 57 | }; 58 | 59 | export const deleteDocument = (contextId, documentId) => { 60 | return fetch(`/api/${contextId}/documents/${documentId}`, { 61 | method: "DELETE", 62 | }).then(async (response) => { 63 | let data = await response.json(); 64 | if (!response.ok) { 65 | const error = (data && data.message) || response.status; 66 | return Promise.reject(error); 67 | } 68 | return Promise.resolve(data); 69 | }); 70 | }; 71 | -------------------------------------------------------------------------------- /frontend/components/SidebarBottomMenu.js: -------------------------------------------------------------------------------- 1 | import { toast } from "react-toastify"; 2 | 3 | export default function () { 4 | return ( 5 |
6 |
{ 9 | toast.info("Coming soon!"); 10 | }} 11 | > 12 | 18 | 19 | 20 |
21 | 22 |
23 | 31 | 32 | 33 |
34 |
35 |
36 | ); 37 | } 38 | -------------------------------------------------------------------------------- /frontend/components/SidebarMenu.js: -------------------------------------------------------------------------------- 1 | import { openDocumentsFolder } from "@/utils"; 2 | 3 | export default function SidebarMenu() { 4 | return ( 5 |
6 |
    7 |
  • 8 |
    9 | 15 | 20 | 21 | Query 22 |
    23 |
  • 24 |
  • 25 |
    { 28 | openDocumentsFolder(); 29 | }} 30 | > 31 | 40 | Manage Knowledge DB 41 |
    42 |
  • 43 |
44 |
45 | ); 46 | } 47 | -------------------------------------------------------------------------------- /pautobot/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 4 | 5 | import argparse 6 | import logging 7 | 8 | import uvicorn 9 | from fastapi import FastAPI 10 | from fastapi.middleware.cors import CORSMiddleware 11 | from fastapi.staticfiles import StaticFiles 12 | 13 | from pautobot import db_models as models 14 | from pautobot import globals 15 | from pautobot.app_info import __appname__, __description__, __version__ 16 | from pautobot.config import DATA_ROOT 17 | from pautobot.database import engine 18 | from pautobot.routers import bot, contexts, documents 19 | from pautobot.utils import extract_frontend_dist 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser( 24 | description=__description__, 25 | ) 26 | parser.add_argument( 27 | "--host", 28 | type=str, 29 | default="127.0.0.1", 30 | help="Host to run the server on", 31 | ) 32 | parser.add_argument( 33 | "--port", 34 | type=int, 35 | default=5678, 36 | help="Port to run the server on", 37 | ) 38 | parser.add_argument( 39 | "--version", 40 | action="store_true", 41 | help="Print version and exit", 42 | ) 43 | args = parser.parse_args() 44 | 45 | if args.version: 46 | print(f"{__appname__} v{__version__}") 47 | return 48 | 49 | logging.info(f"Starting {__appname__}...") 50 | logging.info(f"Version: {__version__}") 51 | 52 | logging.info("Extracting frontend distribution...") 53 | static_folder = os.path.abspath(os.path.join(DATA_ROOT, "frontend-dist")) 54 | extract_frontend_dist(static_folder) 55 | 56 | logging.info("Creating database tables...") 57 | models.Base.metadata.create_all(bind=engine) 58 | 59 | logging.info("Starting FastAPI server...") 60 | globals.init() 61 | 62 | app = FastAPI( 63 | title=__appname__, 64 | description=__description__, 65 | ) 66 | app.add_middleware( 67 | 
CORSMiddleware, 68 | allow_origins=["*"], 69 | allow_methods=["*"], 70 | allow_headers=["*"], 71 | expose_headers=["*"], 72 | ) 73 | 74 | app.include_router(bot.router) 75 | app.include_router(contexts.router) 76 | app.include_router(documents.router) 77 | app.mount( 78 | "/", StaticFiles(directory=static_folder, html=True), name="static" 79 | ) 80 | 81 | uvicorn.run(app, host=args.host, port=args.port, reload=False, workers=1) 82 | 83 | 84 | if __name__ == "__main__": 85 | main() 86 | -------------------------------------------------------------------------------- /pautobot/routers/contexts.py: -------------------------------------------------------------------------------- 1 | from fastapi import APIRouter 2 | 3 | from pautobot import globals 4 | from pautobot.engine.bot_context import BotContext 5 | 6 | router = APIRouter( 7 | prefix="/api", 8 | tags=["Bot Context"], 9 | ) 10 | 11 | 12 | @router.get("/contexts") 13 | async def get_contexts(): 14 | """ 15 | Get all chat contexts 16 | Each context is a separate chat session 17 | """ 18 | contexts = globals.context_manager.get_contexts() 19 | context_list = [] 20 | for context in contexts: 21 | context_list.append(contexts[context].dict()) 22 | return context_list 23 | 24 | 25 | @router.get("/current_context") 26 | async def get_current_context(): 27 | """ 28 | Get the current chat context 29 | """ 30 | return globals.context_manager.get_current_context().dict() 31 | 32 | 33 | @router.post("/contexts") 34 | async def create_context(): 35 | """ 36 | Create a new chat context 37 | """ 38 | context = BotContext() 39 | globals.context_manager.register(context) 40 | return { 41 | "message": "Context created", 42 | "data": context.dict(), 43 | } 44 | 45 | 46 | @router.delete("/contexts/{context_id}") 47 | async def delete_context(context_id: int): 48 | """ 49 | Delete a chat context 50 | """ 51 | globals.context_manager.delete_context(context_id) 52 | return {"message": "Context deleted"} 53 | 54 | 55 | 
@router.put("/contexts/{context_id}")
async def rename_context(context_id: int, new_name: str):
    """Give chat context ``context_id`` the display name ``new_name``."""
    manager = globals.context_manager
    manager.rename_context(context_id, new_name)
    return {"message": "Context renamed"}


@router.post("/set_context")
async def set_context(context_id: int):
    """Make ``context_id`` the active context and point the engine at it."""
    manager = globals.context_manager
    manager.set_current_context(context_id)
    active_context = manager.get_current_context()
    globals.engine.set_context(active_context)
    return {"message": "Context set"}


@router.get("/{context_id}/chat_history")
async def get_chat_history(context_id: int):
    """Return the stored chat history of context ``context_id``."""
    context = globals.context_manager.get_context(context_id)
    return context.get_chat_history()


@router.delete("/{context_id}/chat_history")
async def clear_chat_history(context_id: int):
    """Erase the stored chat history of context ``context_id``."""
    context = globals.context_manager.get_context(context_id)
    context.clear_chat_history()
    return {"message": "Chat history cleared"}
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | *.py,cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | cover/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | .pybuilder/ 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | # For a library or package, you might want to ignore these files since the code is 86 | # intended to run in multiple environments; otherwise, check them in: 87 | # .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
class ContextManager:
    """
    Context manager. Handle logics related to PautoBot contexts.

    Keeps an in-memory registry of BotContext objects (keyed by id) that
    mirrors the ``bot_contexts`` table, plus a pointer to the currently
    active context.
    """

    def __init__(self):
        # Active context; stays None until load_contexts() or
        # set_current_context() is called.
        self._current_context = None
        # Mapping of context id -> BotContext.
        self._contexts = {}

    def load_contexts(self) -> None:
        """
        Load all contexts from the database.

        Context 0 is the built-in default: it always exists and becomes
        the current context.
        """
        self._contexts = {0: BotContext(id=0, name="Default")}
        self._current_context = self._contexts[0]
        for context in session.query(db_models.BotContext).all():
            self._contexts[context.id] = BotContext(id=context.id)

    def rename_context(self, context_id: int, new_name: str) -> None:
        """
        Rename a context.

        Raises:
            ValueError: if the context does not exist.
        """
        if context_id not in self._contexts:
            raise ValueError(f"Context {context_id} not found!")
        session.query(db_models.BotContext).filter_by(id=context_id).update(
            {"name": new_name}
        )
        session.commit()
        # Keep the cached BotContext in sync with the database so that
        # get_contexts() does not keep reporting the stale name.
        self._contexts[context_id].name = new_name

    def delete_context(self, context_id: int) -> None:
        """
        Completely delete a context: cache entry, database row and
        on-disk storage directory.

        Raises:
            ValueError: if the context does not exist.
        """
        if context_id not in self._contexts:
            raise ValueError(f"Context {context_id} not found!")
        # The membership check above already succeeded, so the cache
        # entry can be dropped unconditionally.
        del self._contexts[context_id]
        try:
            session.query(db_models.BotContext).filter_by(
                id=context_id
            ).delete()
            session.commit()
            shutil.rmtree(
                os.path.join(DATA_ROOT, "contexts", str(context_id))
            )
        except Exception as e:
            # Best-effort cleanup: the context is already removed from
            # the in-memory registry even if DB/file removal fails.
            logging.error(f"Error while deleting context {context_id}: {e}")

    def get_context(self, context_id: int) -> "BotContext":
        """
        Get a context by its ID.

        Raises:
            ValueError: if the context does not exist.
        """
        if context_id not in self._contexts:
            raise ValueError(f"Context {context_id} not found!")
        return self._contexts[context_id]

    def get_contexts(self) -> dict:
        """
        Get all contexts, keyed by context id.
        """
        return self._contexts

    def set_current_context(self, context_id: int) -> None:
        """
        Set the current context.

        Raises:
            ValueError: if the context does not exist.
        """
        if context_id not in self._contexts:
            raise ValueError(f"Context {context_id} not found!")
        self._current_context = self._contexts[context_id]

    def get_current_context(self) -> "BotContext":
        """
        Get the current context (None before any context was loaded).
        """
        return self._current_context
router = APIRouter(
    prefix="/api",
    tags=["Documents"],
)


@router.get("/{context_id}/documents")
async def get_documents(context_id: int):
    """
    Get all documents in the bot's context
    """
    return globals.context_manager.get_context(context_id).get_documents()


@router.post("/{context_id}/documents")
async def upload_document(context_id: int, file: UploadFile = File(...)):
    """
    Upload a document to the bot's context.

    Accepts either a single supported document, or a ``.zip`` archive in
    which case every supported file found at the archive's top level is
    added. Unsupported extensions are rejected with HTTP 400.
    """
    if not file:
        return {"message": "No file sent"}

    context = globals.context_manager.get_context(context_id)
    file_extension = os.path.splitext(file.filename)[1]
    if file_extension == ".zip":
        # TemporaryDirectory cleans itself up on exit; the previous
        # tempfile.mkdtemp() directory was leaked on every upload.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_zip_file = os.path.join(tmp_dir, file.filename)
            with open(tmp_zip_file, "wb") as tmp_zip:
                tmp_zip.write(file.file.read())
            with zipfile.ZipFile(tmp_zip_file, "r") as zip_ref:
                zip_ref.extractall(tmp_dir)
            for filename in os.listdir(tmp_dir):
                if os.path.splitext(filename)[1] in SUPPORTED_DOCUMENT_TYPES:
                    # Named "extracted" on purpose: reusing "file" here
                    # would shadow the UploadFile parameter.
                    with open(
                        os.path.join(tmp_dir, filename), "rb"
                    ) as extracted:
                        context.add_document(extracted, filename)
    elif file_extension in SUPPORTED_DOCUMENT_TYPES:
        context.add_document(file.file, file.filename)
    else:
        return JSONResponse(
            status_code=400,
            content={"message": f"File type {file_extension} not supported"},
        )

    globals.engine.ingest_documents_in_background(context_id=context_id)
    return {"message": "File uploaded"}
def get_version():
    """Get package version from pautobot/app_info.py.

    Returns:
        The version string from the ``__version__`` assignment.

    Raises:
        RuntimeError: if the file contains no ``__version__`` line.
    """
    filename = "pautobot/app_info.py"
    with open(filename, encoding="utf-8") as f:
        match = re.search(
            r"""^__version__ = ['"]([^'"]*)['"]""", f.read(), re.M
        )
        if not match:
            # Name the file that was scanned so packaging failures are
            # easy to diagnose.
            raise RuntimeError(f"{filename} doesn't contain __version__")
        version = match.groups()[0]
    return version
requirements based on context""" 21 | install_requires = [ 22 | "langchain>=0.0.194", 23 | "gpt4all>=0.3.0", 24 | "chromadb>=0.3.23", 25 | "urllib3>=2.0.2", 26 | "pdfminer.six>=20221105", 27 | "unstructured>=0.6.6", 28 | "extract-msg>=0.41.1", 29 | "tabulate>=0.9.0", 30 | "pandoc>=2.3", 31 | "pypandoc>=1.11", 32 | "tqdm>=4.65.0", 33 | "python-multipart>=0.0.6", 34 | "fastapi==0.96.0", 35 | "SQLAlchemy==2.0.15", 36 | "alembic==1.11.1", 37 | "sentence_transformers==2.2.2", 38 | "requests", 39 | ] 40 | 41 | return install_requires 42 | 43 | 44 | def get_long_description(): 45 | """Read long description from README""" 46 | with open("README.md", encoding="utf-8") as f: 47 | long_description = f.read() 48 | long_description = long_description.replace( 49 | "![PAutoBot](./docs/screenshot.png)", 50 | "![PAutoBot](https://raw.githubusercontent.com/nrl-ai/pautobot/main/docs/screenshot.png)", 51 | ) 52 | long_description = long_description.replace( 53 | 'PAutoBot', 54 | 'PAutoBot', 55 | ) 56 | return long_description 57 | 58 | 59 | setup( 60 | name="pautobot", 61 | version=get_version(), 62 | packages=find_packages(), 63 | description="Private AutoGPT Robot - Your private task assistant with GPT!", 64 | long_description=get_long_description(), 65 | long_description_content_type="text/markdown", 66 | author="Viet-Anh Nguyen", 67 | author_email="vietanh.dev@gmail.com", 68 | url="https://github.com/vietanhdev/pautobot", 69 | install_requires=get_install_requires(), 70 | license="Apache License 2.0", 71 | keywords="Personal Assistant, Automation, GPT, LLM, PrivateGPT", 72 | classifiers=[ 73 | "Natural Language :: English", 74 | "Operating System :: OS Independent", 75 | "Programming Language :: Python", 76 | "Programming Language :: Python :: 3.8", 77 | "Programming Language :: Python :: 3.9", 78 | "Programming Language :: Python :: 3.10", 79 | "Programming Language :: Python :: 3.11", 80 | "Programming Language :: Python :: 3 :: Only", 81 | ], 82 | package_data={ 83 | 
"pautobot": [ 84 | "pautobot/frontend-dist/**/*", 85 | "pautobot/frontend-dist/*", 86 | ] 87 | }, 88 | include_package_data=True, 89 | entry_points={ 90 | "console_scripts": [ 91 | "pautobot=pautobot.app:main", 92 | "pautobot.ingest=pautobot.ingest:main", 93 | ], 94 | }, 95 | ) 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | PAutoBot 3 |

🔥 PⒶutoBot 🔥

4 |

Private AutoGPT Robot - Your private task assistant with GPT!

5 |

6 | 7 | - 🔥 **Chat** to your offline **LLMs on CPU Only**. **100% private**, no data leaves your execution environment at any point. 8 | - 🔥 **Ask questions** to your documents without an internet connection. Engine developed based on [PrivateGPT](https://github.com/imartinez/privateGPT). 9 | - 🔥 **Automate tasks** easily with **PAutoBot plugins**. Easy for everyone. 10 | - 🔥 **Easy coding structure** with **Next.js** and **Python**. Easy to understand and modify. 11 | - 🔥 **Built with** [LangChain](https://github.com/hwchase17/langchain), [GPT4All](https://github.com/nomic-ai/gpt4all), [Chroma](https://www.trychroma.com/), [SentenceTransformers](https://www.sbert.net/), [PrivateGPT](https://github.com/imartinez/privateGPT). 12 | 13 | ![PAutoBot](./docs/screenshot.png) 14 | 15 | **The supported extensions are:** 16 | 17 | - `.csv`: CSV, 18 | - `.docx`: Word Document, 19 | - `.doc`: Word Document, 20 | - `.enex`: EverNote, 21 | - `.eml`: Email, 22 | - `.epub`: EPub, 23 | - `.html`: HTML File, 24 | - `.md`: Markdown, 25 | - `.msg`: Outlook Message, 26 | - `.odt`: Open Document Text, 27 | - `.pdf`: Portable Document Format (PDF), 28 | - `.pptx` : PowerPoint Document, 29 | - `.ppt` : PowerPoint Document, 30 | - `.txt`: Text file (UTF-8), 31 | 32 | ## I. Installation and Usage 33 | 34 | ### 1. Installation 35 | 36 | - Python 3.8 or higher. 37 | - Install **PAutoBot**: 38 | 39 | ```shell 40 | pip install pautobot 41 | ``` 42 | 43 | ### 2. Usage 44 | 45 | - Run the app: 46 | 47 | ```shell 48 | python -m pautobot.app 49 | ``` 50 | 51 | or just: 52 | 53 | ```shell 54 | pautobot 55 | ``` 56 | 57 | - Go to to see the user interface. You can choose one of the two modes: 58 | - **Chat Only** 59 | - **Documents Q&A** 60 | - Upload some documents to the app (see the supported extensions above). You can try [docs/python3.11.3_lite.zip](docs/python3.11.3_lite.zip) for a quick start. 
This zip file contains 45 files from the [Python 3.11.3 documentation](https://docs.python.org/3/download.html). 61 | - Force ingesting documents with **Ingest Data** button. 62 | 63 | You can also run PAutoBot publicly to your network or change the port with parameters. Example: 64 | 65 | ```shell 66 | pautobot --host 0.0.0.0 --port 8080 67 | ``` 68 | 69 | ## II. Development 70 | 71 | ### 1. Clone the source code 72 | 73 | ```shell 74 | git clone https://github.com/nrl-ai/pautobot 75 | cd pautobot 76 | ``` 77 | 78 | ### 2. Run your backend 79 | 80 | - Python 3.8 or higher. 81 | - To install Pautobot from source, from `pautobot` source code directory, run: 82 | 83 | ```shell 84 | pip install -e . 85 | ``` 86 | 87 | - Run the app: 88 | 89 | ```shell 90 | python -m pautobot.app 91 | ``` 92 | 93 | - Go to to see the user interface. 94 | 95 | ### 2. Run your frontend 96 | 97 | - Install the dependencies: 98 | 99 | ```shell 100 | cd frontend 101 | npm install 102 | ``` 103 | 104 | - Run the app: 105 | 106 | ```shell 107 | npm run dev 108 | ``` 109 | 110 | - Go to to see the user interface. Use this address to develop the frontend. 111 | -------------------------------------------------------------------------------- /frontend/components/SidebarTopMenu.js: -------------------------------------------------------------------------------- 1 | import { toast } from "react-toastify"; 2 | 3 | export default function () { 4 | return ( 5 |
6 |
7 | 13 | 18 | 19 |
20 |
{ 23 | toast.info("Coming soon!"); 24 | }} 25 | > 26 | 32 | 33 | 38 | 39 |
40 |
41 | ); 42 | } 43 | -------------------------------------------------------------------------------- /frontend/components/NewMessage.js: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef, useState } from "react"; 2 | 3 | export default function NewMessage({ onSubmitMessage }) { 4 | const defaultMode = "QA"; 5 | const [mode, setMode] = useState(defaultMode); 6 | const [message, setMessage] = useState(""); 7 | const textAreaRef = useRef(null); 8 | const MAX_LINES = 5; // Change this value to set the maximum number of lines 9 | useEffect(() => { 10 | // get number of lines in message 11 | const lines = message.split("\n").length; 12 | 13 | if (lines > MAX_LINES) { 14 | textAreaRef.current.rows = MAX_LINES; 15 | textAreaRef.current.style.overflowY = "auto"; 16 | } else { 17 | textAreaRef.current.rows = lines; 18 | textAreaRef.current.style.overflowY = "hidden"; 19 | } 20 | 21 | const borderRadius = lines > 1 ? "1rem" : "0"; 22 | const borderWidth = lines > 1 ? "1px" : "0px"; 23 | 24 | const styles = { 25 | transition: "all 0.1s ease-in-out", 26 | borderTopLeftRadius: borderRadius, 27 | borderBottomLeftRadius: borderRadius, 28 | borderLeftWidth: borderWidth, 29 | }; 30 | 31 | Object.assign(textAreaRef.current.style, styles); 32 | }, [message]); 33 | 34 | return ( 35 | <> 36 |
37 | 40 |
41 | 49 | 65 | 86 |
87 |
88 | 89 | ); 90 | } 91 | -------------------------------------------------------------------------------- /pautobot/utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pathlib 4 | import platform 5 | import shutil 6 | import subprocess 7 | import tempfile 8 | import traceback 9 | 10 | import pkg_resources 11 | import requests 12 | from tqdm import tqdm 13 | 14 | SUPPORTED_DOCUMENT_TYPES = [ 15 | ".csv", 16 | ".docx", 17 | ".doc", 18 | ".enex", 19 | ".eml", 20 | ".epub", 21 | ".html", 22 | ".md", 23 | ".msg", 24 | ".odt", 25 | ".pdf", 26 | ".pptx", 27 | ".ppt", 28 | ".txt", 29 | ] 30 | 31 | 32 | def open_file(path): 33 | """ 34 | Open file in default application 35 | """ 36 | if platform.system() == "Windows": 37 | os.startfile(path) 38 | elif platform.system() == "Darwin": 39 | subprocess.Popen(["open", path]) 40 | else: 41 | subprocess.Popen(["xdg-open", path]) 42 | 43 | 44 | def extract_frontend_dist(static_folder): 45 | """ 46 | Extract folder frontend/dist from package pautobot 47 | and put it in the same static folder for serving 48 | """ 49 | if os.path.exists(static_folder): 50 | logging.info(f"Refreshing {static_folder}...") 51 | shutil.rmtree(static_folder, ignore_errors=True) 52 | dist_folder = pkg_resources.resource_filename("pautobot", "frontend-dist") 53 | if os.path.exists(dist_folder): 54 | pathlib.Path(static_folder).parent.mkdir(parents=True, exist_ok=True) 55 | shutil.copytree(dist_folder, static_folder) 56 | if not os.path.exists(static_folder): 57 | logging.warning("frontend-dist not found in package pautobot") 58 | pathlib.Path(static_folder).mkdir(parents=True, exist_ok=True) 59 | with open(os.path.join(static_folder, "index.html"), "w") as f: 60 | f.write( 61 | "frontend-dist not found in package pautobot. 
def download_file(url, file_path):
    """
    Download ``url`` to ``file_path``, streaming through a temporary file.

    The payload is written to a temporary file first and only moved into
    place once the download has completed, so a partially downloaded
    file never ends up at ``file_path``.
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    pathlib.Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    try:
        response = requests.get(url, stream=True)

        # Check if the request was successful
        if response.status_code == 200:
            total_size = int(response.headers.get("content-length", 0))
            block_size = 8192  # Chunk size in bytes
            progress_bar = tqdm(total=total_size, unit="B", unit_scale=True)

            with open(tmp_file.name, "wb") as file:
                # Iterate over the response content in chunks
                for chunk in response.iter_content(chunk_size=block_size):
                    file.write(chunk)
                    progress_bar.update(len(chunk))

            progress_bar.close()
            shutil.move(tmp_file.name, file_path)
            logging.info("File downloaded successfully.")
        else:
            # A failed download is an error, not routine progress info.
            logging.error("Failed to download file.")
    finally:
        # Remove the temporary file unless it was moved into place;
        # previously it was leaked whenever the download failed.
        if os.path.exists(tmp_file.name):
            os.remove(tmp_file.name)


DEFAULT_MODEL_URLS = {
    "ggml-gpt4all-j": "https://gpt4all.io/models/ggml-gpt4all-j.bin",
    "ggml-gpt4all-j-v1.1-breezy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin",
    "ggml-gpt4all-j-v1.2-jazzy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin",
    "ggml-gpt4all-j-v1.3-groovy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin",
    "ggml-gpt4all-l13b-snoozy": "https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin",
    "ggml-mpt-7b-base": "https://gpt4all.io/models/ggml-mpt-7b-base.bin",
    "ggml-mpt-7b-instruct": "https://gpt4all.io/models/ggml-mpt-7b-instruct.bin",
    "ggml-nous-gpt4-vicuna-13b": "https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin",
    "ggml-replit-code-v1-3b": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin",
    "ggml-stable-vicuna-13B.q4_2": "https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin",
    "ggml-v3-13b-hermes-q5_1": "https://huggingface.co/eachadea/ggml-nous-hermes-13b/resolve/main/ggml-v3-13b-hermes-q5_1.bin",
    "ggml-vicuna-13b-1.1-q4_2": "https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin",
    "ggml-vicuna-7b-1.1-q4_2": "https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin",
    "ggml-wizard-13b-uncensored": "https://gpt4all.io/models/ggml-wizard-13b-uncensored.bin",
    "ggml-wizardLM-7B.q4_2": "https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin",
}


def download_model(model_type, model_path):
    """
    Download the model for ``model_type`` to ``model_path`` if it does
    not exist yet.

    Unknown model types fall back to the default
    ``ggml-gpt4all-j-v1.3-groovy`` model (previously ``model_type`` was
    ignored entirely and the default was always downloaded).
    TODO (vietanhdev):
        - Multiple download links
        - Check hash of the downloaded file
    """
    if not os.path.exists(model_path):
        # Resolved lazily: an already-downloaded model never needs a URL.
        model_url = DEFAULT_MODEL_URLS.get(
            model_type, DEFAULT_MODEL_URLS["ggml-gpt4all-j-v1.3-groovy"]
        )
        logging.info("Downloading model...")
        try:
            download_file(model_url, model_path)
        except Exception as e:
            logging.info(f"Error while downloading model: {e}")
            traceback.print_exc()
            # Raise SystemExit explicitly instead of the site-provided
            # exit() helper, which is absent when Python runs with -S.
            raise SystemExit(1)
        logging.info("Model downloaded!")
18 | }, 300); 19 | }; 20 | 21 | useEffect(() => { 22 | getChatHistory(0).then(async (response) => { 23 | let data = await response.json(); 24 | if (!response.ok) { 25 | const error = (data && data.message) || response.status; 26 | return Promise.reject(error); 27 | } 28 | setMessages(data); 29 | scrollMessages(); 30 | }); 31 | }, []); 32 | 33 | const onSubmitMessage = (mode, message) => { 34 | if (thinking) { 35 | toast.warning("I am thinking about previous question! Please wait..."); 36 | return; 37 | } 38 | setThinking(true); 39 | let newMessages = [ 40 | ...messages, 41 | { query: message }, 42 | { answer: "Thinking..." }, 43 | ]; 44 | setMessages(newMessages); 45 | scrollMessages(); 46 | 47 | ask(0, mode, message) 48 | .then(async (data) => { 49 | // Query data from /api/get_answer 50 | const interval = setInterval(async () => { 51 | queryBotResponse(0) 52 | .then(async (data) => { 53 | if (data.status == "THINKING" && data.answer) { 54 | newMessages.pop(); 55 | newMessages = [ 56 | ...newMessages, 57 | { answer: data.answer, docs: null }, 58 | ]; 59 | setMessages(newMessages); 60 | scrollMessages(); 61 | } else if (data.status == "READY") { 62 | clearInterval(interval); 63 | newMessages.pop(); 64 | newMessages = [ 65 | ...newMessages, 66 | { answer: data.answer, docs: data.docs }, 67 | ]; 68 | setMessages(newMessages); 69 | setThinking(false); 70 | scrollMessages(); 71 | } 72 | }) 73 | .catch((error) => { 74 | toast.error(error); 75 | setThinking(false); 76 | }); 77 | }, 2000); 78 | }) 79 | .catch((error) => { 80 | toast.error(error); 81 | setThinking(false); 82 | }); 83 | }; 84 | 85 | return ( 86 | <> 87 |
88 |
92 |
93 | {messages.map((message, index) => { 94 | if (message.query) { 95 | return ( 96 |
97 |
98 |

{message.query}

99 |
100 |
101 | ); 102 | } else { 103 | return ( 104 |
105 |
106 |

107 | {message.answer} 108 | {message.answer === "Thinking..." && ( 109 | Thinking... 110 | )} 111 |

112 | {message.docs && ( 113 |
114 |
115 | {message.docs.map((doc, index) => { 116 | return ( 117 |
118 |
{ 121 | openDocument(0, doc.source_id); 122 | }} 123 | > 124 | {doc.source} 125 |
126 |

{doc.content}

127 |
128 | ); 129 | })} 130 |
131 |
132 | )} 133 |
134 |
135 | ); 136 | } 137 | })} 138 | {messages.length === 0 && ( 139 |
140 |

Hello World!

141 |

142 | We are in the mission of building an all-in-one task assistant 143 | with PrivateGPT! 144 |

145 |
146 | )} 147 |
148 |
149 |
150 | 151 |
152 |
153 |
154 | 155 | ); 156 | } 157 | -------------------------------------------------------------------------------- /pautobot/engine/bot_context.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import json 3 | import os 4 | import pathlib 5 | import shutil 6 | import uuid 7 | 8 | from pautobot import db_models 9 | from pautobot.config import DATA_ROOT 10 | from pautobot.database import session 11 | from pautobot.engine.bot_enums import BotStatus 12 | from pautobot.utils import open_file 13 | 14 | DEFAULT_ANSWER = { 15 | "status": BotStatus.READY, 16 | "answer": "", 17 | "docs": [], 18 | } 19 | 20 | 21 | class BotContext: 22 | def __init__( 23 | self, id=None, name=None, storage_path=None, *args, **kwargs 24 | ) -> None: 25 | if id is None: 26 | id = 0 27 | db_bot_context = ( 28 | session.query(db_models.BotContext).filter_by(id=id).first() 29 | ) 30 | if db_bot_context is None: 31 | if name is None: 32 | name = str(uuid.uuid4()) 33 | db_bot_context = db_models.BotContext(id=id, name=name) 34 | session.add(db_bot_context) 35 | session.commit() 36 | name = db_bot_context.name 37 | if storage_path is None: 38 | storage_path = os.path.join(DATA_ROOT, "contexts", str(id)) 39 | pathlib.Path(storage_path).mkdir(parents=True, exist_ok=True) 40 | self.id = id 41 | self.name = name 42 | self.storage_path = storage_path 43 | self.embeddings_model_name = "all-MiniLM-L6-v2" 44 | self.documents_directory = os.path.join(storage_path, "documents") 45 | self.search_db_directory = os.path.join(storage_path, "search_db") 46 | self.chat_files_directory = os.path.join(storage_path, "chat_files") 47 | self.info_file = os.path.join(storage_path, "info.json") 48 | if not os.path.exists(self.info_file): 49 | self.initialize_bot_context() 50 | self.current_answer = copy.deepcopy(DEFAULT_ANSWER) 51 | 52 | @staticmethod 53 | def get_default_bot_context(): 54 | """Get the default bot context.""" 55 | return BotContext(id=0, name="Default") 56 | 
    def get_info(self) -> dict:
        """Get the bot info (id and display name) as a plain dict."""
        return {
            "id": self.id,
            "name": self.name,
        }

    def initialize_bot_context(self) -> None:
        """Initialize the bot context."""
        # Create the on-disk directory layout for a fresh context.
        for directory in [
            self.documents_directory,
            self.search_db_directory,
            self.chat_files_directory,
        ]:
            pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

    def rename(self, new_name: str) -> None:
        """Rename the bot context."""
        # Persists the new name to the database only; the in-memory
        # ``self.name`` is not updated here.
        # NOTE(review): looks like the database is treated as the source
        # of truth for names -- confirm before relying on ``self.name``.
        db_bot_context = (
            session.query(db_models.BotContext).filter_by(id=self.id).first()
        )
        db_bot_context.name = new_name
        session.commit()

    def add_document(self, file, filename) -> None:
        """Add a document to the bot's knowledge base.

        ``file`` is a binary file-like object (read via copyfileobj);
        ``filename`` is the original name, used for the extension and
        the display name.
        """
        pathlib.Path(self.documents_directory).mkdir(
            parents=True, exist_ok=True
        )
        file_extension = os.path.splitext(filename)[1]

        # Create a new document in the database
        # (committed first so the generated primary key can be used as
        # the on-disk file name below).
        db_document = db_models.Document(bot_context_id=self.id, name=filename)
        session.add(db_document)
        session.commit()
        document_id = db_document.id

        # Stored as "<id><ext>" to avoid collisions between uploads
        # that share the same original filename.
        new_filename = f"{document_id}{file_extension}"
        with open(
            os.path.join(self.documents_directory, new_filename), "wb+"
        ) as destination:
            shutil.copyfileobj(file, destination)

        db_document.storage_name = new_filename
        session.commit()

    def delete_document(self, document_id: int) -> None:
        """Delete a document from the bot's knowledge base.

        Raises:
            ValueError: if no document with ``document_id`` belongs to
                this context.
        """
        db_document = (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id, id=document_id)
            .first()
        )
        if db_document is None:
            raise ValueError(f"Document with id {document_id} not found.")
        # Remove the stored file first, then the database row.
        os.remove(
            os.path.join(self.documents_directory, db_document.storage_name)
        )
        session.delete(db_document)
        session.commit()

    def get_documents(self) -> list:
        """List all documents."""
        documents = []
        for db_document in (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id)
            .all()
        ):
            documents.append(
                {
                    "id": db_document.id,
                    "name": db_document.name,
                    "storage_name": db_document.storage_name,
                }
            )
        return documents

    def open_documents_folder(self) -> None:
        """Open the documents folder."""
        # Delegates to the OS file explorer / default handler.
        open_file(self.documents_directory)

    def open_document(self, document_id: int) -> None:
        """Open a document.

        Raises:
            ValueError: if no document with ``document_id`` belongs to
                this context.
        """
        db_document = (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id, id=document_id)
            .first()
        )
        if db_document is None:
            raise ValueError(f"Document with id {document_id} not found.")
        open_file(
            os.path.join(self.documents_directory, db_document.storage_name)
        )

    def write_chat_history(self, chat_history: dict) -> None:
        """Write a message to the bot's chat history."""
        # Each history entry is stored as one JSON-encoded chunk row.
        chat_history_text = json.dumps(chat_history)
        db_chat_chunk = db_models.ChatChunk(
            bot_context_id=self.id, text=chat_history_text
        )
        session.add(db_chat_chunk)
        session.commit()

    def get_chat_history(self) -> list:
        """Get the bot's chat history.

        Returns the decoded chunks in database insertion order.
        """
        chat_history = []
        for db_chat_chunk in (
            session.query(db_models.ChatChunk)
            .filter_by(bot_context_id=self.id)
            .all()
        ):
            chat_history.append(json.loads(db_chat_chunk.text))
        return chat_history

    def clear_chat_history(self) -> None:
        """Clear the bot's chat history."""
        session.query(db_models.ChatChunk).filter_by(
            bot_context_id=self.id
        ).delete()
        session.commit()

    def __str__(self) -> str:
        return f"ChatContext(storage_path={self.storage_path})"

    def dict(self) -> dict:
        """Serialize the context configuration to a JSON-friendly dict."""
        return {
            "id": self.id,
            "name": self.name,
            "storage_path": self.storage_path,
            "embeddings_model_name": self.embeddings_model_name,
            "documents_directory": self.documents_directory,
            "search_db_directory": self.search_db_directory,
            "chat_files_directory": self.chat_files_directory,
            "info_file": self.info_file,
        }
| return doc 52 | 53 | 54 | # Map file extensions to document loaders and their arguments 55 | LOADER_MAPPING = { 56 | ".csv": (CSVLoader, {}), 57 | ".doc": (UnstructuredWordDocumentLoader, {}), 58 | ".docx": (UnstructuredWordDocumentLoader, {}), 59 | ".enex": (EverNoteLoader, {}), 60 | ".eml": (MyElmLoader, {}), 61 | ".epub": (UnstructuredEPubLoader, {}), 62 | ".html": (UnstructuredHTMLLoader, {}), 63 | ".md": (UnstructuredMarkdownLoader, {}), 64 | ".odt": (UnstructuredODTLoader, {}), 65 | ".pdf": (PDFMinerLoader, {}), 66 | ".ppt": (UnstructuredPowerPointLoader, {}), 67 | ".pptx": (UnstructuredPowerPointLoader, {}), 68 | ".txt": (TextLoader, {"encoding": "utf8"}), 69 | } 70 | 71 | 72 | def load_single_document(file_path: str) -> Document: 73 | ext = "." + file_path.rsplit(".", 1)[-1] 74 | if ext in LOADER_MAPPING: 75 | loader_class, loader_args = LOADER_MAPPING[ext] 76 | loader = loader_class(file_path, **loader_args) 77 | return loader.load()[0] 78 | 79 | raise ValueError(f"Unsupported file extension '{ext}'") 80 | 81 | 82 | def load_documents( 83 | source_dir: str, ignored_files: List[str] = [] 84 | ) -> List[Document]: 85 | """ 86 | Loads all documents from the source documents 87 | directory, ignoring specified files 88 | """ 89 | all_files = [] 90 | for ext in LOADER_MAPPING: 91 | all_files.extend( 92 | glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True) 93 | ) 94 | filtered_files = [ 95 | file_path for file_path in all_files if file_path not in ignored_files 96 | ] 97 | 98 | with Pool(processes=os.cpu_count()) as pool: 99 | results = [] 100 | with tqdm( 101 | total=len(filtered_files), desc="Loading new documents", ncols=80 102 | ) as pbar: 103 | for i, doc in enumerate( 104 | pool.imap_unordered(load_single_document, filtered_files) 105 | ): 106 | results.append(doc) 107 | pbar.update() 108 | 109 | return results 110 | 111 | 112 | def process_documents( 113 | source_directory, ignored_files: List[str] = [] 114 | ) -> List[Document]: 115 | """ 
116 | Load documents and split in chunks 117 | """ 118 | logging.info(f"Loading documents from {source_directory}") 119 | documents = load_documents(source_directory, ignored_files) 120 | if not documents: 121 | logging.info("No new documents to load") 122 | return [] 123 | logging.info( 124 | f"Loaded {len(documents)} new documents from {source_directory}" 125 | ) 126 | text_splitter = RecursiveCharacterTextSplitter( 127 | chunk_size=chunk_size, chunk_overlap=chunk_overlap 128 | ) 129 | texts = text_splitter.split_documents(documents) 130 | logging.info( 131 | f"Split into {len(texts)} chunks of text " 132 | f"(max. {chunk_size} tokens each)" 133 | ) 134 | return texts 135 | 136 | 137 | def does_vectorstore_exist(persist_directory: str) -> bool: 138 | """ 139 | Checks if vectorstore exists 140 | """ 141 | if os.path.exists(os.path.join(persist_directory, "index")): 142 | if os.path.exists( 143 | os.path.join(persist_directory, "chroma-collections.parquet") 144 | ) and os.path.exists( 145 | os.path.join(persist_directory, "chroma-embeddings.parquet") 146 | ): 147 | list_index_files = glob.glob( 148 | os.path.join(persist_directory, "index/*.bin") 149 | ) 150 | list_index_files += glob.glob( 151 | os.path.join(persist_directory, "index/*.pkl") 152 | ) 153 | # At least 3 documents are needed in a working vectorstore 154 | if len(list_index_files) > 3: 155 | return True 156 | return False 157 | 158 | 159 | def ingest_documents( 160 | source_directory, persist_directory, embeddings_model_name 161 | ): 162 | chroma_settings = Settings( 163 | chroma_db_impl="duckdb+parquet", 164 | persist_directory=persist_directory, 165 | anonymized_telemetry=False, 166 | ) 167 | 168 | # Create embeddings 169 | embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name) 170 | 171 | if does_vectorstore_exist(persist_directory): 172 | # Update and store locally vectorstore 173 | logging.info( 174 | f"Appending to existing vectorstore at {persist_directory}" 175 | ) 176 | db = 
Chroma( 177 | persist_directory=persist_directory, 178 | embedding_function=embeddings, 179 | client_settings=chroma_settings, 180 | ) 181 | collection = db.get() 182 | texts = process_documents( 183 | source_directory, 184 | [metadata["source"] for metadata in collection["metadatas"]], 185 | ) 186 | if not texts: 187 | logging.info("No new documents to load") 188 | return 189 | logging.info("Creating embeddings. May take some minutes...") 190 | db.add_documents(texts) 191 | else: 192 | # Create and store locally vectorstore 193 | logging.info("Creating new vectorstore") 194 | texts = process_documents(source_directory) 195 | if not texts: 196 | logging.info("No new documents to load") 197 | return 198 | logging.info("Creating embeddings. May take some minutes...") 199 | db = Chroma.from_documents( 200 | texts, 201 | embeddings, 202 | persist_directory=persist_directory, 203 | client_settings=chroma_settings, 204 | ) 205 | db.persist() 206 | db = None 207 | 208 | logging.info("Ingestion complete! 
You can now query the vectorstore") 209 | -------------------------------------------------------------------------------- /frontend/components/QADBManager.js: -------------------------------------------------------------------------------- 1 | import { toast } from "react-toastify"; 2 | import { useState, useRef, useEffect } from "react"; 3 | 4 | import LoadingIcon from "./icons/LoadingIcon"; 5 | import { 6 | openDocument, 7 | deleteDocument, 8 | getDocuments, 9 | uploadDocument, 10 | } from "@/lib/requests/documents"; 11 | import { getBotInfo } from "@/lib/requests/bot"; 12 | 13 | export default function QADBManager() { 14 | const SUPPORTED_FILE_TYPES = [ 15 | ".csv", 16 | ".docx", 17 | ".doc", 18 | ".enex", 19 | ".eml", 20 | ".epub", 21 | ".html", 22 | ".md", 23 | ".msg", 24 | ".odt", 25 | ".pdf", 26 | ".pptx", 27 | ".ppt", 28 | ".txt", 29 | ".zip", 30 | ]; 31 | 32 | const fileInput = useRef(null); 33 | const [uploading, setUploading] = useState(false); 34 | const [documents, setDocuments] = useState([]); 35 | const refetchDocuments = (contextId) => { 36 | getDocuments(contextId) 37 | .then((data) => { 38 | setDocuments(data); 39 | }) 40 | .catch((error) => { 41 | toast.error(error); 42 | }); 43 | }; 44 | useEffect(() => { 45 | refetchDocuments(0); 46 | }, []); 47 | 48 | const [botInfo, setBotInfo] = useState(null); 49 | const getAndSetBotInfo = () => { 50 | getBotInfo() 51 | .then((data) => { 52 | setBotInfo(data); 53 | }) 54 | .catch((error) => { 55 | toast.error(error); 56 | }); 57 | }; 58 | 59 | // Periodically get bot info every 5 seconds 60 | useEffect(() => { 61 | getAndSetBotInfo(); 62 | const interval = setInterval(() => { 63 | getAndSetBotInfo(); 64 | }, 5000); 65 | return () => clearInterval(interval); 66 | }, []); 67 | 68 | const isValidFile = (file) => { 69 | let fileExtension = file.name?.split(".")?.pop(); 70 | fileExtension = fileExtension?.toLowerCase(); 71 | if (!fileExtension || !SUPPORTED_FILE_TYPES.includes("." 
+ fileExtension)) { 72 | return false; 73 | } 74 | return true; 75 | }; 76 | 77 | const uploadFiles = async (files) => { 78 | if (!files || files.length == 0) { 79 | toast.error("No file selected."); 80 | return; 81 | } 82 | 83 | // Clone the files array 84 | files = [...files]; 85 | 86 | // Start uploading 87 | setUploading(true); 88 | 89 | let numUploaded = 0; 90 | let numFailed = 0; 91 | for (let i = 0; i < files.length; i++) { 92 | const file = files[i]; 93 | if (!isValidFile(file)) { 94 | toast.error("File type not supported: " + file.name); 95 | numFailed++; 96 | continue; 97 | } 98 | await uploadDocument(0, file) 99 | .then(async (response) => { 100 | numUploaded++; 101 | refetchDocuments(0); 102 | }) 103 | .catch((error) => { 104 | toast.error(error); 105 | numFailed++; 106 | refetchDocuments(0); 107 | }); 108 | } 109 | 110 | toast.info( 111 | "Uploaded " + 112 | numUploaded + 113 | " file(s). " + 114 | (numFailed > 0 ? "Failed to upload " + numFailed + " file(s)." : "") 115 | ); 116 | fileInput.current.value = ""; 117 | setUploading(false); 118 | }; 119 | 120 | return ( 121 | <> 122 |
Q&A Database
123 |
124 | {documents.length > 0 ? ( 125 | You have {documents.length} document(s). 126 | ) : ( 127 | 128 | You have no document. Please upload a file and ingest data for Q&A. 129 | 130 | )} 131 |
132 |
{ 135 | e.preventDefault(); 136 | e.stopPropagation(); 137 | }} 138 | onDrop={(e) => { 139 | e.preventDefault(); 140 | e.stopPropagation(); 141 | uploadFiles(e.dataTransfer.files); 142 | }} 143 | > 144 | 145 | 146 | {documents.map((document, key) => ( 147 | 148 | 149 | 152 | 206 | 207 | ))} 208 | 209 |
{key + 1}. 150 | {document.name} 151 | 153 | 173 | 205 |
210 |
211 | {botInfo?.is_ingesting_data && ( 212 |
213 | 214 | Note: The bot is 215 | currently ingesting data. Please wait until it finishes. 216 | 217 | 218 |
219 | )} 220 |
221 | { 228 | uploadFiles(e.target.files); 229 | }} 230 | /> 231 | 244 |
245 | 246 | ); 247 | } 248 | -------------------------------------------------------------------------------- /pautobot/engine/engine.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import threading 4 | import traceback 5 | 6 | from pautobot import db_models 7 | from pautobot.config import DATA_ROOT 8 | from pautobot.database import session 9 | from pautobot.engine.bot_enums import BotMode, BotStatus 10 | from pautobot.engine.chatbot_factory import ChatbotFactory 11 | from pautobot.engine.context_manager import ContextManager 12 | from pautobot.engine.ingest import ingest_documents 13 | from pautobot.engine.llm_factory import LLMFactory 14 | from pautobot.engine.qa_factory import QAFactory 15 | 16 | 17 | class PautoBotEngine: 18 | """PautoBot engine for answering questions.""" 19 | 20 | def __init__( 21 | self, mode, context_manager: ContextManager, model_type="GPT4All" 22 | ) -> None: 23 | self.mode = mode 24 | self.model_type = model_type 25 | self.model_path = os.path.join( 26 | DATA_ROOT, 27 | "models", 28 | "ggml-gpt4all-j-v1.3-groovy.bin", 29 | ) 30 | self.context_manager = context_manager 31 | if not self.context_manager.get_contexts(): 32 | raise ValueError( 33 | "No contexts found! Please create at least one context first." 
34 | ) 35 | self.context = self.context_manager.get_current_context() 36 | self.status = BotStatus.READY 37 | 38 | # Prepare the LLM 39 | self.model_n_ctx = 1000 40 | self.llm = LLMFactory.create_llm( 41 | model_type=self.model_type, 42 | model_path=self.model_path, 43 | model_n_ctx=self.model_n_ctx, 44 | streaming=False, 45 | verbose=False, 46 | ) 47 | self.chatbot_instance = ChatbotFactory.create_chatbot(self.llm) 48 | 49 | # Prepare the retriever 50 | self.qa_instance = None 51 | self.qa_instance_error = None 52 | self.is_ingesting_data = False 53 | if mode == BotMode.CHAT.value: 54 | return 55 | self.ingest_documents_in_background() 56 | 57 | def get_bot_info(self) -> dict: 58 | """Get the bot's info.""" 59 | return { 60 | "mode": self.mode, 61 | "model_type": self.status.value, 62 | "qa_instance_error": self.qa_instance_error, 63 | "status": self.status.value, 64 | "is_ingesting_data": self.is_ingesting_data, 65 | "context": self.context.dict(), 66 | } 67 | 68 | def ingest_documents(self, context_id=None) -> None: 69 | """Ingest the bot's documents.""" 70 | if self.is_ingesting_data: 71 | logging.warning("Already ingesting data. Skipping...") 72 | return 73 | self.is_ingesting_data = True 74 | if context_id is not None: 75 | self.switch_context(context_id) 76 | try: 77 | ingest_documents( 78 | self.context.documents_directory, 79 | self.context.search_db_directory, 80 | self.context.embeddings_model_name, 81 | ) 82 | # Reload QA 83 | self.qa_instance = QAFactory.create_qa( 84 | context=self.context, 85 | llm=self.llm, 86 | ) 87 | except Exception as e: 88 | logging.error(f"Error while ingesting documents: {e}") 89 | logging.error(traceback.format_exc()) 90 | self.qa_instance_error = "Error while ingesting documents!" 
91 | finally: 92 | self.is_ingesting_data = False 93 | 94 | def ingest_documents_in_background(self, context_id=None) -> None: 95 | """Ingest the bot's documents in the background using a thread.""" 96 | if self.is_ingesting_data: 97 | logging.warning("Already ingesting data. Skipping...") 98 | return 99 | thread = threading.Thread( 100 | target=self.ingest_documents, 101 | args=(context_id,), 102 | ) 103 | thread.start() 104 | 105 | def switch_context(self, context_id: int) -> None: 106 | """Switch the bot context if needed.""" 107 | if self.context.id != context_id: 108 | self.context = self.context_manager.get_context(context_id) 109 | self.qa_instance = QAFactory.create_qa( 110 | context=self.context, 111 | llm=self.llm, 112 | ) 113 | 114 | def check_query(self, mode, query, context_id=None) -> None: 115 | """ 116 | Check if the query is valid. 117 | Raises an exception on invalid query. 118 | """ 119 | if context_id is not None: 120 | self.switch_context(context_id) 121 | if not query: 122 | raise ValueError("Query cannot be empty!") 123 | if mode == BotMode.QA.value and self.mode == BotMode.CHAT.value: 124 | raise ValueError( 125 | "PautobotEngine was initialized in chat mode! " 126 | "Please restart in QA mode." 127 | ) 128 | elif mode == BotMode.QA.value and self.is_ingesting_data: 129 | raise ValueError( 130 | "Pautobot is currently ingesting data! Please wait a few minutes and try again." 131 | ) 132 | elif mode == BotMode.QA.value and self.qa_instance_error is not None: 133 | raise ValueError( 134 | "Pautobot QA instance is not ready! Please wait a few minutes and try again." 
135 | ) 136 | 137 | def query(self, mode, query, context_id=None) -> None: 138 | """Query the bot.""" 139 | self.status = BotStatus.THINKING 140 | if context_id is not None: 141 | self.switch_context(context_id) 142 | self.check_query(mode, query) 143 | if mode is None: 144 | mode = self.mode 145 | if mode == BotMode.QA.value and self.qa_instance is None: 146 | logging.info(self.qa_instance_error) 147 | mode = BotMode.CHAT 148 | self.context.current_answer = { 149 | "status": self.status, 150 | "answer": "", 151 | "docs": [], 152 | } 153 | self.context.write_chat_history( 154 | { 155 | "query": query, 156 | "mode": mode, 157 | } 158 | ) 159 | if mode == BotMode.QA.value: 160 | try: 161 | logging.info("Received query: ", query) 162 | logging.info("Searching...") 163 | res = self.qa_instance(query) 164 | answer, docs = ( 165 | res["result"], 166 | res["source_documents"], 167 | ) 168 | doc_json = [] 169 | for document in docs: 170 | document_file = document.metadata["source"] 171 | document_id = os.path.basename(document_file).split(".")[0] 172 | document_id = int(document_id) 173 | db_document = ( 174 | session.query(db_models.Document) 175 | .filter(db_models.Document.id == document_id) 176 | .first() 177 | ) 178 | if not db_document: 179 | continue 180 | doc_json.append( 181 | { 182 | "source": db_document.name, 183 | "source_id": db_document.id, 184 | "content": document.page_content, 185 | } 186 | ) 187 | self.status = BotStatus.READY 188 | self.context.current_answer = { 189 | "status": self.status, 190 | "answer": answer, 191 | "docs": doc_json, 192 | } 193 | self.context.write_chat_history(self.context.current_answer) 194 | except Exception as e: 195 | logging.error("Error during thinking: ", e) 196 | traceback.print_exc() 197 | answer = "Error during thinking! Please try again." 198 | if "Index not found" in str(e): 199 | answer = "Index not found! Please ingest documents first." 
200 | self.status = BotStatus.READY 201 | self.context.current_answer = { 202 | "status": self.status, 203 | "answer": answer, 204 | "docs": None, 205 | } 206 | self.context.write_chat_history(self.context.current_answer) 207 | else: 208 | try: 209 | logging.info("Received query: ", query) 210 | logging.info("Thinking...") 211 | answer = self.chatbot_instance.predict(human_input=query) 212 | logging.info("Answer: ", answer) 213 | self.status = BotStatus.READY 214 | self.context.current_answer = { 215 | "status": self.status, 216 | "answer": answer, 217 | "docs": None, 218 | } 219 | self.context.write_chat_history(self.context.current_answer) 220 | except Exception as e: 221 | logging.error("Error during thinking: ", e) 222 | traceback.print_exc() 223 | self.status = BotStatus.READY 224 | self.context.current_answer = { 225 | "status": self.status, 226 | "answer": "Error during thinking! Please try again.", 227 | "docs": None, 228 | } 229 | self.context.write_chat_history(self.context.current_answer) 230 | 231 | def get_answer(self, context_id=None) -> dict: 232 | """Get the bot's answer.""" 233 | if context_id is not None: 234 | self.switch_context(context_id) 235 | return self.context.current_answer 236 | --------------------------------------------------------------------------------