├── tests
└── __init__.py
├── pautobot
├── __init__.py
├── routers
│ ├── __init__.py
│ ├── bot.py
│ ├── contexts.py
│ └── documents.py
├── engine
│ ├── __init__.py
│ ├── bot_enums.py
│ ├── llm_factory.py
│ ├── qa_factory.py
│ ├── chatbot_factory.py
│ ├── context_manager.py
│ ├── bot_context.py
│ ├── ingest.py
│ └── engine.py
├── models.py
├── app_info.py
├── config.py
├── database.py
├── globals.py
├── db_models.py
├── app.py
└── utils.py
├── MANIFEST.in
├── frontend
├── .prettierignore
├── styles
│ └── globals.css
├── public
│ ├── favicon.ico
│ ├── pautobot.png
│ └── loading.svg
├── jsconfig.json
├── postcss.config.js
├── README.md
├── tailwind.config.js
├── components
│ ├── RightSidebar.js
│ ├── icons
│ │ ├── UploadIcon.js
│ │ └── LoadingIcon.js
│ ├── ModelSelector.js
│ ├── Sidebar.js
│ ├── ContextManager.js
│ ├── SidebarBottomMenu.js
│ ├── SidebarMenu.js
│ ├── SidebarTopMenu.js
│ ├── NewMessage.js
│ ├── Main.js
│ └── QADBManager.js
├── next.config.js
├── lib
│ └── requests
│ │ ├── history.js
│ │ ├── bot.js
│ │ └── documents.js
├── pages
│ ├── _app.js
│ └── index.js
└── package.json
├── docs
├── pautobot.png
├── screenshot.png
└── python3.11.3_lite.zip
├── pyproject.toml
├── requirements.txt
├── .pre-commit-config.yaml
├── .gitignore
├── setup.py
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pautobot/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pautobot/routers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include pautobot/frontend-dist *
--------------------------------------------------------------------------------
/frontend/.prettierignore:
--------------------------------------------------------------------------------
1 | .next
2 | dist
3 | node_modules
--------------------------------------------------------------------------------
/pautobot/engine/__init__.py:
--------------------------------------------------------------------------------
1 | from pautobot.engine.engine import *
2 |
--------------------------------------------------------------------------------
/docs/pautobot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/pautobot.png
--------------------------------------------------------------------------------
/docs/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/screenshot.png
--------------------------------------------------------------------------------
/frontend/styles/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
--------------------------------------------------------------------------------
/docs/python3.11.3_lite.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/docs/python3.11.3_lite.zip
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/public/pautobot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/awesomedev08/pautobot/HEAD/frontend/public/pautobot.png
--------------------------------------------------------------------------------
/frontend/jsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "paths": {
4 | "@/*": ["./*"]
5 | }
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/pautobot/models.py:
--------------------------------------------------------------------------------
from pydantic import BaseModel


class Query(BaseModel):
    """Request body for the ask endpoint (POST /api/{context_id}/ask).

    mode: bot mode string — expected to match BotMode values ("QA" or
        "CHAT"); validated by engine.check_query — TODO confirm.
    query: the user's question text.
    """

    mode: str
    query: str
7 |
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/pautobot/app_info.py:
--------------------------------------------------------------------------------
# Package metadata, read by the CLI (--version flag) and the FastAPI app
# created in pautobot/app.py. Keep __version__ in sync with
# frontend/package.json ("version": "0.0.27").
__appname__ = "PautoBot"
__description__ = (
    "Private AutoGPT Robot - Your private task assistant with GPT!"
)
__version__ = "0.0.27"
6 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
🔥 PⒶutoBot 🔥
4 | Your private task assistant with GPT
5 |
6 |
--------------------------------------------------------------------------------
/pautobot/config.py:
--------------------------------------------------------------------------------
import os
import pathlib

# Root folder for all runtime data (models, contexts, SQLite DB, extracted
# frontend). Created at import time so later code can assume it exists.
DATA_ROOT = os.path.abspath(
    os.path.join(os.path.expanduser("~"), "pautobot-data")
)
pathlib.Path(DATA_ROOT).mkdir(parents=True, exist_ok=True)

# SQLite database file consumed by pautobot/database.py.
DATABASE_PATH = os.path.abspath(os.path.join(DATA_ROOT, "pautobot.db"))
10 |
--------------------------------------------------------------------------------
/pautobot/engine/bot_enums.py:
--------------------------------------------------------------------------------
from enum import Enum


class BotStatus(str, Enum):
    """Lifecycle states the bot reports while answering a query."""

    READY = "READY"
    THINKING = "THINKING"
    ERROR = "ERROR"


class BotMode(str, Enum):
    """Operating modes: document question-answering or free chat."""

    QA = "QA"
    CHAT = "CHAT"
17 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | line-length = 79
3 | #include = '\.pyi?$'
4 | exclude = '''
5 | /(
6 | \.git
7 | | \.hg
8 | | \.tox
9 | | \.venv
10 | | _build
11 | | buck-out
12 | | build
13 | | dist
14 | )/
15 | '''
16 |
17 | [build-system]
18 | requires = ["setuptools", "wheel"]
19 | build-backend = "setuptools.build_meta"
20 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.194
2 | gpt4all==0.3.0
3 | chromadb==0.3.23
4 | urllib3==2.0.2
5 | pdfminer.six==20221105
6 | unstructured==0.6.6
7 | extract-msg==0.41.1
8 | tabulate==0.9.0
9 | pandoc==2.3
10 | pypandoc==1.11
11 | tqdm==4.65.0
12 | python-multipart==0.0.6
13 | fastapi==0.96.0
14 | SQLAlchemy==2.0.15
15 | alembic==1.11.1
16 | sentence_transformers==2.2.2
17 | requests
18 |
--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: [
4 | "./app/**/*.{js,ts,jsx,tsx,mdx}",
5 | "./pages/**/*.{js,ts,jsx,tsx,mdx}",
6 | "./components/**/*.{js,ts,jsx,tsx,mdx}",
7 | ],
8 | theme: {
9 | extend: {
10 | rotate: {
11 | logo: "-10deg",
12 | },
13 | },
14 | },
15 | plugins: [],
16 | };
17 |
--------------------------------------------------------------------------------
/frontend/components/RightSidebar.js:
--------------------------------------------------------------------------------
1 | "use client";
2 | import React from "react";
3 |
4 | import ModelSelector from "./ModelSelector";
5 | import QADBManager from "./QADBManager";
6 | import ContextManager from "./ContextManager";
7 |
8 | export default function SidebarTools() {
9 | return (
10 | <>
11 |
12 |
13 |
14 | >
15 | );
16 | }
17 |
--------------------------------------------------------------------------------
/frontend/next.config.js:
--------------------------------------------------------------------------------
/** @type {import('next').NextConfig} */
let nextConfig = {
  distDir: "dist",
};

if (process.env.NODE_ENV === "development") {
  // In development, proxy API calls to the local FastAPI backend
  // (pautobot/app.py, default port 5678) so the Next dev server and the
  // API share an origin and avoid CORS issues.
  nextConfig.rewrites = async () => {
    return [
      {
        source: "/api/:path*",
        destination: "http://127.0.0.1:5678/api/:path*",
      },
    ];
  };
} else {
  // Production builds are exported as static files; they are later bundled
  // into the Python package (see MANIFEST.in: pautobot/frontend-dist).
  nextConfig.output = "export";
}

module.exports = nextConfig;
20 |
--------------------------------------------------------------------------------
/pautobot/database.py:
--------------------------------------------------------------------------------
"""Database setup: SQLite engine, session factory, and declarative base."""
from sqlalchemy import create_engine

# SQLAlchemy 2.0 (pinned in requirements.txt as 2.0.15) moved
# declarative_base from sqlalchemy.ext.declarative to sqlalchemy.orm;
# the old location emits a MovedIn20 deprecation warning.
from sqlalchemy.orm import declarative_base, sessionmaker

from pautobot.config import DATABASE_PATH

DATABASE_URL = "sqlite:///{}".format(DATABASE_PATH)
# check_same_thread=False: FastAPI may run handlers on different threads,
# and SQLite forbids cross-thread connection use by default.
engine = create_engine(DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Module-level shared session — NOTE(review): shared across requests;
# consider per-request sessions via dependency injection.
session = SessionLocal()

Base = declarative_base()
13 |
--------------------------------------------------------------------------------
/pautobot/globals.py:
--------------------------------------------------------------------------------
from pautobot.engine import PautoBotEngine
from pautobot.engine.bot_enums import BotMode
from pautobot.engine.context_manager import ContextManager

# Module-level singletons, populated by init() at application startup
# (called from pautobot/app.py). They remain None until init() runs.
engine = None
context_manager = None


def init():
    """Initialize the global engine and context manager.

    Loads all persisted contexts, then builds the engine in QA mode with
    that context manager. Must be called once before any router touches
    globals.engine / globals.context_manager.
    """
    global context_manager
    global engine

    context_manager = ContextManager()
    context_manager.load_contexts()

    engine = PautoBotEngine(mode=BotMode.QA, context_manager=context_manager)
18 |
--------------------------------------------------------------------------------
/frontend/lib/requests/history.js:
--------------------------------------------------------------------------------
// Thin wrappers around the chat-history endpoint; both return the raw
// fetch promise for the caller to unwrap.
const chatHistoryRequest = (contextId, method) =>
  fetch(`/api/${contextId}/chat_history`, {
    method,
    headers: { "Content-Type": "application/json" },
  });

export const getChatHistory = (contextId) =>
  chatHistoryRequest(contextId, "GET");

export const clearChatHistory = (contextId) =>
  chatHistoryRequest(contextId, "DELETE");
20 |
--------------------------------------------------------------------------------
/frontend/components/icons/UploadIcon.js:
--------------------------------------------------------------------------------
1 | export default function Icon() {
2 | return (
3 |
11 |
17 |
18 | );
19 | }
20 |
--------------------------------------------------------------------------------
/frontend/pages/_app.js:
--------------------------------------------------------------------------------
1 | import "@/styles/globals.css";
2 | import "react-toastify/dist/ReactToastify.css";
3 |
4 | import { Bai_Jamjuree } from "next/font/google";
5 | import Head from "next/head";
6 |
7 | const bai_jam = Bai_Jamjuree({
8 | subsets: ["latin", "vietnamese"],
9 | weight: ["200", "300", "400", "500", "600", "700"],
10 | });
11 |
12 | export default function RootLayout({ Component, pageProps }) {
13 | return (
14 | <>
15 |
16 | PAutoBot - Your Private GPT Assistant
17 |
18 |
19 |
20 |
21 | >
22 | );
23 | }
24 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "pauto-frontend",
3 | "version": "0.0.27",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev",
7 | "build": "next build",
8 | "export": "next export",
9 | "start": "next start",
10 | "lint": "next lint",
11 | "pr": "prettier --write ."
12 | },
13 | "dependencies": {
14 | "next": "13.4.3",
15 | "react": "18.2.0",
16 | "react-dom": "18.2.0",
17 | "react-toastify": "^9.1.3"
18 | },
19 | "devDependencies": {
20 | "autoprefixer": "^10.4.14",
21 | "postcss": "^8.4.23",
22 | "tailwindcss": "^3.3.2",
23 | "prettier": "^2.8.8",
24 | "eslint": "8.41.0",
25 | "eslint-config-next": "13.4.3"
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/frontend/pages/index.js:
--------------------------------------------------------------------------------
1 | import { ToastContainer } from "react-toastify";
2 |
3 | import Sidebar from "@/components/Sidebar";
4 | import Main from "@/components/Main";
5 | import SidebarTools from "@/components/RightSidebar";
6 |
7 | export default function Home() {
8 | return (
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | );
22 | }
23 |
--------------------------------------------------------------------------------
/pautobot/engine/llm_factory.py:
--------------------------------------------------------------------------------
from langchain.llms import GPT4All

from pautobot.utils import download_model


class LLMFactory:
    """Factory for instantiating LLMs."""

    @staticmethod
    def create_llm(
        model_type, model_path, model_n_ctx, streaming=False, verbose=False
    ):
        """Download (if needed) and construct an LLM instance.

        Args:
            model_type: only "GPT4All" is supported; any other value
                raises ValueError.
            model_path: local path the model is downloaded to / loaded from.
            model_n_ctx: context window size passed to the model.
            streaming: stream tokens as they are generated.
            verbose: enable verbose backend logging.

        Raises:
            ValueError: if model_type is not "GPT4All".
        """
        # Download the model — presumably a no-op when the file already
        # exists; TODO confirm download_model skips existing files.
        download_model(model_type, model_path)

        # Prepare the LLM
        if model_type == "GPT4All":
            return GPT4All(
                model=model_path,
                n_ctx=model_n_ctx,
                backend="gptj",  # GPT-J weight format expected by the model
                streaming=streaming,
                verbose=verbose,
            )
        else:
            raise ValueError(f"Invalid model type: {model_type}")
27 |
--------------------------------------------------------------------------------
/frontend/components/ModelSelector.js:
--------------------------------------------------------------------------------
1 | export default function ModelSelector() {
2 | return (
3 | <>
4 | Model
5 |
6 |
10 | GPT4All-J v1.3-groovy
11 |
12 |
13 |
14 |
24 |
25 | License: Apache 2.0
26 |
27 |
28 | >
29 | );
30 | }
31 |
--------------------------------------------------------------------------------
/frontend/components/Sidebar.js:
--------------------------------------------------------------------------------
1 | import SidebarBottomMenu from "./SidebarBottomMenu";
2 | import SidebarTopMenu from "./SidebarTopMenu";
3 |
4 | export default function Sidebar() {
5 | return (
6 | <>
7 |
8 |
9 |
window.open("https://pautobot.com/", "_blank")}
14 | >
15 |
20 |
21 | PAuto
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 | >
35 | );
36 | }
37 |
--------------------------------------------------------------------------------
/pautobot/engine/qa_factory.py:
--------------------------------------------------------------------------------
from chromadb.config import Settings
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

from pautobot.engine.bot_context import BotContext


class QAFactory:
    """Factory for instantiating QAs."""

    @staticmethod
    def create_qa(
        context: BotContext,
        llm,
    ):
        """Build a RetrievalQA chain over the context's document store.

        Opens the context's persisted Chroma database (duckdb+parquet) with
        HuggingFace embeddings, retrieves the top 4 matching chunks per
        query, and returns answers with their source documents attached.
        """
        chroma_settings = Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory=context.search_db_directory,
            anonymized_telemetry=False,
        )
        embeddings = HuggingFaceEmbeddings(
            model_name=context.embeddings_model_name
        )
        database = Chroma(
            persist_directory=context.search_db_directory,
            embedding_function=embeddings,
            client_settings=chroma_settings,
        )
        # k=4: number of document chunks fed to the LLM per question.
        retriever = database.as_retriever(search_kwargs={"k": 4})
        qa_instance = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",  # concatenate all chunks into one prompt
            retriever=retriever,
            return_source_documents=True,
        )
        return qa_instance
38 |
--------------------------------------------------------------------------------
/frontend/components/ContextManager.js:
--------------------------------------------------------------------------------
1 | import { toast } from "react-toastify";
2 |
3 | import { clearChatHistory } from "@/lib/requests/history";
4 | import { ingestData } from "@/lib/requests/documents";
5 |
 6 | export default function ContextManager() {
7 | return (
8 | <>
9 | This Context
10 |
11 | {
16 | toast.info("Ingesting data...");
17 | ingestData(0).catch((error) => {
18 | toast.error(error);
19 | });
20 | }}
21 | >
22 | Ingest Data
23 |
24 | {
27 | clearChatHistory(0).then(() => {
28 | toast.success("Chat history cleared!");
29 | window.location.reload();
30 | });
31 | }}
32 | >
33 | Clear History
34 |
35 |
36 | >
37 | );
38 | }
39 |
--------------------------------------------------------------------------------
/frontend/components/icons/LoadingIcon.js:
--------------------------------------------------------------------------------
1 | export default function () {
2 | return (
3 |
11 |
15 |
19 |
20 | );
21 | }
22 |
--------------------------------------------------------------------------------
/pautobot/db_models.py:
--------------------------------------------------------------------------------
import datetime

from sqlalchemy import Column, DateTime, ForeignKey, Integer, String
from sqlalchemy.orm import relationship

from pautobot.database import Base, engine


class BotContext(Base):
    """A chat session: owns its uploaded documents and chat history."""

    __tablename__ = "contexts"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String, index=True)
    # NOTE(review): utcnow stores naive UTC timestamps and is deprecated
    # in Python 3.12 — consider datetime.now(timezone.utc), but confirm
    # how existing rows are compared first.
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    documents = relationship("Document", back_populates="bot_context")
    chat_chunks = relationship("ChatChunk", back_populates="bot_context")


class Document(Base):
    """An uploaded file attached to a bot context."""

    __tablename__ = "documents"

    id = Column(Integer, primary_key=True, index=True)
    name = Column(String)  # original filename
    storage_name = Column(String)  # on-disk name — TODO confirm semantics
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    bot_context_id = Column(Integer, ForeignKey("contexts.id"))
    bot_context = relationship("BotContext", back_populates="documents")


class ChatChunk(Base):
    """One unit of chat-history text belonging to a context."""

    __tablename__ = "chat_chunks"

    id = Column(Integer, primary_key=True, index=True)
    created_at = Column(DateTime, default=datetime.datetime.utcnow)
    text = Column(String)
    bot_context_id = Column(Integer, ForeignKey("contexts.id"))
    bot_context = relationship("BotContext", back_populates="chat_chunks")


# Create tables at import time. app.py also calls
# Base.metadata.create_all(bind=engine); create_all is idempotent so the
# duplication is harmless, but note this import has side effects.
Base.metadata.create_all(engine)
41 |
--------------------------------------------------------------------------------
/frontend/lib/requests/bot.js:
--------------------------------------------------------------------------------
// Unwrap a fetch Response: resolve with the parsed JSON body on success,
// reject with the server's message (or the HTTP status) otherwise.
const unwrapJson = async (response) => {
  const data = await response.json();
  if (!response.ok) {
    return Promise.reject((data && data.message) || response.status);
  }
  return data;
};

const jsonHeaders = { "Content-Type": "application/json" };

// Fetch engine status/metadata.
export const getBotInfo = () =>
  fetch("/api/bot_info", { method: "GET", headers: jsonHeaders }).then(
    unwrapJson
  );

// Submit a question; the backend answers asynchronously (poll with
// queryBotResponse).
export const ask = (contextId, mode, message) =>
  fetch(`/api/${contextId}/ask`, {
    method: "POST",
    headers: jsonHeaders,
    body: JSON.stringify({ mode: mode, query: message }),
  }).then(unwrapJson);

// Poll for the bot's current answer in the given context.
export const queryBotResponse = (contextId) =>
  fetch(`/api/${contextId}/get_answer`, {
    method: "GET",
    headers: jsonHeaders,
  }).then(unwrapJson);
49 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | files: ^(.*\.(py|json|md|sh|yaml|cfg|txt))$
3 | exclude: ^(\.[^/]*cache/.*|.*/_user.py|source_documents/)$
4 | repos:
5 | - repo: https://github.com/pre-commit/pre-commit-hooks
6 | rev: v4.4.0
7 | hooks:
8 | - id: check-yaml
9 | args: [--unsafe]
10 | - id: end-of-file-fixer
11 | - id: trailing-whitespace
12 |         exclude: \.md$
13 | - id: check-json
14 | - id: mixed-line-ending
15 | - id: check-merge-conflict
16 | - id: check-docstring-first
17 | - id: fix-byte-order-marker
18 | - id: check-case-conflict
19 | - repo: https://github.com/adrienverge/yamllint.git
20 | rev: v1.29.0
21 | hooks:
22 | - id: yamllint
23 | args:
24 | - --no-warnings
25 | - -d
26 | - '{extends: relaxed, rules: {line-length: {max: 90}}}'
27 | - repo: https://github.com/myint/autoflake
28 | rev: v1.4
29 | hooks:
30 | - id: autoflake
31 | exclude: .*/__init__.py
32 | args:
33 | - --in-place
34 | - --remove-all-unused-imports
35 | - --expand-star-imports
36 | - --remove-duplicate-keys
37 | - --remove-unused-variables
38 | - repo: https://github.com/pre-commit/mirrors-isort
39 | rev: v5.4.2
40 | hooks:
41 | - id: isort
42 | args: ["--profile", "black"]
43 | - repo: https://github.com/pre-commit/pre-commit-hooks
44 | rev: v3.3.0
45 | hooks:
46 | - id: trailing-whitespace
47 | - id: end-of-file-fixer
48 |
--------------------------------------------------------------------------------
/pautobot/routers/bot.py:
--------------------------------------------------------------------------------
import logging
import traceback

from fastapi import APIRouter, BackgroundTasks, status
from fastapi.responses import JSONResponse

from pautobot import globals
from pautobot.engine.bot_enums import BotStatus
from pautobot.models import Query

# Endpoints for asking the bot and polling for its answers.
router = APIRouter(
    prefix="/api",
    tags=["Ask Bot"],
)


@router.get("/bot_info")
async def get_bot_info():
    """Return engine status/metadata for the frontend (lib/requests/bot.js)."""
    return globals.engine.get_bot_info()
20 |
21 |
@router.post("/{context_id}/ask")
async def ask(
    context_id: int, query: Query, background_tasks: BackgroundTasks
):
    """Validate *query* and schedule it for background processing.

    Returns 400 when the query is invalid or the bot is still busy with a
    previous question; otherwise resets the context's answer slot, queues
    the engine call, and returns immediately (the frontend polls
    /get_answer for the result).
    """
    try:
        globals.engine.check_query(
            query.mode, query.query, context_id=context_id
        )
    except ValueError as e:
        logging.error(traceback.format_exc())
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content={"message": str(e)},
        )
    if globals.engine.context.current_answer["status"] == BotStatus.THINKING:
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content={"message": "Bot is already thinking"},
        )
    # Reset the answer slot. The "status" key must stay present: the
    # busy-check above reads current_answer["status"], so dropping the key
    # (as the previous version did) raised KeyError on the next request
    # while the bot was thinking.
    globals.engine.context.current_answer = {
        "status": BotStatus.THINKING,
        "answer": "",
        "docs": [],
    }
    background_tasks.add_task(globals.engine.query, query.mode, query.query)
    return {"message": "Query received"}
47 |
48 |
@router.get("/{context_id}/get_answer")
async def get_answer(context_id: int):
    """Poll the current answer for *context_id* (filled in by the background task)."""
    return globals.engine.get_answer(context_id=context_id)
52 |
--------------------------------------------------------------------------------
/pautobot/engine/chatbot_factory.py:
--------------------------------------------------------------------------------
from langchain import LLMChain, PromptTemplate
from langchain.memory import ConversationBufferWindowMemory


class ChatbotFactory:
    """Factory for instantiating chatbots."""

    @staticmethod
    def create_chatbot(
        llm,
    ):
        """Build a conversational LLMChain around *llm*.

        The chain carries a sliding window of the last 2 exchanges
        (ConversationBufferWindowMemory, k=2) injected into the prompt's
        {history} slot.
        """
        # Prompt wording fix: "large language model train by human" ->
        # "large language model trained by humans" (grammar defect in the
        # system prompt fed to the LLM).
        template = """Assistant is a large language model trained by humans.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful tool that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.

{history}
Human: {human_input}
Assistant:"""

        prompt = PromptTemplate(
            input_variables=["history", "human_input"], template=template
        )
        chatbot_instance = LLMChain(
            llm=llm,
            prompt=prompt,
            verbose=True,
            memory=ConversationBufferWindowMemory(k=2),  # last 2 turns
        )
        return chatbot_instance
34 |
--------------------------------------------------------------------------------
/frontend/lib/requests/documents.js:
--------------------------------------------------------------------------------
// Resolve with parsed JSON on success; reject with the server message or
// HTTP status on failure.
const unwrapJson = async (response) => {
  const data = await response.json();
  if (!response.ok) {
    return Promise.reject((data && data.message) || response.status);
  }
  return data;
};

// Trigger (re-)ingestion of the context's uploaded documents.
export const ingestData = (contextId) =>
  fetch(`/api/${contextId}/documents/ingest`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  }).then(unwrapJson);

// Upload one file as multipart form data (no JSON content-type: the
// browser sets the multipart boundary itself).
export const uploadDocument = (contextId, file) => {
  const formData = new FormData();
  formData.append("file", file);
  return fetch(`/api/${contextId}/documents`, {
    method: "POST",
    body: formData,
  }).then(async (response) => {
    const data = await response.json();
    if (!response.ok) {
      const error = (data && data.message) || response.status;
      console.log(error);
      return Promise.reject(error);
    }
    return data;
  });
};

// Ask the backend to reveal the document in the OS file explorer.
export const openDocument = (contextId, documentId) =>
  fetch(
    `/api/${contextId}/documents/${documentId}/open_in_file_explorer`,
    {
      method: "POST",
    }
  );

// List all documents in the context.
export const getDocuments = (contextId) =>
  fetch(`/api/${contextId}/documents`, {
    method: "GET",
    headers: { "Content-Type": "application/json" },
  }).then(unwrapJson);

// Delete one document by id.
export const deleteDocument = (contextId, documentId) =>
  fetch(`/api/${contextId}/documents/${documentId}`, {
    method: "DELETE",
  }).then(unwrapJson);
71 |
--------------------------------------------------------------------------------
/frontend/components/SidebarBottomMenu.js:
--------------------------------------------------------------------------------
1 | import { toast } from "react-toastify";
2 |
3 | export default function () {
4 | return (
5 |
6 |
{
9 | toast.info("Coming soon!");
10 | }}
11 | >
12 |
18 |
19 |
20 |
21 |
22 |
34 |
35 |
36 | );
37 | }
38 |
--------------------------------------------------------------------------------
/frontend/components/SidebarMenu.js:
--------------------------------------------------------------------------------
1 | import { openDocumentsFolder } from "@/utils";
2 |
3 | export default function SidebarMenu() {
4 | return (
5 |
6 |
7 |
8 |
9 |
15 |
20 |
21 | Query
22 |
23 |
24 |
25 | {
28 | openDocumentsFolder();
29 | }}
30 | >
31 |
38 |
39 |
40 | Manage Knowledge DB
41 |
42 |
43 |
44 |
45 | );
46 | }
47 |
--------------------------------------------------------------------------------
/pautobot/app.py:
--------------------------------------------------------------------------------
import os

# Must be set before any tokenizers import happens (transitively via the
# engine); silences HuggingFace tokenizers fork warnings.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import argparse
import logging

import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles

from pautobot import db_models as models
from pautobot import globals
from pautobot.app_info import __appname__, __description__, __version__
from pautobot.config import DATA_ROOT
from pautobot.database import engine
from pautobot.routers import bot, contexts, documents
from pautobot.utils import extract_frontend_dist


def main():
    """CLI entry point: parse args, set up the app, and serve it.

    Startup order matters: extract the static frontend, create DB tables,
    initialize the global engine (globals.init), then build the FastAPI
    app and hand it to uvicorn.
    """
    parser = argparse.ArgumentParser(
        description=__description__,
    )
    parser.add_argument(
        "--host",
        type=str,
        default="127.0.0.1",
        help="Host to run the server on",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=5678,
        help="Port to run the server on",
    )
    parser.add_argument(
        "--version",
        action="store_true",
        help="Print version and exit",
    )
    args = parser.parse_args()

    if args.version:
        print(f"{__appname__} v{__version__}")
        return

    logging.info(f"Starting {__appname__}...")
    logging.info(f"Version: {__version__}")

    # Unpack the bundled Next.js export into the data folder so it can be
    # served as static files below.
    logging.info("Extracting frontend distribution...")
    static_folder = os.path.abspath(os.path.join(DATA_ROOT, "frontend-dist"))
    extract_frontend_dist(static_folder)

    logging.info("Creating database tables...")
    models.Base.metadata.create_all(bind=engine)

    logging.info("Starting FastAPI server...")
    globals.init()

    app = FastAPI(
        title=__appname__,
        description=__description__,
    )
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
        expose_headers=["*"],
    )

    # API routers first; the static mount at "/" must come last so it does
    # not shadow the /api routes.
    app.include_router(bot.router)
    app.include_router(contexts.router)
    app.include_router(documents.router)
    app.mount(
        "/", StaticFiles(directory=static_folder, html=True), name="static"
    )

    uvicorn.run(app, host=args.host, port=args.port, reload=False, workers=1)


if __name__ == "__main__":
    main()
86 |
--------------------------------------------------------------------------------
/pautobot/routers/contexts.py:
--------------------------------------------------------------------------------
from fastapi import APIRouter

from pautobot import globals
from pautobot.engine.bot_context import BotContext

# Endpoints for managing chat contexts (sessions) and their history.
router = APIRouter(
    prefix="/api",
    tags=["Bot Context"],
)


@router.get("/contexts")
async def get_contexts():
    """
    Get all chat contexts
    Each context is a separate chat session
    """
    contexts = globals.context_manager.get_contexts()
    context_list = []
    for context in contexts:
        # get_contexts() returns a mapping keyed by context id — iterate
        # keys and serialize each context via its .dict().
        context_list.append(contexts[context].dict())
    return context_list


@router.get("/current_context")
async def get_current_context():
    """
    Get the current chat context
    """
    return globals.context_manager.get_current_context().dict()


@router.post("/contexts")
async def create_context():
    """
    Create a new chat context
    """
    context = BotContext()
    globals.context_manager.register(context)
    return {
        "message": "Context created",
        "data": context.dict(),
    }


@router.delete("/contexts/{context_id}")
async def delete_context(context_id: int):
    """
    Delete a chat context
    """
    globals.context_manager.delete_context(context_id)
    return {"message": "Context deleted"}


@router.put("/contexts/{context_id}")
async def rename_context(context_id: int, new_name: str):
    """
    Rename a chat context
    """
    globals.context_manager.rename_context(context_id, new_name)
    return {"message": "Context renamed"}


@router.post("/set_context")
async def set_context(context_id: int):
    """
    Set the current context
    """
    # Keep the engine's active context in sync with the manager.
    globals.context_manager.set_current_context(context_id)
    globals.engine.set_context(globals.context_manager.get_current_context())
    return {"message": "Context set"}


@router.get("/{context_id}/chat_history")
async def get_chat_history(context_id: int):
    """
    Get the bot's chat history
    """
    return globals.context_manager.get_context(context_id).get_chat_history()
80 |
81 |
82 | @router.delete("/{context_id}/chat_history")
83 | async def clear_chat_history(context_id: int):
84 | """
85 | Clear the bot's chat history
86 | """
87 | globals.context_manager.get_context(context_id).clear_chat_history()
88 | return {"message": "Chat history cleared"}
89 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 |
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | share/python-wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .nox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | *.py,cover
49 | .hypothesis/
50 | .pytest_cache/
51 | cover/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 | db.sqlite3
61 | db.sqlite3-journal
62 |
63 | # Flask stuff:
64 | instance/
65 | .webassets-cache
66 |
67 | # Scrapy stuff:
68 | .scrapy
69 |
70 | # Sphinx documentation
71 | docs/_build/
72 |
73 | # PyBuilder
74 | .pybuilder/
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | # For a library or package, you might want to ignore these files since the code is
86 | # intended to run in multiple environments; otherwise, check them in:
87 | # .python-version
88 |
89 | # pipenv
90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
93 | # install all needed dependencies.
94 | #Pipfile.lock
95 |
96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97 | __pypackages__/
98 |
99 | # Celery stuff
100 | celerybeat-schedule
101 | celerybeat.pid
102 |
103 | # SageMath parsed files
104 | *.sage.py
105 |
106 | # Environments
107 | .env
108 | .venv
109 | env/
110 | venv/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
133 | # pytype static type analyzer
134 | .pytype/
135 |
136 | # Cython debug symbols
137 | cython_debug/
138 |
139 | # Text Editor
140 | .vscode
141 | .DS_Store
142 | .gitignore
143 |
144 |
145 | node_modules
146 | .next
147 | pautobot/frontend-dist
--------------------------------------------------------------------------------
/frontend/public/loading.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
--------------------------------------------------------------------------------
/pautobot/engine/context_manager.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import shutil
4 |
5 | from pautobot import db_models
6 | from pautobot.config import DATA_ROOT
7 | from pautobot.database import session
8 | from pautobot.engine.bot_context import BotContext
9 |
10 |
11 | class ContextManager:
12 | """
13 | Context manager. Handle logics related to PautoBot contexts.
14 | """
15 |
16 | def __init__(self):
17 | self._current_context = None
18 | self._contexts = {}
19 |
20 | def load_contexts(self) -> None:
21 | """
22 | Load all contexts from the database.
23 | """
24 | self._contexts = {0: BotContext(id=0, name="Default")}
25 | self._current_context = self._contexts[0]
26 | for context in session.query(db_models.BotContext).all():
27 | self._contexts[context.id] = BotContext(id=context.id)
28 |
29 | def rename_context(self, context_id: int, new_name: str) -> None:
30 | """
31 | Rename a context.
32 | """
33 | if context_id not in self._contexts:
34 | raise ValueError(f"Context {context_id} not found!")
35 | session.query(db_models.BotContext).filter_by(id=context_id).update(
36 | {"name": new_name}
37 | )
38 | session.commit()
39 |
40 | def delete_context(self, context_id: int) -> None:
41 | """
42 | Completely delete a context.
43 | """
44 | if context_id not in self._contexts:
45 | raise ValueError(f"Context {context_id} not found!")
46 | if context_id in self._contexts:
47 | del self._contexts[context_id]
48 | try:
49 | session.query(db_models.BotContext).filter_by(
50 | id=context_id
51 | ).delete()
52 | session.commit()
53 | shutil.rmtree(os.path.join(DATA_ROOT, "contexts", str(context_id)))
54 | except Exception as e:
55 | logging.error(f"Error while deleting context {context_id}: {e}")
56 |
57 | def get_context(self, context_id: int) -> BotContext:
58 | """
59 | Get a context by its ID.
60 | """
61 | if context_id not in self._contexts:
62 | raise ValueError(f"Context {context_id} not found!")
63 | return self._contexts[context_id]
64 |
65 | def get_contexts(self) -> dict:
66 | """
67 | Get all contexts.
68 | """
69 | return self._contexts
70 |
71 | def set_current_context(self, context_id: int) -> None:
72 | """
73 | Set the current context.
74 | """
75 | if context_id not in self._contexts:
76 | raise ValueError(f"Context {context_id} not found!")
77 | self._current_context = self._contexts[context_id]
78 |
79 | def get_current_context(self) -> BotContext:
80 | """
81 | Get the current context.
82 | """
83 | return self._current_context
84 |
--------------------------------------------------------------------------------
/pautobot/routers/documents.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | import zipfile
4 |
5 | from fastapi import APIRouter, File, UploadFile
6 | from fastapi.responses import JSONResponse
7 |
8 | from pautobot import globals
9 | from pautobot.utils import SUPPORTED_DOCUMENT_TYPES
10 |
# All document endpoints share the /api prefix and are grouped under the
# "Documents" tag in the generated OpenAPI docs.
router = APIRouter(
    prefix="/api",
    tags=["Documents"],
)
15 |
16 |
17 | @router.get("/{context_id}/documents")
18 | async def get_documents(context_id: int):
19 | """
20 | Get all documents in the bot's context
21 | """
22 | return globals.context_manager.get_context(context_id).get_documents()
23 |
24 |
25 | @router.post("/{context_id}/documents")
26 | async def upload_document(context_id: int, file: UploadFile = File(...)):
27 | """
28 | Upload a document to the bot's context
29 | """
30 | if not file:
31 | return {"message": "No file sent"}
32 |
33 | file_extension = os.path.splitext(file.filename)[1]
34 | if file_extension == ".zip":
35 | tmp_dir = tempfile.mkdtemp()
36 | tmp_zip_file = os.path.join(tmp_dir, file.filename)
37 | with open(tmp_zip_file, "wb") as tmp_zip:
38 | tmp_zip.write(file.file.read())
39 | with zipfile.ZipFile(tmp_zip_file, "r") as zip_ref:
40 | zip_ref.extractall(tmp_dir)
41 | for filename in os.listdir(tmp_dir):
42 | if os.path.splitext(filename)[1] in SUPPORTED_DOCUMENT_TYPES:
43 | with open(os.path.join(tmp_dir, filename), "rb") as file:
44 | globals.context_manager.get_context(
45 | context_id
46 | ).add_document(file, filename)
47 | elif file_extension in SUPPORTED_DOCUMENT_TYPES:
48 | globals.context_manager.get_context(context_id).add_document(
49 | file.file, file.filename
50 | )
51 | else:
52 | return JSONResponse(
53 | status_code=400,
54 | content={"message": f"File type {file_extension} not supported"},
55 | )
56 | globals.engine.ingest_documents_in_background(context_id=context_id)
57 | return {"message": "File uploaded"}
58 |
59 |
60 | @router.delete("/{context_id}/documents/{document_id}")
61 | async def delete_document(context_id: int, document_id: int):
62 | """
63 | Delete a document from the bot's context
64 | """
65 | try:
66 | globals.context_manager.get_context(context_id).delete_document(
67 | document_id
68 | )
69 | except ValueError as e:
70 | return JSONResponse(status_code=400, content={"message": str(e)})
71 | globals.engine.ingest_documents_in_background(context_id=context_id)
72 | return {"message": "Document deleted"}
73 |
74 |
75 | @router.post("/{context_id}/documents/ingest")
76 | async def ingest_documents(context_id: int):
77 | """
78 | Ingest all documents in the bot's context
79 | """
80 | globals.engine.ingest_documents_in_background(context_id=context_id)
81 | return {"message": "Ingestion finished!"}
82 |
83 |
84 | @router.post("/{context_id}/documents/{document_id}/open_in_file_explorer")
85 | async def open_in_file_explorer(context_id: int, document_id: int):
86 | """
87 | Open the bot's context in the file explorer
88 | """
89 | globals.context_manager.get_context(context_id).open_document(document_id)
90 | return {"message": "Documents folder opened"}
91 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from setuptools import find_packages, setup
4 |
5 |
def get_version():
    """Get package version from app_info.py file.

    Reads ``pautobot/app_info.py`` and extracts the value of
    ``__version__`` with a regex, so the version lives in one place.

    Returns:
        str: the version string, e.g. "1.2.3".

    Raises:
        RuntimeError: if no ``__version__`` assignment is found.
    """
    filename = "pautobot/app_info.py"
    with open(filename, encoding="utf-8") as f:
        match = re.search(
            r"""^__version__ = ['"]([^'"]*)['"]""", f.read(), re.M
        )
    if not match:
        # Bug fix: the old message was a placeholder-less f-string that
        # printed a literal "(unknown)" instead of the file name.
        raise RuntimeError(f"{filename} doesn't contain __version__")
    version = match.groups()[0]
    return version
17 |
18 |
def get_install_requires():
    """Get python requirements based on context."""
    # Minimum/pinned versions for the backend runtime dependencies.
    return [
        "langchain>=0.0.194",
        "gpt4all>=0.3.0",
        "chromadb>=0.3.23",
        "urllib3>=2.0.2",
        "pdfminer.six>=20221105",
        "unstructured>=0.6.6",
        "extract-msg>=0.41.1",
        "tabulate>=0.9.0",
        "pandoc>=2.3",
        "pypandoc>=1.11",
        "tqdm>=4.65.0",
        "python-multipart>=0.0.6",
        "fastapi==0.96.0",
        "SQLAlchemy==2.0.15",
        "alembic==1.11.1",
        "sentence_transformers==2.2.2",
        "requests",
    ]
42 |
43 |
def get_long_description():
    """Read long description from README.

    Loads README.md verbatim and rewrites a couple of literal snippets so
    the text renders correctly on PyPI.
    NOTE(review): the replacement string literals below appear empty or
    whitespace-only — they look like HTML fragments lost in an extraction
    step; verify them against the original repository before editing.
    """
    with open("README.md", encoding="utf-8") as f:
        long_description = f.read()
    long_description = long_description.replace(
        "",
        "",
    )
    long_description = long_description.replace(
        ' ',
        ' ',
    )
    return long_description
57 |
58 |
# Package metadata. The frontend build (pautobot/frontend-dist) is shipped
# inside the wheel so the server can serve it without a Node toolchain.
setup(
    name="pautobot",
    version=get_version(),
    packages=find_packages(),
    description="Private AutoGPT Robot - Your private task assistant with GPT!",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    author="Viet-Anh Nguyen",
    author_email="vietanh.dev@gmail.com",
    url="https://github.com/vietanhdev/pautobot",
    install_requires=get_install_requires(),
    license="Apache License 2.0",
    keywords="Personal Assistant, Automation, GPT, LLM, PrivateGPT",
    classifiers=[
        "Natural Language :: English",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3 :: Only",
    ],
    package_data={
        # Bug fix: package_data globs are resolved relative to the package
        # directory, so the old "pautobot/frontend-dist/..." entries
        # pointed at pautobot/pautobot/... and matched nothing.
        "pautobot": [
            "frontend-dist/*",
            "frontend-dist/**/*",
        ]
    },
    include_package_data=True,
    entry_points={
        "console_scripts": [
            "pautobot=pautobot.app:main",
            # NOTE(review): no top-level pautobot/ingest.py is visible in
            # the tree (only engine/ingest.py) — confirm this target.
            "pautobot.ingest=pautobot.ingest:main",
        ],
    },
)
96 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
🔥 PⒶutoBot 🔥
4 | Private AutoGPT Robot - Your private task assistant with GPT!
5 |
6 |
7 | - 🔥 **Chat** to your offline **LLMs on CPU Only**. **100% private**, no data leaves your execution environment at any point.
8 | - 🔥 **Ask questions** to your documents without an internet connection. Engine developed based on [PrivateGPT](https://github.com/imartinez/privateGPT).
9 | - 🔥 **Automate tasks** easily with **PAutoBot plugins**. Easy for everyone.
10 | - 🔥 **Easy coding structure** with **Next.js** and **Python**. Easy to understand and modify.
11 | - 🔥 **Built with** [LangChain](https://github.com/hwchase17/langchain), [GPT4All](https://github.com/nomic-ai/gpt4all), [Chroma](https://www.trychroma.com/), [SentenceTransformers](https://www.sbert.net/), [PrivateGPT](https://github.com/imartinez/privateGPT).
12 |
13 | 
14 |
15 | **The supported extensions are:**
16 |
17 | - `.csv`: CSV,
18 | - `.docx`: Word Document,
19 | - `.doc`: Word Document,
20 | - `.enex`: EverNote,
21 | - `.eml`: Email,
22 | - `.epub`: EPub,
23 | - `.html`: HTML File,
24 | - `.md`: Markdown,
25 | - `.msg`: Outlook Message,
26 | - `.odt`: Open Document Text,
27 | - `.pdf`: Portable Document Format (PDF),
28 | - `.pptx` : PowerPoint Document,
29 | - `.ppt` : PowerPoint Document,
30 | - `.txt`: Text file (UTF-8),
31 |
32 | ## I. Installation and Usage
33 |
34 | ### 1. Installation
35 |
36 | - Python 3.8 or higher.
37 | - Install **PAutoBot**:
38 |
39 | ```shell
40 | pip install pautobot
41 | ```
42 |
43 | ### 2. Usage
44 |
45 | - Run the app:
46 |
47 | ```shell
48 | python -m pautobot.app
49 | ```
50 |
51 | or just:
52 |
53 | ```shell
54 | pautobot
55 | ```
56 |
- Go to <http://localhost:5678> to see the user interface. You can choose one of the two modes:
58 | - **Chat Only**
59 | - **Documents Q&A**
60 | - Upload some documents to the app (see the supported extensions above). You can try [docs/python3.11.3_lite.zip](docs/python3.11.3_lite.zip) for a quick start. This zip file contains 45 files from the [Python 3.11.3 documentation](https://docs.python.org/3/download.html).
61 | - Force ingesting documents with **Ingest Data** button.
62 |
63 | You can also run PAutoBot publicly to your network or change the port with parameters. Example:
64 |
65 | ```shell
66 | pautobot --host 0.0.0.0 --port 8080
67 | ```
68 |
69 | ## II. Development
70 |
71 | ### 1. Clone the source code
72 |
73 | ```shell
74 | git clone https://github.com/nrl-ai/pautobot
75 | cd pautobot
76 | ```
77 |
78 | ### 2. Run your backend
79 |
80 | - Python 3.8 or higher.
81 | - To install Pautobot from source, from `pautobot` source code directory, run:
82 |
83 | ```shell
84 | pip install -e .
85 | ```
86 |
87 | - Run the app:
88 |
89 | ```shell
90 | python -m pautobot.app
91 | ```
92 |
- Go to <http://localhost:5678> to see the user interface.
94 |
### 3. Run your frontend
96 |
97 | - Install the dependencies:
98 |
99 | ```shell
100 | cd frontend
101 | npm install
102 | ```
103 |
104 | - Run the app:
105 |
106 | ```shell
107 | npm run dev
108 | ```
109 |
- Go to <http://localhost:3000> to see the user interface. Use this address to develop the frontend.
111 |
--------------------------------------------------------------------------------
/frontend/components/SidebarTopMenu.js:
--------------------------------------------------------------------------------
1 | import { toast } from "react-toastify";
2 |
3 | export default function () {
4 | return (
5 |
6 |
20 |
{
23 | toast.info("Coming soon!");
24 | }}
25 | >
26 |
32 |
33 |
38 |
39 |
40 |
41 | );
42 | }
43 |
--------------------------------------------------------------------------------
/frontend/components/NewMessage.js:
--------------------------------------------------------------------------------
1 | import { useEffect, useRef, useState } from "react";
2 |
3 | export default function NewMessage({ onSubmitMessage }) {
4 | const defaultMode = "QA";
5 | const [mode, setMode] = useState(defaultMode);
6 | const [message, setMessage] = useState("");
7 | const textAreaRef = useRef(null);
8 | const MAX_LINES = 5; // Change this value to set the maximum number of lines
9 | useEffect(() => {
10 | // get number of lines in message
11 | const lines = message.split("\n").length;
12 |
13 | if (lines > MAX_LINES) {
14 | textAreaRef.current.rows = MAX_LINES;
15 | textAreaRef.current.style.overflowY = "auto";
16 | } else {
17 | textAreaRef.current.rows = lines;
18 | textAreaRef.current.style.overflowY = "hidden";
19 | }
20 |
21 | const borderRadius = lines > 1 ? "1rem" : "0";
22 | const borderWidth = lines > 1 ? "1px" : "0px";
23 |
24 | const styles = {
25 | transition: "all 0.1s ease-in-out",
26 | borderTopLeftRadius: borderRadius,
27 | borderBottomLeftRadius: borderRadius,
28 | borderLeftWidth: borderWidth,
29 | };
30 |
31 | Object.assign(textAreaRef.current.style, styles);
32 | }, [message]);
33 |
34 | return (
35 | <>
36 |
88 | >
89 | );
90 | }
91 |
--------------------------------------------------------------------------------
/pautobot/utils.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import pathlib
4 | import platform
5 | import shutil
6 | import subprocess
7 | import tempfile
8 | import traceback
9 |
10 | import pkg_resources
11 | import requests
12 | from tqdm import tqdm
13 |
# File extensions the ingestion pipeline can load; uploads are validated
# against this list in routers/documents.py.
SUPPORTED_DOCUMENT_TYPES = [
    ".csv",
    ".docx",
    ".doc",
    ".enex",
    ".eml",
    ".epub",
    ".html",
    ".md",
    ".msg",
    ".odt",
    ".pdf",
    ".pptx",
    ".ppt",
    ".txt",
]
30 |
31 |
def open_file(path):
    """Open *path* with the platform's default application."""
    system = platform.system()
    if system == "Windows":
        os.startfile(path)
    elif system == "Darwin":
        # macOS
        subprocess.Popen(["open", path])
    else:
        # Assume a Linux/BSD desktop with xdg-utils available.
        subprocess.Popen(["xdg-open", path])
42 |
43 |
def extract_frontend_dist(static_folder):
    """
    Extract folder frontend/dist from package pautobot
    and put it in the same static folder for serving.
    """
    # Always rebuild the static folder from the packaged copy.
    if os.path.exists(static_folder):
        logging.info(f"Refreshing {static_folder}...")
        shutil.rmtree(static_folder, ignore_errors=True)

    dist_folder = pkg_resources.resource_filename("pautobot", "frontend-dist")
    if os.path.exists(dist_folder):
        pathlib.Path(static_folder).parent.mkdir(parents=True, exist_ok=True)
        shutil.copytree(dist_folder, static_folder)

    # Fallback: serve a placeholder page when the package carries no built
    # frontend (e.g. a source checkout before running the build script).
    if not os.path.exists(static_folder):
        logging.warning("frontend-dist not found in package pautobot")
        pathlib.Path(static_folder).mkdir(parents=True, exist_ok=True)
        placeholder = os.path.join(static_folder, "index.html")
        with open(placeholder, "w") as f:
            f.write(
                "frontend-dist not found in package pautobot. Please run: bash build_frontend.sh"
            )
64 |
65 |
def download_file(url, file_path):
    """
    Download ``url`` into ``file_path``.

    The payload is streamed into a temporary file with a tqdm progress
    bar and moved into place only on success, so a partial download never
    overwrites ``file_path``.
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    pathlib.Path(file_path).parent.mkdir(parents=True, exist_ok=True)
    try:
        response = requests.get(url, stream=True)
        # Check if the request was successful
        if response.status_code == 200:
            total_size = int(response.headers.get("content-length", 0))
            block_size = 8192  # Chunk size in bytes
            progress_bar = tqdm(total=total_size, unit="B", unit_scale=True)

            with open(tmp_file.name, "wb") as file:
                # Iterate over the response content in chunks
                for chunk in response.iter_content(chunk_size=block_size):
                    file.write(chunk)
                    progress_bar.update(len(chunk))

            progress_bar.close()
            shutil.move(tmp_file.name, file_path)
            logging.info("File downloaded successfully.")
        else:
            # Bug fix: failures previously leaked the temp file and were
            # logged at INFO level.
            os.remove(tmp_file.name)
            logging.error("Failed to download file.")
    except Exception:
        # Bug fix: don't leak the temp file when the request/stream raises.
        if os.path.exists(tmp_file.name):
            os.remove(tmp_file.name)
        raise
91 |
92 |
# Known model names mapped to their download URLs; download_model() below
# picks from this table.
DEFAULT_MODEL_URLS = {
    "ggml-gpt4all-j": "https://gpt4all.io/models/ggml-gpt4all-j.bin",
    "ggml-gpt4all-j-v1.1-breezy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.1-breezy.bin",
    "ggml-gpt4all-j-v1.2-jazzy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.2-jazzy.bin",
    "ggml-gpt4all-j-v1.3-groovy": "https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin",
    "ggml-gpt4all-l13b-snoozy": "https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin",
    "ggml-mpt-7b-base": "https://gpt4all.io/models/ggml-mpt-7b-base.bin",
    "ggml-mpt-7b-instruct": "https://gpt4all.io/models/ggml-mpt-7b-instruct.bin",
    "ggml-nous-gpt4-vicuna-13b": "https://gpt4all.io/models/ggml-nous-gpt4-vicuna-13b.bin",
    "ggml-replit-code-v1-3b": "https://huggingface.co/nomic-ai/ggml-replit-code-v1-3b/resolve/main/ggml-replit-code-v1-3b.bin",
    "ggml-stable-vicuna-13B.q4_2": "https://gpt4all.io/models/ggml-stable-vicuna-13B.q4_2.bin",
    "ggml-v3-13b-hermes-q5_1": "https://huggingface.co/eachadea/ggml-nous-hermes-13b/resolve/main/ggml-v3-13b-hermes-q5_1.bin",
    "ggml-vicuna-13b-1.1-q4_2": "https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin",
    "ggml-vicuna-7b-1.1-q4_2": "https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin",
    "ggml-wizard-13b-uncensored": "https://gpt4all.io/models/ggml-wizard-13b-uncensored.bin",
    "ggml-wizardLM-7B.q4_2": "https://gpt4all.io/models/ggml-wizardLM-7B.q4_2.bin",
}
110 |
111 |
def download_model(model_type, model_path):
    """
    Download model if not exists.

    TODO (vietanhdev):
        - Multiple download links
        - Check hash of the downloaded file

    Args:
        model_type: key into DEFAULT_MODEL_URLS; unknown types fall back
            to "ggml-gpt4all-j-v1.3-groovy" (the old hard-coded default).
        model_path: destination file; nothing happens if it exists.
    """
    if os.path.exists(model_path):
        return
    # Bug fix: model_type used to be ignored entirely; honor it when we
    # know its URL, keeping the historical default as fallback.
    model_url = DEFAULT_MODEL_URLS.get(
        model_type, DEFAULT_MODEL_URLS["ggml-gpt4all-j-v1.3-groovy"]
    )
    logging.info("Downloading model...")
    try:
        download_file(model_url, model_path)
    except Exception as e:
        # Bug fix: errors were logged at INFO level.
        logging.error(f"Error while downloading model: {e}")
        traceback.print_exc()
        exit(1)
    logging.info("Model downloaded!")
130 |
--------------------------------------------------------------------------------
/frontend/components/Main.js:
--------------------------------------------------------------------------------
1 | "use client";
2 | import React, { useState, useRef, useEffect } from "react";
3 | import { toast } from "react-toastify";
4 | import NewMessage from "./NewMessage";
5 |
6 | import { getChatHistory } from "@/lib/requests/history";
7 | import { ask, queryBotResponse } from "@/lib/requests/bot";
8 | import { openDocument } from "@/lib/requests/documents";
9 |
10 | export default function Main() {
11 | const [messages, setMessages] = useState([]);
12 | const [thinking, setThinking] = useState(false);
13 | const messagesRef = useRef(null);
14 |
15 | const scrollMessages = () => {
16 | setTimeout(() => {
17 | messagesRef.current.scrollTop = messagesRef.current.scrollHeight;
18 | }, 300);
19 | };
20 |
21 | useEffect(() => {
22 | getChatHistory(0).then(async (response) => {
23 | let data = await response.json();
24 | if (!response.ok) {
25 | const error = (data && data.message) || response.status;
26 | return Promise.reject(error);
27 | }
28 | setMessages(data);
29 | scrollMessages();
30 | });
31 | }, []);
32 |
33 | const onSubmitMessage = (mode, message) => {
34 | if (thinking) {
35 | toast.warning("I am thinking about previous question! Please wait...");
36 | return;
37 | }
38 | setThinking(true);
39 | let newMessages = [
40 | ...messages,
41 | { query: message },
42 | { answer: "Thinking..." },
43 | ];
44 | setMessages(newMessages);
45 | scrollMessages();
46 |
47 | ask(0, mode, message)
48 | .then(async (data) => {
49 | // Query data from /api/get_answer
50 | const interval = setInterval(async () => {
51 | queryBotResponse(0)
52 | .then(async (data) => {
53 | if (data.status == "THINKING" && data.answer) {
54 | newMessages.pop();
55 | newMessages = [
56 | ...newMessages,
57 | { answer: data.answer, docs: null },
58 | ];
59 | setMessages(newMessages);
60 | scrollMessages();
61 | } else if (data.status == "READY") {
62 | clearInterval(interval);
63 | newMessages.pop();
64 | newMessages = [
65 | ...newMessages,
66 | { answer: data.answer, docs: data.docs },
67 | ];
68 | setMessages(newMessages);
69 | setThinking(false);
70 | scrollMessages();
71 | }
72 | })
73 | .catch((error) => {
74 | toast.error(error);
75 | setThinking(false);
76 | });
77 | }, 2000);
78 | })
79 | .catch((error) => {
80 | toast.error(error);
81 | setThinking(false);
82 | });
83 | };
84 |
85 | return (
86 | <>
87 |
88 |
92 |
93 | {messages.map((message, index) => {
94 | if (message.query) {
95 | return (
96 |
97 |
98 |
{message.query}
99 |
100 |
101 | );
102 | } else {
103 | return (
104 |
105 |
106 |
107 | {message.answer}
108 | {message.answer === "Thinking..." && (
109 |
110 | )}
111 |
112 | {message.docs && (
113 |
114 |
115 | {message.docs.map((doc, index) => {
116 | return (
117 |
118 |
{
121 | openDocument(0, doc.source_id);
122 | }}
123 | >
124 | {doc.source}
125 |
126 |
{doc.content}
127 |
128 | );
129 | })}
130 |
131 |
132 | )}
133 |
134 |
135 | );
136 | }
137 | })}
138 | {messages.length === 0 && (
139 |
140 |
Hello World!
141 |
142 | We are in the mission of building an all-in-one task assistant
143 | with PrivateGPT!
144 |
145 |
146 | )}
147 |
148 |
149 |
153 |
154 | >
155 | );
156 | }
157 |
--------------------------------------------------------------------------------
/pautobot/engine/bot_context.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import json
3 | import os
4 | import pathlib
5 | import shutil
6 | import uuid
7 |
8 | from pautobot import db_models
9 | from pautobot.config import DATA_ROOT
10 | from pautobot.database import session
11 | from pautobot.engine.bot_enums import BotStatus
12 | from pautobot.utils import open_file
13 |
# Template for a context's answer state; deep-copied per BotContext so
# instances never share the mutable "docs" list.
DEFAULT_ANSWER = {
    "status": BotStatus.READY,
    "answer": "",
    "docs": [],
}
19 |
20 |
class BotContext:
    """A single chat session: its DB row, on-disk storage, and answer state.

    Each context mirrors a ``db_models.BotContext`` row (id/name) and owns
    a storage folder (documents, search_db, chat_files) under
    ``DATA_ROOT/contexts/<id>`` by default.
    """

    def __init__(
        self, id=None, name=None, storage_path=None, *args, **kwargs
    ) -> None:
        # id None means "the default context" (id 0).
        if id is None:
            id = 0
        # Fetch the matching DB row, creating it lazily if missing.
        db_bot_context = (
            session.query(db_models.BotContext).filter_by(id=id).first()
        )
        if db_bot_context is None:
            if name is None:
                # No name supplied: generate a unique placeholder name.
                name = str(uuid.uuid4())
            db_bot_context = db_models.BotContext(id=id, name=name)
            session.add(db_bot_context)
            session.commit()
        # The database row is the source of truth for the name.
        name = db_bot_context.name
        if storage_path is None:
            storage_path = os.path.join(DATA_ROOT, "contexts", str(id))
        pathlib.Path(storage_path).mkdir(parents=True, exist_ok=True)
        self.id = id
        self.name = name
        self.storage_path = storage_path
        # Embedding model name used when this context's documents are indexed.
        self.embeddings_model_name = "all-MiniLM-L6-v2"
        self.documents_directory = os.path.join(storage_path, "documents")
        self.search_db_directory = os.path.join(storage_path, "search_db")
        self.chat_files_directory = os.path.join(storage_path, "chat_files")
        self.info_file = os.path.join(storage_path, "info.json")
        # NOTE(review): nothing visible here ever writes info.json, so this
        # re-initializes the folder layout on every construction — confirm.
        if not os.path.exists(self.info_file):
            self.initialize_bot_context()
        # Per-instance copy so contexts never share the mutable docs list.
        self.current_answer = copy.deepcopy(DEFAULT_ANSWER)

    @staticmethod
    def get_default_bot_context():
        """Get the default bot context."""
        return BotContext(id=0, name="Default")

    def get_info(self) -> dict:
        """Get the bot info (id and display name only)."""
        return {
            "id": self.id,
            "name": self.name,
        }

    def initialize_bot_context(self) -> None:
        """Initialize the bot context: create its storage sub-folders."""
        for directory in [
            self.documents_directory,
            self.search_db_directory,
            self.chat_files_directory,
        ]:
            pathlib.Path(directory).mkdir(parents=True, exist_ok=True)

    def rename(self, new_name: str) -> None:
        """Rename the bot context (persists to the database only).

        NOTE(review): ``self.name`` is not updated here, so the in-memory
        object goes stale until reloaded — confirm intended.
        """
        db_bot_context = (
            session.query(db_models.BotContext).filter_by(id=self.id).first()
        )
        db_bot_context.name = new_name
        session.commit()

    def add_document(self, file, filename) -> None:
        """Add a document to the bot's knowledge base.

        Args:
            file: a binary file-like object to copy from.
            filename: the original (display) file name.
        """
        pathlib.Path(self.documents_directory).mkdir(
            parents=True, exist_ok=True
        )
        file_extension = os.path.splitext(filename)[1]

        # Create a new document in the database
        db_document = db_models.Document(bot_context_id=self.id, name=filename)
        session.add(db_document)
        session.commit()
        document_id = db_document.id

        # Store on disk under the DB id to avoid name collisions.
        new_filename = f"{document_id}{file_extension}"
        with open(
            os.path.join(self.documents_directory, new_filename), "wb+"
        ) as destination:
            shutil.copyfileobj(file, destination)

        db_document.storage_name = new_filename
        session.commit()

    def delete_document(self, document_id: int) -> None:
        """Delete a document from the bot's knowledge base.

        Raises:
            ValueError: if the document does not belong to this context.
        """
        db_document = (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id, id=document_id)
            .first()
        )
        if db_document is None:
            raise ValueError(f"Document with id {document_id} not found.")
        # Remove the stored file first, then the database row.
        os.remove(
            os.path.join(self.documents_directory, db_document.storage_name)
        )
        session.delete(db_document)
        session.commit()

    def get_documents(self) -> list:
        """List all documents as dicts of id, display name, storage name."""
        documents = []
        for db_document in (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id)
            .all()
        ):
            documents.append(
                {
                    "id": db_document.id,
                    "name": db_document.name,
                    "storage_name": db_document.storage_name,
                }
            )
        return documents

    def open_documents_folder(self) -> None:
        """Open the documents folder in the OS file manager."""
        open_file(self.documents_directory)

    def open_document(self, document_id: int) -> None:
        """Open a document with the OS default application.

        Raises:
            ValueError: if the document does not belong to this context.
        """
        db_document = (
            session.query(db_models.Document)
            .filter_by(bot_context_id=self.id, id=document_id)
            .first()
        )
        if db_document is None:
            raise ValueError(f"Document with id {document_id} not found.")
        open_file(
            os.path.join(self.documents_directory, db_document.storage_name)
        )

    def write_chat_history(self, chat_history: dict) -> None:
        """Write a message to the bot's chat history (stored as JSON text)."""
        chat_history_text = json.dumps(chat_history)
        db_chat_chunk = db_models.ChatChunk(
            bot_context_id=self.id, text=chat_history_text
        )
        session.add(db_chat_chunk)
        session.commit()

    def get_chat_history(self) -> list:
        """Get the bot's chat history as a list of decoded JSON chunks."""
        chat_history = []
        for db_chat_chunk in (
            session.query(db_models.ChatChunk)
            .filter_by(bot_context_id=self.id)
            .all()
        ):
            chat_history.append(json.loads(db_chat_chunk.text))
        return chat_history

    def clear_chat_history(self) -> None:
        """Clear the bot's chat history (deletes all rows for this context)."""
        session.query(db_models.ChatChunk).filter_by(
            bot_context_id=self.id
        ).delete()
        session.commit()

    def __str__(self) -> str:
        # NOTE(review): "ChatContext" looks like a legacy class name.
        return f"ChatContext(storage_path={self.storage_path})"

    def dict(self) -> dict:
        """Serialize the context's paths/settings for API responses."""
        return {
            "id": self.id,
            "name": self.name,
            "storage_path": self.storage_path,
            "embeddings_model_name": self.embeddings_model_name,
            "documents_directory": self.documents_directory,
            "search_db_directory": self.search_db_directory,
            "chat_files_directory": self.chat_files_directory,
            "info_file": self.info_file,
        }
193 |
--------------------------------------------------------------------------------
/pautobot/engine/ingest.py:
--------------------------------------------------------------------------------
import glob
import logging
import os
from multiprocessing import Pool
from typing import List, Optional

from chromadb.config import Settings
from langchain.docstore.document import Document
from langchain.document_loaders import (
    CSVLoader,
    EverNoteLoader,
    PDFMinerLoader,
    TextLoader,
    UnstructuredEmailLoader,
    UnstructuredEPubLoader,
    UnstructuredHTMLLoader,
    UnstructuredMarkdownLoader,
    UnstructuredODTLoader,
    UnstructuredPowerPointLoader,
    UnstructuredWordDocumentLoader,
)
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from tqdm import tqdm
26 |
# Text-splitting parameters: each chunk holds at most `chunk_size`
# characters, and consecutive chunks share `chunk_overlap` characters
# (see RecursiveCharacterTextSplitter below).
chunk_size = 500
chunk_overlap = 50
29 |
30 |
# Custom document loaders
class MyElmLoader(UnstructuredEmailLoader):
    """Wrapper to fallback to text/plain when default does not work"""

    def load(self) -> List[Document]:
        """Load the email, retrying as text/plain when no HTML part exists."""
        try:
            try:
                return UnstructuredEmailLoader.load(self)
            except ValueError as e:
                if "text/html content not found in email" not in str(e):
                    raise
                # Retry with the plain-text body of the message.
                self.unstructured_kwargs["content_source"] = "text/plain"
                return UnstructuredEmailLoader.load(self)
        except Exception as e:
            # Prefix the failing file's path to the error message.
            raise type(e)(f"{self.file_path}: {e}") from e
52 |
53 |
# Map file extensions to document loaders and their arguments.
# Each value is (loader class, kwargs passed to the loader's constructor).
LOADER_MAPPING = {
    ".csv": (CSVLoader, {}),
    ".doc": (UnstructuredWordDocumentLoader, {}),
    ".docx": (UnstructuredWordDocumentLoader, {}),
    ".enex": (EverNoteLoader, {}),
    ".eml": (MyElmLoader, {}),
    ".epub": (UnstructuredEPubLoader, {}),
    ".html": (UnstructuredHTMLLoader, {}),
    ".md": (UnstructuredMarkdownLoader, {}),
    ".odt": (UnstructuredODTLoader, {}),
    ".pdf": (PDFMinerLoader, {}),
    ".ppt": (UnstructuredPowerPointLoader, {}),
    ".pptx": (UnstructuredPowerPointLoader, {}),
    ".txt": (TextLoader, {"encoding": "utf8"}),
}
70 |
71 |
def load_single_document(file_path: str) -> Document:
    """Load one file with the loader registered for its extension.

    Only the first ``Document`` produced by the loader is returned
    (most loaders yield exactly one).

    Raises:
        ValueError: If the file's extension has no registered loader.
    """
    # os.path.splitext is robust for dotless names ("" instead of the whole
    # path, as rsplit used to produce) and we lowercase so "DOC.PDF" matches.
    ext = os.path.splitext(file_path)[1].lower()
    if ext in LOADER_MAPPING:
        loader_class, loader_args = LOADER_MAPPING[ext]
        loader = loader_class(file_path, **loader_args)
        return loader.load()[0]

    raise ValueError(f"Unsupported file extension '{ext}'")
80 |
81 |
def load_documents(
    source_dir: str, ignored_files: Optional[List[str]] = None
) -> List[Document]:
    """
    Load all supported documents under *source_dir* (recursively),
    skipping any path listed in *ignored_files*.
    """
    # `None` default avoids the shared-mutable-default pitfall; a set makes
    # the per-file membership test O(1) instead of O(len(ignored_files)).
    ignored = set(ignored_files or [])

    all_files: List[str] = []
    for ext in LOADER_MAPPING:
        all_files.extend(
            glob.glob(os.path.join(source_dir, f"**/*{ext}"), recursive=True)
        )
    filtered_files = [
        file_path for file_path in all_files if file_path not in ignored
    ]

    # Parse files in parallel: loading is CPU-heavy for formats like PDF.
    with Pool(processes=os.cpu_count()) as pool:
        results = []
        with tqdm(
            total=len(filtered_files), desc="Loading new documents", ncols=80
        ) as pbar:
            for doc in pool.imap_unordered(
                load_single_document, filtered_files
            ):
                results.append(doc)
                pbar.update()

    return results
110 |
111 |
def process_documents(
    source_directory, ignored_files: Optional[List[str]] = None
) -> List[Document]:
    """
    Load documents from *source_directory* and split them into chunks.

    Returns an empty list when there is nothing new to ingest.
    """
    # `None` default avoids the shared-mutable-default pitfall; lazy %-args
    # let logging skip formatting when the level is disabled.
    logging.info("Loading documents from %s", source_directory)
    documents = load_documents(source_directory, ignored_files or [])
    if not documents:
        logging.info("No new documents to load")
        return []
    logging.info(
        "Loaded %d new documents from %s", len(documents), source_directory
    )
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = text_splitter.split_documents(documents)
    logging.info(
        "Split into %d chunks of text (max. %d tokens each)",
        len(texts),
        chunk_size,
    )
    return texts
135 |
136 |
def does_vectorstore_exist(persist_directory: str) -> bool:
    """
    Check whether a usable Chroma (duckdb+parquet) vectorstore is present.

    Previously the function fell off the end and returned ``None`` when the
    index directory was missing; it now always returns a bool, as annotated.
    """
    required = (
        os.path.join(persist_directory, "index"),
        os.path.join(persist_directory, "chroma-collections.parquet"),
        os.path.join(persist_directory, "chroma-embeddings.parquet"),
    )
    if not all(os.path.exists(path) for path in required):
        return False
    list_index_files = glob.glob(
        os.path.join(persist_directory, "index/*.bin")
    )
    list_index_files += glob.glob(
        os.path.join(persist_directory, "index/*.pkl")
    )
    # A working vectorstore contains more than three index files.
    return len(list_index_files) > 3
157 |
158 |
def ingest_documents(
    source_directory, persist_directory, embeddings_model_name
):
    """Ingest all documents from *source_directory* into the Chroma
    vectorstore at *persist_directory*, creating the store if necessary.
    """
    # Chroma configured for the local duckdb+parquet backend; no telemetry.
    chroma_settings = Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False,
    )

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

    if does_vectorstore_exist(persist_directory):
        # Update and store locally vectorstore
        logging.info(
            f"Appending to existing vectorstore at {persist_directory}"
        )
        db = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings,
            client_settings=chroma_settings,
        )
        collection = db.get()
        # Skip files already in the store, identified by the "source"
        # metadata written during previous ingestions.
        texts = process_documents(
            source_directory,
            [metadata["source"] for metadata in collection["metadatas"]],
        )
        if not texts:
            logging.info("No new documents to load")
            return
        logging.info("Creating embeddings. May take some minutes...")
        db.add_documents(texts)
    else:
        # Create and store locally vectorstore
        logging.info("Creating new vectorstore")
        texts = process_documents(source_directory)
        if not texts:
            logging.info("No new documents to load")
            return
        logging.info("Creating embeddings. May take some minutes...")
        db = Chroma.from_documents(
            texts,
            embeddings,
            persist_directory=persist_directory,
            client_settings=chroma_settings,
        )
    # Flush the store to disk and drop the reference to release it.
    db.persist()
    db = None

    logging.info("Ingestion complete! You can now query the vectorstore")
209 |
--------------------------------------------------------------------------------
/frontend/components/QADBManager.js:
--------------------------------------------------------------------------------
1 | import { toast } from "react-toastify";
2 | import { useState, useRef, useEffect } from "react";
3 |
4 | import LoadingIcon from "./icons/LoadingIcon";
5 | import {
6 | openDocument,
7 | deleteDocument,
8 | getDocuments,
9 | uploadDocument,
10 | } from "@/lib/requests/documents";
11 | import { getBotInfo } from "@/lib/requests/bot";
12 |
13 | export default function QADBManager() {
14 | const SUPPORTED_FILE_TYPES = [
15 | ".csv",
16 | ".docx",
17 | ".doc",
18 | ".enex",
19 | ".eml",
20 | ".epub",
21 | ".html",
22 | ".md",
23 | ".msg",
24 | ".odt",
25 | ".pdf",
26 | ".pptx",
27 | ".ppt",
28 | ".txt",
29 | ".zip",
30 | ];
31 |
32 | const fileInput = useRef(null);
33 | const [uploading, setUploading] = useState(false);
34 | const [documents, setDocuments] = useState([]);
35 | const refetchDocuments = (contextId) => {
36 | getDocuments(contextId)
37 | .then((data) => {
38 | setDocuments(data);
39 | })
40 | .catch((error) => {
41 | toast.error(error);
42 | });
43 | };
44 | useEffect(() => {
45 | refetchDocuments(0);
46 | }, []);
47 |
48 | const [botInfo, setBotInfo] = useState(null);
49 | const getAndSetBotInfo = () => {
50 | getBotInfo()
51 | .then((data) => {
52 | setBotInfo(data);
53 | })
54 | .catch((error) => {
55 | toast.error(error);
56 | });
57 | };
58 |
59 | // Periodically get bot info every 5 seconds
60 | useEffect(() => {
61 | getAndSetBotInfo();
62 | const interval = setInterval(() => {
63 | getAndSetBotInfo();
64 | }, 5000);
65 | return () => clearInterval(interval);
66 | }, []);
67 |
68 | const isValidFile = (file) => {
69 | let fileExtension = file.name?.split(".")?.pop();
70 | fileExtension = fileExtension?.toLowerCase();
71 | if (!fileExtension || !SUPPORTED_FILE_TYPES.includes("." + fileExtension)) {
72 | return false;
73 | }
74 | return true;
75 | };
76 |
77 | const uploadFiles = async (files) => {
78 | if (!files || files.length == 0) {
79 | toast.error("No file selected.");
80 | return;
81 | }
82 |
83 | // Clone the files array
84 | files = [...files];
85 |
86 | // Start uploading
87 | setUploading(true);
88 |
89 | let numUploaded = 0;
90 | let numFailed = 0;
91 | for (let i = 0; i < files.length; i++) {
92 | const file = files[i];
93 | if (!isValidFile(file)) {
94 | toast.error("File type not supported: " + file.name);
95 | numFailed++;
96 | continue;
97 | }
98 | await uploadDocument(0, file)
99 | .then(async (response) => {
100 | numUploaded++;
101 | refetchDocuments(0);
102 | })
103 | .catch((error) => {
104 | toast.error(error);
105 | numFailed++;
106 | refetchDocuments(0);
107 | });
108 | }
109 |
110 | toast.info(
111 | "Uploaded " +
112 | numUploaded +
113 | " file(s). " +
114 | (numFailed > 0 ? "Failed to upload " + numFailed + " file(s)." : "")
115 | );
116 | fileInput.current.value = "";
117 | setUploading(false);
118 | };
119 |
120 | return (
121 | <>
122 | Q&A Database
123 |
124 | {documents.length > 0 ? (
125 | You have {documents.length} document(s).
126 | ) : (
127 |
128 | You have no document. Please upload a file and ingest data for Q&A.
129 |
130 | )}
131 |
132 | {
135 | e.preventDefault();
136 | e.stopPropagation();
137 | }}
138 | onDrop={(e) => {
139 | e.preventDefault();
140 | e.stopPropagation();
141 | uploadFiles(e.dataTransfer.files);
142 | }}
143 | >
144 |
145 |
146 | {documents.map((document, key) => (
147 |
148 | {key + 1}.
149 |
150 | {document.name}
151 |
152 |
153 | {
155 | openDocument(0, document.id);
156 | }}
157 | >
158 |
166 |
171 |
172 |
173 | {
175 | let confirmation = confirm(
176 | "Are you sure you want to delete this document?"
177 | );
178 | if (confirmation) {
179 | deleteDocument(0, document.id)
180 | .then((response) => {
181 | toast.success("Document deleted!");
182 | refetchDocuments(0);
183 | })
184 | .catch((error) => {
185 | toast.error(error);
186 | });
187 | }
188 | }}
189 | >
190 |
198 |
203 |
204 |
205 |
206 |
207 | ))}
208 |
209 |
210 |
211 | {botInfo?.is_ingesting_data && (
212 |
213 |
214 | Note: The bot is
215 | currently ingesting data. Please wait until it finishes.
216 |
217 |
218 |
219 | )}
220 |
221 | {
228 | uploadFiles(e.target.files);
229 | }}
230 | />
231 | {
237 | if (uploading) return;
238 | fileInput.current.click();
239 | }}
240 | >
241 | Upload Files
242 | {uploading ? : null}
243 |
244 |
245 | >
246 | );
247 | }
248 |
--------------------------------------------------------------------------------
/pautobot/engine/engine.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import threading
4 | import traceback
5 |
6 | from pautobot import db_models
7 | from pautobot.config import DATA_ROOT
8 | from pautobot.database import session
9 | from pautobot.engine.bot_enums import BotMode, BotStatus
10 | from pautobot.engine.chatbot_factory import ChatbotFactory
11 | from pautobot.engine.context_manager import ContextManager
12 | from pautobot.engine.ingest import ingest_documents
13 | from pautobot.engine.llm_factory import LLMFactory
14 | from pautobot.engine.qa_factory import QAFactory
15 |
16 |
17 | class PautoBotEngine:
18 | """PautoBot engine for answering questions."""
19 |
20 | def __init__(
21 | self, mode, context_manager: ContextManager, model_type="GPT4All"
22 | ) -> None:
23 | self.mode = mode
24 | self.model_type = model_type
25 | self.model_path = os.path.join(
26 | DATA_ROOT,
27 | "models",
28 | "ggml-gpt4all-j-v1.3-groovy.bin",
29 | )
30 | self.context_manager = context_manager
31 | if not self.context_manager.get_contexts():
32 | raise ValueError(
33 | "No contexts found! Please create at least one context first."
34 | )
35 | self.context = self.context_manager.get_current_context()
36 | self.status = BotStatus.READY
37 |
38 | # Prepare the LLM
39 | self.model_n_ctx = 1000
40 | self.llm = LLMFactory.create_llm(
41 | model_type=self.model_type,
42 | model_path=self.model_path,
43 | model_n_ctx=self.model_n_ctx,
44 | streaming=False,
45 | verbose=False,
46 | )
47 | self.chatbot_instance = ChatbotFactory.create_chatbot(self.llm)
48 |
49 | # Prepare the retriever
50 | self.qa_instance = None
51 | self.qa_instance_error = None
52 | self.is_ingesting_data = False
53 | if mode == BotMode.CHAT.value:
54 | return
55 | self.ingest_documents_in_background()
56 |
57 | def get_bot_info(self) -> dict:
58 | """Get the bot's info."""
59 | return {
60 | "mode": self.mode,
61 | "model_type": self.status.value,
62 | "qa_instance_error": self.qa_instance_error,
63 | "status": self.status.value,
64 | "is_ingesting_data": self.is_ingesting_data,
65 | "context": self.context.dict(),
66 | }
67 |
68 | def ingest_documents(self, context_id=None) -> None:
69 | """Ingest the bot's documents."""
70 | if self.is_ingesting_data:
71 | logging.warning("Already ingesting data. Skipping...")
72 | return
73 | self.is_ingesting_data = True
74 | if context_id is not None:
75 | self.switch_context(context_id)
76 | try:
77 | ingest_documents(
78 | self.context.documents_directory,
79 | self.context.search_db_directory,
80 | self.context.embeddings_model_name,
81 | )
82 | # Reload QA
83 | self.qa_instance = QAFactory.create_qa(
84 | context=self.context,
85 | llm=self.llm,
86 | )
87 | except Exception as e:
88 | logging.error(f"Error while ingesting documents: {e}")
89 | logging.error(traceback.format_exc())
90 | self.qa_instance_error = "Error while ingesting documents!"
91 | finally:
92 | self.is_ingesting_data = False
93 |
94 | def ingest_documents_in_background(self, context_id=None) -> None:
95 | """Ingest the bot's documents in the background using a thread."""
96 | if self.is_ingesting_data:
97 | logging.warning("Already ingesting data. Skipping...")
98 | return
99 | thread = threading.Thread(
100 | target=self.ingest_documents,
101 | args=(context_id,),
102 | )
103 | thread.start()
104 |
105 | def switch_context(self, context_id: int) -> None:
106 | """Switch the bot context if needed."""
107 | if self.context.id != context_id:
108 | self.context = self.context_manager.get_context(context_id)
109 | self.qa_instance = QAFactory.create_qa(
110 | context=self.context,
111 | llm=self.llm,
112 | )
113 |
114 | def check_query(self, mode, query, context_id=None) -> None:
115 | """
116 | Check if the query is valid.
117 | Raises an exception on invalid query.
118 | """
119 | if context_id is not None:
120 | self.switch_context(context_id)
121 | if not query:
122 | raise ValueError("Query cannot be empty!")
123 | if mode == BotMode.QA.value and self.mode == BotMode.CHAT.value:
124 | raise ValueError(
125 | "PautobotEngine was initialized in chat mode! "
126 | "Please restart in QA mode."
127 | )
128 | elif mode == BotMode.QA.value and self.is_ingesting_data:
129 | raise ValueError(
130 | "Pautobot is currently ingesting data! Please wait a few minutes and try again."
131 | )
132 | elif mode == BotMode.QA.value and self.qa_instance_error is not None:
133 | raise ValueError(
134 | "Pautobot QA instance is not ready! Please wait a few minutes and try again."
135 | )
136 |
137 | def query(self, mode, query, context_id=None) -> None:
138 | """Query the bot."""
139 | self.status = BotStatus.THINKING
140 | if context_id is not None:
141 | self.switch_context(context_id)
142 | self.check_query(mode, query)
143 | if mode is None:
144 | mode = self.mode
145 | if mode == BotMode.QA.value and self.qa_instance is None:
146 | logging.info(self.qa_instance_error)
147 | mode = BotMode.CHAT
148 | self.context.current_answer = {
149 | "status": self.status,
150 | "answer": "",
151 | "docs": [],
152 | }
153 | self.context.write_chat_history(
154 | {
155 | "query": query,
156 | "mode": mode,
157 | }
158 | )
159 | if mode == BotMode.QA.value:
160 | try:
161 | logging.info("Received query: ", query)
162 | logging.info("Searching...")
163 | res = self.qa_instance(query)
164 | answer, docs = (
165 | res["result"],
166 | res["source_documents"],
167 | )
168 | doc_json = []
169 | for document in docs:
170 | document_file = document.metadata["source"]
171 | document_id = os.path.basename(document_file).split(".")[0]
172 | document_id = int(document_id)
173 | db_document = (
174 | session.query(db_models.Document)
175 | .filter(db_models.Document.id == document_id)
176 | .first()
177 | )
178 | if not db_document:
179 | continue
180 | doc_json.append(
181 | {
182 | "source": db_document.name,
183 | "source_id": db_document.id,
184 | "content": document.page_content,
185 | }
186 | )
187 | self.status = BotStatus.READY
188 | self.context.current_answer = {
189 | "status": self.status,
190 | "answer": answer,
191 | "docs": doc_json,
192 | }
193 | self.context.write_chat_history(self.context.current_answer)
194 | except Exception as e:
195 | logging.error("Error during thinking: ", e)
196 | traceback.print_exc()
197 | answer = "Error during thinking! Please try again."
198 | if "Index not found" in str(e):
199 | answer = "Index not found! Please ingest documents first."
200 | self.status = BotStatus.READY
201 | self.context.current_answer = {
202 | "status": self.status,
203 | "answer": answer,
204 | "docs": None,
205 | }
206 | self.context.write_chat_history(self.context.current_answer)
207 | else:
208 | try:
209 | logging.info("Received query: ", query)
210 | logging.info("Thinking...")
211 | answer = self.chatbot_instance.predict(human_input=query)
212 | logging.info("Answer: ", answer)
213 | self.status = BotStatus.READY
214 | self.context.current_answer = {
215 | "status": self.status,
216 | "answer": answer,
217 | "docs": None,
218 | }
219 | self.context.write_chat_history(self.context.current_answer)
220 | except Exception as e:
221 | logging.error("Error during thinking: ", e)
222 | traceback.print_exc()
223 | self.status = BotStatus.READY
224 | self.context.current_answer = {
225 | "status": self.status,
226 | "answer": "Error during thinking! Please try again.",
227 | "docs": None,
228 | }
229 | self.context.write_chat_history(self.context.current_answer)
230 |
231 | def get_answer(self, context_id=None) -> dict:
232 | """Get the bot's answer."""
233 | if context_id is not None:
234 | self.switch_context(context_id)
235 | return self.context.current_answer
236 |
--------------------------------------------------------------------------------