├── api ├── sources │ └── .gitkeep ├── .python-version ├── models │ └── api.py ├── Pipfile ├── document_stores │ └── faiss.py ├── .vscode │ └── launch.json ├── Dockerfile ├── README.md ├── main.py ├── pipelines │ ├── indexing.py │ ├── openai.py │ └── nodes │ │ └── markdown.py └── .gitignore ├── app ├── public │ ├── lego.png │ ├── favicon.ico │ └── usericon.png ├── next.config.js ├── pages │ ├── _app.js │ ├── _document.js │ ├── api │ │ └── chat.js │ └── index.js ├── package.json ├── styles │ ├── globals.css │ └── Home.module.css ├── Dockerfile ├── LICENSE ├── README.md ├── .gitignore └── package-lock.json ├── res ├── screenshot.png └── bricky-recording.gif ├── .gitmodules ├── compose.yaml ├── LICENSE.md ├── README.md └── .gitignore /api/sources/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /api/.python-version: -------------------------------------------------------------------------------- 1 | 3.10.9 2 | -------------------------------------------------------------------------------- /app/public/lego.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsbaunwall/bricky/HEAD/app/public/lego.png -------------------------------------------------------------------------------- /res/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsbaunwall/bricky/HEAD/res/screenshot.png -------------------------------------------------------------------------------- /app/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsbaunwall/bricky/HEAD/app/public/favicon.ico -------------------------------------------------------------------------------- /app/public/usericon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsbaunwall/bricky/HEAD/app/public/usericon.png -------------------------------------------------------------------------------- /res/bricky-recording.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/larsbaunwall/bricky/HEAD/res/bricky-recording.gif -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "api/haystack"] 2 | path = api/haystack 3 | url = git@github.com:deepset-ai/haystack.git 4 | -------------------------------------------------------------------------------- /app/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | } 5 | 6 | module.exports = nextConfig 7 | -------------------------------------------------------------------------------- /compose.yaml: -------------------------------------------------------------------------------- 1 | services: 2 | api: 3 | build: api/. 4 | ports: 5 | - "8080:8080" 6 | app: 7 | build: app/. 
8 | ports: 9 | - "3000:3000" -------------------------------------------------------------------------------- /app/pages/_app.js: -------------------------------------------------------------------------------- 1 | import '../styles/globals.css' 2 | 3 | export default function App({ Component, pageProps }) { 4 | return 5 | } 6 | -------------------------------------------------------------------------------- /app/pages/_document.js: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from 'next/document' 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /app/pages/api/chat.js: -------------------------------------------------------------------------------- 1 | export default async function (req, res) { 2 | const response = await fetch("http://api:8080/ask", { 3 | method: "POST", 4 | headers: { 5 | "Content-Type": "application/json", 6 | }, 7 | body: JSON.stringify({ 8 | question: req.body.question, 9 | history: req.body.history, 10 | }), 11 | }); 12 | 13 | const data = await response.json(); 14 | 15 | res.status(200).json({ result: data }); 16 | } 17 | -------------------------------------------------------------------------------- /api/models/api.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Any, Dict 2 | 3 | from pydantic import BaseModel 4 | 5 | 6 | class Document(BaseModel): 7 | name: Optional[str] = None 8 | content: str 9 | meta: Dict[str, Any] 10 | 11 | 12 | class QueryModel(BaseModel): 13 | question: str 14 | top_k = 5 15 | history: list = None 16 | 17 | 18 | class ResponseModel(BaseModel): 19 | success: str = None 20 | error: str = None 21 | documents: List[Document] 22 | -------------------------------------------------------------------------------- /api/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | python-dotenv = "*" 8 | fastapi = "*" 9 | pydantic = "*" 10 | uvicorn = "*" 11 | markdown = "*" 12 | python-frontmatter = "*" 13 | farm-haystack = {extras = ["docstores,crawler,preprocessing"], version = "*"} 14 | beautifulsoup4 = "*" 15 | 16 | [dev-packages] 17 | autopep8 = "*" 18 | 19 | [requires] 20 | python_version = "3.10" 21 | python_full_version = "3.10.9" 22 | -------------------------------------------------------------------------------- /app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bricky-chat", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@emotion/react": "^11.10.5", 13 | "@emotion/styled": "^11.10.5", 14 | "@mui/material": "^5.11.4", 15 | "@next/font": "13.1.1", 16 | "next": "13.1.1", 17 | "openai": "^3.1.0", 18 | "react": "18.2.0", 19 | "react-dom": "18.2.0", 20 | "react-markdown": "^8.0.4" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /api/document_stores/faiss.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from haystack.document_stores import FAISSDocumentStore 4 | 5 | 6 | def load_store(index_name: str, embedding_dim: int = 1536) -> FAISSDocumentStore: 7 | index_path = "indices/{0}".format(index_name) 8 | index_exists = os.path.exists(index_path) 9 | 10 | if index_exists: 11 | return FAISSDocumentStore.load(index_path) 12 | else: 13 | return FAISSDocumentStore( 14 | embedding_dim=embedding_dim, 15 | faiss_index_factory_str="Flat", 16 | sql_url="sqlite:///{0}_document_store.db".format(index_path)) -------------------------------------------------------------------------------- /api/.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible 
attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | 8 | { 9 | "name": "Python: Flask", 10 | "type": "python", 11 | "request": "launch", 12 | "module": "flask", 13 | "env": { 14 | "FLASK_APP": "app.py", 15 | "FLASK_DEBUG": "1" 16 | }, 17 | "args": ["run", "--no-debugger", "--no-reload"], 18 | "jinja": true, 19 | "justMyCode": true 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /app/styles/globals.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500&display=swap'); 2 | 3 | * { 4 | box-sizing: border-box; 5 | padding: 0; 6 | margin: 0; 7 | font-family: 'Inter', sans-serif; 8 | } 9 | 10 | html, 11 | body { 12 | max-width: 100vw; 13 | overflow-x: hidden; 14 | } 15 | 16 | body { 17 | color: #92f2e8; 18 | background: #001233; 19 | } 20 | 21 | a { 22 | color: inherit; 23 | text-decoration: none; 24 | } 25 | 26 | a:hover { 27 | opacity: 0.8; 28 | } 29 | 30 | /* WebKit and Chromiums */ 31 | ::-webkit-scrollbar { 32 | width: 8px; 33 | height: 8px; 34 | background-color: #001233; 35 | } 36 | 37 | ::-webkit-scrollbar-thumb { 38 | background: #001247; 39 | border-radius: 5px; 40 | } -------------------------------------------------------------------------------- /app/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-alpine AS deps 2 | RUN apk add --no-cache libc6-compat 3 | WORKDIR /app 4 | 5 | COPY package.json package-lock.json ./ 6 | RUN npm install --production 7 | 8 | FROM node:18-alpine AS builder 9 | WORKDIR /app 10 | COPY --from=deps /app/node_modules ./node_modules 11 | COPY . . 12 | 13 | ENV NEXT_TELEMETRY_DISABLED 1 14 | 15 | RUN npm run build 16 | 17 | FROM node:18-alpine AS runner 18 | WORKDIR /app 19 | 20 | ENV NODE_ENV production 21 | ENV NEXT_TELEMETRY_DISABLED 1 22 | 23 | RUN addgroup --system --gid 1001 nodejs 24 | RUN adduser --system --uid 1001 nextjs 25 | 26 | COPY --from=builder --chown=nextjs:nodejs /app/.next ./.next 27 | COPY --from=builder /app/node_modules ./node_modules 28 | COPY --from=builder /app/package.json ./package.json 29 | COPY --from=builder /app/public ./public 30 | 31 | USER nextjs 32 | 33 | EXPOSE 3000 34 | 35 | ENV PORT 3000 36 | 37 | CMD ["npm", "start"] 38 | -------------------------------------------------------------------------------- /api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM deepset/haystack:base-cpu-v1.13.0 as base 2 | 3 | FROM base AS python-deps 4 | 5 | # Install pipenv and compilation dependencies 6 | RUN pip install pipenv 7 | RUN apt-get update && apt-get install -y --no-install-recommends gcc 8 | 9 | # Install python dependencies in /.venv 10 | COPY Pipfile . 11 | COPY Pipfile.lock . 
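# NOTE: the runtime stage below builds FROM this python-deps stage, and
# `pipenv install --system --deploy` installs the dependencies into the system
# interpreter, so no virtualenv is copied between stages; the commented-out
# requirements.txt / .venv lines that follow are an alternative approach left in for reference.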
12 | #RUN pipenv requirements > requirements.txt 13 | #RUN pip install -r requirements.txt 14 | RUN pipenv run pip install --upgrade setuptools 15 | RUN pipenv install --system --deploy 16 | 17 | FROM python-deps AS runtime 18 | 19 | # Copy virtual env from python-deps stage 20 | #COPY --from=python-deps /.venv /.venv 21 | #ENV PATH="/.venv/bin:$PATH" 22 | 23 | # Create and switch to a new user 24 | RUN useradd --create-home appuser 25 | WORKDIR /home/appuser 26 | USER appuser 27 | 28 | ENV TIKA_LOG_PATH=/home/appuser 29 | 30 | # Install application into container 31 | COPY . . 32 | 33 | EXPOSE 8080 34 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] -------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | ## Meet Bricky - a conversational bot (Haystack chatbot backend) 2 | 3 | This API builds on the [haystack platform](https://github.com/deepset/haystack) for indexing and embedding markdown documents. It uses OpenAI ChatGPT for answer generation 4 | 5 | ## Getting started 🚀 6 | 7 | 1. Clone this repo! 8 | 2. Install dependencies: `pipenv install` 9 | 3. Run the development server: `python -m main.py` 10 | 11 | Open [http://localhost:8080/docs](http://localhost:8080/docs) with your browser to see the OpenAPI documentation. 12 | 13 | ## Learn more 14 | 15 | To learn more about Haystack and OpenAI, take a look at the following resources: 16 | 17 | - [Haystack Documentation](https://docs.haystack.deepset.ai/docs) - learn about the Haystack platform by deepset.ai. 18 | - [OpenAI docs](https://platform.openai.com/docs/introduction) - the OpenAI docs site. 19 | 20 | ## Powered by haystack and OpenAI ChatGPT 21 | 22 | - Frontend implementation can be found [here](../app). 23 | 24 | Questions or comments? Reach out to [@larsbaunwall](https://github.com/larsbaunwall) 25 | 26 | Don't forget to :star: this repo! -------------------------------------------------------------------------------- /app/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Zahid Khawaja 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Lars Baunwall 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /app/README.md: -------------------------------------------------------------------------------- 1 | ## Meet Bricky - a Haystack conversational bot (Next.js chatbot frontend) 2 | 3 | I'm proudly adopted from the [Langchain chatbot sample](https://blog.langchain.dev/langchain-chat/), originally built by [Zahid](https://twitter.com/chillzaza_) - you should go check that repo out too! 4 | 5 | ## Getting started 🚀 6 | 7 | 1. Clone this repo! 8 | 2. Install dependencies: `npm install` 9 | 3. Run the development server: `npm run dev` 10 | 11 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 12 | 13 | You can start editing the page by modifying `pages/index.js`. The page auto-updates as you edit the file. 14 | 15 | [API routes](https://nextjs.org/docs/api-routes/introduction) can be accessed on [http://localhost:3000/api/chat](http://localhost:3000/api/chat). This endpoint can be edited in `pages/api/chat.js`. 16 | 17 | The `pages/api` directory is mapped to `/api/*`. Files in this directory are treated as [API routes](https://nextjs.org/docs/api-routes/introduction) instead of React pages. 18 | 19 | ## Learn more 20 | 21 | To learn more about Next.js, take a look at the following resources: 22 | 23 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 24 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 25 | 26 | ## Powered by haystack and OpenAI ChatGPT 27 | 28 | - [Haystack](https://github.com/deepset/haystack) backend implementation can be found [here](../api). 29 | 30 | Questions or comments? Reach out to [@larsbaunwall](https://github.com/larsbaunwall) 31 | 32 | Don't forget to :star: this repo! 
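If you want to poke the `/api/chat` route without the UI, any HTTP client will do. Here is a rough sketch in Python using `requests` (not part of this app; it assumes the frontend is serving on port 3000 and that the backend hostname `api` used in `pages/api/chat.js` is resolvable, which is the case when the stack runs via docker-compose):

```python
# Hypothetical helper script: POST a question to the Next.js API route.
import requests

resp = requests.post(
    "http://localhost:3000/api/chat",
    json={"question": "Hi Bricky, what can you help with?", "history": []},
    timeout=60,
)
data = resp.json()

# chat.js wraps the backend response under "result"
print(data["result"]["success"])
```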
-------------------------------------------------------------------------------- /api/main.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import uvicorn 5 | from dotenv import load_dotenv 6 | from fastapi import FastAPI, APIRouter 7 | from haystack.telemetry import disable_telemetry 8 | 9 | from models.api import QueryModel, ResponseModel, Document 10 | from pipelines.openai import GenerativeOpenAIPipeline 11 | from pipelines.indexing import MarkdownIndexingPipeline 12 | 13 | logging.basicConfig( 14 | format="%(levelname)s - %(name)s - %(message)s", level=logging.DEBUG) 15 | logging.getLogger("haystack").setLevel(logging.DEBUG) 16 | 17 | openai_key: str 18 | answer_pipe: GenerativeOpenAIPipeline 19 | 20 | disable_telemetry() 21 | load_dotenv() 22 | 23 | openai_key = os.getenv("OPENAI_KEY") 24 | index_name = os.getenv("INDEX_NAME") or "faiss" 25 | doc_dir = os.getenv("DOC_DIR") or "./sources" 26 | 27 | index_pipe = MarkdownIndexingPipeline(index_name, openai_key, doc_dir) 28 | index_pipe.ensure_index() 29 | 30 | answer_pipe = GenerativeOpenAIPipeline(openai_key, index_name) 31 | 32 | 33 | class AskApi: 34 | 35 | pipeline: GenerativeOpenAIPipeline 36 | 37 | def __init__(self, pipeline: GenerativeOpenAIPipeline): 38 | self.pipeline = pipeline 39 | self.router = APIRouter() 40 | self.router.add_api_route("/ask", self.ask, methods=["POST"]) 41 | self.router.add_api_route("/hello", self.hello, methods=["GET"]) 42 | 43 | async def ask(self, item: QueryModel) -> ResponseModel: 44 | res = self.pipeline.run(query=item.question, params={"Generator": {"top_k": 1}, "Retriever": {"top_k": item.top_k}}) 45 | try: 46 | answer = res["answers"][0].answer 47 | documents = [Document(content=doc.content, meta=doc.meta) for doc in res["documents"]] 48 | return ResponseModel(success=answer, documents=documents) 49 | except Exception as e: 50 | return ResponseModel(error=e.message) 51 | 52 | async def hello(self) -> ResponseModel: 53 | return ResponseModel(success="Hello there!") 54 | 55 | 56 | app = FastAPI(title="Bricky's chatbot API") 57 | api = AskApi(answer_pipe) 58 | app.include_router(api.router) 59 | 60 | if __name__ == "__main__": 61 | uvicorn.run(app, host="127.0.0.1", port=8080) 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Meet Bricky - a conversational bot using OpenAI 🤖 2 | 3 | Remember clippy? Meet bricky! 4 | 5 | Bricky is a conversational bot using [Retrieval-Augmented Generation](https://arxiv.org/abs/2005.11401) with some help from OpenAI's GPT-3 LLM. 6 | 7 | Bricky indexes content stored in markdown files and vectorizes it using OpenAI embeddings. It then uses few-shot learning using a ChatGPT prompt to generate an answer based on relevant content. 
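Concretely, that is what `api/main.py` wires together at startup: a Haystack indexing pipeline that converts and embeds the markdown sources into a FAISS index, and a generative pipeline that retrieves the most similar passages and asks OpenAI to answer from them. A condensed sketch (class names and parameters come straight from `api/pipelines`; it assumes you run it from the `api` folder with dependencies installed and `OPENAI_KEY` set, and the example question is arbitrary):

```python
# Condensed version of what api/main.py does; assumes markdown docs in ./sources.
import os

from pipelines.indexing import MarkdownIndexingPipeline
from pipelines.openai import GenerativeOpenAIPipeline

openai_key = os.getenv("OPENAI_KEY")

# 1. Markdown -> chunks -> OpenAI embeddings -> FAISS index (skipped if the index already exists)
index_pipe = MarkdownIndexingPipeline("faiss", openai_key, "./sources")
index_pipe.ensure_index()

# 2. Retrieve the top-k most similar chunks and generate an answer from them
answer_pipe = GenerativeOpenAIPipeline(openai_key, "faiss")
result = answer_pipe.run(
    query="How should I format a date in my API?",
    params={"Retriever": {"top_k": 5}, "Generator": {"top_k": 1}},
)
print(result["answers"][0].answer)
```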
8 | 9 | Read more about my journey into this field and the background for creating Bricky in [my blog article](https://medium.com/@larslb/standing-on-the-shoulders-of-a-giant-embedding-intelligent-behavior-using-large-language-models-8c0f644b6d87) 10 | 11 | The project is inspired by the awesome [HoustonAI by Astro](https://github.com/withastro/houston.astro.build) 12 | 13 | ![screenshot.png](./res/bricky-recording.gif) 14 | 15 | 16 | ## Getting started 🚀 17 | 18 | ### Prereqs 19 | 20 | Provide these `env` variables for the api container by creating a `dotenv` file in `api/.env` 21 | 22 | ``` 23 | OPENAI_KEY= 24 | ``` 25 | 26 | ### Steps 27 | 28 | 1. Clone this repo! 29 | 1. Copy over your documentation to `api/sources` 30 | 1. Run docker-compose: `docker-compose up` 31 | 32 | You should now have two endpoints running: 33 | 34 | - The [Nextjs-based frontend](./app): Open [http://localhost:3000](http://localhost:3000) to meet Bricky. 35 | - The [Haystack-based API](./api): Open [http://localhost:8080/docs](http://localhost:8080/docs) with your browser to see the OpenAPI documentation. 36 | 37 | Note: if you make changes to any files, e.g. `api/.env` or the docs in `api/sources`, you need to rebuild the images: `docker-compose build --no-cache`. 38 | 39 | ## Learn more 40 | 41 | To learn more about Haystack and OpenAI, take a look at the following resources: 42 | 43 | - [Haystack Documentation](https://docs.haystack.deepset.ai/docs) - learn about the Haystack platform by deepset.ai. 44 | - [OpenAI docs](https://platform.openai.com/docs/introduction) - the OpenAI docs site. 45 | 46 | To learn more about Next.js, take a look at the following resources: 47 | 48 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 49 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 50 | 51 | ## Powered by haystack and OpenAI ChatGPT 52 | 53 | - Frontend implementation can be found [here](./app). 54 | - Backend implementation can be found [here](./api). 55 | 56 | Questions or comments? Reach out to [@larsbaunwall](https://github.com/larsbaunwall) 57 | 58 | Don't forget to :star: this repo!
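Once the containers are up, a quick way to smoke-test the backend without the frontend is to call the `/ask` endpoint directly. A sketch using Python's `requests` (not a dependency of this repo; install it separately, or use `curl` or the OpenAPI page instead. The example question is arbitrary):

```python
# Hypothetical smoke test against the locally running API container.
import requests

resp = requests.post(
    "http://localhost:8080/ask",
    json={"question": "What accessibility standard should I use?", "history": []},
    timeout=60,
)
resp.raise_for_status()
body = resp.json()

print(body["success"])         # the generated answer
print(len(body["documents"]))  # number of retrieved source passages
```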
59 | -------------------------------------------------------------------------------- /api/pipelines/indexing.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | from pipelines.nodes.markdown import MarkdownConverter 5 | 6 | from document_stores.faiss import load_store 7 | from haystack.document_stores import BaseDocumentStore 8 | from haystack.nodes import EmbeddingRetriever, PreProcessor, BaseRetriever 9 | from haystack.pipelines import Pipeline, BaseStandardPipeline 10 | 11 | 12 | class MarkdownIndexingPipeline(BaseStandardPipeline): 13 | openai_key = "" 14 | index_name = "" 15 | index_path = "" 16 | index_exists = False 17 | doc_dir = "" 18 | 19 | def __init__(self, index_name: str, openai_key: str, doc_dir: str): 20 | self.openai_key = openai_key 21 | self.index_name = index_name 22 | self.index_path = "indices/{0}".format(index_name) 23 | self.index_exists = os.path.exists(self.index_path) 24 | self.doc_dir = doc_dir 25 | 26 | if not os.path.exists("indices"): 27 | os.makedirs("indices") 28 | 29 | def ensure_index(self): 30 | 31 | if not self.index_exists: 32 | self.pipeline = Pipeline() 33 | markdown_converter = MarkdownConverter( 34 | extract_headlines=True 35 | ) 36 | preprocessor = PreProcessor( 37 | clean_empty_lines=True, 38 | clean_whitespace=True, 39 | clean_header_footer=False, 40 | split_by="word", 41 | split_length=150, 42 | split_respect_sentence_boundary=True, 43 | ) 44 | document_store = load_store(self.index_name) 45 | 46 | self.pipeline.add_node( 47 | component=markdown_converter, name="MarkdownConverter", inputs=["File"] 48 | ) 49 | self.pipeline.add_node( 50 | component=preprocessor, name="PreProcessor", inputs=["MarkdownConverter"] 51 | ) 52 | self.pipeline.add_node( 53 | component=document_store, name="FAISSDocStore", inputs=["PreProcessor"] 54 | ) 55 | 56 | files_to_index = [] 57 | for file in glob.glob(os.path.join(self.doc_dir, "**/*.md"), recursive=True): 58 | if file.endswith('.md'): 59 | files_to_index.append(file) 60 | 61 | if len(files_to_index) != 0: 62 | self.pipeline.run(file_paths=files_to_index) 63 | document_store.update_embeddings(create_retriever(document_store, self.openai_key), batch_size=256) 64 | document_store.save(self.index_path) 65 | else: 66 | print("No files found to index at path " + self.doc_dir) 67 | print("Source context will be empty") 68 | 69 | 70 | def create_retriever(document_store: BaseDocumentStore, openai_key: str) -> BaseRetriever: 71 | return EmbeddingRetriever( 72 | document_store=document_store, 73 | batch_size=8, 74 | embedding_model="text-embedding-ada-002", 75 | api_key=openai_key, 76 | max_seq_len=1024 77 | ) 78 | -------------------------------------------------------------------------------- /api/pipelines/openai.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from haystack.document_stores import BaseDocumentStore 4 | from haystack.nodes import OpenAIAnswerGenerator, EmbeddingRetriever, BaseRetriever, BaseGenerator 5 | from haystack.pipelines import GenerativeQAPipeline, BaseStandardPipeline, Pipeline 6 | from document_stores.faiss import load_store 7 | from pipelines.indexing import create_retriever 8 | 9 | 10 | class GenerativeOpenAIPipeline(BaseStandardPipeline): 11 | openai_key: str 12 | index_name: str 13 | document_store: BaseDocumentStore 14 | retriever: BaseRetriever 15 | generator: BaseGenerator 16 | 17 | 18 | def __init__(self, openai_key: str, index_name: str): 
19 | self.openai_key = openai_key 20 | self.index_name = index_name 21 | 22 | self.document_store = load_store(index_name) 23 | self.retriever = create_retriever(self.document_store, openai_key) 24 | 25 | self.generator = OpenAIAnswerGenerator( 26 | api_key=openai_key, 27 | model="text-davinci-003", 28 | max_tokens=1000, 29 | temperature=0.1, 30 | frequency_penalty=1.0, 31 | examples_context="""You are a cheerful AI assistant named Bricky. 32 | In your spare time you do aerobics and freediving. 33 | Work is mostly spent answering engineering questions from the engineering handbook. 34 | 35 | The handbook is located at https://handbook/engineering-matters. 36 | The handbook contains the collective knowledge and experience from all our communities and engineering teams. 37 | 38 | You are given the following extracted parts of a long article in the handbook and a question. Provide a conversational answer of minimum 2 sentences 39 | with a hyperlink to the article. Do NOT make up a hyperlink that is not listed and only use hyperlinks 40 | pointing to the handbook. If the question includes a request for code, provide a code block directly from the 41 | documentation. 42 | 43 | You do tell jokes. If you don't know any use one you found on the Internet. 44 | 45 | If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer. If the question 46 | is not about the engineering handbook, politely inform them that you are tuned to only answer questions about 47 | the engineering handbook. 48 | 49 | Links must be formatted as markdown links. 50 | 51 | Answer in Markdown""", 52 | examples=[ 53 | ("how should I format a date in my API?", "You should format a date in your API using the RFC 3339 " 54 | "internet profile, which is a subset of ISO 8601. This " 55 | "should be represented in UTC using the format without " 56 | "local offsets"), 57 | ("What accessibility standard should I use?", "You should use level AA of the [Web Content " 58 | "Accessibility Guidelines 2.1 (WCAG 2.1)](https://handbook/engineering-matters#a11y) as a minimum.") 59 | ] 60 | ) 61 | 62 | self.pipeline = Pipeline() 63 | self.pipeline.add_node(component=self.retriever, name="Retriever", inputs=["Query"]) 64 | self.pipeline.add_node(component=self.generator, name="Generator", inputs=["Retriever"]) 65 | 66 | def run(self, query: str, params: Optional[dict] = None, debug: Optional[bool] = None): 67 | """ 68 | :param query: the query string. 69 | :param params: params for the `retriever` and `generator`. For instance, 70 | params={"Retriever": {"top_k": 10}, "Generator": {"top_k": 5}} 71 | :param debug: Whether the pipeline should instruct nodes to collect debug information 72 | about their execution. By default these include the input parameters 73 | they received and the output they generated. 
74 | All debug information can then be found in the dict returned 75 | by this method under the key "_debug" 76 | """ 77 | output = self.pipeline.run(query=query, params=params, debug=debug) 78 | return output 79 | -------------------------------------------------------------------------------- /api/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .env 3 | .DS_Store 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/python,nextjs 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,nextjs 7 | 8 | ### NextJS ### 9 | # dependencies 10 | /node_modules 11 | /.pnp 12 | .pnp.js 13 | 14 | # testing 15 | /coverage 16 | 17 | # next.js 18 | /.next/ 19 | /out/ 20 | 21 | # production 22 | /build 23 | 24 | # misc 25 | .DS_Store 26 | *.pem 27 | 28 | # debug 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | .pnpm-debug.log* 33 | 34 | # local env files 35 | .env*.local 36 | 37 | # vercel 38 | .vercel 39 | 40 | # typescript 41 | *.tsbuildinfo 42 | next-env.d.ts 43 | 44 | ### Python ### 45 | # Byte-compiled / optimized / DLL files 46 | __pycache__/ 47 | *.py[cod] 48 | *$py.class 49 | 50 | # C extensions 51 | *.so 52 | 53 | # Distribution / packaging 54 | .Python 55 | build/ 56 | develop-eggs/ 57 | dist/ 58 | downloads/ 59 | eggs/ 60 | .eggs/ 61 | lib/ 62 | lib64/ 63 | parts/ 64 | sdist/ 65 | var/ 66 | wheels/ 67 | share/python-wheels/ 68 | *.egg-info/ 69 | .installed.cfg 70 | *.egg 71 | MANIFEST 72 | 73 | # PyInstaller 74 | # Usually these files are written by a python script from a template 75 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 76 | *.manifest 77 | *.spec 78 | 79 | # Installer logs 80 | pip-log.txt 81 | pip-delete-this-directory.txt 82 | 83 | # Unit test / coverage reports 84 | htmlcov/ 85 | .tox/ 86 | .nox/ 87 | .coverage 88 | .coverage.* 89 | .cache 90 | nosetests.xml 91 | coverage.xml 92 | *.cover 93 | *.py,cover 94 | .hypothesis/ 95 | .pytest_cache/ 96 | cover/ 97 | 98 | # Translations 99 | *.mo 100 | *.pot 101 | 102 | # Django stuff: 103 | *.log 104 | local_settings.py 105 | db.sqlite3 106 | db.sqlite3-journal 107 | 108 | # Flask stuff: 109 | instance/ 110 | .webassets-cache 111 | 112 | # Scrapy stuff: 113 | .scrapy 114 | 115 | # Sphinx documentation 116 | docs/_build/ 117 | 118 | # PyBuilder 119 | .pybuilder/ 120 | target/ 121 | 122 | # Jupyter Notebook 123 | .ipynb_checkpoints 124 | 125 | # IPython 126 | profile_default/ 127 | ipython_config.py 128 | 129 | # pyenv 130 | # For a library or package, you might want to ignore these files since the code is 131 | # intended to run in multiple environments; otherwise, check them in: 132 | # .python-version 133 | 134 | # pipenv 135 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 136 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 137 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 138 | # install all needed dependencies. 139 | #Pipfile.lock 140 | 141 | # poetry 142 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 143 | # This is especially recommended for binary packages to ensure reproducibility, and is more 144 | # commonly ignored for libraries. 
145 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 146 | #poetry.lock 147 | 148 | # pdm 149 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 150 | #pdm.lock 151 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 152 | # in version control. 153 | # https://pdm.fming.dev/#use-with-ide 154 | .pdm.toml 155 | 156 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 157 | __pypackages__/ 158 | 159 | # Celery stuff 160 | celerybeat-schedule 161 | celerybeat.pid 162 | 163 | # SageMath parsed files 164 | *.sage.py 165 | 166 | # Environments 167 | .env 168 | .venv 169 | env/ 170 | venv/ 171 | ENV/ 172 | env.bak/ 173 | venv.bak/ 174 | 175 | # Spyder project settings 176 | .spyderproject 177 | .spyproject 178 | 179 | # Rope project settings 180 | .ropeproject 181 | 182 | # mkdocs documentation 183 | /site 184 | 185 | # mypy 186 | .mypy_cache/ 187 | .dmypy.json 188 | dmypy.json 189 | 190 | # Pyre type checker 191 | .pyre/ 192 | 193 | # pytype static type analyzer 194 | .pytype/ 195 | 196 | # Cython debug symbols 197 | cython_debug/ 198 | 199 | # PyCharm 200 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 201 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 202 | # and can be added to the global gitignore or merged into this file. For a more nuclear 203 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 204 | #.idea/ 205 | 206 | ### Python Patch ### 207 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 208 | poetry.toml 209 | 210 | # ruff 211 | .ruff_cache/ 212 | 213 | # End of https://www.toptal.com/developers/gitignore/api/python,nextjs 214 | api/indices/** 215 | -------------------------------------------------------------------------------- /app/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .env 3 | .DS_Store 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/python,nextjs 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,nextjs 7 | 8 | ### NextJS ### 9 | # dependencies 10 | /node_modules 11 | /.pnp 12 | .pnp.js 13 | 14 | # testing 15 | /coverage 16 | 17 | # next.js 18 | /.next/ 19 | /out/ 20 | 21 | # production 22 | /build 23 | 24 | # misc 25 | .DS_Store 26 | *.pem 27 | 28 | # debug 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | .pnpm-debug.log* 33 | 34 | # local env files 35 | .env*.local 36 | 37 | # vercel 38 | .vercel 39 | 40 | # typescript 41 | *.tsbuildinfo 42 | next-env.d.ts 43 | 44 | ### Python ### 45 | # Byte-compiled / optimized / DLL files 46 | __pycache__/ 47 | *.py[cod] 48 | *$py.class 49 | 50 | # C extensions 51 | *.so 52 | 53 | # Distribution / packaging 54 | .Python 55 | build/ 56 | develop-eggs/ 57 | dist/ 58 | downloads/ 59 | eggs/ 60 | .eggs/ 61 | lib/ 62 | lib64/ 63 | parts/ 64 | sdist/ 65 | var/ 66 | wheels/ 67 | share/python-wheels/ 68 | *.egg-info/ 69 | .installed.cfg 70 | *.egg 71 | MANIFEST 72 | 73 | # PyInstaller 74 | # Usually these files are written by a python script from a template 75 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
76 | *.manifest 77 | *.spec 78 | 79 | # Installer logs 80 | pip-log.txt 81 | pip-delete-this-directory.txt 82 | 83 | # Unit test / coverage reports 84 | htmlcov/ 85 | .tox/ 86 | .nox/ 87 | .coverage 88 | .coverage.* 89 | .cache 90 | nosetests.xml 91 | coverage.xml 92 | *.cover 93 | *.py,cover 94 | .hypothesis/ 95 | .pytest_cache/ 96 | cover/ 97 | 98 | # Translations 99 | *.mo 100 | *.pot 101 | 102 | # Django stuff: 103 | *.log 104 | local_settings.py 105 | db.sqlite3 106 | db.sqlite3-journal 107 | 108 | # Flask stuff: 109 | instance/ 110 | .webassets-cache 111 | 112 | # Scrapy stuff: 113 | .scrapy 114 | 115 | # Sphinx documentation 116 | docs/_build/ 117 | 118 | # PyBuilder 119 | .pybuilder/ 120 | target/ 121 | 122 | # Jupyter Notebook 123 | .ipynb_checkpoints 124 | 125 | # IPython 126 | profile_default/ 127 | ipython_config.py 128 | 129 | # pyenv 130 | # For a library or package, you might want to ignore these files since the code is 131 | # intended to run in multiple environments; otherwise, check them in: 132 | # .python-version 133 | 134 | # pipenv 135 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 136 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 137 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 138 | # install all needed dependencies. 139 | #Pipfile.lock 140 | 141 | # poetry 142 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 143 | # This is especially recommended for binary packages to ensure reproducibility, and is more 144 | # commonly ignored for libraries. 145 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 146 | #poetry.lock 147 | 148 | # pdm 149 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 150 | #pdm.lock 151 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 152 | # in version control. 153 | # https://pdm.fming.dev/#use-with-ide 154 | .pdm.toml 155 | 156 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 157 | __pypackages__/ 158 | 159 | # Celery stuff 160 | celerybeat-schedule 161 | celerybeat.pid 162 | 163 | # SageMath parsed files 164 | *.sage.py 165 | 166 | # Environments 167 | .env 168 | .venv 169 | env/ 170 | venv/ 171 | ENV/ 172 | env.bak/ 173 | venv.bak/ 174 | 175 | # Spyder project settings 176 | .spyderproject 177 | .spyproject 178 | 179 | # Rope project settings 180 | .ropeproject 181 | 182 | # mkdocs documentation 183 | /site 184 | 185 | # mypy 186 | .mypy_cache/ 187 | .dmypy.json 188 | dmypy.json 189 | 190 | # Pyre type checker 191 | .pyre/ 192 | 193 | # pytype static type analyzer 194 | .pytype/ 195 | 196 | # Cython debug symbols 197 | cython_debug/ 198 | 199 | # PyCharm 200 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 201 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 202 | # and can be added to the global gitignore or merged into this file. For a more nuclear 203 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
204 | #.idea/ 205 | 206 | ### Python Patch ### 207 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 208 | poetry.toml 209 | 210 | # ruff 211 | .ruff_cache/ 212 | 213 | # End of https://www.toptal.com/developers/gitignore/api/python,nextjs 214 | api/indices/** 215 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .env 3 | .DS_Store 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/python,nextjs 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,nextjs 7 | 8 | ### NextJS ### 9 | # dependencies 10 | /node_modules 11 | /.pnp 12 | .pnp.js 13 | 14 | # testing 15 | /coverage 16 | 17 | # next.js 18 | /.next/ 19 | /out/ 20 | 21 | # production 22 | /build 23 | 24 | # misc 25 | .DS_Store 26 | *.pem 27 | 28 | # debug 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | .pnpm-debug.log* 33 | 34 | # local env files 35 | .env*.local 36 | 37 | # vercel 38 | .vercel 39 | 40 | # typescript 41 | *.tsbuildinfo 42 | next-env.d.ts 43 | 44 | ### Python ### 45 | # Byte-compiled / optimized / DLL files 46 | __pycache__/ 47 | *.py[cod] 48 | *$py.class 49 | 50 | # C extensions 51 | *.so 52 | 53 | # Distribution / packaging 54 | .Python 55 | build/ 56 | develop-eggs/ 57 | dist/ 58 | downloads/ 59 | eggs/ 60 | .eggs/ 61 | lib/ 62 | lib64/ 63 | parts/ 64 | sdist/ 65 | var/ 66 | wheels/ 67 | share/python-wheels/ 68 | *.egg-info/ 69 | .installed.cfg 70 | *.egg 71 | MANIFEST 72 | 73 | # PyInstaller 74 | # Usually these files are written by a python script from a template 75 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 76 | *.manifest 77 | *.spec 78 | 79 | # Installer logs 80 | pip-log.txt 81 | pip-delete-this-directory.txt 82 | 83 | # Unit test / coverage reports 84 | htmlcov/ 85 | .tox/ 86 | .nox/ 87 | .coverage 88 | .coverage.* 89 | .cache 90 | nosetests.xml 91 | coverage.xml 92 | *.cover 93 | *.py,cover 94 | .hypothesis/ 95 | .pytest_cache/ 96 | cover/ 97 | 98 | # Translations 99 | *.mo 100 | *.pot 101 | 102 | # Django stuff: 103 | *.log 104 | local_settings.py 105 | db.sqlite3 106 | db.sqlite3-journal 107 | 108 | # Flask stuff: 109 | instance/ 110 | .webassets-cache 111 | 112 | # Scrapy stuff: 113 | .scrapy 114 | 115 | # Sphinx documentation 116 | docs/_build/ 117 | 118 | # PyBuilder 119 | .pybuilder/ 120 | target/ 121 | 122 | # Jupyter Notebook 123 | .ipynb_checkpoints 124 | 125 | # IPython 126 | profile_default/ 127 | ipython_config.py 128 | 129 | # pyenv 130 | # For a library or package, you might want to ignore these files since the code is 131 | # intended to run in multiple environments; otherwise, check them in: 132 | # .python-version 133 | 134 | # pipenv 135 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 136 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 137 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 138 | # install all needed dependencies. 139 | #Pipfile.lock 140 | 141 | # poetry 142 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 143 | # This is especially recommended for binary packages to ensure reproducibility, and is more 144 | # commonly ignored for libraries. 
145 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 146 | #poetry.lock 147 | 148 | # pdm 149 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 150 | #pdm.lock 151 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 152 | # in version control. 153 | # https://pdm.fming.dev/#use-with-ide 154 | .pdm.toml 155 | 156 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 157 | __pypackages__/ 158 | 159 | # Celery stuff 160 | celerybeat-schedule 161 | celerybeat.pid 162 | 163 | # SageMath parsed files 164 | *.sage.py 165 | 166 | # Environments 167 | .env 168 | .venv 169 | env/ 170 | venv/ 171 | ENV/ 172 | env.bak/ 173 | venv.bak/ 174 | 175 | # Spyder project settings 176 | .spyderproject 177 | .spyproject 178 | 179 | # Rope project settings 180 | .ropeproject 181 | 182 | # mkdocs documentation 183 | /site 184 | 185 | # mypy 186 | .mypy_cache/ 187 | .dmypy.json 188 | dmypy.json 189 | 190 | # Pyre type checker 191 | .pyre/ 192 | 193 | # pytype static type analyzer 194 | .pytype/ 195 | 196 | # Cython debug symbols 197 | cython_debug/ 198 | 199 | # PyCharm 200 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 201 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 202 | # and can be added to the global gitignore or merged into this file. For a more nuclear 203 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 204 | #.idea/ 205 | 206 | ### Python Patch ### 207 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 208 | poetry.toml 209 | 210 | # ruff 211 | .ruff_cache/ 212 | 213 | # End of https://www.toptal.com/developers/gitignore/api/python,nextjs 214 | 215 | api/indices/** 216 | -------------------------------------------------------------------------------- /app/styles/Home.module.css: -------------------------------------------------------------------------------- 1 | .main { 2 | display: flex; 3 | flex-direction: column; 4 | justify-content: space-between; 5 | align-items: center; 6 | padding: 2rem; 7 | } 8 | 9 | .header { 10 | width: auto; 11 | } 12 | 13 | .header p { 14 | text-align: center; 15 | } 16 | 17 | .topnav { 18 | box-shadow: rgb(0 0 0 / 50%) 0px 0px 50px; 19 | display: flex; 20 | justify-content: space-between; 21 | padding: 1rem 0.75rem 1rem 0.75rem; 22 | align-items: center; 23 | vertical-align: middle; 24 | } 25 | 26 | .topnav .boticon { 27 | border-radius: 20px;; 28 | border: 2px solid #002466; 29 | } 30 | 31 | .navlogo * { 32 | vertical-align: middle; 33 | } 34 | 35 | .navlogo, .navlinks a { 36 | font-weight: 500; 37 | } 38 | 39 | .navlogo { 40 | font-size: 1.25rem; 41 | margin-left: 1rem; 42 | } 43 | 44 | .navlinks { 45 | width: 20rem; 46 | display: flex; 47 | justify-content: space-evenly; 48 | align-items: center; 49 | } 50 | 51 | .apptitle { 52 | font-size: 2.5rem; 53 | font-weight: 500; 54 | display: flex; 55 | justify-content: center; 56 | } 57 | 58 | .appdescription { 59 | font-size: 1.1rem; 60 | margin: 1rem; 61 | } 62 | 63 | .link { 64 | font-weight: 500; 65 | } 66 | 67 | .cloudform { 68 | position: relative; 69 | } 70 | 71 | .textarea { 72 | position: relative; 73 | resize: none; 74 | font-size: 1.1rem; 75 | padding: 1rem 2rem 1rem 2rem; 76 | width: 75vw; 77 | border-radius: 0.5rem; 78 | border: 1px solid #7B2CBF; 79 | background: 
#001247; 80 | color: #80ed99; 81 | outline: none; 82 | } 83 | 84 | .textarea:disabled { 85 | opacity: 0.5; 86 | } 87 | 88 | .textarea::placeholder { 89 | color: #5f6368; 90 | } 91 | 92 | .generatebutton { 93 | position: absolute; 94 | top: 0.87rem; 95 | right: 1rem; 96 | color: rgb(128 237 153); 97 | background: none; 98 | padding: 0.3rem; 99 | border: none; 100 | display: flex; 101 | } 102 | 103 | .loadingwheel { 104 | position: absolute; 105 | top: 0.2rem; 106 | right: 0.25rem; 107 | } 108 | 109 | .svgicon { 110 | transform: rotate(90deg); 111 | width: 1.2em; 112 | height: 1.2em; 113 | fill: currentColor; 114 | } 115 | 116 | .generatebutton:hover { 117 | background: #7B2CBF; 118 | border-radius: 0.3rem; 119 | } 120 | 121 | .generatebutton:disabled { 122 | opacity: 0.9; 123 | cursor: not-allowed; 124 | background: none; 125 | } 126 | 127 | .messagelist { 128 | width: 100%; 129 | height: 100%; 130 | overflow-y: scroll; 131 | border-radius: 0.5rem; 132 | } 133 | 134 | .messagelistloading { 135 | display: flex; 136 | width: 100%; 137 | justify-content: center; 138 | margin-top: 1rem; 139 | } 140 | 141 | .usermessage { 142 | background: #001247; 143 | padding: 1.5rem; 144 | color: #ECECF1; 145 | } 146 | 147 | .usermessagewaiting{ 148 | padding: 1.5rem; 149 | color: #ECECF1; 150 | background: linear-gradient(to left, #2d1e80, #5a189a, #2d1e80); 151 | background-size: 200% 200%; 152 | background-position: -100% 0; 153 | animation: loading-gradient 2s ease-in-out infinite; 154 | animation-direction: alternate; 155 | animation-name: loading-gradient; 156 | } 157 | 158 | @keyframes loading-gradient { 159 | 0% { 160 | background-position: -100% 0; 161 | } 162 | 100% { 163 | background-position: 100% 0; 164 | } 165 | } 166 | 167 | .apimessage { 168 | background: #2D1E80; 169 | padding: 1.5rem; 170 | color: #80ed99; 171 | animation: fadein 0.5s; 172 | } 173 | 174 | @keyframes fadein { 175 | from { opacity: 0; } 176 | to { opacity: 1; } 177 | } 178 | 179 | .apimessage, .usermessage, .usermessagewaiting { 180 | display: flex; 181 | } 182 | 183 | .markdownanswer { 184 | line-height: 1.75; 185 | } 186 | 187 | .markdownanswer a:hover { 188 | opacity: 0.8; 189 | } 190 | 191 | .markdownanswer a { 192 | color: #16bed7; 193 | font-weight: 500; 194 | } 195 | 196 | .markdownanswer code { 197 | color: #15cb19; 198 | font-weight: 500; 199 | white-space: pre-wrap !important; 200 | } 201 | 202 | .markdownanswer ol, .markdownanswer ul { 203 | margin: 1rem; 204 | } 205 | 206 | .boticon, .usericon { 207 | margin-right: 1rem; 208 | border-radius: 0.1rem; 209 | } 210 | 211 | .markdownanswer h1, .markdownanswer h2, .markdownanswer h3 { 212 | font-size: inherit; 213 | } 214 | 215 | 216 | .center { 217 | display: flex; 218 | justify-content: center; 219 | align-items: center; 220 | position: relative; 221 | padding: 2rem 0; 222 | flex-direction: column; 223 | } 224 | 225 | .cloud { 226 | width: 75vw; 227 | height: 65vh; 228 | border-radius: 0.5rem; 229 | border: 1px solid #002466; 230 | display: flex; 231 | justify-content: center; 232 | align-items: center; 233 | scrollbar-color: #001247 #001233; 234 | } 235 | 236 | .pointsnormal { 237 | width: 90%; 238 | height: 90%; 239 | } 240 | 241 | .pointsdim { 242 | width: 90%; 243 | height: 90%; 244 | opacity: 0.25; 245 | } 246 | 247 | .footer { 248 | color: #5f6368; 249 | font-size: 0.8rem; 250 | margin: 1.5rem; 251 | } 252 | 253 | .footer a { 254 | font-weight: 500; 255 | color: #7a7d81; 256 | } 257 | 258 | .footer a:hover { 259 | opacity: 0.8; 260 | } 261 | 262 | /* Mobile 
optimization */ 263 | @media (max-width: 600px) { 264 | 265 | .main { 266 | padding: 1rem; 267 | max-height: 90vh; 268 | } 269 | 270 | .cloud { 271 | width: 22rem; 272 | height: 28rem; 273 | } 274 | .textarea { 275 | width: 22rem; 276 | } 277 | .topnav { 278 | border: 1px solid black; 279 | align-items: center; 280 | padding: 0.85rem 0.75rem 0.85rem 0.75rem; 281 | } 282 | 283 | .navlogo { 284 | font-size: 1.25rem; 285 | width: 20rem; 286 | } 287 | 288 | .markdownanswer code { 289 | white-space : pre-wrap !important; 290 | } 291 | 292 | .footer { 293 | font-size: 0.7rem; 294 | width: 100%; 295 | text-align: center; 296 | } 297 | } -------------------------------------------------------------------------------- /api/pipelines/nodes/markdown.py: -------------------------------------------------------------------------------- 1 | # Copy of haystack.nodes.MarkdownConverter 2 | # Added file_path to meta 3 | 4 | import logging 5 | import re 6 | from pathlib import Path 7 | from typing import Dict, List, Optional, Tuple, Any 8 | 9 | import frontmatter 10 | from bs4 import BeautifulSoup, NavigableString 11 | from markdown import markdown 12 | 13 | from haystack.nodes.file_converter.base import BaseConverter 14 | from haystack.schema import Document 15 | 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class MarkdownConverter(BaseConverter): 21 | def __init__( 22 | self, 23 | remove_numeric_tables: bool = False, 24 | valid_languages: Optional[List[str]] = None, 25 | id_hash_keys: Optional[List[str]] = None, 26 | progress_bar: bool = True, 27 | remove_code_snippets: bool = True, 28 | extract_headlines: bool = False, 29 | add_frontmatter_to_meta: bool = False, 30 | ): 31 | """ 32 | :param remove_numeric_tables: Not applicable. 33 | :param valid_languages: Not applicable. 34 | :param id_hash_keys: Generate the document ID from a custom list of strings that refer to the document's 35 | attributes. To make sure you don't have duplicate documents in your DocumentStore if texts are 36 | not unique, you can modify the metadata and pass for example, `"meta"` to this field ([`"content"`, `"meta"`]). 37 | In this case, the ID is generated by using the content and the defined metadata. 38 | :param progress_bar: Show a progress bar for the conversion. 39 | :param remove_code_snippets: Whether to remove snippets from the markdown file. 40 | :param extract_headlines: Whether to extract headings from the markdown file. 41 | :param add_frontmatter_to_meta: Whether to add the contents of the frontmatter to `meta`. 42 | """ 43 | super().__init__( 44 | remove_numeric_tables=remove_numeric_tables, 45 | valid_languages=valid_languages, 46 | id_hash_keys=id_hash_keys, 47 | progress_bar=progress_bar, 48 | ) 49 | 50 | self.remove_code_snippets = remove_code_snippets 51 | self.extract_headlines = extract_headlines 52 | self.add_frontmatter_to_meta = add_frontmatter_to_meta 53 | 54 | def convert( 55 | self, 56 | file_path: Path, 57 | meta: Optional[Dict[str, Any]] = None, 58 | remove_numeric_tables: Optional[bool] = None, 59 | valid_languages: Optional[List[str]] = None, 60 | encoding: Optional[str] = "utf-8", 61 | id_hash_keys: Optional[List[str]] = None, 62 | remove_code_snippets: Optional[bool] = None, 63 | extract_headlines: Optional[bool] = None, 64 | add_frontmatter_to_meta: Optional[bool] = None, 65 | ) -> List[Document]: 66 | """ 67 | Reads text from a markdown file and executes optional preprocessing steps. 
68 | 69 | :param file_path: path of the file to convert 70 | :param meta: dictionary of meta data key-value pairs to append in the returned document. 71 | :param encoding: Select the file encoding (default is `utf-8`) 72 | :param remove_numeric_tables: Not applicable 73 | :param valid_languages: Not applicable 74 | :param id_hash_keys: Generate the document id from a custom list of strings that refer to the document's 75 | attributes. If you want to ensure you don't have duplicate documents in your DocumentStore but texts are 76 | not unique, you can modify the metadata and pass e.g. `"meta"` to this field (e.g. [`"content"`, `"meta"`]). 77 | In this case the id will be generated by using the content and the defined metadata. 78 | :param remove_code_snippets: Whether to remove snippets from the markdown file. 79 | :param extract_headlines: Whether to extract headings from the markdown file. 80 | :param add_frontmatter_to_meta: Whether to add the contents of the frontmatter to `meta`. 81 | """ 82 | 83 | id_hash_keys = id_hash_keys if id_hash_keys is not None else self.id_hash_keys 84 | remove_code_snippets = remove_code_snippets if remove_code_snippets is not None else self.remove_code_snippets 85 | extract_headlines = extract_headlines if extract_headlines is not None else self.extract_headlines 86 | add_frontmatter_to_meta = ( 87 | add_frontmatter_to_meta if add_frontmatter_to_meta is not None else self.add_frontmatter_to_meta 88 | ) 89 | 90 | with open(file_path, encoding=encoding, errors="ignore") as f: 91 | metadata, markdown_text = frontmatter.parse(f.read()) 92 | 93 | # md -> html -> text since BeautifulSoup can extract text cleanly 94 | html = markdown(markdown_text, extensions=["fenced_code"]) 95 | 96 | # remove code snippets 97 | if remove_code_snippets: 98 | html = re.sub(r"
<pre>(.*?)</pre>
", " ", html, flags=re.DOTALL) 99 | html = re.sub(r"(.*?)", " ", html, flags=re.DOTALL) 100 | soup = BeautifulSoup(html, "html.parser") 101 | 102 | if add_frontmatter_to_meta: 103 | if meta is None: 104 | meta = metadata 105 | else: 106 | meta.update(metadata) 107 | 108 | if extract_headlines: 109 | text, headlines = self._extract_text_and_headlines(soup) 110 | if meta is None: 111 | meta = {} 112 | meta["headlines"] = headlines 113 | else: 114 | text = soup.get_text() 115 | 116 | if meta is None: 117 | meta = {} 118 | meta["file_path"] = file_path 119 | 120 | document = Document(content=text, meta=meta, id_hash_keys=id_hash_keys) 121 | return [document] 122 | 123 | @staticmethod 124 | def _extract_text_and_headlines(soup: BeautifulSoup) -> Tuple[str, List[Dict]]: 125 | """ 126 | Extracts text and headings from a soup object. 127 | """ 128 | headline_tags = {"h1", "h2", "h3", "h4", "h5", "h6"} 129 | headlines = [] 130 | text = "" 131 | for desc in soup.descendants: 132 | if desc.name in headline_tags: 133 | current_headline = desc.get_text() 134 | current_start_idx = len(text) 135 | current_level = int(desc.name[-1]) - 1 136 | headlines.append({"headline": current_headline, "start_idx": current_start_idx, "level": current_level}) 137 | 138 | if isinstance(desc, NavigableString): 139 | text += desc.get_text() 140 | 141 | return text, headlines 142 | -------------------------------------------------------------------------------- /app/pages/index.js: -------------------------------------------------------------------------------- 1 | import { useState, useRef, useEffect } from "react"; 2 | import Head from "next/head"; 3 | import styles from "../styles/Home.module.css"; 4 | import Image from "next/image"; 5 | import ReactMarkdown from "react-markdown"; 6 | import CircularProgress from "@mui/material/CircularProgress"; 7 | 8 | export default function Home() { 9 | const [userInput, setUserInput] = useState(""); 10 | const [history, setHistory] = useState([]); 11 | const [loading, setLoading] = useState(false); 12 | const [messages, setMessages] = useState([ 13 | { 14 | message: "Hi there! How can I help?", 15 | type: "apiMessage", 16 | }, 17 | ]); 18 | 19 | const messageListRef = useRef(null); 20 | const textAreaRef = useRef(null); 21 | 22 | // Auto scroll chat to bottom 23 | useEffect(() => { 24 | const messageList = messageListRef.current; 25 | messageList.scrollTop = messageList.scrollHeight; 26 | }, [messages]); 27 | 28 | // Focus on text field on load 29 | useEffect(() => { 30 | textAreaRef.current.focus(); 31 | }, []); 32 | 33 | // Handle errors 34 | const handleError = () => { 35 | setMessages((prevMessages) => [ 36 | ...prevMessages, 37 | { 38 | message: 39 | "Oops! I couldn't hear you? 
Maybe it's just me, but can you please repeat?", 40 | type: "apiMessage", 41 | }, 42 | ]); 43 | setLoading(false); 44 | setUserInput(""); 45 | }; 46 | 47 | // Handle form submission 48 | const handleSubmit = async (e) => { 49 | e.preventDefault(); 50 | 51 | if (userInput.trim() === "") { 52 | return; 53 | } 54 | 55 | setLoading(true); 56 | setMessages((prevMessages) => [ 57 | ...prevMessages, 58 | { message: userInput, type: "userMessage" }, 59 | ]); 60 | 61 | // Send user question and history to API 62 | const response = await fetch("/api/chat", { 63 | method: "POST", 64 | headers: { 65 | "Content-Type": "application/json", 66 | }, 67 | body: JSON.stringify({ question: userInput, history: history }), 68 | }); 69 | 70 | if (!response.ok) { 71 | handleError(); 72 | return; 73 | } 74 | 75 | // Reset user input 76 | setUserInput(""); 77 | const data = await response.json(); 78 | 79 | if (data.result.error === "Unauthorized") { 80 | handleError(); 81 | return; 82 | } 83 | 84 | setMessages((prevMessages) => [ 85 | ...prevMessages, 86 | { message: data.result.success, type: "apiMessage" }, 87 | ]); 88 | setLoading(false); 89 | }; 90 | 91 | // Prevent blank submissions and allow for multiline input 92 | const handleEnter = (e) => { 93 | if (e.key === "Enter" && userInput) { 94 | if (!e.shiftKey && userInput) { 95 | handleSubmit(e); 96 | } 97 | } else if (e.key === "Enter") { 98 | e.preventDefault(); 99 | } 100 | }; 101 | 102 | // Keep history in sync with messages 103 | useEffect(() => { 104 | if (messages.length >= 3) { 105 | setHistory([ 106 | [ 107 | messages[messages.length - 2].message, 108 | messages[messages.length - 1].message, 109 | ], 110 | ]); 111 | } 112 | }, [messages]); 113 | 114 | return ( 115 | <> 116 | 117 | Meet bricky 118 | 119 | 120 | 121 | 122 |
123 |
124 | bricky 132 | Ask Bricky 133 |
134 |
135 |
136 |
137 |
138 |
139 | {messages.map((message, index) => { 140 | return ( 141 | // The latest message sent by the user will be animated while waiting for a response 142 |
154 | {/* Display the correct icon depending on the message type */} 155 | {message.type === "apiMessage" ? ( 156 | AI 164 | ) : ( 165 | Me 173 | )} 174 |
175 | {/* Messages are being rendered in Markdown format */} 176 | 177 | {message.message} 178 | 179 |
180 |
181 | ); 182 | })} 183 |
184 |
185 |
186 |
187 |
188 |