├── .env.template ├── .gitignore ├── .gitpod.yml ├── README.md ├── api ├── ai.py ├── api.py ├── db.py ├── load_pdf_util.py ├── requirements.txt ├── users.py └── utils │ └── localCORS.py ├── app ├── package-lock.json ├── package.json ├── public │ ├── favicon.ico │ ├── index.html │ ├── manifest.json │ ├── robots.txt │ └── squid.jpg ├── src │ ├── components │ │ ├── AddFileForm.tsx │ │ ├── App.css │ │ ├── App.tsx │ │ ├── AskQuestionForm.tsx │ │ ├── Docs.tsx │ │ ├── Home.tsx │ │ ├── Identity.tsx │ │ ├── Query.tsx │ │ ├── SiteContents.tsx │ │ └── Slides.tsx │ ├── index.css │ ├── index.tsx │ ├── interfaces │ │ ├── enums.ts │ │ └── interfaces.ts │ ├── react-app-env.d.ts │ ├── reportWebVitals.ts │ └── utils │ │ └── api.ts └── tsconfig.json ├── astra.json ├── images ├── diagrams │ ├── flare_arch1.png │ ├── flare_arch_ask.png │ ├── flare_arch_write.png │ └── flare_full.png ├── new_vector_ui.png └── open_in_gitpod.svg ├── scripts ├── ingest_openai_key.sh ├── prepare_and_launch.sh ├── read_and_output_nonempty_secret.sh └── read_and_output_secret.sh └── sources ├── nausea.pdf └── the_hobbit.pdf /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY="REPLACE_WITH_OPENAI_API_KEY" 2 | 3 | ASTRA_DB_APPLICATION_TOKEN="REPLACE_WITH_ASTRA_DB_APPLICATION_TOKEN" 4 | # "AstraCS:xxxxxx....." 5 | 6 | ASTRA_DB_ID="REPLACE_WITH_ASTRA_DB_ID" 7 | # "0123abcd-..." 8 | 9 | ASTRA_DB_KEYSPACE="REPLACE_WITH_ASTRA_DB_KEYSPACE" # optional 10 | # "your_keyspace" 11 | 12 | 13 | # UNCOMMENT THE FOLLOWING FOR A CASSANDRA CLUSTER ... 14 | # USE_CASSANDRA_CLUSTER="1" 15 | # ... 
then provide these parameters as well: 16 | # CASSANDRA_KEYSPACE="flare_pdf_demo" 17 | # CASSANDRA_CONTACT_POINTS="127.0.0.1" # optional 18 | # CASSANDRA_USERNAME="cassandra" # optional 19 | # CASSANDRA_PASSWORD="cassandra" # optional 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # API / GENERIC 2 | 3 | .env 4 | __pycache__ 5 | 6 | 7 | ## CLIENT APP 8 | 9 | # dependencies 10 | /app/node_modules 11 | /app/.pnp 12 | .pnp.js 13 | 14 | # testing 15 | /app/coverage 16 | 17 | # production 18 | /app/build 19 | 20 | # misc 21 | .DS_Store 22 | .env.local 23 | .env.development.local 24 | .env.test.local 25 | .env.production.local 26 | 27 | npm-debug.log* 28 | yarn-debug.log* 29 | yarn-error.log* 30 | 31 | # GITPOD 32 | .gitpod_logs 33 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | image: gitpod/workspace-full:2023-02-27-14-39-56 2 | tasks: 3 | - name: app-console 4 | before: | 5 | cd /workspace/langchain-flare-pdf-qa-demo/app 6 | npm install 7 | command: | 8 | cd /workspace/langchain-flare-pdf-qa-demo/app 9 | REACT_APP_API_BASE_URL=`gp url 8000` npm start 10 | - name: api-console 11 | before: | 12 | cd /workspace/langchain-flare-pdf-qa-demo 13 | mkdir .gitpod_logs 14 | # curl -Ls "https://dtsx.io/get-astra-cli" | bash | tee -a /workspace/langchain-flare-pdf-qa-demo/.gitpod_logs/astra-cli-install.log 15 | echo -e "\n\n** NOW YOU CAN RUN THE COMMAND scripts/prepare_and_launch.sh **" 16 | command: /workspace/langchain-flare-pdf-qa-demo/scripts/prepare_and_launch.sh | tee -a /workspace/langchain-flare-pdf-qa-demo/.gitpod_logs/dotenv-setup.log 17 | ports : 18 | - port: 3000 19 | onOpen: open-preview 20 | - port: 8000 21 | onOpen: ignore 22 | visibility: public 23 | github: 24 | prebuilds: 25 | master: true 26 | 
branches: true 27 | pullRequests: true 28 | pullRequestsFromForks: false 29 | addCheck: true 30 | addComment: false 31 | addBadge: true 32 | addLabel: false 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDF FLARE demo with Langchain and Cassandra as Vector Store 2 | 3 | ## What 4 | 5 | Ingest PDF files from their URL into an Astra DB vector store 6 | and run FLARE Question-Answering on them. 7 | 8 | Features: 9 | 10 | - Python API (CassIO, LangChain, FastAPI) + React client (Typescript) 11 | - per-user store of ingested documents 12 | - Other Q-A methods in comparison 13 | - Start-with-a-click on Gitpod 14 | 15 | For some architectural/flow diagrams, check out [this dir](images/diagrams). 16 | 17 | ## Prerequisites 18 | 19 | You need: 20 | 21 | - an [Astra](https://astra.datastax.com) Vector Database (free tier is fine!). **You'll be asked to supply a [Database Administrator token](https://awesome-astra.github.io/docs/pages/astra/create-token/#c-procedure)**, the string starting with `AstraCS:...`; 22 | - likewise, get your [Database ID](https://awesome-astra.github.io/docs/pages/astra/faq/#where-should-i-find-a-database-identifier) ready, you will have to enter it; 23 | - an **OpenAI API Key**. (More info [here](https://cassio.org/start_here/#llm-access), note that out-of-the-box this demo supports OpenAI unless you tinker with the code.) 24 | 25 |
Note: If you have switched Astra to the New Vector Developer Experience UI, click here for instructions on the DB credentials. 26 | 27 | 28 | Go to your database dashboard and click on the "Connection Details" button on the right. A dialog will open with instructions for connecting. You'll do two things: 29 | 30 | - click "Generate Token" and copy the `AstraCS:...` string in its entirety once it appears in the dialog; 31 | - locate the `api_endpoint=...` line in the Python code example. The database ID is the sequence after `https://` and before the dash + region name (e.g. `-us-east1`) in the definition of the endpoint. It looks like `01234567-89ab-cdef-0123-456789abcdef` (and always has this length). 32 | 33 | ![DB credentials in the Vector Developer Experience](images/new_vector_ui.png) 34 | 35 |
36 | 37 | 38 | ## How-to (Gitpod) 39 | 40 | Click this button, confirm opening of the workspace 41 | (you might need to do a Gitpod login in the process) and wait 1-2 minutes: 42 | instructions will show up in the console below, where you'll have 43 | to provide connection details and OpenAI key when prompted. 44 | 45 | In the meantime, the app will open in the top panel. 46 | 47 | 48 | 49 | ## How-to (local run) 50 | 51 | ### API 52 | 53 | Create a Python `3.8+` virtual environment and install 54 | the dependencies in `requirements.txt`. 55 | 56 | Make a copy `cp .env.template .env` and set the secrets for your DB and OpenAI. 57 | 58 | Finally enter the subdirectory and launch the API: 59 | 60 | ``` 61 | cd api 62 | uvicorn api:app 63 | ``` 64 | 65 | #### Use a Cassandra cluster 66 | 67 | To use a Cassandra cluster instead of Astra DB, check the `.env.template` file: 68 | uncomment the `USE_CASSANDRA_CLUSTER` environment variable in your `.env` 69 | and provide the necessary connection parameters (keyspace name, plus: 70 | contact points and/or authentication if required). 71 | 72 | The next time you start the API, it will attempt to connect to Cassandra. 73 | 74 | ### Client 75 | 76 | You need a modern Node.js. Enter the subdirectory and install the dependencies: 77 | 78 | ``` 79 | cd app 80 | npm install 81 | ``` 82 | 83 | If the API is running you can launch the client: 84 | 85 | ``` 86 | npm start 87 | ``` 88 | 89 | and point your browser to local port 3000. 90 | 91 | _(Note: if the API runs elsewhere, you can launch `REACT_APP_API_BASE_URL="http://something..." npm start`.)_ 92 | 93 | #### User journey 94 | 95 | First, "log in" (mocked) with a made-up username. 96 | 97 | Then you access the panel. Go to the "Docs" panel, where you can load PDF files 98 | by entering their URL (click on the "i" icon to get example URLs to paste). 99 | 100 | You can "Ask questions", comparing different methods (FLARE/RAG/Plain LLM) and 101 | their answers. 
102 | -------------------------------------------------------------------------------- /api/ai.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import tempfile 3 | import os 4 | from dotenv import load_dotenv 5 | from urllib import request 6 | 7 | from langchain.embeddings.openai import OpenAIEmbeddings 8 | from langchain.vectorstores import Cassandra 9 | from langchain.document_loaders import PyPDFLoader 10 | from langchain.text_splitter import RecursiveCharacterTextSplitter 11 | from langchain.chains import FlareChain 12 | from langchain.chat_models import ChatOpenAI 13 | from langchain.llms import OpenAI 14 | from langchain.indexes.vectorstore import VectorStoreIndexWrapper 15 | 16 | 17 | VECTOR_PDF_TABLE_NAME = "flare_doc_bank" 18 | 19 | load_dotenv("../.env") 20 | 21 | embeddingService = None 22 | chatModel = None 23 | llm = None 24 | 25 | 26 | def get_chat_model(): 27 | global chatModel 28 | if chatModel is None: 29 | chatModel = ChatOpenAI(temperature=0) 30 | return chatModel 31 | 32 | 33 | def get_llm(): 34 | global llm 35 | if llm is None: 36 | llm = OpenAI(temperature=0) 37 | return llm 38 | 39 | 40 | def get_flare_chain(chmodel, vstore): 41 | retriever = vstore.as_retriever() 42 | flareChain = FlareChain.from_llm( 43 | chmodel, 44 | retriever=retriever, 45 | max_generation_len=164, 46 | min_prob=0.3, 47 | ) 48 | return flareChain 49 | 50 | 51 | def get_embeddings(): 52 | global embeddingService 53 | if embeddingService is None: 54 | embeddingService = OpenAIEmbeddings() 55 | return embeddingService 56 | 57 | 58 | def get_rag_index(embeddings, user_id): 59 | vectorstore_u = get_vectorstore(embeddings, user_id=user_id) 60 | rag_index = VectorStoreIndexWrapper(vectorstore=vectorstore_u) 61 | return rag_index 62 | 63 | 64 | def get_vectorstore(embeddings, user_id=None): 65 | """ 66 | if user_id is None, 67 | we assume this is an init call: 68 | we require table provisioning (and pass a made-up user 
id) 69 | if user_id is passed: 70 | we spawn a no-provision instance set to that partition 71 | """ 72 | vectorStore = Cassandra( 73 | embedding=embeddings, 74 | table_name=VECTOR_PDF_TABLE_NAME, 75 | partition_id="placeholder" if user_id is None else user_id, 76 | partitioned=True, 77 | skip_provisioning=user_id is not None, 78 | ) 79 | return vectorStore 80 | 81 | # PDF loading machinery 82 | def _finalize_metadata(md_dict): 83 | return { 84 | k: v if k != "source" else os.path.split(v)[1] 85 | for k, v in md_dict.items() 86 | } 87 | 88 | 89 | def load_pdf_from_file(file_name, vector_store): 90 | try: 91 | print(f"Loading {file_name}") 92 | pdf_loader = PyPDFLoader(file_name) 93 | text_splitter = RecursiveCharacterTextSplitter( 94 | chunk_size=500, 95 | chunk_overlap=80, 96 | ) 97 | documents = [ 98 | doc 99 | for doc in pdf_loader.load_and_split(text_splitter=text_splitter) 100 | ] 101 | texts, metadatas0 = zip(*((doc.page_content, doc.metadata) for doc in documents)) 102 | # 103 | metadatas = [ 104 | _finalize_metadata(md) 105 | for md in metadatas0 106 | ] 107 | # 108 | vector_store.add_texts(texts=texts, metadatas=metadatas) 109 | print(f"Finished loading.") 110 | return len(documents) 111 | except Exception: 112 | return None 113 | 114 | def extract_file_title(file_url): 115 | try: 116 | pre, title = os.path.split(file_url) 117 | if "?" 
in title: 118 | return title.split("?")[0] 119 | else: 120 | return title 121 | except: 122 | return "unnamed.pdf" 123 | 124 | 125 | def load_pdf_from_url(file_url, vector_store): 126 | tmp_dir = tempfile.mkdtemp() 127 | try: 128 | file_title = extract_file_title(file_url) 129 | pdf_file_path = os.path.join(tmp_dir, file_title) 130 | request.urlretrieve(file_url, pdf_file_path) 131 | return load_pdf_from_file(pdf_file_path, vector_store), file_title 132 | except: 133 | return None, None 134 | finally: 135 | shutil.rmtree(tmp_dir) 136 | -------------------------------------------------------------------------------- /api/api.py: -------------------------------------------------------------------------------- 1 | # from typing import List 2 | 3 | from fastapi import FastAPI, Depends 4 | from pydantic import BaseModel 5 | 6 | from utils.localCORS import permitReactLocalhostClient 7 | from db import set_db_session 8 | from ai import ( 9 | get_embeddings, 10 | get_vectorstore, 11 | load_pdf_from_url, 12 | get_chat_model, 13 | get_flare_chain, 14 | get_llm, 15 | get_rag_index, 16 | ) 17 | from users import ( 18 | get_user_store, 19 | files_for_user, 20 | add_file_to_user, 21 | delete_file_from_user, 22 | ) 23 | 24 | set_db_session() 25 | embeddings = get_embeddings() 26 | chatmodel = get_chat_model() 27 | llm = get_llm() 28 | user_store = get_user_store() 29 | 30 | class ListFileRequest(BaseModel): 31 | user_id: str 32 | 33 | class LoadPDFRequest(BaseModel): 34 | user_id: str 35 | file_url: str 36 | 37 | class QuestionRequest(BaseModel): 38 | user_id: str 39 | question_id: str 40 | question: str 41 | 42 | class RemovePDFRequest(BaseModel): 43 | user_id: str 44 | file_name: str 45 | 46 | # app 47 | 48 | app = FastAPI() 49 | permitReactLocalhostClient(app) 50 | _ = get_vectorstore(embeddings) 51 | 52 | 53 | 54 | @app.post('/list_files') 55 | def list_files(payload: ListFileRequest): 56 | return files_for_user(user_store, payload.user_id) 57 | 58 | 59 | 
@app.post('/load_pdf_url') 60 | def load_pdf_url(payload: LoadPDFRequest): 61 | try: 62 | vectorstore_u = get_vectorstore(embeddings, user_id=payload.user_id) 63 | n_rows, file_name = load_pdf_from_url(payload.file_url, vectorstore_u) 64 | if n_rows is not None: 65 | add_file_to_user(user_store, payload.user_id, file_name, payload.file_url) 66 | return { 67 | "success": True, 68 | "n_rows": n_rows, 69 | } 70 | else: 71 | return { 72 | "success": False, 73 | } 74 | except Exception: 75 | return { 76 | "success": False, 77 | } 78 | 79 | 80 | @app.post('/remove_pdf') 81 | def remove_pdf(payload: RemovePDFRequest): 82 | try: 83 | vectorstore_u = get_vectorstore(embeddings, user_id=payload.user_id) 84 | num_deleted = vectorstore_u.vector_table.find_and_delete_entries(metadata={"source": payload.file_name}) 85 | delete_file_from_user(user_store, payload.user_id, payload.file_name) 86 | return { 87 | "success": True, 88 | "num_deleted": num_deleted, 89 | } 90 | except Exception: 91 | return { 92 | "success": False, 93 | "num_deleted": None, 94 | } 95 | 96 | 97 | @app.post('/flare_ask') 98 | def flare_ask(payload: QuestionRequest): 99 | try: 100 | vectorstore_u = get_vectorstore(embeddings, user_id=payload.user_id) 101 | flarechain_u = get_flare_chain(chatmodel, vectorstore_u) 102 | result = flarechain_u.run(payload.question) 103 | return { 104 | "question_id": payload.question_id, 105 | "success": True, 106 | "answer": result, 107 | } 108 | except Exception as e: 109 | return { 110 | "question_id": payload.question_id, 111 | "success": False, 112 | "error": str(e), 113 | } 114 | 115 | 116 | @app.post('/rag_ask') 117 | def llm_ask(payload: QuestionRequest): 118 | try: 119 | rag_index = get_rag_index(embeddings, user_id=payload.user_id) 120 | result = rag_index.query(payload.question, llm=llm).strip() 121 | return { 122 | "question_id": payload.question_id, 123 | "success": True, 124 | "answer": result, 125 | } 126 | except Exception as e: 127 | return { 128 | 
"question_id": payload.question_id, 129 | "success": False, 130 | "error": str(e), 131 | } 132 | 133 | 134 | @app.post('/llm_ask') 135 | def llm_ask(payload: QuestionRequest): 136 | try: 137 | result = llm(payload.question).strip() 138 | return { 139 | "question_id": payload.question_id, 140 | "success": True, 141 | "answer": result, 142 | } 143 | except Exception as e: 144 | return { 145 | "question_id": payload.question_id, 146 | "success": False, 147 | "error": str(e), 148 | } 149 | -------------------------------------------------------------------------------- /api/db.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | 4 | import cassio 5 | 6 | from cassandra.cluster import ( 7 | Cluster, 8 | ) 9 | from cassandra.auth import PlainTextAuthProvider 10 | 11 | 12 | load_dotenv("../.env") 13 | 14 | def set_db_session(): 15 | global dbSession 16 | # A separate route for a Cassandra cluster session 17 | use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0")) 18 | if use_cassandra != 0: 19 | set_cassandra_session_keyspace() 20 | else: 21 | cassio.init( 22 | token=os.environ["ASTRA_DB_APPLICATION_TOKEN"], 23 | database_id=os.environ["ASTRA_DB_ID"], 24 | keyspace=os.environ.get("ASTRA_DB_KEYSPACE"), 25 | ) 26 | 27 | 28 | def set_cassandra_session_keyspace(): 29 | contact_points = [ 30 | cp.strip() 31 | for cp in os.environ.get("CASSANDRA_CONTACT_POINTS", "").split(',') 32 | if cp.strip() 33 | ] 34 | CASSANDRA_KEYSPACE = os.environ["CASSANDRA_KEYSPACE"] 35 | CASSANDRA_USERNAME = os.environ.get("CASSANDRA_USERNAME") 36 | CASSANDRA_PASSWORD = os.environ.get("CASSANDRA_PASSWORD") 37 | # 38 | if CASSANDRA_USERNAME and CASSANDRA_PASSWORD: 39 | auth_provider = PlainTextAuthProvider( 40 | CASSANDRA_USERNAME, 41 | CASSANDRA_PASSWORD, 42 | ) 43 | else: 44 | auth_provider = None 45 | 46 | c_cluster = Cluster(contact_points if contact_points else None, auth_provider=auth_provider) 47 | 
session = c_cluster.connect() 48 | print("Cassandra session created.") 49 | # 50 | cassio.init( 51 | session=session, 52 | keyspace=CASSANDRA_KEYSPACE, 53 | ) 54 | 55 | -------------------------------------------------------------------------------- /api/load_pdf_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | from db import set_db_session 5 | from ai import ( 6 | get_embeddings, 7 | get_vectorstore, 8 | load_pdf_from_file, 9 | ) 10 | from users import ( 11 | get_user_store, 12 | add_file_to_user, 13 | ) 14 | 15 | 16 | if __name__ == '__main__': 17 | user_id = sys.argv[1] 18 | pdf_filepaths = sys.argv[2:] 19 | print(f"Trying to import {', '.join(pdf_filepaths)} as user '{user_id}' ...") 20 | # 21 | set_db_session() 22 | embeddings = get_embeddings() 23 | user_store = get_user_store() 24 | vectorstore_u = get_vectorstore(embeddings, user_id=user_id) 25 | print("DB Connection established.") 26 | # 27 | for pdf_filepath in pdf_filepaths: 28 | _, file_title = os.path.split(pdf_filepath) 29 | print(f"* Starting {file_title} ...") 30 | n_rows = load_pdf_from_file(pdf_filepath, vectorstore_u) 31 | if n_rows is not None: 32 | add_file_to_user(user_store, user_id, file_title, "#") 33 | print(f"* Success ({n_rows} rows inserted).") 34 | else: 35 | print(f"* Errored/nothing inserted.") 36 | print("\nFinished.") 37 | -------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- 1 | cassio>=0.1.3 2 | 3 | # langchain>=0.0.309 4 | git+https://github.com/hemidactylus/langchain@updated-full-preview-remove-shims#egg=langchain&subdirectory=libs/langchain 5 | 6 | fastapi==0.99.1 7 | openai==0.27.8 8 | pypdf==3.12.0 9 | python-dotenv==1.0.0 10 | tiktoken==0.4.0 11 | uvicorn==0.22.0 12 | -------------------------------------------------------------------------------- /api/users.py: 
-------------------------------------------------------------------------------- 1 | # Cassio interaction with the DB 2 | import json 3 | 4 | from cassio.table import ClusteredCassandraTable 5 | 6 | USER_TABLE_NAME = "flare_users" 7 | 8 | userStore = None 9 | 10 | def get_user_store(): 11 | global userStore 12 | if userStore is None: 13 | userStore = ClusteredCassandraTable( 14 | table=USER_TABLE_NAME, 15 | primary_key_type=["TEXT", "TEXT"], 16 | ordering_in_partition="ASC", 17 | ) 18 | return userStore 19 | 20 | def files_for_user(user_store, user_id): 21 | return [ 22 | json.loads(row["body_blob"]) 23 | for row in user_store.get_partition( 24 | partition_id=user_id, 25 | ) 26 | ] 27 | 28 | def add_file_to_user(user_store, user_id, file_name, file_url): 29 | blob = json.dumps({"name": file_name, "url": file_url}) 30 | user_store.put( 31 | partition_id=user_id, 32 | row_id=file_name, 33 | body_blob=blob, 34 | ) 35 | 36 | def delete_file_from_user(user_store, user_id, file_name): 37 | user_store.delete( 38 | partition_id=user_id, 39 | row_id=file_name, 40 | ) 41 | -------------------------------------------------------------------------------- /api/utils/localCORS.py: -------------------------------------------------------------------------------- 1 | # Demo-mode to enable React client to axios request an API (both on localhost) 2 | # Not suitable for production. 3 | from fastapi.middleware.cors import CORSMiddleware 4 | 5 | 6 | def permitReactLocalhostClient(app): 7 | app.add_middleware( 8 | CORSMiddleware, 9 | # This is to avoid CORS issues while on gitpod. Don't do in production. 
10 | allow_origins=['*'], 11 | # Prefer individual source domains, such as: 12 | # allow_origins=['http://localhost:3000'], 13 | allow_credentials=True, 14 | allow_methods=["*"], 15 | allow_headers=["*"], 16 | ) 17 | -------------------------------------------------------------------------------- /app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "dependencies": { 6 | "@testing-library/jest-dom": "^5.17.0", 7 | "@testing-library/react": "^13.4.0", 8 | "@testing-library/user-event": "^13.5.0", 9 | "@types/jest": "^27.5.2", 10 | "@types/node": "^16.18.39", 11 | "@types/react": "^18.2.16", 12 | "@types/react-dom": "^18.2.7", 13 | "axios": "^1.4.0", 14 | "react": "^18.2.0", 15 | "react-dom": "^18.2.0", 16 | "react-hook-form": "^7.45.4", 17 | "react-router-dom": "^6.14.2", 18 | "react-scripts": "5.0.1", 19 | "typescript": "^4.9.5", 20 | "uuid": "^9.0.0", 21 | "web-vitals": "^2.1.4" 22 | }, 23 | "scripts": { 24 | "start": "react-scripts start", 25 | "build": "react-scripts build", 26 | "test": "react-scripts test", 27 | "eject": "react-scripts eject" 28 | }, 29 | "eslintConfig": { 30 | "extends": [ 31 | "react-app", 32 | "react-app/jest" 33 | ] 34 | }, 35 | "browserslist": { 36 | "production": [ 37 | ">0.2%", 38 | "not dead", 39 | "not op_mini all" 40 | ], 41 | "development": [ 42 | "last 1 chrome version", 43 | "last 1 firefox version", 44 | "last 1 safari version" 45 | ] 46 | }, 47 | "devDependencies": { 48 | "@types/uuid": "^9.0.2" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /app/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/app/public/favicon.ico -------------------------------------------------------------------------------- 
/app/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 12 | 13 | 17 | 18 | 27 | PDF FLARE demo 28 | 29 | 30 | 31 |
32 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /app/public/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "short_name": "React App", 3 | "name": "Create React App Sample", 4 | "icons": [ 5 | { 6 | "src": "favicon.ico", 7 | "sizes": "64x64 32x32 24x24 16x16", 8 | "type": "image/x-icon" 9 | }, 10 | { 11 | "src": "logo192.png", 12 | "type": "image/png", 13 | "sizes": "192x192" 14 | }, 15 | { 16 | "src": "logo512.png", 17 | "type": "image/png", 18 | "sizes": "512x512" 19 | } 20 | ], 21 | "start_url": ".", 22 | "display": "standalone", 23 | "theme_color": "#000000", 24 | "background_color": "#ffffff" 25 | } 26 | -------------------------------------------------------------------------------- /app/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /app/public/squid.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/app/public/squid.jpg -------------------------------------------------------------------------------- /app/src/components/AddFileForm.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import { useState } from "react" 3 | import { useForm } from "react-hook-form"; 4 | 5 | import {UserDesc, FileURLSubmission} from "../interfaces/interfaces"; 6 | import {RequestStatus} from "../interfaces/enums"; 7 | import {submit_url_to_load} from "../utils/api"; 8 | 9 | const AddFileForm = (props: UserDesc & {refreshFiles: () => void}) => { 10 | 11 | const {userId, refreshFiles} = props; 12 | 13 | const {register, handleSubmit} = useForm(); 14 | 15 | 
const [submitState, setSubmitState] = useState("initialized"); 16 | 17 | const [showExampleUrls, setShowExampleUrls] = useState(false); 18 | 19 | const onSubmitHandler = (values: FileURLSubmission) => { 20 | if (values.fileURL) { 21 | setSubmitState("in_flight"); 22 | console.log(`AddFileForm submitted, with ${values.fileURL}.`); 23 | submit_url_to_load( 24 | userId || "", 25 | values.fileURL, 26 | (response: any) => { 27 | console.log(`Gotten: ${JSON.stringify(response)}`); 28 | if (response.success){ 29 | setSubmitState("completed"); 30 | console.log(`Written ${response.n_rows} rows to vector table.`); 31 | refreshFiles(); 32 | }else{ 33 | console.log("Something went wrong loading the file"); 34 | setSubmitState("errored"); 35 | } 36 | }, 37 | (e: any) => {console.log(e); setSubmitState("errored");} 38 | ); 39 | } else { 40 | console.log(`AddFileForm submitted but EMPTY INPUT`); 41 | } 42 | }; 43 | 44 | const toggleExampleUrls = () => { 45 | setShowExampleUrls( (v) => !v ); 46 | } 47 | 48 | if (submitState === "initialized" || submitState === "errored" || submitState === "completed"){ 49 | return ( 50 |
51 | { (submitState === "errored") && 52 |
53 | Submission errored! 54 |
55 | } 56 |
57 |
58 | 61 | 62 | 63 |
64 | { ( showExampleUrls && 65 |
66 | Example submissions: 67 |
    68 |
  • https://github.com/CassioML/langchain-flare-pdf-qa-demo/blob/main/sources/nausea.pdf?raw=true
  • 69 |
  • https://github.com/CassioML/langchain-flare-pdf-qa-demo/blob/main/sources/the_hobbit.pdf?raw=true
  • 70 |
  • https://arxiv.org/pdf/1311.3081.pdf
  • 71 |
72 |
) 73 | } 74 |
75 |
76 | ); 77 | } else if (submitState === "in_flight"){ 78 | return

file submitted...

79 | } else { 80 | return

(trouble with submission form)

81 | } 82 | } 83 | 84 | export default AddFileForm 85 | -------------------------------------------------------------------------------- /app/src/components/App.css: -------------------------------------------------------------------------------- 1 | .App { 2 | background-color: #6599cc; 3 | font-family: Courier; 4 | min-height: 100vh; 5 | } 6 | 7 | .App-header { 8 | display: flex; 9 | flex-direction: column; 10 | font-size: 120%; 11 | font-weight: bold; 12 | } 13 | 14 | .App-identity { 15 | display: flex; 16 | flex-direction: column; 17 | padding-left: 40px; 18 | color: #12386f; 19 | } 20 | 21 | .App-navbar { 22 | padding-left: 10px; 23 | } 24 | 25 | .App-navbar span { 26 | font-size: 120%; 27 | font-weight: bold; 28 | margin: 1vh; 29 | cursor: pointer; 30 | color: #A0FFFF; 31 | } 32 | 33 | .linkUrl:visited { 34 | color: #A0FFFF; 35 | } 36 | 37 | .linkUrl { 38 | color: #A0FFFF; 39 | } 40 | 41 | .linkUrl:hover { 42 | color: #12386f; 43 | } 44 | 45 | .App-body { 46 | margin-top: 20px; 47 | padding-left: 30px; 48 | padding-right: 30px; 49 | color: #12386f; 50 | } 51 | 52 | .userName { 53 | color: black; 54 | font-weight: bold; 55 | } 56 | 57 | .inlineInput { 58 | width: 18vh; 59 | margin-left: 15px; 60 | padding-left: 10px; 61 | font-size: 100%; 62 | background-color: transparent; 63 | border: 2px solid #A0FFFF; 64 | border-radius: 10px; 65 | color: black; 66 | font-weight: bold; 67 | font-family: Courier; 68 | } 69 | 70 | .inlineInputLong { 71 | width: 50%; 72 | margin-left: 15px; 73 | padding-left: 10px; 74 | font-size: 100%; 75 | background-color: transparent; 76 | border: 2px solid #A0FFFF; 77 | border-radius: 10px; 78 | color: black; 79 | font-weight: bold; 80 | font-family: Courier; 81 | } 82 | 83 | .inlineButton { 84 | margin-left: 25px; 85 | font-size: 100%; 86 | background-color: transparent; 87 | border: 2px solid #A0FFFF; 88 | border-radius: 10px; 89 | color: #12386f; 90 | cursor: pointer; 91 | } 92 | 93 | .headerSubtitle { 94 | color: #90DFDF; 95 | 
margin-top: 0px; 96 | font-size: 60%; 97 | font-style: italic; 98 | } 99 | 100 | .urlExamples { 101 | margin: 3vh 3vh; 102 | border-radius: 12px; 103 | padding: 8px; 104 | border: 2px solid #12386f; 105 | font-size: 80%; 106 | color: #A0FFFF; 107 | } 108 | 109 | .urlExamples .urlExample { 110 | font-weight: bold; 111 | } 112 | 113 | ul.fileList { 114 | list-style-type: none; 115 | } 116 | 117 | .fileList > li { 118 | margin-left: 3vh; 119 | font-weight: bold; 120 | margin-top: 10px; 121 | } 122 | 123 | .App-link { 124 | color: #61dafb; 125 | } 126 | 127 | .questionBlock { 128 | margin: 1vh 3vh; 129 | border-radius: 12px; 130 | padding: 12px; 131 | border: 2px solid purple; 132 | font-weight: bold; 133 | } 134 | 135 | .questionBlock > .questionBody { 136 | color: #A0FFFF; 137 | font-size: 80%; 138 | } 139 | 140 | .questionBlock .QAMode { 141 | margin-left: 2vh; 142 | padding: 5px; 143 | color: #12386f; 144 | background-color: #A0FFFF; 145 | border-radius: 10px; 146 | border: 1px solid black; 147 | } 148 | 149 | .questionBlock > .answerBody { 150 | color: #12386f; 151 | } 152 | 153 | .qaMode { 154 | font-variant: small-caps; 155 | font-weight: bold; 156 | margin-left: 6px; 157 | margin-right: 6px; 158 | padding-left: 4px; 159 | padding-right: 4px; 160 | border-radius: 5px; 161 | cursor: pointer; 162 | } 163 | 164 | .selected { 165 | background-color: #A0FFFF; 166 | border: 1px solid #12386f; 167 | color: #12386f; 168 | } 169 | 170 | .unselected { 171 | color: #70BFBF; 172 | } 173 | 174 | hr.fancy { 175 | border: 5px solid #4579ac; 176 | } 177 | 178 | .homeImage { 179 | display: block; 180 | margin-left: auto; 181 | margin-right: auto; 182 | width: 40%; 183 | margin-top: 5vh; 184 | } 185 | 186 | .slideTitle { 187 | display:table; 188 | margin:0 auto; 189 | font-weight: bold; 190 | font-size: 140%; 191 | color: #12386f; 192 | margin-bottom: 3vh; 193 | } 194 | 195 | .slideImage { 196 | display: block; 197 | margin-left: auto; 198 | margin-right: auto; 199 | 
border-radius: 10px; 200 | max-width:100%; 201 | max-height:75vh; 202 | } 203 | -------------------------------------------------------------------------------- /app/src/components/App.tsx: -------------------------------------------------------------------------------- 1 | // import React from 'react'; 2 | import './App.css'; 3 | 4 | import Identity from './Identity'; 5 | import SiteContents from './SiteContents'; 6 | import {SitePage} from "../interfaces/enums"; 7 | 8 | import { useState } from "react" 9 | 10 | function App() { 11 | 12 | const [userId, setUserId] = useState(); 13 | const [page, setPage] = useState("home"); 14 | 15 | return ( 16 |
17 |
18 | 23 |
24 |
25 |
26 | 31 |
32 |
33 | ); 34 | } 35 | 36 | export default App; 37 | -------------------------------------------------------------------------------- /app/src/components/AskQuestionForm.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import { useState } from "react" 3 | import {v4 as uuidv4} from 'uuid'; 4 | import { useForm } from "react-hook-form"; 5 | 6 | import {UserDesc, QuestionSubmission} from "../interfaces/interfaces"; 7 | import {QAMode} from "../interfaces/enums"; 8 | import {submit_question} from "../utils/api"; 9 | 10 | const AskQuestionForm = (props: UserDesc & {completeQuestion: (q_id: string, answer: string|undefined) => void , addQuestion: (q_id: string, qaMode: QAMode, question: string) => void;}) => { 11 | 12 | const {userId, addQuestion, completeQuestion} = props; 13 | 14 | const {register, handleSubmit, reset} = useForm(); 15 | 16 | const [qaMode, setQaMode] = useState("FLARE"); 17 | 18 | const onSubmitHandler = (values: QuestionSubmission) => { 19 | if (values.question) { 20 | const q_id = uuidv4(); 21 | console.log(`AskQuestionForm submitted[${q_id}], with ${values.question}.`); 22 | reset(); 23 | addQuestion(q_id, qaMode, values.question); 24 | submit_question( 25 | qaMode, 26 | userId || "", 27 | q_id, 28 | values.question, 29 | (response: any) => { 30 | console.log(`Gotten: ${JSON.stringify(response)}`); 31 | if (response.success){ 32 | console.log(`Answer to ${q_id}: ${response.answer}`); 33 | completeQuestion(q_id, response.answer); 34 | }else{ 35 | console.log(`Failed answer to ${q_id}`); 36 | completeQuestion(q_id, "(Failure!)"); 37 | } 38 | }, 39 | (e: any) => { 40 | console.log(e); 41 | completeQuestion(q_id, "(Failure!)"); 42 | } 43 | ); 44 | } else { 45 | console.log(`AskQuestionForm submitted but EMPTY INPUT`); 46 | } 47 | }; 48 | 49 | return ( 50 |
51 |
52 | QA mode: 53 | setQaMode("FLARE")}> 54 | flare 55 | 56 | setQaMode("RAG")}> 57 | rag 58 | 59 | setQaMode("SIMPLE")}> 60 | simple 61 | 62 |
63 |
64 |
65 | 66 | 67 | 68 |
69 |
70 |
71 | ); 72 | 73 | } 74 | 75 | export default AskQuestionForm 76 | -------------------------------------------------------------------------------- /app/src/components/Docs.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import { useEffect, useState } from "react" 3 | 4 | import {UserDesc, FileItem} from "../interfaces/interfaces"; 5 | import {RequestStatus} from "../interfaces/enums"; 6 | import {get_loaded_files, remove_file} from "../utils/api"; 7 | 8 | import AddFileForm from "./AddFileForm"; 9 | 10 | const Docs = (props: UserDesc) => { 11 | 12 | const {userId} = props; 13 | 14 | const [queryState, setQueryState] = useState("initialized"); 15 | const [fileList, setFileList] = useState([]); 16 | 17 | const refreshFiles = () => { 18 | setQueryState("in_flight"); 19 | get_loaded_files( 20 | userId || "", 21 | (r: FileItem[]) => { 22 | setFileList(r); 23 | setQueryState("completed"); 24 | }, 25 | (e: any) => {console.log(e); setQueryState("errored");} 26 | ); 27 | } 28 | 29 | const removeFile = (file_name: string) => { 30 | console.log(`Removing ${file_name}`); 31 | setQueryState("in_flight"); 32 | remove_file( 33 | userId || "", 34 | file_name, 35 | (r: any) => { 36 | console.log(`Removed ${r.num_deleted} entries.`); 37 | refreshFiles(); 38 | }, 39 | (e: any) => {console.log(e); setQueryState("errored");} 40 | ); 41 | } 42 | 43 | useEffect( 44 | refreshFiles, 45 | [userId] 46 | ); 47 | 48 | return ( 49 |
50 | { (queryState === "initialized") && 51 |

(nothing to see here)

52 | } 53 | { (queryState === "in_flight") && 54 |

wait...

55 | } 56 | { (queryState === "completed") && 57 |
{userId}'s docs 58 |
    59 | { fileList.map( (f: FileItem) =>
  • 60 | {f.name} (source) 61 | 62 |
  • ) } 63 |
64 |
65 | 66 |
67 | } 68 | { (queryState === "errored") && 69 |

Error fetching docs

70 | } 71 |
72 | ); 73 | } 74 | 75 | export default Docs 76 | -------------------------------------------------------------------------------- /app/src/components/Home.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import {UserDesc} from "../interfaces/interfaces"; 3 | 4 | const Home = (props: UserDesc) => { 5 | 6 | const {userId} = props; 7 | 8 | return ( 9 |
10 |

Welcome, {userId}.

11 |

This demo is about:

12 |
    13 |
  • LangChain's FLARE question-answering
  • 14 |
  • Ingestion of PDF documents
  • 15 |
  • Astra as a vector store, partitioned per-user
  • 16 |
  • API: Python (LangChain, CassIO, FastAPI)
  • 17 |
  • Client: React/Typescript
  • 18 |
19 |

Enjoy!

20 |
21 | ); 22 | } 23 | 24 | export default Home 25 | -------------------------------------------------------------------------------- /app/src/components/Identity.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react" 2 | import { Dispatch, SetStateAction } from "react"; 3 | 4 | import './App.css'; 5 | 6 | 7 | export interface UserProps { 8 | userId: string|undefined; 9 | setUserId: Dispatch>; 10 | } 11 | 12 | 13 | const Identity = (props: {userId: any, setUserId: any, setPage: any}) => { 14 | 15 | const {userId, setUserId, setPage} = props; 16 | 17 | const [editUserId, setEditUserId] = useState(''); 18 | 19 | 20 | const trySetUserId = (newUserId: string) => { 21 | if(newUserId){ 22 | setUserId(newUserId); 23 | setPage("home"); 24 | } 25 | } 26 | 27 | return ( <> 28 |
29 | { !userId &&
30 |

31 | PDF FLARE demo - Who are you? 32 | setEditUserId(e.target.value)} 38 | onKeyPress={(e) => {if (e.key === 'Enter') { trySetUserId(editUserId) }}} 39 | /> 40 | 46 |

47 |
} 48 | { userId &&
49 |

50 | PDF FLARE demo - Welcome, {userId} 51 | 52 | 60 | 61 |

62 | 63 |
} 64 |
65 | ); 66 | } 67 | 68 | export default Identity 69 | -------------------------------------------------------------------------------- /app/src/components/Query.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import { useState } from "react" 3 | import {UserDesc, QuestionAndAnswer} from "../interfaces/interfaces"; 4 | 5 | import {QAMode} from "../interfaces/enums"; 6 | import AskQuestionForm from "./AskQuestionForm"; 7 | 8 | const Query = (props: UserDesc) => { 9 | 10 | const {userId} = props; 11 | 12 | const [history, setHistory] = useState([]); 13 | 14 | const completeQuestion = (q_id: string, answer: string | undefined) => { 15 | console.log(`completing ${q_id} with ${answer}`); 16 | setHistory( (h) => h.map( q => { 17 | if (q.question_id === q_id){ 18 | return {...q, ...{answer: answer}}; 19 | }else{ 20 | return q; 21 | } 22 | })); 23 | } 24 | const addQuestion = (q_id: string, qaMode: QAMode, question: string) => { 25 | console.log(`adding ${q_id}: ${question}`); 26 | setHistory( (h) => h.concat( [{ 27 | question_id: q_id, 28 | question: question, 29 | answer: undefined, 30 | qa_mode: qaMode, 31 | }] )); 32 | } 33 | 34 | return ( 35 |
36 | 37 | { ( (history.length > 0) && <> 38 |

Question history:

39 | { history.slice().reverse().map( q => 40 |
41 |

42 | {q.question} 43 | {q.qa_mode} 44 |

45 |

{q.answer === undefined ? "⌛" : q.answer || "(no answer)"}

46 |
47 | ) } 48 | ) } 49 |
50 | ); 51 | } 52 | 53 | export default Query 54 | -------------------------------------------------------------------------------- /app/src/components/SiteContents.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | import {UserDesc} from "../interfaces/interfaces"; 3 | 4 | import Home from "./Home"; 5 | import Docs from "./Docs"; 6 | import Query from "./Query"; 7 | import Slides from "./Slides"; 8 | 9 | const SiteContents = (props: UserDesc & {page: any, setPage: any}) => { 10 | 11 | const {userId, page, setPage} = props; 12 | 13 | return ( 14 |
15 | { userId && <> 16 |
17 | setPage("home")}>Home 18 | | 19 | setPage("docs")}>My docs 20 | | 21 | setPage("ask")}>Ask questions 22 | | 23 | setPage("slides")}>Info 24 |
25 |
26 | { (page === "home" && <> 27 | 28 | )} 29 | { (page === "docs" && <> 30 | 31 | )} 32 | { (page === "ask" && <> 33 | 34 | )} 35 | { (page === "slides" && <> 36 | 37 | )} 38 |
39 | } 40 | { userId === undefined && <> 41 | 42 | } 43 |
44 | ); 45 | } 46 | 47 | export default SiteContents 48 | -------------------------------------------------------------------------------- /app/src/components/Slides.tsx: -------------------------------------------------------------------------------- 1 | import './App.css'; 2 | 3 | import { useState } from "react" 4 | 5 | const slides = [ 6 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch1.png", 7 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch_write.png", 8 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch_ask.png", 9 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_full.png", 10 | ]; 11 | const titles = [ 12 | "Tech stack", 13 | "File ingestion", 14 | "Question-time flow", 15 | "FLARE at a glance", 16 | ] 17 | 18 | const Slides = () => { 19 | 20 | const [slide, setSlide] = useState(0); 21 | 22 | return ( 23 |
24 | {titles[slide]} ({slide+1}/{slides.length}) 25 | setSlide(s => (s+1) % slides.length) } 27 | className="slideImage" 28 | src={slides[slide]} 29 | /> 30 |
31 | ); 32 | } 33 | 34 | export default Slides 35 | -------------------------------------------------------------------------------- /app/src/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | margin: 0; 3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', 5 | sans-serif; 6 | -webkit-font-smoothing: antialiased; 7 | -moz-osx-font-smoothing: grayscale; 8 | } 9 | 10 | code { 11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', 12 | monospace; 13 | } 14 | -------------------------------------------------------------------------------- /app/src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import './index.css'; 4 | import App from './components/App'; 5 | import reportWebVitals from './reportWebVitals'; 6 | 7 | const root = ReactDOM.createRoot( 8 | document.getElementById('root') as HTMLElement 9 | ); 10 | root.render( 11 | 12 | 13 | 14 | ); 15 | 16 | // If you want to start measuring performance in your app, pass a function 17 | // to log results (for example: reportWebVitals(console.log)) 18 | // or send to an analytics endpoint. 
Learn more: https://bit.ly/CRA-vitals 19 | reportWebVitals(); 20 | -------------------------------------------------------------------------------- /app/src/interfaces/enums.ts: -------------------------------------------------------------------------------- 1 | export type RequestStatus = "initialized" | "in_flight" | "completed" | "errored" 2 | 3 | export type SitePage = "home" | "docs" | "ask" | "slides" 4 | 5 | export type QAMode = "FLARE" | "RAG" | "SIMPLE" 6 | -------------------------------------------------------------------------------- /app/src/interfaces/interfaces.ts: -------------------------------------------------------------------------------- 1 | import { Dispatch, SetStateAction } from "react"; 2 | 3 | import {QAMode} from "../interfaces/enums"; 4 | 5 | export interface UserDesc { 6 | userId: string|undefined; 7 | } 8 | 9 | export interface FileURLSubmission { 10 | fileURL: string; 11 | } 12 | 13 | export interface QuestionSubmission { 14 | question: string; 15 | } 16 | 17 | export interface QuestionAndAnswer { 18 | question: string; 19 | answer: string | undefined; 20 | question_id: string; 21 | qa_mode: QAMode; 22 | } 23 | 24 | export interface FileItem { 25 | name: string; 26 | url: string; 27 | } -------------------------------------------------------------------------------- /app/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /app/src/reportWebVitals.ts: -------------------------------------------------------------------------------- 1 | import { ReportHandler } from 'web-vitals'; 2 | 3 | const reportWebVitals = (onPerfEntry?: ReportHandler) => { 4 | if (onPerfEntry && onPerfEntry instanceof Function) { 5 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 6 | getCLS(onPerfEntry); 7 | getFID(onPerfEntry); 8 | getFCP(onPerfEntry); 9 | getLCP(onPerfEntry); 10 | 
getTTFB(onPerfEntry); 11 | }); 12 | } 13 | }; 14 | 15 | export default reportWebVitals; 16 | -------------------------------------------------------------------------------- /app/src/utils/api.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | 3 | import {QAMode} from "../interfaces/enums"; 4 | 5 | const base_url: string = process.env["REACT_APP_API_BASE_URL"] || "http://127.0.0.1:8000"; 6 | 7 | export const get_loaded_files = (userId: string, callback: any, error_callback: any) => { 8 | axios.post( 9 | `${base_url}/list_files`, 10 | {user_id: userId} 11 | ) 12 | .then((response: any) => { 13 | callback(response.data); 14 | }) 15 | .catch((error: any) => { 16 | if(error_callback){ 17 | error_callback(error); 18 | } 19 | }); 20 | } 21 | 22 | export const remove_file = (userId: string, file_name: string, callback: any, error_callback: any) => { 23 | axios.post( 24 | `${base_url}/remove_pdf`, 25 | {user_id: userId, file_name: file_name} 26 | ) 27 | .then((response: any) => { 28 | callback(response.data); 29 | }) 30 | .catch((error: any) => { 31 | if(error_callback){ 32 | error_callback(error); 33 | } 34 | }); 35 | } 36 | 37 | 38 | export const submit_url_to_load = (userId: string, fileURL: string, callback: any, error_callback: any) => { 39 | axios.post( 40 | `${base_url}/load_pdf_url`, 41 | { 42 | user_id: userId, 43 | file_url: fileURL, 44 | } 45 | ) 46 | .then((response: any) => { 47 | callback(response.data); 48 | }) 49 | .catch((error: any) => { 50 | if(error_callback){ 51 | error_callback(error); 52 | } 53 | }); 54 | } 55 | 56 | export const submit_question = (qaMode: QAMode, userId: string, question_id: string, question: string, callback: any, error_callback: any) => { 57 | let endpoint: string 58 | if (qaMode === "FLARE") { 59 | endpoint = "flare_ask"; 60 | } else if (qaMode === "RAG") { 61 | endpoint = "rag_ask"; 62 | } else{ // qaMode === "SIMPLE" 63 | endpoint = "llm_ask"; 64 | } 65 | // 
66 | axios.post( 67 | `${base_url}/${endpoint}`, 68 | { 69 | user_id: userId, 70 | question_id: question_id, 71 | question: question, 72 | } 73 | ) 74 | .then((response: any) => { 75 | callback(response.data); 76 | }) 77 | .catch((error: any) => { 78 | if(error_callback){ 79 | error_callback(error); 80 | } 81 | }); 82 | } 83 | -------------------------------------------------------------------------------- /app/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": [ 5 | "dom", 6 | "dom.iterable", 7 | "esnext" 8 | ], 9 | "allowJs": true, 10 | "skipLibCheck": true, 11 | "esModuleInterop": true, 12 | "allowSyntheticDefaultImports": true, 13 | "strict": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "noFallthroughCasesInSwitch": true, 16 | "module": "esnext", 17 | "moduleResolution": "node", 18 | "resolveJsonModule": true, 19 | "isolatedModules": true, 20 | "noEmit": true, 21 | "jsx": "react-jsx" 22 | }, 23 | "include": [ 24 | "src" 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /astra.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "PDF FLARE demo with Langchain and Cassandra as Vector Store", 3 | "description": "Ingest PDF files from their URL into an Astra DB vector store and run FLARE Question-Answering on them. 
(1) Python API (CassIO, LangChain, FastAPI) + React client (Typescript); (2) per-user store of ingested documents; (3) Other Q-A methods in comparison; (4) Start-with-a-click on Gitpod", 4 | "duration": "20 minutes", 5 | "skillLevel": "Intermediate", 6 | "language":["javascript", "python"], 7 | "stack":["cassio", "python", "cassandra", "react", "typescript", "fastAPI"], 8 | "githubUrl": "https://github.com/cassioml/langchain-flare-pdf-qa-demo", 9 | "gitpodUrl": "https://gitpod.io/#https://github.com/cassioml/langchain-flare-pdf-qa-demo", 10 | "tags": [ 11 | { "name":"cassandra" }, 12 | { "name":"astradb" }, 13 | { "name":"vector" }, 14 | { "name":"vectordb" }, 15 | { "name":"cassio" }, 16 | { "name":"flare" } 17 | ], 18 | "category": "starters", 19 | "usecases": ["GenAI", "Question answering", "LLM", "Chatbots"] 20 | } 21 | -------------------------------------------------------------------------------- /images/diagrams/flare_arch1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/images/diagrams/flare_arch1.png -------------------------------------------------------------------------------- /images/diagrams/flare_arch_ask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/images/diagrams/flare_arch_ask.png -------------------------------------------------------------------------------- /images/diagrams/flare_arch_write.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/images/diagrams/flare_arch_write.png -------------------------------------------------------------------------------- /images/diagrams/flare_full.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/images/diagrams/flare_full.png -------------------------------------------------------------------------------- /images/new_vector_ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/images/new_vector_ui.png -------------------------------------------------------------------------------- /images/open_in_gitpod.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /scripts/ingest_openai_key.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | REPO_HOME="/workspace/langchain-flare-pdf-qa-demo" 4 | 5 | DOTENV="$1" 6 | 7 | clear 8 | echo "==========================" 9 | OPENAI_KEY="$(${REPO_HOME}/scripts/read_and_output_nonempty_secret.sh "Enter your OpenAI API Key")"; 10 | echo -e "\nOK" 11 | 12 | echo -e "\n\nOPENAI_API_KEY=\"${OPENAI_KEY}\"" >> "$DOTENV" 13 | -------------------------------------------------------------------------------- /scripts/prepare_and_launch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | REPO_HOME="/workspace/langchain-flare-pdf-qa-demo" 4 | 5 | # source /home/gitpod/.astra/cli/astra-init.sh 6 | clear 7 | echo "==========================" 8 | 9 | ASTRA_TOKEN="$(${REPO_HOME}/scripts/read_and_output_nonempty_secret.sh "Enter your Astra 'DB Admin' Token")"; 10 | echo -e "\nOK" 11 | echo -e "ASTRA_DB_APPLICATION_TOKEN=\"${ASTRA_TOKEN}\"\n" > .env 12 | 13 | DATABASE_ID="" 14 | 
while [ -z "${DATABASE_ID}" ]; do 15 | echo -n "Enter your Database ID: " 16 | read -r DATABASE_ID 17 | done 18 | echo -e "\nOK" 19 | echo -e "ASTRA_DB_ID=\"${DATABASE_ID}\"\n" >> .env 20 | 21 | echo -n "(Optional) Enter your Keyspace: " 22 | read -r KEYSPACE 23 | echo -e "\nOK" 24 | if [ -n "${KEYSPACE}" ]; then 25 | echo -e "ASTRA_DB_KEYSPACE=\"${KEYSPACE}\"\n" >> .env 26 | fi 27 | 28 | "${REPO_HOME}/scripts/ingest_openai_key.sh" "${REPO_HOME}/.env" 29 | 30 | cd "${REPO_HOME}/api" || exit 1 # abort instead of installing/launching from the wrong directory 31 | pip install -r requirements.txt 32 | uvicorn api:app 33 | -------------------------------------------------------------------------------- /scripts/read_and_output_nonempty_secret.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Input of secrets with asterisk-mask on screen. 4 | # The secret is ECHOED BACK, so make sure you use this in another script, like: 5 | # MY_PWD="$(./read_and_output_nonempty_secret.sh "Enter secret")"; 6 | # ... do something with ${MY_PWD} ...
7 | # Usage: 8 | # ./read_and_output_nonempty_secret.sh "PROMPT" 9 | # ./read_and_output_nonempty_secret.sh "PROMPT" CAN_BE_EMPTY 10 | # CAN_BE_EMPTY is either 11 | # "0" (default: question is repeated until input is given) 12 | # "1" accept empty user input and go on 13 | # 14 | 15 | # Adapted from: https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script 16 | SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ); 17 | 18 | PROMPT="$1"; 19 | CAN_BE_EMPTY="$2"; 20 | CAN_BE_EMPTY=${CAN_BE_EMPTY:-"0"}; 21 | SECRET=""; 22 | IS_NOT_FIRST=""; 23 | 24 | if [ "${CAN_BE_EMPTY}" -eq "0" ]; then 25 | while [ -z "${SECRET}" ]; do 26 | SECRET="$("$SCRIPT_DIR/read_and_output_secret.sh" "${PROMPT}" "${IS_NOT_FIRST}")"; 27 | IS_NOT_FIRST="1"; 28 | done 29 | else 30 | SECRET="$("$SCRIPT_DIR/read_and_output_secret.sh" "${PROMPT}" "${IS_NOT_FIRST}")"; 31 | fi 32 | echo "${SECRET}"; 33 | -------------------------------------------------------------------------------- /scripts/read_and_output_secret.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Read and echo a password, echoing responsive 'stars' for input characters 4 | # Also handles: backspace, delete and ^U (kill-line) control-chars 5 | # 6 | # Lightly adapted from: https://stackoverflow.com/questions/63778473/bash-masking-user-input-for-a-password-with-support-for-backspace-and-specia 7 | # 8 | PROMPT="$1"; 9 | PROMPT=${PROMPT:-"Enter secret"}; 10 | IS_NOT_FIRST="$2"; 11 | unset PWORD; 12 | # 13 | if [ -z "${IS_NOT_FIRST}" ]; then 14 | echo -en "${PROMPT}: " 1>&2; 15 | else 16 | echo -en "\n${PROMPT}: " 1>&2; 17 | fi 18 | # 19 | while true; do 20 | IFS= read -r -N1 -s char 21 | # Note: a NULL will return an empty string 22 | # Convert the user's key press to a hexadecimal character code 23 | code=$(printf '%02x' "'$char") # EOL (empty char) -> 00 24 | case "$code" in 25 | ''|0a|0d) break ;; # 
Exit EOF, Linefeed or Return 26 | 08|7f) # backspace or delete 27 | if [ -n "$PWORD" ]; then 28 | PWORD="${PWORD%?}" # drop the last typed character without spawning echo/sed 29 | echo -n $'\b \b' 1>&2 30 | fi 31 | ;; 32 | 15) # ^U or kill line 33 | echo -n "$PWORD" | sed 's/./\cH \cH/g' >&2 34 | PWORD='' 35 | ;; 36 | [01]?) ;; # Ignore ALL other control characters 37 | *) PWORD="$PWORD$char" 38 | echo -n '*' 1>&2 39 | ;; 40 | esac 41 | done 42 | # Emit the secret verbatim on stdout: quoted so it is not word-split or glob-expanded 43 | printf '%s\n' "$PWORD" 44 | -------------------------------------------------------------------------------- /sources/nausea.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/sources/nausea.pdf -------------------------------------------------------------------------------- /sources/the_hobbit.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/39b99175fce0cb9095a9a3703614706b613058b3/sources/the_hobbit.pdf --------------------------------------------------------------------------------