├── app
├── src
│ ├── react-app-env.d.ts
│ ├── interfaces
│ │ ├── enums.ts
│ │ └── interfaces.ts
│ ├── index.css
│ ├── reportWebVitals.ts
│ ├── index.tsx
│ ├── components
│ │ ├── Home.tsx
│ │ ├── App.tsx
│ │ ├── Slides.tsx
│ │ ├── SiteContents.tsx
│ │ ├── Query.tsx
│ │ ├── Identity.tsx
│ │ ├── Docs.tsx
│ │ ├── AskQuestionForm.tsx
│ │ ├── AddFileForm.tsx
│ │ └── App.css
│ └── utils
│ │ └── api.ts
├── public
│ ├── robots.txt
│ ├── squid.jpg
│ ├── favicon.ico
│ ├── manifest.json
│ └── index.html
├── tsconfig.json
└── package.json
├── sources
├── nausea.pdf
└── the_hobbit.pdf
├── images
├── new_vector_ui.png
├── diagrams
│ ├── flare_full.png
│ ├── flare_arch1.png
│ ├── flare_arch_ask.png
│ └── flare_arch_write.png
└── open_in_gitpod.svg
├── api
├── requirements.txt
├── utils
│ └── localCORS.py
├── users.py
├── load_pdf_util.py
├── db.py
├── ai.py
└── api.py
├── scripts
├── ingest_openai_key.sh
├── prepare_and_launch.sh
├── read_and_output_nonempty_secret.sh
└── read_and_output_secret.sh
├── .gitignore
├── .env.template
├── astra.json
├── .gitpod.yml
└── README.md
/app/src/react-app-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="react-scripts" />
2 |
--------------------------------------------------------------------------------
/app/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 |
--------------------------------------------------------------------------------
/sources/nausea.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/sources/nausea.pdf
--------------------------------------------------------------------------------
/app/public/squid.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/app/public/squid.jpg
--------------------------------------------------------------------------------
/app/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/app/public/favicon.ico
--------------------------------------------------------------------------------
/sources/the_hobbit.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/sources/the_hobbit.pdf
--------------------------------------------------------------------------------
/images/new_vector_ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/images/new_vector_ui.png
--------------------------------------------------------------------------------
/images/diagrams/flare_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/images/diagrams/flare_full.png
--------------------------------------------------------------------------------
/images/diagrams/flare_arch1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/images/diagrams/flare_arch1.png
--------------------------------------------------------------------------------
/images/diagrams/flare_arch_ask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/images/diagrams/flare_arch_ask.png
--------------------------------------------------------------------------------
/images/diagrams/flare_arch_write.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/HEAD/images/diagrams/flare_arch_write.png
--------------------------------------------------------------------------------
/app/src/interfaces/enums.ts:
--------------------------------------------------------------------------------
1 | export type RequestStatus = "initialized" | "in_flight" | "completed" | "errored"
2 |
3 | export type SitePage = "home" | "docs" | "ask" | "slides"
4 |
5 | export type QAMode = "FLARE" | "RAG" | "SIMPLE"
6 |
--------------------------------------------------------------------------------
/api/requirements.txt:
--------------------------------------------------------------------------------
1 | cassio>=0.1.3
2 |
3 | # langchain>=0.0.309
4 | git+https://github.com/hemidactylus/langchain@updated-full-preview-remove-shims#egg=langchain&subdirectory=libs/langchain
5 |
6 | fastapi==0.99.1
7 | openai==0.27.8
8 | pypdf==3.12.0
9 | python-dotenv==1.0.0
10 | tiktoken==0.4.0
11 | uvicorn==0.22.0
12 |
--------------------------------------------------------------------------------
/scripts/ingest_openai_key.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Prompt for an OpenAI API key (masked input) and append it to a dotenv file.
# Usage:
#   ./ingest_openai_key.sh PATH_TO_DOTENV

REPO_HOME="/workspace/langchain-flare-pdf-qa-demo"

DOTENV="$1"

# Without a target file the final append can only fail after the user has
# already typed the key, so refuse early.
if [ -z "${DOTENV}" ]; then
    echo "Usage: $0 PATH_TO_DOTENV" 1>&2
    exit 1
fi

clear
echo "=========================="
OPENAI_KEY="$("${REPO_HOME}/scripts/read_and_output_nonempty_secret.sh" "Enter your OpenAI API Key")";
echo -e "\nOK"

# printf writes the key verbatim; `echo -e` would interpret any backslash
# sequence contained in it.
printf '\n\nOPENAI_API_KEY="%s"\n' "${OPENAI_KEY}" >> "$DOTENV"
13 |
--------------------------------------------------------------------------------
/app/src/index.css:
--------------------------------------------------------------------------------
1 | body {
2 | margin: 0;
3 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
4 | 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
5 | sans-serif;
6 | -webkit-font-smoothing: antialiased;
7 | -moz-osx-font-smoothing: grayscale;
8 | }
9 |
10 | code {
11 | font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
12 | monospace;
13 | }
14 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # API / GENERIC
2 |
3 | .env
4 | __pycache__
5 |
6 |
7 | ## CLIENT APP
8 |
9 | # dependencies
10 | /app/node_modules
11 | /app/.pnp
12 | .pnp.js
13 |
14 | # testing
15 | /app/coverage
16 |
17 | # production
18 | /app/build
19 |
20 | # misc
21 | .DS_Store
22 | .env.local
23 | .env.development.local
24 | .env.test.local
25 | .env.production.local
26 |
27 | npm-debug.log*
28 | yarn-debug.log*
29 | yarn-error.log*
30 |
31 | # GITPOD
32 | .gitpod_logs
33 |
--------------------------------------------------------------------------------
/app/src/reportWebVitals.ts:
--------------------------------------------------------------------------------
1 | import { ReportHandler } from 'web-vitals';
2 |
/**
 * Reports web-vitals measurements to `onPerfEntry`.
 *
 * Does nothing unless a function is supplied. The `web-vitals` module is
 * loaded with a dynamic import, so it is only fetched when a handler exists.
 */
const reportWebVitals = (onPerfEntry?: ReportHandler) => {
  if (!onPerfEntry || !(onPerfEntry instanceof Function)) {
    return;
  }
  import('web-vitals').then((vitals) => {
    const { getCLS, getFID, getFCP, getLCP, getTTFB } = vitals;
    // Same registration order as the individual calls: CLS, FID, FCP, LCP, TTFB.
    [getCLS, getFID, getFCP, getLCP, getTTFB].forEach((register) => {
      register(onPerfEntry);
    });
  });
};
14 |
15 | export default reportWebVitals;
16 |
--------------------------------------------------------------------------------
/app/src/interfaces/interfaces.ts:
--------------------------------------------------------------------------------
1 | import { Dispatch, SetStateAction } from "react";
2 |
3 | import {QAMode} from "../interfaces/enums";
4 |
5 | export interface UserDesc {
6 | userId: string|undefined;
7 | }
8 |
9 | export interface FileURLSubmission {
10 | fileURL: string;
11 | }
12 |
13 | export interface QuestionSubmission {
14 | question: string;
15 | }
16 |
17 | export interface QuestionAndAnswer {
18 | question: string;
19 | answer: string | undefined;
20 | question_id: string;
21 | qa_mode: QAMode;
22 | }
23 |
24 | export interface FileItem {
25 | name: string;
26 | url: string;
27 | }
--------------------------------------------------------------------------------
/api/utils/localCORS.py:
--------------------------------------------------------------------------------
1 | # Demo-mode to enable React client to axios request an API (both on localhost)
2 | # Not suitable for production.
3 | from fastapi.middleware.cors import CORSMiddleware
4 |
5 |
def permitReactLocalhostClient(app):
    """Register a wide-open CORS middleware on ``app`` (demo use only).

    Every origin, method and header is allowed, with credentials enabled, so
    that a browser client served from another origin can call this API.
    """
    cors_settings = dict(
        # This is to avoid CORS issues while on gitpod. Don't do in production.
        # Prefer individual source domains, such as:
        #   allow_origins=['http://localhost:3000'],
        allow_origins=['*'],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    app.add_middleware(CORSMiddleware, **cors_settings)
17 |
--------------------------------------------------------------------------------
/app/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "short_name": "React App",
3 | "name": "Create React App Sample",
4 | "icons": [
5 | {
6 | "src": "favicon.ico",
7 | "sizes": "64x64 32x32 24x24 16x16",
8 | "type": "image/x-icon"
9 | },
10 | {
11 | "src": "logo192.png",
12 | "type": "image/png",
13 | "sizes": "192x192"
14 | },
15 | {
16 | "src": "logo512.png",
17 | "type": "image/png",
18 | "sizes": "512x512"
19 | }
20 | ],
21 | "start_url": ".",
22 | "display": "standalone",
23 | "theme_color": "#000000",
24 | "background_color": "#ffffff"
25 | }
26 |
--------------------------------------------------------------------------------
/app/src/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom/client';
3 | import './index.css';
4 | import App from './components/App';
5 | import reportWebVitals from './reportWebVitals';
6 |
7 | const root = ReactDOM.createRoot(
8 | document.getElementById('root') as HTMLElement
9 | );
10 | root.render(
11 | <React.StrictMode>
12 | <App />
13 | </React.StrictMode>
14 | );
15 |
16 | // If you want to start measuring performance in your app, pass a function
17 | // to log results (for example: reportWebVitals(console.log))
18 | // or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
19 | reportWebVitals();
20 |
--------------------------------------------------------------------------------
/app/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es5",
4 | "lib": [
5 | "dom",
6 | "dom.iterable",
7 | "esnext"
8 | ],
9 | "allowJs": true,
10 | "skipLibCheck": true,
11 | "esModuleInterop": true,
12 | "allowSyntheticDefaultImports": true,
13 | "strict": true,
14 | "forceConsistentCasingInFileNames": true,
15 | "noFallthroughCasesInSwitch": true,
16 | "module": "esnext",
17 | "moduleResolution": "node",
18 | "resolveJsonModule": true,
19 | "isolatedModules": true,
20 | "noEmit": true,
21 | "jsx": "react-jsx"
22 | },
23 | "include": [
24 | "src"
25 | ]
26 | }
27 |
--------------------------------------------------------------------------------
/.env.template:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY="REPLACE_WITH_OPENAI_API_KEY"
2 |
3 | ASTRA_DB_APPLICATION_TOKEN="REPLACE_WITH_ASTRA_DB_APPLICATION_TOKEN"
4 | # "AstraCS:xxxxxx....."
5 |
6 | ASTRA_DB_ID="REPLACE_WITH_ASTRA_DB_ID"
7 | # "0123abcd-..."
8 |
9 | ASTRA_DB_KEYSPACE="REPLACE_WITH_ASTRA_DB_KEYSPACE" # optional
10 | # "your_keyspace"
11 |
12 |
13 | # UNCOMMENT THE FOLLOWING FOR A CASSANDRA CLUSTER ...
14 | # USE_CASSANDRA_CLUSTER="1"
15 | # ... then provide these parameters as well:
16 | # CASSANDRA_KEYSPACE="flare_pdf_demo"
17 | # CASSANDRA_CONTACT_POINTS="127.0.0.1" # optional
18 | # CASSANDRA_USERNAME="cassandra" # optional
19 | # CASSANDRA_PASSWORD="cassandra" # optional
20 |
--------------------------------------------------------------------------------
/app/src/components/Home.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import {UserDesc} from "../interfaces/interfaces";
3 |
4 | const Home = (props: UserDesc) => {
5 |
6 | const {userId} = props;
7 |
8 | return (
9 |
10 |
Welcome, {userId}.
11 |
This demo is about:
12 |
13 | - LangChain's FLARE question-answering
14 | - Ingestion of PDF documents
15 | - Astra as a vector store, partitioned per-user
16 | - API: Python (LangChain, CassIO, FastAPI)
17 | - Client: React/Typescript
18 |
19 |
Enjoy!
20 |
21 | );
22 | }
23 |
24 | export default Home
25 |
--------------------------------------------------------------------------------
/app/src/components/App.tsx:
--------------------------------------------------------------------------------
1 | // import React from 'react';
2 | import './App.css';
3 |
4 | import Identity from './Identity';
5 | import SiteContents from './SiteContents';
6 | import {SitePage} from "../interfaces/enums";
7 |
8 | import { useState } from "react"
9 |
10 | function App() {
11 |
12 | const [userId, setUserId] = useState<string|undefined>();
13 | const [page, setPage] = useState<SitePage>("home");
14 |
15 | return (
16 |
17 |
18 |
23 |
24 |
25 |
26 |
31 |
32 |
33 | );
34 | }
35 |
36 | export default App;
37 |
--------------------------------------------------------------------------------
/scripts/prepare_and_launch.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Interactive bootstrap: collect the Astra DB settings and the OpenAI key
# into the repository's .env file, install the API requirements and start
# the API with uvicorn.

REPO_HOME="/workspace/langchain-flare-pdf-qa-demo"
# One absolute path for every write, so the Astra settings end up in the same
# file the OpenAI key is appended to, whatever the current directory is.
DOTENV="${REPO_HOME}/.env"

# source /home/gitpod/.astra/cli/astra-init.sh
clear
echo "=========================="

ASTRA_TOKEN="$("${REPO_HOME}/scripts/read_and_output_nonempty_secret.sh" "Enter your Astra 'DB Admin' Token")";
echo -e "\nOK"
# '>' (not '>>'): every run starts from a fresh .env.
echo -e "ASTRA_DB_APPLICATION_TOKEN=\"${ASTRA_TOKEN}\"\n" > "${DOTENV}"

DATABASE_ID=""
while [ -z "${DATABASE_ID}" ]; do
    echo -n "Enter your Database ID: "
    # -r keeps backslashes literal. If input ends before anything was typed,
    # give up instead of looping forever.
    read -r DATABASE_ID || [ -n "${DATABASE_ID}" ] || exit 1
done
echo -e "\nOK"
echo -e "ASTRA_DB_ID=\"${DATABASE_ID}\"\n" >> "${DOTENV}"

echo -n "(Optional) Enter your Keyspace: "
read -r KEYSPACE
echo -e "\nOK"
# The keyspace line is written only when a value was entered.
if [ -n "${KEYSPACE}" ]; then
    echo -e "ASTRA_DB_KEYSPACE=\"${KEYSPACE}\"\n" >> "${DOTENV}"
fi

"${REPO_HOME}/scripts/ingest_openai_key.sh" "${DOTENV}"

# Do not install or launch from the wrong directory if the cd fails.
cd "${REPO_HOME}/api" || exit 1
pip install -r requirements.txt
uvicorn api:app
33 |
--------------------------------------------------------------------------------
/astra.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "PDF FLARE demo with Langchain and Cassandra as Vector Store",
3 | "description": "Ingest PDF files from their URL into an Astra DB vector store and run FLARE Question-Answering on them. (1) Python API (CassIO, LangChain, FastAPI) + React client (Typescript); (2) per-user store of ingested documents; (3) Other Q-A methods in comparison; (4) Start-with-a-click on Gitpod",
4 | "duration": "20 minutes",
5 | "skillLevel": "Intermediate",
6 | "language":["javascript", "python"],
7 | "stack":["cassio", "python", "cassandra", "react", "typescript", "fastAPI"],
8 | "githubUrl": "https://github.com/cassioml/langchain-flare-pdf-qa-demo",
9 | "gitpodUrl": "https://gitpod.io/#https://github.com/cassioml/langchain-flare-pdf-qa-demo",
10 | "tags": [
11 | { "name":"cassandra" },
12 | { "name":"astradb" },
13 | { "name":"vector" },
14 | { "name":"vectordb" },
15 | { "name":"cassio" },
16 | { "name":"flare" }
17 | ],
18 | "category": "starters",
19 | "usecases": ["GenAI", "Question answering", "LLM", "Chatbots"]
20 | }
21 |
--------------------------------------------------------------------------------
/api/users.py:
--------------------------------------------------------------------------------
1 | # Cassio interaction with the DB
2 | import json
3 |
4 | from cassio.table import ClusteredCassandraTable
5 |
6 | USER_TABLE_NAME = "flare_users"
7 |
8 | userStore = None
9 |
def get_user_store():
    """Return the shared user-files table, creating it on the first call.

    The instance is cached in the module-level ``userStore`` variable, so
    later calls return the same object.
    """
    global userStore
    if userStore is not None:
        return userStore
    userStore = ClusteredCassandraTable(
        table=USER_TABLE_NAME,
        primary_key_type=["TEXT", "TEXT"],
        ordering_in_partition="ASC",
    )
    return userStore
19 |
def files_for_user(user_store, user_id):
    """List the file descriptors stored for ``user_id``.

    Each row of the user's partition holds a JSON document in its
    ``body_blob`` field; the decoded documents are returned in the order the
    store yields the rows.
    """
    decoded_files = []
    for stored_row in user_store.get_partition(partition_id=user_id):
        decoded_files.append(json.loads(stored_row["body_blob"]))
    return decoded_files
27 |
def add_file_to_user(user_store, user_id, file_name, file_url):
    """Record a file under ``user_id``, using the file name as the row id.

    Name and URL are serialized together as a JSON object and stored in the
    row's ``body_blob``.
    """
    descriptor = {"name": file_name, "url": file_url}
    user_store.put(
        partition_id=user_id,
        row_id=file_name,
        body_blob=json.dumps(descriptor),
    )
35 |
def delete_file_from_user(user_store, user_id, file_name):
    """Delete the row for ``file_name`` from the partition of ``user_id``."""
    row_key = dict(partition_id=user_id, row_id=file_name)
    user_store.delete(**row_key)
41 |
--------------------------------------------------------------------------------
/app/src/components/Slides.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 |
3 | import { useState } from "react"
4 |
5 | const slides = [
6 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch1.png",
7 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch_write.png",
8 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_arch_ask.png",
9 | "https://raw.githubusercontent.com/CassioML/langchain-flare-pdf-qa-demo/main/images/diagrams/flare_full.png",
10 | ];
11 | const titles = [
12 | "Tech stack",
13 | "File ingestion",
14 | "Question-time flow",
15 | "FLARE at a glance",
16 | ]
17 |
18 | const Slides = () => {
19 |
20 | const [slide, setSlide] = useState(0);
21 |
22 | return (
23 |
24 |
{titles[slide]} ({slide+1}/{slides.length})
25 |
![]()
setSlide(s => (s+1) % slides.length) }
27 | className="slideImage"
28 | src={slides[slide]}
29 | />
30 |
31 | );
32 | }
33 |
34 | export default Slides
35 |
--------------------------------------------------------------------------------
/api/load_pdf_util.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | from db import set_db_session
5 | from ai import (
6 | get_embeddings,
7 | get_vectorstore,
8 | load_pdf_from_file,
9 | )
10 | from users import (
11 | get_user_store,
12 | add_file_to_user,
13 | )
14 |
15 |
if __name__ == '__main__':
    # Command-line entry point: load local PDF files into the vector store of
    # one user and register each loaded file in that user's file list.
    # Usage: python load_pdf_util.py USER_ID [PDF_PATH ...]
    if len(sys.argv) < 2:
        # Exit with a usage hint rather than an IndexError on sys.argv[1].
        sys.exit(f"Usage: {sys.argv[0]} USER_ID [PDF_PATH ...]")
    user_id = sys.argv[1]
    pdf_filepaths = sys.argv[2:]
    print(f"Trying to import {', '.join(pdf_filepaths)} as user '{user_id}' ...")
    #
    set_db_session()
    embeddings = get_embeddings()
    user_store = get_user_store()
    vectorstore_u = get_vectorstore(embeddings, user_id=user_id)
    print("DB Connection established.")
    #
    for pdf_filepath in pdf_filepaths:
        # Only the last path component is used as the file's name.
        _, file_title = os.path.split(pdf_filepath)
        print(f"* Starting {file_title} ...")
        n_rows = load_pdf_from_file(pdf_filepath, vectorstore_u)
        # NOTE(review): a None result is treated as "nothing inserted" --
        # confirm against load_pdf_from_file in ai.py.
        if n_rows is not None:
            # Files loaded from disk have no source URL; "#" is stored instead.
            add_file_to_user(user_store, user_id, file_title, "#")
            print(f"* Success ({n_rows} rows inserted).")
        else:
            print("* Errored/nothing inserted.")
    print("\nFinished.")
37 |
--------------------------------------------------------------------------------
/.gitpod.yml:
--------------------------------------------------------------------------------
1 | image: gitpod/workspace-full:2023-02-27-14-39-56
2 | tasks:
3 | - name: app-console
4 | before: |
5 | cd /workspace/langchain-flare-pdf-qa-demo/app
6 | npm install
7 | command: |
8 | cd /workspace/langchain-flare-pdf-qa-demo/app
9 | REACT_APP_API_BASE_URL=`gp url 8000` npm start
10 | - name: api-console
11 | before: |
12 | cd /workspace/langchain-flare-pdf-qa-demo
13 | mkdir .gitpod_logs
14 | # curl -Ls "https://dtsx.io/get-astra-cli" | bash | tee -a /workspace/langchain-flare-pdf-qa-demo/.gitpod_logs/astra-cli-install.log
15 | echo -e "\n\n** NOW YOU CAN RUN THE COMMAND scripts/prepare_and_launch.sh **"
16 | command: /workspace/langchain-flare-pdf-qa-demo/scripts/prepare_and_launch.sh | tee -a /workspace/langchain-flare-pdf-qa-demo/.gitpod_logs/dotenv-setup.log
17 | ports :
18 | - port: 3000
19 | onOpen: open-preview
20 | - port: 8000
21 | onOpen: ignore
22 | visibility: public
23 | github:
24 | prebuilds:
25 | master: true
26 | branches: true
27 | pullRequests: true
28 | pullRequestsFromForks: false
29 | addCheck: true
30 | addComment: false
31 | addBadge: true
32 | addLabel: false
33 |
--------------------------------------------------------------------------------
/scripts/read_and_output_nonempty_secret.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Input of secrets with asterisk-mask on screen.
# The secret is ECHOED BACK, so make sure you use this in another script, like:
#       MY_PWD="$(./read_and_output_nonempty_secret.sh "Enter secret")";
#       ... do something with ${MY_PWD} ...
# Usage:
#       ./read_and_output_nonempty_secret.sh "PROMPT"
#       ./read_and_output_nonempty_secret.sh "PROMPT" CAN_BE_EMPTY
# CAN_BE_EMPTY is either
#       "0" (default: question is repeated until input is given)
#       "1" accept empty user input and go on
#

# Adapted from: https://stackoverflow.com/questions/59895/how-do-i-get-the-directory-where-a-bash-script-is-located-from-within-the-script
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd );

PROMPT="$1";
CAN_BE_EMPTY="$2";
CAN_BE_EMPTY=${CAN_BE_EMPTY:-"0"};
SECRET="";
IS_NOT_FIRST="";

if [ "${CAN_BE_EMPTY}" -eq "0" ]; then
    # Ask again until something is typed. From the second attempt on,
    # IS_NOT_FIRST is non-empty, which makes the helper start its prompt on a
    # new line.
    while [ -z "${SECRET}" ]; do
        SECRET="$("${SCRIPT_DIR}/read_and_output_secret.sh" "${PROMPT}" "${IS_NOT_FIRST}")";
        IS_NOT_FIRST="1";
    done
else
    # Resolve the helper next to this script, as the branch above does; a
    # './' path only works when the caller's cwd is the scripts directory.
    SECRET="$("${SCRIPT_DIR}/read_and_output_secret.sh" "${PROMPT}" "${IS_NOT_FIRST}")";
fi
# printf emits the secret verbatim; `echo` would treat a value such as "-n"
# as an option.
printf '%s\n' "${SECRET}";
33 |
--------------------------------------------------------------------------------
/app/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "app",
3 | "version": "0.1.0",
4 | "private": true,
5 | "dependencies": {
6 | "@testing-library/jest-dom": "^5.17.0",
7 | "@testing-library/react": "^13.4.0",
8 | "@testing-library/user-event": "^13.5.0",
9 | "@types/jest": "^27.5.2",
10 | "@types/node": "^16.18.39",
11 | "@types/react": "^18.2.16",
12 | "@types/react-dom": "^18.2.7",
13 | "axios": "^1.4.0",
14 | "react": "^18.2.0",
15 | "react-dom": "^18.2.0",
16 | "react-hook-form": "^7.45.4",
17 | "react-router-dom": "^6.14.2",
18 | "react-scripts": "5.0.1",
19 | "typescript": "^4.9.5",
20 | "uuid": "^9.0.0",
21 | "web-vitals": "^2.1.4"
22 | },
23 | "scripts": {
24 | "start": "react-scripts start",
25 | "build": "react-scripts build",
26 | "test": "react-scripts test",
27 | "eject": "react-scripts eject"
28 | },
29 | "eslintConfig": {
30 | "extends": [
31 | "react-app",
32 | "react-app/jest"
33 | ]
34 | },
35 | "browserslist": {
36 | "production": [
37 | ">0.2%",
38 | "not dead",
39 | "not op_mini all"
40 | ],
41 | "development": [
42 | "last 1 chrome version",
43 | "last 1 firefox version",
44 | "last 1 safari version"
45 | ]
46 | },
47 | "devDependencies": {
48 | "@types/uuid": "^9.0.2"
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/scripts/read_and_output_secret.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Read and echo a password, echoing responsive 'stars' for input characters
# Also handles: backspaces, deleted and ^U (kill-line) control-chars
#
# Lightly adapted from: https://stackoverflow.com/questions/63778473/bash-masking-user-input-for-a-password-with-support-for-backspace-and-specia
#
# Usage:
#       ./read_and_output_secret.sh [PROMPT] [IS_NOT_FIRST]
# The prompt and the stars are written to stderr; only the secret goes to
# stdout, so a caller can capture it with $( ... ).
# A non-empty IS_NOT_FIRST makes the prompt start on a new line.
#
PROMPT="$1";
PROMPT=${PROMPT:-"Enter secret"};
IS_NOT_FIRST="$2";
unset PWORD;
#
if [ -z "${IS_NOT_FIRST}" ]; then
    echo -en "${PROMPT}: " 1>&2;
else
    echo -en "\n${PROMPT}: " 1>&2;
fi
#
while true; do
    # One raw character at a time, without terminal echo. A failed read means
    # end of input: stop here, otherwise the empty char (code 00) would fall
    # into the "ignore control characters" arm and the loop would never end.
    IFS= read -r -N1 -s char || break
    # Note a NULL will return a empty string
    # Convert users key press to hexadecimal character code
    code=$(printf '%02x' "'$char") # EOL (empty char) -> 00
    case "$code" in
        ''|0a|0d) break ;; # Exit EOF, Linefeed or Return
        08|7f) # backspace or delete
            if [ -n "$PWORD" ]; then
                # Drop the last character and erase one star on screen.
                PWORD="${PWORD%?}"
                echo -n $'\b \b' 1>&2
            fi
            ;;
        15) # ^U or kill line
            # Erase one star per stored character, then forget the input.
            echo -n "$PWORD" | sed 's/./\cH \cH/g' >&2
            PWORD=''
            ;;
        [01]?) ;; # Ignore ALL other control characters
        *) PWORD="$PWORD$char"
            echo -n '*' 1>&2
            ;;
    esac
done
# Quoted and via printf: an unquoted `echo $PWORD` would glob-expand
# characters such as '*' and collapse whitespace inside the secret.
printf '%s\n' "$PWORD"
44 |
--------------------------------------------------------------------------------
/app/src/components/SiteContents.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import {UserDesc} from "../interfaces/interfaces";
3 |
4 | import Home from "./Home";
5 | import Docs from "./Docs";
6 | import Query from "./Query";
7 | import Slides from "./Slides";
8 |
9 | const SiteContents = (props: UserDesc & {page: any, setPage: any}) => {
10 |
11 | const {userId, page, setPage} = props;
12 |
13 | return (
14 |
15 | { userId && <>
16 |
17 | setPage("home")}>Home
18 | |
19 | setPage("docs")}>My docs
20 | |
21 | setPage("ask")}>Ask questions
22 | |
23 | setPage("slides")}>Info
24 |
25 |
26 | { (page === "home" && <>
27 |
28 | > )}
29 | { (page === "docs" && <>
30 |
31 | > )}
32 | { (page === "ask" && <>
33 |
34 | > )}
35 | { (page === "slides" && <>
36 |
37 | > )}
38 |
39 | > }
40 | { userId === undefined && <>
41 |

42 | > }
43 |
44 | );
45 | }
46 |
47 | export default SiteContents
48 |
--------------------------------------------------------------------------------
/api/db.py:
--------------------------------------------------------------------------------
1 | import os
2 | from dotenv import load_dotenv
3 |
4 | import cassio
5 |
6 | from cassandra.cluster import (
7 | Cluster,
8 | )
9 | from cassandra.auth import PlainTextAuthProvider
10 |
11 |
12 | load_dotenv("../.env")
13 |
def set_db_session():
    """Initialize cassio's database connection from environment variables.

    When ``USE_CASSANDRA_CLUSTER`` holds a non-zero integer, the connection
    is set up by ``set_cassandra_session_keyspace``. Otherwise cassio is
    initialized with ``ASTRA_DB_APPLICATION_TOKEN`` and ``ASTRA_DB_ID`` (both
    required) and the optional ``ASTRA_DB_KEYSPACE``.

    Raises:
        KeyError: if a required Astra variable is not set.
        ValueError: if ``USE_CASSANDRA_CLUSTER`` is not an integer literal.
    """
    # A separate route for a Cassandra cluster session
    use_cassandra = int(os.environ.get("USE_CASSANDRA_CLUSTER", "0"))
    if use_cassandra != 0:
        set_cassandra_session_keyspace()
    else:
        cassio.init(
            token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
            database_id=os.environ["ASTRA_DB_ID"],
            # .get(): the keyspace is optional, None is passed when unset.
            keyspace=os.environ.get("ASTRA_DB_KEYSPACE"),
        )
26 |
27 |
def set_cassandra_session_keyspace():
    """Connect to a Cassandra cluster and hand the session to cassio.

    Reads ``CASSANDRA_KEYSPACE`` (required) and the optional
    ``CASSANDRA_CONTACT_POINTS`` (comma-separated), ``CASSANDRA_USERNAME``
    and ``CASSANDRA_PASSWORD`` environment variables.
    """
    raw_contact_points = os.environ.get("CASSANDRA_CONTACT_POINTS", "")
    hosts = []
    for candidate in raw_contact_points.split(','):
        host = candidate.strip()
        if host:
            hosts.append(host)
    keyspace = os.environ["CASSANDRA_KEYSPACE"]
    username = os.environ.get("CASSANDRA_USERNAME")
    password = os.environ.get("CASSANDRA_PASSWORD")
    # Authenticate only when both credentials are provided.
    if username and password:
        auth_provider = PlainTextAuthProvider(username, password)
    else:
        auth_provider = None

    # An empty host list is passed on as None.
    c_cluster = Cluster(hosts or None, auth_provider=auth_provider)
    session = c_cluster.connect()
    print("Cassandra session created.")
    #
    cassio.init(session=session, keyspace=keyspace)
54 |
55 |
--------------------------------------------------------------------------------
/app/src/components/Query.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import { useState } from "react"
3 | import {UserDesc, QuestionAndAnswer} from "../interfaces/interfaces";
4 |
5 | import {QAMode} from "../interfaces/enums";
6 | import AskQuestionForm from "./AskQuestionForm";
7 |
8 | const Query = (props: UserDesc) => {
9 |
10 | const {userId} = props;
11 |
12 | const [history, setHistory] = useState<QuestionAndAnswer[]>([]);
13 |
14 | const completeQuestion = (q_id: string, answer: string | undefined) => {
15 | console.log(`completing ${q_id} with ${answer}`);
16 | setHistory( (h) => h.map( q => {
17 | if (q.question_id === q_id){
18 | return {...q, ...{answer: answer}};
19 | }else{
20 | return q;
21 | }
22 | }));
23 | }
24 | const addQuestion = (q_id: string, qaMode: QAMode, question: string) => {
25 | console.log(`adding ${q_id}: ${question}`);
26 | setHistory( (h) => h.concat( [{
27 | question_id: q_id,
28 | question: question,
29 | answer: undefined,
30 | qa_mode: qaMode,
31 | }] ));
32 | }
33 |
34 | return (
35 |
36 |
37 | { ( (history.length > 0) && <>
38 |
Question history:
39 | { history.slice().reverse().map( q =>
40 |
41 |
42 | {q.question}
43 | {q.qa_mode}
44 |
45 |
{q.answer === undefined ? "⌛" : q.answer || "(no answer)"}
46 |
47 | ) }
48 | > ) }
49 |
50 | );
51 | }
52 |
53 | export default Query
54 |
--------------------------------------------------------------------------------
/app/src/components/Identity.tsx:
--------------------------------------------------------------------------------
1 | import { useState } from "react"
2 | import { Dispatch, SetStateAction } from "react";
3 |
4 | import './App.css';
5 |
6 |
7 | export interface UserProps {
8 | userId: string|undefined;
9 | setUserId: Dispatch<SetStateAction<string|undefined>>;
10 | }
11 |
12 |
13 | const Identity = (props: {userId: any, setUserId: any, setPage: any}) => {
14 |
15 | const {userId, setUserId, setPage} = props;
16 |
17 | const [editUserId, setEditUserId] = useState('');
18 |
19 |
20 | const trySetUserId = (newUserId: string) => {
21 | if(newUserId){
22 | setUserId(newUserId);
23 | setPage("home");
24 | }
25 | }
26 |
27 | return ( <>
28 |
29 | { !userId &&
30 |
31 | PDF FLARE demo - Who are you?
32 | setEditUserId(e.target.value)}
38 | onKeyPress={(e) => {if (e.key === 'Enter') { trySetUserId(editUserId) }}}
39 | />
40 |
46 |
47 |
}
48 | { userId &&
49 |
50 | PDF FLARE demo - Welcome, {userId}
51 |
52 |
60 |
61 |
62 |
63 |
}
64 |
65 | > );
66 | }
67 |
68 | export default Identity
69 |
--------------------------------------------------------------------------------
/app/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
12 |
13 |
17 |
18 |
27 | PDF FLARE demo
28 |
29 |
30 |
31 |
32 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/app/src/utils/api.ts:
--------------------------------------------------------------------------------
1 | import axios from "axios";
2 |
3 | import {QAMode} from "../interfaces/enums";
4 |
5 | const base_url: string = process.env["REACT_APP_API_BASE_URL"] || "http://127.0.0.1:8000";
6 |
/**
 * POSTs `payload` as the request body to `${base_url}/${endpoint}` and
 * reports the outcome through callbacks.
 *
 * @param endpoint - path segment appended to the API base URL
 * @param payload - request body
 * @param callback - called with `response.data` on success
 * @param error_callback - called with the error on failure; failures are
 *   dropped when it is falsy. Because `.catch` is chained after `.then`, an
 *   exception thrown by `callback` is delivered here as well.
 */
const postToApi = (endpoint: string, payload: object, callback: any, error_callback: any) => {
  axios.post(`${base_url}/${endpoint}`, payload)
    .then((response: any) => {
      callback(response.data);
    })
    .catch((error: any) => {
      if (error_callback) {
        error_callback(error);
      }
    });
}

/** Requests the list of files loaded for `userId` (`/list_files`). */
export const get_loaded_files = (userId: string, callback: any, error_callback: any) => {
  postToApi("list_files", {user_id: userId}, callback, error_callback);
}

/** Asks the API to remove `file_name` for `userId` (`/remove_pdf`). */
export const remove_file = (userId: string, file_name: string, callback: any, error_callback: any) => {
  postToApi("remove_pdf", {user_id: userId, file_name: file_name}, callback, error_callback);
}

/** Submits the URL of a file to be loaded for `userId` (`/load_pdf_url`). */
export const submit_url_to_load = (userId: string, fileURL: string, callback: any, error_callback: any) => {
  postToApi("load_pdf_url", {user_id: userId, file_url: fileURL}, callback, error_callback);
}

/**
 * Sends a question to the endpoint matching `qaMode`:
 * "FLARE" -> `flare_ask`, "RAG" -> `rag_ask`, anything else -> `llm_ask`.
 */
export const submit_question = (qaMode: QAMode, userId: string, question_id: string, question: string, callback: any, error_callback: any) => {
  let endpoint: string
  if (qaMode === "FLARE") {
    endpoint = "flare_ask";
  } else if (qaMode === "RAG") {
    endpoint = "rag_ask";
  } else { // qaMode === "SIMPLE"
    endpoint = "llm_ask";
  }
  //
  postToApi(
    endpoint,
    {
      user_id: userId,
      question_id: question_id,
      question: question,
    },
    callback,
    error_callback
  );
}
83 |
--------------------------------------------------------------------------------
/app/src/components/Docs.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import { useEffect, useState } from "react"
3 |
4 | import {UserDesc, FileItem} from "../interfaces/interfaces";
5 | import {RequestStatus} from "../interfaces/enums";
6 | import {get_loaded_files, remove_file} from "../utils/api";
7 |
8 | import AddFileForm from "./AddFileForm";
9 |
10 | const Docs = (props: UserDesc) => {
11 |
12 | const {userId} = props;
13 |
14 | const [queryState, setQueryState] = useState("initialized");
15 | const [fileList, setFileList] = useState([]);
16 |
17 | const refreshFiles = () => {
18 | setQueryState("in_flight");
19 | get_loaded_files(
20 | userId || "",
21 | (r: FileItem[]) => {
22 | setFileList(r);
23 | setQueryState("completed");
24 | },
25 | (e: any) => {console.log(e); setQueryState("errored");}
26 | );
27 | }
28 |
29 | const removeFile = (file_name: string) => {
30 | console.log(`Removing ${file_name}`);
31 | setQueryState("in_flight");
32 | remove_file(
33 | userId || "",
34 | file_name,
35 | (r: any) => {
36 | console.log(`Removed ${r.num_deleted} entries.`);
37 | refreshFiles();
38 | },
39 | (e: any) => {console.log(e); setQueryState("errored");}
40 | );
41 | }
42 |
43 | useEffect(
44 | refreshFiles,
45 | [userId]
46 | );
47 |
48 | return (
49 |
50 | { (queryState === "initialized") &&
51 |
(nothing to see here)
52 | }
53 | { (queryState === "in_flight") &&
54 |
wait...
55 | }
56 | { (queryState === "completed") &&
57 |
{userId}'s docs
58 |
59 | { fileList.map( (f: FileItem) => -
60 | {f.name} (source)
61 |
62 |
) }
63 |
64 |
65 |
66 |
67 | }
68 | { (queryState === "errored") &&
69 |
Error fetching docs
70 | }
71 |
72 | );
73 | }
74 |
75 | export default Docs
76 |
--------------------------------------------------------------------------------
/app/src/components/AskQuestionForm.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import { useState } from "react"
3 | import {v4 as uuidv4} from 'uuid';
4 | import { useForm } from "react-hook-form";
5 |
6 | import {UserDesc, QuestionSubmission} from "../interfaces/interfaces";
7 | import {QAMode} from "../interfaces/enums";
8 | import {submit_question} from "../utils/api";
9 |
10 | const AskQuestionForm = (props: UserDesc & {completeQuestion: (q_id: string, answer: string|undefined) => void , addQuestion: (q_id: string, qaMode: QAMode, question: string) => void;}) => {
11 |
12 | const {userId, addQuestion, completeQuestion} = props;
13 |
14 | const {register, handleSubmit, reset} = useForm();
15 |
16 | const [qaMode, setQaMode] = useState("FLARE");
17 |
18 | const onSubmitHandler = (values: QuestionSubmission) => {
19 | if (values.question) {
20 | const q_id = uuidv4();
21 | console.log(`AskQuestionForm submitted[${q_id}], with ${values.question}.`);
22 | reset();
23 | addQuestion(q_id, qaMode, values.question);
24 | submit_question(
25 | qaMode,
26 | userId || "",
27 | q_id,
28 | values.question,
29 | (response: any) => {
30 | console.log(`Gotten: ${JSON.stringify(response)}`);
31 | if (response.success){
32 | console.log(`Answer to ${q_id}: ${response.answer}`);
33 | completeQuestion(q_id, response.answer);
34 | }else{
35 | console.log(`Failed answer to ${q_id}`);
36 | completeQuestion(q_id, "(Failure!)");
37 | }
38 | },
39 | (e: any) => {
40 | console.log(e);
41 | completeQuestion(q_id, "(Failure!)");
42 | }
43 | );
44 | } else {
45 | console.log(`AskQuestionForm submitted but EMPTY INPUT`);
46 | }
47 | };
48 |
49 | return (
50 |
51 |
52 | QA mode:
53 | setQaMode("FLARE")}>
54 | flare
55 |
56 | setQaMode("RAG")}>
57 | rag
58 |
59 | setQaMode("SIMPLE")}>
60 | simple
61 |
62 |
63 |
70 |
71 | );
72 |
73 | }
74 |
75 | export default AskQuestionForm
76 |
--------------------------------------------------------------------------------
/app/src/components/AddFileForm.tsx:
--------------------------------------------------------------------------------
1 | import './App.css';
2 | import { useState } from "react"
3 | import { useForm } from "react-hook-form";
4 |
5 | import {UserDesc, FileURLSubmission} from "../interfaces/interfaces";
6 | import {RequestStatus} from "../interfaces/enums";
7 | import {submit_url_to_load} from "../utils/api";
8 |
9 | const AddFileForm = (props: UserDesc & {refreshFiles: () => void}) => {
10 |
11 | const {userId, refreshFiles} = props;
12 |
13 | const {register, handleSubmit} = useForm();
14 |
15 | const [submitState, setSubmitState] = useState("initialized");
16 |
17 | const [showExampleUrls, setShowExampleUrls] = useState(false);
18 |
19 | const onSubmitHandler = (values: FileURLSubmission) => {
20 | if (values.fileURL) {
21 | setSubmitState("in_flight");
22 | console.log(`AddFileForm submitted, with ${values.fileURL}.`);
23 | submit_url_to_load(
24 | userId || "",
25 | values.fileURL,
26 | (response: any) => {
27 | console.log(`Gotten: ${JSON.stringify(response)}`);
28 | if (response.success){
29 | setSubmitState("completed");
30 | console.log(`Written ${response.n_rows} rows to vector table.`);
31 | refreshFiles();
32 | }else{
33 | console.log("Something went wrong loading the file");
34 | setSubmitState("errored");
35 | }
36 | },
37 | (e: any) => {console.log(e); setSubmitState("errored");}
38 | );
39 | } else {
40 | console.log(`AddFileForm submitted but EMPTY INPUT`);
41 | }
42 | };
43 |
44 | const toggleExampleUrls = () => {
45 | setShowExampleUrls( (v) => !v );
46 | }
47 |
48 | if (submitState === "initialized" || submitState === "errored" || submitState === "completed"){
49 | return (
50 |
51 | { (submitState === "errored") &&
52 |
53 | Submission errored!
54 |
55 | }
56 |
75 |
76 | );
77 | } else if (submitState === "in_flight"){
78 | return file submitted...
79 | } else {
80 | return (trouble with submission form)
81 | }
82 | }
83 |
84 | export default AddFileForm
85 |
--------------------------------------------------------------------------------
/app/src/components/App.css:
--------------------------------------------------------------------------------
/* Root container: page background and base font.
   A generic `monospace` fallback is listed in case Courier is not installed. */
.App {
  background-color: #6599cc;
  font-family: Courier, monospace;
  min-height: 100vh;
}
6 |
7 | .App-header {
8 | display: flex;
9 | flex-direction: column;
10 | font-size: 120%;
11 | font-weight: bold;
12 | }
13 |
14 | .App-identity {
15 | display: flex;
16 | flex-direction: column;
17 | padding-left: 40px;
18 | color: #12386f;
19 | }
20 |
21 | .App-navbar {
22 | padding-left: 10px;
23 | }
24 |
25 | .App-navbar span {
26 | font-size: 120%;
27 | font-weight: bold;
28 | margin: 1vh;
29 | cursor: pointer;
30 | color: #A0FFFF;
31 | }
32 |
33 | .linkUrl:visited {
34 | color: #A0FFFF;
35 | }
36 |
37 | .linkUrl {
38 | color: #A0FFFF;
39 | }
40 |
41 | .linkUrl:hover {
42 | color: #12386f;
43 | }
44 |
45 | .App-body {
46 | margin-top: 20px;
47 | padding-left: 30px;
48 | padding-right: 30px;
49 | color: #12386f;
50 | }
51 |
52 | .userName {
53 | color: black;
54 | font-weight: bold;
55 | }
56 |
/* Short inline text input.
   A generic `monospace` fallback is listed in case Courier is not installed. */
.inlineInput {
  width: 18vh;
  margin-left: 15px;
  padding-left: 10px;
  font-size: 100%;
  background-color: transparent;
  border: 2px solid #A0FFFF;
  border-radius: 10px;
  color: black;
  font-weight: bold;
  font-family: Courier, monospace;
}
69 |
/* Wide inline text input (half of the container width).
   A generic `monospace` fallback is listed in case Courier is not installed. */
.inlineInputLong {
  width: 50%;
  margin-left: 15px;
  padding-left: 10px;
  font-size: 100%;
  background-color: transparent;
  border: 2px solid #A0FFFF;
  border-radius: 10px;
  color: black;
  font-weight: bold;
  font-family: Courier, monospace;
}
82 |
83 | .inlineButton {
84 | margin-left: 25px;
85 | font-size: 100%;
86 | background-color: transparent;
87 | border: 2px solid #A0FFFF;
88 | border-radius: 10px;
89 | color: #12386f;
90 | cursor: pointer;
91 | }
92 |
93 | .headerSubtitle {
94 | color: #90DFDF;
95 | margin-top: 0px;
96 | font-size: 60%;
97 | font-style: italic;
98 | }
99 |
100 | .urlExamples {
101 | margin: 3vh 3vh;
102 | border-radius: 12px;
103 | padding: 8px;
104 | border: 2px solid #12386f;
105 | font-size: 80%;
106 | color: #A0FFFF;
107 | }
108 |
109 | .urlExamples .urlExample {
110 | font-weight: bold;
111 | }
112 |
113 | ul.fileList {
114 | list-style-type: none;
115 | }
116 |
117 | .fileList > li {
118 | margin-left: 3vh;
119 | font-weight: bold;
120 | margin-top: 10px;
121 | }
122 |
123 | .App-link {
124 | color: #61dafb;
125 | }
126 |
127 | .questionBlock {
128 | margin: 1vh 3vh;
129 | border-radius: 12px;
130 | padding: 12px;
131 | border: 2px solid purple;
132 | font-weight: bold;
133 | }
134 |
135 | .questionBlock > .questionBody {
136 | color: #A0FFFF;
137 | font-size: 80%;
138 | }
139 |
140 | .questionBlock .QAMode {
141 | margin-left: 2vh;
142 | padding: 5px;
143 | color: #12386f;
144 | background-color: #A0FFFF;
145 | border-radius: 10px;
146 | border: 1px solid black;
147 | }
148 |
149 | .questionBlock > .answerBody {
150 | color: #12386f;
151 | }
152 |
153 | .qaMode {
154 | font-variant: small-caps;
155 | font-weight: bold;
156 | margin-left: 6px;
157 | margin-right: 6px;
158 | padding-left: 4px;
159 | padding-right: 4px;
160 | border-radius: 5px;
161 | cursor: pointer;
162 | }
163 |
164 | .selected {
165 | background-color: #A0FFFF;
166 | border: 1px solid #12386f;
167 | color: #12386f;
168 | }
169 |
170 | .unselected {
171 | color: #70BFBF;
172 | }
173 |
174 | hr.fancy {
175 | border: 5px solid #4579ac;
176 | }
177 |
178 | .homeImage {
179 | display: block;
180 | margin-left: auto;
181 | margin-right: auto;
182 | width: 40%;
183 | margin-top: 5vh;
184 | }
185 |
186 | .slideTitle {
187 | display:table;
188 | margin:0 auto;
189 | font-weight: bold;
190 | font-size: 140%;
191 | color: #12386f;
192 | margin-bottom: 3vh;
193 | }
194 |
195 | .slideImage {
196 | display: block;
197 | margin-left: auto;
198 | margin-right: auto;
199 | border-radius: 10px;
200 | max-width:100%;
201 | max-height:75vh;
202 | }
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PDF FLARE demo with Langchain and Cassandra as Vector Store
2 |
3 | ## What
4 |
5 | Ingest PDF files from their URL into an Astra DB vector store
6 | and run FLARE Question-Answering on them.
7 |
8 | Features:
9 |
10 | - Python API (CassIO, LangChain, FastAPI) + React client (Typescript)
11 | - per-user store of ingested documents
12 | - Other Q-A methods for comparison
13 | - Start-with-a-click on Gitpod
14 |
15 | For some architectural/flow diagrams, check out [this dir](images/diagrams).
16 |
17 | ## Prerequisites
18 |
19 | You need:
20 |
21 | - an [Astra](https://astra.datastax.com) Vector Database (free tier is fine!). **You'll be asked to supply a [Database Administrator token](https://awesome-astra.github.io/docs/pages/astra/create-token/#c-procedure)**, the string starting with `AstraCS:...`;
22 | - likewise, get your [Database ID](https://awesome-astra.github.io/docs/pages/astra/faq/#where-should-i-find-a-database-identifier) ready, you will have to enter it;
23 | - an **OpenAI API Key**. (More info [here](https://cassio.org/start_here/#llm-access), note that out-of-the-box this demo supports OpenAI unless you tinker with the code.)
24 |
25 | Note: If you have switched Astra to the New Vector Developer Experience UI, click here for instructions on the DB credentials.
26 |
27 |
28 | Go to your database dashboard and click on the "Connection Details" button on the right. A dialog will open with instructions for connecting. You'll do two things:
29 |
30 | - click "Generate Token" and copy the `AstraCS:...` string in its entirety once that appears on the dialog;
31 | - locate the `api_endpoint=...` line in the Python code example. The database ID is the sequence after `https://` and before the dash + region name (e.g. `-us-east1`) in the definition of the endpoint. It looks like `01234567-89ab-cdef-0123-456789abcdef` (and always has this length).
32 |
33 | 
34 |
35 |
36 |
37 |
38 | ## How-to (Gitpod)
39 |
40 | Click this button, confirm opening of the workspace
41 | (you might need to do a Gitpod login in the process) and wait 1-2 minutes:
42 | instructions will show up in the console below, where you'll have
43 | to provide connection details and OpenAI key when prompted.
44 |
45 | In the meantime, the app will open in the top panel.
46 |
47 |
48 |
49 | ## How-to (local run)
50 |
51 | ### API
52 |
53 | Create a Python `3.8+` virtual environment and install
54 | the dependencies in `api/requirements.txt`.
55 |
56 | Make a copy `cp .env.template .env` and set the secrets for your DB and OpenAI.
57 |
58 | Finally enter the subdirectory and launch the API:
59 |
60 | ```
61 | cd api
62 | uvicorn api:app
63 | ```
64 |
65 | #### Use a Cassandra cluster
66 |
67 | To use a Cassandra cluster instead of Astra DB, check the `.env.template` file:
68 | uncomment the `USE_CASSANDRA_CLUSTER` environment variable in your `.env`
69 | and provide the necessary connection parameters (keyspace name, plus:
70 | contact points and/or authentication if required).
71 |
72 | The next time you start the API, it will attempt connecting to Cassandra.
73 |
74 | ### Client
75 |
76 | You need a modern Node.js. Enter the subdirectory and install the dependencies:
77 |
78 | ```
79 | cd app
80 | npm install
81 | ```
82 |
83 | If the API is running you can launch the client:
84 |
85 | ```
86 | npm start
87 | ```
88 |
89 | and point your browser to local port 3000.
90 |
91 | _(Note: if the API runs elsewhere, you can launch the client with `REACT_APP_API_BASE_URL="http://something..." npm start`.)_
92 |
93 | #### User journey
94 |
95 | First, "log in" (mocked) with a made-up username.
96 |
97 | Then you access the panel. Go to the "Docs" panel, where you can load pdf files
98 | by entering their URL (click on the "i" icon to get example URLs to paste).
99 |
100 | You can "Ask questions", comparing different methods (FLARE/RAG/Plain LLM) and
101 | their answers.
102 |
--------------------------------------------------------------------------------
/api/ai.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import tempfile
3 | import os
4 | from dotenv import load_dotenv
5 | from urllib import request
6 |
7 | from langchain.embeddings.openai import OpenAIEmbeddings
8 | from langchain.vectorstores import Cassandra
9 | from langchain.document_loaders import PyPDFLoader
10 | from langchain.text_splitter import RecursiveCharacterTextSplitter
11 | from langchain.chains import FlareChain
12 | from langchain.chat_models import ChatOpenAI
13 | from langchain.llms import OpenAI
14 | from langchain.indexes.vectorstore import VectorStoreIndexWrapper
15 |
16 |
17 | VECTOR_PDF_TABLE_NAME = "flare_doc_bank"
18 |
19 | load_dotenv("../.env")
20 |
21 | embeddingService = None
22 | chatModel = None
23 | llm = None
24 |
25 |
def get_chat_model():
    """Return the module-wide ChatOpenAI instance (temperature 0), creating it on first use."""
    global chatModel
    if chatModel is not None:
        return chatModel
    chatModel = ChatOpenAI(temperature=0)
    return chatModel
31 |
32 |
def get_llm():
    """Return the module-wide OpenAI LLM instance (temperature 0), creating it on first use."""
    global llm
    if llm is not None:
        return llm
    llm = OpenAI(temperature=0)
    return llm
38 |
39 |
def get_flare_chain(chmodel, vstore):
    """
    Build a FlareChain on top of the given chat model, using the vector
    store (through its retriever) as the source of documents.
    """
    return FlareChain.from_llm(
        chmodel,
        retriever=vstore.as_retriever(),
        max_generation_len=164,
        min_prob=0.3,
    )
49 |
50 |
def get_embeddings():
    """Return the module-wide OpenAIEmbeddings instance, creating it on first use."""
    global embeddingService
    if embeddingService is not None:
        return embeddingService
    embeddingService = OpenAIEmbeddings()
    return embeddingService
56 |
57 |
def get_rag_index(embeddings, user_id):
    """Wrap the vector store of the given user in a VectorStoreIndexWrapper."""
    return VectorStoreIndexWrapper(
        vectorstore=get_vectorstore(embeddings, user_id=user_id),
    )
62 |
63 |
def get_vectorstore(embeddings, user_id=None):
    """
    Create a partitioned Cassandra vector store on the PDF table.

    Without a user_id this is treated as the init call: the table is
    provisioned and a made-up partition id ("placeholder") is used.
    With a user_id, provisioning is skipped and the store is bound to
    that user's partition.
    """
    is_init_call = user_id is None
    return Cassandra(
        embedding=embeddings,
        table_name=VECTOR_PDF_TABLE_NAME,
        partition_id="placeholder" if is_init_call else user_id,
        partitioned=True,
        skip_provisioning=not is_init_call,
    )
80 |
81 | # PDF loading machinery
def _finalize_metadata(md_dict):
    """
    Return a copy of the metadata dict in which the "source" entry (if any)
    is reduced to its last path component; other entries are kept as they are.
    """
    finalized = {}
    for key, value in md_dict.items():
        if key == "source":
            value = os.path.basename(value)
        finalized[key] = value
    return finalized
87 |
88 |
def load_pdf_from_file(file_name, vector_store):
    """
    Split a local PDF into text chunks and add them to the vector store.

    The PDF is split with a RecursiveCharacterTextSplitter (chunk size 500,
    overlap 80); each chunk is stored with its metadata, where "source" is
    reduced to the bare file name by `_finalize_metadata`.

    Returns the number of chunks stored, or None when nothing could be
    stored (no chunks extracted, or any error along the way). Errors are
    printed rather than silently discarded, so failed ingestions can be
    diagnosed from the API logs.
    """
    try:
        print(f"Loading {file_name}")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=80,
        )
        documents = list(
            PyPDFLoader(file_name).load_and_split(text_splitter=text_splitter)
        )
        if not documents:
            # Made explicit: this case used to fail through an unpacking error.
            print(f"No text chunks extracted from {file_name}")
            return None
        texts = [doc.page_content for doc in documents]
        metadatas = [_finalize_metadata(doc.metadata) for doc in documents]
        vector_store.add_texts(texts=texts, metadatas=metadatas)
        print("Finished loading.")
        return len(documents)
    except Exception as exc:
        print(f"Failed loading {file_name}: {exc!r}")
        return None
113 |
def extract_file_title(file_url):
    """
    Derive a file name from a URL: the last segment of the URL *path*.

    The query string and fragment are ignored, so a "/" inside them cannot
    be mistaken for a path separator. Falls back to "unnamed.pdf" when the
    input is not a string, cannot be parsed, or has no usable last segment
    (e.g. the URL ends with "/", or the segment is "." or "..").
    Never raises.
    """
    from urllib.parse import urlsplit

    fallback = "unnamed.pdf"
    if not isinstance(file_url, str):
        return fallback
    try:
        url_path = urlsplit(file_url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host)
        return fallback
    title = url_path.rsplit("/", 1)[-1]
    if title in ("", ".", ".."):
        return fallback
    return title
123 |
124 |
def load_pdf_from_url(file_url, vector_store):
    """
    Download a PDF from `file_url` into a temporary directory and ingest it
    into the vector store through `load_pdf_from_file`.

    Only http(s) URLs are accepted: the URL comes from API clients, and
    letting other schemes through (notably file://) would make the server
    read its own local files.

    Returns a pair (n_rows, file_title), where n_rows is whatever
    `load_pdf_from_file` returned (possibly None). Returns (None, None)
    if the URL is rejected or the download fails; the reason is printed.
    The temporary directory is removed in all cases.
    """
    from urllib.parse import urlsplit

    try:
        scheme = urlsplit(file_url).scheme
        if scheme not in ("http", "https"):
            print(f"Refusing to load {file_url!r}: only http(s) URLs are supported")
            return None, None
        file_title = extract_file_title(file_url)
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_file_path = os.path.join(tmp_dir, file_title)
            request.urlretrieve(file_url, pdf_file_path)
            return load_pdf_from_file(pdf_file_path, vector_store), file_title
    except Exception as exc:
        print(f"Failed loading from URL {file_url!r}: {exc!r}")
        return None, None
136 |
--------------------------------------------------------------------------------
/api/api.py:
--------------------------------------------------------------------------------
1 | # from typing import List
2 |
3 | from fastapi import FastAPI, Depends
4 | from pydantic import BaseModel
5 |
6 | from utils.localCORS import permitReactLocalhostClient
7 | from db import set_db_session
8 | from ai import (
9 | get_embeddings,
10 | get_vectorstore,
11 | load_pdf_from_url,
12 | get_chat_model,
13 | get_flare_chain,
14 | get_llm,
15 | get_rag_index,
16 | )
17 | from users import (
18 | get_user_store,
19 | files_for_user,
20 | add_file_to_user,
21 | delete_file_from_user,
22 | )
23 |
24 | set_db_session()
25 | embeddings = get_embeddings()
26 | chatmodel = get_chat_model()
27 | llm = get_llm()
28 | user_store = get_user_store()
29 |
class ListFileRequest(BaseModel):
    """Request body of the /list_files endpoint."""

    user_id: str
32 |
class LoadPDFRequest(BaseModel):
    """Request body of the /load_pdf_url endpoint."""

    user_id: str
    file_url: str
36 |
class QuestionRequest(BaseModel):
    """Request body shared by the /flare_ask, /rag_ask and /llm_ask endpoints."""

    user_id: str
    question_id: str
    question: str
41 |
class RemovePDFRequest(BaseModel):
    """Request body of the /remove_pdf endpoint."""

    user_id: str
    file_name: str
45 |
46 | # app
47 |
48 | app = FastAPI()
49 | permitReactLocalhostClient(app)
50 | _ = get_vectorstore(embeddings)
51 |
52 |
53 |
@app.post('/list_files')
def list_files(payload: ListFileRequest):
    """Return whatever `files_for_user` reports for the requesting user."""
    requesting_user = payload.user_id
    return files_for_user(user_store, requesting_user)
57 |
58 |
@app.post('/load_pdf_url')
def load_pdf_url(payload: LoadPDFRequest):
    """
    Ingest the PDF at `payload.file_url` into the user's vector store and,
    if that worked, record the file for the user.

    Responds with {"success": True, "n_rows": ...} on success. On failure
    the response is {"success": False, "error": ...}: the "error" field
    mirrors what the question endpoints return, so that clients get a
    reason instead of a bare failure flag.
    """
    try:
        vectorstore_u = get_vectorstore(embeddings, user_id=payload.user_id)
        n_rows, file_name = load_pdf_from_url(payload.file_url, vectorstore_u)
        if n_rows is None:
            return {
                "success": False,
                "error": "The PDF could not be downloaded or ingested.",
            }
        add_file_to_user(user_store, payload.user_id, file_name, payload.file_url)
        return {
            "success": True,
            "n_rows": n_rows,
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e),
        }
78 |
79 |
@app.post('/remove_pdf')
def remove_pdf(payload: RemovePDFRequest):
    """
    Delete the vector-store entries whose "source" metadata equals
    `payload.file_name` from the user's partition, then remove the file
    from the user's file list.

    Responds with {"success": True, "num_deleted": ...} on success. On
    failure, "num_deleted" is None and an "error" field carries the reason
    (mirroring the question endpoints).
    """
    try:
        vectorstore_u = get_vectorstore(embeddings, user_id=payload.user_id)
        num_deleted = vectorstore_u.vector_table.find_and_delete_entries(
            metadata={"source": payload.file_name},
        )
        delete_file_from_user(user_store, payload.user_id, payload.file_name)
        return {
            "success": True,
            "num_deleted": num_deleted,
        }
    except Exception as e:
        return {
            "success": False,
            "num_deleted": None,
            "error": str(e),
        }
95 |
96 |
@app.post('/flare_ask')
def flare_ask(payload: QuestionRequest):
    """
    Answer the question with a FLARE chain built on the user's vector store.

    The response always echoes "question_id"; it carries "answer" on
    success and "error" (the exception text) on failure.
    """
    try:
        user_vectorstore = get_vectorstore(embeddings, user_id=payload.user_id)
        chain = get_flare_chain(chatmodel, user_vectorstore)
        answer = chain.run(payload.question)
    except Exception as err:
        return {
            "question_id": payload.question_id,
            "success": False,
            "error": str(err),
        }
    return {
        "question_id": payload.question_id,
        "success": True,
        "answer": answer,
    }
114 |
115 |
@app.post('/rag_ask')
def rag_ask(payload: QuestionRequest):
    """
    Answer the question by querying the RAG index of the user's vector
    store with the shared LLM.

    (This handler used to be named `llm_ask`, the same name as the
    /llm_ask handler defined further down in this module, which shadowed
    it; the route itself is unchanged.)

    The response always echoes "question_id"; it carries "answer" on
    success and "error" (the exception text) on failure.
    """
    try:
        rag_index = get_rag_index(embeddings, user_id=payload.user_id)
        result = rag_index.query(payload.question, llm=llm).strip()
        return {
            "question_id": payload.question_id,
            "success": True,
            "answer": result,
        }
    except Exception as e:
        return {
            "question_id": payload.question_id,
            "success": False,
            "error": str(e),
        }
132 |
133 |
@app.post('/llm_ask')
def llm_ask(payload: QuestionRequest):
    """
    Answer the question by calling the shared LLM directly, with no
    document retrieval.

    The response always echoes "question_id"; it carries "answer" on
    success and "error" (the exception text) on failure.
    """
    try:
        answer = llm(payload.question).strip()
    except Exception as err:
        return {
            "question_id": payload.question_id,
            "success": False,
            "error": str(err),
        }
    return {
        "question_id": payload.question_id,
        "success": True,
        "answer": answer,
    }
149 |
--------------------------------------------------------------------------------
/images/open_in_gitpod.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------