├── docs └── chatPDF.png ├── tests ├── test.pdf ├── util.test.js ├── chroma.test.js └── pinecone.test.js ├── .gitignore ├── src ├── lib │ ├── helper.js │ ├── util.js │ ├── chromaHelper.js │ └── pineconeHelper.js ├── views │ ├── docs.ejs │ └── home.ejs ├── style.css └── index.js ├── package.json ├── .github └── workflows │ └── node.js.yml ├── README.md └── LICENSE /docs/chatPDF.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walicar/chatPDF/HEAD/docs/chatPDF.png -------------------------------------------------------------------------------- /tests/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/walicar/chatPDF/HEAD/tests/test.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .env 3 | .DS_Store 4 | uploads 5 | nodemon.json 6 | servicename -------------------------------------------------------------------------------- /src/lib/helper.js: -------------------------------------------------------------------------------- 1 | export class Helper { 2 | async createDocument(texts, name) {} 3 | async deleteDocument(name) {} 4 | async queryDocument(query) {} 5 | async getDocuments() {} 6 | async useDocument(name) {} 7 | } 8 | -------------------------------------------------------------------------------- /tests/util.test.js: -------------------------------------------------------------------------------- 1 | import { util } from "../src/lib/util.js"; 2 | // all tests are run in root dir 3 | 4 | /* 5 | test("getIndices does not throw", async () => { 6 | expect(async () => {await util.getIndices()}).not.toThrow(); 7 | }) 8 | */ 9 | 10 | test("getTexts does not fail", async () => { 11 | const result = await util.processTexts("tests/test.pdf"); 12 | expect(result.length).toBe(7); 13 | 
}); 14 | 15 | test("merge works correctly", () => { 16 | const a = ["apple", "orange"]; 17 | const b = ["banana", "apple"]; 18 | const result = util.merge(a, b); 19 | expect(result).toEqual(["apple", "orange", "banana"]); 20 | }); 21 | -------------------------------------------------------------------------------- /tests/chroma.test.js: -------------------------------------------------------------------------------- 1 | import { ChromaHelper } from "../src/lib/chromaHelper"; 2 | import { expect } from "@jest/globals"; 3 | import { util } from "../src/lib/util.js"; 4 | 5 | test.skip("create embeddings with collection", async () => { 6 | const helper = new ChromaHelper(); 7 | const name = "exist"; 8 | const texts = await util.getTexts("./tests/test.pdf"); 9 | await helper.deleteDocument(name); 10 | const store = await helper.createDocument(texts, name); 11 | const result = await helper.queryDocument("What is OpenAI?"); 12 | console.log(result); 13 | expect(result).toContain("OpenAI"); 14 | await helper.deleteDocument(name); 15 | }, 15000); 16 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chatpdf", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "src/index.js", 6 | "type": "module", 7 | "scripts": { 8 | "start": "node src/index.js", 9 | "dev": "npx nodemon", 10 | "test": "NODE_OPTIONS=--experimental-vm-modules npx jest" 11 | }, 12 | "keywords": [], 13 | "author": "", 14 | "license": "ISC", 15 | "dependencies": { 16 | "@pinecone-database/pinecone": "^0.1.6", 17 | "body-parser": "^1.20.2", 18 | "chromadb": "^1.5.2", 19 | "dotenv": "^16.0.3", 20 | "ejs": "^3.1.9", 21 | "express": "^4.18.2", 22 | "langchain": "^0.0.90", 23 | "multer": "^1.4.5-lts.1", 24 | "pdf-parse": "^1.1.1", 25 | "pdfjs-dist": "^3.4.120" 26 | }, 27 | "devDependencies": { 28 | "@jest/globals": "^29.5.0", 29 | "jest": "^29.5.0", 30 | 
"nodemon": "^2.0.22", 31 | "prettier": "^2.8.8" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /.github/workflows/node.js.yml: -------------------------------------------------------------------------------- 1 | # This workflow will do a clean installation of node dependencies, cache/restore them, build the source code and run tests across different versions of node 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-nodejs 3 | 4 | name: Node.js CI 5 | 6 | on: 7 | push: 8 | branches: ["main"] 9 | pull_request: 10 | branches: ["main"] 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | node-version: [18.x] 19 | # See supported Node.js release schedule at https://nodejs.org/en/about/releases/ 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Use Node.js ${{ matrix.node-version }} 24 | uses: actions/setup-node@v3 25 | with: 26 | node-version: ${{ matrix.node-version }} 27 | cache: "npm" 28 | - run: npm ci 29 | - run: npm run build --if-present 30 | - run: npm test 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chatPDF 2 | 3 | Chat with a PDF through a Web UI using OpenAI, Pinecone, Chroma, and LangChain 4 | 5 | ![docs/chatPDF.png](docs/chatPDF.png) 6 | 7 | ## Requirements 8 | 9 | - OpenAI account 10 | - Pinecone account, or a locally [running Chroma](https://docs.trychroma.com/api-reference#run-the-backend) instance as a faster alternative. 11 | 12 | ## Setup 13 | 14 | 1. Run `npm i` 15 | 2. Create a `.env` file and add the following: 16 | 17 | ``` 18 | OPENAI_API_KEY= 19 | PINECONE_API_KEY= 20 | PINECONE_API_ENV= 21 | ``` 22 | 23 | 3. Run the server by calling `npm start` 24 | 25 | ## Usage 26 | 27 | Upload a PDF by clicking the "Manage Documents" button. 
28 | 29 | To talk with the PDF that you uploaded, go to the home page and select your PDF from the document dropdown box. 30 | 31 | If you want to switch from Pinecone to Chroma, make sure you have Chroma running, then select Chroma from the service dropdown box. 32 | 33 | ## Roadmap 34 | 35 | GOAL: locally hosted chat bot to interface with PDF 36 | 37 | - BLOCKED: Integrate with GPT4All TS bindings 38 | - as of 5/22/23 official bindings haven't been made yet 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 William Alicar Jr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/pinecone.test.js: -------------------------------------------------------------------------------- 1 | import { PineconeHelper } from "../src/lib/pineconeHelper.js"; 2 | import { describe, expect, jest } from "@jest/globals"; 3 | import { util } from "../src/lib/util.js"; 4 | 5 | test.skip("getIndices", async () => { 6 | const helper = new PineconeHelper(); 7 | const indices = await helper.getIndices(); 8 | expect(indices).toEqual(["world"]); 9 | }, 10000); 10 | 11 | test.skip("describeIndex", async () => { 12 | const helper = new PineconeHelper(); 13 | const result = await helper.describeIndex("world"); 14 | expect(result.database.name).toEqual("world"); 15 | }, 10000); 16 | 17 | test.skip("describeIndex that doesn't exist", async () => { 18 | const helper = new PineconeHelper(); 19 | const desc = await helper.describeIndex("NOTEXIST"); 20 | console.log(desc.message); 21 | expect(desc.messag).toContain("Error"); 22 | }); 23 | 24 | test.skip("createDocument", async () => { 25 | // will need to be mocked to be tested 26 | const name = "varia"; 27 | const helper = new PineconeHelper(); 28 | const texts = await util.getTexts("./tests/test.pdf"); 29 | const store = await helper.createDocument(texts, name); 30 | }); 31 | -------------------------------------------------------------------------------- /src/lib/util.js: -------------------------------------------------------------------------------- 1 | import * as dotenv from "dotenv"; 2 | dotenv.config(); 3 | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; 4 | import { PDFLoader } from "langchain/document_loaders/fs/pdf"; 5 | import fs from "fs/promises"; 6 | 7 | export async function processTexts(path) { 8 | try { 9 | const loader = new PDFLoader(path); 10 | const doc = await loader.load(); 11 | const splitter = new RecursiveCharacterTextSplitter({ 12 | chunkSize: 1000, 13 | chunkOverlap: 20, 14 | }); 
15 | const docs = await splitter.splitDocuments(doc); 16 | return docs.map((d) => d.pageContent); 17 | } catch (e) { 18 | throw e; 19 | } 20 | } 21 | 22 | async function getTexts(file) { 23 | try { 24 | const data = await fs.readFile(file.path); 25 | await fs.writeFile(`uploads/${file.originalname}`, data); 26 | console.log(`File uploaded: ${file.originalname}`); 27 | await fs.unlink(file.path); 28 | return await processTexts(`./uploads/${file.originalname}`); 29 | } catch (e) { 30 | throw e; 31 | } 32 | } 33 | 34 | export function makeMessage(color, name, content) { 35 | const message = { 36 | color: color, 37 | name: name, 38 | content: content, 39 | }; 40 | return message; 41 | } 42 | 43 | export function merge(a, b) { 44 | // merge a and b with no dupes 45 | const map = new Map(); 46 | let res = a.concat(b.filter((item) => a.indexOf(item) < 0)); 47 | return res; 48 | } 49 | 50 | export function updateList(list, item) { 51 | list.splice(list.indexOf(item), 1); 52 | list.unshift(item); 53 | } 54 | 55 | export async function mockPromisePass() { 56 | const promise = new Promise((resolve, _reject) => { 57 | setTimeout(() => { 58 | resolve("mockPromise resolved in 1s"); 59 | }, 1000); 60 | }); 61 | return promise; 62 | } 63 | 64 | export async function mockPromiseFail() { 65 | const promise = new Promise((_resolve, reject) => { 66 | setTimeout(() => { 67 | reject("mockPromise resolved in 1s"); 68 | }, 1000); 69 | }); 70 | return promise; 71 | } 72 | 73 | const util = { 74 | getTexts, 75 | processTexts, 76 | makeMessage, 77 | merge, 78 | updateList, 79 | mockPromisePass, 80 | mockPromiseFail, 81 | }; 82 | 83 | export { util }; 84 | -------------------------------------------------------------------------------- /src/views/docs.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | chatPDF 6 | 7 | 8 | 9 | 10 | 11 |
12 |
13 | 14 | 15 |
16 |
17 |
18 | <% if (error && error.length>0) {%> 19 |

20 | <%= error %> 21 |

22 | <% } %> 23 |
24 |
25 |

Create Document

26 |
27 | 28 |
29 |
32 | 33 |
34 |
35 |
36 |

Delete Document

37 | <% if (documents && documents.length> 1) { %> 38 |
39 | 40 | 47 | 48 |
49 |
50 | 51 |
52 | <% } else { %> 53 |
54 | 55 |
56 | <% } %> 57 |
58 |
59 |
60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /src/lib/chromaHelper.js: -------------------------------------------------------------------------------- 1 | import * as dotenv from "dotenv"; 2 | dotenv.config(); 3 | import { Chroma } from "langchain/vectorstores/chroma"; 4 | import { OpenAIEmbeddings } from "langchain/embeddings/openai"; 5 | import { ChromaClient, OpenAIEmbeddingFunction } from "chromadb"; 6 | import { OpenAI } from "langchain/llms/openai"; 7 | import { loadQAStuffChain } from "langchain/chains"; 8 | import { Helper } from "./helper.js"; 9 | export class ChromaHelper extends Helper { 10 | constructor() { 11 | super(); 12 | this.client = new ChromaClient(); 13 | } 14 | 15 | async createDocument(texts, name) { 16 | const embeddings = new OpenAIEmbeddings({ 17 | openAIApiKey: process.env.OPENAI_API_KEY, 18 | }); 19 | const metadatas = [{}]; 20 | try { 21 | const embedder = new OpenAIEmbeddingFunction({ 22 | openai_api_key: process.env.OPENAI_API_KEY, 23 | }); 24 | await this.client.createCollection({ name, embeddingFunction: embedder }); 25 | const store = await Chroma.fromTexts(texts, metadatas, embeddings, { 26 | collectionName: name, 27 | }); 28 | this.store = store; 29 | return store; 30 | } catch (e) { 31 | throw e; 32 | } 33 | } 34 | 35 | async deleteDocument(name) { 36 | try { 37 | const res = await this.client.deleteCollection({ name }); 38 | return res; 39 | } catch (e) { 40 | throw e; 41 | } 42 | } 43 | 44 | async queryDocument(query) { 45 | if (this.store) { 46 | try { 47 | const docs = await this.store.similaritySearch(query, 1); 48 | const llm = new OpenAI({ 49 | openAIApiKey: process.env.OPENAI_API_KEY, 50 | temperature: 0.3, 51 | }); 52 | const chain = loadQAStuffChain(llm); 53 | const answer = await chain.call({ 54 | input_documents: docs, 55 | question: query, 56 | }); 57 | return answer.text; 58 | } catch (e) { 59 | throw e; 60 | } 61 | } else { 62 | return Error("Store does not 
exist"); 63 | } 64 | } 65 | 66 | async getDocuments() { 67 | try { 68 | const list = await this.client.listCollections(); 69 | const res = list.map((item) => item.name); 70 | console.log(res); 71 | return res; 72 | } catch (e) { 73 | throw e; 74 | } 75 | } 76 | 77 | async useDocument(name) { 78 | try { 79 | const embeddings = new OpenAIEmbeddings({ 80 | openAIApiKey: process.env.OPENAI_API_KEY, 81 | }); 82 | this.store = await Chroma.fromExistingCollection(embeddings, { 83 | collectionName: name, 84 | }); 85 | } catch (e) { 86 | throw e; 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/style.css: -------------------------------------------------------------------------------- 1 | @import url("https://fonts.googleapis.com/css2?family=Source+Sans+Pro&display=swap"); 2 | 3 | :root { 4 | font-size: 18px; 5 | --border-theme: 2.5px solid black; 6 | font-family: "Source Sans Pro", sans-serif; 7 | --shadow-theme: 0 4px 8px 0 rgba(0, 0, 0, 0.2), 8 | 0 6px 20px 0 rgba(0, 0, 0, 0.19); 9 | } 10 | 11 | textarea { 12 | font-family: "Source Sans Pro", sans-serif; 13 | } 14 | 15 | input, 16 | form, 17 | label { 18 | font-size: 18px; 19 | } 20 | 21 | input[type="file"] { 22 | white-space: nowrap; 23 | overflow: hidden; 24 | width: 325px; 25 | } 26 | 27 | input[type="file"]::file-selector-button { 28 | background-color: rgba(51, 51, 51, 0.09); 29 | border-radius: 8px; 30 | padding: 6px; 31 | border: none; 32 | font-size: 15px; 33 | cursor: pointer; 34 | margin-left: 7px; 35 | margin-bottom: 15px; 36 | } 37 | 38 | textarea { 39 | border-radius: 8px; 40 | border: var(--border-theme); 41 | padding: 2px; 42 | } 43 | 44 | button, 45 | select { 46 | font-size: 15px; 47 | background-color: rgba(51, 51, 51, 0.09); 48 | border-radius: 8px; 49 | padding: 6px; 50 | border: none rgba(55, 55, 55, 0.5); 51 | cursor: pointer; 52 | } 53 | 54 | .container { 55 | margin: 0% auto; 56 | height: calc(100vh - 22px); 57 | width: 80%; 58 | 
display: grid; 59 | grid-template-areas: 60 | "top-bar top-bar top-bar" 61 | "chat-box chat-box chat-box" 62 | "query-box query-box query-box"; 63 | grid-template-rows: 46px 1fr 110px; 64 | border: var(--border-theme); 65 | border-radius: 21px; 66 | box-shadow: var(--shadow-theme); 67 | } 68 | 69 | #top-bar { 70 | border-bottom: var(--border-theme); 71 | grid-area: top-bar; 72 | position: relative; 73 | display: flex; 74 | flex-direction: row; 75 | } 76 | 77 | #top-bar > * { 78 | padding-left: 10px; 79 | padding-right: 10px; 80 | } 81 | 82 | #top-bar > .top-bar-item { 83 | padding-top: 6.5px; 84 | /* border-left: var(--border-theme); */ 85 | } 86 | 87 | .logo { 88 | display: inline; 89 | font-size: 2em; 90 | font-weight: bold; 91 | flex-grow: 3; 92 | } 93 | 94 | #chat-box { 95 | padding: 0px 10px; 96 | grid-area: chat-box; 97 | overflow: scroll; 98 | } 99 | 100 | #query-box { 101 | border-top: var(--border-theme); 102 | padding-top: 10px; 103 | grid-area: query-box; 104 | } 105 | 106 | #query-box form { 107 | padding: 0px 10px; 108 | display: flex; 109 | gap: 10px; 110 | flex-direction: row; 111 | height: 90px; 112 | } 113 | 114 | #query-input { 115 | flex-grow: 4; 116 | resize: none; 117 | padding: 10px; 118 | } 119 | 120 | .user-color { 121 | color: white; 122 | background: blue; 123 | border-radius: 3px; 124 | padding: 1px; 125 | } 126 | 127 | .chat-color { 128 | color: white; 129 | background: red; 130 | border-radius: 3px; 131 | padding: 1px; 132 | } 133 | 134 | .index-form input { 135 | margin-bottom: 10px; 136 | } 137 | -------------------------------------------------------------------------------- /src/views/home.ejs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | chatPDF 6 | 7 | 9 | 29 | 30 | 31 | 32 |
33 |
34 | 35 |
36 | 37 | 44 |
45 | <% if (documents && documents.length> 1) { %> 46 |
47 | 48 | 55 |
56 |
57 | 58 |
59 | <% } else { %> 60 |
61 | 62 |
63 | <% } %> 64 | 65 |
66 |
67 | <% messages.forEach((message, index)=> {%> 68 | <% if (index === messages.length - 1) {%> 69 |

70 | [<%= message.name %>] <%= message.content %> 71 |

72 | <%} else {%> 73 |

74 | [<%= message.name %>] <%= message.content %> 75 |

76 | <% } %> 77 | <% }) %> 78 |
79 |
80 |
81 | 82 | 83 |
84 |
85 |
86 | 87 | 88 | -------------------------------------------------------------------------------- /src/lib/pineconeHelper.js: -------------------------------------------------------------------------------- 1 | import * as dotenv from "dotenv"; 2 | dotenv.config(); 3 | import { PineconeStore } from "langchain/vectorstores/pinecone"; 4 | import { PineconeClient } from "@pinecone-database/pinecone"; 5 | import { OpenAIEmbeddings } from "langchain/embeddings/openai"; 6 | import { OpenAI } from "langchain/llms/openai"; 7 | import { loadQAStuffChain } from "langchain/chains"; 8 | import { Helper } from "./helper.js"; 9 | 10 | // pinecone dimensions 1536 11 | 12 | export class PineconeHelper extends Helper { 13 | constructor() { 14 | super(); 15 | } 16 | 17 | async init() { 18 | const client = new PineconeClient(); 19 | try { 20 | await client.init({ 21 | apiKey: process.env.PINECONE_API_KEY, 22 | environment: process.env.PINECONE_API_ENV, 23 | }); 24 | } catch (e) { 25 | throw e; 26 | } 27 | return client; 28 | } 29 | 30 | async createDocument(texts, name) { 31 | try { 32 | const desc = await this.describeIndex(name); 33 | if (desc instanceof Error) { 34 | await this.createIndex(name); 35 | const result = await this.poll(name); 36 | if (result) { 37 | await this.createEmbeddings(texts, name); 38 | console.log("Document uploaded!"); 39 | } else { 40 | throw new Error("Timed out uploading document"); 41 | } 42 | } else { 43 | const store = await this.useDocument(name); 44 | this.store = store; 45 | } 46 | } catch (e) { 47 | throw e; 48 | } 49 | } 50 | 51 | async createEmbeddings(texts, indexName) { 52 | await new Promise((r) => setTimeout(r, 5000)); 53 | const fields = { openAIApiKey: process.env.OPENAI_API_KEY }; 54 | const embeddings = new OpenAIEmbeddings(fields); 55 | const client = await this.init(); 56 | const index = client.Index(indexName); 57 | const metadatas = [{}]; 58 | const dbConfig = { pineconeIndex: index }; 59 | try { 60 | const store = await 
PineconeStore.fromTexts( 61 | texts, 62 | metadatas, 63 | embeddings, 64 | dbConfig 65 | ); 66 | this.store = store; 67 | return store; 68 | } catch (e) { 69 | console.log(e); 70 | } 71 | } 72 | 73 | async queryDocument(query) { 74 | try { 75 | const docs = await this.store.similaritySearch(query); 76 | const llm = new OpenAI({ 77 | openAIApiKey: process.env.OPENAI_API_KEY, 78 | temperature: 0.5, 79 | }); 80 | const chain = loadQAStuffChain(llm); 81 | const answer = await chain.call({ 82 | input_documents: docs, 83 | question: query, 84 | }); 85 | return answer.text; 86 | } catch (e) { 87 | throw e; 88 | } 89 | } 90 | 91 | async createIndex(name) { 92 | const client = await this.init(); 93 | const createRequest = { 94 | name, 95 | dimension: 1536, 96 | metric: "cosine", 97 | podType: "p1", 98 | }; 99 | try { 100 | await client.createIndex({ createRequest }); 101 | } catch (e) { 102 | throw e; 103 | } 104 | } 105 | 106 | async getDocuments() { 107 | const client = await this.init(); 108 | try { 109 | const list = await client.listIndexes(); 110 | return list; 111 | } catch (e) { 112 | throw e; 113 | } 114 | } 115 | 116 | async useDocument(name) { 117 | const fields = { openaiapikey: process.env.openai_api_key }; 118 | const embeddings = new OpenAIEmbeddings(fields); 119 | const client = await this.init(); 120 | const index = client.Index(name); 121 | const dbconfig = { pineconeIndex: index }; 122 | try { 123 | const store = await PineconeStore.fromExistingIndex(embeddings, dbconfig); 124 | this.store = store; 125 | return store; 126 | } catch (e) { 127 | throw e; 128 | } 129 | } 130 | 131 | async describeIndex(name) { 132 | const client = await this.init(); 133 | try { 134 | const result = await client.describeIndex({ indexName: name }); 135 | return result; 136 | } catch (e) { 137 | return e; 138 | } 139 | } 140 | 141 | async deleteDocument(name) { 142 | const client = await this.init(); 143 | try { 144 | await client.deleteIndex({ indexName: name }); 145 | } catch 
(e) { 146 | throw e; 147 | } 148 | } 149 | 150 | async poll(name) { 151 | const time = 15000; 152 | const limit = 20; 153 | const client = await this.init(); 154 | let tries = 0; 155 | while (tries < limit) { 156 | try { 157 | let desc = await client.describeIndex({ indexName: name }); 158 | if (desc.status.ready) { 159 | return true; 160 | } 161 | } catch (e) { 162 | throw e; 163 | } 164 | tries++; 165 | console.log("Waiting..."); 166 | await new Promise((r) => setTimeout(r, time)); 167 | } 168 | return false; 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import fs from "fs"; 3 | import multer from "multer"; 4 | import bodyParser from "body-parser"; 5 | import path from "path"; 6 | import { fileURLToPath, parse } from "url"; 7 | import { util } from "./lib/util.js"; 8 | import { PineconeHelper } from "./lib/pineconeHelper.js"; 9 | import { ChromaHelper } from "./lib/chromaHelper.js"; 10 | import { URL } from 'url'; 11 | 12 | const services = { 13 | pinecone: () => new PineconeHelper(), 14 | chroma: () => new ChromaHelper(), 15 | }; 16 | 17 | const app = express(); 18 | const upload = multer({ dest: "./uploads/" }); 19 | let state = { 20 | service: { 21 | name: "pinecone", 22 | names: ["pinecone", "chroma"], 23 | helper: undefined, 24 | }, 25 | error: undefined, // used for routes that are not "/home" 26 | document: undefined, 27 | documents: ["none"], 28 | messages: [ 29 | { 30 | color: "chat-color", 31 | name: "ChatPDF", 32 | content: "Welcome to chatPDF, select a document and ask me a question!", 33 | }, 34 | ], 35 | }; 36 | loadService(); 37 | 38 | const __dirname = path.dirname(fileURLToPath(import.meta.url)); 39 | app.set("views", path.join(__dirname, "views")); 40 | app.set("view engine", "ejs"); 41 | app.use(bodyParser.urlencoded({ extended: true })); 42 | 
app.use(express.static(__dirname)); 43 | 44 | app.post("/query", async (req, res) => { 45 | if (state.service.helper.store && req.body.query) { 46 | const content = req.body.query; 47 | const queryMessage = util.makeMessage("user-color", "User", content); 48 | state.messages.push(queryMessage); 49 | try { 50 | const response = await state.service.helper.queryDocument(req.body.query); 51 | const answerMessage = util.makeMessage("chat-color", "ChatPDF", response); 52 | state.messages.push(answerMessage); 53 | console.log("Query Fulfilled"); 54 | } catch (e) { 55 | pushError(e.message); 56 | } 57 | } else { 58 | pushError("No document selected, or empty query"); 59 | } 60 | res.redirect("/home"); 61 | }); 62 | 63 | app.post("/getDocuments", async (req, res) => { 64 | const redirectURL = new URL(req.get("Referer")).pathname; 65 | try { 66 | const res = await state.service.helper.getDocuments(); 67 | state.documents = util.merge(state.documents, res); 68 | } catch (e) { 69 | if (redirectURL == "/home") { 70 | pushError(e.message); 71 | } else { 72 | state.error = e.message; 73 | } 74 | } 75 | res.redirect(redirectURL); 76 | }); 77 | 78 | app.post("/setDocument", async (req, res) => { 79 | const redirectURL = new URL(req.get("Referer")).pathname; 80 | if (req.body.document == "none") { 81 | state.document = req.body.document; 82 | state.service.helper.store = undefined; 83 | util.updateList(state.documents, state.document); 84 | } else { 85 | try { 86 | state.document = req.body.document; 87 | await state.service.helper.useDocument(state.document); 88 | util.updateList(state.documents, state.document); 89 | } catch (e) { 90 | if (redirectURL == "/home") { 91 | pushError(e.message); 92 | } else { 93 | state.error = e.message; 94 | } 95 | } 96 | } 97 | res.redirect(redirectURL); 98 | }); 99 | 100 | app.post("/deleteDocument", async (req, res) => { 101 | if (state.document == req.body.document) { 102 | state.document = undefined; 103 | state.service.helper.store = 
undefined; 104 | } 105 | await state.service.helper.deleteDocument(req.body.document); 106 | state.documents.splice(state.documents.indexOf(req.body.document), 1); 107 | res.redirect("/docs"); 108 | }); 109 | 110 | app.post("/createDocument", upload.single("doc"), async (req, res) => { 111 | const file = req.file; 112 | try { 113 | const text = await util.getTexts(file); 114 | const docname = req.body.docname; 115 | await state.service.helper.createDocument(text, docname); 116 | } catch (e) { 117 | state.error = e.message; 118 | } 119 | res.redirect("/docs"); 120 | }); 121 | 122 | app.post("/setService", (req, res) => { 123 | state.service.name = req.body.servicename; 124 | util.updateList(state.service.names, state.service.name); 125 | saveService(); 126 | state.service.helper = getService(state.service.name); 127 | state.documents = ["none"]; 128 | state.messages = [ 129 | { 130 | color: "chat-color", 131 | name: "ChatPDF", 132 | content: "Welcome to chatPDF, select a document and ask me a question!", 133 | }, 134 | ]; 135 | res.redirect("/home"); 136 | }); 137 | 138 | app.get("/", (_req, res) => { 139 | res.redirect("/home"); 140 | }); 141 | 142 | app.get("/home", (req, res) => { 143 | eraseError("/home", req.get("Referer")); 144 | res.render("home", state); 145 | }); 146 | 147 | app.get("/docs", (req, res) => { 148 | eraseError("/docs", req.get("Referer")); 149 | res.render("docs", state); 150 | }); 151 | 152 | app.listen(3000, () => { 153 | console.log("Visit chatPDF on http://localhost:3000/"); 154 | }); 155 | 156 | function pushError(msg) { 157 | const errorMessage = util.makeMessage("chat-color", "ChatPDF", msg); 158 | state.messages.push(errorMessage); 159 | } 160 | 161 | function getService(name) { 162 | const factory = services[name]; 163 | if (factory) { 164 | return factory(); 165 | } else { 166 | return null; 167 | } 168 | } 169 | 170 | function saveService() { 171 | fs.writeFileSync("servicename", state.service.name); 172 | } 173 | 174 | function 
loadService() { 175 | if (fs.existsSync("servicename")) { 176 | state.service.name = fs.readFileSync("servicename", "utf-8"); 177 | state.service.helper = getService(state.service.name); 178 | util.updateList(state.service.names, state.service.name); 179 | } else { 180 | state.service.name = "pinecone"; 181 | state.service.helper = getService("pinecone"); 182 | saveService(); 183 | } 184 | } 185 | 186 | function eraseError(curUrl, prevUrl) { 187 | // erase error when we've come from a different path 188 | if (prevUrl && new URL(prevUrl).pathname != curUrl) { 189 | state.error = undefined; 190 | } 191 | } --------------------------------------------------------------------------------