├── .env.example ├── .gitignore ├── .prettierrc ├── LICENSE ├── README.md ├── app ├── api │ ├── chat │ │ └── route.ts │ ├── ingestPdf │ │ └── route.ts │ └── utils │ │ ├── embeddings │ │ └── index.ts │ │ └── vector_store │ │ ├── index.ts │ │ ├── mongo.ts │ │ └── pinecone.ts ├── dashboard │ ├── dashboard-client.tsx │ ├── layout.tsx │ └── page.tsx ├── document │ └── [id] │ │ ├── document-client.tsx │ │ ├── layout.tsx │ │ └── page.tsx ├── layout.tsx ├── page.tsx ├── sign-in │ └── [[...sign-in]] │ │ └── page.tsx └── sign-up │ └── [[...sign-up]] │ └── page.tsx ├── components ├── home │ ├── Footer.tsx │ ├── Header.tsx │ ├── Hero.tsx │ ├── HowItWorks.tsx │ └── ProudlyOpenSource.tsx └── ui │ ├── DocIcon.tsx │ ├── Header.tsx │ ├── LoadingDots.tsx │ ├── Logo.tsx │ ├── TextArea.tsx │ └── Toggle.tsx ├── middleware.ts ├── next.config.js ├── package-lock.json ├── package.json ├── postcss.config.js ├── prisma └── schema.prisma ├── public ├── align-justify.svg ├── bot-icon.png ├── chat.png ├── custom-chat-bg.png ├── favicon.ico ├── github.png ├── logo.png ├── og-image.png ├── pen.png ├── profile-icon.png ├── right-arrow.svg ├── upload.png ├── user.svg └── usericon.png ├── styles ├── globals.css └── loading-dots.module.css ├── tailwind.config.js ├── tsconfig.json └── utils ├── chatType.ts ├── cn.ts ├── config.ts ├── prisma.ts └── ragChain.ts /.env.example: -------------------------------------------------------------------------------- 1 | TOGETHER_AI_API_KEY= 2 | NEXT_PUBLIC_BYTESCALE_API_KEY= 3 | 4 | # The vector store you'd like to use. 5 | # Can be one of "pinecone" or "mongodb" 6 | # Defaults to "pinecone" 7 | NEXT_PUBLIC_VECTORSTORE="pinecone" 8 | 9 | # Update these with your pinecone details from your dashboard. 10 | # Not required if `NEXT_PUBLIC_VECTORSTORE` is set to "mongodb" 11 | PINECONE_API_KEY= 12 | PINECONE_INDEX_NAME= # PINECONE_INDEX_NAME is in the indexes tab under "index name" in blue 13 | 14 | # Update these with your MongoDB Atlas details from your dashboard. 15 | # Not required if `NEXT_PUBLIC_VECTORSTORE` is set to "pinecone" 16 | MONGODB_ATLAS_URI= 17 | MONGODB_ATLAS_DB_NAME= 18 | MONGODB_ATLAS_COLLECTION_NAME= 19 | MONGODB_ATLAS_INDEX_NAME= 20 | 21 | CLERK_SECRET_KEY= 22 | NEXT_PUBLIC_CLERK_PUBLISHABLE_KEY= 23 | NEXT_PUBLIC_CLERK_SIGN_IN_URL=/sign-in 24 | NEXT_PUBLIC_CLERK_SIGN_UP_URL=/sign-up 25 | 26 | POSTGRES_URL= 27 | POSTGRES_URL_NON_POOLING= 28 | POSTGRES_PRISMA_URL= 29 | 30 | # Enable tracing via smith.langchain.com 31 | # LANGCHAIN_TRACING_V2=true 32 | # LANGCHAIN_API_KEY= 33 | # LANGCHAIN_SESSION=pdftochat 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | .env 31 | 32 | # vercel 33 | .vercel 34 | 35 | # typescript 36 | *.tsbuildinfo 37 | next-env.d.ts 38 | 39 | #Notion_db 40 | /Notion_DB 41 | 42 | .yarn/ -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all", 3 | "singleQuote": true, 4 | "printWidth": 80, 5 | "tabWidth": 2 6 | } 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License Copyright (c) 2023 Hassan El Mghari 2 | 3 | Permission is hereby granted, free of 4 | charge, to any person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, copy, modify, merge, 7 | publish, distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to the 9 | following conditions: 10 | 11 | The above copyright notice and this permission notice 12 | (including the next paragraph) shall be included in all copies or substantial 13 | portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO 18 | EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | PDFToChat – Chat with your PDFs in seconds. 3 |

PDFToChat
4 |
5 |
6 |
7 | Chat with your PDFs in seconds. Powered by Together AI and Pinecone.
8 |
9 |
10 |
11 | Tech Stack ·
12 | Deploy Your Own ·
13 | Common Errors
14 | ·
15 | Credits
16 | ·
17 | Future Tasks
18 |
19 |
20 |
21 | ## Tech Stack
22 |
23 | - Next.js [App Router](https://nextjs.org/docs/app) for the framework
24 | - Mixtral through [Together AI](https://togetherai.link) inference for the LLM
25 | - M2 Bert 80M through [Together AI](https://togetherai.link) for embeddings
26 | - [LangChain.js](https://js.langchain.com/docs/get_started/introduction/) for the RAG code
27 | - [MongoDB Atlas](https://www.mongodb.com/atlas/database) for the vector database
28 | - [Bytescale](https://www.bytescale.com/) for the PDF storage
29 | - [Vercel](https://vercel.com/) for hosting and for the Postgres DB
30 | - [Clerk](https://clerk.dev/) for user authentication
31 | - [Tailwind CSS](https://tailwindcss.com/) for styling
32 |
33 | ## Deploy Your Own
34 |
35 | You can deploy this template to Vercel or any other host. Note that you'll need to:
36 |
37 | - Set up [Together.ai](https://togetherai.link)
38 | - Set up a [MongoDB Atlas](https://www.mongodb.com/atlas/database) database with 768 dimensions
39 |   - See instructions below for MongoDB
40 | - Set up [Bytescale](https://www.bytescale.com/)
41 | - Set up [Clerk](https://clerk.dev/)
42 | - Set up [Vercel](https://vercel.com/)
43 | - (Optional) Set up [LangSmith](https://smith.langchain.com/) for tracing.
44 |
45 | See `.env.example` for a list of all the required environment variables.
46 |
47 | You will also need to prepare your database schema by running `npx prisma db push`.
48 |
49 | ### MongoDB Atlas
50 |
51 | To set up a [MongoDB Atlas](https://www.mongodb.com/atlas/database) database as the backing vectorstore, you will need to perform the following steps:
52 |
53 | 1. Sign up on their website, then create a database cluster. Find it under the `Database` sidebar tab.
54 | 2. Create a **collection** by switching to the `Collections` tab and creating a blank collection.
55 | 3. Create an **index** by switching to the `Atlas Search` tab and clicking `Create Search Index`.
56 | 4. Make sure you select `Atlas Vector Search - JSON Editor`, select the appropriate database and collection, and paste the following into the textbox:
57 |
58 | ```json
59 | {
60 |   "fields": [
61 |     {
62 |       "numDimensions": 768,
63 |       "path": "embedding",
64 |       "similarity": "euclidean",
65 |       "type": "vector"
66 |     },
67 |     {
68 |       "path": "docstore_document_id",
69 |       "type": "filter"
70 |     }
71 |   ]
72 | }
73 | ```
74 |
75 | Note that `numDimensions` is set to 768 to match the embeddings model we're using, and that we have another index on `docstore_document_id`. This allows us to filter later.
76 |
77 | You may call the index whatever you wish; just make a note of it!
78 |
79 | 5. Finally, retrieve and set the following environment variables:
80 |
81 | ```ini
82 | NEXT_PUBLIC_VECTORSTORE=mongodb # Set MongoDB Atlas as your vectorstore
83 |
84 | MONGODB_ATLAS_URI= # Connection string for your database.
85 | MONGODB_ATLAS_DB_NAME= # The name of your database.
86 | MONGODB_ATLAS_COLLECTION_NAME= # The name of your collection.
87 | MONGODB_ATLAS_INDEX_NAME= # The name of the index you just created.
88 | ```
89 |
90 | ## Common errors
91 |
92 | - Check that you've created an `.env` file that contains your valid (and working) API keys, environment, and index name.
93 | - Check that you've set the vector dimensions to `768` and that the index name matches the one specified in your `.env` file.
94 | - Check that you've added a credit card on Together AI if you're hitting rate-limiting issues due to the free tier
95 |
96 | ## Credits
97 |
98 | - [Youssef](https://twitter.com/YoussefUiUx) for the design of the app
99 | - [Mayo](https://twitter.com/mayowaoshin) for the original RAG repo and inspiration
100 | - [Jacob](https://twitter.com/Hacubu) for the LangChain help
101 | - Together AI, Bytescale, Pinecone, and Clerk for sponsoring
102 |
103 | ## Future tasks
104 |
105 | These are some future tasks that I have planned. Contributions are welcome!
106 |
107 | - [ ] Add a trash icon for folks to delete PDFs from the dashboard and implement delete functionality
108 | - [ ] Try different embedding models like UAE-large-v1 to see if it improves accuracy
109 | - [ ] Explore best practices for auto scrolling based on other chat apps like ChatGPT
110 | - [ ] Do some prompt engineering for Mixtral to make replies as good as possible
111 | - [ ] Protect API routes by making sure users are signed in before executing chats
112 | - [ ] Run an initial benchmark on how accurate chunking / retrieval are
113 | - [ ] Research best practices for chunking and retrieval and play around with them – ideally run benchmarks
114 | - [ ] Try out LangSmith for more observability into how the RAG app runs
115 | - [ ] Add a demo video to the homepage to demonstrate functionality more easily
116 | - [ ] Upgrade to Next.js 14 and fix any issues with that
117 | - [ ] Implement sources like Perplexity to be clickable with more info
118 | - [ ] Add analytics to track the number of chats & errors
119 | - [ ] Make some changes to the default Tailwind `prose` to decrease padding
120 | - [ ] Add an initial message with sample questions or just add them as bubbles on the page
121 | - [ ] Add an option to get answers as markdown or in regular paragraphs
122 | - [ ] Implement something like SWR to automatically revalidate data
123 | - [ ] Save chats for each user to get back to later in the Postgres DB
124 | - [ ] Bring up a message to direct folks to compress PDFs if they're beyond 10MB
125 | - [ ] Use a self-designed custom uploader
126 | - [ ] Use a session tracking tool to better understand how folks are using the site
127 | - [ ] Add better error handling overall with appropriate toasts when actions fail
128 | - [ ] Add support for images in PDFs with something like [Nougat](https://replicate.com/meta/nougat)
129 |
--------------------------------------------------------------------------------
/app/api/chat/route.ts:
--------------------------------------------------------------------------------
1 | import { NextRequest, NextResponse } from 'next/server';
2 | import type { Message as VercelChatMessage } from 'ai';
3 | import { createRAGChain } from '@/utils/ragChain';
4 |
5 | import type { Document } from '@langchain/core/documents';
6 | import { HumanMessage, AIMessage, ChatMessage } from '@langchain/core/messages';
7 | import { ChatTogetherAI } from '@langchain/community/chat_models/togetherai';
8 | import { type MongoClient } from 'mongodb';
9 | import { loadRetriever } from '../utils/vector_store';
10 | import { loadEmbeddingsModel } from '../utils/embeddings';
11 |
12 | export const runtime =
13 |   process.env.NEXT_PUBLIC_VECTORSTORE === 'mongodb' ?
'nodejs' : 'edge'; 14 | 15 | const formatVercelMessages = (message: VercelChatMessage) => { 16 | if (message.role === 'user') { 17 | return new HumanMessage(message.content); 18 | } else if (message.role === 'assistant') { 19 | return new AIMessage(message.content); 20 | } else { 21 | console.warn( 22 | `Unknown message type passed: "${message.role}". Falling back to generic message type.`, 23 | ); 24 | return new ChatMessage({ content: message.content, role: message.role }); 25 | } 26 | }; 27 | 28 | /** 29 | * This handler initializes and calls a retrieval chain. It composes the chain using 30 | * LangChain Expression Language. See the docs for more information: 31 | * 32 | * https://js.langchain.com/docs/get_started/quickstart 33 | * https://js.langchain.com/docs/guides/expression_language/cookbook#conversational-retrieval-chain 34 | */ 35 | export async function POST(req: NextRequest) { 36 | let mongoDbClient: MongoClient | undefined; 37 | 38 | try { 39 | const body = await req.json(); 40 | const messages = body.messages ?? []; 41 | if (!messages.length) { 42 | throw new Error('No messages provided.'); 43 | } 44 | const formattedPreviousMessages = messages 45 | .slice(0, -1) 46 | .map(formatVercelMessages); 47 | const currentMessageContent = messages[messages.length - 1].content; 48 | const chatId = body.chatId; 49 | 50 | const model = new ChatTogetherAI({ 51 | modelName: 'mistralai/Mixtral-8x7B-Instruct-v0.1', 52 | temperature: 0, 53 | }); 54 | 55 | const embeddings = loadEmbeddingsModel(); 56 | 57 | let resolveWithDocuments: (value: Document[]) => void; 58 | const documentPromise = new Promise((resolve) => { 59 | resolveWithDocuments = resolve; 60 | }); 61 | 62 | const retrieverInfo = await loadRetriever({ 63 | chatId, 64 | embeddings, 65 | callbacks: [ 66 | { 67 | handleRetrieverEnd(documents) { 68 | // Extract retrieved source documents so that they can be displayed as sources 69 | // on the frontend. 
70 | resolveWithDocuments(documents); 71 | }, 72 | }, 73 | ], 74 | }); 75 | 76 | const retriever = retrieverInfo.retriever; 77 | mongoDbClient = retrieverInfo.mongoDbClient; 78 | 79 | const ragChain = await createRAGChain(model, retriever); 80 | 81 | const stream = await ragChain.stream({ 82 | input: currentMessageContent, 83 | chat_history: formattedPreviousMessages, 84 | }); 85 | 86 | const documents = await documentPromise; 87 | const serializedSources = Buffer.from( 88 | JSON.stringify( 89 | documents.map((doc) => { 90 | return { 91 | pageContent: doc.pageContent.slice(0, 50) + '...', 92 | metadata: doc.metadata, 93 | }; 94 | }), 95 | ), 96 | ).toString('base64'); 97 | 98 | // Convert to bytes so that we can pass into the HTTP response 99 | const byteStream = stream.pipeThrough(new TextEncoderStream()); 100 | 101 | return new Response(byteStream, { 102 | headers: { 103 | 'x-message-index': (formattedPreviousMessages.length + 1).toString(), 104 | 'x-sources': serializedSources, 105 | }, 106 | }); 107 | } catch (e: any) { 108 | return NextResponse.json({ error: e.message }, { status: 500 }); 109 | } finally { 110 | if (mongoDbClient) { 111 | await mongoDbClient.close(); 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /app/api/ingestPdf/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter'; 3 | import { PDFLoader } from 'langchain/document_loaders/fs/pdf'; 4 | import prisma from '@/utils/prisma'; 5 | import { getAuth } from '@clerk/nextjs/server'; 6 | import { loadEmbeddingsModel } from '../utils/embeddings'; 7 | import { loadVectorStore } from '../utils/vector_store'; 8 | import { type MongoClient } from 'mongodb'; 9 | 10 | export async function POST(request: Request) { 11 | let mongoDbClient: MongoClient | null = null; 12 | 13 | const { fileUrl, fileName, vectorStoreId } = await request.json(); 14 | 15 | const { userId } = getAuth(request as any); 16 | 17 | if (!userId) { 18 | return NextResponse.json({ error: 'You must be logged in to ingest data' }); 19 | } 20 | 21 | const docAmount = await prisma.document.count({ 22 | where: { 23 | userId, 24 | }, 25 | }); 26 | 27 | if (docAmount > 3) { 28 | return NextResponse.json({ 29 | error: 'You have reached the maximum number of documents', 30 | }); 31 | } 32 | 33 | const doc = await prisma.document.create({ 34 | data: { 35 | fileName, 36 | fileUrl, 37 | userId, 38 | }, 39 | }); 40 | 41 | const namespace = doc.id; 42 | 43 | try { 44 | /* load from remote pdf URL */ 45 | const response = await fetch(fileUrl); 46 | const buffer = await response.blob(); 47 | const loader = new PDFLoader(buffer); 48 | const rawDocs = await loader.load(); 49 | 50 | /* Split text into chunks */ 51 | const textSplitter = new RecursiveCharacterTextSplitter({ 52 | chunkSize: 1000, 53 | chunkOverlap: 200, 54 | }); 55 | const splitDocs = await textSplitter.splitDocuments(rawDocs); 56 | // Necessary for Mongo - we'll query on this later. 
57 | for (const splitDoc of splitDocs) { 58 | splitDoc.metadata.docstore_document_id = namespace; 59 | } 60 | 61 | console.log('creating vector store...'); 62 | 63 | /* create and store the embeddings in the vectorStore */ 64 | const embeddings = loadEmbeddingsModel(); 65 | 66 | const store = await loadVectorStore({ 67 | namespace: doc.id, 68 | embeddings, 69 | }); 70 | const vectorstore = store.vectorstore; 71 | if ('mongoDbClient' in store) { 72 | mongoDbClient = store.mongoDbClient; 73 | } 74 | 75 | // embed the PDF documents 76 | await vectorstore.addDocuments(splitDocs); 77 | } catch (error) { 78 | console.log('error', error); 79 | return NextResponse.json({ error: 'Failed to ingest your data' }); 80 | } finally { 81 | if (mongoDbClient) { 82 | await mongoDbClient.close(); 83 | } 84 | } 85 | 86 | return NextResponse.json({ 87 | text: 'Successfully embedded pdf', 88 | id: namespace, 89 | }); 90 | } 91 | -------------------------------------------------------------------------------- /app/api/utils/embeddings/index.ts: -------------------------------------------------------------------------------- 1 | import { TogetherAIEmbeddings } from '@langchain/community/embeddings/togetherai'; 2 | 3 | export function loadEmbeddingsModel() { 4 | return new TogetherAIEmbeddings({ 5 | apiKey: process.env.TOGETHER_AI_API_KEY, 6 | modelName: 'togethercomputer/m2-bert-80M-8k-retrieval', 7 | }); 8 | } 9 | -------------------------------------------------------------------------------- /app/api/utils/vector_store/index.ts: -------------------------------------------------------------------------------- 1 | import { Embeddings } from '@langchain/core/embeddings'; 2 | import { loadPineconeStore } from './pinecone'; 3 | import { loadMongoDBStore } from './mongo'; 4 | import { Callbacks } from '@langchain/core/callbacks/manager'; 5 | 6 | export async function loadVectorStore({ 7 | namespace, 8 | embeddings, 9 | }: { 10 | namespace: string; 11 | embeddings: Embeddings; 12 | }) { 13 | const vectorStoreEnv = process.env.NEXT_PUBLIC_VECTORSTORE ?? 'pinecone'; 14 | 15 | if (vectorStoreEnv === 'pinecone') { 16 | return await loadPineconeStore({ 17 | namespace, 18 | embeddings, 19 | }); 20 | } else if (vectorStoreEnv === 'mongodb') { 21 | return await loadMongoDBStore({ 22 | embeddings, 23 | }); 24 | } else { 25 | throw new Error(`Invalid vector store id provided: ${vectorStoreEnv}`); 26 | } 27 | } 28 | 29 | export async function loadRetriever({ 30 | embeddings, 31 | chatId, 32 | callbacks, 33 | }: { 34 | // namespace: string; 35 | embeddings: Embeddings; 36 | chatId: string; 37 | callbacks?: Callbacks; 38 | }) { 39 | let mongoDbClient; 40 | const store = await loadVectorStore({ 41 | namespace: chatId, 42 | embeddings, 43 | }); 44 | const vectorstore = store.vectorstore; 45 | if ('mongoDbClient' in store) { 46 | mongoDbClient = store.mongoDbClient; 47 | } 48 | // For Mongo, we will use metadata filtering to separate documents. 49 | // For Pinecone, we will use namespaces, so no filter is necessary. 50 | const filter = 51 | process.env.NEXT_PUBLIC_VECTORSTORE === 'mongodb' 52 | ? 
{ 53 | preFilter: { 54 | docstore_document_id: { 55 | $eq: chatId, 56 | }, 57 | }, 58 | } 59 | : undefined; 60 | const retriever = vectorstore.asRetriever({ 61 | filter, 62 | callbacks, 63 | }); 64 | return { 65 | retriever, 66 | mongoDbClient, 67 | }; 68 | } 69 | -------------------------------------------------------------------------------- /app/api/utils/vector_store/mongo.ts: -------------------------------------------------------------------------------- 1 | import { MongoClient } from 'mongodb'; 2 | import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'; 3 | import { Embeddings } from '@langchain/core/embeddings'; 4 | 5 | export async function loadMongoDBStore({ 6 | embeddings, 7 | }: { 8 | embeddings: Embeddings; 9 | }) { 10 | const mongoDbClient = new MongoClient(process.env.MONGODB_ATLAS_URI ?? ''); 11 | 12 | await mongoDbClient.connect(); 13 | 14 | const dbName = process.env.MONGODB_ATLAS_DB_NAME ?? ''; 15 | const collectionName = process.env.MONGODB_ATLAS_COLLECTION_NAME ?? ''; 16 | const collection = mongoDbClient.db(dbName).collection(collectionName); 17 | 18 | const vectorstore = new MongoDBAtlasVectorSearch(embeddings, { 19 | indexName: process.env.MONGODB_ATLAS_INDEX_NAME ?? 'vector_index', 20 | collection, 21 | }); 22 | 23 | return { 24 | vectorstore, 25 | mongoDbClient, 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /app/api/utils/vector_store/pinecone.ts: -------------------------------------------------------------------------------- 1 | import { Embeddings } from '@langchain/core/embeddings'; 2 | import { Pinecone } from '@pinecone-database/pinecone'; 3 | import { PineconeStore } from '@langchain/pinecone'; 4 | 5 | export async function loadPineconeStore({ 6 | namespace, 7 | embeddings, 8 | }: { 9 | namespace: string; 10 | embeddings: Embeddings; 11 | }) { 12 | const pinecone = new Pinecone({ 13 | apiKey: process.env.PINECONE_API_KEY ?? '', 14 | }); 15 | 16 | const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? ''; 17 | const index = pinecone.index(PINECONE_INDEX_NAME); 18 | 19 | const vectorstore = await PineconeStore.fromExistingIndex(embeddings, { 20 | pineconeIndex: index, 21 | namespace, 22 | textKey: 'text', 23 | }); 24 | 25 | return { 26 | vectorstore, 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /app/dashboard/dashboard-client.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { UploadDropzone } from 'react-uploader'; 4 | import { Uploader } from 'uploader'; 5 | import { useRouter } from 'next/navigation'; 6 | import DocIcon from '@/components/ui/DocIcon'; 7 | import { formatDistanceToNow } from 'date-fns'; 8 | import { useState } from 'react'; 9 | 10 | // Configuration for the uploader 11 | const uploader = Uploader({ 12 | apiKey: !!process.env.NEXT_PUBLIC_BYTESCALE_API_KEY 13 | ? 
process.env.NEXT_PUBLIC_BYTESCALE_API_KEY 14 | : 'no api key found', 15 | }); 16 | 17 | export default function DashboardClient({ docsList }: { docsList: any }) { 18 | const router = useRouter(); 19 | 20 | const [loading, setLoading] = useState(false); 21 | 22 | const options = { 23 | maxFileCount: 1, 24 | mimeTypes: ['application/pdf'], 25 | editor: { images: { crop: false } }, 26 | styles: { 27 | colors: { 28 | primary: '#000', // Primary buttons & links 29 | error: '#d23f4d', // Error messages 30 | }, 31 | }, 32 | onValidate: async (file: File): Promise => { 33 | return docsList.length > 3 34 | ? `You've reached your limit for PDFs.` 35 | : undefined; 36 | }, 37 | }; 38 | 39 | const UploadDropZone = () => ( 40 | { 44 | if (file.length !== 0) { 45 | setLoading(true); 46 | ingestPdf( 47 | file[0].fileUrl, 48 | file[0].originalFile.originalFileName || file[0].filePath, 49 | ); 50 | } 51 | }} 52 | width="470px" 53 | height="250px" 54 | /> 55 | ); 56 | 57 | async function ingestPdf(fileUrl: string, fileName: string) { 58 | let res = await fetch('/api/ingestPdf', { 59 | method: 'POST', 60 | headers: { 61 | 'Content-Type': 'application/json', 62 | }, 63 | body: JSON.stringify({ 64 | fileUrl, 65 | fileName, 66 | }), 67 | }); 68 | 69 | let data = await res.json(); 70 | router.push(`/document/${data.id}`); 71 | } 72 | 73 | return ( 74 |
75 |

76 | Chat With Your PDFs 77 |

78 | {docsList.length > 0 && ( 79 |
80 |
81 | {docsList.map((doc: any) => ( 82 |
86 | 93 | {formatDistanceToNow(doc.createdAt)} ago 94 |
95 | ))} 96 |
97 |
98 | )} 99 | {docsList.length > 0 ? ( 100 |

101 | Or upload a new PDF 102 |

103 | ) : ( 104 |

105 | No PDFs found. Upload a new PDF below! 106 |

107 | )} 108 |
109 | {loading ? ( 110 | 136 | ) : ( 137 | 138 | )} 139 |
140 |
141 | ); 142 | } 143 | -------------------------------------------------------------------------------- /app/dashboard/layout.tsx: -------------------------------------------------------------------------------- 1 | import Footer from '@/components/home/Footer'; 2 | import Header from '@/components/ui/Header'; 3 | 4 | export default function RootLayout({ 5 | children, 6 | }: { 7 | children: React.ReactNode; 8 | }) { 9 | return ( 10 |
11 |
12 |
{children}
13 |
14 |
15 |
16 |
17 | ); 18 | } 19 | -------------------------------------------------------------------------------- /app/dashboard/page.tsx: -------------------------------------------------------------------------------- 1 | import DashboardClient from './dashboard-client'; 2 | import prisma from '@/utils/prisma'; 3 | import { currentUser } from '@clerk/nextjs'; 4 | import type { User } from '@clerk/nextjs/api'; 5 | 6 | export default async function Page() { 7 | const user: User | null = await currentUser(); 8 | 9 | const docsList = await prisma.document.findMany({ 10 | where: { 11 | userId: user?.id, 12 | }, 13 | orderBy: { 14 | createdAt: 'desc', 15 | }, 16 | }); 17 | 18 | return ( 19 |
20 | 21 |
22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /app/document/[id]/document-client.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import { useRef, useState, useEffect } from 'react'; 4 | import Image from 'next/image'; 5 | import ReactMarkdown from 'react-markdown'; 6 | import LoadingDots from '@/components/ui/LoadingDots'; 7 | import { Viewer, Worker } from '@react-pdf-viewer/core'; 8 | import '@react-pdf-viewer/core/lib/styles/index.css'; 9 | import '@react-pdf-viewer/default-layout/lib/styles/index.css'; 10 | import type { 11 | ToolbarSlot, 12 | TransformToolbarSlot, 13 | } from '@react-pdf-viewer/toolbar'; 14 | import { toolbarPlugin } from '@react-pdf-viewer/toolbar'; 15 | import { pageNavigationPlugin } from '@react-pdf-viewer/page-navigation'; 16 | import { Document } from '@prisma/client'; 17 | import { useChat } from 'ai/react'; 18 | import Toggle from '@/components/ui/Toggle'; 19 | 20 | export default function DocumentClient({ 21 | currentDoc, 22 | userImage, 23 | }: { 24 | currentDoc: Document; 25 | userImage?: string; 26 | }) { 27 | const toolbarPluginInstance = toolbarPlugin(); 28 | const pageNavigationPluginInstance = pageNavigationPlugin(); 29 | const { renderDefaultToolbar, Toolbar } = toolbarPluginInstance; 30 | 31 | const transform: TransformToolbarSlot = (slot: ToolbarSlot) => ({ 32 | ...slot, 33 | Download: () => <>, 34 | SwitchTheme: () => <>, 35 | Open: () => <>, 36 | }); 37 | 38 | const chatId = currentDoc.id; 39 | const pdfUrl = currentDoc.fileUrl; 40 | 41 | const [sourcesForMessages, setSourcesForMessages] = useState< 42 | Record 43 | >({}); 44 | const [error, setError] = useState(''); 45 | const [chatOnlyView, setChatOnlyView] = useState(false); 46 | 47 | const { messages, input, handleInputChange, handleSubmit, isLoading } = 48 | useChat({ 49 | api: '/api/chat', 50 | body: { 51 | chatId, 52 | }, 53 | onResponse(response) { 54 | const sourcesHeader = response.headers.get('x-sources'); 55 | const sources = sourcesHeader ? JSON.parse(atob(sourcesHeader)) : []; 56 | 57 | const messageIndexHeader = response.headers.get('x-message-index'); 58 | if (sources.length && messageIndexHeader !== null) { 59 | setSourcesForMessages({ 60 | ...sourcesForMessages, 61 | [messageIndexHeader]: sources, 62 | }); 63 | } 64 | }, 65 | onError: (e) => { 66 | setError(e.message); 67 | }, 68 | onFinish() {}, 69 | }); 70 | 71 | const messageListRef = useRef(null); 72 | const textAreaRef = useRef(null); 73 | 74 | useEffect(() => { 75 | textAreaRef.current?.focus(); 76 | }, []); 77 | 78 | // Prevent empty chat submissions 79 | const handleEnter = (e: any) => { 80 | if (e.key === 'Enter' && messages) { 81 | handleSubmit(e); 82 | } else if (e.key == 'Enter') { 83 | e.preventDefault(); 84 | } 85 | }; 86 | 87 | let userProfilePic = userImage ? userImage : '/profile-icon.png'; 88 | 89 | const extractSourcePageNumber = (source: { 90 | metadata: Record; 91 | }) => { 92 | return source.metadata['loc.pageNumber'] ?? source.metadata.loc?.pageNumber; 93 | }; 94 | return ( 95 |
96 | 97 |
98 | {/* Left hand side */} 99 | 100 |
105 |
111 | {renderDefaultToolbar(transform)} 112 |
113 | 117 |
118 |
119 | {/* Right hand side */} 120 |
121 |
125 |
129 | {messages.length === 0 && ( 130 |
131 | Ask your first question below! 132 |
133 | )} 134 | {messages.map((message, index) => { 135 | const sources = sourcesForMessages[index] || undefined; 136 | const isLastMessage = 137 | !isLoading && index === messages.length - 1; 138 | const previousMessages = index !== messages.length - 1; 139 | return ( 140 |
141 |
150 |
151 | profile image 164 | 165 | {message.content} 166 | 167 |
168 | {/* Display the sources */} 169 | {(isLastMessage || previousMessages) && sources && ( 170 |
171 | {sources 172 | .filter((source: any, index: number, self: any) => { 173 | const pageNumber = 174 | extractSourcePageNumber(source); 175 | // Check if the current pageNumber is the first occurrence in the array 176 | return ( 177 | self.findIndex( 178 | (s: any) => 179 | extractSourcePageNumber(s) === pageNumber, 180 | ) === index 181 | ); 182 | }) 183 | .map((source: any) => ( 184 | 194 | ))} 195 |
196 | )} 197 |
198 |
199 | ); 200 | })} 201 |
202 |
203 |
204 |
handleSubmit(e)} 206 | className="relative w-full px-4 sm:pt-10 pt-2" 207 | > 208 |