├── .env.example ├── .eslintrc.json ├── .gitignore ├── .prettierrc ├── README.md ├── components ├── layout.tsx └── ui │ ├── LoadingDots.tsx │ ├── TextArea.tsx │ └── accordion.tsx ├── config └── chroma.ts ├── declarations └── pdf-parse.d.ts ├── next.config.js ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── api │ └── chat.ts └── index.tsx ├── postcss.config.cjs ├── public ├── bot-image.png ├── favicon.ico └── usericon.png ├── scripts └── ingest-data.ts ├── styles ├── Home.module.css ├── base.css ├── chrome-bug.css └── loading-dots.module.css ├── tailwind.config.cjs ├── tsconfig.json ├── types └── chat.ts ├── utils ├── cn.ts ├── customPDFLoader.ts └── makechain.ts ├── visual-guide └── gpt-langchain-pdf.png └── yarn.lock /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | COLLECTION_NAME= 3 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | .env 31 | 32 | # vercel 33 | .vercel 34 | 35 | # typescript 36 | *.tsbuildinfo 37 | next-env.d.ts 38 | 39 | #Notion_db 40 | /Notion_DB 41 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all", 3 | "singleQuote": true, 4 | "printWidth": 80, 5 | "tabWidth": 2 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Files 2 | 3 | Use the GPT-4 API to build a ChatGPT-style chatbot for multiple large PDF files. 4 | 5 | The tech stack includes LangChain, Chroma, TypeScript, OpenAI, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Chroma is a vector store that holds the embeddings and the text of your PDFs so that similar documents can be retrieved later. 6 | 7 | [Tutorial video](https://www.youtube.com/watch?v=ih9PBGVVOO4) 8 | 9 | [Join the discord if you have questions](https://discord.gg/E4Mc77qwjm) 10 | 11 | The visual guide for this repo and tutorial is in the `visual-guide` folder. 12 | 13 | **If you run into errors, please review the troubleshooting section further down this page.** 14 | 15 | Prerequisite: make sure Node.js is installed on your system and that its version is 18 or greater. 16 | 17 | ## Development 18 | 19 | 1. Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) for your platform. 
20 | 21 | 2. Clone the repo or download the ZIP 22 | 23 | ``` 24 | git clone [github https url] 25 | ``` 26 | 27 | 3. Install packages 28 | 29 | First run `npm install yarn -g` to install yarn globally (if you haven't already). 30 | 31 | Then run: 32 | 33 | ``` 34 | yarn install 35 | ``` 36 | 37 | After installation, you should now see a `node_modules` folder. 38 | 39 | 4. Set up your `.env` file 40 | 41 | - Copy `.env.example` into `.env` 42 | Your `.env` file should look like this: 43 | 44 | ``` 45 | OPENAI_API_KEY= 46 | COLLECTION_NAME= 47 | 48 | ``` 49 | 50 | - Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. 51 | - Choose a collection name where you'd like to store your embeddings in Chroma. This collection will later be used for queries and retrieval. 52 | 53 | 5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAI` to `gpt-4` if you have access to the `gpt-4` API. Please verify outside this repo that you have access to the `gpt-4` API; otherwise the application will not work. 54 | 55 | 6. In a new terminal window, run Chroma in the Docker container: 56 | 57 | ``` 58 | docker run -p 8000:8000 ghcr.io/chroma-core/chroma:0.3.21 59 | ``` 60 | 61 | ## Convert your PDF files to embeddings 62 | 63 | **This repo can load multiple PDF files** 64 | 65 | 1. Inside the `docs` folder, add your PDF files or folders that contain PDF files. 66 | 67 | 2. Run the script `npm run ingest` to 'ingest' and embed your docs. If you run into errors, see the troubleshooting section below. 68 | 69 | 3. Verify that your collection and its embeddings have been added to Chroma. 70 | 71 | ## Run the app 72 | 73 | Once you've verified that the embeddings and content have been successfully added to your Chroma collection, you can run the app with `npm run dev` to launch the local dev environment, and then type a question in the chat interface. 
74 | 75 | ## Troubleshooting 76 | 77 | In general, keep an eye on the `issues` and `discussions` sections of this repo for solutions. 78 | 79 | **General errors** 80 | 81 | - Make sure you're running the latest Node version. Run `node -v` 82 | - Try a different PDF or convert your PDF to text first. It's possible your PDF is corrupted, scanned, or requires OCR to convert to text. 83 | - `console.log` the `env` variables and make sure they are exposed. 84 | - Make sure you're using the same versions of LangChain and Chroma as this repo. 85 | - Check that you've created an `.env` file that contains your valid (and working) OpenAI API key and your collection name. 86 | - If you change `modelName` in `OpenAI`, make sure you have access to the API for the appropriate model. 87 | - Make sure you have enough OpenAI credits and a valid card on your billing account. 88 | - Check that you don't have multiple OpenAI API keys in your global environment. If you do, the value in the project's local `.env` file will be overridden by the system's environment variable. 89 | - Try to hard code your API keys into the `process.env` variables if there are still issues. 90 | 91 | ## Credit 92 | 93 | Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs) 94 | -------------------------------------------------------------------------------- /components/layout.tsx: -------------------------------------------------------------------------------- 1 | interface LayoutProps { 2 | children?: React.ReactNode; 3 | } 4 | 5 | export default function Layout({ children }: LayoutProps) { 6 | return ( 7 |
8 |
9 |
10 | 15 |
16 |
17 |
18 |
19 | {children} 20 |
21 |
22 |
23 | ); 24 | } 25 | -------------------------------------------------------------------------------- /components/ui/LoadingDots.tsx: -------------------------------------------------------------------------------- 1 | import styles from '@/styles/loading-dots.module.css'; 2 | 3 | const LoadingDots = ({ 4 | color = '#000', 5 | style = 'small', 6 | }: { 7 | color: string; 8 | style: string; 9 | }) => { 10 | return ( 11 | 12 | 13 | 14 | 15 | 16 | ); 17 | }; 18 | 19 | export default LoadingDots; 20 | 21 | LoadingDots.defaultProps = { 22 | style: 'small', 23 | }; 24 | -------------------------------------------------------------------------------- /components/ui/TextArea.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import { cn } from '@/utils/cn'; 3 | 4 | export interface TextareaProps 5 | extends React.TextareaHTMLAttributes {} 6 | 7 | const Textarea = React.forwardRef( 8 | ({ className, ...props }, ref) => { 9 | return ( 10 |