├── .env.example ├── .eslintrc.json ├── .gitignore ├── .prettierrc ├── README.md ├── components ├── layout.tsx └── ui │ ├── LoadingDots.tsx │ ├── TextArea.tsx │ └── accordion.tsx ├── config └── pinecone.ts ├── docs └── MorseVsFrederick.pdf ├── next.config.js ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── api │ └── chat.ts └── index.tsx ├── pnpm-lock.yaml ├── postcss.config.cjs ├── public ├── bot-image.png ├── favicon.ico └── usericon.png ├── scripts └── ingest-data.ts ├── styles ├── Home.module.css ├── base.css ├── chrome-bug.css └── loading-dots.module.css ├── tailwind.config.cjs ├── tsconfig.json ├── types └── chat.ts ├── utils ├── cn.ts ├── makechain.ts ├── openai-client.ts └── pinecone-client.ts └── visual-guide └── gpt-langchain-pdf.png /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | 3 | # Update these with your Pinecone API key and environment from your Pinecone dashboard 4 | PINECONE_API_KEY= 5 | PINECONE_ENVIRONMENT= 6 | 7 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | .env 31 | 32 | # vercel 33 | .vercel 34 | 35 | # typescript 36 | *.tsbuildinfo 37 | next-env.d.ts 38 | 39 | #Notion_db 40 | /Notion_DB 41 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all", 3 | "singleQuote": true, 4 | "printWidth": 80, 5 | "tabWidth": 2 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT-4 & LangChain - Create a ChatGPT Chatbot for Your PDF Docs 2 | 3 | Use the new GPT-4 api to build a chatGPT chatbot for Large PDF docs (56 pages used in this example). 4 | 5 | Tech stack used includes LangChain, Pinecone, Typescript, Openai, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vectorstore for storing embeddings and your PDF in text to later retrieve similar docs. 6 | 7 | [Tutorial video](https://www.youtube.com/watch?v=ih9PBGVVOO4) 8 | 9 | [Get in touch via twitter if you have questions](https://twitter.com/mayowaoshin) 10 | 11 | The visual guide for this repo and tutorial is in the `visual-guide` folder. 12 | 13 | **If you run into errors, please review the troubleshooting section further down this page.** 14 | 15 | ## Development 16 | 17 | 1. Clone the repo 18 | 19 | ``` 20 | git clone [github https url] 21 | ``` 22 | 23 | 2. Install packages 24 | 25 | ``` 26 | pnpm install 27 | ``` 28 | 29 | 3. 
Set up your `.env` file 30 | 31 | - Copy `.env.example` into `.env` 32 | Your `.env` file should look like this: 33 | 34 | ``` 35 | OPENAI_API_KEY= 36 | 37 | PINECONE_API_KEY= 38 | PINECONE_ENVIRONMENT= 39 | 40 | ``` 41 | 42 | - Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file. 43 | - Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys. 44 | 45 | 4. In the `config` folder, replace the `PINECONE_INDEX_NAME` and `PINECONE_NAME_SPACE` with your own details from your pinecone dashboard. 46 | 47 | 5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. Change `modelName` in `new OpenAIChat` to a different api model if you don't have access to `gpt-4`. See [the OpenAI docs](https://platform.openai.com/docs/models/model-endpoint-compatibility) for a list of supported `modelName`s. For example you could use `gpt-3.5-turbo` if you do not have access to `gpt-4` yet. 48 | 49 | ## Convert your PDF to embeddings 50 | 51 | 1. In `docs` folder replace the pdf with your own pdf doc. 52 | 53 | 2. In `scripts/ingest-data.ts` replace `filePath` with `docs/{yourdocname}.pdf` 54 | 55 | 3. Run the script `pnpm run ingest` to 'ingest' and embed your docs 56 | 57 | 4. Check Pinecone dashboard to verify your namespace and vectors have been added. 58 | 59 | ## Run the app 60 | 61 | Once you've verified that the embeddings and content have been successfully added to your Pinecone, you can run the app `pnpm run dev` to launch the local dev environment and then type a question in the chat interface. 62 | 63 | ## Troubleshooting 64 | 65 | In general, keep an eye out in the `issues` and `discussions` section of this repo for solutions. 66 | 67 | **General errors** 68 | 69 | - Make sure you're running the latest Node version. Run `node -v` 70 | - Make sure you're using the same versions of LangChain and Pinecone as this repo. 
71 | - Check that you've created an `.env` file that contains your valid (and working) API keys. 72 | - If you change `modelName` in `OpenAIChat` note that the correct name of the alternative model is `gpt-3.5-turbo` 73 | - Pinecone indexes of users on the Starter(free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter. 74 | 75 | **Pinecone errors** 76 | 77 | - Make sure your pinecone dashboard `environment` and `index` matches the one in your `config` folder. 78 | - Check that you've set the vector dimensions to `1536`. 79 | - Switch your Environment in pinecone to `us-east1-gcp` if the other environment is causing issues. 80 | 81 | If you're stuck after trying all these steps, delete `node_modules`, restart your computer, then `pnpm install` again. 82 | 83 | ## Credit 84 | 85 | Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs) 86 | -------------------------------------------------------------------------------- /components/layout.tsx: -------------------------------------------------------------------------------- 1 | interface LayoutProps { 2 | children?: React.ReactNode; 3 | } 4 | 5 | export default function Layout({ children }: LayoutProps) { 6 | return ( 7 |
8 |
9 |
10 | 15 |
16 |
17 |
18 |
19 | {children} 20 |
21 |
22 |
23 | ); 24 | } 25 | -------------------------------------------------------------------------------- /components/ui/LoadingDots.tsx: -------------------------------------------------------------------------------- 1 | import styles from '@/styles/loading-dots.module.css'; 2 | 3 | const LoadingDots = ({ 4 | color = '#000', 5 | style = 'small', 6 | }: { 7 | color: string; 8 | style: string; 9 | }) => { 10 | return ( 11 | 12 | 13 | 14 | 15 | 16 | ); 17 | }; 18 | 19 | export default LoadingDots; 20 | 21 | LoadingDots.defaultProps = { 22 | style: 'small', 23 | }; 24 | -------------------------------------------------------------------------------- /components/ui/TextArea.tsx: -------------------------------------------------------------------------------- 1 | import * as React from 'react'; 2 | import { cn } from '@/utils/cn'; 3 | 4 | export interface TextareaProps 5 | extends React.TextareaHTMLAttributes {} 6 | 7 | const Textarea = React.forwardRef( 8 | ({ className, ...props }, ref) => { 9 | return ( 10 |