├── .eslintrc.json
├── styles
├── base.css
├── chrome-bug.css
├── loading-dots.module.css
└── Home.module.css
├── public
├── favicon.ico
├── bot-image.png
└── usericon.png
├── .prettierrc
├── postcss.config.cjs
├── visual-guide
└── gpt-langchain-pdf.png
├── declarations
└── pdf-parse.d.ts
├── utils
├── cn.ts
├── openai-client.ts
├── pinecone-client.ts
└── makechain.ts
├── .env.example
├── types
└── chat.ts
├── tailwind.config.cjs
├── pages
├── _document.tsx
├── _app.tsx
├── api
│ └── chat.ts
└── index.tsx
├── next.config.js
├── config
└── pinecone.ts
├── components
├── ui
│ ├── LoadingDots.tsx
│ ├── TextArea.tsx
│ └── accordion.tsx
└── layout.tsx
├── .gitignore
├── tsconfig.json
├── package.json
├── scripts
└── ingest-data.ts
└── README.md
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "next/core-web-vitals"
3 | }
4 |
--------------------------------------------------------------------------------
/styles/base.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/misbahsy/chat-with-twitter-algorithm/HEAD/public/favicon.ico
--------------------------------------------------------------------------------
/public/bot-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/misbahsy/chat-with-twitter-algorithm/HEAD/public/bot-image.png
--------------------------------------------------------------------------------
/public/usericon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/misbahsy/chat-with-twitter-algorithm/HEAD/public/usericon.png
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "trailingComma": "all",
3 | "singleQuote": true,
4 | "printWidth": 80,
5 | "tabWidth": 2
6 | }
7 |
--------------------------------------------------------------------------------
/postcss.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/visual-guide/gpt-langchain-pdf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/misbahsy/chat-with-twitter-algorithm/HEAD/visual-guide/gpt-langchain-pdf.png
--------------------------------------------------------------------------------
/declarations/pdf-parse.d.ts:
--------------------------------------------------------------------------------
1 | declare module 'pdf-parse/lib/pdf-parse.js' {
2 | import pdf from 'pdf-parse';
3 |
4 | export default pdf;
5 | }
6 |
--------------------------------------------------------------------------------
/utils/cn.ts:
--------------------------------------------------------------------------------
1 | import { ClassValue, clsx } from 'clsx';
2 | import { twMerge } from 'tailwind-merge';
3 |
4 | export function cn(...inputs: ClassValue[]) {
5 | return twMerge(clsx(inputs));
6 | }
7 |
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
2 |
3 | # Update these with your Pinecone details from your Pinecone dashboard (API key, environment, and index name)
4 | PINECONE_API_KEY=
5 | PINECONE_ENVIRONMENT=
6 | PINECONE_INDEX_NAME=
7 |
--------------------------------------------------------------------------------
/types/chat.ts:
--------------------------------------------------------------------------------
1 | import { Document } from 'langchain/document';
2 |
3 | export type Message = {
4 | type: 'apiMessage' | 'userMessage';
5 | message: string;
6 | isStreaming?: boolean;
7 | sourceDocs?: Document[];
8 | };
9 |
--------------------------------------------------------------------------------
/utils/openai-client.ts:
--------------------------------------------------------------------------------
1 | import { OpenAI } from 'langchain/llms';
2 |
3 | if (!process.env.OPENAI_API_KEY) {
4 | throw new Error('Missing OpenAI Credentials');
5 | }
6 |
7 | export const openai = new OpenAI({
8 | temperature: 0,
9 | });
10 |
--------------------------------------------------------------------------------
/tailwind.config.cjs:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | module.exports = {
3 | content: [
4 | './app/**/*.{js,ts,jsx,tsx}',
5 | './pages/**/*.{js,ts,jsx,tsx}',
6 | './components/**/*.{js,ts,jsx,tsx}',
7 | ],
8 | theme: {
9 | extend: {},
10 | },
11 | };
12 |
--------------------------------------------------------------------------------
/pages/_document.tsx:
--------------------------------------------------------------------------------
1 | import { Html, Head, Main, NextScript } from "next/document";
2 |
3 | export default function Document() {
4 | return (
5 |
6 |
8 |
9 |
10 |
11 |
12 | );
13 | }
14 |
--------------------------------------------------------------------------------
/next.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('next').NextConfig} */
2 | const nextConfig = {
3 | reactStrictMode: true,
4 | swcMinify: true,
5 | webpack(config) {
6 | config.experiments = { ...config.experiments, topLevelAwait: true };
7 | return config;
8 | },
9 | };
10 |
11 | export default nextConfig;
12 |
--------------------------------------------------------------------------------
/styles/chrome-bug.css:
--------------------------------------------------------------------------------
1 | /**
2 | * Chrome has a bug with transitions on load since 2012!
3 | *
4 | * To prevent a "pop" of content, you have to disable all transitions until
5 | * the page is done loading.
6 | *
7 | * https://lab.laukstein.com/bug/input
8 | * https://twitter.com/timer150/status/1345217126680899584
9 | */
10 | body.loading * {
11 | transition: none !important;
12 | }
13 |
--------------------------------------------------------------------------------
/config/pinecone.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Change the namespace to the namespace on Pinecone you'd like to store your embeddings.
3 | */
4 |
5 | if (!process.env.PINECONE_INDEX_NAME) {
6 | throw new Error('Missing Pinecone index name in .env file');
7 | }
8 |
9 | const PINECONE_INDEX_NAME = process.env.PINECONE_INDEX_NAME ?? '';
10 |
11 | const PINECONE_NAME_SPACE = 'the-algorithm'; //namespace is optional for your vectors
12 |
13 | export { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE };
14 |
--------------------------------------------------------------------------------
/pages/_app.tsx:
--------------------------------------------------------------------------------
1 | import '@/styles/base.css';
2 | import type { AppProps } from 'next/app';
3 | import { Inter } from 'next/font/google';
4 |
5 | const inter = Inter({
6 | variable: '--font-inter',
7 | subsets: ['latin'],
8 | });
9 |
10 | function MyApp({ Component, pageProps }: AppProps) {
11 | return (
12 | <>
13 |
14 |
15 |
16 | >
17 | );
18 | }
19 |
20 | export default MyApp;
21 |
--------------------------------------------------------------------------------
/components/ui/LoadingDots.tsx:
--------------------------------------------------------------------------------
1 | import styles from '@/styles/loading-dots.module.css';
2 |
3 | const LoadingDots = ({
4 | color = '#000',
5 | style = 'small',
6 | }: {
7 | color: string;
8 | style: string;
9 | }) => {
10 | return (
11 |
12 |
13 |
14 |
15 |
16 | );
17 | };
18 |
19 | export default LoadingDots;
20 |
21 | LoadingDots.defaultProps = {
22 | style: 'small',
23 | };
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # next.js
12 | /.next/
13 | /out/
14 |
15 | # production
16 | /build
17 |
18 | # misc
19 | .DS_Store
20 | *.pem
21 |
22 | # debug
23 | npm-debug.log*
24 | yarn-debug.log*
25 | yarn-error.log*
26 | .pnpm-debug.log*
27 |
28 | # local env files
29 | .env*.local
30 | .env
31 |
32 | # vercel
33 | .vercel
34 |
35 | # typescript
36 | *.tsbuildinfo
37 | next-env.d.ts
38 |
39 | #Notion_db
40 | /Notion_DB
41 |
42 | #replit
43 | replit.nix
44 | .replit
--------------------------------------------------------------------------------
/utils/pinecone-client.ts:
--------------------------------------------------------------------------------
1 | import { PineconeClient } from '@pinecone-database/pinecone';
2 |
3 | if (!process.env.PINECONE_ENVIRONMENT || !process.env.PINECONE_API_KEY) {
4 | throw new Error('Pinecone environment or api key vars missing');
5 | }
6 |
7 | async function initPinecone() {
8 | try {
9 | const pinecone = new PineconeClient();
10 |
11 | await pinecone.init({
12 | environment: process.env.PINECONE_ENVIRONMENT ?? '', //this is in the dashboard
13 | apiKey: process.env.PINECONE_API_KEY ?? '',
14 | });
15 |
16 | return pinecone;
17 | } catch (error) {
18 | console.log('error', error);
19 | throw new Error('Failed to initialize Pinecone Client');
20 | }
21 | }
22 |
23 | export const pinecone = await initPinecone();
24 |
--------------------------------------------------------------------------------
/components/layout.tsx:
--------------------------------------------------------------------------------
1 | interface LayoutProps {
2 | children?: React.ReactNode;
3 | }
4 |
5 | export default function Layout({ children }: LayoutProps) {
6 | return (
7 |
8 |
17 |
18 |
19 | {children}
20 |
21 |
22 |
23 | );
24 | }
25 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es2020",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "forceConsistentCasingInFileNames": true,
9 | "noEmit": true,
10 | "esModuleInterop": true,
11 | "module": "esnext",
12 | "moduleResolution": "node",
13 | "resolveJsonModule": true,
14 | "isolatedModules": true,
15 | "jsx": "preserve",
16 | "incremental": true,
17 | "baseUrl": ".",
18 | "plugins": [
19 | {
20 | "name": "next"
21 | }
22 | ],
23 | "paths": {
24 | "@/*": ["./*"]
25 | }
26 | },
27 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
28 | "exclude": ["node_modules"]
29 | }
30 |
--------------------------------------------------------------------------------
/components/ui/TextArea.tsx:
--------------------------------------------------------------------------------
1 | import * as React from 'react';
2 | import { cn } from '@/utils/cn';
3 |
4 | export interface TextareaProps
5 | extends React.TextareaHTMLAttributes {}
6 |
7 | const Textarea = React.forwardRef(
8 | ({ className, ...props }, ref) => {
9 | return (
10 |
18 | );
19 | },
20 | );
21 | Textarea.displayName = 'Textarea';
22 |
23 | export { Textarea };
24 |
--------------------------------------------------------------------------------
/styles/loading-dots.module.css:
--------------------------------------------------------------------------------
1 | .loading {
2 | display: inline-flex;
3 | align-items: center;
4 | }
5 |
6 | .loading .spacer {
7 | margin-right: 2px;
8 | }
9 |
10 | .loading span {
11 | animation-name: blink;
12 | animation-duration: 1.4s;
13 | animation-iteration-count: infinite;
14 | animation-fill-mode: both;
15 | width: 5px;
16 | height: 5px;
17 | border-radius: 50%;
18 | display: inline-block;
19 | margin: 0 1px;
20 | }
21 |
22 | .loading span:nth-of-type(2) {
23 | animation-delay: 0.2s;
24 | }
25 |
26 | .loading span:nth-of-type(3) {
27 | animation-delay: 0.4s;
28 | }
29 |
30 | .loading2 {
31 | display: inline-flex;
32 | align-items: center;
33 | }
34 |
35 | .loading2 .spacer {
36 | margin-right: 2px;
37 | }
38 |
39 | .loading2 span {
40 | animation-name: blink;
41 | animation-duration: 1.4s;
42 | animation-iteration-count: infinite;
43 | animation-fill-mode: both;
44 | width: 4px;
45 | height: 4px;
46 | border-radius: 50%;
47 | display: inline-block;
48 | margin: 0 1px;
49 | }
50 |
51 | .loading2 span:nth-of-type(2) {
52 | animation-delay: 0.2s;
53 | }
54 |
55 | .loading2 span:nth-of-type(3) {
56 | animation-delay: 0.4s;
57 | }
58 |
59 | @keyframes blink {
60 | 0% {
61 | opacity: 0.2;
62 | }
63 | 20% {
64 | opacity: 1;
65 | }
66 | 100% {
67 | opacity: 0.2;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "gpt4-langchain-pdf-chatbot",
3 | "version": "0.1.0",
4 | "private": true,
5 | "engines": {
6 | "node": ">=18"
7 | },
8 | "license": "MIT",
9 | "author": "Mayooear",
10 | "type": "module",
11 | "scripts": {
12 | "dev": "next dev",
13 | "build": "next build",
14 | "start": "next start",
15 | "type-check": "tsc --noEmit",
16 | "lint": "eslint --ignore-path .gitignore \"**/*.+(ts|js|tsx)\"",
17 | "format": "prettier --ignore-path .gitignore \"**/*.+(ts|js|tsx)\" --write",
18 | "ingest": "tsx -r dotenv/config scripts/ingest-data.ts"
19 | },
20 | "dependencies": {
21 | "@microsoft/fetch-event-source": "^2.0.1",
22 | "@pinecone-database/pinecone": "^0.0.10",
23 | "@radix-ui/react-accordion": "^1.1.1",
24 | "clsx": "^1.2.1",
25 | "dotenv": "^16.0.3",
26 | "langchain": "0.0.41",
27 | "lucide-react": "^0.125.0",
28 | "next": "13.2.3",
29 | "pdf-parse": "1.1.1",
30 | "pnpm": "^8.1.0",
31 | "react": "18.2.0",
32 | "react-dom": "18.2.0",
33 | "react-markdown": "^8.0.5",
34 | "tailwind-merge": "^1.10.0"
35 | },
36 | "devDependencies": {
37 | "@types/node": "^18.14.6",
38 | "@types/react": "^18.0.28",
39 | "@types/react-dom": "^18.0.11",
40 | "@typescript-eslint/parser": "^5.54.0",
41 | "autoprefixer": "^10.4.13",
42 | "eslint": "8.35.0",
43 | "eslint-config-next": "13.2.3",
44 | "postcss": "^8.4.21",
45 | "prettier": "^2.8.4",
46 | "tailwindcss": "^3.2.7",
47 | "tsx": "^3.12.3",
48 | "typescript": "^4.9.5"
49 | },
50 | "keywords": [
51 | "starter",
52 | "gpt4",
53 | "pinecone",
54 | "typescript",
55 | "nextjs",
56 | "langchain",
57 | "law",
58 | "legal",
59 | "pdf",
60 | "openai"
61 | ]
62 | }
63 |
--------------------------------------------------------------------------------
/scripts/ingest-data.ts:
--------------------------------------------------------------------------------
1 | import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
2 | import { GithubRepoLoader } from "langchain/document_loaders";
3 | import { OpenAIEmbeddings } from 'langchain/embeddings';
4 | import { PineconeStore } from 'langchain/vectorstores';
5 | import { pinecone } from '@/utils/pinecone-client';
6 | import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
7 | import { DirectoryLoader } from 'langchain/document_loaders';
8 |
9 | /* Name of directory to retrieve your files from */
10 | const filePath = 'docs';
11 |
12 | export const run = async () => {
13 | try {
14 |
15 | // load the markdown files of the algorithm repo by twitter
16 | const loader = new GithubRepoLoader(
17 | "https://github.com/misbahsy/the-algorithm-gpt",
18 | { branch: "main", recursive: true, unknown: "warn" }
19 | );
20 | const rawDocs = await loader.load();
21 |
22 | /* Split text into chunks */
23 | const textSplitter = new RecursiveCharacterTextSplitter({
24 | chunkSize: 3000,
25 | chunkOverlap: 200,
26 | });
27 |
28 | const docs = await textSplitter.splitDocuments(rawDocs);
29 | console.log('split docs', docs);
30 |
31 | console.log('creating vector store...');
32 | /*create and store the embeddings in the vectorStore*/
33 | const embeddings = new OpenAIEmbeddings();
34 | const index = pinecone.Index(PINECONE_INDEX_NAME); //change to your own index name
35 |
36 | //embed the repo files
37 | await PineconeStore.fromDocuments(docs, embeddings, {
38 | pineconeIndex: index,
39 | namespace: PINECONE_NAME_SPACE,
40 | textKey: 'text',
41 | });
42 | } catch (error) {
43 | console.log('error', error);
44 | throw new Error('Failed to ingest your data');
45 | }
46 | };
47 |
48 | (async () => {
49 | await run();
50 | console.log('ingestion complete');
51 | })();
52 |
--------------------------------------------------------------------------------
/pages/api/chat.ts:
--------------------------------------------------------------------------------
1 | import type { NextApiRequest, NextApiResponse } from 'next';
2 | import { OpenAIEmbeddings } from 'langchain/embeddings';
3 | import { PineconeStore } from 'langchain/vectorstores';
4 | import { makeChain } from '@/utils/makechain';
5 | import { pinecone } from '@/utils/pinecone-client';
6 | import { PINECONE_INDEX_NAME, PINECONE_NAME_SPACE } from '@/config/pinecone';
7 |
8 | export default async function handler(
9 | req: NextApiRequest,
10 | res: NextApiResponse,
11 | ) {
12 | const { question, history } = req.body;
13 |
14 | if (!question) {
15 | return res.status(400).json({ message: 'No question in the request' });
16 | }
17 | // OpenAI recommends replacing newlines with spaces for best results
18 | const sanitizedQuestion = question.trim().replaceAll('\n', ' ');
19 |
20 | const index = pinecone.Index(PINECONE_INDEX_NAME);
21 |
22 | /* create vectorstore*/
23 | const vectorStore = await PineconeStore.fromExistingIndex(
24 | new OpenAIEmbeddings({}),
25 | {
26 | pineconeIndex: index,
27 | textKey: 'text',
28 | namespace: PINECONE_NAME_SPACE,
29 | },
30 | );
31 |
32 | res.writeHead(200, {
33 | 'Content-Type': 'text/event-stream',
34 | 'Cache-Control': 'no-cache, no-transform',
35 | Connection: 'keep-alive',
36 | });
37 |
38 | const sendData = (data: string) => {
39 | res.write(`data: ${data}\n\n`);
40 | };
41 |
42 | sendData(JSON.stringify({ data: '' }));
43 |
44 | //create chain
45 | const chain = makeChain(vectorStore, (token: string) => {
46 | sendData(JSON.stringify({ data: token }));
47 | });
48 |
49 | try {
50 | //Ask a question
51 | const response = await chain.call({
52 | question: sanitizedQuestion,
53 | chat_history: history || [],
54 | });
55 |
56 | console.log('response', response);
57 | sendData(JSON.stringify({ sourceDocs: response.sourceDocuments }));
58 | } catch (error) {
59 | console.log('error', error);
60 | } finally {
61 | sendData('[DONE]');
62 | res.end();
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/components/ui/accordion.tsx:
--------------------------------------------------------------------------------
1 | import * as React from 'react';
2 | import * as AccordionPrimitive from '@radix-ui/react-accordion';
3 | import { ChevronDown } from 'lucide-react';
4 |
5 | import { cn } from '@/utils/cn';
6 |
7 | const Accordion = AccordionPrimitive.Root;
8 |
9 | const AccordionItem = React.forwardRef<
10 | React.ElementRef,
11 | React.ComponentPropsWithoutRef
12 | >(({ className, ...props }, ref) => (
13 |
21 | ));
22 | AccordionItem.displayName = 'AccordionItem';
23 |
24 | const AccordionTrigger = React.forwardRef<
25 | React.ElementRef,
26 | React.ComponentPropsWithoutRef
27 | >(({ className, children, ...props }, ref) => (
28 |
29 | svg]:rotate-180',
33 | className,
34 | )}
35 | {...props}
36 | >
37 | {children}
38 |
39 |
40 |
41 | ));
42 | AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName;
43 |
44 | const AccordionContent = React.forwardRef<
45 | React.ElementRef,
46 | React.ComponentPropsWithoutRef
47 | >(({ className, children, ...props }, ref) => (
48 |
56 | {children}
57 |
58 | ));
59 | AccordionContent.displayName = AccordionPrimitive.Content.displayName;
60 |
61 | export { Accordion, AccordionItem, AccordionTrigger, AccordionContent };
62 |
--------------------------------------------------------------------------------
/utils/makechain.ts:
--------------------------------------------------------------------------------
1 | import { OpenAIChat } from 'langchain/llms';
2 | import { LLMChain, ChatVectorDBQAChain, loadQAChain } from 'langchain/chains';
3 | import { PineconeStore } from 'langchain/vectorstores';
4 | import { PromptTemplate } from 'langchain/prompts';
5 | import { CallbackManager } from 'langchain/callbacks';
6 |
7 | const CONDENSE_PROMPT =
8 | PromptTemplate.fromTemplate(`Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
9 |
10 | Chat History:
11 | {chat_history}
12 | Follow Up Input: {question}
13 | Standalone question:`);
14 |
15 | const QA_PROMPT = PromptTemplate.fromTemplate(
16 | `You are an AI assistant for the source code of Twitter's Recommendation Algorithm. You are trained on a repo named The Algorithm that open sources some of the recommendation algorithms used at Twitter.
17 | The code for the repo is located at https://github.com/twitter/the-algorithm.
18 | You are given the following extracted parts of a technical summary of files in a codebase and a question.
19 | Provide a conversational answer with clickable hyperlinks back to GitHub.
20 | You should only use hyperlinks that are explicitly listed in the context. Do NOT make up a hyperlink that is not listed.
21 | Include lots of code examples and links to the code examples, where appropriate.
22 | Assume the reader is a technical person but is not deeply familiar with recommendation algorithms.
23 | Assume the reader does not know anything about how the project is structured or which folders/files are provided in the context.
24 | Do not reference the context in your answer. Instead use the context to inform your answer.
25 | If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
26 | If the question is not about the recommendation algorithm, politely inform them that you are tuned to only answer questions about the recommendation algorithm.
27 | Your answer should be at least 300 words and no more than 500 words.
28 | Do not include information that is not directly relevant to the question, even if the context includes it.
29 | Always include a list of reference links to GitHub from the context. Links should ONLY come from the context.
30 |
31 | Question: {question}
32 | =========
33 | {context}
34 | =========
35 | Answer:`,
36 | );
37 |
38 | export const makeChain = (
39 | vectorstore: PineconeStore,
40 | onTokenStream?: (token: string) => void,
41 | ) => {
42 | const questionGenerator = new LLMChain({
43 | llm: new OpenAIChat({ temperature: 0 }),
44 | prompt: CONDENSE_PROMPT,
45 | });
46 | const docChain = loadQAChain(
47 | new OpenAIChat({
48 | temperature: 0,
49 | modelName: 'gpt-3.5-turbo', //change this to 'gpt-4' only if you have verified API access to it
50 | streaming: Boolean(onTokenStream),
51 | callbackManager: onTokenStream
52 | ? CallbackManager.fromHandlers({
53 | async handleLLMNewToken(token) {
54 | onTokenStream(token);
55 | console.log(token);
56 | },
57 | })
58 | : undefined,
59 | }),
60 | { prompt: QA_PROMPT },
61 | );
62 |
63 | return new ChatVectorDBQAChain({
64 | vectorstore,
65 | combineDocumentsChain: docChain,
66 | questionGeneratorChain: questionGenerator,
67 | returnSourceDocuments: true,
68 | k: 3, //number of source documents to return
69 | });
70 | };
71 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GPT & LangChain - Create a ChatGPT Chatbot for Twitter's The Algorithm Repo
2 |
3 | Use the new GPT-3.5 API to build a ChatGPT chatbot for Twitter's The Algorithm ML repo.
4 |
5 | Tech stack used includes LangChain, Pinecone, TypeScript, OpenAI, and Next.js. LangChain is a framework that makes it easier to build scalable AI/LLM apps and chatbots. Pinecone is a vectorstore for storing embeddings and your repo in text to later retrieve similar docs. [Autodoc](https://github.com/context-labs/autodoc) was used to create markdown files, which were then embedded using this repo. Original autodoc conversion at this [link](https://github.com/misbahsy/the-algorithm-gpt). Template of this repo is from [link](https://github.com/mayooear/gpt4-pdf-chatbot-langchain) by [Mayo](https://twitter.com/mayowaoshin).
6 |
7 |
8 | [Get in touch via twitter if you have questions](https://twitter.com/misbahsy)
9 |
10 | The visual guide of this repo and tutorial is in the `visual-guide` folder created by [Mayo](https://twitter.com/mayowaoshin).
11 |
12 | **If you run into errors, please review the troubleshooting section further down this page.**
13 |
14 | ## Development
15 |
16 | 1. Clone the repo
17 |
18 | ```
19 | git clone [github https url]
20 | ```
21 |
22 | 2. Install packages
23 |
24 | ```
25 | pnpm install
26 | ```
27 |
28 | 3. Set up your `.env` file
29 |
30 | - Copy `.env.example` into `.env`
31 | Your `.env` file should look like this:
32 |
33 | ```
34 | OPENAI_API_KEY=
35 |
36 | PINECONE_API_KEY=
37 | PINECONE_ENVIRONMENT=
38 |
39 | PINECONE_INDEX_NAME=
40 |
41 | ```
42 |
43 | - Visit [openai](https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key) to retrieve API keys and insert into your `.env` file.
44 | - Visit [pinecone](https://pinecone.io/) to create and retrieve your API keys, and also retrieve your environment and index name from the dashboard.
45 |
46 | 4. In the `config` folder, replace the `PINECONE_NAME_SPACE` with a `namespace` where you'd like to store your embeddings on Pinecone when you run `pnpm run ingest`. This namespace will later be used for queries and retrieval.
47 |
48 | 5. In `utils/makechain.ts`, change the `QA_PROMPT` for your own use case. The default `modelName` in `new OpenAIChat` is `gpt-3.5-turbo`; change it to `gpt-4` only if you have access. Please verify outside this repo that you have access to `gpt-4`, otherwise the application will not work with it.
49 |
50 | ## Convert your repo to embeddings
51 |
52 | **This repo can load a repo with multiple folders**
53 |
54 | 1. Run Autodoc on your original repo and create markdown files for the entire repo. Save the results of autodoc as a separate repo, and provide the link in the `scripts/ingest-data.ts` file.
55 |
56 | 2. Run the script `pnpm run ingest` to 'ingest' and embed your docs. If you run into errors, troubleshoot below.
57 |
58 | 3. Check Pinecone dashboard to verify your namespace and vectors have been added.
59 |
60 | ## Run the app
61 |
62 | Once you've verified that the embeddings and content have been successfully added to your Pinecone, you can run the app `pnpm run dev` to launch the local dev environment, and then type a question in the chat interface.
63 |
64 | ## Troubleshooting
65 |
66 | In general, keep an eye out in the `issues` and `discussions` section of this repo for solutions.
67 |
68 | **General errors**
69 |
70 | - Make sure you're running the latest Node version. Run `node -v`
71 | - Ensure you have markdown files for each code file either generated using autodoc or similar tools.
72 | - `console.log` the `env` variables and make sure they are exposed.
73 | - Make sure you're using the same versions of LangChain and Pinecone as this repo.
74 | - Check that you've created an `.env` file that contains your valid (and working) API keys, environment and index name.
75 | - If you change `modelName` in `OpenAIChat` note that the correct name of the alternative model is `gpt-3.5-turbo`
76 | - Make sure you have access to `gpt-4` if you decide to use. Test your openAI keys outside the repo and make sure it works and that you have enough API credits.
77 | - Check that you don't have multiple OpenAI API keys in your global environment. If you do, the local `env` file from the project will be overwritten by the system's `env` variable.
78 | - Try to hard code your API keys into the `process.env` variables.
79 |
80 |
81 | **Pinecone errors**
82 |
83 | - Make sure your pinecone dashboard `environment` and `index` matches the one in the `pinecone.ts` and `.env` files.
84 | - Check that you've set the vector dimensions to `1536`.
85 | - Make sure your pinecone namespace is in lowercase.
86 | - Pinecone indexes of users on the Starter(free) plan are deleted after 7 days of inactivity. To prevent this, send an API request to Pinecone to reset the counter before 7 days.
87 | - Retry from scratch with a new Pinecone project, index, and cloned repo.
88 |
89 | ## Credit
90 |
91 | Frontend of this repo is inspired by [langchain-chat-nextjs](https://github.com/zahidkhawaja/langchain-chat-nextjs)
92 |
--------------------------------------------------------------------------------
/styles/Home.module.css:
--------------------------------------------------------------------------------
1 | .main {
2 | display: flex;
3 | flex-direction: column;
4 | justify-content: space-between;
5 | align-items: center;
6 | padding: 1rem;
7 | }
8 |
9 | .header {
10 | width: auto;
11 | }
12 |
13 | .header p {
14 | text-align: center;
15 | }
16 |
17 | .cloudform {
18 | position: relative;
19 | }
20 |
21 | .textarea {
22 | position: relative;
23 | resize: none;
24 | font-size: 1.1rem;
25 | padding: 1rem 2rem 1rem 2rem;
26 | width: 75vw;
27 | border-radius: 0.5rem;
28 | border: 1px solid #d9d9e3;
29 | background: #ffffff;
30 | color: #000;
31 | outline: none;
32 | }
33 |
/* Input textarea states */
.textarea:disabled {
  opacity: 0.5;
}

.textarea:focus {
  outline: none;
  border-color: #6b7280;
  box-shadow: 0 0 0 3px rgba(156, 163, 175, 0.5);
}

.textarea::placeholder {
  color: #6b7280;
}

/* Submit ("generate") button overlaid on the textarea */
.generatebutton {
  position: absolute;
  top: 0.87rem;
  right: 1rem;
  color: rgb(165, 162, 162);
  background: none;
  padding: 0.3rem;
  border: none;
  display: flex;
}

.loadingwheel {
  position: absolute;
  top: 0.2rem;
  right: 0.25rem;
}

.svgicon {
  transform: rotate(90deg);
  width: 1.2em;
  height: 1.2em;
  fill: currentColor;
}

.generatebutton:hover {
  background: #e4575726;
  border-radius: 0.2rem;
}

.generatebutton:disabled {
  opacity: 0.9;
  cursor: not-allowed;
  background: none;
}

/* Chat message list */
.messagelist {
  width: 100%;
  height: 100%;
  overflow-y: scroll;
  border-radius: 0.5rem;
}

.messagelistloading {
  display: flex;
  width: 100%;
  justify-content: center;
  margin-top: 1rem;
}

.usermessage {
  background: #ffffff;
  padding: 1.5rem;
  color: #000;
}

/* Shimmer on the latest user message while the answer streams in.
   The shorthand sets the animation name, and the trailing `alternate`
   sets the direction — the previous separate `animation-direction` and
   `animation-name` longhands were redundant restatements. */
.usermessagewaiting {
  padding: 1.5rem;
  color: #000;
  background: linear-gradient(to left, #07080938, #1a1c2024, #07080936);
  background-size: 200% 200%;
  background-position: -100% 0;
  animation: loading-gradient 2s ease-in-out infinite alternate;
}

@keyframes loading-gradient {
  0% {
    background-position: -100% 0;
  }
  100% {
    background-position: 100% 0;
  }
}

.apimessage {
  background: #f9fafb;
  padding: 1.5rem;
  color: #000;
  animation: fadein 0.5s;
}

@keyframes fadein {
  from {
    opacity: 0;
  }
  to {
    opacity: 1;
  }
}

.apimessage,
.usermessage,
.usermessagewaiting {
  display: flex;
}

/* Rendered-markdown answer styling */
.markdownanswer {
  line-height: 1.75;
}

.markdownanswer a:hover {
  opacity: 0.8;
}

.markdownanswer a {
  color: #b13a3a;
  font-weight: 500;
}

.markdownanswer code {
  color: #15cb19;
  font-weight: 500;
  white-space: pre-wrap !important;
}

.markdownanswer ol,
.markdownanswer ul {
  margin: 1rem;
}

.boticon,
.usericon {
  margin-right: 1rem;
  border-radius: 0.1rem;
  height: 100%;
}

/* Headings inside answers render at body size */
.markdownanswer h1,
.markdownanswer h2,
.markdownanswer h3 {
  font-size: inherit;
}

/* Page layout */
.center {
  display: flex;
  justify-content: center;
  align-items: center;
  position: relative;
  padding: 1rem 0;
  flex-direction: column;
}

.cloud {
  width: 75vw;
  height: 65vh;
  background: #ffffff;
  border-radius: 0.5rem;
  border: 1px solid #d9d9e3;
  display: flex;
  justify-content: center;
  align-items: center;
}

.pointsnormal {
  width: 90%;
  height: 90%;
}

.pointsdim {
  width: 90%;
  height: 90%;
  opacity: 0.25;
}

.footer {
  color: #5f6368;
  font-size: 0.8rem;
  margin: 1.5rem;
}

.footer a {
  font-weight: 500;
  color: #7a7d81;
}

.footer a:hover {
  opacity: 0.8;
}
227 |
/* Mobile optimization */
@media (max-width: 600px) {
  .main {
    padding: 1rem;
    max-height: 90vh;
  }

  .cloud {
    width: 22rem;
    height: 28rem;
  }

  .textarea {
    width: 22rem;
  }

  .topnav {
    border: 1px solid black;
    align-items: center;
    padding: 0.85rem 0.75rem 0.85rem 0.75rem;
  }

  .navlogo {
    font-size: 1.25rem;
    width: 20rem;
  }

  /* NOTE(review): the `.markdownanswer code { white-space: pre-wrap
     !important; }` rule that used to sit here was dropped — it restated,
     verbatim, the unconditional rule declared earlier in this stylesheet,
     so it had no effect at any viewport width. */

  .footer {
    font-size: 0.7rem;
    width: 100%;
    text-align: center;
  }
}
263 |
--------------------------------------------------------------------------------
/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import { useRef, useState, useEffect, useMemo, useCallback } from 'react';
2 | import Layout from '@/components/layout';
3 | import styles from '@/styles/Home.module.css';
4 | import { Message } from '@/types/chat';
5 | import { fetchEventSource } from '@microsoft/fetch-event-source';
6 | import Image from 'next/image';
7 | import ReactMarkdown from 'react-markdown';
8 | import LoadingDots from '@/components/ui/LoadingDots';
9 | import { Document } from 'langchain/document';
10 | import {
11 | Accordion,
12 | AccordionContent,
13 | AccordionItem,
14 | AccordionTrigger,
15 | } from '@/components/ui/accordion';
16 |
export default function Home() {
  // Current contents of the question input box.
  const [query, setQuery] = useState('');
  // True while an answer is streaming back from the API.
  const [loading, setLoading] = useState(false);
  // Typed as Document[] — with a bare `useState([])` the state infers
  // `never[]`, so reading `doc.pageContent` below is a type error.
  const [sourceDocs, setSourceDocs] = useState<Document[]>([]);
  // User-facing error string, or null when there is no error. Without the
  // explicit type, `useState(null)` infers `null` and the later
  // `setError('…')` call fails under strict mode.
  const [error, setError] = useState<string | null>(null);
  const [messageState, setMessageState] = useState<{
    messages: Message[];
    // Partial answer accumulated token-by-token while streaming.
    pending?: string;
    // (question, answer) pairs sent back to the API as chat history.
    history: [string, string][];
    pendingSourceDocs?: Document[];
  }>({
    messages: [
      {
        message: "Hi, what would you like to learn about Twitter's recommendation algorithm?",
        type: 'apiMessage',
      },
    ],
    history: [],
    pendingSourceDocs: [],
  });

  const { messages, pending, history, pendingSourceDocs } = messageState;

  // Typed refs: with plain `useRef(null)` the `current` property infers as
  // `null`, making `.focus()` / `.scrollTop` accesses strict-mode errors.
  const messageListRef = useRef<HTMLDivElement>(null);
  const textAreaRef = useRef<HTMLTextAreaElement>(null);

  // Focus the question box on first mount.
  useEffect(() => {
    textAreaRef.current?.focus();
  }, []);
46 |
47 | //handle form submission
48 | async function handleSubmit(e: any) {
49 | e.preventDefault();
50 |
51 | setError(null);
52 |
53 | if (!query) {
54 | alert('Please input a question');
55 | return;
56 | }
57 |
58 | const question = query.trim();
59 |
60 | setMessageState((state) => ({
61 | ...state,
62 | messages: [
63 | ...state.messages,
64 | {
65 | type: 'userMessage',
66 | message: question,
67 | },
68 | ],
69 | pending: undefined,
70 | }));
71 |
72 | setLoading(true);
73 | setQuery('');
74 | setMessageState((state) => ({ ...state, pending: '' }));
75 |
76 | const ctrl = new AbortController();
77 |
78 | try {
79 | fetchEventSource('/api/chat', {
80 | method: 'POST',
81 | headers: {
82 | 'Content-Type': 'application/json',
83 | },
84 | body: JSON.stringify({
85 | question,
86 | history,
87 | }),
88 | signal: ctrl.signal,
89 | onmessage: (event) => {
90 | if (event.data === '[DONE]') {
91 | setMessageState((state) => ({
92 | history: [...state.history, [question, state.pending ?? '']],
93 | messages: [
94 | ...state.messages,
95 | {
96 | type: 'apiMessage',
97 | message: state.pending ?? '',
98 | sourceDocs: state.pendingSourceDocs,
99 | },
100 | ],
101 | pending: undefined,
102 | pendingSourceDocs: undefined,
103 | }));
104 | setLoading(false);
105 | ctrl.abort();
106 | } else {
107 | const data = JSON.parse(event.data);
108 | if (data.sourceDocs) {
109 | setMessageState((state) => ({
110 | ...state,
111 | pendingSourceDocs: data.sourceDocs,
112 | }));
113 | } else {
114 | setMessageState((state) => ({
115 | ...state,
116 | pending: (state.pending ?? '') + data.data,
117 | }));
118 | }
119 | }
120 | },
121 | });
122 | } catch (error) {
123 | setLoading(false);
124 | setError('An error occurred while fetching the data. Please try again.');
125 | console.log('error', error);
126 | }
127 | }
128 |
129 | //prevent empty submissions
130 | const handleEnter = useCallback(
131 | (e: any) => {
132 | if (e.key === 'Enter' && query) {
133 | handleSubmit(e);
134 | } else if (e.key == 'Enter') {
135 | e.preventDefault();
136 | }
137 | },
138 | [query],
139 | );
140 |
141 | const chatMessages = useMemo(() => {
142 | return [
143 | ...messages,
144 | ...(pending
145 | ? [
146 | {
147 | type: 'apiMessage',
148 | message: pending,
149 | sourceDocs: pendingSourceDocs,
150 | },
151 | ]
152 | : []),
153 | ];
154 | }, [messages, pending, pendingSourceDocs]);
155 |
156 | //scroll to bottom of chat
157 | useEffect(() => {
158 | if (messageListRef.current) {
159 | messageListRef.current.scrollTop = messageListRef.current.scrollHeight;
160 | }
161 | }, [chatMessages]);
162 |
163 | return (
164 | <>
165 |
166 |
167 |
168 | Chat With The Algorithm repo by Twitter
169 |
170 |
171 |
172 |
173 | {chatMessages.map((message, index) => {
174 | let icon;
175 | let className;
176 | if (message.type === 'apiMessage') {
177 | icon = (
178 |
186 | );
187 | className = styles.apimessage;
188 | } else {
189 | icon = (
190 |
198 | );
199 | // The latest message sent by the user will be animated while waiting for a response
200 | className =
201 | loading && index === chatMessages.length - 1
202 | ? styles.usermessagewaiting
203 | : styles.usermessage;
204 | }
205 | return (
206 | <>
207 |
208 | {icon}
209 |
210 |
211 | {message.message}
212 |
213 |
214 |
215 | {message.sourceDocs && (
216 |
220 |
225 | {message.sourceDocs.map((doc, index) => (
226 |
227 |
228 |
229 | Source {index + 1}
230 |
231 |
232 |
233 | {doc.pageContent}
234 |
235 |
236 | Source: {doc.metadata.source}
237 |
238 |
239 |
240 |
241 | ))}
242 |
243 |
244 | )}
245 | >
246 | );
247 | })}
248 | {sourceDocs.length > 0 && (
249 |
250 |
251 | {sourceDocs.map((doc, index) => (
252 |
253 |
254 |
255 | Source {index + 1}
256 |
257 |
258 |
259 | {doc.pageContent}
260 |
261 |
262 |
263 |
264 | ))}
265 |
266 |
267 | )}
268 |
269 |
270 |
314 | {error && (
315 |
318 | )}
319 |
320 |
321 |
326 |
327 | >
328 | );
329 | }
330 |
--------------------------------------------------------------------------------