├── .env.example.txt ├── .eslintrc.json ├── .gitignore ├── README.md ├── docs ├── doc.html └── doc_files │ ├── image001.png │ ├── image002.jpg │ ├── image003.jpg │ ├── image004.png │ ├── image005.jpg │ ├── image006.png │ └── image007.jpg ├── next.config.js ├── package-lock.json ├── package.json ├── prisma └── schema.prisma ├── public ├── favicon.ico ├── next.svg ├── thirteen.svg └── vercel.svg ├── src ├── crawler.ts └── pages │ ├── _app.tsx │ ├── _document.tsx │ ├── api │ ├── chat.ts │ ├── conversationLog.ts │ ├── crawl.ts │ ├── createTokenRequest.ts │ └── database.js │ └── index.tsx └── tsconfig.json /.env.example.txt: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk-ltjEScc6ot0DSoCQt2eUT3BlbkFJo111111111111111 2 | PINECONE_API_KEY=051a516d-46b2-4200-81111111111 3 | PINECONE_ENVIRONMENT=northamerica-northeast1-gcp 4 | PINECONE_INDEX=chatbot-index 5 | DATABASE_URL=mongodb+srv://USERNAME:PASSWORD@cluster0.xd1ua.mongodb.net/conversations?retryWrites=true&w=majority 6 | ABLY_API_KEY=QzvNyQ.2RDnbw:0E0Av7lCsCwcos5ZEwA3Rdi11111111111111111 7 | API_ROOT=http://localhost:3000 -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | .env 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-User Chatbot with Langchain and Pinecone in Next.JS 2 | 3 | This is a Multi-User Chatbot with Langchain and pinecone in Next.JS. The concept of this application is very similar to my Node.js [LangChain Document Helper](https://github.com/Ashot72/LangChain-Documentation-Helper) application. You can read about LangChain, Vector database such as Pinecone, embeddings etc., in the Langchain Document Helper app. 4 | 5 | As a cloud-based database, [MongoDB](https://www.mongodb.com/atlas/database) is used with [Prisma ORM](https://www.prisma.io/ ). 6 | 7 | We can crawl a single site or multiple sites. The sites can be of the same domain, and the app will not crawl the pages that have already been crawled. You can also specify different domains with commas. In this application I will crawl a single site which is [Lightning Tools](https://lightningtools.com/) the company I work for. 8 | 9 | ``` 10 | Crawling samples 11 | 12 | http://localhost:3000/api/crawl?urls=https://lightningtools.com/&limit=1000 13 | 14 | http://localhost:3000/api/crawl?urls=https://lightningtools.com/,https://lightningtools.com/about-us&limit=1000 15 | 16 | http://localhost:3000/api/crawl?urls=https://lightningtools.com/,https://www.microsoft.com/&limit=1000 17 | 18 | ``` 19 | LLMs are stateless, which means they have no concept of memory. 
That means that they do not maintain the chain of conversation on their own. We need to build a mechanism that will maintain conversation history that will be part of the context for each response we get back from the chatbox. For that reason, we use [ably](https://ably.com/) 20 | 21 | When you start the chat, you must specify a username. This name should be unique in general, as with this name you can access the entire conversation log and also display it on a screen. In a real app, a user must be authenticated, and username can be their unique email address, ensuring no two users can have the same usernames. 22 | 23 | To get started. 24 | ``` 25 | Clone the repository 26 | 27 | git clone https://github.com/Ashot72/AI-Chatbot 28 | cd AI-Chatbot 29 | 30 | Add .env file base on env.example.txt file and add respective keys 31 | 32 | # installs dependencies 33 | npm install 34 | 35 | # to run locally 36 | npm run dev 37 | 38 | ``` 39 | 40 | Go to [AI Chatbot Video](https://youtu.be/TkZCDJJrQqw) page 41 | 42 | Go to [AI Chatbot Description](https://ashot72.github.io/AI-Chatbot/doc.html) page 43 | -------------------------------------------------------------------------------- /docs/doc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 39 | 40 | 41 | 42 | 43 | 44 |
45 | 46 |

AI Chatbot

48 | 49 |

Video Link: https://youtu.be/TkZCDJJrQqw

52 | 53 |

GitHub 54 | Repository: https://github.com/Ashot72/AI-Chatbot

56 | 57 |

 

58 | 59 |

This Next.js 60 | application is very similar to my Langchain Document Helper https://github.com/Ashot72/LangChain-Documentation-Helper application. You can

64 | 65 |

read about 66 | LangChain, Vector database such as Pinecone, embeddings etc.

67 | 68 |

70 | 71 |

Figure 1

72 | 73 |

We can 74 | crawl a single site or multiple sites. The sites can be of the same domain, and 75 | the app will not crawl the pages that have already been crawled. You can also 76 | specify

77 | 78 |

different domains 79 | with commas. In this application I will crawl a single site which is https://lightningtools.com/ the company I work for.

83 | 84 |

 

85 | 86 |

88 | 89 |

Figure 2

90 | 91 |

We crawl the 92 | site and put it into the Pinecone vector database.

93 | 94 |

 

95 | 96 |

98 | 99 |

Figure 3

100 | 101 |

You can look 102 | at crawl.ts and crawler.ts files for crawling.

103 | 104 |

LLMs are stateless, 105 | which means they have no concept of memory. That means that they do not 106 | maintain the chain of conversation on their own. We need to build

107 | 108 |

a mechanism that 109 | will maintain conversation history that will be part of the context for each 110 | response we get back from the chatbot.

111 | 112 |

For that reason, 113 | we use ably https://ably.com/

115 | 116 |

Ably is a 117 | real-time delivery platform that provides infrastructure and APIs for 118 | developers to build scalable and reliable real-time applications. It can be 119 | used to handle real-time

120 | 121 |

communication, 122 | data synchronization, and messaging across various platforms and devices. As 123 | our chatbot gains more users, the number of messages exchanged between the bot

124 | 125 |

and the 126 | users will increase. Ably is built to handle such growth in traffic without any 127 | performance degradation.

128 | 129 |

Ably also 130 | ensures message delivery and provides message history, even in cases of 131 | temporary disconnections or network issues. Implementing this level of 132 | reliability using only

133 | 134 |

WebSockets 135 | can be challenging and time-consuming.

136 | 137 |

Ably also 138 | provides built-in security features like token-based authentication and 139 | fine-grained access control, simplifying the process of securing chatbot's 141 | real-time communication.

142 | 143 |

 

144 | 145 |

147 | 148 |

Figure 4

149 | 150 |

You should 151 | go to the Ably site and create a free API key.

152 | 153 |

 

154 | 155 |

157 | 158 |

Figure 5

159 | 160 |

When you 161 | start the chat, you must specify a username. This name should be unique in 162 | general, as with this name you can access the entire conversation log

163 | 164 |

and also 165 | display it on a screen. In a real app, a user must be authenticated, and 166 | username can be their unique email address, ensuring no two users can have the 167 | same usernames.

168 | 169 |

 

170 | 171 |

173 | 174 |

Figure 6

175 | 176 |

We use https://www.mongodb.com/atlas/database a MongoDB cloud database, to store 180 | and register each user conversation. This allows you to enhance the app and

181 | 182 |

display 183 | users' conversation logs on the screen.

185 | 186 |

 

187 | 188 |

190 | 191 |

Figure 7

192 | 193 |

We use 194 | Prisma ORM https://www.prisma.io/ to access MongoDB.

197 | 198 |

 

199 | 200 |

 

201 | 202 |

 

203 | 204 |

 

205 | 206 |

 

207 | 208 |
209 | 210 | 211 | 212 | 213 | -------------------------------------------------------------------------------- /docs/doc_files/image001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image001.png -------------------------------------------------------------------------------- /docs/doc_files/image002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image002.jpg -------------------------------------------------------------------------------- /docs/doc_files/image003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image003.jpg -------------------------------------------------------------------------------- /docs/doc_files/image004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image004.png -------------------------------------------------------------------------------- /docs/doc_files/image005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image005.jpg -------------------------------------------------------------------------------- /docs/doc_files/image006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image006.png -------------------------------------------------------------------------------- 
/docs/doc_files/image007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/docs/doc_files/image007.jpg -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true 4 | } 5 | 6 | module.exports = nextConfig 7 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "chatbot-demo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@ably-labs/react-hooks": "^2.1.0", 13 | "@chatscope/chat-ui-kit-react": "^1.10.1", 14 | "@pinecone-database/pinecone": "^0.1.5", 15 | "@prisma/client": "^4.15.0", 16 | "@types/node": "20.2.5", 17 | "@types/react": "18.2.8", 18 | "@types/react-dom": "18.2.4", 19 | "ably": "^1.2.5-beta.1", 20 | "cheerio": "^1.0.0-rc.12", 21 | "langchain": "^0.0.96", 22 | "next": "13.4.4", 23 | "node-spider": "^1.4.1", 24 | "react": "18.2.0", 25 | "react-dom": "18.2.0", 26 | "string-strip-html": "^13.4.1", 27 | "timeago": "^1.6.7", 28 | "timeago.js": "^4.0.2", 29 | "typescript": "5.1.3", 30 | "url-parse": "^1.5.10", 31 | "uuidv4": "^6.2.13" 32 | }, 33 | "devDependencies": { 34 | "@types/async": "^3.2.18", 35 | "@types/express": "^4.17.17", 36 | "@types/node": "^18.16.11", 37 | "@types/react": "18.0.28", 38 | "@types/react-dom": "18.0.11", 39 | "@types/url-parse": "^1.4.8", 40 | "eslint": "8.34.0", 41 | "eslint-config-next": "13.1.6", 42 | "prisma": "^4.6.1", 43 | "typescript": "4.9.5" 44 | } 45 | } 46 | 
-------------------------------------------------------------------------------- /prisma/schema.prisma: -------------------------------------------------------------------------------- 1 | // This is your Prisma schema file, 2 | // learn more about it in the docs: https://pris.ly/d/prisma-schema 3 | 4 | generator client { 5 | provider = "prisma-client-js" 6 | } 7 | 8 | datasource db { 9 | provider = "mongodb" 10 | url = env("DATABASE_URL") 11 | } 12 | 13 | enum Speaker { 14 | user 15 | bot 16 | } 17 | 18 | model Conversations { 19 | id String @id @default(auto()) @map("_id") @db.ObjectId 20 | user_id String 21 | entry String 22 | speaker Speaker 23 | create_at DateTime @default(now()) 24 | } 25 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ashot72/AI-Chatbot/329b26a969e77ad31206fe9ef0174edee5d8c1c0/public/favicon.ico -------------------------------------------------------------------------------- /public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/thirteen.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/crawler.ts: -------------------------------------------------------------------------------- 1 | //@ts-ignore 2 | import * as Spider from 'node-spider' 3 | import * as cheerio from 'cheerio' 4 | import parse from 'url-parse' 5 | import { stripHtml } from "string-strip-html"; 6 | 7 | export type Page = { 8 | url: string 9 | text: string 10 | } 11 | 12 
| class Crawler { 13 | pages: Page[] = [] 14 | limit: number = 1000 15 | urls: string[] = [] 16 | spider: Spider | null = { } 17 | count: number = 0 18 | textLengthMinimum: number = 200 19 | 20 | constructor(urls: string[], limit: number = 1000, textLengthMinimum: number = 200) { 21 | this.urls = urls 22 | this.limit = limit 23 | this.textLengthMinimum = textLengthMinimum 24 | 25 | this.count = 0 26 | this.pages = [] 27 | this.spider = {} 28 | } 29 | 30 | handleRequest = (doc: any) => { 31 | const $ = cheerio.load(doc.res.body) 32 | 33 | $("script").remove(); 34 | $("img").remove(); 35 | $('br').replaceWith('\n') 36 | $('p').replaceWith('\n') 37 | const html = $("body").html() 38 | 39 | const text = stripHtml(html!).result 40 | 41 | const page: Page = { 42 | url: doc.url, 43 | text 44 | } 45 | 46 | if(text.length > this.textLengthMinimum) { 47 | this.pages.push(page) 48 | } 49 | 50 | doc.$("a").each((i: number, elem: any) => { 51 | var href = doc.$(elem).attr("href")?.split("#")[0] 52 | var targetUrl = href && doc.resolve(href) 53 | 54 | //crawl more 55 | if(targetUrl && this.urls.some(u => { 56 | const targetUrlParts = parse(targetUrl) 57 | const uParts = parse(u) 58 | 59 | return targetUrlParts.hostname === uParts.hostname 60 | }) && this.count < this.limit) { 61 | this.spider.queue(targetUrl, this.handleRequest) 62 | this.count = this.count + 1 63 | } 64 | }) 65 | } 66 | 67 | start = async () => { 68 | this.pages = [] 69 | 70 | return new Promise((resolve, reject) => { 71 | this.spider = new Spider({ 72 | concurrent: 5, 73 | delay: 0, 74 | allowDuplicates: false, 75 | catchErrors: true, 76 | addReferrer: false, 77 | xhr: false, 78 | keepAlive: false, 79 | error: (err: any, url: string) => { 80 | console.log(err, url) 81 | reject(err) 82 | }, 83 | // called when there ar no more requests 84 | done: () => resolve(this.pages), 85 | headers: { "user-agent": "node-spider" }, 86 | encoding: "utf8" 87 | }) 88 | 89 | this.urls.forEach((url) => { 90 | 
this.spider.queue(url, this.handleRequest) 91 | }) 92 | }) 93 | } 94 | } 95 | 96 | export { Crawler } -------------------------------------------------------------------------------- /src/pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import type { AppProps } from 'next/app' 2 | import { configureAbly } from "@ably-labs/react-hooks" 3 | 4 | const prefix = process.env.API_ROOT || "" 5 | 6 | const clientId = 7 | Math.random().toString(36).substring(2, 15) + 8 | Math.random().toString(36).substring(2, 15); 9 | 10 | configureAbly({ 11 | authUrl: `${prefix}/api/createTokenRequest?clientId=${clientId}`, 12 | clientId 13 | }) 14 | 15 | export default function App({ Component, pageProps }: AppProps) { 16 | return ( 17 | 18 | ) 19 | } 20 | -------------------------------------------------------------------------------- /src/pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from 'next/document' 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /src/pages/api/chat.ts: -------------------------------------------------------------------------------- 1 | import type { NextApiRequest, NextApiResponse } from 'next' 2 | import { PineconeClient } from "@pinecone-database/pinecone"; 3 | import { PineconeStore } from "langchain/vectorstores/pinecone"; 4 | import * as Ably from 'ably' 5 | import { uuid } from 'uuidv4'; 6 | import { ConversationalRetrievalQAChain } from "langchain/chains"; 7 | import { OpenAIEmbeddings } from "langchain/embeddings/openai"; 8 | import { ConversationLog } from "./conversationLog"; 9 | import { ChatOpenAI } from "langchain/chat_models/openai"; 10 | 11 | let client: PineconeClient | null = null 12 | 13 | type PageSource = { 14 | pageContent: string, 15 | metadata: { 16 | url: string 17 | } 18 | } 19 | 20 | const initPineconeClient = async () => { 21 | client = new PineconeClient() 22 | await client.init({ 23 | environment: process.env.PINECONE_ENVIRONMENT!, 24 | apiKey: process.env.PINECONE_API_KEY! 25 | }) 26 | } 27 | 28 | const ably = new Ably.Realtime({ key: process.env.ABLY_API_KEY}) 29 | 30 | const handleRequest = async ({ prompt, userId, source, streaming }: { prompt: string, userId: string, source: boolean, streaming: boolean }) => { 31 | if(!client) { 32 | await initPineconeClient() 33 | } 34 | 35 | try { 36 | const channel = ably.channels.get(userId) 37 | const interactionId = uuid() 38 | 39 | const conversationLog = new ConversationLog(userId) 40 | const conversationHistory = await conversationLog.getConverstion({ limit: 10}) 41 | await conversationLog.addEntry({ entry: prompt, speaker: "user"}) 42 | 43 | const pineconeIndex = client!.Index(process.env.PINECONE_INDEX!) 44 | 45 | channel.publish({ 46 | data: { 47 | event: "status", 48 | message: "Finding matches..." 
49 | } 50 | }) 51 | 52 | const vectorStore = await PineconeStore.fromExistingIndex( 53 | new OpenAIEmbeddings(), 54 | { pineconeIndex } 55 | ) 56 | 57 | const model = new ChatOpenAI({ 58 | temperature: 0, 59 | streaming, 60 | callbacks: [{ 61 | async handleLLMNewToken(token) { 62 | channel.publish({ 63 | data: { 64 | event: "response", 65 | token, 66 | interactionId 67 | } 68 | }) 69 | }, 70 | async handleLLMEnd() { 71 | channel.publish({ 72 | data: { 73 | event: "responseEnd" 74 | } 75 | }) 76 | } 77 | }] 78 | }) 79 | 80 | const nonStreamingModel = new ChatOpenAI({}) 81 | 82 | const chain = ConversationalRetrievalQAChain.fromLLM( 83 | model, 84 | vectorStore.asRetriever(), 85 | { 86 | returnSourceDocuments: true, 87 | questionGeneratorChainOptions: { 88 | llm: nonStreamingModel 89 | } 90 | } 91 | ) 92 | 93 | let chat_history = conversationHistory.join("\n") 94 | const response = await chain.call({ question: prompt, chat_history }) 95 | 96 | if(!streaming) { 97 | channel.publish({ 98 | data: { 99 | event: "response", 100 | token: response.text, 101 | interactionId 102 | } 103 | }) 104 | } 105 | 106 | if(source) { 107 | const pageContents: string[] = [] 108 | 109 | let index = 1 110 | response.sourceDocuments.forEach((source: PageSource) => { 111 | const { pageContent, metadata: { url }} = source 112 | 113 | if(!pageContents.includes(pageContent)){ 114 | const token = `
Source #${index} 115 |
${pageContent} 116 |
${url}` 117 | 118 | channel.publish({ 119 | data: { 120 | event: "response", 121 | token: "
" + token, 122 | interactionId 123 | } 124 | }) 125 | 126 | pageContents.push(pageContent) 127 | index++ 128 | } 129 | }); 130 | } 131 | 132 | await conversationLog.addEntry({ entry: response.text, speaker: "bot" }) 133 | 134 | } catch(error) { 135 | console.error(error) 136 | } 137 | } 138 | 139 | export default async function handler(req: NextApiRequest, res: NextApiResponse) { 140 | const { body: { prompt, userId, source, streaming } } = req 141 | await handleRequest({ prompt, userId, source, streaming}) 142 | res.status(200).json({ "message": "started" }) 143 | } -------------------------------------------------------------------------------- /src/pages/api/conversationLog.ts: -------------------------------------------------------------------------------- 1 | import { prisma } from "./database"; 2 | import { Speaker } from '@prisma/client'; 3 | 4 | class ConversationLog { 5 | constructor(public userId: string) { 6 | this.userId = userId 7 | } 8 | 9 | public async addEntry({ entry, speaker }: { entry: string, speaker: Speaker }) { 10 | await prisma.conversations.create({ data: { user_id: this.userId, entry, speaker}}) 11 | } 12 | 13 | public async getConverstion({ limit }: { limit: number }): Promise { 14 | const conversaion = await prisma.conversations.findMany( 15 | { 16 | where: { user_id: this.userId }, 17 | take: limit, 18 | orderBy: [{ create_at: 'desc' }] 19 | }) 20 | return conversaion.map((data) => data.entry).reverse() 21 | } 22 | } 23 | 24 | export { ConversationLog } -------------------------------------------------------------------------------- /src/pages/api/crawl.ts: -------------------------------------------------------------------------------- 1 | import { NextApiRequest, NextApiResponse } from "next"; 2 | import { PineconeClient } from "@pinecone-database/pinecone" 3 | import { PineconeStore } from "langchain/vectorstores/pinecone" 4 | import { Crawler, Page } from "crawler"; 5 | import { Document } from "langchain/document" 6 | import { 
OpenAIEmbeddings } from "langchain/embeddings/openai" 7 | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter" 8 | 9 | let client: PineconeClient | null = null 10 | 11 | const initPineconeClient = async () => { 12 | client = new PineconeClient() 13 | 14 | await client.init({ 15 | apiKey: process.env.PINECONE_API_KEY!, 16 | environment: process.env.PINECONE_ENVIRONMENT! 17 | }) 18 | } 19 | 20 | export default async function handler( 21 | req: NextApiRequest, 22 | res: NextApiResponse 23 | ) { 24 | 25 | if ( 26 | !process.env.PINECONE_API_KEY || 27 | !process.env.PINECONE_ENVIRONMENT || 28 | !process.env.PINECONE_INDEX 29 | ) { 30 | res.status(500).json({ message: "PINECONE_ENVIRONMENT and PINECONE_API_KEY and PINECONE_INDEX must be set" }) 31 | return 32 | } 33 | 34 | const { query } = req 35 | const { urls: urlString, limit } = query 36 | const urls = (urlString as string).split(",") 37 | const crawlLimit = parseInt(limit as string) || 100 38 | 39 | if (!client) { 40 | await initPineconeClient() 41 | } 42 | 43 | const index = client!.Index(process.env.PINECONE_INDEX) 44 | 45 | const crawler = new Crawler(urls, crawlLimit, 200) 46 | const pages = await crawler.start() as Page[] 47 | 48 | const documents = await Promise.all( 49 | pages.map((page) => { 50 | const splitter = new RecursiveCharacterTextSplitter({ 51 | chunkSize: 1000, 52 | chunkOverlap: 100 53 | }) 54 | 55 | const docs = splitter.splitDocuments([ 56 | new Document({ 57 | pageContent: page.text, 58 | metadata: { 59 | url: page.url 60 | } 61 | }) 62 | ]) 63 | 64 | return docs 65 | }) 66 | ) 67 | 68 | console.log("Documents length", documents.flat().length) 69 | 70 | await PineconeStore.fromDocuments( 71 | documents.flat(), 72 | new OpenAIEmbeddings(), 73 | { pineconeIndex: index! 
} 74 | ) 75 | 76 | console.log("Added to Pinecore vectorestore vectors") 77 | 78 | res.status(200).json({ messgae: "Done"}) 79 | } -------------------------------------------------------------------------------- /src/pages/api/createTokenRequest.ts: -------------------------------------------------------------------------------- 1 | import Ably from "ably/promises" 2 | import { NextApiRequest, NextApiResponse } from "next" 3 | let options: Ably.Types.ClientOptions = { key: process.env.ABLY_API_KEY } 4 | 5 | export default async function handler(req: NextApiRequest, res: NextApiResponse) { 6 | const client = new Ably.Realtime(options) 7 | const tokenRequestData = await client.auth.createTokenRequest({ clientId: req.query.clientId as string }) 8 | 9 | res.status(200).json(tokenRequestData) 10 | } -------------------------------------------------------------------------------- /src/pages/api/database.js: -------------------------------------------------------------------------------- 1 | import { PrismaClient } from '@prisma/client'; 2 | 3 | /** 4 | * @type PrismaClient 5 | */ 6 | let prisma; 7 | 8 | if (process.env.NODE_ENV === 'production') { 9 | prisma = new PrismaClient(); 10 | prisma.$connect(); 11 | } else { 12 | if (!global.__db) { 13 | global.__db = new PrismaClient(); 14 | global.__db.$connect(); 15 | } 16 | prisma = global.__db; 17 | } 18 | 19 | export { prisma }; -------------------------------------------------------------------------------- /src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import Head from 'next/head' 2 | import { useState } from 'react' 3 | import * as timeago from "timeago.js" 4 | import { 5 | MainContainer, 6 | ChatContainer, 7 | MessageList, 8 | Message, 9 | MessageInput, 10 | ConversationHeader, 11 | TypingIndicator 12 | } from "@chatscope/chat-ui-kit-react" 13 | import styles from "@chatscope/chat-ui-kit-styles/dist/default/styles.min.css"; 14 | import { useChannel } from 
'@ably-labs/react-hooks' 15 | import { Types } from "ably" 16 | 17 | type ConversationEntry = { 18 | message: string 19 | speaker: "bot" | "user" 20 | date: Date 21 | id?: string 22 | } 23 | 24 | const updateChatbotMessage = ( 25 | conversation: ConversationEntry[], 26 | message: Types.Message 27 | ): ConversationEntry[] => { 28 | const interactionId = message.data.interactionId; 29 | 30 | const updatedConversation = conversation.reduce( 31 | (acc: ConversationEntry[], e: ConversationEntry) => [ 32 | ...acc, 33 | e.id === interactionId 34 | ? { ...e, message: e.message + message.data.token } 35 | : e, 36 | ], 37 | [] 38 | ); 39 | 40 | return conversation.some((e) => e.id === interactionId) 41 | ? updatedConversation 42 | : [ 43 | ...updatedConversation, 44 | { 45 | id: interactionId, 46 | message: message.data.token, 47 | speaker: "bot", 48 | date: new Date(), 49 | }, 50 | ]; 51 | }; 52 | 53 | export default function Home() { 54 | const [ text, setText ] = useState("") 55 | const [ conversation, setConversation] = useState([]) 56 | const [ botIsTyping, setBotIsTyping] = useState(false) 57 | const [ statusMessage, setStatusMessage] = useState("Waiting for query...") 58 | const [ source, setSource] = useState(false) 59 | const [ streaming, setStreaming] = useState(true) 60 | const [ userId, setUserId] = useState("") 61 | 62 | useChannel(userId || 'default', (message) => { 63 | switch(message.data.event) { 64 | case "response": 65 | setConversation((state) => updateChatbotMessage(state, message)) 66 | break 67 | case "status": 68 | setStatusMessage(message.data.message) 69 | break 70 | case "responseEnd": 71 | default: 72 | setBotIsTyping(false) 73 | setStatusMessage("Waiting for query...") 74 | } 75 | }) 76 | 77 | const submit = async () => { 78 | if(!userId) { 79 | alert("Please specify username.") 80 | return 81 | } 82 | 83 | setConversation((state) => [ 84 | ... 
state, { 85 | message: text, 86 | speaker: "user", 87 | date: new Date() 88 | } 89 | ]) 90 | 91 | try { 92 | setBotIsTyping(true) 93 | const response = await fetch("/api/chat", { 94 | method: "POST", 95 | headers: { 96 | "Content-Type": "application/json" 97 | }, 98 | body: JSON.stringify({ prompt: text, userId, source, streaming }) 99 | }) 100 | 101 | await response.json() 102 | } catch(error) { 103 | console.error("Error submitting message:", error) 104 | } finally { 105 | setBotIsTyping(false) 106 | } 107 | setText("") 108 | } 109 | 110 | return ( 111 | <> 112 | 113 | ChatBot 114 | 115 | 116 | 117 | 118 |
119 |
120 |
121 | setStreaming(!streaming)} /> 122 | 123 |
124 |
125 | setSource(!source)} /> 126 | 127 |
128 |
129 | Username setUserId(e.target.value)} style={{padding: "6px"}}/> 130 |
131 |
132 |
133 | 134 | 135 | 136 | 137 | 141 | 142 | 146 | ) : null 147 | } 148 | > 149 | { 150 | conversation.map((entry, index) => { 151 | return ( 152 | 163 | 164 | 165 | 166 | 170 | 171 | ) 172 | }) 173 | } 174 | 175 | { 179 | setText(text) 180 | }} 181 | sendButton={true} 182 | autoFocus 183 | disabled={botIsTyping} 184 | /> 185 | 186 | 187 |
188 |
189 | 190 | ) 191 | } 192 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "baseUrl": "./src", 18 | "paths": { 19 | "@/*": ["./src/*"] 20 | } 21 | }, 22 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 23 | "exclude": ["node_modules"] 24 | } 25 | --------------------------------------------------------------------------------