├── .env.example ├── .eslintrc.json ├── .github └── workflows │ └── playwright.yml ├── .gitignore ├── README-updated.md ├── README.md ├── next-env.d.ts ├── next.config.js ├── package.json ├── playwright-report └── index.html ├── playwright.config.ts ├── pnpm-lock.yaml ├── postcss.config.js ├── public ├── next.svg ├── pinecone.svg └── vercel.svg ├── src ├── app │ ├── api │ │ ├── chat │ │ │ └── route.ts │ │ ├── checkIndex │ │ │ └── route.ts │ │ ├── clearIndex │ │ │ └── route.ts │ │ └── crawl │ │ │ ├── crawler.ts │ │ │ ├── route.ts │ │ │ └── seed.ts │ ├── appContext.tsx │ ├── assets │ │ ├── icons │ │ │ ├── ellipse.tsx │ │ │ ├── pinecone.tsx │ │ │ └── user.tsx │ │ └── svg │ │ │ ├── blueEllipse.tsx │ │ │ ├── ellipse.tsx │ │ │ ├── pinecone.tsx │ │ │ ├── pineconeLogo.tsx │ │ │ ├── upArrow.tsx │ │ │ └── user.tsx │ ├── components │ │ ├── Chat │ │ │ ├── ChatInput.tsx │ │ │ ├── ChatWrapper.tsx │ │ │ ├── Messages.tsx │ │ │ └── index.tsx │ │ ├── Header.tsx │ │ └── Sidebar │ │ │ ├── Button.tsx │ │ │ ├── Card.tsx │ │ │ ├── InfoPopover.tsx │ │ │ ├── RecursiveSplittingOptions.tsx │ │ │ ├── UrlButton.tsx │ │ │ ├── index.tsx │ │ │ ├── urls.ts │ │ │ └── utils.ts │ ├── favicon.ico │ ├── globals.css │ ├── hooks │ │ └── useRefreshIndex.ts │ ├── layout.tsx │ ├── page.tsx │ ├── services │ │ ├── chunkedUpsert.ts │ │ ├── context.ts │ │ ├── embeddings.ts │ │ └── pinecone.ts │ └── utils │ │ └── truncateString.ts ├── global.css └── middleware.ts ├── tailwind.config.js ├── tests └── example.spec.ts └── tsconfig.json /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | 3 | # Retrieve the following from the Pinecone Console. 
4 | 5 | # Navigate to API Keys under your Project to retrieve the API key and environment 6 | PINECONE_API_KEY= 7 | PINECONE_REGION=us-west-2 8 | PINECONE_CLOUD=aws 9 | 10 | 11 | # Navigate to Indexes under your Project to retrieve the Index name 12 | PINECONE_INDEX= -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.github/workflows/playwright.yml: -------------------------------------------------------------------------------- 1 | name: Playwright Tests 2 | on: 3 | push: 4 | branches: 5 | - '**' 6 | pull_request: 7 | branches: 8 | - '**' 9 | workflow_dispatch: 10 | jobs: 11 | install: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | - uses: actions/setup-node@v3 16 | with: 17 | node-version: 18 18 | - name: Setup pnpm 19 | uses: pnpm/action-setup@v3 20 | with: 21 | version: 9 # Specify the pnpm version here 22 | 23 | test: 24 | needs: install 25 | timeout-minutes: 60 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v3 29 | - uses: actions/setup-node@v3 30 | with: 31 | node-version: 18 32 | - name: Setup pnpm 33 | uses: pnpm/action-setup@v3 34 | with: 35 | version: 9 # Specify the pnpm version here 36 | - name: Install dependencies 37 | run: pnpm install 38 | - name: Build application 39 | run: pnpm run build 40 | - name: Start server 41 | run: pnpm start & 42 | - name: Install Playwright Browsers 43 | run: npx playwright install --with-deps 44 | - name: Run Playwright tests 45 | run: npx playwright test 46 | - uses: actions/upload-artifact@v3 47 | if: always() 48 | with: 49 | name: playwright-report 50 | path: playwright-report/ 51 | retention-days: 30 52 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env*.local 29 | 30 | # vercel 31 | .vercel 32 | 33 | # typescript 34 | *.tsbuildinfo 35 | next-env.d.ts 36 | .env 37 | 38 | /test-results/ 39 | /playwright-report/ 40 | /playwright/.cache/ 41 | -------------------------------------------------------------------------------- /README-updated.md: -------------------------------------------------------------------------------- 1 | # Building a Context-Aware Chatbot with Pinecone and Vercel 2 | 3 | In this example, we'll build a full-stack application that uses Retrieval Augmented Generation (RAG) powered by [Pinecone](https://pinecone.io) to deliver accurate and contextually relevant responses in a chatbot. 4 | 5 | RAG is a powerful tool that combines the benefits of retrieval-based models and generative models. Unlike traditional chatbots that can struggle with maintaining up-to-date information or accessing domain-specific knowledge, a RAG-based chatbot uses a knowledge base created from crawled URLs to provide contextually relevant responses. 6 | 7 | Incorporating Vercel's AI SDK into our application will allow us easily set up the chatbot workflow and utilize streaming more efficiently, particularly in edge environments, enhancing the responsiveness and performance of our chatbot. 8 | 9 | By the end of this tutorial, you'll have a context-aware chatbot that provides accurate responses without hallucination, ensuring a more effective and engaging user experience. 
Let's get started on building this powerful tool ([Full code listing](https://github.com/pinecone-io/pinecone-vercel-example/blob/main/package.json)). 10 | 11 | ## Step 1: Setting Up Your Next.js Application 12 | 13 | First, create a new Next.js app and install the necessary packages: 14 | 15 | ```bash 16 | npx create-next-app chatbot 17 | cd chatbot 18 | npm install ai react @pinecone-database/pinecone 19 | ``` 20 | 21 | ## Step 2: Create the Chatbot 22 | In this step, we are going to build a chat interface that will render two components. One of these components will be a chatbot with context support provided by Pinecone. The other component will be a chatbot without context. Both of these components will present messages received by the `useChat` hook from the Vercel AI SDK. 23 | 24 | 25 | ### Chatbot Frontend Component 26 | 27 | Create a Chat component that will render the chat interface. This component will have two ChatWrapper components, one for the chatbot with context and one without context. 28 | When a message is sent, each of the `ChatWrapper` components will be notified and take on the responsibility of sending the message to the backend, as well as presenting the proper messages.
29 | 30 | ```tsx 31 | // Importing necessary modules and types 32 | import AppContext from "@/appContext"; 33 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 34 | import React, { ChangeEvent, FormEvent, useContext, useRef } from "react"; 35 | import ChatInput from "./ChatInput"; 36 | import ChatWrapper, { ChatInterface } from "./ChatWrapper"; 37 | 38 | // Defining the properties for the Chat component 39 | interface ChatProps { 40 | setContext: (data: { context: PineconeRecord[] }[]) => void; 41 | context: { context: PineconeRecord[] }[] | null; 42 | } 43 | 44 | // The Chat component 45 | const Chat: React.FC = ({ setContext, context }) => { 46 | // Creating references for the chat components with and without context 47 | const chatWithContextRef = useRef(null); 48 | const chatWithoutContextRef = useRef(null); 49 | 50 | // Accessing the total number of records from the application context 51 | const { totalRecords } = useContext(AppContext); 52 | 53 | // State for the chat input 54 | const [input, setInput] = React.useState("") 55 | 56 | // Function to handle message submission 57 | const onMessageSubmit = (e: FormEvent) => { 58 | // Clear the input 59 | setInput("") 60 | // Submit the message to both chat components 61 | chatWithContextRef.current?.handleMessageSubmit(e) 62 | chatWithoutContextRef.current?.handleMessageSubmit(e) 63 | } 64 | 65 | // Function to handle input change 66 | const onInputChange = (event: ChangeEvent) => { 67 | // Update the input state 68 | setInput(event.target.value) 69 | // Update the input in both chat components 70 | chatWithContextRef.current?.handleInputUpdated(event) 71 | chatWithoutContextRef.current?.handleInputUpdated(event) 72 | } 73 | 74 | // Rendering the Chat component 75 | return ( 76 | // The chat interface is divided into two sections, one for the chat with context and one without context 77 |
78 |
79 |
80 | 81 |
82 |
83 | 84 |
85 |
86 | // The chat input is rendered at the bottom of the chat interface 87 |
88 | 89 |
90 |
91 | ); 92 | }; 93 | 94 | // Exporting the Chat component 95 | export default Chat; 96 | ``` 97 | 98 | ### ChatWrapper Component 99 | 100 | 101 | The Chat component is responsible for handling the chatbot's input and message submission. It uses the useChat hook from the ai package to manage the chatbot's state. 102 | The component is divided into two parts: the Messages component that displays the chat messages, and a form for submitting new messages. The component also generates a unique ID for each message. 103 | 104 | ```tsx 105 | 106 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 107 | import { useChat } from "ai/react"; 108 | import React, { ChangeEvent, FormEvent, Ref, forwardRef, useEffect, useImperativeHandle, useRef } from "react"; 109 | import { v4 as uuidv4 } from 'uuid'; 110 | import Messages from "./Messages"; 111 | 112 | export interface ChatInterface { 113 | handleMessageSubmit: (e: FormEvent) => void; 114 | handleInputUpdated: (event: ChangeEvent) => void; 115 | ref: Ref; 116 | withContext: boolean; 117 | } 118 | 119 | interface ChatProps { 120 | withContext: boolean; 121 | setContext: (data: { context: PineconeRecord[] }[]) => void; 122 | context?: { context: PineconeRecord[] }[] | null; 123 | ref: Ref 124 | } 125 | 126 | const Chat: React.FC = forwardRef(({ withContext, setContext, context }, ref) => { 127 | const { messages, handleInputChange, handleSubmit, isLoading, data } = useChat({ 128 | sendExtraMessageFields: true, 129 | body: { 130 | withContext, 131 | }, 132 | }); 133 | 134 | useEffect(() => { 135 | if (data) { 136 | setContext(data as { context: PineconeRecord[] }[]) // Logs the additional data 137 | } 138 | }, [data, setContext]); 139 | 140 | const chatRef = useRef(null); 141 | 142 | useImperativeHandle(ref, () => ({ 143 | handleMessageSubmit: (event: FormEvent) => { 144 | const id = uuidv4(); // Generate a unique ID 145 | handleSubmit(event, { 146 | data: { 147 | messageId: id, // Include the ID in the message 
object 148 | 149 | }, 150 | }) 151 | }, 152 | handleInputUpdated: (event: ChangeEvent) => { 153 | handleInputChange(event); 154 | }, 155 | })); 156 | 157 | return ( 158 |
159 | 160 |
chatRef.current?.handleMessageSubmit(e)} className="..."> 161 | chatRef.current?.handleInputUpdated(e)} 165 | /> 166 | 167 |
168 |
169 | ); 170 | }); 171 | 172 | Chat.displayName = 'Chat'; 173 | 174 | export default Chat; 175 | ``` 176 | 177 | ### ChatInput Component 178 | 179 | The ChatInput component is responsible for rendering the chat input field and the send button. It uses the `handleInputChange` and `handleMessageSubmit` functions from the Chat component to handle input changes and message submission. 180 | 181 | ```tsx 182 | import React, { ChangeEvent, FormEvent } from "react"; 183 | 184 | interface ChatInputProps { 185 | input: string; 186 | handleInputChange: (e: ChangeEvent) => void; 187 | handleMessageSubmit: (e: FormEvent) => void; 188 | showIndexMessage: boolean; 189 | } 190 | 191 | const ChatInput: React.FC = ({ input, handleInputChange, handleMessageSubmit, showIndexMessage }) => { 192 | return ( 193 |
194 | 200 | 201 | {showIndexMessage && ( 202 |
203 | Press ⮐ to send 204 |
205 | )} 206 |
207 | ); 208 | }; 209 | 210 | export default ChatInput; 211 | ``` 212 | 213 | 214 | 215 | ## Step 3. Adding Context 216 | 217 | As we dive into building our chatbot, it's important to understand the role of context. Adding context to our chatbot's responses is key for creating a more natural, conversational user experience. Without context, a chatbot's responses can feel disjointed or irrelevant. By understanding the context of a user's query, our chatbot will be able to provide more accurate, relevant, and engaging responses. Now, let's begin building with this goal in mind. 218 | 219 | First, we'll first focus on seeding the knowledge base. We'll create a crawler and a seed script, and set up a crawl endpoint. This will allow us to gather and organize the information our chatbot will use to provide contextually relevant responses. 220 | 221 | After we've populated our knowledge base, we'll retrieve matches from our embeddings. This will enable our chatbot to find relevant information based on user queries. 222 | 223 | Next, we'll wrap our logic into the getContext function and update our chatbot's prompt. This will streamline our code and improve the user experience by ensuring the chatbot's prompts are relevant and engaging. 224 | 225 | Finally, we'll add a context panel and an associated context endpoint. These will provide a user interface for the chatbot and a way for it to retrieve the necessary context for each user query. 226 | 227 | This step is all about feeding our chatbot the information it needs and setting up the necessary infrastructure for it to retrieve and use that information effectively. Let's get started. 228 | 229 | ## Seeding the Knowledge Base 230 | 231 | Now we'll move on to seeding the knowledge base, the foundational data source that will inform our chatbot's responses. This step involves collecting and organizing the information our chatbot needs to operate effectively. 
In this guide, we're going to use data retrieved from various websites which we'll later on be able to ask questions about. To do this, we'll create a crawler that will scrape the data from the websites, embed it, and store it in Pinecone. 232 | 233 | ### Create the crawler 234 | 235 | For the sake of brevity, you'll be able to find the full code for the crawler here. Here are the pertinent parts: 236 | 237 | ```ts 238 | class Crawler { 239 | private seen = new Set(); 240 | private pages: Page[] = []; 241 | private queue: { url: string; depth: number }[] = []; 242 | 243 | constructor(private maxDepth = 2, private maxPages = 1) {} 244 | 245 | async crawl(startUrl: string): Promise { 246 | // Add the start URL to the queue 247 | this.addToQueue(startUrl); 248 | 249 | // While there are URLs in the queue and we haven't reached the maximum number of pages... 250 | while (this.shouldContinueCrawling()) { 251 | // Dequeue the next URL and depth 252 | const { url, depth } = this.queue.shift()!; 253 | 254 | // If the depth is too great or we've already seen this URL, skip it 255 | if (this.isTooDeep(depth) || this.isAlreadySeen(url)) continue; 256 | 257 | // Add the URL to the set of seen URLs 258 | this.seen.add(url); 259 | 260 | // Fetch the page HTML 261 | const html = await this.fetchPage(url); 262 | 263 | // Parse the HTML and add the page to the list of crawled pages 264 | this.pages.push({ url, content: this.parseHtml(html) }); 265 | 266 | // Extract new URLs from the page HTML and add them to the queue 267 | this.addNewUrlsToQueue(this.extractUrls(html, url), depth); 268 | } 269 | 270 | // Return the list of crawled pages 271 | return this.pages; 272 | } 273 | 274 | // ... 
Some private methods removed for brevity 275 | 276 | private async fetchPage(url: string): Promise { 277 | try { 278 | const response = await fetch(url); 279 | return await response.text(); 280 | } catch (error) { 281 | console.error(`Failed to fetch ${url}: ${error}`); 282 | return ""; 283 | } 284 | } 285 | 286 | private parseHtml(html: string): string { 287 | const $ = cheerio.load(html); 288 | $("a").removeAttr("href"); 289 | return NodeHtmlMarkdown.translate($.html()); 290 | } 291 | 292 | private extractUrls(html: string, baseUrl: string): string[] { 293 | const $ = cheerio.load(html); 294 | const relativeUrls = $("a") 295 | .map((_, link) => $(link).attr("href")) 296 | .get() as string[]; 297 | return relativeUrls.map( 298 | (relativeUrl) => new URL(relativeUrl, baseUrl).href 299 | ); 300 | } 301 | } 302 | ``` 303 | 304 | The `Crawler` class is a web crawler that visits URLs, starting from a given point, and collects information from them. It operates within a certain depth and a maximum number of pages as defined in the constructor. The crawl method is the core function that starts the crawling process. 305 | 306 | The helper methods fetchPage, parseHtml, and extractUrls respectively handle fetching the HTML content of a page, parsing the HTML to extract text, and extracting all URLs from a page to be queued for the next crawl. The class also maintains a record of visited URLs to avoid duplication. 307 | 308 | ### Create the `seed` function 309 | 310 | To tie things together, we'll create a seed function that will use the crawler to seed the knowledge base. In this portion of the code, we'll initialize the crawl and fetch a given URL, then split its content into chunks, and finally embed and index the chunks in Pinecone.
311 | 312 | ```ts 313 | async function seed(url: string, limit: number, indexName: string, options: SeedOptions) { 314 | try { 315 | // Initialize the Pinecone client 316 | const pinecone = new Pinecone(); 317 | 318 | // Destructure the options object 319 | const { splittingMethod, chunkSize, chunkOverlap } = options; 320 | 321 | // Create a new Crawler with depth 1 and maximum pages as limit 322 | const crawler = new Crawler(1, limit || 100); 323 | 324 | // Crawl the given URL and get the pages 325 | const pages = await crawler.crawl(url) as Page[]; 326 | 327 | // Choose the appropriate document splitter based on the splitting method 328 | const splitter: DocumentSplitter = splittingMethod === 'recursive' ? 329 | new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap }) : new MarkdownTextSplitter({}); 330 | 331 | // Prepare documents by splitting the pages 332 | const documents = await Promise.all(pages.map(page => prepareDocument(page, splitter))); 333 | 334 | // Create Pinecone index if it does not exist 335 | const indexList = await pinecone.listIndexes(); 336 | const indexExists = indexList.some(index => index.name === indexName) 337 | if (!indexExists) { 338 | await pinecone.createIndex({ 339 | name: indexName, 340 | dimension: 1536, 341 | waitUntilReady: true, 342 | }); 343 | } 344 | 345 | const index = pinecone.Index(indexName) 346 | 347 | // Get the vector embeddings for the documents 348 | const vectors = await Promise.all(documents.flat().map(embedDocument)); 349 | 350 | // Upsert vectors into the Pinecone index 351 | await chunkedUpsert(index!, vectors, '', 10); 352 | 353 | // Return the first document 354 | return documents[0]; 355 | } catch (error) { 356 | console.error("Error seeding:", error); 357 | throw error; 358 | } 359 | } 360 | ``` 361 | 362 | To chunk the content we'll use one of the following methods: 363 | 364 | 1. 
`RecursiveCharacterTextSplitter` - This splitter splits the text into chunks of a given size, and then recursively splits the chunks into smaller chunks until the chunk size is reached. This method is useful for long documents. 365 | 2. `MarkdownTextSplitter` - This splitter splits the text into chunks based on Markdown headers. This method is useful for documents that are already structured using Markdown. The benefit of this method is that it will split the document into chunks based on the headers, which will be useful for our chatbot to understand the structure of the document. We can assume that each unit of text under a header is an internally coherent unit of information, and when the user asks a question, the retrieved context will be internally coherent as well. 366 | 367 | ### Add the `crawl` endpoint 368 | 369 | The handler for the `crawl` endpoint is pretty straightforward. It simply calls the `seed` function and returns the result. 370 | 371 | ```ts 372 | import seed from "./seed"; 373 | import { NextResponse } from "next/server"; 374 | 375 | export const runtime = "edge"; 376 | 377 | export async function POST(req: Request) { 378 | const { url, options } = await req.json(); 379 | try { 380 | const documents = await seed(url, 1, process.env.PINECONE_INDEX!, options); 381 | return NextResponse.json({ success: true, documents }); 382 | } catch (error) { 383 | return NextResponse.json({ success: false, error: "Failed crawling" }); 384 | } 385 | } 386 | ``` 387 | 388 | Now our backend is able to crawl a given URL, embed the content and index the embeddings in Pinecone. The endpoint will return all the segments in the retrieved webpage we crawl, so we'll be able to display them. Next, we'll write a set of functions that will build the context out of these embeddings. 389 | 390 | ### Get matches from embeddings 391 | 392 | To retrieve the most relevant documents from the index, we'll use the `query` function in the Pinecone SDK.
This function takes a vector and returns the most similar vectors from the index. We'll use this function to retrieve the most relevant documents from the index, given some embeddings. 393 | 394 | ```ts 395 | const getMatchesFromEmbeddings = async (embeddings: number[], topK: number, namespace: string): Promise[]> => { 396 | // Obtain a client for Pinecone 397 | const pinecone = new Pinecone(); 398 | 399 | const indexName: string = process.env.PINECONE_INDEX || ''; 400 | if (indexName === '') { 401 | throw new Error('PINECONE_INDEX environment variable not set') 402 | } 403 | 404 | // Retrieve the list of indexes to check if expected index exists 405 | const indexes = await pinecone.listIndexes() 406 | if (indexes.filter(i => i.name === indexName).length !== 1) { 407 | throw new Error(`Index ${indexName} does not exist`) 408 | } 409 | 410 | // Get the Pinecone index 411 | const index = pinecone!.Index(indexName); 412 | 413 | // Get the namespace 414 | const pineconeNamespace = index.namespace(namespace ?? '') 415 | 416 | try { 417 | // Query the index with the defined request 418 | const queryResult = await pineconeNamespace.query({ 419 | vector: embeddings, 420 | topK, 421 | includeMetadata: true, 422 | }) 423 | return queryResult.matches || [] 424 | } catch (e) { 425 | // Log the error and throw it 426 | console.log("Error querying embeddings: ", e) 427 | throw new Error(`Error querying embeddings: ${e}`) 428 | } 429 | } 430 | ``` 431 | 432 | The function takes in embeddings, a topK parameter, and a namespace, and returns the topK matches from the Pinecone index. It first gets a Pinecone client, checks if the desired index exists in the list of indexes, and throws an error if not. Then it gets the specific Pinecone index. The function then queries the Pinecone index with the defined request and returns the matches. 433 | 434 | ### Wrap things up in `getContext` 435 | 436 | We'll wrap things together in the `getContext` function. 
This function will take in a `message` and return the context - either in string form, or as a set of `ScoredVector`. 437 | 438 | ```ts 439 | export const getContext = async ( 440 | message: string, 441 | namespace: string, 442 | maxTokens = 3000, 443 | minScore = 0.7, 444 | getOnlyText = true 445 | ): Promise => { 446 | // Get the embeddings of the input message 447 | const embedding = await getEmbeddings(message); 448 | 449 | // Retrieve the matches for the embeddings from the specified namespace 450 | const matches = await getMatchesFromEmbeddings(embedding, 3, namespace); 451 | 452 | // Filter out the matches that have a score lower than the minimum score 453 | const qualifyingDocs = matches.filter((m) => m.score && m.score > minScore); 454 | 455 | // If the `getOnlyText` flag is false, we'll return the matches 456 | if (!getOnlyText) { 457 | return qualifyingDocs; 458 | } 459 | 460 | let docs = matches 461 | ? qualifyingDocs.map((match) => (match.metadata as Metadata).chunk) 462 | : []; 463 | // Join all the chunks of text together, truncate to the maximum number of tokens, and return the result 464 | return docs.join("\n").substring(0, maxTokens); 465 | }; 466 | ``` 467 | 468 | Back in `chat/route.ts`, we'll add the call to `getContext`: 469 | 470 | ```ts 471 | const { messages } = await req.json(); 472 | 473 | // Get the last message 474 | const lastMessage = messages[messages.length - 1]; 475 | 476 | // Get the context from the last message 477 | const context = await getContext(lastMessage.content, ""); 478 | ``` 479 | 480 | ### Update the prompt 481 | 482 | Finally, we'll update the prompt to include the context we retrieved from the `getContext` function. 483 | 484 | ```ts 485 | const prompt = [ 486 | { 487 | role: "system", 488 | content: `AI assistant is a brand new, powerful, human-like artificial intelligence. 489 | The traits of AI include expert knowledge, helpfulness, cleverness, and articulateness. 
490 | AI is a well-behaved and well-mannered individual. 491 | AI is always friendly, kind, and inspiring, and he is eager to provide vivid and thoughtful responses to the user. 492 | AI has the sum of all knowledge in their brain, and is able to accurately answer nearly any question about any topic in conversation. 493 | AI assistant is a big fan of Pinecone and Vercel. 494 | START CONTEXT BLOCK 495 | ${context} 496 | END OF CONTEXT BLOCK 497 | AI assistant will take into account any CONTEXT BLOCK that is provided in a conversation. 498 | If the context does not provide the answer to question, the AI assistant will say, "I'm sorry, but I don't know the answer to that question". 499 | AI assistant will not apologize for previous responses, but instead will indicated new information was gained. 500 | AI assistant will not invent anything that is not drawn directly from the context. 501 | `, 502 | }, 503 | ]; 504 | ``` 505 | 506 | In this prompt, we added a `START CONTEXT BLOCK` and `END OF CONTEXT BLOCK` to indicate where the context should be inserted. We also added a line to indicate that the AI assistant will take into account any context block that is provided in a conversation. 507 | 508 | ### Attaching the context data to the messages 509 | 510 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | In this example, we'll build a full-stack application that uses Retrieval Augmented Generation (RAG) powered by [Pinecone](https://pinecone.io) to deliver accurate and contextually relevant responses in a chatbot. 2 | 3 | RAG is a powerful tool that combines the benefits of retrieval-based models and generative models. 
Unlike traditional chatbots that can struggle with maintaining up-to-date information or accessing domain-specific knowledge, a RAG-based chatbot uses a knowledge base created from crawled URLs to provide contextually relevant responses. 4 | 5 | Incorporating Vercel's AI SDK into our application will allow us to easily set up the chatbot workflow and utilize streaming more efficiently, particularly in edge environments, enhancing the responsiveness and performance of our chatbot. 6 | 7 | By the end of this tutorial, you'll have a context-aware chatbot that provides accurate responses without hallucination, ensuring a more effective and engaging user experience. Let's get started on building this powerful tool ([Full code listing](https://github.com/pinecone-io/pinecone-vercel-example/blob/main/package.json)). 8 | 9 | ## Step 1: Setting Up Your Next.js Application 10 | 11 | Next.js is a powerful JavaScript framework that enables us to build server-side rendered and static web applications using React. It's a great choice for our project due to its ease of setup, excellent performance, and built-in features such as routing and API routes. 12 | 13 | To create a new Next.js app, run the following command: 14 | 15 | ### npx 16 | 17 | ```bash 18 | npx create-next-app chatbot 19 | ``` 20 | 21 | Next, we'll add the `ai` package: 22 | 23 | ```bash 24 | npm install ai 25 | ``` 26 | 27 | This command is used to install the `ai` package which is necessary for the chatbot functionality. 28 | 29 | 30 | You can use the [full list](https://github.com/pinecone-io/pinecone-vercel-example/blob/main/package.json) of dependencies if you'd like to build along with the tutorial. 31 | 32 | ## Step 2: Create the Chatbot 33 | 34 | In this step, we're going to use the Vercel SDK to establish the backend and frontend of our chatbot within the Next.js application.
By the end of this step, our basic chatbot will be up and running, ready for us to add context-aware capabilities in the following stages. Let's get started. 35 | 36 | ### Chatbot frontend component 37 | 38 | Now, let's focus on the frontend component of our chatbot. We're going to build the user-facing elements of our bot, creating the interface through which users will interact with our application. This will involve crafting the design and functionality of the chat interface within our Next.js application. 39 | 40 | First, we'll create the `Chat` component, that will render the chat interface. 41 | 42 | ```tsx 43 | import React, { FormEvent, ChangeEvent } from "react"; 44 | import Messages from "./Messages"; 45 | import { Message } from "ai/react"; 46 | 47 | interface Chat { 48 | input: string; 49 | handleInputChange: (e: ChangeEvent) => void; 50 | handleMessageSubmit: (e: FormEvent) => Promise; 51 | messages: Message[]; 52 | } 53 | 54 | const Chat: React.FC = ({ 55 | input, 56 | handleInputChange, 57 | handleMessageSubmit, 58 | messages, 59 | }) => { 60 | return ( 61 |
62 | 63 | <> 64 |
65 | 71 | 72 | Press ⮐ to send 73 |
74 | 75 |
76 | ); 77 | }; 78 | 79 | export default Chat; 80 | ``` 81 | 82 | This component will display the list of messages and the input form for the user to send messages. The `Messages` component to render the chat messages: 83 | 84 | ```tsx 85 | import { Message } from "ai"; 86 | import { useRef } from "react"; 87 | 88 | export default function Messages({ messages }: { messages: Message[] }) { 89 | const messagesEndRef = useRef(null); 90 | return ( 91 |
92 | {messages.map((msg, index) => ( 93 |
99 |
{msg.role === "assistant" ? "🤖" : "🧑‍💻"}
100 |
{msg.content}
101 |
102 | ))} 103 |
104 |
105 | ); 106 | } 107 | ``` 108 | 109 | Our main `Page` component will manage the state for the messages displayed in the `Chat` component: 110 | 111 | ```tsx 112 | "use client"; 113 | import Header from "@/components/Header"; 114 | import Chat from "@/components/Chat"; 115 | import { useChat } from "ai/react"; 116 | 117 | const Page: React.FC = () => { 118 | const [context, setContext] = useState(null); 119 | const { messages, input, handleInputChange, handleSubmit } = useChat(); 120 | 121 | return ( 122 |
123 |
124 |
125 | 131 |
132 |
133 | ); 134 | }; 135 | 136 | export default Page; 137 | ``` 138 | 139 | The useful `useChat` hook will manage the state for the messages displayed in the `Chat` component. It will: 140 | 141 | 1. Send the user's message to the backend 142 | 2. Update the state with the response from the backend 143 | 3. Handle any internal state changes (e.g. when the user types a message) 144 | 145 | ### Chatbot API endpoint 146 | 147 | Next, we'll set up the Chatbot API endpoint. This is the server-side component that will handle requests and responses for our chatbot. We'll create a new file called `api/chat/route.ts` and add the following dependencies: 148 | 149 | ```ts 150 | import { Configuration, OpenAIApi } from "openai-edge"; 151 | import { Message, OpenAIStream, StreamingTextResponse } from "ai"; 152 | ``` 153 | 154 | The first dependency is the `openai-edge` package which makes it easier to interact with OpenAI's APIs in an edge environment. The second dependency is the `ai` package which we'll use to define the `Message` and `OpenAIStream` types, which we'll use to stream back the response from OpenAI back to the client. 155 | 156 | Next initialize the OpenAI client: 157 | 158 | ```ts 159 | // Create an OpenAI API client (that's edge friendly!) 160 | const config = new Configuration({ 161 | apiKey: process.env.OPENAI_API_KEY, 162 | }); 163 | const openai = new OpenAIApi(config); 164 | ``` 165 | 166 | To define this endpoint as an edge function, we'll define and export the `runtime` variable 167 | 168 | ```ts 169 | export const runtime = "edge"; 170 | ``` 171 | 172 | Next, we'll define the endpoint handler: 173 | 174 | ```ts 175 | export async function POST(req: Request) { 176 | try { 177 | const { messages } = await req.json(); 178 | 179 | const prompt = [ 180 | { 181 | role: "system", 182 | content: `AI assistant is a brand new, powerful, human-like artificial intelligence. 183 | The traits of AI include expert knowledge, helpfulness, cleverness, and articulateness. 
184 | AI is a well-behaved and well-mannered individual. 185 | AI is always friendly, kind, and inspiring, and he is eager to provide vivid and thoughtful responses to the user. 186 | AI has the sum of all knowledge in their brain, and is able to accurately answer nearly any question about any topic in conversation. 187 | AI assistant is a big fan of Pinecone and Vercel. 188 | `, 189 | }, 190 | ]; 191 | 192 | // Ask OpenAI for a streaming chat completion given the prompt 193 | const response = await openai.createChatCompletion({ 194 | model: "gpt-3.5-turbo", 195 | stream: true, 196 | messages: [ 197 | ...prompt, 198 | ...messages.filter((message: Message) => message.role === "user"), 199 | ], 200 | }); 201 | // Convert the response into a friendly text-stream 202 | const stream = OpenAIStream(response); 203 | // Respond with the stream 204 | return new StreamingTextResponse(stream); 205 | } catch (e) { 206 | throw e; 207 | } 208 | } 209 | ``` 210 | 211 | Here we deconstruct the messages from the post, and create our initial prompt. We use the prompt and the messages as the input to the `createChatCompletion` method. We then convert the response into a stream and return it to the client. Note that in this example, we only send the user's messages to OpenAI (as opposed to including the bot's messages as well). 212 | 213 | 214 | 215 | ## Step 3. Adding Context 216 | 217 | As we dive into building our chatbot, it's important to understand the role of context. Adding context to our chatbot's responses is key for creating a more natural, conversational user experience. Without context, a chatbot's responses can feel disjointed or irrelevant. By understanding the context of a user's query, our chatbot will be able to provide more accurate, relevant, and engaging responses. Now, let's begin building with this goal in mind. 218 | 219 | First, we'll first focus on seeding the knowledge base. We'll create a crawler and a seed script, and set up a crawl endpoint. 
This will allow us to gather and organize the information our chatbot will use to provide contextually relevant responses. 220 | 221 | After we've populated our knowledge base, we'll retrieve matches from our embeddings. This will enable our chatbot to find relevant information based on user queries. 222 | 223 | Next, we'll wrap our logic into the getContext function and update our chatbot's prompt. This will streamline our code and improve the user experience by ensuring the chatbot's prompts are relevant and engaging. 224 | 225 | Finally, we'll add a context panel and an associated context endpoint. These will provide a user interface for the chatbot and a way for it to retrieve the necessary context for each user query. 226 | 227 | This step is all about feeding our chatbot the information it needs and setting up the necessary infrastructure for it to retrieve and use that information effectively. Let's get started. 228 | 229 | ## Seeding the Knowledge Base 230 | 231 | Now we'll move on to seeding the knowledge base, the foundational data source that will inform our chatbot's responses. This step involves collecting and organizing the information our chatbot needs to operate effectively. In this guide, we're going to use data retrieved from various websites which we'll later on be able to ask questions about. To do this, we'll create a crawler that will scrape the data from the websites, embed it, and store it in Pinecone. 232 | 233 | ### Create the crawler 234 | 235 | For the sake of brevity, you'll be able to find the full code for the crawler here. 
Here are the pertinent parts: 236 | 237 | ```ts 238 | class Crawler { 239 | private seen = new Set(); 240 | private pages: Page[] = []; 241 | private queue: { url: string; depth: number }[] = []; 242 | 243 | constructor(private maxDepth = 2, private maxPages = 1) {} 244 | 245 | async crawl(startUrl: string): Promise { 246 | // Add the start URL to the queue 247 | this.addToQueue(startUrl); 248 | 249 | // While there are URLs in the queue and we haven't reached the maximum number of pages... 250 | while (this.shouldContinueCrawling()) { 251 | // Dequeue the next URL and depth 252 | const { url, depth } = this.queue.shift()!; 253 | 254 | // If the depth is too great or we've already seen this URL, skip it 255 | if (this.isTooDeep(depth) || this.isAlreadySeen(url)) continue; 256 | 257 | // Add the URL to the set of seen URLs 258 | this.seen.add(url); 259 | 260 | // Fetch the page HTML 261 | const html = await this.fetchPage(url); 262 | 263 | // Parse the HTML and add the page to the list of crawled pages 264 | this.pages.push({ url, content: this.parseHtml(html) }); 265 | 266 | // Extract new URLs from the page HTML and add them to the queue 267 | this.addNewUrlsToQueue(this.extractUrls(html, url), depth); 268 | } 269 | 270 | // Return the list of crawled pages 271 | return this.pages; 272 | } 273 | 274 | // ... 
Some private methods removed for brevity 275 | 276 | private async fetchPage(url: string): Promise { 277 | try { 278 | const response = await fetch(url); 279 | return await response.text(); 280 | } catch (error) { 281 | console.error(`Failed to fetch ${url}: ${error}`); 282 | return ""; 283 | } 284 | } 285 | 286 | private parseHtml(html: string): string { 287 | const $ = cheerio.load(html); 288 | $("a").removeAttr("href"); 289 | return NodeHtmlMarkdown.translate($.html()); 290 | } 291 | 292 | private extractUrls(html: string, baseUrl: string): string[] { 293 | const $ = cheerio.load(html); 294 | const relativeUrls = $("a") 295 | .map((_, link) => $(link).attr("href")) 296 | .get() as string[]; 297 | return relativeUrls.map( 298 | (relativeUrl) => new URL(relativeUrl, baseUrl).href 299 | ); 300 | } 301 | } 302 | ``` 303 | 304 | The `Crawler` class is a web crawler that visits URLs, starting from a given point, and collects information from them. It operates within a certain depth and a maximum number of pages as defined in the constructor. The crawl method is the core function that starts the crawling process. 305 | 306 | The helper methods fetchPage, parseHtml, and extractUrls respectively handle fetching the HTML content of a page, parsing the HTML to extract text, and extracting all URLs from a page to be queued for the next crawl. The class also maintains a record of visited URLs to avoid duplication. 307 | 308 | ### Create the `seed` function 309 | 310 | To tie things together, we'll create a seed function that will use the crawler to seed the knowledge base. In this portion of the code, we'll initialize the crawl and fetch a given URL, then split it's content into chunks, and finally embed and index the chunks in Pinecone. 
311 | 312 | ```ts 313 | async function seed(url: string, limit: number, indexName: string, options: SeedOptions) { 314 | try { 315 | // Initialize the Pinecone client 316 | const pinecone = new Pinecone(); 317 | 318 | // Destructure the options object 319 | const { splittingMethod, chunkSize, chunkOverlap } = options; 320 | 321 | // Create a new Crawler with depth 1 and maximum pages as limit 322 | const crawler = new Crawler(1, limit || 100); 323 | 324 | // Crawl the given URL and get the pages 325 | const pages = await crawler.crawl(url) as Page[]; 326 | 327 | // Choose the appropriate document splitter based on the splitting method 328 | const splitter: DocumentSplitter = splittingMethod === 'recursive' ? 329 | new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap }) : new MarkdownTextSplitter({}); 330 | 331 | // Prepare documents by splitting the pages 332 | const documents = await Promise.all(pages.map(page => prepareDocument(page, splitter))); 333 | 334 | // Create Pinecone index if it does not exist 335 | const indexList = await pinecone.listIndexes(); 336 | const indexExists = indexList.some(index => index.name === indexName) 337 | if (!indexExists) { 338 | await pinecone.createIndex({ 339 | name: indexName, 340 | dimension: 1536, 341 | waitUntilReady: true, 342 | }); 343 | } 344 | 345 | const index = pinecone.Index(indexName) 346 | 347 | // Get the vector embeddings for the documents 348 | const vectors = await Promise.all(documents.flat().map(embedDocument)); 349 | 350 | // Upsert vectors into the Pinecone index 351 | await chunkedUpsert(index!, vectors, '', 10); 352 | 353 | // Return the first document 354 | return documents[0]; 355 | } catch (error) { 356 | console.error("Error seeding:", error); 357 | throw error; 358 | } 359 | } 360 | ``` 361 | 362 | To chunk the content we'll use one of the following methods: 363 | 364 | 1. 
`RecursiveCharacterTextSplitter` - This splitter splits the text into chunks of a given size, and then recursively splits the chunks into smaller chunks until the chunk size is reached. This method is useful for long documents. 365 | 2. `MarkdownTextSplitter` - This splitter splits the text into chunks based on Markdown headers. This method is useful for documents that are already structured using Markdown. The benefit of this method is that it will split the document into chunks based on the headers, which will be useful for our chatbot to understand the structure of the document. We can assume that each unit of text under a header is an internally coherent unit of information, and when the user asks a question, the retrieved context will be internally coherent as well. 366 | 367 | ### Add the `crawl` endpoint` 368 | 369 | The endpoint for the `crawl` endpoint is pretty straightforward. It simply calls the `seed` function and returns the result. 370 | 371 | ```ts 372 | import seed from "./seed"; 373 | import { NextResponse } from "next/server"; 374 | 375 | export const runtime = "edge"; 376 | 377 | export async function POST(req: Request) { 378 | const { url, options } = await req.json(); 379 | try { 380 | const documents = await seed(url, 1, process.env.PINECONE_INDEX!, options); 381 | return NextResponse.json({ success: true, documents }); 382 | } catch (error) { 383 | return NextResponse.json({ success: false, error: "Failed crawling" }); 384 | } 385 | } 386 | ``` 387 | 388 | Now our backend is able to crawl a given URL, embed the content and index the embeddings in Pinecone. The endpoint will return all the segments in the retrieved webpage we crawl, so we'll be able to display them. Next, we'll write a set of functions that will build the context out of these embeddings. 389 | 390 | ### Get matches from embeddings 391 | 392 | To retrieve the most relevant documents from the index, we'll use the `query` function in the Pinecone SDK. 
This function takes a vector and returns the most similar vectors from the index. We'll use this function to retrieve the most relevant documents from the index, given some embeddings. 393 | 394 | ```ts 395 | const getMatchesFromEmbeddings = async (embeddings: number[], topK: number, namespace: string): Promise[]> => { 396 | // Obtain a client for Pinecone 397 | const pinecone = new Pinecone(); 398 | 399 | const indexName: string = process.env.PINECONE_INDEX || ''; 400 | if (indexName === '') { 401 | throw new Error('PINECONE_INDEX environment variable not set') 402 | } 403 | 404 | // Retrieve the list of indexes to check if expected index exists 405 | const indexes = await pinecone.listIndexes() 406 | if (indexes.filter(i => i.name === indexName).length !== 1) { 407 | throw new Error(`Index ${indexName} does not exist`) 408 | } 409 | 410 | // Get the Pinecone index 411 | const index = pinecone!.Index(indexName); 412 | 413 | // Get the namespace 414 | const pineconeNamespace = index.namespace(namespace ?? '') 415 | 416 | try { 417 | // Query the index with the defined request 418 | const queryResult = await pineconeNamespace.query({ 419 | vector: embeddings, 420 | topK, 421 | includeMetadata: true, 422 | }) 423 | return queryResult.matches || [] 424 | } catch (e) { 425 | // Log the error and throw it 426 | console.log("Error querying embeddings: ", e) 427 | throw new Error(`Error querying embeddings: ${e}`) 428 | } 429 | } 430 | ``` 431 | 432 | The function takes in embeddings, a topK parameter, and a namespace, and returns the topK matches from the Pinecone index. It first gets a Pinecone client, checks if the desired index exists in the list of indexes, and throws an error if not. Then it gets the specific Pinecone index. The function then queries the Pinecone index with the defined request and returns the matches. 433 | 434 | ### Wrap things up in `getContext` 435 | 436 | We'll wrap things together in the `getContext` function. 
This function will take in a `message` and return the context - either in string form, or as a set of `ScoredVector`. 437 | 438 | ```ts 439 | export const getContext = async ( 440 | message: string, 441 | namespace: string, 442 | maxTokens = 3000, 443 | minScore = 0.7, 444 | getOnlyText = true 445 | ): Promise => { 446 | // Get the embeddings of the input message 447 | const embedding = await getEmbeddings(message); 448 | 449 | // Retrieve the matches for the embeddings from the specified namespace 450 | const matches = await getMatchesFromEmbeddings(embedding, 3, namespace); 451 | 452 | // Filter out the matches that have a score lower than the minimum score 453 | const qualifyingDocs = matches.filter((m) => m.score && m.score > minScore); 454 | 455 | // If the `getOnlyText` flag is false, we'll return the matches 456 | if (!getOnlyText) { 457 | return qualifyingDocs; 458 | } 459 | 460 | let docs = matches 461 | ? qualifyingDocs.map((match) => (match.metadata as Metadata).chunk) 462 | : []; 463 | // Join all the chunks of text together, truncate to the maximum number of tokens, and return the result 464 | return docs.join("\n").substring(0, maxTokens); 465 | }; 466 | ``` 467 | 468 | Back in `chat/route.ts`, we'll add the call to `getContext`: 469 | 470 | ```ts 471 | const { messages } = await req.json(); 472 | 473 | // Get the last message 474 | const lastMessage = messages[messages.length - 1]; 475 | 476 | // Get the context from the last message 477 | const context = await getContext(lastMessage.content, ""); 478 | ``` 479 | 480 | ### Update the prompt 481 | 482 | Finally, we'll update the prompt to include the context we retrieved from the `getContext` function. 483 | 484 | ```ts 485 | const prompt = [ 486 | { 487 | role: "system", 488 | content: `AI assistant is a brand new, powerful, human-like artificial intelligence. 489 | The traits of AI include expert knowledge, helpfulness, cleverness, and articulateness. 
490 | AI is a well-behaved and well-mannered individual. 491 | AI is always friendly, kind, and inspiring, and he is eager to provide vivid and thoughtful responses to the user. 492 | AI has the sum of all knowledge in their brain, and is able to accurately answer nearly any question about any topic in conversation. 493 | AI assistant is a big fan of Pinecone and Vercel. 494 | START CONTEXT BLOCK 495 | ${context} 496 | END OF CONTEXT BLOCK 497 | AI assistant will take into account any CONTEXT BLOCK that is provided in a conversation. 498 | If the context does not provide the answer to question, the AI assistant will say, "I'm sorry, but I don't know the answer to that question". 499 | AI assistant will not apologize for previous responses, but instead will indicated new information was gained. 500 | AI assistant will not invent anything that is not drawn directly from the context. 501 | `, 502 | }, 503 | ]; 504 | ``` 505 | 506 | In this prompt, we added a `START CONTEXT BLOCK` and `END OF CONTEXT BLOCK` to indicate where the context should be inserted. We also added a line to indicate that the AI assistant will take into account any context block that is provided in a conversation. 507 | 508 | ### Add the context panel 509 | 510 | Next, we need to add the context panel to the chat UI. We'll add a new component called `Context` ([full code](https://github.com/pinecone-io/pinecone-vercel-example/tree/main/src/app/components/Context)). 511 | 512 | ### Add the context endpoint 513 | 514 | We want to allow interface to indicate which portions of the retrieved content have been used to generate the response. To do this, we'll add a another endpoint that will call the same `getContext`. 515 | 516 | ```ts 517 | export async function POST(req: Request) { 518 | try { 519 | const { messages } = await req.json(); 520 | const lastMessage = 521 | messages.length > 1 ? 
messages[messages.length - 1] : messages[0]; 522 | const context = (await getContext( 523 | lastMessage.content, 524 | "", 525 | 10000, 526 | 0.7, 527 | false 528 | )) as ScoredPineconeRecord[]; 529 | return NextResponse.json({ context }); 530 | } catch (e) { 531 | console.log(e); 532 | return NextResponse.error(); 533 | } 534 | } 535 | ``` 536 | 537 | Whenever the user crawls a URL, the context panel will display all the segments of the retrieved webpage. Whenever the backend completes sending a message back, the front end will trigger an effect that will retrieve this context: 538 | 539 | ```tsx 540 | useEffect(() => { 541 | const getContext = async () => { 542 | const response = await fetch("/api/context", { 543 | method: "POST", 544 | body: JSON.stringify({ 545 | messages, 546 | }), 547 | }); 548 | const { context } = await response.json(); 549 | setContext(context.map((c: any) => c.id)); 550 | }; 551 | if (gotMessages && messages.length >= prevMessagesLengthRef.current) { 552 | getContext(); 553 | } 554 | 555 | prevMessagesLengthRef.current = messages.length; 556 | }, [messages, gotMessages]); 557 | ``` 558 | 559 | ## Running tests 560 | 561 | The pinecone-vercel-starter uses [Playwright](https://playwright.dev) for end to end testing. 562 | 563 | To run all the tests: 564 | 565 | ``` 566 | npm run test:e2e 567 | ``` 568 | 569 | By default, when running locally, if errors are encountered, Playwright will open an HTML report showing which 570 | tests failed and for which browser drivers. 
571 | 572 | ## Displaying test reports locally 573 | 574 | To display the latest test report locally, run: 575 | ``` 576 | npm run test:show 577 | ``` 578 | 579 | -------------------------------------------------------------------------------- /next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information. 6 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = {}; 3 | 4 | module.exports = nextConfig; 5 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vercel-pinecone-template", 3 | "version": "0.2.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "test:e2e": "playwright test", 11 | "test:show": "playwright show-report" 12 | }, 13 | "dependencies": { 14 | "@ai-sdk/openai": "^0.0.60", 15 | "@edge-runtime/jest-environment": "^3.0.3", 16 | "@emotion/react": "^11.13.3", 17 | "@emotion/styled": "^11.13.0", 18 | "@material-tailwind/react": "^2.1.10", 19 | "@mui/icons-material": "^6.1.0", 20 | "@mui/material": "^6.1.0", 21 | "@pinecone-database/doc-splitter": "^0.0.1", 22 | "@pinecone-database/pinecone": "3.0.3", 23 | "@types/uuid": "^10.0.0", 24 | "ai": "^3.3.39", 25 | "base64-arraybuffer": "^1.0.2", 26 | "cheerio": "^1.0.0", 27 | "edge-runtime": "^3.0.3", 28 | "md5": "^2.3.0", 29 | "next": "^14.2.11", 30 | "node-html-markdown": "^1.3.0", 31 | "openai": "^4.61.1", 32 | "openai-edge": "^1.2.2", 33 | "react": "18.3.1", 34 | "react-dom": 
"18.3.1", 35 | "react-icons": "^5.3.0", 36 | "react-markdown": "^9.0.1", 37 | "react-spinners": "^0.14.1", 38 | "sswr": "^2.1.0", 39 | "svelte": "^4.2.19", 40 | "tailwindcss": "3.4.11", 41 | "typescript": "5.6.2", 42 | "unified": "^11.0.5", 43 | "uuid": "^10.0.0", 44 | "vue": "^3.5.6", 45 | "zod": "^3.23.8" 46 | }, 47 | "devDependencies": { 48 | "@playwright/test": "^1.47.1", 49 | "@types/md5": "^2.3.5", 50 | "@types/node": "22.5.5", 51 | "@types/react": "18.3.6", 52 | "@types/react-dom": "18.3.0", 53 | "eslint": "^8.0.0", 54 | "eslint-config-next": "14.2.11" 55 | } 56 | } -------------------------------------------------------------------------------- /playwright.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig, devices } from '@playwright/test'; 2 | 3 | /** 4 | * Read environment variables from file. 5 | * https://github.com/motdotla/dotenv 6 | */ 7 | // require('dotenv').config(); 8 | 9 | /** 10 | * See https://playwright.dev/docs/test-configuration. 11 | */ 12 | export default defineConfig({ 13 | testDir: './tests', 14 | /* Run tests in files in parallel */ 15 | fullyParallel: true, 16 | /* Fail the build on CI if you accidentally left test.only in the source code. */ 17 | forbidOnly: !!process.env.CI, 18 | /* Retry on CI only */ 19 | retries: process.env.CI ? 2 : 0, 20 | /* Opt out of parallel tests on CI. */ 21 | workers: process.env.CI ? 1 : undefined, 22 | /* Reporter to use. See https://playwright.dev/docs/test-reporters */ 23 | reporter: 'html', 24 | /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ 25 | use: { 26 | /* Base URL to use in actions like `await page.goto('/')`. */ 27 | // baseURL: 'http://127.0.0.1:3000', 28 | 29 | /* Collect trace when retrying the failed test. 
See https://playwright.dev/docs/trace-viewer */ 30 | trace: 'on-first-retry', 31 | }, 32 | 33 | /* Configure projects for major browsers */ 34 | projects: [ 35 | { 36 | name: 'chromium', 37 | use: { ...devices['Desktop Chrome'] }, 38 | }, 39 | 40 | { 41 | name: 'firefox', 42 | use: { ...devices['Desktop Firefox'] }, 43 | }, 44 | 45 | { 46 | name: 'webkit', 47 | use: { ...devices['Desktop Safari'] }, 48 | }, 49 | 50 | /* Test against mobile viewports. */ 51 | // { 52 | // name: 'Mobile Chrome', 53 | // use: { ...devices['Pixel 5'] }, 54 | // }, 55 | // { 56 | // name: 'Mobile Safari', 57 | // use: { ...devices['iPhone 12'] }, 58 | // }, 59 | 60 | /* Test against branded browsers. */ 61 | // { 62 | // name: 'Microsoft Edge', 63 | // use: { ...devices['Desktop Edge'], channel: 'msedge' }, 64 | // }, 65 | // { 66 | // name: 'Google Chrome', 67 | // use: { ...devices['Desktop Chrome'], channel: 'chrome' }, 68 | // }, 69 | ], 70 | 71 | /* Run your local dev server before starting the tests */ 72 | // webServer: { 73 | // command: 'npm run start', 74 | // url: 'http://127.0.0.1:3000', 75 | // reuseExistingServer: !process.env.CI, 76 | // }, 77 | }); 78 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | }, 5 | }; 6 | -------------------------------------------------------------------------------- /public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/pinecone.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /public/vercel.svg: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /src/app/api/chat/route.ts: -------------------------------------------------------------------------------- 1 | import { Metadata, getContext } from '@/services/context' 2 | import type { PineconeRecord } from '@pinecone-database/pinecone' 3 | import { Message, StreamData } from 'ai' 4 | import { openai } from '@ai-sdk/openai'; 5 | import { CoreMessage, streamText, convertToCoreMessages } from 'ai'; 6 | 7 | 8 | // IMPORTANT! Set the runtime to edge 9 | export const runtime = 'edge' 10 | 11 | export async function POST(req: Request) { 12 | try { 13 | const { messages, withContext }: { messages: CoreMessage[], withContext: boolean } = await req.json(); 14 | // Get the last message 15 | const lastMessage = messages[messages.length - 1] 16 | 17 | // Get the context from the last message 18 | const context = withContext ? await getContext(lastMessage?.content as string, '', 3000, 0.8, false) : '' 19 | 20 | // Get the chunks of text from the context 21 | const docs = (withContext && context.length > 0) ? (context as PineconeRecord[]).map(match => (match.metadata as Metadata).chunk) : []; 22 | 23 | // Join all the chunks of text together, truncate to the maximum number of tokens, and return the result 24 | const contextText = docs.join("\n").substring(0, 3000) 25 | 26 | const prompt = `AI assistant is a brand new, powerful, human-like artificial intelligence. 27 | The traits of AI include expert knowledge, helpfulness, cleverness, and articulateness. 28 | AI is a well-behaved and well-mannered individual. 29 | AI is always friendly, kind, and inspiring, and he is eager to provide vivid and thoughtful responses to the user. 30 | AI has the sum of all knowledge in their brain, and is able to accurately answer nearly any question about any topic in conversation. 
31 | AI assistant is a big fan of Pinecone and Vercel. 32 | START CONTEXT BLOCK 33 | ${contextText} 34 | END OF CONTEXT BLOCK 35 | AI assistant will take into account any CONTEXT BLOCK that is provided in a conversation. 36 | If the context does not provide the answer to question, the AI assistant will say, "I'm sorry, but I don't know the answer to that question". 37 | AI assistant will not apologize for previous responses, but instead will indicated new information was gained. 38 | AI assistant will not invent anything that is not drawn directly from the context. 39 | ` 40 | 41 | const sanitizedMessages = messages.map((message: any) => { 42 | const { createdAt, id, ...rest } = message; 43 | return rest; 44 | }); 45 | 46 | // Create a StreamData object to store the context data 47 | const data = new StreamData(); 48 | 49 | const result = await streamText({ 50 | model: openai("gpt-4o"), 51 | system: prompt, 52 | messages: convertToCoreMessages(sanitizedMessages.filter((message: Message) => message.role === 'user')), 53 | onFinish: async () => { 54 | // Append the context to the StreamData object 55 | data.append({ context }); 56 | 57 | // Ensure to close the StreamData object 58 | data.close(); 59 | } 60 | }); 61 | 62 | // Use toDataStreamResponse with the StreamData object 63 | return result.toDataStreamResponse({ 64 | data 65 | }); 66 | } catch (e) { 67 | throw (e) 68 | } 69 | } -------------------------------------------------------------------------------- /src/app/api/checkIndex/route.ts: -------------------------------------------------------------------------------- 1 | import { Pinecone } from '@pinecone-database/pinecone'; 2 | import { NextResponse } from "next/server"; 3 | 4 | export async function POST() { 5 | // Instantiate a new Pinecone client 6 | const pinecone = new Pinecone(); 7 | // Select the desired index 8 | const indexName = process.env.PINECONE_INDEX!; 9 | const index = pinecone.Index(indexName); 10 | 11 | // Use the custom namespace, if 
provided, otherwise use the default 12 | const namespaceName = process.env.PINECONE_NAMESPACE ?? '' 13 | const namespace = index.namespace(namespaceName) 14 | 15 | // Delete everything within the namespace 16 | const stats = await namespace.describeIndexStats() 17 | 18 | return NextResponse.json({ 19 | ...stats 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /src/app/api/clearIndex/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from "next/server"; 2 | import { Pinecone } from '@pinecone-database/pinecone' 3 | 4 | export async function POST() { 5 | // Instantiate a new Pinecone client 6 | const pinecone = new Pinecone(); 7 | // Select the desired index 8 | const index = pinecone.Index(process.env.PINECONE_INDEX!) 9 | 10 | // Use the custom namespace, if provided, otherwise use the default 11 | const namespaceName = process.env.PINECONE_NAMESPACE ?? '' 12 | const namespace = index.namespace(namespaceName) 13 | 14 | // Delete everything within the namespace 15 | await namespace.deleteAll(); 16 | 17 | return NextResponse.json({ 18 | success: true 19 | }) 20 | } 21 | -------------------------------------------------------------------------------- /src/app/api/crawl/crawler.ts: -------------------------------------------------------------------------------- 1 | import * as cheerio from 'cheerio'; 2 | import { NodeHtmlMarkdown } from 'node-html-markdown'; 3 | 4 | interface Page { 5 | url: string; 6 | content: string; 7 | } 8 | 9 | class Crawler { 10 | private seen = new Set(); 11 | private pages: Page[] = []; 12 | private queue: { url: string; depth: number }[] = []; 13 | 14 | constructor(private maxDepth = 2, private maxPages = 1) { } 15 | 16 | async crawl(startUrl: string): Promise { 17 | // Add the start URL to the queue 18 | this.addToQueue(startUrl); 19 | 20 | // While there are URLs in the queue and we haven't reached the maximum number of 
pages... 21 | while (this.shouldContinueCrawling()) { 22 | // Dequeue the next URL and depth 23 | const { url, depth } = this.queue.shift()!; 24 | 25 | // If the depth is too great or we've already seen this URL, skip it 26 | if (this.isTooDeep(depth) || this.isAlreadySeen(url)) continue; 27 | 28 | // Add the URL to the set of seen URLs 29 | this.seen.add(url); 30 | 31 | // Fetch the page HTML 32 | const html = await this.fetchPage(url); 33 | 34 | // Parse the HTML and add the page to the list of crawled pages 35 | this.pages.push({ url, content: this.parseHtml(html) }); 36 | 37 | // Extract new URLs from the page HTML and add them to the queue 38 | this.addNewUrlsToQueue(this.extractUrls(html, url), depth); 39 | } 40 | 41 | // Return the list of crawled pages 42 | return this.pages; 43 | } 44 | 45 | private isTooDeep(depth: number) { 46 | return depth > this.maxDepth; 47 | } 48 | 49 | private isAlreadySeen(url: string) { 50 | return this.seen.has(url); 51 | } 52 | 53 | private shouldContinueCrawling() { 54 | return this.queue.length > 0 && this.pages.length < this.maxPages; 55 | } 56 | 57 | private addToQueue(url: string, depth = 0) { 58 | this.queue.push({ url, depth }); 59 | } 60 | 61 | private addNewUrlsToQueue(urls: string[], depth: number) { 62 | this.queue.push(...urls.map(url => ({ url, depth: depth + 1 }))); 63 | } 64 | 65 | private async fetchPage(url: string): Promise { 66 | try { 67 | const response = await fetch(url); 68 | return await response.text(); 69 | } catch (error) { 70 | console.error(`Failed to fetch ${url}: ${error}`); 71 | return ''; 72 | } 73 | } 74 | 75 | private parseHtml(html: string): string { 76 | const $ = cheerio.load(html); 77 | $('a').removeAttr('href'); 78 | return NodeHtmlMarkdown.translate($.html()); 79 | } 80 | 81 | private extractUrls(html: string, baseUrl: string): string[] { 82 | const $ = cheerio.load(html); 83 | const relativeUrls = $('a').map((_, link) => $(link).attr('href')).get() as string[]; 84 | return 
relativeUrls.map(relativeUrl => new URL(relativeUrl, baseUrl).href); 85 | } 86 | } 87 | 88 | export { Crawler }; 89 | export type { Page }; 90 | -------------------------------------------------------------------------------- /src/app/api/crawl/route.ts: -------------------------------------------------------------------------------- 1 | import seed from './seed' 2 | import { NextResponse } from 'next/server'; 3 | 4 | export const runtime = 'edge' 5 | 6 | export async function POST(req: Request) { 7 | const { url, options } = await req.json() 8 | try { 9 | const documents = await seed(url, 1, process.env.PINECONE_INDEX!, options) 10 | return NextResponse.json({ success: true, documents }) 11 | } catch (error) { 12 | return NextResponse.json({ success: false, error: "Failed crawling" }) 13 | } 14 | } -------------------------------------------------------------------------------- /src/app/api/crawl/seed.ts: -------------------------------------------------------------------------------- 1 | import { chunkedUpsert } from '@/services/chunkedUpsert'; 2 | import { getEmbeddings } from "@/services/embeddings"; 3 | import { truncateStringByBytes } from "@/utils/truncateString"; 4 | import { Document, MarkdownTextSplitter, RecursiveCharacterTextSplitter } from "@pinecone-database/doc-splitter"; 5 | import { Pinecone, PineconeRecord } from "@pinecone-database/pinecone"; 6 | import { ServerlessSpecCloudEnum } from '@pinecone-database/pinecone'; 7 | import md5 from "md5"; 8 | import { Crawler, Page } from "./crawler"; 9 | 10 | interface SeedOptions { 11 | splittingMethod: string 12 | chunkSize: number 13 | chunkOverlap: number 14 | } 15 | 16 | const PINECONE_REGION = process.env.PINECONE_REGION || 'us-west-2' 17 | const PINECONE_CLOUD = process.env.PINECONE_CLOUD || 'aws' 18 | 19 | type DocumentSplitter = RecursiveCharacterTextSplitter | MarkdownTextSplitter 20 | 21 | async function seed(url: string, limit: number, indexName: string, options: SeedOptions) { 22 | try { 23 | // 
Initialize the Pinecone client 24 | const pinecone = new Pinecone(); 25 | 26 | // Destructure the options object 27 | const { splittingMethod, chunkSize, chunkOverlap } = options; 28 | 29 | // Create a new Crawler with depth 1 and maximum pages as limit 30 | const crawler = new Crawler(1, limit || 100); 31 | 32 | // Crawl the given URL and get the pages 33 | const pages = await crawler.crawl(url) as Page[]; 34 | 35 | // Choose the appropriate document splitter based on the splitting method 36 | const splitter: DocumentSplitter = splittingMethod === 'recursive' ? 37 | new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap }) : new MarkdownTextSplitter({}); 38 | 39 | // Prepare documents by splitting the pages 40 | const documents = await Promise.all(pages.map(page => prepareDocument(page, splitter))); 41 | 42 | // Create Pinecone index if it does not exist 43 | const indexList = await pinecone.listIndexes(); 44 | const indexes = indexList.indexes 45 | const indexExists = indexes && indexes.some(index => index.name === indexName) 46 | if (!indexExists) { 47 | await pinecone.createIndex({ 48 | name: indexName, 49 | dimension: 1536, 50 | waitUntilReady: true, 51 | spec: { 52 | serverless: { 53 | region: PINECONE_REGION, 54 | cloud: PINECONE_CLOUD as ServerlessSpecCloudEnum 55 | } 56 | } 57 | }); 58 | } 59 | 60 | const index = pinecone.Index(indexName) 61 | 62 | // Get the vector embeddings for the documents 63 | const vectors = await Promise.all(documents.flat().map(embedDocument)); 64 | 65 | // Upsert vectors into the Pinecone index 66 | await chunkedUpsert(index, vectors, '', 10); 67 | 68 | // Return the first document 69 | return documents[0]; 70 | } catch (error) { 71 | console.error("Error seeding:", error); 72 | throw error; 73 | } 74 | } 75 | 76 | async function embedDocument(doc: Document): Promise { 77 | try { 78 | // Generate OpenAI embeddings for the document content 79 | const embedding = await getEmbeddings(doc.pageContent); 80 | 81 | // Create a 
hash of the document content 82 | const hash = md5(doc.pageContent); 83 | 84 | // Return the vector embedding object 85 | return { 86 | id: hash, // The ID of the vector is the hash of the document content 87 | values: embedding, // The vector values are the OpenAI embeddings 88 | metadata: { // The metadata includes details about the document 89 | chunk: doc.pageContent, // The chunk of text that the vector represents 90 | text: doc.metadata.text as string, // The text of the document 91 | url: doc.metadata.url as string, // The URL where the document was found 92 | hash: doc.metadata.hash as string // The hash of the document content 93 | } 94 | } as PineconeRecord; 95 | } catch (error) { 96 | console.log("Error embedding document: ", error) 97 | throw error 98 | } 99 | } 100 | 101 | async function prepareDocument(page: Page, splitter: DocumentSplitter): Promise { 102 | // Get the content of the page 103 | const pageContent = page.content; 104 | 105 | // Split the documents using the provided splitter 106 | const docs = await splitter.splitDocuments([ 107 | new Document({ 108 | pageContent, 109 | metadata: { 110 | url: page.url, 111 | // Truncate the text to a maximum byte length 112 | text: truncateStringByBytes(pageContent, 36000) 113 | }, 114 | }), 115 | ]); 116 | 117 | // Map over the documents and add a hash to their metadata 118 | return docs.map((doc: Document) => { 119 | return { 120 | pageContent: doc.pageContent, 121 | metadata: { 122 | ...doc.metadata, 123 | // Create a hash of the document content 124 | hash: md5(doc.pageContent) 125 | }, 126 | }; 127 | }); 128 | } 129 | 130 | 131 | 132 | 133 | export default seed; 134 | -------------------------------------------------------------------------------- /src/app/appContext.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | interface RefreshIndexContextType { 4 | totalRecords: number; 5 | refreshIndex: () => Promise; 6 | } 7 | const 
defaultContext: RefreshIndexContextType = { 8 | totalRecords: 0, 9 | refreshIndex: () => Promise.resolve(), 10 | }; 11 | 12 | const AppContext = React.createContext(defaultContext); 13 | 14 | export default AppContext; -------------------------------------------------------------------------------- /src/app/assets/icons/ellipse.tsx: -------------------------------------------------------------------------------- 1 | import { EllipseSvg } from '../svg/ellipse' 2 | export const EllipseIcon = (props: any) => { 3 | return (
8 | 9 |
) 10 | 11 | } -------------------------------------------------------------------------------- /src/app/assets/icons/pinecone.tsx: -------------------------------------------------------------------------------- 1 | import { PineconeSvg } from '../svg/pinecone' 2 | export const PineconeIcon = (props: any) => { 3 | return (
8 | 9 |
) 10 | 11 | } -------------------------------------------------------------------------------- /src/app/assets/icons/user.tsx: -------------------------------------------------------------------------------- 1 | import { UserSvg } from '../svg/user' 2 | export const UserIcon = (props: any) => { 3 | return (
8 | 9 |
) 10 | 11 | } -------------------------------------------------------------------------------- /src/app/assets/svg/blueEllipse.tsx: -------------------------------------------------------------------------------- 1 | export const BlueEllipseSvg = () => { 2 | return 3 | 4 | 5 | } -------------------------------------------------------------------------------- /src/app/assets/svg/ellipse.tsx: -------------------------------------------------------------------------------- 1 | export const EllipseSvg = () => { 2 | return 3 | 4 | 5 | } -------------------------------------------------------------------------------- /src/app/assets/svg/pinecone.tsx: -------------------------------------------------------------------------------- 1 | export const PineconeSvg = () => { 2 | return 3 | 4 | 5 | 6 | 7 | 8 | } -------------------------------------------------------------------------------- /src/app/assets/svg/pineconeLogo.tsx: -------------------------------------------------------------------------------- 1 | export const PineconeLogoSvg = () => { 2 | return ( 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | ) 13 | } -------------------------------------------------------------------------------- /src/app/assets/svg/upArrow.tsx: -------------------------------------------------------------------------------- 1 | export const UpArrowSvg = () => { 2 | return 3 | 4 | 5 | } -------------------------------------------------------------------------------- /src/app/assets/svg/user.tsx: -------------------------------------------------------------------------------- 1 | export const UserSvg = () => { 2 | return 3 | 4 | 5 | } -------------------------------------------------------------------------------- /src/app/components/Chat/ChatInput.tsx: -------------------------------------------------------------------------------- 1 | import { UpArrowSvg } from '@/assets/svg/upArrow'; 2 | import React, { FormEvent } from 'react'; 3 | 4 | interface ChatInputProps { 5 | input: string; 6 | 
handleInputChange: (event: React.ChangeEvent) => void; 7 | handleMessageSubmit: (event: FormEvent) => void; 8 | showIndexMessage: boolean; 9 | } 10 | 11 | const styles = { 12 | container: { border: "1px solid #738FAB1F", padding: 30 }, 13 | form: { border: "1px solid #738FAB80", borderRadius: 4 }, 14 | svg: { background: "black", margin: 9, borderRadius: 4 }, 15 | message: { marginTop: 15, color: "#72788D", fontSize: 12 }, 16 | hint: { position: "absolute", top: 10, right: 35, fontSize: 12, color: "#72788D" } 17 | }; 18 | 19 | const ChatInput: React.FC = ({ input, handleInputChange, handleMessageSubmit, showIndexMessage }) => { 20 | return ( 21 |
22 |
26 |
27 | 35 |
0 ? "visible" : "hidden"}` }}>Hit enter to send
36 | 37 |
38 | 39 | {showIndexMessage &&
40 | Your index contains no vector embeddings yet. Please add some by indexing one of the demo URLs on the left. 41 |
} 42 | 43 |
44 |
45 | ); 46 | }; 47 | 48 | export default ChatInput; 49 | -------------------------------------------------------------------------------- /src/app/components/Chat/ChatWrapper.tsx: -------------------------------------------------------------------------------- 1 | import React, { forwardRef, useImperativeHandle, FormEvent, ChangeEvent, useRef, useEffect, useState } from "react"; 2 | import {useChat, experimental_useObject as useObject} from 'ai/react'; 3 | import Messages from "./Messages"; 4 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 5 | import { v4 as uuidv4 } from 'uuid'; 6 | 7 | export interface ChatInterface { 8 | handleMessageSubmit: (e: FormEvent) => void; 9 | handleInputUpdated: (event: ChangeEvent) => void; 10 | } 11 | 12 | interface ChatProps { 13 | withContext: boolean; 14 | setContext: (data: { context: PineconeRecord[] }[]) => void; 15 | context?: { context: PineconeRecord[] }[] | null; 16 | } 17 | 18 | const ChatWrapper = forwardRef(({ withContext, setContext, context }, ref) => { 19 | const [finished, setFinished] = useState(false); 20 | const { messages, input, setInput, append, handleSubmit, handleInputChange, data } = useChat({ 21 | body: { 22 | withContext 23 | }, 24 | }); 25 | 26 | useEffect(() => { 27 | if (data?.length) { 28 | const { context } = data[0] as { context?: PineconeRecord[] }; 29 | if (context) { 30 | setContext([{ context }]); 31 | } 32 | } 33 | }, [data, setContext]); 34 | 35 | const bottomChatRef = useRef(null); 36 | const chatRef = useRef(null); 37 | 38 | useEffect(() => { 39 | if (finished && withContext && context) { 40 | setContext(context) 41 | setFinished(false) 42 | } 43 | }, [context, finished, withContext, setContext]); 44 | 45 | useEffect(() => { 46 | bottomChatRef.current?.scrollIntoView({ behavior: "smooth" }); 47 | }, [messages]); 48 | 49 | useImperativeHandle(ref, () => ({ 50 | handleMessageSubmit: (event: FormEvent) => { 51 | const id = uuidv4(); 52 | handleSubmit(event, { 53 | data: { 54 
| messageId: id, 55 | }, 56 | }) 57 | }, 58 | handleInputUpdated: (event: ChangeEvent) => { 59 | handleInputChange(event) 60 | }, 61 | withContext, 62 | ref: chatRef, 63 | })); 64 | 65 | return ( 66 |
67 |
0 ? "flex flex-col justify-center items-center h-full" : "overflow-auto"}`}> 68 | {context ? ( 69 | 70 | ) : ( 71 | 72 | )} 73 |
74 |
75 | ); 76 | }); 77 | 78 | ChatWrapper.displayName = 'ChatWrapper'; 79 | 80 | export default ChatWrapper; 81 | -------------------------------------------------------------------------------- /src/app/components/Chat/Messages.tsx: -------------------------------------------------------------------------------- 1 | import { EllipseIcon } from "@/assets/icons/ellipse"; 2 | import { PineconeIcon } from "@/assets/icons/pinecone"; 3 | import { UserIcon } from "@/assets/icons/user"; 4 | import { PineconeLogoSvg } from "@/assets/svg/pineconeLogo"; 5 | import { Typography } from "@mui/material"; 6 | import Popover from "@mui/material/Popover"; 7 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 8 | import { Message } from "ai"; 9 | import { useRef, useState } from "react"; 10 | 11 | export default function Messages({ messages, withContext, context }: { messages: Message[], withContext: boolean, context?: { context: PineconeRecord[] }[] }) { 12 | const messagesEndRef = useRef(null); 13 | const [anchorEls, setAnchorEls] = useState<{ [key: string]: HTMLButtonElement | null }>({}); 14 | 15 | const handleClick = (event: React.MouseEvent, messageId: string, chunkId: string) => { 16 | setAnchorEls(prev => ({ ...prev, [`${messageId}-${chunkId}`]: event.currentTarget })); 17 | }; 18 | 19 | // Handle close function 20 | const handleClose = (messageId: string, chunkId: string) => { 21 | setAnchorEls(prev => ({ ...prev, [`${messageId}-${chunkId}`]: null })); 22 | }; 23 | 24 | const styles = { 25 | lightGrey: { 26 | color: "#72788D" 27 | }, 28 | placeholder: { 29 | fontSize: 12, 30 | marginTop: 10, 31 | } 32 | } 33 | 34 | return ( 35 |
36 | {messages.length == 0 && ( 37 |
38 |
39 | {withContext ? ( 40 | <> 41 |
42 | 43 |
44 |
45 | This is your chatbot powered by pinecone 46 |
47 | 48 | ) : ( 49 |
50 | Compare to a chatbot without context 51 |
52 | )} 53 |
54 |
55 | )} 56 | {messages?.map((message, index) => { 57 | const isAssistant = message.role === "assistant"; 58 | const entry = isAssistant && withContext && context && context[Math.floor(index / 2)]; 59 | 60 | return ( 61 |
65 |
66 | {message.role === "assistant" ? (withContext ? : ) : } 67 |
68 |
69 |
70 |
71 | {message.role === "assistant" ? (withContext ? "Pinecone + OpenAI Model" : "OpenAI Model") : "You"} 72 |
73 |
{message.content}
74 | {entry && entry.context.length > 0 && ( 75 |
76 |
Source:
77 | {entry.context.map((chunk, index) => { 78 | return ( 79 |
80 | 83 | handleClose(message.id, chunk.id)} 88 | disableRestoreFocus 89 | anchorOrigin={{ 90 | vertical: 'bottom', 91 | horizontal: 'center', 92 | }} 93 | transformOrigin={{ 94 | vertical: 'bottom', 95 | horizontal: 'center', 96 | }} 97 | sx={{ 98 | width: "60%", 99 | pointerEvents: 'none', 100 | }} 101 | > 102 |
103 | 104 | {chunk.metadata?.chunk} 105 | 106 |
107 |
108 |
109 | ) 110 | })} 111 |
112 | 113 | ) 114 | } 115 | { 116 | !withContext && message.role === "assistant" && (index == messages.length - 1) && (
117 | This answer may be speculative or inaccurate. 118 |
) 119 | } 120 |
121 |
122 |
123 | ) 124 | })} 125 |
126 |
127 | ); 128 | } 129 | -------------------------------------------------------------------------------- /src/app/components/Chat/index.tsx: -------------------------------------------------------------------------------- 1 | import AppContext from "@/appContext"; 2 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 3 | import React, { ChangeEvent, FormEvent, useContext, useRef } from "react"; 4 | import ChatInput from "./ChatInput"; 5 | import ChatWrapper, { ChatInterface } from "./ChatWrapper"; 6 | 7 | interface ChatProps { 8 | setContext: (data: { context: PineconeRecord[] }[]) => void; 9 | context: { context: PineconeRecord[] }[] | null; 10 | } 11 | 12 | const Chat: React.FC = ({ setContext, context }) => { 13 | 14 | const chatWithContextRef = useRef(null); 15 | const chatWithoutContextRef = useRef(null); 16 | 17 | const { totalRecords } = useContext(AppContext); 18 | 19 | const [input, setInput] = React.useState("") 20 | const onMessageSubmit = (e: FormEvent) => { 21 | setInput("") 22 | chatWithContextRef.current?.handleMessageSubmit(e) 23 | chatWithoutContextRef.current?.handleMessageSubmit(e) 24 | } 25 | 26 | const onInputChange = (event: ChangeEvent) => { 27 | setInput(event.target.value) 28 | chatWithContextRef.current?.handleInputUpdated(event) 29 | chatWithoutContextRef.current?.handleInputUpdated(event) 30 | } 31 | 32 | return ( 33 |
34 |
35 |
36 | 37 |
38 |
39 | 40 |
41 |
42 |
43 | 44 |
45 |
46 | ); 47 | }; 48 | 49 | export default Chat; 50 | -------------------------------------------------------------------------------- /src/app/components/Header.tsx: -------------------------------------------------------------------------------- 1 | import Image from "next/image"; 2 | import PineconeLogo from "../../../public/pinecone.svg"; 3 | import VercelLogo from "../../../public/vercel.svg"; 4 | 5 | export default function Header({ className }: { className?: string }) { 6 | return ( 7 |
10 | pinecone-logo{" "} 17 |
/
18 | vercel-logo 25 |
26 | ); 27 | } -------------------------------------------------------------------------------- /src/app/components/Sidebar/Button.tsx: -------------------------------------------------------------------------------- 1 | export function Button({ className, ...props }: any) { 2 | return ( 3 | 52 | handleClose()} 57 | disableRestoreFocus 58 | anchorOrigin={{ 59 | vertical: 'bottom', 60 | horizontal: 'right', 61 | }} 62 | transformOrigin={{ 63 | vertical: 'bottom', 64 | horizontal: 'right', 65 | }} 66 | sx={{ 67 | pointerEvents: 'none', 68 | }} 69 | > 70 |
71 | {card.pageContent} 72 |
73 |
74 | 75 | 76 |
77 |
78 | {/*
79 | {selected && selected.includes(card.metadata.hash) && } 80 | 81 | ID: {card.metadata.hash} 82 | 83 |
*/} 84 | 85 | ) 86 | 87 | }; 88 | -------------------------------------------------------------------------------- /src/app/components/Sidebar/InfoPopover.tsx: -------------------------------------------------------------------------------- 1 | import { Popover, PopoverContent, PopoverHandler } from "@material-tailwind/react"; 2 | import React, { useState } from 'react'; 3 | import { IoMdInformationCircleOutline } from "react-icons/io"; 4 | 5 | interface InfoPopoverProps { 6 | infoText: string; 7 | className?: string; 8 | } 9 | 10 | export const InfoPopover: React.FC = ({ infoText, className }) => { 11 | const [open, setOpen] = useState(false); 12 | 13 | const popoverTriggers = { 14 | onMouseEnter: () => setOpen(true), 15 | onMouseLeave: () => setOpen(false), 16 | }; 17 | 18 | return ( 19 |
20 | 21 | 22 |
23 |
24 | 25 |
26 | {infoText} 27 |
28 |
29 |
30 |
31 | ); 32 | }; -------------------------------------------------------------------------------- /src/app/components/Sidebar/RecursiveSplittingOptions.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { InfoPopover } from './InfoPopover'; 3 | 4 | interface RecursiveSplittingOptionsProps { 5 | chunkSize: number; 6 | setChunkSize: (value: number) => void; 7 | overlap: number; 8 | setOverlap: (value: number) => void; 9 | } 10 | 11 | export const RecursiveSplittingOptions: React.FC = ({ 12 | chunkSize, 13 | setChunkSize, 14 | overlap, 15 | setOverlap, 16 | }) => { 17 | 18 | 19 | return ( 20 |
21 |
22 |
23 |
24 | Chunk Size: {chunkSize} 25 |
26 | 30 |
31 |
32 |
33 | 34 | setChunkSize(parseInt(e.target.value))} 41 | /> 42 |
43 |
44 |
45 | Overlap:{overlap} 46 |
47 | 51 |
52 |
53 |
54 | setOverlap(parseInt(e.target.value))} 61 | /> 62 |
63 |
64 |
65 | ); 66 | }; -------------------------------------------------------------------------------- /src/app/components/Sidebar/UrlButton.tsx: -------------------------------------------------------------------------------- 1 | // UrlButton.tsx 2 | 3 | import { Button } from "./Button"; 4 | import React, { FC } from "react"; 5 | import { IconContext } from "react-icons"; 6 | import { AiOutlineLink } from "react-icons/ai"; 7 | import Link from "next/link"; 8 | 9 | export interface IUrlEntry { 10 | url: string; 11 | title: string; 12 | seeded: boolean; 13 | loading: boolean; 14 | } 15 | 16 | interface IURLButtonProps { 17 | entry: IUrlEntry; 18 | onClick: () => Promise; 19 | } 20 | 21 | const UrlButton: FC = ({ entry, onClick }) => ( 22 |
23 | 58 |
59 | ); 60 | 61 | export default UrlButton; 62 | -------------------------------------------------------------------------------- /src/app/components/Sidebar/index.tsx: -------------------------------------------------------------------------------- 1 | import AppContext from "@/appContext"; 2 | import { Button } from "@material-tailwind/react"; 3 | import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; 4 | import CircularProgress from '@mui/material/CircularProgress'; 5 | import MenuItem from '@mui/material/MenuItem'; 6 | import Select, { SelectChangeEvent } from '@mui/material/Select'; 7 | import React, { useContext, useState } from "react"; 8 | import Header from "../Header"; 9 | import { Card, ICard } from "./Card"; 10 | import { InfoPopover } from "./InfoPopover"; 11 | import { RecursiveSplittingOptions } from "./RecursiveSplittingOptions"; 12 | import { urls } from "./urls"; 13 | import { clearIndex, crawlDocument } from "./utils"; 14 | 15 | const styles: Record = { 16 | contextWrapper: { 17 | display: "flex", 18 | padding: "var(--spacer-huge, 64px) var(--spacer-m, 32px) var(--spacer-m, 32px) var(--spacer-m, 32px)", 19 | alignItems: "flex-start", 20 | gap: "var(--Spacing-0, 0px)", 21 | alignSelf: "stretch", 22 | backgroundColor: "#FBFBFC", 23 | fontSize: 14 24 | }, 25 | textHeaderWrapper: { 26 | display: "flex", 27 | flexDirection: "column", 28 | alignItems: "flex-start", 29 | alignSelf: "stretch" 30 | }, 31 | entryUrl: { 32 | fontSize: 'small', 33 | color: 'grey', 34 | whiteSpace: 'nowrap', 35 | overflow: 'hidden', 36 | textOverflow: 'ellipsis', 37 | maxWidth: "400px" 38 | }, 39 | h4: { 40 | fontWeight: 600, marginBottom: 8, fontSize: 16 41 | }, 42 | h7: { 43 | fontSize: 12, 44 | textTransform: 'uppercase', 45 | letterSpacing: 1 46 | } 47 | } 48 | 49 | 50 | export const Sidebar: React.FC = () => { 51 | const [entries, setEntries] = useState(urls); 52 | const [cards, setCards] = useState([]); 53 | const [splittingMethod, setSplittingMethod] = 
useState("markdown"); 54 | const [chunkSize, setChunkSize] = useState(256); 55 | const [overlap, setOverlap] = useState(1); 56 | const [url, setUrl] = useState(entries[0].url); 57 | const [clearIndexComplete, setClearIndexCompleteMessageVisible] = useState(false) 58 | const [crawling, setCrawling] = useState(false) 59 | const [crawlingDoneVisible, setCrawlingDoneVisible] = useState(false) 60 | 61 | const { refreshIndex } = useContext(AppContext); 62 | 63 | const handleUrlChange = (event: SelectChangeEvent) => { 64 | const { 65 | target: { value }, 66 | } = event; 67 | setUrl(value) 68 | } 69 | 70 | const handleSplittingMethodChange = (event: SelectChangeEvent) => { 71 | const { 72 | target: { value }, 73 | } = event; 74 | setSplittingMethod(value) 75 | } 76 | 77 | const handleEmbedAndUpsertClick = async () => { 78 | setCrawling(true) 79 | await crawlDocument( 80 | url, 81 | setEntries, 82 | setCards, 83 | splittingMethod, 84 | chunkSize, 85 | overlap 86 | ) 87 | 88 | setCrawling(false) 89 | setCrawlingDoneVisible(true) 90 | setTimeout(() => { 91 | setCrawlingDoneVisible(false) 92 | console.log("it's time") 93 | refreshIndex() 94 | }, 2000) 95 | } 96 | 97 | const handleClearIndexClick = async () => { 98 | await clearIndex(setEntries, setCards) 99 | setClearIndexCompleteMessageVisible(true) 100 | refreshIndex() 101 | setTimeout(() => { 102 | setClearIndexCompleteMessageVisible(false) 103 | }, 2000) 104 | } 105 | 106 | const menuItems = entries.map((entry, key) => ( 107 |
111 |
{entry.title}
112 |
{entry.url}
113 |
114 |
115 | )); 116 | 117 | 118 | return ( 119 |
123 |
124 |
125 |
126 | This RAG chatbot uses Pinecone and Vercel's AI SDK to demonstrate a URL crawl, data chunking and embedding, and semantic questioning. 127 |
128 |
129 |
130 |
131 |

Select demo url to index

132 | 144 |
145 |
146 |

147 |
Chunking method
148 | 152 |

153 | 185 |
186 | {splittingMethod === "recursive" && ( 187 | 193 | )} 194 | 206 |
207 |
208 |
Index records
209 |
Clear
210 |
211 | {( 212 |
219 | Index cleared 220 |
221 | )} 222 | {( 223 |
229 | Chunking and embedding your data... 232 |
233 | )} 234 |
235 |
236 | {cards && cards.length > 0 ? 237 |
238 |
{cards.length} records:
239 |
240 | {url} 241 |
242 |
243 | : 244 |
245 | } 246 |
247 |
248 |
249 | {cards.map((card, index) => ( 250 | 251 | ))} 252 | {cards.length > 0 && (
End of results
)} 253 |
254 | 255 |
256 | ); 257 | }; 258 | -------------------------------------------------------------------------------- /src/app/components/Sidebar/urls.ts: -------------------------------------------------------------------------------- 1 | export const urls = [ 2 | { 3 | url: "https://www.wired.com/story/fast-forward-toyota-robots-learning-housework/", 4 | title: "Toyota's Robots are Learning Housework", 5 | seeded: false, 6 | loading: false, 7 | }, 8 | { 9 | url: "https://www.wired.com/story/synthetic-data-is-a-dangerous-teacher/", 10 | title: "Synthetic Data Is a Dangerous Teacher", 11 | seeded: false, 12 | loading: false, 13 | }, 14 | { 15 | url: "https://www.wired.com/story/staying-one-step-ahead-of-hackers-when-it-comes-to-ai/", 16 | title: "Staying Ahead of Hackers When It Comes to AI", 17 | seeded: false, 18 | loading: false, 19 | }] -------------------------------------------------------------------------------- /src/app/components/Sidebar/utils.ts: -------------------------------------------------------------------------------- 1 | import { ICard } from "./Card"; 2 | import { IUrlEntry } from "./UrlButton"; 3 | 4 | export async function crawlDocument( 5 | url: string, 6 | setEntries: React.Dispatch>, 7 | setCards: React.Dispatch>, 8 | splittingMethod: string, 9 | chunkSize: number, 10 | overlap: number 11 | ): Promise { 12 | setEntries((seeded: IUrlEntry[]) => 13 | seeded.map((seed: IUrlEntry) => 14 | seed.url === url ? { ...seed, loading: true } : seed 15 | ) 16 | ); 17 | const response = await fetch("/api/crawl", { 18 | method: "POST", 19 | headers: { "Content-Type": "application/json" }, 20 | body: JSON.stringify({ 21 | url, 22 | options: { 23 | splittingMethod, 24 | chunkSize, 25 | overlap, 26 | }, 27 | }), 28 | }); 29 | 30 | const { documents } = await response.json(); 31 | 32 | setCards(documents); 33 | 34 | setEntries((prevEntries: IUrlEntry[]) => 35 | prevEntries.map((entry: IUrlEntry) => 36 | entry.url === url ? 
{ ...entry, seeded: true, loading: false } : entry 37 | ) 38 | ); 39 | } 40 | 41 | export async function clearIndex( 42 | setEntries: React.Dispatch>, 43 | setCards: React.Dispatch> 44 | ) { 45 | const response = await fetch("/api/clearIndex", { 46 | method: "POST", 47 | headers: { "Content-Type": "application/json" }, 48 | }); 49 | 50 | if (response.ok) { 51 | setEntries((prevEntries: IUrlEntry[]) => 52 | prevEntries.map((entry: IUrlEntry) => ({ 53 | ...entry, 54 | seeded: false, 55 | loading: false, 56 | })) 57 | ); 58 | setCards([]); 59 | return true 60 | } 61 | } -------------------------------------------------------------------------------- /src/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pinecone-io/pinecone-rag-demo/de05d15253383b2cc7c1f38d0ac3f8b36faf4a3d/src/app/favicon.ico -------------------------------------------------------------------------------- /src/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | --foreground-rgb: 0, 0, 0; 7 | --background-start-rgb: 214, 219, 220; 8 | --background-end-rgb: 255, 255, 255; 9 | } 10 | 11 | @media (prefers-color-scheme: dark) { 12 | :root { 13 | --foreground-rgb: 255, 255, 255; 14 | --background-start-rgb: 0, 0, 0; 15 | --background-end-rgb: 0, 0, 0; 16 | } 17 | } 18 | 19 | body { 20 | color: rgb(var(--foreground-rgb)); 21 | background: linear-gradient( 22 | to bottom, 23 | transparent, 24 | rgb(var(--background-end-rgb)) 25 | ) 26 | rgb(var(--background-start-rgb)); 27 | } 28 | -------------------------------------------------------------------------------- /src/app/hooks/useRefreshIndex.ts: -------------------------------------------------------------------------------- 1 | import { useState } from 'react'; 2 | 3 | const useRefreshIndex = () => { 4 | const [totalRecords, setTotalRecords] = 
useState(0); 5 | 6 | const refreshIndex = async () => { 7 | const response = await fetch("/api/checkIndex", { 8 | method: "POST", 9 | }); 10 | try { 11 | const stats = await response.json(); 12 | setTotalRecords(stats.totalRecordCount); 13 | } catch (e) { 14 | console.log(e) 15 | } 16 | } 17 | 18 | return { totalRecords, refreshIndex }; 19 | } 20 | 21 | export default useRefreshIndex; -------------------------------------------------------------------------------- /src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | export const metadata = { 2 | title: "Pinecone - Vercel AI SDK Example", 3 | description: "Pinecone - Vercel AI SDK Example", 4 | }; 5 | 6 | import { Inter } from 'next/font/google'; 7 | const inter = Inter({ subsets: ['latin'] }) 8 | 9 | 10 | import "../global.css"; 11 | 12 | export default function RootLayout({ 13 | children, 14 | }: { 15 | children: React.ReactNode; 16 | }) { 17 | return ( 18 | 19 | {children} 20 | 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /src/app/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import Chat from "@/components/Chat"; 4 | import { Sidebar } from "@/components/Sidebar"; 5 | import useRefreshIndex from '@/hooks/useRefreshIndex'; 6 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 7 | import React, { useEffect, useState } from "react"; 8 | import { FaGithub } from 'react-icons/fa'; 9 | import AppContext from "./appContext"; 10 | 11 | const Page: React.FC = () => { 12 | const [context, setContext] = useState<{ context: PineconeRecord[] }[] | null>(null); 13 | const { totalRecords, refreshIndex } = useRefreshIndex(); 14 | 15 | useEffect(() => { 16 | if (totalRecords === 0) { 17 | refreshIndex() 18 | } 19 | }, [refreshIndex, totalRecords]) 20 | 21 | return ( 22 | 23 |
24 |
25 |
28 | 29 |
30 | 31 |
32 |
33 | 34 | 35 | 36 |
37 |
38 |
39 | ); 40 | }; 41 | 42 | export default Page; 43 | 44 | -------------------------------------------------------------------------------- /src/app/services/chunkedUpsert.ts: -------------------------------------------------------------------------------- 1 | import type { Index, PineconeRecord } from '@pinecone-database/pinecone'; 2 | 3 | const sliceIntoChunks = (arr: T[], chunkSize: number) => { 4 | return Array.from({ length: Math.ceil(arr.length / chunkSize) }, (_, i) => 5 | arr.slice(i * chunkSize, (i + 1) * chunkSize) 6 | ); 7 | }; 8 | 9 | export const chunkedUpsert = async ( 10 | index: Index, 11 | vectors: Array, 12 | namespace: string, 13 | chunkSize = 10 14 | ) => { 15 | // Split the vectors into chunks 16 | const chunks = sliceIntoChunks(vectors, chunkSize); 17 | 18 | try { 19 | // Upsert each chunk of vectors into the index 20 | await Promise.allSettled( 21 | chunks.map(async (chunk) => { 22 | try { 23 | await index.namespace(namespace).upsert(vectors); 24 | } catch (e) { 25 | console.log('Error upserting chunk', e); 26 | } 27 | }) 28 | ); 29 | 30 | return true; 31 | } catch (e) { 32 | throw new Error(`Error upserting vectors into index: ${e}`); 33 | } 34 | }; 35 | -------------------------------------------------------------------------------- /src/app/services/context.ts: -------------------------------------------------------------------------------- 1 | import type { PineconeRecord } from "@pinecone-database/pinecone"; 2 | import { getEmbeddings } from './embeddings'; 3 | import { getMatchesFromEmbeddings } from "./pinecone"; 4 | 5 | export type Metadata = { 6 | url: string, 7 | text: string, 8 | chunk: string, 9 | } 10 | 11 | // The function `getContext` is used to retrieve the context of a given message 12 | export const getContext = async (message: string, namespace: string, maxTokens = 3000, minScore = 0.7, getOnlyText = true): Promise => { 13 | 14 | // Get the embeddings of the input message 15 | const embedding = await getEmbeddings(message); 
16 | 17 | // Retrieve the matches for the embeddings from the specified namespace 18 | const matches = await getMatchesFromEmbeddings(embedding, 10, namespace); 19 | 20 | // Filter out the matches that have a score lower than the minimum score 21 | const qualifyingDocs = matches.filter(m => m.score && m.score > minScore); 22 | 23 | return qualifyingDocs 24 | } 25 | -------------------------------------------------------------------------------- /src/app/services/embeddings.ts: -------------------------------------------------------------------------------- 1 | 2 | import { OpenAIApi, Configuration } from "openai-edge"; 3 | 4 | const config = new Configuration({ 5 | apiKey: process.env.OPENAI_API_KEY 6 | }) 7 | const openai = new OpenAIApi(config) 8 | 9 | export async function getEmbeddings(input: string) { 10 | try { 11 | const response = await openai.createEmbedding({ 12 | model: "text-embedding-ada-002", 13 | input: input.replace(/\n/g, ' ') 14 | }) 15 | 16 | const result = await response.json(); 17 | return result.data[0].embedding as number[] 18 | 19 | } catch (e) { 20 | console.log("Error calling OpenAI embedding API: ", e); 21 | throw new Error(`Error calling OpenAI embedding API: ${e}`); 22 | } 23 | } -------------------------------------------------------------------------------- /src/app/services/pinecone.ts: -------------------------------------------------------------------------------- 1 | import { Pinecone, type ScoredPineconeRecord } from "@pinecone-database/pinecone"; 2 | 3 | export type Metadata = { 4 | url: string, 5 | text: string, 6 | chunk: string, 7 | hash: string 8 | } 9 | 10 | // The function `getMatchesFromEmbeddings` is used to retrieve matches for the given embeddings 11 | const getMatchesFromEmbeddings = async (embeddings: number[], topK: number, namespace: string): Promise[]> => { 12 | // Obtain a client for Pinecone 13 | const pinecone = new Pinecone(); 14 | 15 | const indexName: string = process.env.PINECONE_INDEX || ''; 16 | if 
(indexName === '') { 17 | throw new Error('PINECONE_INDEX environment variable not set') 18 | } 19 | // Get the Pinecone index 20 | const index = pinecone!.Index(indexName); 21 | 22 | // Get the namespace 23 | const pineconeNamespace = index.namespace(namespace ?? '') 24 | // console.log("embeddings", JSON.stringify(embeddings)) 25 | 26 | try { 27 | // Query the index with the defined request 28 | const queryResult = await pineconeNamespace.query({ 29 | vector: embeddings, 30 | topK, 31 | includeMetadata: true, 32 | }) 33 | return queryResult.matches || [] 34 | } catch (e) { 35 | // Log the error and throw it 36 | console.log("Error querying embeddings: ", e) 37 | throw new Error(`Error querying embeddings: ${e}`) 38 | } 39 | } 40 | 41 | export { getMatchesFromEmbeddings }; 42 | 43 | -------------------------------------------------------------------------------- /src/app/utils/truncateString.ts: -------------------------------------------------------------------------------- 1 | export const truncateStringByBytes = (str: string, bytes: number) => { 2 | const enc = new TextEncoder(); 3 | return new TextDecoder("utf-8").decode(enc.encode(str).slice(0, bytes)); 4 | }; -------------------------------------------------------------------------------- /src/global.css: -------------------------------------------------------------------------------- 1 | @import "tailwindcss/base"; 2 | @import "tailwindcss/components"; 3 | @import "tailwindcss/utilities"; 4 | 5 | @keyframes slideInFromBottom { 6 | 0% { 7 | transform: translateY(100%); 8 | opacity: 0; 9 | } 10 | 100% { 11 | transform: translateY(0); 12 | opacity: 1; 13 | } 14 | } 15 | 16 | .slide-in-bottom { 17 | animation: slideInFromBottom 0.3s ease-out; 18 | } 19 | 20 | .input-glow { 21 | box-shadow: 0 0 1px #738FAB80, 0 0 1px #738FAB80; 22 | } 23 | 24 | .input-glow:hover { 25 | box-shadow: 0 0 1px #87f4f6, 0 0 2 #8b9ebe; 26 | } 27 | 28 | /* .message-glow { 29 | box-shadow: 0 0 3px #738FAB80, 0 0 5px #738FAB80; 30 | } 31 
| 32 | .message-glow:hover { 33 | box-shadow: 0 0 3px #5eabac, 0 0 4px #8b9ebe; 34 | } */ 35 | 36 | @keyframes glimmer { 37 | 0% { 38 | background-position: -200px; 39 | } 40 | 100% { 41 | background-position: calc(200px + 100%); 42 | } 43 | } 44 | 45 | @keyframes shimmer { 46 | 0% { 47 | transform: translateX(-100%); 48 | } 49 | 100% { 50 | transform: translateX(100%); 51 | } 52 | } 53 | 54 | .shimmer { 55 | animation: glimmer 2s infinite linear; 56 | background: rgb(82, 82, 91); 57 | background: linear-gradient( 58 | to right, 59 | darkgray 10%, 60 | rgb(130, 129, 129) 50%, 61 | rgba(124, 123, 123, 0.816) 90% 62 | ); 63 | background-size: 200px 100%; 64 | background-repeat: no-repeat; 65 | /* color: transparent; */ 66 | } 67 | 68 | @keyframes pulse { 69 | 0%, 70 | 100% { 71 | color: white; 72 | } 73 | 50% { 74 | color: #f59e0b; /* Tailwind's yellow-500 */ 75 | } 76 | } 77 | 78 | .animate-pulse-once { 79 | animation: pulse 5s cubic-bezier(0, 0, 0.2, 1) 1; 80 | } 81 | 82 | #chunkSize, #overlap { 83 | accent-color: #1B17F5; 84 | --inverse-accent-color: #E4E8EA; 85 | 86 | } 87 | 88 | 89 | 90 | .markdown-content { 91 | white-space: nowrap; /* Keep the text on a single line */ 92 | overflow: hidden; /* Hide overflow */ 93 | text-overflow: ellipsis; /* Add ellipsis at the end of the truncated text */ 94 | display: block; 95 | } -------------------------------------------------------------------------------- /src/middleware.ts: -------------------------------------------------------------------------------- 1 | import type { NextRequest } from 'next/server'; 2 | import { NextResponse } from 'next/server'; 3 | 4 | export function middleware(request: NextRequest) { 5 | const requiredEnvVars = ['OPENAI_API_KEY', 'PINECONE_API_KEY', 'PINECONE_REGION', 'PINECONE_INDEX']; 6 | requiredEnvVars.forEach(envVar => { 7 | if (!process.env[envVar] && !process.env.CI) { 8 | throw new Error(`${envVar} environment variable is not defined`); 9 | } 10 | }); 11 | return NextResponse.next() 
12 | } -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | const withMT = require("@material-tailwind/react/utils/withMT"); 2 | 3 | /** @type {import('tailwindcss').Config} */ 4 | module.exports = withMT({ 5 | content: [ 6 | "./src/pages/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./src/components/**/*.{js,ts,jsx,tsx,mdx}", 8 | "./src/app/**/*.{js,ts,jsx,tsx,mdx}", 9 | ], 10 | theme: { 11 | screens: { 12 | sm: "640px", 13 | md: "768px", 14 | lg: "1024px", 15 | xl: "1280px", 16 | }, 17 | colors: { 18 | "button-primary": '#1B17F5', 19 | "bg-grey": '#FBFBFC', 20 | "text-primary": "#121142", 21 | "shaded-border": "#738FAB80" 22 | }, 23 | extend: { 24 | backgroundImage: { 25 | "gradient-radial": "radial-gradient(var(--tw-gradient-stops))", 26 | "gradient-conic": 27 | "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))", 28 | }, 29 | gridTemplateRows: { 30 | "auto-1fr": "auto 1fr", 31 | }, 32 | }, 33 | }, 34 | plugins: [], 35 | future: { 36 | removeDeprecatedGapUtilities: true, 37 | }, 38 | }); 39 | -------------------------------------------------------------------------------- /tests/example.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect, test } from '@playwright/test'; 2 | import { urls } from '../src/app/components/Sidebar/urls'; 3 | 4 | test('has correct title', async ({ page }) => { 5 | await page.goto('http://localhost:3000'); 6 | 7 | await expect(page).toHaveTitle('Pinecone - Vercel AI SDK Example') 8 | }) 9 | 10 | test('renders clear index button', async ({ page }) => { 11 | await page.goto('http://localhost:3000') 12 | 13 | const clearIndexButton = await page.$('[data-testid="clear-button"]'); 14 | const clearIndexButtonCount = clearIndexButton ? 
1 : 0; 15 | await expect(clearIndexButtonCount).toBe(1) 16 | }) 17 | 18 | test('Check Select menu', async ({ page }) => { 19 | // Go to your page 20 | await page.goto('http://localhost:3000'); 21 | 22 | // Check if Select is visible 23 | const select = await page.locator('data-testid=url-selector'); 24 | await expect(select).toBeVisible(); 25 | 26 | // Click on the Select box and wait for it 27 | await select.click(); 28 | await page.waitForTimeout(1000); 29 | 30 | // Check if MenuItems are rendered correctly 31 | for (let i = 0; i < urls.length; i++) { 32 | const menuItem = await page.locator(`div[data-testid="${urls[i].url}"]`); 33 | const title = await menuItem.locator('div').first().innerText(); 34 | expect(title).toBe(urls[i].title); // The title should be the title of the entry 35 | const url = await menuItem.locator('div').last().innerText(); 36 | expect(url).toBe(urls[i].url); // The url should be the url of the entry 37 | } 38 | }); 39 | 40 | 41 | // TODO - add tests for other key buttons on the homepage 42 | 43 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "plugins": [ 18 | { 19 | "name": "next" 20 | } 21 | ], 22 | "paths": { 23 | "@/*": ["./src/app/*"] 24 | } 25 | }, 26 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 27 | "exclude": ["node_modules"] 28 | } 29 | --------------------------------------------------------------------------------