├── .env.local.example ├── .gitignore ├── README.md ├── components ├── LoadingDots.tsx ├── MarkdownRenderer.tsx ├── MetaTags.tsx └── ResizablePanel.tsx ├── lib └── embeddings-supabase.ts ├── next-env.d.ts ├── next.config.js ├── package-lock.json ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── api │ ├── docs.ts │ └── generate-embeddings.ts ├── docs.tsx ├── embeddings.tsx └── index.tsx ├── postcss.config.js ├── public ├── bot │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ ├── apple-touch-icon-114x114.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-144x144.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-167x167.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-57x57.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-72x72.png │ ├── apple-touch-icon-76x76.png │ ├── bot.png │ ├── bot.svg │ ├── docs-og.png │ ├── favicon-128x128.png │ ├── favicon-16x16.png │ ├── favicon-196x196.png │ ├── favicon-32x32.png │ ├── favicon-96x96.png │ ├── mstile-144x144.png │ ├── mstile-150x150.png │ ├── mstile-310x150.png │ ├── mstile-310x310.png │ ├── mstile-70x70.png │ └── og.png └── images │ └── background.png ├── styles ├── chrome-bug.css ├── loading-dots.module.css └── main.css ├── tailwind.config.js ├── tsconfig.json ├── types.ts └── utils └── OpenAIStream.ts /.env.local.example: -------------------------------------------------------------------------------- 1 | # Supabase 2 | SUPABASE_ANON_KEY="" 3 | NEXT_PUBLIC_SUPABASE_URL="" 4 | # OpenAI 5 | OPENAI_API_KEY="" 6 | OPENAI_PROXY="" 7 | # Splash 8 | SPLASH_URL="" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | 27 | # local env files 28 | .env 29 | .env.local 30 | .env.development.local 31 | .env.test.local 32 | .env.production.local 33 | 34 | # vercel 35 | .vercel 36 | .next 37 | 38 | # editors 39 | .vscode 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Domain-specific ChatGPT Starter App 2 | 3 | ChatGPT is great for casual, general-purpose question answering but falls short when domain-specific knowledge is needed. Further, it makes up answers to fill its knowledge gaps and never cites its sources, so it can't really be trusted. This starter app uses embeddings coupled with vector search to solve this, or more specifically, to show how OpenAI's chat completions API can be used to create conversational interfaces to domain-specific knowledge. 4 | 5 | Embeddings, as represented by vectors of floating-point numbers, measure the "relatedness" of text strings. These are super useful for ranking search results, clustering, classification, etc. Relatedness is measured by cosine similarity. If the cosine similarity between two vectors is close to 1, the vectors are highly similar and point in the same direction. In the case of text embeddings, a high cosine similarity between two embedding vectors indicates that the corresponding text strings are highly related. 6 | 7 | This starter app uses embeddings to generate a vector representation of a document, and then uses vector search to find the most similar documents to the query. The results of the vector search are then used to construct a prompt. The response is then streamed to the user. 
Check out the Supabase blog posts on [pgvector and OpenAI embeddings](https://supabase.com/blog/openai-embeddings-postgres-vector) for more background. 8 | 9 | Technologies used: 10 | 11 | - Nextjs (React framework) + Vercel hosting 12 | - Supabase (using their pgvector implementation as the vector database) 13 | - OpenAI API (for generating embeddings and chat completions) 14 | - TailwindCSS (for styling) 15 | 16 | ## Functional Overview 17 | 18 | Creating and storing the embeddings: 19 | 20 | - Web pages are scraped, stripped to plain text and split into 1000-character documents 21 | - OpenAI's embedding API is used to generate embeddings for each document using the "text-embedding-ada-002" model 22 | - The embeddings are then stored in a Supabase postgres table using pgvector; the table has three columns: the document text, the source URL, and the embedding vectors returned from the OpenAI API. 23 | 24 | Responding to queries: 25 | 26 | - A single embedding is generated from the user prompt 27 | - That embedding is used to perform a similarity search against the vector database 28 | - The results of the similarity search are used to construct a prompt for GPT-3.5/GPT-4 29 | - The GPT response is then streamed to the user. 30 | 31 | ## Getting Started 32 | 33 | The following set-up guide assumes at least basic familiarity developing web apps with React and Nextjs. Experience with OpenAI APIs and Supabase is helpful but not required to get things working. 34 | 35 | ### Set-up Supabase 36 | 37 | - Create a Supabase account and project at https://app.supabase.com/sign-in. NOTE: Supabase support for pgvector is relatively new (02/2023), so it's important to create a new project if your project was created before then. 38 | - First we'll enable the Vector extension. In Supabase, this can be done from the web portal through `Database` → `Extensions`. 
You can also do this in SQL by running: 39 | 40 | ``` 41 | create extension vector; 42 | ``` 43 | 44 | - Next let's create a table to store our documents and their embeddings. Head over to the SQL Editor and run the following query: 45 | 46 | ```sql 47 | create table documents ( 48 | id bigserial primary key, 49 | content text, 50 | url text, 51 | embedding vector (1536) 52 | ); 53 | ``` 54 | 55 | - Finally, we'll create a function that will be used to perform similarity searches. Head over to the SQL Editor and run the following query: 56 | 57 | ```sql 58 | create or replace function match_documents ( 59 | query_embedding vector(1536), 60 | similarity_threshold float, 61 | match_count int 62 | ) 63 | returns table ( 64 | id bigint, 65 | content text, 66 | url text, 67 | similarity float 68 | ) 69 | language plpgsql 70 | as $$ 71 | begin 72 | return query 73 | select 74 | documents.id, 75 | documents.content, 76 | documents.url, 77 | 1 - (documents.embedding <=> query_embedding) as similarity 78 | from documents 79 | where 1 - (documents.embedding <=> query_embedding) > similarity_threshold 80 | order by documents.embedding <=> query_embedding 81 | limit match_count; 82 | end; 83 | $$; 84 | ``` 85 | 86 | ### Set-up local environment 87 | 88 | - clone the repo: `gh repo clone gannonh/chatgpt-pgvector` 89 | - open in your favorite editor (the following assumes VS Code on a Mac) 90 | 91 | ```bash 92 | cd chatgpt-pgvector 93 | code . 94 | ``` 95 | 96 | - install dependencies 97 | 98 | ```bash 99 | npm install 100 | ``` 101 | 102 | - create a .env.local file in the root directory to store environment variables: 103 | 104 | ```bash 105 | cp .env.local.example .env.local 106 | ``` 107 | 108 | - open the .env.local file and add your Supabase project URL and API key. You can find these in the Supabase web portal under `Project` → `API`. The API key should be stored in the `SUPABASE_ANON_KEY` variable and project URL should be stored under `NEXT_PUBLIC_SUPABASE_URL`. 
109 | - Add your OPENAI API key to .env.local. You can find this in the OpenAI web portal under `API Keys`. The API key should be stored in the `OPENAI_API_KEY` variable. 110 | - [optional] environment variable `OPEAI_PROXY` be provide to enable your custom proxy of OPENAI api. Left it `""` to call official API directly. 111 | - [optional] environment variable `SPLASH_URL` be provide to enable your [splash](https://splash.readthedocs.io/en/stable/index.html) (Splash is a javascript rendering service. It’s a lightweight web browser with an HTTP API, implemented in Python 3 using Twisted and QT5) api. Left it `""` to fetch url direct. 112 | - Start the app 113 | 114 | ```bash 115 | npm run dev 116 | ``` 117 | 118 | - Open http://localhost:3000 in your browser to view the app. 119 | -------------------------------------------------------------------------------- /components/LoadingDots.tsx: -------------------------------------------------------------------------------- 1 | import styles from "../styles/loading-dots.module.css"; 2 | 3 | const LoadingDots = ({ 4 | color = "#000", 5 | style = "small", 6 | }: { 7 | color: string; 8 | style: string; 9 | }) => { 10 | return ( 11 | 12 | 13 | 14 | 15 | 16 | ); 17 | }; 18 | 19 | export default LoadingDots; 20 | 21 | LoadingDots.defaultProps = { 22 | style: "small", 23 | }; 24 | -------------------------------------------------------------------------------- /components/MarkdownRenderer.tsx: -------------------------------------------------------------------------------- 1 | import ReactMarkdown from "react-markdown"; 2 | import { PrismLight as SyntaxHighlighter } from "react-syntax-highlighter"; 3 | import tsx from "react-syntax-highlighter/dist/cjs/languages/prism/tsx"; 4 | import typescript from "react-syntax-highlighter/dist/cjs/languages/prism/typescript"; 5 | import scss from "react-syntax-highlighter/dist/cjs/languages/prism/scss"; 6 | import bash from "react-syntax-highlighter/dist/cjs/languages/prism/bash"; 7 | import 
markdown from "react-syntax-highlighter/dist/cjs/languages/prism/markdown"; 8 | import json from "react-syntax-highlighter/dist/cjs/languages/prism/json"; 9 | import python from "react-syntax-highlighter/dist/cjs/languages/prism/python"; 10 | import javascript from "react-syntax-highlighter/dist/cjs/languages/prism/javascript"; 11 | import jsx from "react-syntax-highlighter/dist/cjs/languages/prism/jsx"; 12 | import rangeParser from "parse-numeric-range"; 13 | import { oneDark } from "react-syntax-highlighter/dist/cjs/styles/prism"; 14 | import { ReactNode } from "react"; 15 | 16 | SyntaxHighlighter.registerLanguage("tsx", tsx); 17 | SyntaxHighlighter.registerLanguage("typescript", typescript); 18 | SyntaxHighlighter.registerLanguage("scss", scss); 19 | SyntaxHighlighter.registerLanguage("bash", bash); 20 | SyntaxHighlighter.registerLanguage("markdown", markdown); 21 | SyntaxHighlighter.registerLanguage("json", json); 22 | SyntaxHighlighter.registerLanguage("python", python); 23 | SyntaxHighlighter.registerLanguage("javascript", javascript); 24 | SyntaxHighlighter.registerLanguage("jsx", jsx); 25 | 26 | 27 | const syntaxTheme = oneDark; 28 | 29 | const MarkdownComponents: object = { 30 | code({ 31 | node, 32 | inline, 33 | className, 34 | ...props 35 | }: { 36 | node: { data: { meta: string } }; 37 | inline: boolean; 38 | className: string; 39 | } & Record): ReactNode { 40 | const match = /language-(\w+)/.exec(className || ""); 41 | const hasMeta = node?.data?.meta; 42 | 43 | const applyHighlights: object = (applyHighlights: number) => { 44 | if (hasMeta) { 45 | const RE = /{([\d,-]+)}/; 46 | const metadata = node.data.meta?.replace(/\s/g, ""); 47 | const strlineNumbers = RE?.test(metadata) 48 | ? 
RE?.exec(metadata)![1] 49 | : "0"; 50 | const highlightLines = rangeParser(strlineNumbers); 51 | if (highlightLines.includes(applyHighlights)) { 52 | return { className: "highlight" }; 53 | } 54 | } 55 | return {}; 56 | }; 57 | 58 | const children = 59 | typeof props.children === "string" || Array.isArray(props.children) 60 | ? props.children 61 | : ""; 62 | 63 | return match ? ( 64 | 75 | {children} 76 | 77 | ) : ( 78 | 79 | ); 80 | } 81 | }; 82 | 83 | type Props = { 84 | content: string; 85 | }; 86 | 87 | const MarkdownRenderer: React.FC = ({ content }) => { 88 | return {content}; 89 | }; 90 | 91 | export default MarkdownRenderer; 92 | -------------------------------------------------------------------------------- /components/MetaTags.tsx: -------------------------------------------------------------------------------- 1 | // components/PageMeta.tsx 2 | import Head from "next/head"; 3 | 4 | interface Props { 5 | title: string; 6 | description: string; 7 | cardImage: string; 8 | url: string; 9 | } 10 | 11 | const MetaTags = ({ title, description, cardImage, url }: Props) => ( 12 | 13 | {title} 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 32 | 37 | 42 | 47 | 52 | 57 | 62 | 67 | 73 | 79 | 85 | 91 | 97 | 98 | 99 | 103 | 107 | 111 | 115 | 119 | 120 | ) 121 | 122 | export default MetaTags; -------------------------------------------------------------------------------- /components/ResizablePanel.tsx: -------------------------------------------------------------------------------- 1 | import { motion } from "framer-motion"; 2 | import useMeasure from "react-use-measure"; 3 | 4 | export default function ResizablePanel({ 5 | children, 6 | }: { 7 | children: React.ReactNode; 8 | }) { 9 | let [ref, { height }] = useMeasure(); 10 | 11 | return ( 12 | 18 |
19 | {children} 20 |
21 |
22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /lib/embeddings-supabase.ts: -------------------------------------------------------------------------------- 1 | import { createClient } from "@supabase/supabase-js"; 2 | 3 | interface Client { 4 | url?: string; 5 | key?: string; 6 | } 7 | 8 | const client: Client = { 9 | url: process.env.NEXT_PUBLIC_SUPABASE_URL, 10 | key: process.env.SUPABASE_ANON_KEY 11 | }; 12 | 13 | if (!client.url || !client.key) { 14 | throw new Error("Missing Supabase credentials"); 15 | } 16 | 17 | export const supabaseClient = createClient(client.url!, client.key!); 18 | -------------------------------------------------------------------------------- /next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | /// 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/basic-features/typescript for more information. 6 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 3 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gpt3-pgvector-starter", 3 | "version": "0.0.1", 4 | "license": "MIT", 5 | "scripts": { 6 | "dev": "next", 7 | "build": "next build", 8 | "start": "next start" 9 | }, 10 | "dependencies": { 11 | "@supabase/supabase-js": "^2.21.0", 12 | "autoprefixer": "^10.4.14", 13 | "cheerio": "^1.0.0-rc.12", 14 | "common-tags": "^1.8.2", 15 | "daisyui": "^2.51.6", 16 | "eslint-config-next": "13.3.1", 17 | "eventsource-parser": "^1.0.0", 18 | "framer-motion": "^10.12.4", 19 | "gpt3-tokenizer": "^1.1.5", 20 | "next": "^13.3.1", 21 | "parse-numeric-range": "^1.3.0", 22 | "react": "^18.2.0", 23 | "react-dom": 
"^18.2.0", 24 | "react-hot-toast": "^2.4.1", 25 | "react-markdown": "^8.0.7", 26 | "react-syntax-highlighter": "^15.5.0", 27 | "react-text-transition": "^3.1.0", 28 | "react-use-measure": "^2.1.1", 29 | "uuidv4": "^6.2.13" 30 | }, 31 | "devDependencies": { 32 | "@types/common-tags": "^1.8.1", 33 | "@types/node": "18.16.3", 34 | "@types/react": "^18.2.0", 35 | "@types/react-dom": "^18.2.1", 36 | "@types/react-syntax-highlighter": "^15.5.6", 37 | "eslint": "^8.39.0", 38 | "postcss": "^8.4.23", 39 | "prettier": "2.8.8", 40 | "supabase": "^1.52.2", 41 | "swr": "^2.1.5", 42 | "tailwindcss": "^3.3.2", 43 | "ts-node": "^10.9.1", 44 | "typescript": "5.0.4" 45 | }, 46 | "prettier": { 47 | "arrowParens": "always", 48 | "singleQuote": false, 49 | "tabWidth": 2, 50 | "trailingComma": "none" 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import "styles/main.css"; 2 | 3 | import { AppProps } from "next/app"; 4 | 5 | export default function MyApp({ Component, pageProps }: AppProps) { 6 | return ; 7 | } 8 | -------------------------------------------------------------------------------- /pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import Document, { Head, Html, Main, NextScript } from "next/document"; 2 | 3 | class MyDocument extends Document { 4 | render() { 5 | return ( 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | ); 14 | } 15 | } 16 | 17 | export default MyDocument; 18 | -------------------------------------------------------------------------------- /pages/api/docs.ts: -------------------------------------------------------------------------------- 1 | import { supabaseClient } from "@/lib/embeddings-supabase"; 2 | import { OpenAIStream, OpenAIStreamPayload } from "@/utils/OpenAIStream"; 3 | import { oneLine, stripIndent } from "common-tags"; 4 | import GPT3Tokenizer from "gpt3-tokenizer"; 5 | 6 | export const corsHeaders = { 7 | "Access-Control-Allow-Origin": "*", 8 | "Access-Control-Allow-Headers": 9 | "authorization, x-client-info, apikey, content-type" 10 | }; 11 | 12 | if (!process.env.OPENAI_API_KEY) { 13 | throw new Error("Missing env var from OpenAI"); 14 | } 15 | 16 | export const config = { 17 | runtime: "edge" 18 | }; 19 | 20 | const handler = async (req: Request): Promise => { 21 | // Handle CORS 22 | if (req.method === "OPTIONS") { 23 | console.log("req.method ", req.method); 24 | return new Response("ok", { headers: corsHeaders }); 25 | } 26 | 27 | const { question } = (await req.json()) as { 28 | question?: string; 29 | }; 30 | 31 | if (!question) { 32 | return new Response("No prompt in the request", { status: 400 }); 33 | } 34 | 35 | const query = question; 36 | 37 | // OpenAI recommends replacing newlines with spaces for best results 38 | const input = query.replace(/\n/g, " "); 39 | // console.log("input: ", input); 40 | 41 | const apiKey = process.env.OPENAI_API_KEY; 42 | 43 | const apiURL = process.env.OPENAI_PROXY == "" ? 
"https://api.openai.com" : process.env.OPENAI_PROXY; 44 | 45 | const embeddingResponse = await fetch( 46 | apiURL + "/v1/embeddings", 47 | { 48 | method: "POST", 49 | headers: { 50 | Authorization: `Bearer ${apiKey}`, 51 | "Content-Type": "application/json" 52 | }, 53 | body: JSON.stringify({ 54 | input, 55 | model: "text-embedding-ada-002" 56 | }) 57 | } 58 | ); 59 | 60 | const embeddingData = await embeddingResponse.json(); 61 | const [{ embedding }] = embeddingData.data; 62 | // console.log("embedding: ", embedding); 63 | 64 | const { data: documents, error } = await supabaseClient.rpc( 65 | "match_documents", 66 | { 67 | query_embedding: embedding, 68 | similarity_threshold: 0.1, // Choose an appropriate threshold for your data 69 | match_count: 10 // Choose the number of matches 70 | } 71 | ); 72 | 73 | if (error) console.error(error); 74 | 75 | const tokenizer = new GPT3Tokenizer({ type: "gpt3" }); 76 | let tokenCount = 0; 77 | let contextText = ""; 78 | 79 | // console.log("documents: ", documents); 80 | 81 | // Concat matched documents 82 | if (documents) { 83 | for (let i = 0; i < documents.length; i++) { 84 | const document = documents[i]; 85 | const content = document.content; 86 | const url = document.url; 87 | const encoded = tokenizer.encode(content); 88 | tokenCount += encoded.text.length; 89 | 90 | // Limit context to max 1500 tokens (configurable) 91 | if (tokenCount > 1500) { 92 | break; 93 | } 94 | 95 | contextText += `${content.trim()}\nSOURCE: ${url}\n---\n`; 96 | } 97 | } 98 | 99 | // console.log("contextText: ", contextText); 100 | 101 | const systemContent = `You are a helpful assistant. When given CONTEXT you answer questions using only that information, 102 | and you always format your output in markdown. You include code snippets if relevant. If you are unsure and the answer 103 | is not explicitly written in the CONTEXT provided, you say 104 | "Sorry, I don't know how to help with that." 
If the CONTEXT includes 105 | source URLs include them under a SOURCES heading at the end of your response. Always include all of the relevant source urls 106 | from the CONTEXT, but never list a URL more than once (ignore trailing forward slashes when comparing for uniqueness). Never include URLs that are not in the CONTEXT sections. Never make up URLs`; 107 | 108 | const userContent = `CONTEXT: 109 | Next.js is a React framework for creating production-ready web applications. It provides a variety of methods for fetching data, a built-in router, and a Next.js Compiler for transforming and minifying JavaScript code. It also includes a built-in Image Component and Automatic Image Optimization for resizing, optimizing, and serving images in modern formats. 110 | SOURCE: nextjs.org/docs/faq 111 | 112 | QUESTION: 113 | what is nextjs? 114 | `; 115 | 116 | const assistantContent = `Next.js is a framework for building production-ready web applications using React. It offers various data fetching options, comes equipped with an integrated router, and features a Next.js compiler for transforming and minifying JavaScript. Additionally, it has an inbuilt Image Component and Automatic Image Optimization that helps resize, optimize, and deliver images in modern formats. 117 | 118 | \`\`\`js 119 | function HomePage() { 120 | return
Welcome to Next.js!
121 | } 122 | 123 | export default HomePage 124 | \`\`\` 125 | 126 | SOURCES: 127 | https://nextjs.org/docs/faq`; 128 | 129 | const userMessage = `CONTEXT: 130 | ${contextText} 131 | 132 | USER QUESTION: 133 | ${query} 134 | `; 135 | 136 | const messages = [ 137 | { 138 | role: "system", 139 | content: systemContent 140 | }, 141 | { 142 | role: "user", 143 | content: userContent 144 | }, 145 | { 146 | role: "assistant", 147 | content: assistantContent 148 | }, 149 | { 150 | role: "user", 151 | content: userMessage 152 | } 153 | ]; 154 | 155 | 156 | console.log("messages: ", messages); 157 | 158 | const payload: OpenAIStreamPayload = { 159 | model: "gpt-3.5-turbo-0301", 160 | messages: messages, 161 | temperature: 0, 162 | top_p: 1, 163 | frequency_penalty: 0, 164 | presence_penalty: 0, 165 | max_tokens: 2000, 166 | stream: true, 167 | n: 1 168 | }; 169 | 170 | const stream = await OpenAIStream(payload); 171 | return new Response(stream); 172 | }; 173 | 174 | export default handler; 175 | -------------------------------------------------------------------------------- /pages/api/generate-embeddings.ts: -------------------------------------------------------------------------------- 1 | import { NextApiRequest, NextApiResponse } from "next"; 2 | import { supabaseClient } from "@/lib/embeddings-supabase"; 3 | import * as cheerio from "cheerio"; 4 | 5 | // embedding doc sizes 6 | const docSize: number = 1000; 7 | 8 | export default async function handle( 9 | req: NextApiRequest, 10 | res: NextApiResponse 11 | ) { 12 | const { method, body } = req; 13 | 14 | if (method === "POST") { 15 | const { urls } = body; 16 | const documents = await getDocuments(urls); 17 | 18 | for (const { url, body } of documents) { 19 | const input = body.replace(/\n/g, " "); 20 | 21 | console.log("\nDocument length: \n", body.length); 22 | console.log("\nURL: \n", url); 23 | 24 | const apiKey = process.env.OPENAI_API_KEY; 25 | const apiURL = process.env.OPENAI_PROXY == "" ? 
"https://api.openai.com" : process.env.OPENAI_PROXY; 26 | 27 | const embeddingResponse = await fetch( 28 | apiURL + "/v1/embeddings", 29 | { 30 | method: "POST", 31 | headers: { 32 | Authorization: `Bearer ${apiKey}`, 33 | "Content-Type": "application/json" 34 | }, 35 | body: JSON.stringify({ 36 | input, 37 | model: "text-embedding-ada-002" 38 | }) 39 | } 40 | ); 41 | // console.log("\nembeddingResponse: \n", embeddingResponse); 42 | const embeddingData = await embeddingResponse.json(); 43 | 44 | const [{ embedding }] = embeddingData.data; 45 | // console.log("embedding:" + embedding); 46 | 47 | // In production we should handle possible errors 48 | try { 49 | let res = await supabaseClient.from("documents").insert({ 50 | content: input, 51 | embedding, 52 | url 53 | }); 54 | } 55 | catch (error) { 56 | console.error("error in supabase insert: " + error); 57 | } 58 | 59 | } 60 | return res.status(200).json({ success: true }); 61 | } 62 | 63 | return res 64 | .status(405) 65 | .json({ success: false, message: "Method not allowed" }); 66 | } 67 | 68 | async function getDocuments(urls: string[]) { 69 | const documents = []; 70 | for (const url of urls) { 71 | let fetchURL = url; 72 | if (process.env.SPLASH_URL != "") { 73 | fetchURL = `${process.env.SPLASH_URL}/render.html?url=${encodeURIComponent(url)}&timeout=10&wait=0.5` 74 | } 75 | console.log("fetching url: " + fetchURL); 76 | 77 | const response = await fetch(fetchURL); 78 | const html = await response.text(); 79 | const $ = cheerio.load(html); 80 | // tag based e.g.
81 | const articleText = $("body").text(); 82 | // class bsaed e.g.
83 | // const articleText = $(".docs-content").text(); 84 | 85 | let start = 0; 86 | while (start < articleText.length) { 87 | const end = start + docSize; 88 | const chunk = articleText.slice(start, end); 89 | documents.push({ url, body: chunk }); 90 | start = end; 91 | } 92 | } 93 | return documents; 94 | } 95 | -------------------------------------------------------------------------------- /pages/docs.tsx: -------------------------------------------------------------------------------- 1 | import { AnimatePresence, motion } from "framer-motion"; 2 | import type { NextPage } from "next"; 3 | import { useState } from "react"; 4 | import { Toaster, toast } from "react-hot-toast"; 5 | import { v4 as uuidv4 } from 'uuid'; 6 | import LoadingDots from "@/components/LoadingDots"; 7 | import ResizablePanel from "@/components/ResizablePanel"; 8 | import MetaTags from "@/components/MetaTags"; 9 | import { ReactNode } from "react"; 10 | import { PageMeta } from "../types"; 11 | import MarkdownRenderer from "@/components/MarkdownRenderer"; 12 | 13 | 14 | interface Props { 15 | children: ReactNode; 16 | meta?: PageMeta; 17 | } 18 | 19 | const DocsPage: NextPage = ({ children, meta: pageMeta }: Props) => { 20 | const [loading, setLoading] = useState(false); 21 | const [userQ, setUserQ] = useState(""); 22 | const [answer, setAanswer] = useState(""); 23 | 24 | console.log("Streamed response: ", answer); 25 | 26 | const question = userQ; 27 | 28 | const generateAnswer = async (e: any) => { 29 | e.preventDefault(); 30 | if (!userQ) { 31 | return toast.error("Please enter a question!"); 32 | } 33 | 34 | setAanswer(""); 35 | setLoading(true); 36 | const response = await fetch("/api/docs", { 37 | method: "POST", 38 | headers: { 39 | "Content-Type": "application/json" 40 | }, 41 | body: JSON.stringify({ 42 | question 43 | }) 44 | }); 45 | console.log("Edge function returned."); 46 | 47 | if (!response.ok) { 48 | throw new Error(response.statusText); 49 | } 50 | 51 | // This data is a 
ReadableStream 52 | const data = response.body; 53 | if (!data) { 54 | return; 55 | } 56 | 57 | const reader = data.getReader(); 58 | const decoder = new TextDecoder(); 59 | let done = false; 60 | 61 | while (!done) { 62 | const { value, done: doneReading } = await reader.read(); 63 | done = doneReading; 64 | const chunkValue = decoder.decode(value); 65 | setAanswer((prev) => prev + chunkValue); 66 | } 67 | 68 | setLoading(false); 69 | }; 70 | 71 | 72 | return ( 73 | <> 74 | 80 |
81 | 82 | 83 |
84 |

85 | Ask me anything* about web development! 86 |

87 |
88 |