├── .eslintrc.json ├── .gitignore ├── README.md ├── components ├── Answer │ ├── Answer.tsx │ └── answer.module.css ├── Footer.tsx └── Navbar.tsx ├── license ├── next.config.js ├── package-lock.json ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── api │ ├── answer.ts │ └── search.ts └── index.tsx ├── postcss.config.js ├── public ├── cover.png └── favicon.ico ├── schema.sql ├── scripts ├── embed.ts ├── scrape.ts └── tns.json ├── styles └── globals.css ├── tailwind.config.js ├── tsconfig.json ├── types └── index.ts └── utils └── index.ts /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "next/core-web-vitals" 3 | } 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | 38 | # tsn 39 | tsn.json 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Network State GPT 2 | 3 | AI-powered search and chat for [Balaji Srinivasan's](https://twitter.com/balajis) ["The Network State."](https://thenetworkstate.com/) 4 | 5 | All code & data used is 100% open-source. 
6 | 7 | [![The Network State GPT](./public/cover.png)](https://the-network-state-gpt.vercel.app/) 8 | 9 | ## Dataset 10 | 11 | The dataset is a CSV file containing all text & embeddings used. 12 | 13 | Download it [here](https://drive.google.com/file/d/1djr6I4-oPNTABaqrmubaL_5tSXetbrdf/view). 14 | 15 | I recommend getting familiar with fetching, cleaning, and storing data as outlined in the scraping and embedding scripts below, but feel free to skip those steps and just use the dataset. 16 | 17 | ## How It Works 18 | 19 | The Network State GPT provides 2 things: 20 | 21 | 1. A search interface for the book. 22 | 2. A chat interface for the book. 23 | 24 | ### Search 25 | 26 | Search was created with [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings) (`text-embedding-ada-002`). 27 | 28 | First, we loop over the book and generate embeddings for each chunk of text. 29 | 30 | Then in the app we take the user's search query, generate an embedding, and use the result to find the most similar passages from the book. 31 | 32 | The comparison is done using cosine similarity across our database of vectors. 33 | 34 | Our database is a Postgres database with the [pgvector](https://github.com/pgvector/pgvector) extension hosted on [Supabase](https://supabase.com/). 35 | 36 | Results are ranked by similarity score and returned to the user. 37 | 38 | ### Chat 39 | 40 | Chat builds on top of search. It uses search results to create a prompt that is fed into GPT-3.5 (`gpt-3.5-turbo`). 41 | 42 | This allows for a chat-like experience where the user can ask questions about the book and get answers. 43 | 44 | ## Running Locally 45 | 46 | Here's a quick overview of how to run it locally. 47 | 48 | ### Requirements 49 | 50 | 1. Set up OpenAI 51 | 52 | You'll need an OpenAI API key to generate embeddings. 53 | 54 | 2. Set up Supabase 55 | 56 | There is a schema.sql file in the root of the repo that you can use to set up the database. 57 | 58 | Run that in the SQL editor in Supabase. 
59 | 60 | I recommend turning on Row Level Security and setting up a service role to use with the app. 61 | 62 | Note: You don't have to use Supabase. Use whatever method you prefer to store your data. But I like Supabase and think it's easy to use. 63 | 64 | ### Repo Setup 65 | 66 | 3. Clone repo 67 | 68 | ```bash 69 | git clone https://github.com/mckaywrigley/the-network-state-gpt.git 70 | ``` 71 | 72 | 4. Install dependencies 73 | 74 | ```bash 75 | npm i 76 | ``` 77 | 78 | 5. Set up environment variables 79 | 80 | Create a .env.local file in the root of the repo with the following variables: 81 | 82 | ```bash 83 | OPENAI_API_KEY= 84 | 85 | NEXT_PUBLIC_SUPABASE_URL= 86 | SUPABASE_SERVICE_ROLE_KEY= 87 | ``` 88 | 89 | ### Dataset 90 | 91 | 6. Run scraping script 92 | 93 | ```bash 94 | npm run scrape 95 | ``` 96 | 97 | This scrapes the text of the book from The Network State website and saves it to a json file. 98 | 99 | 7. Run embedding script 100 | 101 | ```bash 102 | npm run embed 103 | ``` 104 | 105 | This reads the json file, generates embeddings for each chunk of text, and saves the results to your database. 106 | 107 | ### App 108 | 109 | 8. Run app 110 | 111 | ```bash 112 | npm run dev 113 | ``` 114 | 115 | ## Credits 116 | 117 | Thanks to [Balaji Srinivasan](https://twitter.com/balajis) for his work on [The Network State](https://www.thenetworkstate.com). This project would not be possible if he didn't make the book open-source. I highly recommend you check it out. 118 | 119 | ## Contact 120 | 121 | If you have any questions, feel free to reach out to me on [Twitter](https://twitter.com/mckaywrigley). 122 | 123 | ## Notes 124 | 125 | I sacrificed composability for simplicity in the app. 126 | 127 | Yes, you can make things more modular and reusable. 128 | 129 | But I kept pretty much everything in the homepage component for the sake of simplicity. 
130 | -------------------------------------------------------------------------------- /components/Answer/Answer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | import styles from "./answer.module.css"; 3 | 4 | interface AnswerProps { 5 | text: string; 6 | } 7 | 8 | export const Answer: React.FC = ({ text }) => { 9 | const [words, setWords] = useState([]); 10 | 11 | useEffect(() => { 12 | setWords(text.split(" ")); 13 | }, [text]); 14 | 15 | return ( 16 |
17 | {words.map((word, index) => ( 18 | 23 | {word}{" "} 24 | 25 | ))} 26 |
27 | ); 28 | }; 29 | -------------------------------------------------------------------------------- /components/Answer/answer.module.css: -------------------------------------------------------------------------------- 1 | .fadeIn { 2 | animation: fadeIn 0.5s ease-in-out forwards; 3 | opacity: 0; 4 | } 5 | 6 | @keyframes fadeIn { 7 | from { 8 | opacity: 0; 9 | } 10 | to { 11 | opacity: 1; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /components/Footer.tsx: -------------------------------------------------------------------------------- 1 | import { IconBrandGithub, IconBrandTwitter } from "@tabler/icons-react"; 2 | import { FC } from "react"; 3 | 4 | export const Footer: FC = () => { 5 | return ( 6 |
7 |
8 | 9 |
10 | Created by 11 | 17 | Mckay Wrigley 18 | 19 | based on the work of 20 | 26 | Balaji Srinivasan 27 | 28 | . 29 |
30 | 31 |
32 | 38 | 39 | 40 | 41 | 47 | 48 | 49 |
50 |
51 | ); 52 | }; 53 | -------------------------------------------------------------------------------- /components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import { IconExternalLink } from "@tabler/icons-react"; 2 | import Image from "next/image"; 3 | import { FC } from "react"; 4 | import cover from "../public/cover.png"; 5 | 6 | export const Navbar: FC = () => { 7 | return ( 8 |
9 |
10 | The Network State GPT 17 |
The Network State GPT
18 |
19 |
20 | 26 |
Official Site
27 | 28 | 32 |
33 |
34 |
35 | ); 36 | }; 37 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Mckay Wrigley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | } 5 | 6 | module.exports = nextConfig 7 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "the-network-state-gpt", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "scrape": "tsx scripts/scrape.ts", 11 | "embed": "tsx scripts/embed.ts" 12 | }, 13 | "dependencies": { 14 | "@next/font": "^13.2.3", 15 | "@tabler/icons-react": "^2.4.0", 16 | "@types/node": "18.14.0", 17 | "@types/react": "18.0.28", 18 | "@types/react-dom": "18.0.11", 19 | "endent": "^2.1.0", 20 | "eslint": "8.34.0", 21 | "eslint-config-next": "13.1.6", 22 | "eventsource-parser": "^0.1.0", 23 | "next": "13.1.6", 24 | "react": "18.2.0", 25 | "react-dom": "18.2.0", 26 | "typescript": "4.9.5" 27 | }, 28 | "devDependencies": { 29 | "@next/env": "^13.2.3", 30 | "@supabase/supabase-js": "^2.8.0", 31 | "autoprefixer": "^10.4.13", 32 | "axios": "^1.3.4", 33 | "cheerio": "^1.0.0-rc.12", 34 | "gpt-3-encoder": "^1.1.4", 35 | "openai": "^3.1.0", 36 | "postcss": "^8.4.21", 37 | "tailwindcss": "^3.2.7", 38 | "tsx": "^3.12.3" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import "@/styles/globals.css"; 2 | import { Inter } from "@next/font/google"; 3 | import type { AppProps } from "next/app"; 4 | 5 | const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export default function App({ Component, pageProps }: AppProps<{}>) { 8 
| return ( 9 |
10 | 11 |
12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from 'next/document' 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /pages/api/answer.ts: -------------------------------------------------------------------------------- 1 | import { OpenAIStream } from "@/utils"; 2 | 3 | export const config = { 4 | runtime: "edge" 5 | }; 6 | 7 | const handler = async (req: Request): Promise => { 8 | try { 9 | const { prompt, apiKey } = (await req.json()) as { 10 | prompt: string; 11 | apiKey: string; 12 | }; 13 | 14 | const stream = await OpenAIStream(prompt, apiKey); 15 | 16 | return new Response(stream); 17 | } catch (error) { 18 | console.error(error); 19 | return new Response("Error", { status: 500 }); 20 | } 21 | }; 22 | 23 | export default handler; 24 | -------------------------------------------------------------------------------- /pages/api/search.ts: -------------------------------------------------------------------------------- 1 | import { supabaseAdmin } from "@/utils"; 2 | 3 | export const config = { 4 | runtime: "edge" 5 | }; 6 | 7 | const handler = async (req: Request): Promise => { 8 | try { 9 | const { query, apiKey, matches } = (await req.json()) as { 10 | query: string; 11 | apiKey: string; 12 | matches: number; 13 | }; 14 | 15 | const input = query.replace(/\n/g, " "); 16 | 17 | const res = await fetch("https://api.openai.com/v1/embeddings", { 18 | headers: { 19 | "Content-Type": "application/json", 20 | Authorization: `Bearer ${apiKey}` 21 | }, 22 | method: "POST", 23 | body: JSON.stringify({ 24 | model: "text-embedding-ada-002", 25 | input 26 | }) 27 | }); 28 | 29 | const json = await res.json(); 30 | const embedding = json.data[0].embedding; 31 | 32 | const { data: chunks, error } = await supabaseAdmin.rpc("tns_search", { 33 | query_embedding: embedding, 34 | similarity_threshold: 0.01, 35 | match_count: matches 36 | }); 37 | 38 | if (error) { 39 | console.error(error); 40 | return new Response("Error", { status: 500 }); 41 | } 42 | 43 | return new 
Response(JSON.stringify(chunks), { status: 200 }); 44 | } catch (error) { 45 | console.error(error); 46 | return new Response("Error", { status: 500 }); 47 | } 48 | }; 49 | 50 | export default handler; 51 | -------------------------------------------------------------------------------- /pages/index.tsx: -------------------------------------------------------------------------------- 1 | import { Answer } from "@/components/Answer/Answer"; 2 | import { Footer } from "@/components/Footer"; 3 | import { Navbar } from "@/components/Navbar"; 4 | import { TNSChunk } from "@/types"; 5 | import { IconArrowRight, IconExternalLink, IconSearch } from "@tabler/icons-react"; 6 | import endent from "endent"; 7 | import Head from "next/head"; 8 | import { KeyboardEvent, useEffect, useRef, useState } from "react"; 9 | 10 | export default function Home() { 11 | const inputRef = useRef(null); 12 | 13 | const [query, setQuery] = useState(""); 14 | const [chunks, setChunks] = useState([]); 15 | const [answer, setAnswer] = useState(""); 16 | const [loading, setLoading] = useState(false); 17 | 18 | const [showSettings, setShowSettings] = useState(false); 19 | const [mode, setMode] = useState<"search" | "chat">("chat"); 20 | const [matchCount, setMatchCount] = useState(5); 21 | const [apiKey, setApiKey] = useState(""); 22 | 23 | const handleSearch = async () => { 24 | if (!apiKey) { 25 | alert("Please enter an API key."); 26 | return; 27 | } 28 | 29 | if (!query) { 30 | alert("Please enter a query."); 31 | return; 32 | } 33 | 34 | setAnswer(""); 35 | setChunks([]); 36 | 37 | setLoading(true); 38 | 39 | const searchResponse = await fetch("/api/search", { 40 | method: "POST", 41 | headers: { 42 | "Content-Type": "application/json" 43 | }, 44 | body: JSON.stringify({ query, apiKey, matches: matchCount }) 45 | }); 46 | 47 | if (!searchResponse.ok) { 48 | setLoading(false); 49 | throw new Error(searchResponse.statusText); 50 | } 51 | 52 | const results: TNSChunk[] = await 
searchResponse.json(); 53 | 54 | setChunks(results); 55 | 56 | setLoading(false); 57 | 58 | inputRef.current?.focus(); 59 | 60 | return results; 61 | }; 62 | 63 | const handleAnswer = async () => { 64 | if (!apiKey) { 65 | alert("Please enter an API key."); 66 | return; 67 | } 68 | 69 | if (!query) { 70 | alert("Please enter a query."); 71 | return; 72 | } 73 | 74 | setAnswer(""); 75 | setChunks([]); 76 | 77 | setLoading(true); 78 | 79 | const searchResponse = await fetch("/api/search", { 80 | method: "POST", 81 | headers: { 82 | "Content-Type": "application/json" 83 | }, 84 | body: JSON.stringify({ query, apiKey, matches: matchCount }) 85 | }); 86 | 87 | if (!searchResponse.ok) { 88 | setLoading(false); 89 | throw new Error(searchResponse.statusText); 90 | } 91 | 92 | const results: TNSChunk[] = await searchResponse.json(); 93 | 94 | setChunks(results); 95 | 96 | const prompt = endent` 97 | Use the following passages to provide an answer to the query: "${query}" 98 | 99 | ${results?.map((d: any) => d.content).join("\n\n")} 100 | `; 101 | 102 | const answerResponse = await fetch("/api/answer", { 103 | method: "POST", 104 | headers: { 105 | "Content-Type": "application/json" 106 | }, 107 | body: JSON.stringify({ prompt, apiKey }) 108 | }); 109 | 110 | if (!answerResponse.ok) { 111 | setLoading(false); 112 | throw new Error(answerResponse.statusText); 113 | } 114 | 115 | const data = answerResponse.body; 116 | 117 | if (!data) { 118 | return; 119 | } 120 | 121 | setLoading(false); 122 | 123 | const reader = data.getReader(); 124 | const decoder = new TextDecoder(); 125 | let done = false; 126 | 127 | while (!done) { 128 | const { value, done: doneReading } = await reader.read(); 129 | done = doneReading; 130 | const chunkValue = decoder.decode(value); 131 | setAnswer((prev) => prev + chunkValue); 132 | } 133 | 134 | inputRef.current?.focus(); 135 | }; 136 | 137 | const handleKeyDown = (e: KeyboardEvent) => { 138 | if (e.key === "Enter") { 139 | if (mode === "search") 
{ 140 | handleSearch(); 141 | } else { 142 | handleAnswer(); 143 | } 144 | } 145 | }; 146 | 147 | const handleSave = () => { 148 | if (apiKey.length !== 51) { 149 | alert("Please enter a valid API key."); 150 | return; 151 | } 152 | 153 | localStorage.setItem("PG_KEY", apiKey); 154 | localStorage.setItem("PG_MATCH_COUNT", matchCount.toString()); 155 | localStorage.setItem("PG_MODE", mode); 156 | 157 | setShowSettings(false); 158 | inputRef.current?.focus(); 159 | }; 160 | 161 | const handleClear = () => { 162 | localStorage.removeItem("PG_KEY"); 163 | localStorage.removeItem("PG_MATCH_COUNT"); 164 | localStorage.removeItem("PG_MODE"); 165 | 166 | setApiKey(""); 167 | setMatchCount(5); 168 | setMode("search"); 169 | }; 170 | 171 | useEffect(() => { 172 | if (matchCount > 10) { 173 | setMatchCount(10); 174 | } else if (matchCount < 1) { 175 | setMatchCount(1); 176 | } 177 | }, [matchCount]); 178 | 179 | useEffect(() => { 180 | const PG_KEY = localStorage.getItem("PG_KEY"); 181 | const PG_MATCH_COUNT = localStorage.getItem("PG_MATCH_COUNT"); 182 | const PG_MODE = localStorage.getItem("PG_MODE"); 183 | 184 | if (PG_KEY) { 185 | setApiKey(PG_KEY); 186 | } 187 | 188 | if (PG_MATCH_COUNT) { 189 | setMatchCount(parseInt(PG_MATCH_COUNT)); 190 | } 191 | 192 | if (PG_MODE) { 193 | setMode(PG_MODE as "search" | "chat"); 194 | } 195 | 196 | inputRef.current?.focus(); 197 | }, []); 198 | 199 | return ( 200 | <> 201 | 202 | The Network State GPT 203 | 207 | 211 | 215 | 216 | 217 |
218 | 219 |
220 |
221 | 227 | 228 | {showSettings && ( 229 |
230 |
231 |
Mode
232 | 240 |
241 | 242 |
243 |
Passage Count
244 | setMatchCount(Number(e.target.value))} 250 | className="max-w-[400px] block w-full rounded-md border border-gray-300 p-2 text-black shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-500 sm:text-sm" 251 | /> 252 |
253 | 254 |
255 |
OpenAI API Key
256 | { 262 | setApiKey(e.target.value); 263 | 264 | if (e.target.value.length !== 51) { 265 | setShowSettings(true); 266 | } 267 | }} 268 | /> 269 |
270 | 271 |
272 |
276 | Save 277 |
278 | 279 |
283 | Clear 284 |
285 |
286 |
287 | )} 288 | 289 | {apiKey.length === 51 ? ( 290 |
291 | 292 | 293 | setQuery(e.target.value)} 300 | onKeyDown={handleKeyDown} 301 | /> 302 | 303 | 309 |
310 | ) : ( 311 |
312 | Please enter your 313 | 317 | OpenAI API key 318 | 319 | in settings. 320 |
321 | )} 322 | 323 | {loading ? ( 324 |
325 | {mode === "chat" && ( 326 | <> 327 |
Answer
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 | 336 | )} 337 | 338 |
Passages
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 | ) : answer ? ( 348 |
349 |
Answer
350 | 351 | 352 |
353 |
Passages
354 | 355 | {chunks.map((chunk, index) => ( 356 |
357 |
358 |
359 |
360 |
{chunk.chapter_title}
361 |
{chunk.section_title}
362 |
363 | 369 | 370 | 371 |
372 |
{chunk.content}
373 |
374 |
375 | ))} 376 |
377 |
378 | ) : chunks.length > 0 ? ( 379 |
380 |
Passages
381 | {chunks.map((chunk, index) => ( 382 |
383 |
384 |
385 |
386 |
{chunk.chapter_title}
387 |
{chunk.section_title}
388 |
389 | 395 | 396 | 397 |
398 |
{chunk.content}
399 |
400 |
401 | ))} 402 |
403 | ) : ( 404 |
{`AI-powered search & chat for Balaji Srinivasan's "The Network State."`}
405 | )} 406 |
407 |
408 |
410 | 411 | ); 412 | } 413 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /public/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mckaywrigley/the-network-state-gpt/122bccfad80a058d5adb63c871dedcd9b8edd6ea/public/cover.png -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mckaywrigley/the-network-state-gpt/122bccfad80a058d5adb63c871dedcd9b8edd6ea/public/favicon.ico -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | -- RUN 1st 2 | create extension vector; 3 | 4 | -- RUN 2nd 5 | create table tns ( 6 | id bigserial primary key, 7 | chapter_title text, 8 | chapter_num bigint, 9 | section_title text, 10 | section_num bigint, 11 | section_url text, 12 | chunk_num bigint, 13 | content text, 14 | content_length bigint, 15 | content_tokens bigint, 16 | embedding vector (1536) 17 | ); 18 | 19 | -- RUN 3rd after running the scripts 20 | create or replace function tns_search ( 21 | query_embedding vector(1536), 22 | similarity_threshold float, 23 | match_count int 24 | ) 25 | returns table ( 26 | id bigint, 27 | chapter_title text, 28 | chapter_num bigint, 29 | section_title text, 30 | section_num bigint, 31 | section_url text, 32 | chunk_num bigint, 33 | content text, 34 | content_length bigint, 35 | content_tokens bigint, 36 | similarity float 37 | ) 38 | language plpgsql 39 | as $$ 40 | begin 41 | 
return query 42 | select 43 | tns.id, 44 | tns.chapter_title, 45 | tns.chapter_num, 46 | tns.section_title, 47 | tns.section_num, 48 | tns.section_url, 49 | tns.chunk_num, 50 | tns.content, 51 | tns.content_length, 52 | tns.content_tokens, 53 | 1 - (tns.embedding <=> query_embedding) as similarity 54 | from tns 55 | where 1 - (tns.embedding <=> query_embedding) > similarity_threshold 56 | order by tns.embedding <=> query_embedding 57 | limit match_count; 58 | end; 59 | $$; 60 | 61 | -- RUN 4th 62 | create index on tns 63 | using ivfflat (embedding vector_cosine_ops) 64 | with (lists = 100); -------------------------------------------------------------------------------- /scripts/embed.ts: -------------------------------------------------------------------------------- 1 | import { TNSSection } from "@/types"; 2 | import { loadEnvConfig } from "@next/env"; 3 | import { createClient } from "@supabase/supabase-js"; 4 | import fs from "fs"; 5 | import { Configuration, OpenAIApi } from "openai"; 6 | import { TNSBook } from "./../types/index"; 7 | 8 | loadEnvConfig(""); 9 | 10 | const generateEmbeddings = async (sections: TNSSection[]) => { 11 | const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY }); 12 | const openai = new OpenAIApi(configuration); 13 | 14 | const supabase = createClient(process.env.NEXT_PUBLIC_SUPABASE_URL!, process.env.SUPABASE_SERVICE_ROLE_KEY!); 15 | 16 | let sectionNum = 1; 17 | let chunkNum = 1; 18 | 19 | for (let i = 0; i < sections.length; i++) { 20 | const section = sections[i]; 21 | 22 | for (let j = 0; j < section.chunks.length; j++) { 23 | const chunk = section.chunks[j]; 24 | 25 | const { chapter_num, chapter_title, section_title, section_url, content, content_length, content_tokens } = chunk; 26 | 27 | const embeddingResponse = await openai.createEmbedding({ 28 | model: "text-embedding-ada-002", 29 | input: content 30 | }); 31 | 32 | const [{ embedding }] = embeddingResponse.data.data; 33 | 34 | const { data, error 
} = await supabase 35 | .from("tns") 36 | .insert({ 37 | chapter_num, 38 | chapter_title, 39 | section_title, 40 | section_url, 41 | section_num: sectionNum, 42 | chunk_num: chunkNum, 43 | content, 44 | content_length, 45 | content_tokens, 46 | embedding 47 | }) 48 | .select("*"); 49 | 50 | if (error) { 51 | console.log("error", error); 52 | } else { 53 | console.log("saved", i, j); 54 | } 55 | 56 | chunkNum++; 57 | } 58 | 59 | sectionNum++; 60 | } 61 | }; 62 | 63 | (async () => { 64 | const book: TNSBook = JSON.parse(fs.readFileSync("scripts/tns.json", "utf8")); 65 | 66 | await generateEmbeddings(book.sections); 67 | })(); 68 | -------------------------------------------------------------------------------- /scripts/scrape.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | import * as cheerio from "cheerio"; 3 | import fs from "fs"; 4 | import { encode } from "gpt-3-encoder"; 5 | import { TNSChunk, TNSSection } from "./../types/index"; 6 | 7 | const BASE_URL = "https://thenetworkstate.com"; 8 | const CHUNK_SIZE = 200; 9 | 10 | let CHAPTER_NUM = 1; 11 | let CHAPTER_TITLE = ""; 12 | 13 | const getLinks = async () => { 14 | const html = await axios.get(BASE_URL); 15 | const $ = cheerio.load(html.data); 16 | const main = $("main"); 17 | 18 | const links = main.find("a"); 19 | const hrefs = links.map((i, link) => $(link).attr("href")).get(); 20 | const filtered = hrefs.filter((href) => !href.startsWith("https")).filter((href, i, arr) => arr.indexOf(href) === i); 21 | 22 | return filtered; 23 | }; 24 | 25 | const getChapterTitle = async (num: number) => { 26 | switch (num) { 27 | case 1: 28 | return "Quickstart"; 29 | case 2: 30 | return "History as Trajectory"; 31 | case 3: 32 | return "The Tripolar Movement"; 33 | case 4: 34 | return "Decentralization, Recentralization"; 35 | case 5: 36 | return "From Nation States to Network States"; 37 | case 6: 38 | return "Appendix"; 39 | default: 40 | return ""; 
41 | } 42 | }; 43 | 44 | const getSection = async (link: string) => { 45 | const html = await axios.get(link); 46 | const $ = cheerio.load(html.data); 47 | const text = $("main").text(); 48 | 49 | const lines = text.split("\n").filter((line) => line.trim() !== ""); 50 | 51 | let sectionText = ""; 52 | let sectionTitle = ""; 53 | 54 | if (lines[0].includes("Chapter")) { 55 | const split = lines[0].split("Chapter"); 56 | 57 | CHAPTER_NUM = +split[1].trim(); 58 | CHAPTER_TITLE = await getChapterTitle(CHAPTER_NUM); 59 | 60 | sectionTitle = lines[1]; 61 | 62 | sectionText = lines.slice(2).join(" "); 63 | } else { 64 | sectionTitle = lines[0]; 65 | 66 | sectionText = lines.slice(1).join(" "); 67 | } 68 | 69 | const section: TNSSection = { 70 | chapter_num: CHAPTER_NUM, 71 | chapter_title: CHAPTER_TITLE, 72 | section_title: sectionTitle, 73 | section_url: link, 74 | section_num: 0, // handled in embed.ts 75 | content: sectionText, 76 | content_length: sectionText.length, 77 | content_tokens: encode(sectionText).length, 78 | chunks: [] 79 | }; 80 | 81 | return section; 82 | }; 83 | 84 | const chunkSection = async (section: TNSSection) => { 85 | const { chunks, content, ...chunklessSection } = section; 86 | 87 | let sectionTextChunks = []; 88 | 89 | if (encode(content).length > CHUNK_SIZE) { 90 | const split = content.split(". "); 91 | let chunkText = ""; 92 | 93 | for (let i = 0; i < split.length; i++) { 94 | const sentence = split[i]; 95 | const sentenceTokenLength = encode(sentence); 96 | const chunkTextTokenLength = encode(chunkText).length; 97 | 98 | if (chunkTextTokenLength + sentenceTokenLength.length > CHUNK_SIZE) { 99 | sectionTextChunks.push(chunkText); 100 | chunkText = ""; 101 | } 102 | 103 | if (sentence[sentence.length - 1].match(/[a-z0-9]/i)) { 104 | chunkText += sentence + ". 
"; 105 | } else { 106 | chunkText += sentence + " "; 107 | } 108 | } 109 | 110 | sectionTextChunks.push(chunkText.trim()); 111 | } else { 112 | sectionTextChunks.push(content.trim()); 113 | } 114 | 115 | const sectionChunks = sectionTextChunks.map((text) => { 116 | const trimmedText = text.trim(); 117 | 118 | const chunk: TNSChunk = { 119 | ...chunklessSection, 120 | content: trimmedText, 121 | content_length: trimmedText.length, 122 | content_tokens: encode(trimmedText).length, 123 | chunk_num: 0, // handled in embed.ts 124 | embedding: [] 125 | }; 126 | 127 | return chunk; 128 | }); 129 | 130 | if (sectionChunks.length > 1) { 131 | for (let i = 0; i < sectionChunks.length; i++) { 132 | const chunk = sectionChunks[i]; 133 | const prevChunk = sectionChunks[i - 1]; 134 | 135 | if (chunk.content_tokens < 100 && prevChunk) { 136 | prevChunk.content += " " + chunk.content; 137 | prevChunk.content_length += chunk.content_length; 138 | prevChunk.content_tokens += chunk.content_tokens; 139 | sectionChunks.splice(i, 1); 140 | i--; 141 | } 142 | } 143 | } 144 | 145 | const chunkedSection: TNSSection = { 146 | ...section, 147 | chunks: sectionChunks 148 | }; 149 | 150 | return chunkedSection; 151 | }; 152 | 153 | (async () => { 154 | const links = await getLinks(); 155 | 156 | let sections: TNSSection[] = []; 157 | 158 | for (let i = 0; i < links.length; i++) { 159 | const link = `${BASE_URL}${links[i]}`; 160 | const section = await getSection(link); 161 | const chunkedSection = await chunkSection(section); 162 | sections.push(chunkedSection); 163 | } 164 | 165 | const book = { 166 | book_title: "The Network State", 167 | author: "Balaji Srinivasan", 168 | book_url: BASE_URL, 169 | publication_date: "2022-07-04", 170 | current_date: "2023-03-01", 171 | length: sections.reduce((acc, section) => acc + section.content_length, 0), 172 | tokens: sections.reduce((acc, section) => acc + section.content_tokens, 0), 173 | sections 174 | }; 175 | 176 | 
fs.writeFileSync("scripts/tns.json", JSON.stringify(book, null, 2)); 177 | })(); 178 | -------------------------------------------------------------------------------- /styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ["./app/**/*.{js,ts,jsx,tsx}", "./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"], 4 | theme: { 5 | extend: {} 6 | }, 7 | plugins: [] 8 | }; 9 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "esnext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "baseUrl": ".", 18 | "paths": { 19 | "@/*": ["./*"] 20 | } 21 | }, 22 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 23 | "exclude": ["node_modules"] 24 | } 25 | -------------------------------------------------------------------------------- /types/index.ts: -------------------------------------------------------------------------------- 1 | export enum OpenAIModel { 2 | DAVINCI_TURBO = "gpt-3.5-turbo" 3 | } 4 | 5 | export type TNSSection = { 6 | chapter_num: number; 7 | chapter_title: string; 8 | section_title: string; 9 | section_url: string; 10 | section_num: number; 11 | content: string; 12 
| content_length: number; 13 | content_tokens: number; 14 | chunks: TNSChunk[]; 15 | }; 16 | 17 | export type TNSChunk = { 18 | chapter_num: number; 19 | chapter_title: string; 20 | section_title: string; 21 | section_url: string; 22 | section_num: number; 23 | chunk_num: number; 24 | content: string; 25 | content_length: number; 26 | content_tokens: number; 27 | embedding: number[]; 28 | }; 29 | 30 | export type TNSBook = { 31 | book_title: string; 32 | author: string; 33 | book_url: string; 34 | publication_date: string; 35 | current_date: string; 36 | length: number; 37 | tokens: number; 38 | sections: TNSSection[]; 39 | }; 40 | -------------------------------------------------------------------------------- /utils/index.ts: -------------------------------------------------------------------------------- 1 | import { OpenAIModel } from "@/types"; 2 | import { createClient } from "@supabase/supabase-js"; 3 | import { createParser, ParsedEvent, ReconnectInterval } from "eventsource-parser"; 4 | 5 | export const supabaseAdmin = createClient(process.env.NEXT_PUBLIC_SUPABASE_URL!, process.env.SUPABASE_SERVICE_ROLE_KEY!); 6 | 7 | export const OpenAIStream = async (prompt: string, apiKey: string) => { 8 | const encoder = new TextEncoder(); 9 | const decoder = new TextDecoder(); 10 | 11 | const res = await fetch("https://api.openai.com/v1/chat/completions", { 12 | headers: { 13 | "Content-Type": "application/json", 14 | Authorization: `Bearer ${apiKey}` 15 | }, 16 | method: "POST", 17 | body: JSON.stringify({ 18 | model: OpenAIModel.DAVINCI_TURBO, 19 | messages: [ 20 | { 21 | role: "system", 22 | content: "You are a helpful assistant that accurately answers the user's queries based on the given text." 
23 | }, 24 | { 25 | role: "user", 26 | content: prompt 27 | } 28 | ], 29 | max_tokens: 200, 30 | temperature: 0.2, 31 | stream: true 32 | }) 33 | }); 34 | 35 | if (res.status !== 200) { 36 | throw new Error("OpenAI API returned an error"); 37 | } 38 | 39 | const stream = new ReadableStream({ 40 | async start(controller) { 41 | const onParse = (event: ParsedEvent | ReconnectInterval) => { 42 | if (event.type === "event") { 43 | const data = event.data; 44 | 45 | if (data === "[DONE]") { 46 | controller.close(); 47 | return; 48 | } 49 | 50 | try { 51 | const json = JSON.parse(data); 52 | const text = json.choices[0].delta.content; 53 | const queue = encoder.encode(text); 54 | controller.enqueue(queue); 55 | } catch (e) { 56 | controller.error(e); 57 | } 58 | } 59 | }; 60 | 61 | const parser = createParser(onParse); 62 | 63 | for await (const chunk of res.body as any) { 64 | parser.feed(decoder.decode(chunk)); 65 | } 66 | } 67 | }); 68 | 69 | return stream; 70 | }; 71 | --------------------------------------------------------------------------------