├── .DS_Store ├── .gitignore ├── README.md ├── components ├── Answer │ ├── Answer.tsx │ └── answer.module.css ├── Footer.tsx ├── Navbar.tsx └── Player.tsx ├── license ├── next.config.js ├── package-lock.json ├── package.json ├── pages ├── _app.tsx ├── _document.tsx ├── api │ ├── answer.ts │ └── search.ts └── index.tsx ├── postcss.config.js ├── public ├── favicon.ico └── naval.jpeg ├── schema.sql ├── scripts ├── clips.json ├── embed-audio.ts ├── embed-text.ts ├── main.py ├── naval.json └── scrape.ts ├── styles └── globals.css ├── tailwind.config.js ├── tsconfig.json ├── types └── index.ts └── utils └── index.ts /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mckaywrigley/naval-gpt/89ce147352eabaa11780a0349fd9b2aad5363e5e/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | /.next/ 13 | /out/ 14 | 15 | # production 16 | /build 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # vercel 32 | .vercel 33 | 34 | # typescript 35 | *.tsbuildinfo 36 | next-env.d.ts 37 | 38 | # audio 39 | clips/ 40 | podcast.mp3 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Naval GPT 2 | 3 | AI-powered search & chat for Naval Ravikant's Twitter thread "How To Get Rich." 4 | 5 | (adding more content soon) 6 | 7 | Everything is 100% open source. 
8 | 9 | ## Dataset 10 | 11 | The dataset consists of 2 CSV files containing all text & embeddings used. 12 | 13 | Download clips data [here](https://drive.google.com/file/d/1ekj7F2HCCFqyUe0kAUX94JZSb4Rr9qPP/view?usp=share_link). 14 | 15 | Download passages data [here](https://drive.google.com/file/d/1-yVP1VYe1D-fKHejijK1ZWLsX-o0Md0Y/view?usp=share_link). 16 | 17 | I recommend getting familiar with fetching, cleaning, and storing data as outlined in the scraping and embedding scripts below, but feel free to skip those steps and just use the dataset. 18 | 19 | ## How It Works 20 | 21 | Naval GPT provides 3 things: 22 | 23 | 1. Search 24 | 2. Chat 25 | 3. Audio 26 | 27 | ### Search 28 | 29 | Search was created with [OpenAI Embeddings](https://platform.openai.com/docs/guides/embeddings) (`text-embedding-ada-002`). 30 | 31 | First, we loop over the passages from Naval's formatted [blog post](https://nav.al/rich) and generate embeddings for each chunk of text. 32 | 33 | We use the blog post rather than the raw tweets because saving its HTML lets us render the beautifully formatted text in our app. 34 | 35 | In the app, we take the user's search query, generate an embedding, and use the result to find the most similar passages. 36 | 37 | The comparison is done using cosine similarity across our database of vectors. 38 | 39 | Our database is a Postgres database with the [pgvector](https://github.com/pgvector/pgvector) extension hosted on [Supabase](https://supabase.com/). 40 | 41 | Results are ranked by similarity score and returned to the user. 42 | 43 | ### Chat 44 | 45 | Chat builds on top of search. It uses search results to create a prompt that is fed into GPT-3.5-turbo. 46 | 47 | This allows for a chat-like experience where the user can ask questions about the topic and get answers. 
48 | 49 | ### Audio 50 | 51 | The podcast player is a simple audio player that plays the [podcast](https://content.libsyn.com/p/4/b/0/4b0ce4b1beb1c234/Naval-Ep53.mp3?c_id=59607029&cs_id=59607029&response-content-type=audio%2Fmpeg&Expires=1678166856&Signature=Lfp~zMHa0ETN00JHMVG8xcCGvTnUonsl8ouhpdaH0A4XLHhMISlMySL2mS4e1q6yvONjTZ4pR9L~YDyaSZ~knatkNEVNloDCHjYQZ6-AMy7Qcd0~XwenWZDkRDbjkLj58QE2c6APgDYZqlio1PyO2m9JSIalKdmR1bWnZ02WV3VVymLQUJAaAZcRIX-X3KyO4IT6xbnyK8BiJfJXOo7uITW~xtY9PoaP3Id8yw0Ckna0uSfv60aOO2BDFO~ZyivpkfnfcEtimZYjFQDLhlzIbJCoOw52NRojeaSy2-T~d870-fd9FvSKkTwYAr04cDNrkBcrlKhzhnYRLwT0wWc6Yg__&Key-Pair-Id=K1YS7LZGUP96OI) for this thread. 52 | 53 | We use Python and [OpenAI Whisper](https://openai.com/research/whisper) to loop over the podcast and transcribe each 1min chunk of audio; an embedding is then generated for each transcript. 54 | 55 | We then use the same method as search to find the most similar clip. 56 | 57 | During our audio processing we saved timestamps for each clip, so we then jump to that timestamp for the podcast in the app. 58 | 59 | ## Running Locally 60 | 61 | Here's a quick overview of how to run it locally. 62 | 63 | ### Requirements 64 | 65 | 1. Set up OpenAI 66 | 67 | You'll need an OpenAI API key to generate embeddings. 68 | 69 | 2. Set up Supabase and create a database 70 | 71 | Note: You don't have to use Supabase. Use whatever method you prefer to store your data. But I like Supabase and think it's easy to use. 72 | 73 | There is a schema.sql file in the root of the repo that you can use to set up the database. 74 | 75 | Run that in the SQL editor in Supabase as directed. 76 | 77 | I recommend turning on Row Level Security and setting up a service role to use with the app. 78 | 79 | ### Repo Setup 80 | 81 | 3. Clone repo 82 | 83 | ```bash 84 | git clone https://github.com/mckaywrigley/naval-gpt.git 85 | ``` 86 | 87 | 4. Install dependencies 88 | 89 | ```bash 90 | npm i 91 | ``` 92 | 93 | 5. 
Set up environment variables 94 | 95 | Create a .env.local file in the root of the repo with the following variables: 96 | 97 | ```bash 98 | OPENAI_API_KEY= 99 | 100 | NEXT_PUBLIC_SUPABASE_URL= 101 | SUPABASE_SERVICE_ROLE_KEY= 102 | ``` 103 | 104 | You'll also need to save your OpenAI API key as an environment variable in your OS. 105 | 106 | ```bash 107 | export OPENAI_API_KEY= 108 | ``` 109 | 110 | ### Process Text 111 | 112 | 6. Run text scraping script 113 | 114 | ```bash 115 | npm run scrape 116 | ``` 117 | 118 | This scrapes the content from Naval's website and saves it to a json file. 119 | 120 | 7. Run text embedding script 121 | 122 | ```bash 123 | npm run embed-text 124 | ``` 125 | 126 | This reads the json file, generates embeddings for each passage, and saves the results to your database. 127 | 128 | There is a 200ms delay between each request to avoid rate limiting. 129 | 130 | This process will take 10-15 minutes. 131 | 132 | ### Process Audio 133 | 134 | 8. Download podcast 135 | 136 | Download the [podcast](https://content.libsyn.com/p/4/b/0/4b0ce4b1beb1c234/Naval-Ep53.mp3?c_id=59607029&cs_id=59607029&response-content-type=audio%2Fmpeg&Expires=1678167549&Signature=KOxLpDUzvl~zD-yiSE55VedxazspCijG6-Mme~54wcaiwDlnONhDi7t--maXLNPK345FSXDq-G7T0RJNrIVF0z0u8-rc6Nv2r-uh72l2L2isJ4cNpCnCiEk4Hfe31fdu42D17kENRRM9ybiTHa0kst9qtZ4t6WIeACbT1Tdvf1GfI9s7TZxI4IceHHM~GEhxGdpMEMOrN8zKVJvKxuV9RXI9vMbhGPnOCtHAIw1~7gaXu-Ag3k3aOoD~gptl~cqk4aIEZLjjdeJg1evx48t4RCye5YtKZZIjYyrgyIji-HOXWPDan04oJymijc8AEMyL27E9F2ikOfQ6DVIQsx4qfA__&Key-Pair-Id=K1YS7LZGUP96OI) and add it as "podcast.mp3" to the public directory. 137 | 138 | 9. Run the audio processing script 139 | 140 | Note: You'll need to have Python installed on your machine. 141 | 142 | ```bash 143 | cd scripts 144 | 145 | python3 main.py 146 | ``` 147 | 148 | This splits the podcast into 1min chunks and transcribes each chunk with Whisper. 149 | 150 | The results are saved to a json file. 
151 | 152 | There is a 1.2s delay between each request to avoid rate limiting. 153 | 154 | It will take 20-30 minutes to run. 155 | 156 | 10. Run audio embedding script 157 | 158 | ```bash 159 | npm run embed-audio 160 | ``` 161 | 162 | This reads the json file, generates embeddings for each clip, and saves the results to your database. 163 | 164 | There is a 200ms delay between each request to avoid rate limiting. 165 | 166 | This process will take about 5 minutes. 167 | 168 | ### App 169 | 170 | 11. Run app 171 | 172 | ```bash 173 | npm run dev 174 | ``` 175 | 176 | ## Credits 177 | 178 | Thanks to [Naval Ravikant](https://twitter.com/naval) for publicizing his thoughts - they've proven to be an invaluable source of wisdom for all of us. 179 | 180 | ## Contact 181 | 182 | If you have any questions, feel free to reach out to me on [Twitter](https://twitter.com/mckaywrigley)! 183 | 184 | ## Notes 185 | 186 | I sacrificed composability for simplicity in the app. 187 | 188 | You can split up a lot of the stuff in index.tsx into separate components. 189 | -------------------------------------------------------------------------------- /components/Answer/Answer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | import styles from "./answer.module.css"; 3 | 4 | interface AnswerProps { 5 | text: string; 6 | } 7 | 8 | export const Answer: React.FC = ({ text }) => { 9 | const [words, setWords] = useState([]); 10 | 11 | useEffect(() => { 12 | setWords(text.split(" ")); 13 | }, [text]); 14 | 15 | return ( 16 |
17 | {words.map((word, index) => ( 18 | 23 | {word}{" "} 24 | 25 | ))} 26 |
27 | ); 28 | }; 29 | -------------------------------------------------------------------------------- /components/Answer/answer.module.css: -------------------------------------------------------------------------------- 1 | .fadeIn { 2 | animation: fadeIn 0.5s ease-in-out forwards; 3 | opacity: 0; 4 | } 5 | 6 | @keyframes fadeIn { 7 | from { 8 | opacity: 0; 9 | } 10 | to { 11 | opacity: 1; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /components/Footer.tsx: -------------------------------------------------------------------------------- 1 | import { IconBrandGithub, IconBrandTwitter } from "@tabler/icons-react"; 2 | import { FC } from "react"; 3 | 4 | export const Footer: FC = () => { 5 | return ( 6 |
7 |
8 | 9 |
10 | Created by 11 | 17 | Mckay Wrigley 18 | 19 | based on 20 | 26 | Naval Ravikant's 27 | 28 | Twitter thread 29 | 35 | How To Get Rich 36 | 37 | . 38 |
39 | 40 |
41 | 47 | 48 | 49 | 50 | 56 | 57 | 58 |
59 |
60 | ); 61 | }; 62 | -------------------------------------------------------------------------------- /components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import { IconExternalLink } from "@tabler/icons-react"; 2 | import Image from "next/image"; 3 | import { FC } from "react"; 4 | import naval from "../public/naval.jpeg"; 5 | 6 | export const Navbar: FC = () => { 7 | return ( 8 |
9 |
10 | The Network State GPT 16 | 20 | Naval GPT 21 | 22 |
23 |
24 | 30 |
nav.al
31 | 32 | 36 |
37 |
38 |
39 | ); 40 | }; 41 | -------------------------------------------------------------------------------- /components/Player.tsx: -------------------------------------------------------------------------------- 1 | import { IconPlayerPauseFilled, IconPlayerPlayFilled, IconPlayerSkipBackFilled, IconPlayerSkipForwardFilled } from "@tabler/icons-react"; 2 | import { ChangeEvent, FC, useEffect, useRef, useState } from "react"; 3 | 4 | interface PlayerProps { 5 | src: string; 6 | startTime: number; 7 | } 8 | 9 | export const Player: FC = ({ src, startTime }) => { 10 | const audioRef = useRef(null); 11 | 12 | const [isPlaying, setIsPlaying] = useState(false); 13 | const [currentTime, setCurrentTime] = useState(startTime - 5); 14 | const [duration, setDuration] = useState(0); 15 | 16 | const handlePlay = () => { 17 | if (!audioRef.current) return; 18 | setIsPlaying(true); 19 | audioRef.current.play(); 20 | }; 21 | 22 | const handlePause = () => { 23 | if (!audioRef.current) return; 24 | setIsPlaying(false); 25 | audioRef.current.pause(); 26 | }; 27 | 28 | const handleTimeUpdate = () => { 29 | if (!audioRef.current) return; 30 | setCurrentTime(audioRef.current.currentTime); 31 | setDuration(audioRef.current.duration); 32 | }; 33 | 34 | const handleSliderChange = (event: ChangeEvent) => { 35 | if (!audioRef.current) return; 36 | audioRef.current.currentTime = +event.target.value; 37 | }; 38 | 39 | const handleSkipBackward = () => { 40 | if (!audioRef.current) return; 41 | audioRef.current.currentTime -= 15; 42 | }; 43 | 44 | const handleSkipForward = () => { 45 | if (!audioRef.current) return; 46 | audioRef.current.currentTime += 15; 47 | }; 48 | 49 | const formatTime = (time: number) => { 50 | const minutes = Math.floor(time / 60); 51 | const seconds = Math.floor(time % 60) 52 | .toString() 53 | .padStart(2, "0"); 54 | return `${minutes}:${seconds}`; 55 | }; 56 | 57 | useEffect(() => { 58 | if (!audioRef.current) return; 59 | audioRef.current.currentTime = currentTime; 60 | 
setDuration(audioRef.current.duration); 61 | }, []); 62 | 63 | return ( 64 |
65 |
125 | ); 126 | }; 127 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Mckay Wrigley 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | } 5 | 6 | module.exports = nextConfig 7 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "scrape": "tsx scripts/scrape.ts", 11 | "embed-text": "tsx scripts/embed-text.ts", 12 | "embed-audio": "tsx scripts/embed-audio.ts" 13 | }, 14 | "dependencies": { 15 | "@tabler/icons-react": "^2.7.0", 16 | "@types/node": "18.14.1", 17 | "@types/react": "18.0.28", 18 | "@types/react-dom": "18.0.11", 19 | "endent": "^2.1.0", 20 | "eslint": "8.34.0", 21 | "eslint-config-next": "13.2.1", 22 | "eventsource-parser": "^0.1.0", 23 | "next": "13.2.1", 24 | "react": "18.2.0", 25 | "react-dom": "18.2.0", 26 | "typescript": "4.9.5" 27 | }, 28 | "devDependencies": { 29 | "@next/env": "^13.2.3", 30 | "@supabase/supabase-js": "^2.10.0", 31 | "autoprefixer": "^10.4.13", 32 | "axios": "^1.3.4", 33 | "cheerio": "^1.0.0-rc.12", 34 | "gpt-3-encoder": "^1.1.4", 35 | "openai": "^3.2.1", 36 | "postcss": "^8.4.21", 37 | "tailwindcss": "^3.2.7", 38 | "tsx": "^3.12.3" 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import "@/styles/globals.css"; 2 | import type { AppProps } from "next/app"; 3 | import { Inter } from "next/font/google"; 4 | 5 | const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export default function App({ Component, pageProps }: 
AppProps<{}>) { 8 | return ( 9 |
10 | 11 |
12 | ); 13 | } 14 | -------------------------------------------------------------------------------- /pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from 'next/document' 2 | 3 | export default function Document() { 4 | return ( 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /pages/api/answer.ts: -------------------------------------------------------------------------------- 1 | import { OpenAIStream } from "@/utils"; 2 | 3 | export const config = { 4 | runtime: "edge" 5 | }; 6 | 7 | const handler = async (req: Request): Promise => { 8 | try { 9 | const { prompt, apiKey } = (await req.json()) as { 10 | prompt: string; 11 | apiKey: string; 12 | }; 13 | 14 | const stream = await OpenAIStream(prompt, apiKey); 15 | 16 | return new Response(stream); 17 | } catch (error) { 18 | console.error(error); 19 | return new Response("Error", { status: 500 }); 20 | } 21 | }; 22 | 23 | export default handler; 24 | -------------------------------------------------------------------------------- /pages/api/search.ts: -------------------------------------------------------------------------------- 1 | import { supabaseAdmin } from "@/utils"; 2 | 3 | export const config = { 4 | runtime: "edge" 5 | }; 6 | 7 | const handler = async (req: Request): Promise => { 8 | try { 9 | const { query, apiKey, matches } = (await req.json()) as { 10 | query: string; 11 | apiKey: string; 12 | matches: number; 13 | }; 14 | 15 | const input = query.replace(/\n/g, " "); 16 | 17 | const res = await fetch("https://api.openai.com/v1/embeddings", { 18 | headers: { 19 | "Content-Type": "application/json", 20 | Authorization: `Bearer ${apiKey}` 21 | }, 22 | method: "POST", 23 | body: JSON.stringify({ 24 | model: "text-embedding-ada-002", 25 | input 26 | }) 27 | }); 28 | 29 | const json = await res.json(); 30 | const embedding = json.data[0].embedding; 31 | 32 | const { data: posts, error: postsError } = await supabaseAdmin.rpc("naval_posts_search", { 33 | query_embedding: embedding, 34 | similarity_threshold: 0.01, 35 | match_count: matches 36 | }); 37 | 38 | if (postsError) { 39 | console.error(postsError); 40 | return new Response("Posts Error", { status: 500 }); 41 | 
} 42 | 43 | const { data: clips, error: clipsError } = await supabaseAdmin.rpc("naval_clips_search", { 44 | query_embedding: embedding, 45 | similarity_threshold: 0.01, 46 | match_count: 1 47 | }); 48 | 49 | if (clipsError) { 50 | console.error(clipsError); 51 | return new Response("Clips Error", { status: 500 }); 52 | } 53 | 54 | return new Response(JSON.stringify({ posts, clips }), { status: 200 }); 55 | } catch (error) { 56 | console.error(error); 57 | return new Response("Error", { status: 500 }); 58 | } 59 | }; 60 | 61 | export default handler; 62 | -------------------------------------------------------------------------------- /pages/index.tsx: -------------------------------------------------------------------------------- 1 | import { Answer } from "@/components/Answer/Answer"; 2 | import { Footer } from "@/components/Footer"; 3 | import { Navbar } from "@/components/Navbar"; 4 | import { Player } from "@/components/Player"; 5 | import { NavalClip, NavalSubsection } from "@/types"; 6 | import { IconArrowRight, IconExternalLink, IconSearch } from "@tabler/icons-react"; 7 | import endent from "endent"; 8 | import Head from "next/head"; 9 | import { KeyboardEvent, useEffect, useRef, useState } from "react"; 10 | 11 | export default function Home() { 12 | const inputRef = useRef(null); 13 | 14 | const [query, setQuery] = useState(""); 15 | const [posts, setPosts] = useState([]); 16 | const [answer, setAnswer] = useState(""); 17 | const [loading, setLoading] = useState(false); 18 | 19 | const [showSettings, setShowSettings] = useState(false); 20 | const [mode, setMode] = useState<"search" | "chat">("chat"); 21 | const [matchCount, setMatchCount] = useState(3); 22 | const [apiKey, setApiKey] = useState(""); 23 | 24 | const [time, setTime] = useState(0); 25 | 26 | const handleSearch = async () => { 27 | if (!apiKey) { 28 | alert("Please enter an API key."); 29 | return; 30 | } 31 | 32 | if (!query) { 33 | alert("Please enter a query."); 34 | return; 35 | } 36 | 
37 | setAnswer(""); 38 | setPosts([]); 39 | 40 | setLoading(true); 41 | 42 | const searchResponse = await fetch("/api/search", { 43 | method: "POST", 44 | headers: { 45 | "Content-Type": "application/json" 46 | }, 47 | body: JSON.stringify({ query, apiKey, matches: matchCount }) 48 | }); 49 | 50 | if (!searchResponse.ok) { 51 | setLoading(false); 52 | throw new Error(searchResponse.statusText); 53 | } 54 | 55 | const results: { posts: NavalSubsection[]; clips: NavalClip[] } = await searchResponse.json(); 56 | const posts = results.posts; 57 | const clip = results.clips[0]; 58 | 59 | setPosts(posts); 60 | setTime(clip.seconds); 61 | 62 | setLoading(false); 63 | 64 | return results; 65 | }; 66 | 67 | const handleAnswer = async () => { 68 | if (!apiKey) { 69 | alert("Please enter an API key."); 70 | return; 71 | } 72 | 73 | if (!query) { 74 | alert("Please enter a query."); 75 | return; 76 | } 77 | 78 | setAnswer(""); 79 | setPosts([]); 80 | 81 | setLoading(true); 82 | 83 | const searchResponse = await fetch("/api/search", { 84 | method: "POST", 85 | headers: { 86 | "Content-Type": "application/json" 87 | }, 88 | body: JSON.stringify({ query, apiKey, matches: matchCount }) 89 | }); 90 | 91 | if (!searchResponse.ok) { 92 | setLoading(false); 93 | throw new Error(searchResponse.statusText); 94 | } 95 | 96 | const results: { posts: NavalSubsection[]; clips: NavalClip[] } = await searchResponse.json(); 97 | const posts = results.posts; 98 | const clip = results.clips[0]; 99 | 100 | setPosts(posts); 101 | setTime(clip.seconds); 102 | 103 | const prompt = endent` 104 | Use the following passages to provide an answer to the query: "${query}" 105 | 106 | ${posts?.map((d: any) => d.content).join("\n\n")} 107 | `; 108 | 109 | const answerResponse = await fetch("/api/answer", { 110 | method: "POST", 111 | headers: { 112 | "Content-Type": "application/json" 113 | }, 114 | body: JSON.stringify({ prompt, apiKey }) 115 | }); 116 | 117 | if (!answerResponse.ok) { 118 | 
setLoading(false); 119 | throw new Error(answerResponse.statusText); 120 | } 121 | 122 | const data = answerResponse.body; 123 | 124 | if (!data) { 125 | return; 126 | } 127 | 128 | setLoading(false); 129 | 130 | const reader = data.getReader(); 131 | const decoder = new TextDecoder(); 132 | let done = false; 133 | 134 | while (!done) { 135 | const { value, done: doneReading } = await reader.read(); 136 | done = doneReading; 137 | const chunkValue = decoder.decode(value); 138 | setAnswer((prev) => prev + chunkValue); 139 | } 140 | }; 141 | 142 | const handleKeyDown = (e: KeyboardEvent) => { 143 | if (e.key === "Enter") { 144 | if (mode === "search") { 145 | handleSearch(); 146 | } else { 147 | handleAnswer(); 148 | } 149 | } 150 | }; 151 | 152 | const handleSave = () => { 153 | if (apiKey.length !== 51) { 154 | alert("Please enter a valid API key."); 155 | return; 156 | } 157 | 158 | localStorage.setItem("PG_KEY", apiKey); 159 | localStorage.setItem("PG_MATCH_COUNT", matchCount.toString()); 160 | localStorage.setItem("PG_MODE", mode); 161 | 162 | setShowSettings(false); 163 | }; 164 | 165 | const handleClear = () => { 166 | localStorage.removeItem("PG_KEY"); 167 | localStorage.removeItem("PG_MATCH_COUNT"); 168 | localStorage.removeItem("PG_MODE"); 169 | 170 | setApiKey(""); 171 | setMatchCount(3); 172 | setMode("search"); 173 | }; 174 | 175 | const renderHtml = (html: string) => { 176 | return ( 177 | <> 178 |
182 | 183 | ); 184 | }; 185 | 186 | useEffect(() => { 187 | if (matchCount > 8) { 188 | setMatchCount(8); 189 | } else if (matchCount < 1) { 190 | setMatchCount(1); 191 | } 192 | }, [matchCount]); 193 | 194 | useEffect(() => { 195 | const PG_KEY = localStorage.getItem("PG_KEY"); 196 | const PG_MATCH_COUNT = localStorage.getItem("PG_MATCH_COUNT"); 197 | const PG_MODE = localStorage.getItem("PG_MODE"); 198 | 199 | if (PG_KEY) { 200 | setApiKey(PG_KEY); 201 | } 202 | 203 | if (PG_MATCH_COUNT) { 204 | setMatchCount(parseInt(PG_MATCH_COUNT)); 205 | } 206 | 207 | if (PG_MODE) { 208 | setMode(PG_MODE as "search" | "chat"); 209 | } 210 | }, []); 211 | 212 | return ( 213 | <> 214 | 215 | Naval GPT 216 | 220 | 224 | 228 | 229 | 230 |
231 | 232 |
233 |
234 | 240 | 241 | {showSettings && ( 242 |
243 |
244 |
Mode
245 | 253 |
254 | 255 |
256 |
Passage Count
257 | setMatchCount(Number(e.target.value))} 263 | className="max-w-[400px] block w-full rounded-md border border-gray-300 p-2 text-black shadow-sm focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-500 sm:text-sm" 264 | /> 265 |
266 | 267 |
268 |
OpenAI API Key
269 | { 275 | setApiKey(e.target.value); 276 | 277 | if (e.target.value.length !== 51) { 278 | setShowSettings(true); 279 | } 280 | }} 281 | /> 282 |
283 | 284 |
285 |
289 | Save 290 |
291 | 292 |
296 | Clear 297 |
298 |
299 |
300 | )} 301 | 302 | {apiKey.length === 51 ? ( 303 |
304 | 305 | 306 | setQuery(e.target.value)} 313 | onKeyDown={handleKeyDown} 314 | /> 315 | 316 | 322 |
323 | ) : ( 324 |
325 | Please enter your 326 | 330 | OpenAI API key 331 | 332 | in settings. 333 |
334 | )} 335 | 336 | {loading ? ( 337 |
338 | {mode === "chat" && ( 339 | <> 340 |
Answer
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 | 349 | )} 350 | 351 |
Passages
352 |
353 |
354 |
355 |
356 |
357 |
358 |
359 |
360 | ) : answer ? ( 361 |
362 |
Answer
363 | 364 | 365 |
366 |
Podcast
367 | 371 |
Passages
372 | 373 | {posts.map((post, index) => ( 374 |
375 |
376 |
377 |
378 |
{post.title}
379 |
{post.subtitle}
380 |
381 | 387 | 388 | 389 |
390 | {renderHtml(post.html)} 391 |
392 |
393 | ))} 394 |
395 |
396 | ) : posts.length > 0 ? ( 397 |
398 |
Podcast
399 | 403 |
Passages
404 | {posts.map((post, index) => ( 405 |
406 |
407 |
408 |
409 |
410 |
{post.title}
411 |
{post.subtitle}
412 |
413 |
414 | 420 | 421 | 422 |
423 | {renderHtml(post.html)} 424 |
425 |
426 | ))} 427 |
428 | ) : ( 429 |
{`AI-powered search & chat for Naval Ravikant's Twitter thread "How To Get Rich."`}
430 | )} 431 |
432 |
433 |
434 |
435 | 436 | ); 437 | } 438 | -------------------------------------------------------------------------------- /postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | -------------------------------------------------------------------------------- /public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mckaywrigley/naval-gpt/89ce147352eabaa11780a0349fd9b2aad5363e5e/public/favicon.ico -------------------------------------------------------------------------------- /public/naval.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mckaywrigley/naval-gpt/89ce147352eabaa11780a0349fd9b2aad5363e5e/public/naval.jpeg -------------------------------------------------------------------------------- /schema.sql: -------------------------------------------------------------------------------- 1 | -- RUN 1st 2 | create extension vector; 3 | 4 | -- RUN 2nd 5 | create table naval_posts ( 6 | id bigserial primary key, 7 | title text, 8 | subtitle text, 9 | html text, 10 | content text, 11 | length bigint, 12 | tokens bigint, 13 | embedding vector (1536) 14 | ); 15 | 16 | create table naval_clips ( 17 | id bigserial primary key, 18 | file text, 19 | content text, 20 | seconds bigint, 21 | embedding vector (1536) 22 | ); 23 | 24 | -- RUN 3rd after running the scripts 25 | create or replace function naval_posts_search ( 26 | query_embedding vector(1536), 27 | similarity_threshold float, 28 | match_count int 29 | ) 30 | returns table ( 31 | id bigint, 32 | title text, 33 | subtitle text, 34 | html text, 35 | content text, 36 | length bigint, 37 | tokens bigint, 38 | similarity float 39 | ) 40 | language plpgsql 41 | as $$ 42 | begin 43 | return query 44 | select 45 | naval_posts.id, 46 | 
naval_posts.title, 47 | naval_posts.subtitle, 48 | naval_posts.html, 49 | naval_posts.content, 50 | naval_posts.length, 51 | naval_posts.tokens, 52 | 1 - (naval_posts.embedding <=> query_embedding) as similarity 53 | from naval_posts 54 | where 1 - (naval_posts.embedding <=> query_embedding) > similarity_threshold 55 | order by naval_posts.embedding <=> query_embedding 56 | limit match_count; 57 | end; 58 | $$; 59 | 60 | create or replace function naval_clips_search ( 61 | query_embedding vector(1536), 62 | similarity_threshold float, 63 | match_count int 64 | ) 65 | returns table ( 66 | id bigint, 67 | file text, 68 | content text, 69 | seconds bigint, 70 | similarity float 71 | ) 72 | language plpgsql 73 | as $$ 74 | begin 75 | return query 76 | select 77 | naval_clips.id, 78 | naval_clips.file, 79 | naval_clips.content, 80 | naval_clips.seconds, 81 | 1 - (naval_clips.embedding <=> query_embedding) as similarity 82 | from naval_clips 83 | where 1 - (naval_clips.embedding <=> query_embedding) > similarity_threshold 84 | order by naval_clips.embedding <=> query_embedding 85 | limit match_count; 86 | end; 87 | $$; 88 | 89 | -- RUN 4th 90 | create index on naval_posts 91 | using ivfflat (embedding vector_cosine_ops) 92 | with (lists = 100); 93 | 94 | create index on naval_clips 95 | using ivfflat (embedding vector_cosine_ops) 96 | with (lists = 100); -------------------------------------------------------------------------------- /scripts/embed-audio.ts: -------------------------------------------------------------------------------- 1 | import { NavalClip } from "@/types"; 2 | import { loadEnvConfig } from "@next/env"; 3 | import { createClient } from "@supabase/supabase-js"; 4 | import fs from "fs"; 5 | import { Configuration, OpenAIApi } from "openai"; 6 | 7 | loadEnvConfig(""); 8 | 9 | const generateEmbeddings = async (clips: NavalClip[]) => { 10 | const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY }); 11 | const openai = new 
OpenAIApi(configuration); 12 | 13 | const supabase = createClient(process.env.NEXT_PUBLIC_SUPABASE_URL!, process.env.SUPABASE_SERVICE_ROLE_KEY!); 14 | 15 | for (let i = 0; i < clips.length; i++) { 16 | const clip = clips[i]; 17 | 18 | const { file, content, seconds } = clip; 19 | 20 | const embeddingResponse = await openai.createEmbedding({ 21 | model: "text-embedding-ada-002", 22 | input: content 23 | }); 24 | 25 | const [{ embedding }] = embeddingResponse.data.data; 26 | 27 | const { data, error } = await supabase 28 | .from("naval_clips") 29 | .insert({ 30 | file, 31 | content, 32 | seconds, 33 | embedding 34 | }) 35 | .select("*"); 36 | 37 | if (error) { 38 | console.log("error", error); 39 | } else { 40 | console.log("saved", i); 41 | } 42 | 43 | await new Promise((resolve) => setTimeout(resolve, 200)); 44 | } 45 | }; 46 | 47 | (async () => { 48 | const json = JSON.parse(fs.readFileSync("scripts/clips.json", "utf8")); 49 | 50 | await generateEmbeddings(json); 51 | })(); 52 | -------------------------------------------------------------------------------- /scripts/embed-text.ts: -------------------------------------------------------------------------------- 1 | import { NavalJSON, NavalSection } from "@/types"; 2 | import { loadEnvConfig } from "@next/env"; 3 | import { createClient } from "@supabase/supabase-js"; 4 | import fs from "fs"; 5 | import { Configuration, OpenAIApi } from "openai"; 6 | 7 | loadEnvConfig(""); 8 | 9 | const generateEmbeddings = async (sections: NavalSection[]) => { 10 | const configuration = new Configuration({ apiKey: process.env.OPENAI_API_KEY }); 11 | const openai = new OpenAIApi(configuration); 12 | 13 | const supabase = createClient(process.env.NEXT_PUBLIC_SUPABASE_URL!, process.env.SUPABASE_SERVICE_ROLE_KEY!); 14 | 15 | for (let i = 0; i < sections.length; i++) { 16 | const section = sections[i]; 17 | 18 | for (let j = 0; j < section.subsections.length; j++) { 19 | const subsection = section.subsections[j]; 20 | 21 | const { 
def generate_transcript():
    """Transcribe each exported clip with Whisper and record it in ``clips``.

    For clip ``i`` (0-based) this reads ``clips/<i + 1>.mp3``, sends it to the
    ``whisper-1`` model and appends a dict with ``file``, ``seconds`` and
    ``content`` keys to the module-level ``clips`` list. When the API returns
    an empty transcript the entry is still appended, with ``content`` set to
    ``"ERROR"``, so the clip stays visible in the output.
    """
    print("Generating transcript...")

    # Context hint for Whisper; identical for every clip, so it is built once.
    prompt = "The transcript is a podcast between Naval Ravikant and Nivi Ravikant about Naval's popular Twitter thread \"How To Get Rich\" Nivi asks Naval questions as they go through the thread."

    for i in range(0, int(clip_count)):
        file_name = str(i + 1) + ".mp3"

        # Nominal start of clip i in seconds. Computed before branching because
        # the failure path needs it too; it used to be assigned only on the
        # success path, so a failed clip raised NameError or reused a stale value.
        timestamp = i * 60

        print("Transcribing clip " + str(i + 1) + "...")

        # The context manager closes the file handle once the upload is done.
        with open("clips/" + file_name, "rb") as audio_file:
            # prompt is passed by keyword so it is unambiguously sent as the
            # request's prompt field rather than filling a positional slot.
            transcript = openai.Audio.transcribe("whisper-1", audio_file, prompt=prompt)

        if transcript.text:
            text = transcript.text
            # Whisper tends to mishear the name; normalise the known misspelling.
            text = text.replace("nivald", "naval").replace("Nivald", "Naval")
            print("\n\nTranscribed text:\n\n" + text)

            clips.append({
                "file": file_name,
                "seconds": timestamp,
                "content": text
            })

            print("Waiting 1.2s before next transcription...")
            time.sleep(1.2)
        else:
            print('ERROR:' + str(i + 1))

            clips.append({
                "file": file_name,
                "seconds": timestamp,
                "content": "ERROR"
            })

            print("Waiting 10s before next transcription...")
            time.sleep(10)
$(el).prop("tagName"); 17 | 18 | let sectionTitle = $(el).text(); 19 | let subsections: NavalSubsection[] = []; 20 | 21 | if (tag === "H2") { 22 | let subsectionIndex = -1; 23 | let subsectionTitle = ""; 24 | let subsectionHtml = ""; 25 | let subsectionText = ""; 26 | 27 | $(el) 28 | .nextUntil("H2") 29 | .each((i, el) => { 30 | if ($(el).prop("tagName") === "P") { 31 | const numChildren = $(el).children().length; 32 | 33 | let hasStrong = false; 34 | 35 | const checkChildren = (children: any) => { 36 | children.each((i: any, el: any) => { 37 | if ($(el).prop("tagName") === "STRONG" || $(el).prop("tagName") === "B") { 38 | hasStrong = true; 39 | } else { 40 | if ($(el).children().length > 0) { 41 | checkChildren($(el).children()); 42 | } 43 | } 44 | }); 45 | }; 46 | 47 | checkChildren($(el).children()); 48 | 49 | if (hasStrong && !$(el).text().startsWith("Naval:") && !$(el).text().startsWith("Nivi:")) { 50 | subsectionTitle = $(el).children().first().text(); 51 | 52 | subsections.push({ 53 | title: sectionTitle, 54 | subtitle: subsectionTitle, 55 | html: "", 56 | content: "", 57 | length: 0, 58 | tokens: 0 59 | }); 60 | 61 | subsectionIndex++; 62 | 63 | subsectionHtml = ""; 64 | subsectionText = ""; 65 | } else { 66 | if (subsectionIndex > -1) { 67 | subsectionHtml += `

${$(el).html()?.replace(/’/g, "'")}

`; 68 | subsectionText += $(el).text().replace(/’/g, "'"); 69 | 70 | subsections[subsectionIndex].html = subsectionHtml; 71 | subsections[subsectionIndex].content = subsectionText; 72 | subsections[subsectionIndex].length = subsectionText.length; 73 | subsections[subsectionIndex].tokens = encode(subsectionText).length; 74 | } 75 | } 76 | } 77 | }); 78 | 79 | sections.push({ 80 | title: sectionTitle, 81 | length: subsections.reduce((acc, subsection) => acc + subsection.length, 0), 82 | tokens: subsections.reduce((acc, subsection) => acc + subsection.tokens, 0), 83 | subsections 84 | }); 85 | } 86 | }); 87 | 88 | return sections; 89 | }; 90 | 91 | (async () => { 92 | const sections = await scrapePost(); 93 | 94 | const json: NavalJSON = { 95 | current_date: "2023-03-06", 96 | author: "Naval Ravikant", 97 | url: "https://nav.al/rich", 98 | length: sections.reduce((acc, essay) => acc + essay.length, 0), 99 | tokens: sections.reduce((acc, essay) => acc + essay.tokens, 0), 100 | sections 101 | }; 102 | 103 | const sectionCount = json.sections.length; 104 | const subsectionCount = json.sections.reduce((acc, section) => acc + section.subsections.length, 0); 105 | 106 | console.log(`Sections: ${sectionCount}, Subsections: ${subsectionCount}`); 107 | 108 | fs.writeFileSync("scripts/naval.json", JSON.stringify(json)); 109 | })(); 110 | -------------------------------------------------------------------------------- /styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ["./app/**/*.{js,ts,jsx,tsx}", "./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"], 4 | theme: { 5 | extend: 
/**
 * Requests a streamed chat completion and re-exposes it as a stream of UTF-8 text.
 *
 * The prompt is sent as the single user message, after a fixed system message,
 * to the chat completions endpoint with `stream: true`. The server-sent events
 * are parsed and only the text fragments (`choices[0].delta.content`) are
 * forwarded to the returned stream.
 *
 * @param prompt - Text of the user message.
 * @param apiKey - OpenAI API key sent as the bearer token.
 * @returns A `ReadableStream` of UTF-8 encoded answer fragments. It closes on
 *   the `[DONE]` sentinel, or when the upstream body ends.
 * @throws Error when the API responds with a status other than 200.
 */
export const OpenAIStream = async (prompt: string, apiKey: string) => {
  const encoder = new TextEncoder();
  const decoder = new TextDecoder();

  const res = await fetch("https://api.openai.com/v1/chat/completions", {
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    },
    method: "POST",
    body: JSON.stringify({
      model: OpenAIModel.DAVINCI_TURBO,
      messages: [
        {
          role: "system",
          content: `You are a helpful assistant that accurately answers queries using Naval Ravikant's Twitter thread "How To Get Rich". Keep your answer under 5 sentences. Be accurate, helpful, concise, and clear. Answer like Naval.`
        },
        {
          role: "user",
          content: prompt
        }
      ],
      max_tokens: 150,
      temperature: 0.0,
      stream: true
    })
  });

  if (res.status !== 200) {
    throw new Error("OpenAI API returned an error");
  }

  const stream = new ReadableStream({
    async start(controller) {
      // Set once the controller has been closed or errored; later events are ignored.
      let finished = false;

      const onParse = (event: ParsedEvent | ReconnectInterval) => {
        if (finished || event.type !== "event") {
          return;
        }

        const data = event.data;

        if (data === "[DONE]") {
          finished = true;
          controller.close();
          return;
        }

        try {
          const json = JSON.parse(data);
          const text = json.choices?.[0]?.delta?.content;

          // Role-only and finish deltas carry no content; skip them instead of
          // enqueueing empty chunks.
          if (typeof text === "string" && text.length > 0) {
            controller.enqueue(encoder.encode(text));
          }
        } catch (e) {
          finished = true;
          controller.error(e);
        }
      };

      const parser = createParser(onParse);

      for await (const chunk of res.body as any) {
        // stream: true keeps a multi-byte character that is split across two
        // network chunks intact instead of decoding each half to U+FFFD.
        parser.feed(decoder.decode(chunk, { stream: true }));
      }

      // Flush any bytes the decoder is still holding.
      const tail = decoder.decode();
      if (tail) {
        parser.feed(tail);
      }

      // Upstream ended without the [DONE] sentinel: close so readers do not hang.
      if (!finished) {
        finished = true;
        controller.close();
      }
    }
  });

  return stream;
};