├── nextjs ├── utils │ └── index.ts ├── styles │ └── globals.css ├── public │ ├── 01.jpg │ ├── 02.jpg │ ├── 03.jpg │ ├── 04.jpg │ ├── 05.jpg │ ├── 06.jpg │ ├── 07.jpg │ ├── favicon.jpeg │ └── karpathy.jpg ├── postcss.config.js ├── next-env.d.ts ├── components │ ├── Answer │ │ ├── answer.module.css │ │ └── Answer.tsx │ ├── Navbar.tsx │ └── Footer.tsx ├── tailwind.config.js ├── pages │ ├── _document.tsx │ ├── _app.tsx │ └── index.tsx ├── types │ └── index.ts ├── next.config.js ├── tsconfig.json ├── README.md ├── .gitignore └── package.json ├── api ├── requirements.txt ├── railway.json ├── logging.conf ├── karpathy_app.py └── README.md ├── README.md └── eval └── test-set.csv /nextjs/utils/index.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /nextjs/styles/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | -------------------------------------------------------------------------------- /nextjs/public/01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/01.jpg -------------------------------------------------------------------------------- /nextjs/public/02.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/02.jpg -------------------------------------------------------------------------------- /nextjs/public/03.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/03.jpg -------------------------------------------------------------------------------- /nextjs/public/04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/04.jpg -------------------------------------------------------------------------------- /nextjs/public/05.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/05.jpg -------------------------------------------------------------------------------- /nextjs/public/06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/06.jpg -------------------------------------------------------------------------------- /nextjs/public/07.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/07.jpg -------------------------------------------------------------------------------- /nextjs/public/favicon.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/favicon.jpeg -------------------------------------------------------------------------------- /nextjs/public/karpathy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rlancemartin/karpathy-gpt/HEAD/nextjs/public/karpathy.jpg 
-------------------------------------------------------------------------------- /nextjs/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 |
-------------------------------------------------------------------------------- /api/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.194 2 | pinecone-client==2.2.1 3 | kor==0.9.2 4 | watchdog 5 | fastapi==0.85.2 6 | uvicorn==0.18.3 7 | sse_starlette==1.3.3 8 | python-multipart==0.0.6
-------------------------------------------------------------------------------- /nextjs/next-env.d.ts: -------------------------------------------------------------------------------- 1 | /// <reference types="next" /> 2 | /// <reference types="next/image-types/global" /> 3 | 4 | // NOTE: This file should not be edited 5 | // see https://nextjs.org/docs/basic-features/typescript for more information. 6 |
-------------------------------------------------------------------------------- /nextjs/components/Answer/answer.module.css: -------------------------------------------------------------------------------- 1 | .fadeIn { 2 | animation: fadeIn 0.5s ease-in-out forwards; 3 | opacity: 0; 4 | } 5 | 6 | @keyframes fadeIn { 7 | from { 8 | opacity: 0; 9 | } 10 | to { 11 | opacity: 1; 12 | } 13 | } 14 |
-------------------------------------------------------------------------------- /nextjs/tailwind.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('tailwindcss').Config} */ 2 | module.exports = { 3 | content: ["./app/**/*.{js,ts,jsx,tsx}", "./pages/**/*.{js,ts,jsx,tsx}", "./components/**/*.{js,ts,jsx,tsx}"], 4 | theme: { 5 | extend: {} 6 | }, 7 | plugins: [] 8 | }; 9 |
-------------------------------------------------------------------------------- /nextjs/pages/_document.tsx: -------------------------------------------------------------------------------- 1 | import { Html, Head, Main, NextScript } from 'next/document' 2 | 3 | export default function Document() { 4 | return ( 5 | <Html lang="en"> 6 | <Head /> 7 | <body> 8 | <Main /> 9 | <NextScript /> 10 | </body> 11 | </Html> 12 | ) 13 | } 14 |
-------------------------------------------------------------------------------- /nextjs/types/index.ts: -------------------------------------------------------------------------------- 1 | export enum OpenAIModel { 2 | DAVINCI_TURBO = "gpt-3.5-turbo" 3 | } 4 | 5 | export type LEXChunk = { 6 | pageContent: string; 7 | source: string; 8 | metadata: Metadata; 9 | length: number; 10 | }; 11 | 12 | interface Metadata { 13 | id: string; 14 | title: string; 15 | link: string; 16 | } 17 |
-------------------------------------------------------------------------------- /api/railway.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://railway.app/railway.schema.json", 3 | "build": { 4 | "builder": "NIXPACKS" 5 | }, 6 | "deploy": { 7 | "startCommand": "uvicorn karpathy_app:app --host 0.0.0.0 --port $PORT", 8 | "restartPolicyType": "ON_FAILURE", 9 | "restartPolicyMaxRetries": 10 10 | } 11 | }
-------------------------------------------------------------------------------- /nextjs/pages/_app.tsx: -------------------------------------------------------------------------------- 1 | import "@/styles/globals.css"; 2 | import { Inter } from "@next/font/google"; 3 | import type { AppProps } from "next/app"; 4 | 5 | const inter = Inter({ subsets: ["latin"] }); 6 | 7 | export default function App({ Component, pageProps }: AppProps<{}>) { 8 | return ( 9 | <main className={inter.className}>
10 | <Component {...pageProps} /> 11 | </main>
12 | ); 13 | } 14 |
-------------------------------------------------------------------------------- /nextjs/next.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('next').NextConfig} */ 2 | const nextConfig = { 3 | reactStrictMode: true, 4 | images: { 5 | remotePatterns: [ 6 | { 7 | protocol: "https", 8 | hostname: "**" 9 | } 10 | ] 11 | }, 12 | webpack(config) { 13 | config.experiments = { 14 | asyncWebAssembly: true, 15 | layers: true, 16 | }; 17 | 18 | return config; 19 | }, 20 | }; 21 | 22 | module.exports = nextConfig; 23 |
-------------------------------------------------------------------------------- /api/logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,uicheckapp 3 | 4 | [handlers] 5 | keys=consoleHandler 6 | 7 | [formatters] 8 | keys=normalFormatter 9 | 10 | [logger_root] 11 | level=INFO 12 | handlers=consoleHandler 13 | 14 | [logger_uicheckapp] 15 | level=DEBUG 16 | handlers=consoleHandler 17 | qualname=uicheckapp 18 | propagate=0 19 | 20 | [formatter_normalFormatter] 21 | format=%(asctime)s loglevel=%(levelname)-6s logger=%(name)s %(funcName)s() L%(lineno)-4d %(message)s 22 | 23 | [handler_consoleHandler] 24 | class=StreamHandler 25 | level=DEBUG 26 | formatter=normalFormatter 27 | args=(sys.stdout,)
-------------------------------------------------------------------------------- /nextjs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "forceConsistentCasingInFileNames": true, 9 | "noEmit": true, 10 | "esModuleInterop": true, 11 | "module": "nodenext", 12 | "moduleResolution": "node", 13 | "resolveJsonModule": true, 14 | "isolatedModules": true, 15 | "jsx": "preserve", 16 | "incremental": true, 17 | "baseUrl": ".", 18 | "paths": { 19 | "@/*": ["./*"] 20 | } 21 | }, 22 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"], 23 | "exclude": ["node_modules"] 24 | } 25 |
-------------------------------------------------------------------------------- /nextjs/components/Answer/Answer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useState } from "react"; 2 | import styles from "./answer.module.css"; 3 | 4 | interface AnswerProps { 5 | text: string; 6 | } 7 | 8 | export const Answer: React.FC<AnswerProps> = ({ text }) => { 9 | const [words, setWords] = useState<string[]>([]); 10 | 11 | useEffect(() => { 12 | setWords(text.split(" ")); 13 | }, [text]); 14 | 15 | return ( 16 | <div>
17 | {words.map((word, index) => ( 18 | <span 19 | key={index} 20 | className={styles.fadeIn} 21 | style={{ animationDelay: `${index * 0.01}s` }} 22 | > 23 | {word}{" "} 24 | </span> 25 | ))} 26 | </div>
27 | ); 28 | }; 29 |
-------------------------------------------------------------------------------- /nextjs/README.md: -------------------------------------------------------------------------------- 1 | ## Testing 2 | 3 | To run the front-end locally with your locally running back-end, simply change the source in `fetchEventSource` [here](https://github.com/rlancemartin/karpathy-gpt/blob/a338ceb8666c02b0ec7e7f47ca0a196d774d1e4d/nextjs/pages/index.tsx#L37) and [here](https://github.com/rlancemartin/karpathy-gpt/blob/a338ceb8666c02b0ec7e7f47ca0a196d774d1e4d/nextjs/pages/index.tsx#L55) to `http://localhost:8000/karpathy-docs` and `http://localhost:8000/karpathy-stream` 4 | * To run the front-end locally, run: 5 | ``` 6 | npm run dev 7 | ``` 8 | 9 | ## Credits 10 | 11 | Thanks to [Mckay Wrigley](https://twitter.com/mckaywrigley) for open-sourcing his UI. 12 | 13 | Thanks to Karpathy for the excellent course. 14 | 15 | ## Contact 16 | 17 | If you have any questions, feel free to reach out to me on [Twitter](https://twitter.com/RLanceMartin)! 18 |
-------------------------------------------------------------------------------- /nextjs/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | /coverage-ts 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | /temp 20 | 21 | # misc 22 | .DS_Store 23 | *.pem 24 | 25 | # debug 26 | npm-debug.log* 27 | yarn-debug.log* 28 | yarn-error.log* 29 | 30 | # local env files 31 | .env.local 32 | .env.development.local 33 | .env.test.local 34 | .env.production.local 35 | 36 | # vercel 37 | .vercel 38 | 39 | 40 | /public/graphql/ 41 | .vscode/* 42 | 43 | /src/styles/styles.css 44 | 45 | *.generated.ts 46 | *.generated.tsx 47 | *.generated.json 48 | 49 | graphql.schema.json 50 | schema.graphql 51 | 52 | # Sentry 53 | .sentryclirc 54 | /test-results/ 55 | /playwright-report/ 56 | /playwright/.cache/ 57 | 58 | tsconfig.tsbuildinfo 59 | .next 60 |
-------------------------------------------------------------------------------- /nextjs/components/Navbar.tsx: -------------------------------------------------------------------------------- 1 | import { IconExternalLink } from "@tabler/icons-react"; 2 | import Image from "next/image"; 3 | import { FC } from "react"; 4 | import king from "../public/karpathy.jpg"; 5 | 6 | export const Navbar: FC = () => { 7 | return ( 8 | <div className="flex h-[60px] border-b border-gray-300 py-2 px-8 items-center justify-between"> 9 | <div className="font-bold text-2xl flex items-center"> 10 | <Image 11 | className="rounded-full" 12 | src={king} 13 | alt="Andrej Karpathy" 14 | width={40} 15 | height={40} 16 | /> 17 | <a 18 | className="ml-2 hover:opacity-50" 19 | href="/" 20 | > 21 | Karpathy GPT 22 | </a> 23 | </div> 24 | 25 | <div> 26 | <a 27 | className="flex items-center hover:opacity-50" 28 | href="https://www.youtube.com/@AndrejKarpathy/videos" 29 | target="_blank" 30 | rel="noreferrer" 31 | > 32 | <div className="hidden sm:flex"> 33 | YouTube course 34 | </div> 35 | <IconExternalLink className="ml-2" size={20} /> 36 | </a> 37 | </div> 38 | </div> 39 | ); 40 | }; 41 |
-------------------------------------------------------------------------------- /nextjs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "app", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint", 10 | "scrape": "tsx scripts/scrape.ts", 11 | "embed": "tsx scripts/embed.ts", 12 | "images": "tsx scripts/images.ts" 13 | }, 14 | "dependencies": { 15 | "@dqbd/tiktoken": "^1.0.2", 16 | "@huggingface/inference": "^1.5.2", 17 | "@microsoft/fetch-event-source": "^2.0.1", 18 | "@next/font": "^13.2.3", 19 | "@pinecone-database/pinecone": "^0.0.10", 20 | "@tabler/icons-react": "^2.7.0", 21 | "@types/node": "18.14.2", 22 | "@types/react": "18.0.28", 23 | "@types/react-dom": "18.0.11", 24 | "chromadb": "^1.3.1", 25 | "cohere-ai": "^5.1.0", 26 | "endent": "^2.1.0", 27 | "eslint": "8.35.0", 28 | "eslint-config-next": "13.2.1", 29 | "eventsource-parser": "^0.1.0", 30 | "hnswlib-node": "^1.4.1", 31 | "langchain": "^0.0.52-0", 32 | "memory-cache": "^0.2.0", 33 | "next": "13.2.1", 34 | "react": "18.2.0", 35 | "react-dom": "18.2.0", 36 | "typescript": "4.9.5" 37 | }, 38 | "devDependencies": { 39 | "@next/env": "^13.2.3", 40 | "@supabase/supabase-js": "^2.10.0", 41 | "autoprefixer": "^10.4.13", 42 | "axios": "^1.3.4", 43 | "cheerio": "^1.0.0-rc.12", 44 | "gpt-3-encoder": "^1.1.4", 45 | "openai": "^3.2.1", 46 | "postcss": "^8.4.21", 47 | "tailwindcss": "^3.2.7", 48 | "tsx": "^3.12.3" 49 | } 50 | } 51 |
-------------------------------------------------------------------------------- /nextjs/components/Footer.tsx: -------------------------------------------------------------------------------- 1 | import { IconBrandGithub, IconBrandTwitter } from "@tabler/icons-react"; 2 | import { FC } from "react"; 3 | 4 | export const Footer: FC = () => { 5 | return ( 6 | <div className="flex h-[50px] border-t border-gray-300 py-2 px-8 items-center sm:justify-between justify-center"> 7 | <div className="hidden sm:flex"></div> 8 | 9 | <div className="hidden sm:flex italic text-sm"> 10 | Created by 11 | <a 12 | className="hover:opacity-50 mx-1" 13 | href="https://twitter.com/RLanceMartin" 14 | target="_blank" 15 | rel="noreferrer" 16 | > 17 | Lance Martin 18 | </a> 19 | based on 20 | <a 21 | className="hover:opacity-50 ml-1" 22 | href="http://karpathy.github.io/" 23 | target="_blank" 24 | rel="noreferrer" 25 | > 26 | Andrej Karpathy 27 | </a> 28 | {`'s blog`} 29 | <a 30 | className="hover:opacity-50 ml-1" 31 | href="https://www.youtube.com/@AndrejKarpathy/videos" 32 | target="_blank" 33 | rel="noreferrer" 34 | > 35 | YouTube lectures 36 | </a> 37 | . 38 | </div> 39 | 40 | <div className="flex space-x-4"> 41 | <a 42 | className="flex items-center hover:opacity-50" 43 | href="https://twitter.com/RLanceMartin" 44 | target="_blank" 45 | rel="noreferrer" 46 | > 47 | <IconBrandTwitter size={24} /> 48 | </a> 49 | 50 | <a 51 | className="flex items-center hover:opacity-50" 52 | href="https://github.com/rlancemartin/karpathy-gpt" 53 | target="_blank" 54 | rel="noreferrer" 55 | > 56 | <IconBrandGithub size={24} /> 57 | </a> 58 | </div> 59 | </div>
60 | ); 61 | }; 62 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Karpathy-GPT 2 | 3 | ## Context 4 | 5 | This app is a template for using LangChain to build an LLM Q+A assistant from any set of YouTube videos. 6 | 7 | We use Karpathy's [course on LLMs](https://www.youtube.com/@AndrejKarpathy/videos) as an example. 8 | 9 | ![image](https://github.com/rlancemartin/karpathy-gpt/assets/122662504/775af292-e528-4760-9793-c8547dff3bcb) 10 | 11 | We use LangChain to: 12 | 13 | (1) convert YouTube URLs to text 14 | 15 | (2) feed the text into the LangChain [auto-evaluator](https://autoevaluator.langchain.com/) to test different chain parameters 16 | 17 | (3) with our chosen parameters, build a vectorstore retriever back-end with FastAPI (deployed to Railway) 18 | 19 | (4) stream the generated results (answer and retrieved docs) to a front-end (deployed to Vercel) 20 | 21 | --- 22 | 23 | ## Step 1: URLs to text 24 | 25 | See [the notebook](https://github.com/rlancemartin/karpathy-gpt/blob/main/index/youtube_urls_to_vectordb.ipynb) in the `/index` folder: 26 | 27 | * Uses LangChain's `OpenAIWhisperParser` to convert URLs to text in < 10 lines of code; a minimal sketch is below 28 |
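A minimal sketch of this step, assuming the `GenericLoader` / `YoutubeAudioLoader` / `OpenAIWhisperParser` combination from the LangChain docs for the version pinned in `/api/requirements.txt` (the URLs and save directory are illustrative):

```python
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser

# Two example lectures from the course (any YouTube URLs work)
urls = ["https://youtu.be/kCc8FmEb1nY", "https://youtu.be/VMj-3S1tku0"]
save_dir = "~/Downloads/YouTube"  # where the downloaded audio lands

# Fetch the audio with yt_dlp, then transcribe it with OpenAI Whisper
loader = GenericLoader(YoutubeAudioLoader(urls, save_dir), OpenAIWhisperParser())
docs = loader.load()  # one Document per transcribed chunk of audio
```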
29 | ## Step 2: Testing 30 | 31 | See [the text files](https://github.com/rlancemartin/karpathy-gpt/tree/main/eval) in the `/eval` folder: 32 | 33 | * Feed the text from step 1 and, optionally, an eval set to the [auto-evaluator app](https://autoevaluator.langchain.com/playground) 34 | * We can use this to test different parameters (see the full README in the repo [here](https://github.com/langchain-ai/auto-evaluator)) 35 | * Use the UI to run experiments 36 | * Select your best retriever, chain settings (e.g., k, split size, split overlap, etc.), LLM, and embeddings 37 | 38 | ![image](https://github.com/rlancemartin/karpathy-gpt/assets/122662504/fa3dabd5-6bf5-4607-b1a5-dd50ed3acbb7) 39 | 40 | ## Step 3: text to VectorDB 41 | 42 | See [the notebook](https://github.com/rlancemartin/karpathy-gpt/blob/main/index/youtube_urls_to_vectordb.ipynb) in the `/index` folder: 43 | 44 | * Split the text from step 1 using the parameters you found in step 2 45 | * Upsert the vectors to a VectorDB (e.g., in this example, `Pinecone`) with metadata; see the sketch after this list 46 | * See this [PR / notebook](https://github.com/rlancemartin/langchain/blob/e1fa1a41d0b2d7f476627a6798e98f02ebe4a83d/docs/modules/indexes/document_loaders/examples/youtube_audio.ipynb) if you want to run locally with a different VectorDB 47 |
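A rough sketch of the split-and-upsert step (the splitter and its parameters are illustrative placeholders — use whatever step 2 selected; the index name and region follow `api/karpathy_app.py`):

```python
import os

import pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone

# Split the transcripts from step 1 (chunk sizes here are placeholders)
splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)
splits = splitter.split_documents(docs)

# Upsert to Pinecone; metadata on each Document travels with its vector
pinecone.init(api_key=os.environ["PINECONE_API_KEY"], environment="us-east1-gcp")
Pinecone.from_documents(splits, OpenAIEmbeddings(), index_name="karpathy-gpt")
```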
48 | ## Step 4: Back-end 49 | 50 | See the `karpathy_app.py` file in the `/api` folder: 51 | 52 | * We use LangChain's `load_qa_chain` with a user-specified LLM and prompt (see `default_prompt_template`) 53 | * Given a question, this will stream the answer text back to the front-end and pass the retrieved documents back 54 | * We deploy this FastAPI API to Railway 55 | * See README.md in `/api` for local testing instructions 56 | 57 | ## Step 5: Front-end 58 | 59 | See the `/nextjs` directory for the Next.js app: 60 | 61 | * This will call the back-end with the query and fetch the documents / answer 62 | * Test the app locally by launching the back-end: 63 | ``` 64 | uvicorn karpathy_app:app 65 | ``` 66 | * To run the front-end locally with your locally running back-end, simply change the source in `fetchEventSource` [here](https://github.com/rlancemartin/karpathy-gpt/blob/a338ceb8666c02b0ec7e7f47ca0a196d774d1e4d/nextjs/pages/index.tsx#L37) and [here](https://github.com/rlancemartin/karpathy-gpt/blob/a338ceb8666c02b0ec7e7f47ca0a196d774d1e4d/nextjs/pages/index.tsx#L55) to `http://localhost:8000/karpathy-docs` and `http://localhost:8000/karpathy-stream` 67 | * To run the front-end locally, run: 68 | ``` 69 | npm run dev 70 | ``` 71 |
-------------------------------------------------------------------------------- /eval/test-set.csv: -------------------------------------------------------------------------------- 1 | "question","answer", 2 | "Why do we need to zero out the gradient before backprop at each step?","When we call backward, we fill in the gradients. This will update self.grad, so the gradients will accumulate unless we explicitly flush them by setting them to zero.", 3 | "What does the gradient tell us and how can we use it to minimize the loss?","The gradient tells us the direction to nudge each parameter in order to increase the loss. Each update (to minimize the loss) takes the negative gradient multiplied by a step size.", 4 | "What is the mean squared error loss?","The mean squared error loss is the average of the squared differences between actual values and predicted values", 5 | "What is Makemore?","Makemore is a simple character-level language model that will predict the next char in a sequence given some prior characters before it.", 6 | "What is log likelihood loss and why do we use the negative log likelihood?","We take the sum of the log probability of the label (correct character) for each character in the string. If the label has a high probability, such as 1, the log value is 0. If the label has a low probability, approaching 0, the log value approaches -inf. Since we want low-probability labels to produce a high loss, we take the negative log.", 7 | "How does cross entropy relate to negative log likelihood?","We use a softmax layer to compute probabilities from raw logits and then compute the negative log likelihood loss from those probabilities. Cross entropy just rolls these steps into one.", 8 | "What is the problem with extreme values for logits?","The exponentiation of very large positive logits can exceed the dynamic range of floating-point numbers, causing overflow issues.", 9 | "Why do we use batch normalization?","We want roughly gaussian activations to avoid vanishing gradients and use a normalization layer to automate this.", 10 | "What context window does a transformer have when predicting the output?","The transformer will never see more than `block_size` tokens when predicting the output.", 11 | "What is the problem with the Bigram model's context window?","The Bigram model only looks at the last char to predict the next char.", 12 | "How can self-attention improve on the limited context window of the Bigram model?","Self-attention lets all prior tokens 'talk' to each other when predicting the next token.", 13 | "How are keys and queries generated, and how do they interact?","We embed each token to a Key and a Query. Each Query does a dot-product with the Key at each prior location. If a Key and Query are aligned, they will produce a high value.", 14 | "For any token, what are x, k, v, and q?","x is private information to the token. q is what the token is interested in. k is what the token has. v is what the token will communicate to you if you find it interesting.", 15 | "What is the difference between an encoder and decoder?","The decoder is typically just self-attention (communication) and feed-forward (compute). We condition on the past. It uses a triangular mask on future tokens. It has an auto-regressive property where we can sample from it. The encoder can condition on the past or on a separate source via cross-attention.", 16 | "What are two innovations that improve optimization for deep neural nets?","First, residual connections create a gradient superhighway that goes directly from the supervision all the way to the input, unimpeded. At initialization, residual blocks effectively allow the gradient to flow unimpeded and, over time, they come online and start to contribute. Second, layer norm will normalize the rows (or examples) in each batch independently." 17 |
-------------------------------------------------------------------------------- /api/karpathy_app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pinecone 3 | import logging.config 4 | import asyncio 5 | from fastapi import FastAPI, Form 6 | from langchain.prompts import PromptTemplate 7 | from langchain.chat_models import ChatOpenAI 8 | from langchain.vectorstores import Pinecone 9 | from sse_starlette.sse import EventSourceResponse 10 | from fastapi.middleware.cors import CORSMiddleware 11 | from langchain.embeddings.openai import OpenAIEmbeddings 12 | from langchain.chains.question_answering import load_qa_chain 13 | from langchain.callbacks import AsyncIteratorCallbackHandler 14 | 15 | # Prompt template for QA 16 | default_prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 17 | 18 | {context} 19 | 20 | Question: {question} 21 | Helpful Answer:""" 22 | 23 | def make_llm(model_version): 24 | """ 25 | Make LLM 26 | @param model_version: model_version 27 | @return: llm, callback handler 28 | """ 29 | 30 | if (model_version == "gpt-3.5-turbo") or (model_version == "gpt-4"): 31 | callback = AsyncIteratorCallbackHandler() 32 | chosen_model = ChatOpenAI(model_name=model_version,streaming=True,callbacks=[callback],temperature=0) 33 | return chosen_model, callback 34 | 35 | def make_chain(llm): 36 | """ 37 | Make QA chain using specified default_prompt_template 38 | @param llm: llm for answering 39 | @return: qa_chain 40 | """ 41 | QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"],template=default_prompt_template) 42 | qa_chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_CHAIN_PROMPT) 43 | return qa_chain 44 | 45 | def make_retriever(logger): 46 | """ 47 | Make document retriever 48 | @return: Pinecone 49 | """ 50 | logger.info("`Retrieving docs ...`") 51 | 52 | # Set embeddings (must match your Pinecone DB) 53 | embedding = OpenAIEmbeddings() 54 | pc_api_key = os.environ.get('PINECONE_API_KEY') 55 | pc_region = "us-east1-gcp" 56 | pc_index = "karpathy-gpt" 57 | 58 | # Set Pinecone 59 | pinecone.init(api_key=str(pc_api_key), environment=str(pc_region)) 60 | p = Pinecone.from_existing_index(index_name=str(pc_index), embedding=embedding) 61 | return p 62 | 63 | import json 64 | async def generate_docs(question): 65 | """ 66 | @param question: question 67 | @return: docs 68 | """ 69 | 70 | # Set up logging 71 | logging.config.fileConfig('logging.conf', disable_existing_loggers=False) 72 | logger = logging.getLogger(__name__) 73 | 74 | # Model for answering 75 | model = "gpt-3.5-turbo" 76 | llm, callback=make_llm(model) 77 | 78 | # Chain 79 | chain=make_chain(llm) 80 | 81 | # Retriever 82 | retriever=make_retriever(logger) 83 | 84 | # Stream 85 |
logger.info("`Getting docs ...`") 86 | docs = retriever.similarity_search(query=question,k=3) 87 | for doc in docs: 88 | yield json.dumps({"data":{"pageContent": doc.page_content, "metadata": doc.metadata}}) 89 | 90 | async def generate_response(question): 91 | """ 92 | @param question: question 93 | @return: answer stream 94 | """ 95 | 96 | # Set up logging 97 | logging.config.fileConfig('logging.conf', disable_existing_loggers=False) 98 | logger = logging.getLogger(__name__) 99 | 100 | # Model for answering 101 | model = "gpt-3.5-turbo" 102 | llm, callback=make_llm(model) 103 | 104 | # Chain 105 | chain=make_chain(llm) 106 | 107 | # Retriever 108 | retriever=make_retriever(logger) 109 | 110 | # Stream 111 | logger.info("`Generating answer ...`") 112 | docs = retriever.similarity_search(query=question,k=3) 113 | task = asyncio.create_task( 114 | chain.acall({ 115 | "input_documents": docs, 116 | "question": question 117 | }), 118 | ) 119 | async for token in callback.aiter(): 120 | yield token 121 | await task 122 | 123 | # App 124 | app = FastAPI() 125 | 126 | origins = [ 127 | "http://localhost:3000", 128 | "localhost:3000", 129 | ] 130 | 131 | app.add_middleware( 132 | CORSMiddleware, 133 | allow_origins=["*"], 134 | allow_credentials=True, 135 | allow_methods=["*"], 136 | allow_headers=["*"], 137 | ) 138 | 139 | @app.get("/") 140 | async def root(): 141 | return {"message": "Welcome to Karpathy GPT!"} 142 | 143 | # Docs 144 | @app.post("/karpathy-docs") 145 | async def create_docs_response( 146 | query: str = Form("What is the difference between an encoder and decoder?"), 147 | ): 148 | return EventSourceResponse(generate_docs(query), headers={"Content-Type": "text/event-stream", "Connection": "keep-alive", "Cache-Control": "no-cache"}) 149 | 150 | # Answer stream 151 | @app.post("/karpathy-stream") 152 | async def create_response( 153 | query: str = Form("What is the difference between an encoder and decoder?"), 154 | ): 155 | # Return SSE 156 | return EventSourceResponse(generate_response(query), headers={"Content-Type": "text/event-stream", "Connection": "keep-alive", "Cache-Control": "no-cache"}) 157 |
-------------------------------------------------------------------------------- /api/README.md: -------------------------------------------------------------------------------- 1 | # `karpathy-gpt-api` 2 | 3 | This is the back-end for Karpathy-GPT. 4 | 5 | ### `Test locally` - 6 | 7 | Set API keys: 8 | ``` 9 | export OPENAI_API_KEY=xxx 10 | ``` 11 | 12 | Start local server: 13 | ``` 14 | uvicorn karpathy_app:app 15 | ``` 16 | 17 | Inputs: 18 | ``` 19 | query 20 | ``` 21 | 22 | Test doc retrieval: 23 | ``` 24 | curl -X POST -F "query=What is makemore" http://localhost:8000/karpathy-docs 25 | ``` 26 | 27 | ``` 28 | data: page_content="Hi everyone, hope you're well. And next up what I'd like to do is I'd like to build out Makemore. Like Micrograd before it, Makemore is a repository that I have on my GitHub web page. You can look at it. But just like with Micrograd, I'm going to build it out step by step and I'm going to spell everything out. So we're going to build it out slowly and together. Now, what is Makemore? Makemore, as the name suggests, makes more of things that you give it. So here's an example. Names.txt is an example dataset to Makemore. And when you look at Names.txt, you'll find that it's a very large dataset of names. So here's lots of different types of names. In fact, I believe there are 32,000 names that I've sort of found randomly on a government website.
And if you train Makemore on this dataset, it will learn to make more of things like this. And in particular, in this case, that will mean more things that sound name-like, but are actually unique names. And maybe if you have a baby and you're trying to assign a name, maybe you're looking for a cool new sounding unique name, Makemore might help you. So here are some example generations from the neural network once we train it on our dataset. So here's some example unique names that it will generate. Don't tell, I rot, Zendi, and so on. And so all these sort of sound name-like, but they're not, of course, names. So under the hood, Makemore is a character-level language model. So what that means is that it is treating every single line" metadata={'id': '02', 'link': 'https://youtu.be/PaCmpygFfXo', 'source': 'The spelled-out intro to language modeling: building makemore 02', 'title': 'The spelled-out intro to language modeling: building makemore'} 29 | 30 | data: page_content="not, of course, names. So under the hood, Makemore is a character-level language model. So what that means is that it is treating every single line here as an example. And within each example, it's treating them all as sequences of individual characters. So R-E-E-S-E is this example, and that's the sequence of characters. And that's the level on which we are building out Makemore. And what it means to be a character-level language model, then, is that it's just sort of modeling those sequences of characters, and it knows how to predict the next character in the sequence. Now, we're actually going to implement a large number of character-level language models in terms of the neural networks that are involved in predicting the next character in a sequence. So very simple bigram and bag-of-word models, multilayered perceptrons, recurrent neural networks, all the way to modern transformers. In fact, the transformer that we will build will be basically the equivalent transformer to GPT-2, if you have heard of GPT. So that's kind of a big deal. It's a modern network, and by the end of the series, you will actually understand how that works on the level of characters. Now, to give you a sense of the extensions here, after characters, we will probably spend some time on the word level, so that we can generate documents of words, not just little segments of characters, but we can generate entire large, much larger documents. And then we're probably going to go into images and" metadata={'id': '02', 'link': 'https://youtu.be/PaCmpygFfXo', 'source': 'The spelled-out intro to language modeling: building makemore 02', 'title': 'The spelled-out intro to language modeling: building makemore'} 31 | 32 | data: page_content="Hi everyone. Today we are continuing our implementation of MakeMore, our favorite character-level language model. Now, you'll notice that the background behind me is different. That's because I am in Kyoto and it is awesome. So I'm in a hotel room here. Now, over the last few lectures, we've built up to this architecture that is a multi-layer perceptron character-level language model. So we see that it receives three previous characters and tries to predict the fourth character in a sequence using a very simple multi-layer perceptron using one hidden layer of neurons with tenational neurons. So what I'd like to do now in this lecture is I'd like to complexify this architecture. In particular, we would like to take more characters in a sequence as an input, not just three. 
And in addition to that, we don't just want to feed them all into a single hidden layer because that squashes too much information too quickly. Instead, we would like to make a deeper model that progressively fuses this information to make its guess about the next character in a sequence. And so we'll see that as we make this architecture more complex, we're actually going to arrive at something that looks very much like a WaveNet. So WaveNet is this paper published by Dequined in 2016. And it is also a language model, basically, but it tries to predict audio sequences instead of character-level sequences or word-level sequences. But fundamentally, the modeling setup is identical. It is an autoregressive" metadata={'id': '06', 'link': 'htt 33 | ``` 34 | 35 | Test answer stream: 36 | 37 | ``` 38 | curl -X POST -F "query=What is makemore" http://localhost:8000/karpathy-stream 39 | ``` 40 | 41 | ``` 42 | data: M 43 | 44 | data: ak 45 | 46 | data: em 47 | 48 | data: ore 49 | 50 | data: is 51 | 52 | data: a 53 | 54 | data: character 55 | 56 | data: -level 57 | 58 | data: language 59 | 60 | data: model 61 | ``` 62 | 63 | ### `Test deployed API` - 64 | 65 | We deploy as an API to [Railway](https://railway.app/). 66 | 67 | Test: 68 | ``` 69 | curl -X POST -F "query=What is makemore" https://karpathy-gpt-production.up.railway.app/karpathy-stream 70 | ``` 71 | 72 | Returns streaming events, as shown above.
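If you would rather consume the stream from Python than curl, here is a small client sketch using the `requests` library (the endpoint and the `query` form field mirror the curl call above):

```python
import requests

url = "https://karpathy-gpt-production.up.railway.app/karpathy-stream"

# Send the query as a multipart form field (the equivalent of curl -F)
with requests.post(url, files={"query": (None, "What is makemore")}, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        # SSE frames arrive as "data: <token>" lines separated by blank lines
        if line.startswith("data: "):
            print(line[len("data: "):], end="", flush=True)
```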
-------------------------------------------------------------------------------- /nextjs/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import { Answer } from "@/components/Answer/Answer";
2 | import { Footer } from "@/components/Footer";
3 | import { Navbar } from "@/components/Navbar";
4 | import { LEXChunk } from "@/types";
5 | import { IconArrowRight, IconExternalLink, IconSearch } from "@tabler/icons-react";
6 | import Head from "next/head";
7 | import Image from "next/image";
8 | import { KeyboardEvent, useEffect, useRef, useState } from "react";
9 | import { fetchEventSource } from '@microsoft/fetch-event-source';
10 | 
11 | export default function Home() {
12 | 
13 |   const inputRef = useRef<HTMLInputElement | null>(null);
14 |   const [query, setQuery] = useState<string>("");
15 |   const [chunks, setChunks] = useState<LEXChunk[]>([]);
16 |   const [answer, setAnswer] = useState<string>("");
17 |   const [loading, setLoading] = useState<boolean>(false);
18 |   const [showSettings, setShowSettings] = useState<boolean>(false);
19 | 
20 |   // Handle answer
21 |   const handleAnswer = async () => {
22 | 
23 |     if (!query) {
24 |       alert("Please enter a query.");
25 |       return;
26 |     }
27 | 
28 |     setAnswer("");
29 |     setChunks([]);
30 |     setLoading(true);
31 | 
32 |     const formData = new FormData();
33 |     formData.append("query",query);
34 |     console.log(formData)
35 |     console.log(query)
36 | 
37 |     fetchEventSource("https://karpathy-gpt-production.up.railway.app/karpathy-docs", {
38 |       method: "POST",
39 |       headers: {
40 |         Accept: "text/event-stream",
41 |         Connection: "keep-alive",
42 |       },
43 |       body: formData,
44 |       onmessage: (event) => {
45 |         setLoading(false);
46 |         if (event.data === "DONE") {
47 |         } else {
48 |           const newChunk: LEXChunk = JSON.parse(event.data)?.data;
49 |           setChunks((oldChunks) => [...oldChunks, newChunk]);
50 |         }
51 |       }});
52 | 
53 |     const ctrl = new AbortController();
54 | 
55 |     fetchEventSource("https://karpathy-gpt-production.up.railway.app/karpathy-stream", {
56 |       method: "POST",
57 |       headers: {
58 |         Accept: "text/event-stream",
59 |         Connection: "keep-alive",
60 |       },
61 |       body: formData,
62 |       onmessage: (event) => {
63 |         setLoading(false);
64 |         if (event.data === "DONE") {
65 |         } else {
66 |           setAnswer((prev) => prev + event.data);
67 |         }
68 |       }});
69 | 
70 |   };
71 | 
72 |   const handleKeyDown = (e: KeyboardEvent<HTMLInputElement>) => {
73 |     if (e.key === "Enter") {
74 |       handleAnswer();
75 |     }
76 |   };
77 | 
78 |   // Render page
79 |   return (
80 |     <>
81 |       <Head>
82 |         <title>Karpathy GPT</title>
83 |         <meta
84 |           name="description"
85 |           content={`AI-powered search and chat for the Andrej Karpathy YouTube course.`}
86 |         />
87 |         <meta
88 |           name="viewport"
89 |           content="width=device-width, initial-scale=1"
90 |         />
91 |         <link
92 |           rel="icon"
93 |           href="/favicon.jpeg"
94 |         />
95 |       </Head>
96 | 
97 |       <div className="flex flex-col h-screen">
98 |         <Navbar />
99 |         <div className="flex-1 overflow-auto">
100 |           <div className="mx-auto flex h-full w-full max-w-[750px] flex-col items-center px-3 pt-4 sm:pt-8">
101 |             { (
102 |               <div className="relative w-full mt-4">
103 |                 <IconSearch className="absolute top-3 w-10 left-1 h-6 rounded-full opacity-50 sm:left-3 sm:top-4 sm:h-8" />
104 |                 <input
105 |                   ref={inputRef}
106 |                   className="h-12 w-full rounded-full border border-zinc-600 pr-12 pl-11 focus:border-zinc-800 focus:outline-none focus:ring-1 focus:ring-zinc-800 sm:h-16 sm:py-2 sm:pr-16 sm:pl-16 sm:text-lg"
107 |                   type="text"
108 |                   placeholder="What is the difference between an encoder and decoder?"
109 |                   value={query}
110 |                   onChange={(e) => setQuery(e.target.value)}
111 |                   onKeyDown={handleKeyDown}
112 |                 />
113 |               </div>
114 |             ) }
115 |             {loading ? (
116 |               <div className="mt-6 w-full">
117 |                 <div className="font-bold text-2xl">
118 |                   Passages
119 |                 </div>
120 |                 <div className="animate-pulse mt-2">
121 |                   <div className="h-4 bg-gray-300 rounded"></div>
122 |                   <div className="h-4 bg-gray-300 rounded mt-2"></div>
123 |                   <div className="h-4 bg-gray-300 rounded mt-2"></div>
124 |                   <div className="h-4 bg-gray-300 rounded mt-2"></div>
125 |                 </div>
126 |               </div>
127 |             ) : answer ? (
128 |               <div className="mt-6">
129 |                 <div className="font-bold text-2xl mb-2">
130 |                   Answer
131 |                 </div>
132 |                 <Answer text={answer} />
133 |                 <div className="mt-6 mb-16">
134 |                   <div className="font-bold text-2xl">Passages</div>
135 |                   {chunks.map((chunk, index) => (
136 |                     <div key={index} className="mt-4 border border-zinc-600 rounded-lg p-4">
137 |                       <div className="flex justify-between">
138 |                         <div className="flex items-center">
139 |                           <a href={chunk.metadata.link} target="_blank" rel="noreferrer">
140 |                             <Image
141 |                               className="rounded-lg"
142 |                               src={`/${chunk.metadata.id}.jpg`}
143 |                               alt={chunk.metadata.title}
144 |                               width={103}
145 |                               height={70}
146 |                             />
147 |                           </a>
148 |                           <div className="ml-4">
149 |                             <div className="font-bold text-lg">{chunk.metadata.title}</div>
150 |                           </div>
151 |                         </div>
152 |                         <a
153 |                           className="hover:opacity-50 ml-2"
154 |                           href={chunk.metadata.link}
155 |                           target="_blank"
156 |                           rel="noreferrer"
157 |                         >
158 |                           <IconExternalLink />
159 |                         </a>
160 |                       </div>
161 |                       <div className="mt-2">{chunk.pageContent}</div>
162 |                     </div>
163 |                   ))}
164 |                 </div>
165 |               </div>
166 |             ) : chunks.length > 0 ? (
167 |               <div className="mt-6 pb-16">
168 |                 <div className="font-bold text-2xl">Passages</div>
169 |                 {chunks.map((chunk, index) => (
170 |                   <div key={index} className="mt-4 border border-zinc-600 rounded-lg p-4">
171 |                     <div className="flex justify-between">
172 |                       <div className="flex items-center">
173 |                         <a href={chunk.metadata.link} target="_blank" rel="noreferrer">
174 |                           <Image
175 |                             className="rounded-lg"
176 |                             src={`/${chunk.metadata.id}.jpg`}
177 |                             alt={chunk.metadata.title}
178 |                             width={103}
179 |                             height={70}
180 |                           />
181 |                         </a>
182 |                         <div className="ml-4">
183 |                           <div className="font-bold text-lg">{chunk.metadata.title}</div>
184 |                         </div>
185 |                       </div>
186 |                       <a
187 |                         className="hover:opacity-50 ml-2"
188 |                         href={chunk.metadata.link}
189 |                         target="_blank"
190 |                         rel="noreferrer"
191 |                       >
192 |                         <IconExternalLink />
193 |                       </a>
194 |                     </div>
195 |                     <div className="mt-2">{chunk.pageContent}</div>
196 |                   </div>
197 |                 ))}
198 |               </div>
199 |             ) : (
200 |               <div className="mt-6 text-center text-lg">{`AI-powered search and chat for the Andrej Karpathy YouTube course.`}</div>
201 |             )}
202 |           </div>
203 |         </div>
204 |         <Footer />
205 |       </div>
206 |     </>
207 |   );
208 | }
209 | --------------------------------------------------------------------------------