├── .env.example ├── .gitignore ├── README.md ├── bun.lockb ├── index.ts ├── lib ├── ask.ts ├── convertHTML.ts ├── discord.ts └── download.ts ├── package.json └── tsconfig.json /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | DISCORD_PERMISSIONS= 3 | DISCORD_APP_ID= 4 | DISCORD_PUBLIC_KEY= 5 | DISCORD_BOT_TOKEN= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | content/ 4 | # Logs 5 | logs 6 | *.log 7 | npm-debug.log* 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 20 | 21 | # Runtime data 22 | 23 | pids 24 | *.pid 25 | *.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | 30 | lib-cov 31 | 32 | # Coverage directory used by tools like istanbul 33 | 34 | coverage 35 | *.lcov 36 | 37 | # nyc test coverage 38 | 39 | .nyc_output 40 | 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 42 | 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | 47 | bower_components 48 | 49 | # node-waf configuration 50 | 51 | .lock-wscript 52 | 53 | # Compiled binary addons (https://nodejs.org/api/addons.html) 54 | 55 | build/Release 56 | 57 | # Dependency directories 58 | 59 | node_modules/ 60 | jspm_packages/ 61 | 62 | # Snowpack dependency directory (https://snowpack.dev/) 63 | 64 | web_modules/ 65 | 66 | # TypeScript cache 67 | 68 | *.tsbuildinfo 69 | 70 | # Optional npm cache directory 71 | 72 | .npm 73 | 74 | # Optional eslint cache 75 | 76 | .eslintcache 77 | 78 | # Optional stylelint cache 79 | 80 
| .stylelintcache 81 | 82 | # Microbundle cache 83 | 84 | .rpt2_cache/ 85 | .rts2_cache_cjs/ 86 | .rts2_cache_es/ 87 | .rts2_cache_umd/ 88 | 89 | # Optional REPL history 90 | 91 | .node_repl_history 92 | 93 | # Output of 'npm pack' 94 | 95 | *.tgz 96 | 97 | # Yarn Integrity file 98 | 99 | .yarn-integrity 100 | 101 | # dotenv environment variable files 102 | 103 | .env 104 | .env.development.local 105 | .env.test.local 106 | .env.production.local 107 | .env.local 108 | 109 | # parcel-bundler cache (https://parceljs.org/) 110 | 111 | .parcel-cache 112 | 113 | # Next.js build output 114 | 115 | .next 116 | out 117 | 118 | # Nuxt.js build / generate output 119 | 120 | .nuxt 121 | dist 122 | 123 | # Gatsby files 124 | 125 | # Comment in the public line in if your project uses Gatsby and not Next.js 126 | 127 | # https://nextjs.org/blog/next-9-1#public-directory-support 128 | 129 | # public 130 | 131 | # vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # autodelve 2 | A simple AI-powered Discord bot that answers questions based on a set of documents. 
3 | 4 | **View the demo here: [Twitter/X Demo](https://x.com/0xSamHogan/status/1894937763717550272)** 5 | 6 | TODO: Better documentation 7 | 8 | ## Setup 9 | 10 | ```bash 11 | bun install 12 | ``` 13 | 14 | ### Create a `.env` file 15 | 16 | ```bash 17 | cp .env.example .env 18 | ``` 19 | 20 | Edit the `.env` file with your own values. 21 | 22 | 23 | ### Index a website 24 | 25 | ```bash 26 | bun run index.ts download https://docs.inference.net 27 | ``` 28 | 29 | This command will download the website, convert the HTML to Markdown, and save the content to the `content` directory. 30 | 31 | Once a website has been indexed, you can ask questions to the AI by running: 32 | 33 | ```bash 34 | bun run index.ts ask "How can I get started with inference.net?" 35 | ``` 36 | 37 | The response will be streamed to the console. 38 | 39 | ### Run in Discord 40 | 41 | 1. Create a Discord bot on the [Discord Developer Portal](https://discord.com/developers/applications). Make sure to add your secret values in the `.env` file. 42 | 43 | 2. Install the bot on your server 44 | 45 | 3. 
/**
 * Asks the model whether `question` can be answered from `content`.
 *
 * The model is forced to call the `submitIsAnswerable` tool, whose arguments
 * arrive as a JSON string. That string is model output, so it is parsed
 * defensively and validated against `SubmitIsAnswerableSchema` instead of
 * being trusted via a type assertion.
 *
 * @param question The user's question.
 * @param content The concatenated document text the answer must come from.
 * @returns `true` only when the model explicitly reports the question as
 *   answerable; `false` when it reports otherwise or when the tool call is
 *   missing, is not valid JSON, or does not match the schema.
 */
async function shouldAnswer(question: string, content: string): Promise<boolean> {
  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [
      {
        role: "system",
        content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response."
      },
      {
        role: "user",
        content: getPrompt(question, content)
      },
    ],
    tool_choice: {
      type: "function",
      function: {
        name: "submitIsAnswerable"
      }
    },
    tools: [
      zodFunction({ name: "submitIsAnswerable", parameters: SubmitIsAnswerableSchema }),
    ],
  });

  const choice = completion.choices[0];
  console.log(JSON.stringify(choice ?? '', null, 2));

  const rawArguments = choice?.message.tool_calls?.[0]?.function.arguments;
  if (rawArguments === undefined) {
    // No tool call came back, so there is no verdict to act on.
    return false;
  }

  let parsedArguments: unknown;
  try {
    parsedArguments = JSON.parse(rawArguments);
  } catch {
    // Truncated or malformed tool arguments must not crash the caller.
    console.error('submitIsAnswerable returned malformed JSON arguments:', rawArguments);
    return false;
  }

  const verdict = SubmitIsAnswerableSchema.safeParse(parsedArguments);
  return verdict.success ? verdict.data.isAnswerable : false;
}
/**
 * Answers `question` using the indexed website content.
 *
 * Steps:
 *  1. Load the indexed pages via `readMarkdownFiles`.
 *  2. Ask `shouldAnswer` whether the documents cover the question.
 *  3. Only if they do, request the actual answer from the model.
 *
 * @param question The user's question.
 * @returns The model's answer (possibly an empty string if the model
 *   returned no text), or `null` when nothing has been indexed or the
 *   question is judged unanswerable from the documents.
 */
export async function ask(question: string): Promise<string | null> {
  const files = await readMarkdownFiles();

  // With no indexed documents there is nothing to ground an answer in, so
  // skip both model calls instead of querying against an empty corpus.
  if (files.length === 0) {
    console.log('Not answering question:', question);
    return null;
  }

  const mappedFiles = files.map(file =>
    endent`
      URL: ${file.url}
      CONTENT: ${file.content}
    `
  ).join('\n\n');

  const shouldRespond = await shouldAnswer(question, mappedFiles);

  if (!shouldRespond) {
    console.log('Not answering question:', question);
    return null;
  }

  // Built only after the gate passes; it is not needed otherwise.
  const prompt = getPrompt(question, mappedFiles);

  const completion = await openai.chat.completions.create({
    model: "gpt-4o-mini",
    messages: [
      {
        role: "system",
        content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response."
      },
      {
        role: "user",
        content: prompt
      },
    ],
  });

  return completion.choices[0]?.message.content ?? '';
}
/**
 * Best-effort handling of `<pre><code>` snippets (SUSE's documentation in
 * particular emits awkward markup for them): every `<pre>` whose first child
 * is a `<code>` element becomes a fenced Markdown code block.
 *
 * The rule key is descriptive only; it is deliberately different from the
 * tag-stripping rule's key so the two are not confused.
 */
turndownService.addRule('fencedPreCode', {
  filter: (node: HTMLElement): boolean => {
    if (node.nodeName !== 'PRE') return false;
    // An empty <pre> has no first child. Dereferencing it unguarded throws,
    // and that exception makes the whole page convert to an empty string.
    return node.firstChild?.nodeName === 'CODE';
  },
  replacement: (content: string): string => {
    // Shell prompts and `sudo` are sometimes wrapped in their own inline
    // <code>, which shows up as stray backticks inside the block. Strip every
    // occurrence, not just the first, so multi-line snippets are cleaned too.
    const cleaned = content
      .replace(/`#`/g, '#')
      .replace(/`>`/g, '>')
      .replace(/`sudo`/g, 'sudo');
    return `\n\`\`\`\n${cleaned}\n\`\`\`\n`;
  },
});
/**
 * Add GFM support
 */
// turndownService.use(gfm);


/**
 * Converts the `<body>` of an HTML document to Markdown.
 *
 * @param html Raw HTML of a page.
 * @returns The Markdown rendering of the page body, or an empty string if
 *   conversion fails (the error is logged rather than thrown).
 */
export async function convertHTMLToMarkdown(html: string): Promise<string> {
  try {
    const select = cheerio.load(html);
    const root = select('body');
    return turndownService.turndown(render(root));
  } catch (error: unknown) {
    logger.error('Error converting HTML to Markdown:', error);
    return '';
  }
}
/**
 * Connects the Discord bot to the Discord API and starts answering messages.
 *
 * For every incoming message that was not sent by the bot itself, the message
 * text is passed to `ask`. If an answer comes back, the question/answer pair
 * is persisted with `storeMessage` and the answer is posted as a reply.
 *
 * @returns The logged-in Discord client instance.
 */
export async function connect(): Promise<Client> {
  // Discord rejects message content longer than 2000 characters.
  const maxReplyLength = 2000;

  const client = new Client({
    intents: [
      GatewayIntentBits.Guilds,
      GatewayIntentBits.GuildMessages,
      GatewayIntentBits.MessageContent,
    ],
    partials: [Partials.Channel, Partials.Message],
  });

  client.on("ready", (readyClient) => {
    // The ready event hands over a client whose `user` is guaranteed to be
    // set, so no non-null assertion is needed.
    console.log(`Logged in as ${readyClient.user.tag}!`);
  });

  client.on("debug", console.log);
  client.on("warn", console.log);
  client.on("error", console.error);

  client.on("messageCreate", async (message: Message) => {
    // Ignore messages from the bot itself
    if (message.author.id === client.user?.id) return;

    console.log(
      `Received message: "${message.content}" from ${message.author.tag} in channel ${message.channel.id} (${message.channel.type})`,
    );

    const content = message.content;
    // Attachment-only or otherwise blank messages carry no question to answer.
    if (content.trim() === '') return;

    // Event-emitter listeners are not awaited, so a rejection escaping this
    // handler would surface as an unhandled promise rejection. Contain it.
    try {
      const answer = await ask(content);
      if (!answer) return;

      storeMessage(content, answer);

      const reply = answer.length > maxReplyLength
        ? `${answer.slice(0, maxReplyLength - 1)}…`
        : answer;
      await message.reply(reply);
    } catch (error: unknown) {
      console.error(`Failed to answer message ${message.id}:`, error);
    }
  });

  await client.login(process.env.DISCORD_BOT_TOKEN);
  console.log("Autodelve is now running...");
  return client;
}
/**
 * Fetches one page and returns its HTML.
 *
 * @returns The response body, or `undefined` when the page should be skipped:
 *   the request failed, the status was not 2xx, or the response declares a
 *   non-HTML content type (PDFs, images, ...).
 */
async function fetchHtml(pageUrl: string): Promise<string | undefined> {
  try {
    const response = await fetch(pageUrl);
    const contentType = response.headers.get('content-type') ?? '';
    // A missing content type is given the benefit of the doubt.
    const isHtml = contentType === '' || contentType.includes('html');

    if (!response.ok || !isHtml) {
      if (!response.ok) {
        console.error(`Error crawling ${pageUrl}: HTTP ${response.status}`);
      }
      // Release the connection without reading a body we will not use.
      await response.body?.cancel();
      return undefined;
    }

    return await response.text();
  } catch (error: unknown) {
    if (error instanceof Error) {
      console.error(`Error crawling ${pageUrl}:`, error.message);
    } else {
      console.error(`Unknown error crawling ${pageUrl}`);
    }
    return undefined;
  }
}

/**
 * Extracts the distinct http(s) links on `html` that point at `hostname`.
 *
 * Links are resolved against `pageUrl` and returned without their
 * `#fragment`, so in-page anchors do not produce duplicate pages. Malformed
 * `href` values are skipped individually instead of aborting the whole page.
 */
function extractSameHostLinks(html: string, pageUrl: string, hostname: string): string[] {
  const $ = cheerio.load(html);
  const links = new Set<string>();

  for (const anchor of $('a[href]').toArray()) {
    const href = $(anchor).attr('href');
    if (!href) continue;

    let resolved: URL;
    try {
      resolved = new URL(href, pageUrl);
    } catch {
      continue; // e.g. href="http://[broken"
    }

    if (resolved.protocol !== 'http:' && resolved.protocol !== 'https:') continue;
    if (resolved.hostname !== hostname) continue;

    resolved.hash = '';
    links.add(resolved.toString());
  }

  return [...links];
}

/**
 * Crawls a website breadth-first, staying on the start URL's hostname.
 *
 * Breadth-first order guarantees each page is visited at its shallowest
 * depth, so a page reachable within `maxDepth` is never left unexpanded just
 * because it was first discovered through a longer path.
 *
 * @param url Start URL. It is depth 1.
 * @param maxDepth Deepest level to fetch. Pages at this depth are stored but
 *   their links are not followed. Values below 1 fetch nothing.
 * @returns Map from normalized page URL (no fragment) to that page's HTML.
 *   Pages that fail to download are logged and omitted. An invalid start URL
 *   yields an empty map.
 */
async function downloadWebsite(url: string, maxDepth: number = 3): Promise<Map<string, string>> {
  const pages = new Map<string, string>();

  let start: URL;
  try {
    start = new URL(url);
  } catch {
    console.error(`Error crawling ${url}:`, 'Invalid URL');
    return pages;
  }
  start.hash = '';
  const startUrl = start.toString();
  const hostname = start.hostname;

  // Everything ever queued, including failed fetches, so nothing is retried.
  const seen = new Set<string>([startUrl]);
  let frontier: string[] = [startUrl];

  for (let depth = 1; depth <= maxDepth && frontier.length > 0; depth++) {
    const nextFrontier: string[] = [];

    for (const currentUrl of frontier) {
      const html = await fetchHtml(currentUrl);
      if (html === undefined) continue;

      // Store the HTML content
      pages.set(currentUrl, html);

      // If we've reached max depth, don't extract more links
      if (depth === maxDepth) continue;

      for (const link of extractSameHostLinks(html, currentUrl, hostname)) {
        if (!seen.has(link)) {
          seen.add(link);
          nextFrontier.push(link);
        }
      }
    }

    frontier = nextFrontier;
  }

  return pages;
}
JSON.parse(fileContent); 98 | 99 | return contentArray; 100 | } catch (error) { 101 | console.error('Failed to read website content:', error); 102 | return []; 103 | } 104 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "autodelve", 3 | "module": "index.ts", 4 | "type": "module", 5 | "devDependencies": { 6 | "@types/bun": "latest" 7 | }, 8 | "peerDependencies": { 9 | "typescript": "^5.0.0" 10 | }, 11 | "dependencies": { 12 | "@types/turndown": "^5.0.5", 13 | "cheerio": "^1.0.0", 14 | "discord.js": "^14.18.0", 15 | "dom-serializer": "^2.0.0", 16 | "endent": "^2.1.0", 17 | "openai": "^4.85.4", 18 | "turndown": "^7.2.0", 19 | "turndown-plugin-gfm": "^1.0.2", 20 | "url": "^0.11.4", 21 | "zod": "^3.24.2" 22 | } 23 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | // Enable latest features 4 | "lib": ["ESNext", "DOM"], 5 | "target": "ESNext", 6 | "module": "ESNext", 7 | "moduleDetection": "force", 8 | "jsx": "react-jsx", 9 | "allowJs": true, 10 | 11 | // Bundler mode 12 | "moduleResolution": "bundler", 13 | "allowImportingTsExtensions": true, 14 | "verbatimModuleSyntax": true, 15 | "noEmit": true, 16 | 17 | // Best practices 18 | "strict": true, 19 | "skipLibCheck": true, 20 | "noFallthroughCasesInSwitch": true, 21 | 22 | // Some stricter flags (disabled by default) 23 | "noUnusedLocals": false, 24 | "noUnusedParameters": false, 25 | "noPropertyAccessFromIndexSignature": false 26 | } 27 | } 28 | --------------------------------------------------------------------------------