├── .env.example
├── .gitignore
├── README.md
├── bun.lockb
├── index.ts
├── lib
    ├── ask.ts
    ├── convertHTML.ts
    ├── discord.ts
    └── download.ts
├── package.json
└── tsconfig.json


/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
2 | DISCORD_PERMISSIONS=
3 | DISCORD_APP_ID=
4 | DISCORD_PUBLIC_KEY=
5 | DISCORD_BOT_TOKEN=


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
  2 | 
  3 | content/
  4 | # Logs
  5 | logs
  6 | *.log
  7 | npm-debug.log*
  8 | yarn-debug.log*
  9 | yarn-error.log*
 10 | lerna-debug.log*
 11 | .pnpm-debug.log*
 12 | 
 13 | # Caches
 14 | 
 15 | .cache
 16 | 
 17 | # Diagnostic reports (https://nodejs.org/api/report.html)
 18 | 
 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
 20 | 
 21 | # Runtime data
 22 | 
 23 | pids
 24 | *.pid
 25 | *.seed
 26 | *.pid.lock
 27 | 
 28 | # Directory for instrumented libs generated by jscoverage/JSCover
 29 | 
 30 | lib-cov
 31 | 
 32 | # Coverage directory used by tools like istanbul
 33 | 
 34 | coverage
 35 | *.lcov
 36 | 
 37 | # nyc test coverage
 38 | 
 39 | .nyc_output
 40 | 
 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
 42 | 
 43 | .grunt
 44 | 
 45 | # Bower dependency directory (https://bower.io/)
 46 | 
 47 | bower_components
 48 | 
 49 | # node-waf configuration
 50 | 
 51 | .lock-wscript
 52 | 
 53 | # Compiled binary addons (https://nodejs.org/api/addons.html)
 54 | 
 55 | build/Release
 56 | 
 57 | # Dependency directories
 58 | 
 59 | node_modules/
 60 | jspm_packages/
 61 | 
 62 | # Snowpack dependency directory (https://snowpack.dev/)
 63 | 
 64 | web_modules/
 65 | 
 66 | # TypeScript cache
 67 | 
 68 | *.tsbuildinfo
 69 | 
 70 | # Optional npm cache directory
 71 | 
 72 | .npm
 73 | 
 74 | # Optional eslint cache
 75 | 
 76 | .eslintcache
 77 | 
 78 | # Optional stylelint cache
 79 | 
 80 | .stylelintcache
 81 | 
 82 | # Microbundle cache
 83 | 
 84 | .rpt2_cache/
 85 | .rts2_cache_cjs/
 86 | .rts2_cache_es/
 87 | .rts2_cache_umd/
 88 | 
 89 | # Optional REPL history
 90 | 
 91 | .node_repl_history
 92 | 
 93 | # Output of 'npm pack'
 94 | 
 95 | *.tgz
 96 | 
 97 | # Yarn Integrity file
 98 | 
 99 | .yarn-integrity
100 | 
101 | # dotenv environment variable files
102 | 
103 | .env
104 | .env.development.local
105 | .env.test.local
106 | .env.production.local
107 | .env.local
108 | 
109 | # parcel-bundler cache (https://parceljs.org/)
110 | 
111 | .parcel-cache
112 | 
113 | # Next.js build output
114 | 
115 | .next
116 | out
117 | 
118 | # Nuxt.js build / generate output
119 | 
120 | .nuxt
121 | dist
122 | 
123 | # Gatsby files
124 | 
125 | # Comment in the public line in if your project uses Gatsby and not Next.js
126 | 
127 | # https://nextjs.org/blog/next-9-1#public-directory-support
128 | 
129 | # public
130 | 
131 | # vuepress build output
132 | 
133 | .vuepress/dist
134 | 
135 | # vuepress v2.x temp and cache directory
136 | 
137 | .temp
138 | 
139 | # Docusaurus cache and generated files
140 | 
141 | .docusaurus
142 | 
143 | # Serverless directories
144 | 
145 | .serverless/
146 | 
147 | # FuseBox cache
148 | 
149 | .fusebox/
150 | 
151 | # DynamoDB Local files
152 | 
153 | .dynamodb/
154 | 
155 | # TernJS port file
156 | 
157 | .tern-port
158 | 
159 | # Stores VSCode versions used for testing VSCode extensions
160 | 
161 | .vscode-test
162 | 
163 | # yarn v2
164 | 
165 | .yarn/cache
166 | .yarn/unplugged
167 | .yarn/build-state.yml
168 | .yarn/install-state.gz
169 | .pnp.*
170 | 
171 | # IntelliJ based IDEs
172 | .idea
173 | 
174 | # Finder (MacOS) folder config
175 | .DS_Store
176 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # autodelve
 2 | A simple AI-powered Discord bot that answers questions based on a set of documents.
 3 | 
 4 | **View the demo here: [Twitter/X Demo](https://x.com/0xSamHogan/status/1894937763717550272)**
 5 | 
 6 | TODO: Better documentation
 7 | 
 8 | ## Setup
 9 | 
10 | ```bash
11 | bun install
12 | ```
13 | 
14 | ### Create a `.env` file
15 | 
16 | ```bash
17 | cp .env.example .env
18 | ```
19 | 
20 | Edit the `.env` file with your own values.
21 | 
22 | 
23 | ### Index a website
24 | 
25 | ```bash
26 | bun run index.ts download https://docs.inference.net
27 | ```
28 | 
29 | This command will download the website, convert the HTML to Markdown, and save the content to the `content` directory.
30 | 
31 | Once a website has been indexed, you can ask questions to the AI by running:
32 | 
33 | ```bash
34 | bun run index.ts ask "How can I get started with inference.net?"
35 | ```
36 | 
37 | The response will be printed to the console.
38 | 
39 | ### Run in Discord
40 | 
41 | 1. Create a Discord bot on the [Discord Developer Portal](https://discord.com/developers/applications). Make sure to add your secret values in the `.env` file.
42 | 
43 | 2. Install the bot on your server
44 | 
45 | 3. Run the bot with:
46 | 
47 | ```bash
48 | bun index.ts
49 | ```
50 | 


--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/context-labs/autodelve/a0cea9a7a60ed669efcccb5a132a01b3401edcbf/bun.lockb


--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
 1 | import { download, readMarkdownFiles } from './lib/download';
 2 | import { ask } from './lib/ask';
 3 | import { connect } from './lib/discord';
 4 | 
 5 | 
 6 | // Dispatch on the CLI arguments: `download <url>`, `ask <question>`, otherwise run the bot
 7 | const args = process.argv.slice(2);
 8 | const command = args[0];
 9 | const input = args[1];
10 | 
11 | if (command === 'download' && input) {
12 |   await download(input); // awaited so a failed crawl or write is reported by this command
13 | } else if (command === 'ask' && input) {
14 |   const answer = await ask(input);
15 |   console.log(answer ?? 'No answer: the indexed documents do not cover this question.');
16 | } else {
17 |   console.log('Starting Discord bot...');
18 |   await connect();
19 | }
20 | 


--------------------------------------------------------------------------------
/lib/ask.ts:
--------------------------------------------------------------------------------
  1 | import endent from 'endent';
  2 | import OpenAI from "openai";
  3 | import { readMarkdownFiles } from './download';
  4 | import { zodFunction } from 'openai/helpers/zod';
  5 | import { z } from 'zod';
  6 | 
  7 | const openai = new OpenAI();
  8 | 
  9 | /**
 10 |  * Asks the model, through a forced `submitIsAnswerable` tool call, whether
 11 |  * the question can be answered from the supplied documents.
 12 |  *
 13 |  * @param question The user's question
 14 |  * @param content The document text embedded in the prompt
 15 |  * @returns The model's verdict, or false when the tool call is missing or malformed
 16 |  */
 17 | async function shouldAnswer(question: string, content: string): Promise<boolean> {
 18 |   const completion = await openai.chat.completions.create({
 19 |     model: "gpt-4o-mini",
 20 |     messages: [
 21 |       {
 22 |         role: "system",
 23 |         content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response."
 24 |       },
 25 |       { role: "user", content: getPrompt(question, content) },
 26 |     ],
 27 |     // Force the tool call so the reply is structured arguments rather than free text
 28 |     tool_choice: { "type": "function", "function": { "name": "submitIsAnswerable" } },
 29 |     tools: [zodFunction({ name: "submitIsAnswerable", parameters: SubmitIsAnswerableSchema })],
 30 |   });
 31 |   console.log(JSON.stringify(completion.choices[0] ?? '', null, 2));
 32 |   try {
 33 |     // Validate against the schema instead of trusting a cast on unchecked JSON
 34 |     const rawArguments = completion.choices[0]?.message.tool_calls?.[0]?.function.arguments ?? '{}';
 35 |     const parsed = SubmitIsAnswerableSchema.safeParse(JSON.parse(rawArguments));
 36 |     return parsed.success ? parsed.data.isAnswerable : false;
 37 |   } catch {
 38 |     return false; // the arguments were not valid JSON
 39 |   }
 40 | }
 41 | 
 42 | /**
 43 |  * Builds the user prompt: the documents, the answering instructions, one worked example, then the question.
 44 |  */
 45 | function getPrompt(question: string, content: string) {
 46 |   return endent`
 47 |     <documents>
 48 |     ${content}
 49 |     </documents>
 50 | 
 51 |     Please provide a clear, accurate answer to the user's question based only on the information in the documents above. Follow the below instructions.
 52 |     
 53 |     Instructions:
 54 |     - Provide very concise answers. 
 55 |     - Always respond with phrase and link to the relevant document.
 56 |     - Do not speculate or make up information. If you do not know the answer, say so politely.
 57 | 
 58 |     Example:
 59 | 
 60 |     <example_user_question>
 61 |     How can I get a role?
 62 |     </example_user_question>
 63 | 
 64 |     <example_assistant_response>
 65 |     Please check the [roles documentation](https://docs.inference.supply/discord-roles)
 66 |     </example_assistant_response>
 67 |     ----------------
 68 | 
 69 |     <user_question>
 70 |     ${question}
 71 |     </user_question>
 72 |   `;
 73 | }
 74 | // Arguments of the `submitIsAnswerable` tool call: a single boolean verdict on answerability.
 75 | const SubmitIsAnswerableSchema = z.object({
 76 |   isAnswerable: z.boolean().describe("Whether the question can be answered based on the documents"),
 77 | });
 78 | // Static type of those arguments, inferred from the schema.
 79 | type SubmitIsAnswerable = z.infer<typeof SubmitIsAnswerableSchema>;
 80 | 
 81 | /**
 82 |  * Answers a question from the indexed documents using two model calls: a gate
 83 |  * that decides whether the documents cover the question, then the answer.
 84 |  *
 85 |  * @param question The user's question
 86 |  * @returns The answer, or null when nothing is indexed, the gate declines, or the model returns no content
 87 |  */
 88 | export async function ask(question: string): Promise<string | null> {
 89 |   const files = await readMarkdownFiles();
 90 |   // Nothing indexed (or the index failed to load): skip both API calls
 91 |   if (files.length === 0) {
 92 |     console.log('No indexed content available; not answering question:', question);
 93 |     return null;
 94 |   }
 95 | 
 96 |   const mappedFiles = files.map(file =>
 97 |     endent`
 98 |       URL: ${file.url}
 99 |       CONTENT: ${file.content}
100 |     `
101 |   ).join('\n\n');
102 | 
103 |   if (!(await shouldAnswer(question, mappedFiles))) {
104 |     console.log('Not answering question:', question);
105 |     return null;
106 |   }
107 | 
108 |   const completion = await openai.chat.completions.create({
109 |     model: "gpt-4o-mini",
110 |     messages: [
111 |       { role: "system", content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response." },
112 |       { role: "user", content: getPrompt(question, mappedFiles) },
113 |     ],
114 |   });
115 | 
116 |   // An empty completion is reported as null, like every other "no answer" outcome
117 |   return completion.choices[0]?.message.content || null;
118 | }
119 | 
120 | 


--------------------------------------------------------------------------------
/lib/convertHTML.ts:
--------------------------------------------------------------------------------
 1 | import TurndownService from 'turndown';
 2 | import render from 'dom-serializer';
 3 | import * as cheerio from 'cheerio';
 4 | // import { gfm } from 'turndown-plugin-gfm';
 5 | 
 6 | const logger = console;
 7 | // Shared Turndown converter, configured for 'atx' headings, fenced code blocks and inlined links
 8 | const turndownService = new TurndownService({
 9 |   headingStyle: 'atx',
10 |   hr: '---',
11 |   bulletListMarker: '*',
12 |   codeBlockStyle: 'fenced',
13 |   fence: '```',
14 |   emDelimiter: '*', // unlike underscore, this works also intra-word
15 |   strongDelimiter: '**', // unlike underscores, this works also intra-word
16 |   linkStyle: 'inlined',
17 |   linkReferenceStyle: 'full',
18 |   br: '  ',
19 | });
20 | 
21 | /**
22 |  * Drop <style>, <script>, <aside> and <nav> elements: each is replaced by an empty string
23 |  */
24 | turndownService.addRule('remove', {
25 |   filter: ['style', 'script', 'aside', 'nav'],
26 |   replacement() {
27 |     return '';
28 |   },
29 | });
30 | 
31 | 
32 | /**
33 |  * The original note here says SUSE pages ship bad HTML code snippets, and
34 |  * this rule is the best-effort handling for them.
35 |  *
36 |  * It matches every <pre> whose first child is a <code> element and emits the
37 |  * converted content as a plain fenced block, after unwrapping shell prompt
38 |  * markers (#, >, sudo) that were wrapped in inline-code backticks.
39 |  */
40 | turndownService.addRule('remove', {
41 |   filter: (node: any) => {
42 |     // An empty <pre> has no first child; optional chaining makes that case a
43 |     // plain non-match instead of a TypeError.
44 |     return node.nodeName === 'PRE' && node.firstChild?.nodeName === 'CODE';
45 |   },
46 |   replacement: (content: string) => {
47 |     // Global match so every occurrence is unwrapped; a string pattern would
48 |     // replace only the first occurrence of each marker.
49 |     const unwrapped = content.replace(/`(#|>|sudo)`/g, '$1');
50 |     return `\n\`\`\`\n${unwrapped}\n\`\`\`\n`;
51 |   },
52 | });
53 | /**
54 |  * Add GFM support
55 |  */
56 | // turndownService.use(gfm);
57 | 
58 | 
59 | /**
60 |  * Converts the <body> of an HTML document to Markdown.
61 |  * @param html Raw HTML of a page
62 |  * @returns The Markdown text, or '' when conversion fails (the error is logged)
63 |  */
64 | export async function convertHTMLToMarkdown(html: string): Promise<string> {
65 |   try {
66 |     const select = cheerio.load(html);
67 |     // Only <body> is serialized and handed to Turndown
68 |     return turndownService.turndown(render(select('body')));
69 |   } catch (error) {
70 |     logger.error('Error converting HTML to Markdown:', error);
71 |     return '';
72 |   }
73 | }


--------------------------------------------------------------------------------
/lib/discord.ts:
--------------------------------------------------------------------------------
  1 | import {
  2 |   Client,
  3 |   GatewayIntentBits,
  4 |   Message,
  5 |   Partials
  6 | } from "discord.js";
  7 | import { ask } from "./ask";
  8 | import { appendFileSync, existsSync, mkdirSync } from "fs";
  9 | import path from "path";
 10 | 
 11 | /**
 12 |  * Appends one question/answer exchange to logs/answers.jsonl (one JSON
 13 |  * object per line), creating the logs directory when it is missing
 14 |  * @param question The user's question
 15 |  * @param answer The bot's answer
 16 |  */
 17 | function storeMessage(question: string, answer: string): void {
 18 |   const logDir = path.join(process.cwd(), "logs");
 19 |   const logFile = path.join(logDir, "answers.jsonl");
 20 | 
 21 |   // Make sure the target directory is there before writing
 22 |   if (!existsSync(logDir)) {
 23 |     mkdirSync(logDir, { recursive: true });
 24 |   }
 25 | 
 26 |   // Each entry is stamped with the time it was stored
 27 |   const line = JSON.stringify({
 28 |     timestamp: new Date().toISOString(),
 29 |     question,
 30 |     answer
 31 |   });
 32 | 
 33 |   // One record per line, added to the end of the file
 34 |   appendFileSync(logFile, line + "\n");
 35 |   console.log(`Stored Q&A pair in ${logFile}`);
 36 | }
 37 | 
 38 | /**
 39 |  * Connects the Discord bot to the Discord API and replies to incoming messages
 40 |  * @returns The Discord client instance
 41 |  */
 42 | export async function connect(): Promise<Client> {
 43 |   const client = new Client({
 44 |     intents: [
 45 |       GatewayIntentBits.Guilds,
 46 |       GatewayIntentBits.GuildMessages,
 47 |       GatewayIntentBits.MessageContent,
 48 |     ],
 49 |     partials: [Partials.Channel, Partials.Message],
 50 |   });
 51 | 
 52 |   client.on("ready", () => console.log(`Logged in as ${client.user!.tag}!`));
 53 | 
 54 |   client.on("debug", console.log);
 55 |   client.on("warn", console.log);
 56 |   client.on("error", console.error);
 57 | 
 58 |   client.on("messageCreate", async (message: Message) => {
 59 |     // Ignore messages from the bot itself
 60 |     if (message.author.id === client.user?.id) return;
 61 | 
 62 |     console.log(
 63 |       `Received message: "${message.content}" from ${message.author.tag} in channel ${message.channel.id} (${message.channel.type})`,
 64 |     );
 65 |     const content = message.content;
 66 |     // Nothing awaits an event listener's promise, so a failure in ask(), the
 67 |     // log write or the reply is caught here rather than left unhandled.
 68 |     try {
 69 |       const answer = await ask(content);
 70 |       if (answer) {
 71 |         storeMessage(content, answer);
 72 |         await message.reply(answer);
 73 |       }
 74 |     } catch (error) {
 75 |       console.error(`Failed to answer message ${message.id}:`, error);
 76 |     }
 77 |   });
 78 | 
 79 |   await client.login(process.env.DISCORD_BOT_TOKEN);
 80 |   console.log("Autodelve is now running...");
 81 |   return client;
 82 | }
 82 | 
 83 | /**
 84 |  * Prints, guild by guild, what the bot may do in each text-based channel:
 85 |  * view and send, view only, or nothing
 86 |  * @param client The Discord client instance
 87 |  */
 88 | export function listAccessibleChannels(client: Client): void {
 89 |   console.log("Channels the bot has access to:");
 90 | 
 91 |   for (const guild of client.guilds.cache.values()) {
 92 |     console.log(`\nGuild: ${guild.name} (${guild.id})`);
 93 | 
 94 |     // The bot's own member entry in this guild, looked up by user id
 95 |     const self = guild.members.cache.get(client.user!.id);
 96 | 
 97 |     for (const channel of guild.channels.cache.values()) {
 98 |       // Channels that are not text-based are skipped
 99 |       if (!channel.isTextBased()) continue;
100 |       const canView = channel.permissionsFor(self!)?.has('ViewChannel');
101 |       const canSend = channel.permissionsFor(self!)?.has('SendMessages');
102 | 
103 |       if (!canView) {
104 |         console.log(`  ❌ ${channel.name} (${channel.id}) - No access`);
105 |       } else if (canSend) {
106 |         console.log(`  ✅ ${channel.name} (${channel.id}) - Can view and send`);
107 |       } else {
108 |         console.log(`  👁️ ${channel.name} (${channel.id}) - Can view only`);
109 |       }
110 |     }
111 |   }
112 | }
113 | 
114 | // If this file is run directly (Bun's ES-module entry-point check), connect the bot
115 | if (import.meta.main) {
116 |   connect().catch(console.error);
117 | }
118 | 


--------------------------------------------------------------------------------
/lib/download.ts:
--------------------------------------------------------------------------------
  1 | import * as cheerio from 'cheerio';
  2 | import path from 'path';
  3 | import { URL } from 'url';
  4 | import { convertHTMLToMarkdown } from './convertHTML';
  5 | 
  6 | /**
  7 |  * Crawls a website depth-first, one request at a time, following only links
  8 |  * that stay on the start URL's hostname.
  9 |  * @param url Page to start from (depth 1)
 10 |  * @param maxDepth Deepest level fetched; links on pages at that depth are not followed
 11 |  * @returns Map from page URL (fragment removed) to the HTML fetched for it
 12 |  */
 13 | async function downloadWebsite(url: string, maxDepth: number = 3): Promise<Map<string, string>> {
 14 |   const pages = new Map<string, string>();
 15 |   // Every URL already requested, failures included, so nothing is fetched twice
 16 |   const attempted = new Set<string>();
 17 |   // Resolves href against base and drops the #fragment (it only names a spot
 18 |   // inside a page). Returns null for an href that cannot be parsed.
 19 |   const toPageUrl = (href: string, base?: string): URL | null => {
 20 |     try {
 21 |       const pageUrl = new URL(href, base);
 22 |       pageUrl.hash = '';
 23 |       return pageUrl;
 24 |     } catch {
 25 |       return null;
 26 |     }
 27 |   };
 28 | 
 29 |   const startUrl = toPageUrl(url);
 30 |   if (startUrl === null) {
 31 |     console.error(`Error crawling ${url}:`, 'Invalid URL');
 32 |     return pages;
 33 |   }
 34 |   const siteHostname = startUrl.hostname;
 35 | 
 36 |   async function crawl(currentUrl: string, depth: number): Promise<void> {
 37 |     if (depth > maxDepth || attempted.has(currentUrl)) return;
 38 |     attempted.add(currentUrl);
 39 |     try {
 40 |       const response = await fetch(currentUrl);
 41 |       if (!response.ok) throw new Error(`HTTP ${response.status}`); // never index error pages
 42 |       const html = await response.text();
 43 |       pages.set(currentUrl, html);
 44 |       if (depth === maxDepth) return; // deepest level: store the page, do not follow its links
 45 |       const $ = cheerio.load(html);
 46 |       const links = new Set<string>();
 47 |       $('a').each((_index: number, element) => {
 48 |         // A malformed href is skipped on its own instead of aborting the whole page
 49 |         const href = $(element).attr('href');
 50 |         const target = href ? toPageUrl(href, currentUrl) : null;
 51 |         if (target?.hostname === siteHostname) links.add(target.toString());
 52 |       });
 53 |       for (const link of links) await crawl(link, depth + 1);
 54 |     } catch (error: unknown) {
 55 |       console.error(`Error crawling ${currentUrl}:`, error instanceof Error ? error.message : error);
 56 |     }
 57 |   }
 58 | 
 59 |   await crawl(startUrl.toString(), 1);
 60 |   return pages;
 61 | }
 62 | 
 63 | 
 64 | /**
 65 |  * Crawls the site at `url`, converts every fetched page to Markdown and
 66 |  * writes the result to content/website_content.json as one JSON array of
 67 |  * { url, content } objects.
 68 |  *
 69 |  * @param url Start page handed to the crawler
 70 |  * @returns Resolves once the file has been written and the path logged
 71 |  */
 72 | export async function download(url: string) {
 73 |   const outputPath = path.join('content', 'website_content.json');
 74 |   const htmlByUrl = await downloadWebsite(url);
 75 | 
 76 |   // Start a conversion for every page, keeping each Markdown text paired
 77 |   // with the URL it came from, and wait for all of them together
 78 |   const pages = await Promise.all(
 79 |     Array.from(htmlByUrl, async ([pageUrl, html]) => ({
 80 |       url: pageUrl,
 81 |       content: await convertHTMLToMarkdown(html),
 82 |     }))
 83 |   );
 84 | 
 85 |   // Everything goes into a single pretty-printed JSON file
 86 |   await Bun.write(outputPath, JSON.stringify(pages, null, 2));
 87 | 
 88 |   console.log(`Website content saved to ${outputPath}`);
 89 | }
 90 | 
 91 | 
 92 | /** Loads the pages saved by `download`, keeping only well-formed entries; returns [] when the file is missing or malformed. */
 93 | export async function readMarkdownFiles(): Promise<Array<{ url: string, content: string }>> {
 94 |   const filePath = path.join('content', 'website_content.json');
 95 |   try {
 96 |     const parsed: unknown = JSON.parse(await Bun.file(filePath).text());
 97 |     if (!Array.isArray(parsed)) throw new Error(`${filePath} does not contain a JSON array`);
 98 |     return parsed.filter((page): page is { url: string, content: string } =>
 99 |       typeof page?.url === 'string' && typeof page?.content === 'string');
100 |   } catch (error) {
101 |     console.error('Failed to read website content:', error);
102 |     return [];
103 |   }
104 | }


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "autodelve",
 3 |   "module": "index.ts",
 4 |   "type": "module",
 5 |   "devDependencies": {
 6 |     "@types/bun": "latest"
 7 |   },
 8 |   "peerDependencies": {
 9 |     "typescript": "^5.0.0"
10 |   },
11 |   "dependencies": {
12 |     "@types/turndown": "^5.0.5",
13 |     "cheerio": "^1.0.0",
14 |     "discord.js": "^14.18.0",
15 |     "dom-serializer": "^2.0.0",
16 |     "endent": "^2.1.0",
17 |     "openai": "^4.85.4",
18 |     "turndown": "^7.2.0",
19 |     "turndown-plugin-gfm": "^1.0.2",
20 |     "url": "^0.11.4",
21 |     "zod": "^3.24.2"
22 |   }
23 | }


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     // Enable latest features
 4 |     "lib": ["ESNext", "DOM"],
 5 |     "target": "ESNext",
 6 |     "module": "ESNext",
 7 |     "moduleDetection": "force",
 8 |     "jsx": "react-jsx",
 9 |     "allowJs": true,
10 | 
11 |     // Bundler mode
12 |     "moduleResolution": "bundler",
13 |     "allowImportingTsExtensions": true,
14 |     "verbatimModuleSyntax": true,
15 |     "noEmit": true,
16 | 
17 |     // Best practices
18 |     "strict": true,
19 |     "skipLibCheck": true,
20 |     "noFallthroughCasesInSwitch": true,
21 | 
22 |     // Some stricter flags (disabled by default)
23 |     "noUnusedLocals": false,
24 |     "noUnusedParameters": false,
25 |     "noPropertyAccessFromIndexSignature": false
26 |   }
27 | }
28 | 


--------------------------------------------------------------------------------