├── .env.example
├── .gitignore
├── README.md
├── bun.lockb
├── index.ts
├── lib
│   ├── ask.ts
│   ├── convertHTML.ts
│   ├── discord.ts
│   └── download.ts
├── package.json
└── tsconfig.json
/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=
2 | DISCORD_PERMISSIONS=
3 | DISCORD_APP_ID=
4 | DISCORD_PUBLIC_KEY=
5 | DISCORD_BOT_TOKEN=
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
2 |
3 | content/
4 | # Logs
5 | logs
6 | *.log
7 | npm-debug.log*
8 | yarn-debug.log*
9 | yarn-error.log*
10 | lerna-debug.log*
11 | .pnpm-debug.log*
12 |
13 | # Caches
14 |
15 | .cache
16 |
17 | # Diagnostic reports (https://nodejs.org/api/report.html)
18 |
19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
20 |
21 | # Runtime data
22 |
23 | pids
24 | *.pid
25 | *.seed
26 | *.pid.lock
27 |
28 | # Directory for instrumented libs generated by jscoverage/JSCover
29 |
30 | lib-cov
31 |
32 | # Coverage directory used by tools like istanbul
33 |
34 | coverage
35 | *.lcov
36 |
37 | # nyc test coverage
38 |
39 | .nyc_output
40 |
41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
42 |
43 | .grunt
44 |
45 | # Bower dependency directory (https://bower.io/)
46 |
47 | bower_components
48 |
49 | # node-waf configuration
50 |
51 | .lock-wscript
52 |
53 | # Compiled binary addons (https://nodejs.org/api/addons.html)
54 |
55 | build/Release
56 |
57 | # Dependency directories
58 |
59 | node_modules/
60 | jspm_packages/
61 |
62 | # Snowpack dependency directory (https://snowpack.dev/)
63 |
64 | web_modules/
65 |
66 | # TypeScript cache
67 |
68 | *.tsbuildinfo
69 |
70 | # Optional npm cache directory
71 |
72 | .npm
73 |
74 | # Optional eslint cache
75 |
76 | .eslintcache
77 |
78 | # Optional stylelint cache
79 |
80 | .stylelintcache
81 |
82 | # Microbundle cache
83 |
84 | .rpt2_cache/
85 | .rts2_cache_cjs/
86 | .rts2_cache_es/
87 | .rts2_cache_umd/
88 |
89 | # Optional REPL history
90 |
91 | .node_repl_history
92 |
93 | # Output of 'npm pack'
94 |
95 | *.tgz
96 |
97 | # Yarn Integrity file
98 |
99 | .yarn-integrity
100 |
101 | # dotenv environment variable files
102 |
103 | .env
104 | .env.development.local
105 | .env.test.local
106 | .env.production.local
107 | .env.local
108 |
109 | # parcel-bundler cache (https://parceljs.org/)
110 |
111 | .parcel-cache
112 |
113 | # Next.js build output
114 |
115 | .next
116 | out
117 |
118 | # Nuxt.js build / generate output
119 |
120 | .nuxt
121 | dist
122 |
123 | # Gatsby files
124 |
125 | # Comment in the public line in if your project uses Gatsby and not Next.js
126 |
127 | # https://nextjs.org/blog/next-9-1#public-directory-support
128 |
129 | # public
130 |
131 | # vuepress build output
132 |
133 | .vuepress/dist
134 |
135 | # vuepress v2.x temp and cache directory
136 |
137 | .temp
138 |
139 | # Docusaurus cache and generated files
140 |
141 | .docusaurus
142 |
143 | # Serverless directories
144 |
145 | .serverless/
146 |
147 | # FuseBox cache
148 |
149 | .fusebox/
150 |
151 | # DynamoDB Local files
152 |
153 | .dynamodb/
154 |
155 | # TernJS port file
156 |
157 | .tern-port
158 |
159 | # Stores VSCode versions used for testing VSCode extensions
160 |
161 | .vscode-test
162 |
163 | # yarn v2
164 |
165 | .yarn/cache
166 | .yarn/unplugged
167 | .yarn/build-state.yml
168 | .yarn/install-state.gz
169 | .pnp.*
170 |
171 | # IntelliJ based IDEs
172 | .idea
173 |
174 | # Finder (MacOS) folder config
175 | .DS_Store
176 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # autodelve
2 | A simple AI-powered Discord bot that answers questions based on a set of documents.
3 |
4 | **View the demo here: [Twitter/X Demo](https://x.com/0xSamHogan/status/1894937763717550272)**
5 |
6 | TODO: Better documentation
7 |
8 | ## Setup
9 |
10 | ```bash
11 | bun install
12 | ```
13 |
14 | ### Create a `.env` file
15 |
16 | ```bash
17 | cp .env.example .env
18 | ```
19 |
20 | Edit the `.env` file with your own values.
21 |
22 |
23 | ### Index a website
24 |
25 | ```bash
26 | bun run index.ts download https://docs.inference.net
27 | ```
28 |
29 | This command will download the website, convert the HTML to Markdown, and save the content to the `content` directory.
30 |
31 | Once a website has been indexed, you can ask questions to the AI by running:
32 |
33 | ```bash
34 | bun run index.ts ask "How can I get started with inference.net?"
35 | ```
36 |
37 | The response will be printed to the console once it has been generated.
38 |
39 | ### Run in Discord
40 |
41 | 1. Create a Discord bot on the [Discord Developer Portal](https://discord.com/developers/applications). Make sure to add your secret values in the `.env` file.
42 |
43 | 2. Install the bot on your server
44 |
45 | 3. Run the bot with:
46 |
47 | ```bash
48 | bun index.ts
49 | ```
50 |
--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/context-labs/autodelve/a0cea9a7a60ed669efcccb5a132a01b3401edcbf/bun.lockb
--------------------------------------------------------------------------------
/index.ts:
--------------------------------------------------------------------------------
1 | import { download, readMarkdownFiles } from './lib/download';
2 | import { ask } from './lib/ask';
3 | import { connect } from './lib/discord';
4 |
5 |
6 | // CLI dispatch: `download <url>`, `ask "<question>"`, otherwise start the Discord bot.
7 | // A sub-command given without its argument also falls through to the bot.
8 | const [command, input] = process.argv.slice(2);
9 |
10 | if (input && command === 'download') {
11 |   download(input);
12 | } else if (input && command === 'ask') {
13 |   // The answer is printed once it has been fully generated
14 |   const reply = await ask(input);
15 |   console.log(reply);
16 | } else {
17 |   console.log('Starting Discord bot...');
18 |   connect();
19 | }
20 |
--------------------------------------------------------------------------------
/lib/ask.ts:
--------------------------------------------------------------------------------
1 | import endent from 'endent';
2 | import OpenAI from "openai";
3 | import { readMarkdownFiles } from './download';
4 | import { zodFunction } from 'openai/helpers/zod';
5 | import { z } from 'zod';
6 |
7 | const openai = new OpenAI(); // shared client for both model calls; presumably authenticates via OPENAI_API_KEY (see .env.example) — TODO confirm
8 |
9 | /**
10 |  * Asks the model whether `question` can be answered from `content`.
11 |  * The model is forced to call the `submitIsAnswerable` tool, so the verdict arrives as JSON arguments.
12 |  * @returns `true` only when the model explicitly reports the question as answerable
13 |  */
14 | async function shouldAnswer(question: string, content: string): Promise<boolean> {
15 |   const completion = await openai.chat.completions.create({
16 |     model: "gpt-4o-mini",
17 |     messages: [
18 |       {
19 |         role: "system",
20 |         content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response."
21 |       },
22 |       { role: "user", content: getPrompt(question, content) },
23 |     ],
24 |     tool_choice: { type: "function", function: { name: "submitIsAnswerable" } },
25 |     tools: [
26 |       zodFunction({ name: "submitIsAnswerable", parameters: SubmitIsAnswerableSchema }),
27 |     ],
28 |   });
29 |
30 |   const choice = completion.choices[0];
31 |   console.log(JSON.stringify(choice ?? '', null, 2));
32 |   // A missing tool call, malformed JSON or a non-boolean verdict all mean "do not answer" instead of throwing.
33 |   try {
34 |     const rawArgs = choice?.message.tool_calls?.[0]?.function.arguments ?? '{}';
35 |     const args = JSON.parse(rawArgs) as Partial<SubmitIsAnswerable> | null;
36 |     return args?.isAnswerable === true;
37 |   } catch {
38 |     return false;
39 |   }
40 | }
41 |
42 | // Build the text sent as the user message: document content, answering instructions, one example exchange, then the question.
43 | function getPrompt(question: string, content: string) { // both arguments are interpolated verbatim into the template
44 | const prompt = endent`
45 |
46 | ${content}
47 |
48 |
49 | Please provide a clear, accurate answer to the user's question based only on the information in the documents above. Follow the below instructions.
50 |
51 | Instructions:
52 | - Provide very concise answers.
53 | - Always respond with phrase and link to the relevant document.
54 | - Do not speculate or make up information. If you do not know the answer, say so politely.
55 |
56 | Example:
57 |
58 |
59 | How can I get a role?
60 |
61 |
62 |
63 | Please check the [roles documentation](https://docs.inference.supply/discord-roles)
64 |
65 | ----------------
66 |
67 |
68 | ${question}
69 |
70 | `; // NOTE(review): content, example and question appear without any delimiters around them — confirm nothing was stripped from this template
71 |
72 | return prompt;
73 | }
74 |
75 | const SubmitIsAnswerableSchema = z.object({
76 |   isAnswerable: z.boolean().describe("Whether the question can be answered based on the documents"),
77 | });
78 | // Static type of the tool-call arguments, derived from the schema so the two cannot drift apart.
79 | type SubmitIsAnswerable = z.infer<typeof SubmitIsAnswerableSchema>;
80 |
81 | /**
82 |  * Answers `question` from the indexed markdown documents.
83 |  * A first model call decides whether the documents cover the question; only then is an answer generated.
84 |  * @param question The user's question
85 |  * @returns The model's answer (possibly empty), or `null` when the question is judged unanswerable
86 |  */
87 | export async function ask(question: string): Promise<string | null> {
88 |   const files = await readMarkdownFiles();
89 |   // Every document is labelled with its URL, which the prompt asks the model to link to.
90 |   const mappedFiles = files.map(file =>
91 |     endent`
92 |       URL: ${file.url}
93 |       CONTENT: ${file.content}
94 |     `
95 |   ).join('\n\n');
96 |
97 |   const shouldRespond = await shouldAnswer(question, mappedFiles);
98 |   if (!shouldRespond) {
99 |     console.log('Not answering question:', question);
100 |     return null;
101 |   }
102 |
103 |   const completion = await openai.chat.completions.create({
104 |     model: "gpt-4o-mini",
105 |     messages: [
106 |       {
107 |         role: "system",
108 |         content: "You are a assistant that answers questions based on the provided documents. Be very concise in your response."
109 |       },
110 |       {
111 |         role: "user",
112 |         content: getPrompt(question, mappedFiles)
113 |       },
114 |     ],
115 |   });
116 |
117 |   return completion.choices[0]?.message.content ?? '';
118 | }
119 |
120 |
--------------------------------------------------------------------------------
/lib/convertHTML.ts:
--------------------------------------------------------------------------------
1 | import TurndownService from 'turndown';
2 | import render from 'dom-serializer';
3 | import * as cheerio from 'cheerio';
4 | // import { gfm } from 'turndown-plugin-gfm';
5 |
6 | const logger = console;
7 | // Shared Turndown converter: ATX headings, fenced code blocks, `*` bullets, inlined links.
8 | const turndownService = new TurndownService({
9 | headingStyle: 'atx',
10 | hr: '---',
11 | bulletListMarker: '*',
12 | codeBlockStyle: 'fenced',
13 | fence: '```',
14 | emDelimiter: '*', // unlike underscore, this works also intra-word
15 | strongDelimiter: '**', // unlike underscores, this works also intra-word
16 | linkStyle: 'inlined',
17 | linkReferenceStyle: 'full',
18 | br: ' ',
19 | });
20 |
21 | /**
22 | * Drop style, script, aside and nav elements: each one converts to an empty string.
23 | */
24 | turndownService.addRule('remove', {
25 | filter: ['style', 'script', 'aside', 'nav'],
26 | replacement() {
27 | return '';
28 | },
29 | });
30 |
31 |
32 | /**
33 |  * SUSE has bad HTML code snippets; we do our best to parse them here.
34 |  * Every pre element whose first child is a code element is re-emitted as a fenced block.
35 |  */
36 | turndownService.addRule('fencedPreCode', {
37 |   filter: (node: any) => {
38 |     if (node.nodeName !== 'PRE') return false;
39 |     // An empty pre element has no first child: it must not match, and must not throw.
40 |     return node.firstChild?.nodeName === 'CODE';
41 |   },
42 |   replacement: (content: string) => {
43 |     // Prompts and `sudo` can show up wrapped in backticks; unwrap every occurrence,
44 |     // not only the first one (a snippet typically has one prompt per line).
45 |     const cleaned = content
46 |       .replace(/`#`/g, '#')
47 |       .replace(/`>`/g, '>')
48 |       .replace(/`sudo`/g, 'sudo');
49 |     return `\n\`\`\`\n${cleaned}\n\`\`\`\n`;
50 |   },
51 | });
52 |
53 | /**
54 | * Add GFM support
55 | */
56 | // turndownService.use(gfm);
57 |
58 |
59 | /**
60 |  * Converts the body of an HTML document to Markdown.
61 |  * @param html Raw HTML source
62 |  * @returns The Markdown text, or an empty string if conversion fails (the error is logged)
63 |  */
64 | export async function convertHTMLToMarkdown(html: string): Promise<string> {
65 |   try {
66 |     const select = cheerio.load(html);
67 |     // Only the body element is serialized and handed to Turndown.
68 |     return turndownService.turndown(render(select('body')));
69 |   } catch (error) {
70 |     logger.error('Error converting HTML to Markdown:', error);
71 |     return '';
72 |   }
73 | }
--------------------------------------------------------------------------------
/lib/discord.ts:
--------------------------------------------------------------------------------
1 | import {
2 | Client,
3 | GatewayIntentBits,
4 | Message,
5 | Partials
6 | } from "discord.js";
7 | import { ask } from "./ask";
8 | import { appendFileSync, existsSync, mkdirSync } from "fs";
9 | import path from "path";
10 |
11 | /**
12 |  * Appends one question/answer exchange to `logs/answers.jsonl` under the current working directory.
13 |  * Each record is one JSON line stamped with the current time in ISO format.
14 |  * The `logs` directory is created when it does not exist yet.
15 |  * @param question The user's question
16 |  * @param answer The bot's answer
17 |  */
18 | function storeMessage(question: string, answer: string): void {
19 |   const logDir = path.join(process.cwd(), "logs");
20 |   const logFile = path.join(logDir, "answers.jsonl");
21 |
22 |   // Make sure the target directory is there before writing
23 |   if (!existsSync(logDir)) {
24 |     mkdirSync(logDir, { recursive: true });
25 |   }
26 |
27 |   // One line per record: timestamp first, then the exchange itself
28 |   const line = JSON.stringify({
29 |     timestamp: new Date().toISOString(),
30 |     question,
31 |     answer,
32 |   });
33 |   appendFileSync(logFile, `${line}\n`);
34 |
35 |   console.log(`Stored Q&A pair in ${logFile}`);
36 | }
37 |
38 | /**
39 |  * Logs the bot in and answers incoming messages via `ask`, storing each answered exchange.
40 |  * Failures while answering one message are logged and never escape the event handler.
41 |  * @returns The logged-in Discord client instance
42 |  */
43 | export async function connect(): Promise<Client> {
44 |   const client = new Client({
45 |     intents: [
46 |       GatewayIntentBits.Guilds,
47 |       GatewayIntentBits.GuildMessages,
48 |       GatewayIntentBits.MessageContent,
49 |     ],
50 |     partials: [Partials.Channel, Partials.Message],
51 |   });
52 |
53 |   client.on("ready", () => {
54 |     console.log(`Logged in as ${client.user!.tag}!`);
55 |   });
56 |   client.on("debug", console.log);
57 |   client.on("warn", console.log);
58 |   client.on("error", console.error);
59 |
60 |   client.on("messageCreate", async (message: Message) => {
61 |     // Ignore messages from the bot itself
62 |     if (message.author.id === client.user?.id) return;
63 |     const content = message.content;
64 |     console.log(
65 |       `Received message: "${content}" from ${message.author.tag} in channel ${message.channel.id} (${message.channel.type})`,
66 |     );
67 |     // Nothing awaits this async listener, so a rejection here would otherwise go unhandled.
68 |     try {
69 |       const answer = await ask(content);
70 |       if (!answer) return;
71 |       storeMessage(content, answer);
72 |       await message.reply(answer);
73 |     } catch (error) {
74 |       console.error("Failed to answer message:", error);
75 |     }
76 |   });
77 |
78 |   await client.login(process.env.DISCORD_BOT_TOKEN);
79 |   console.log("Autodelve is now running...");
80 |   return client;
81 | }
82 |
83 | /**
84 |  * Prints, guild by guild, every text-based channel together with the bot's access level there.
85 |  * Only guilds, members and channels present in the client's caches are inspected.
86 |  * @param client The Discord client instance
87 |  */
88 | export function listAccessibleChannels(client: Client): void {
89 |   console.log("Channels the bot has access to:");
90 |
91 |   for (const guild of client.guilds.cache.values()) {
92 |     console.log(`\nGuild: ${guild.name} (${guild.id})`);
93 |     // Permissions are resolved against the bot's own member entry in this guild
94 |     const me = guild.members.cache.get(client.user!.id);
95 |
96 |     for (const channel of guild.channels.cache.values()) {
97 |       // Anything that is not text-based is skipped
98 |       if (!channel.isTextBased()) continue;
99 |       const permissions = channel.permissionsFor(me!);
100 |       const canView = permissions?.has('ViewChannel');
101 |       const canSend = permissions?.has('SendMessages');
102 |
103 |       if (canView && canSend) {
104 |         console.log(` ✅ ${channel.name} (${channel.id}) - Can view and send`);
105 |       } else if (canView) {
106 |         console.log(` 👁️ ${channel.name} (${channel.id}) - Can view only`);
107 |       } else {
108 |         console.log(` ❌ ${channel.name} (${channel.id}) - No access`);
109 |       }
110 |     }
111 |   }
112 | }
113 |
114 | // When this file is the program's entry point (not imported), connect the bot and log any startup failure
115 | if (require.main === module) {
116 | connect().catch(console.error);
117 | }
118 |
--------------------------------------------------------------------------------
/lib/download.ts:
--------------------------------------------------------------------------------
1 | import * as cheerio from 'cheerio';
2 | import path from 'path';
3 | import { URL } from 'url';
4 | import { convertHTMLToMarkdown } from './convertHTML';
5 |
6 | function downloadWebsite(url: string, maxDepth: number = 3): Promise