├── scripts ├── prepare-native-modules.js └── create-prebuild.js ├── prebuilds └── darwin-arm64 │ ├── node-microphone.node │ ├── node-microphone.js │ └── node-microphone-original.js ├── .DS_Store ├── src ├── .DS_Store ├── test │ └── extension.test.ts ├── types │ └── node-microphone.d.ts ├── services │ ├── conversation-logger-service.ts │ ├── workspace-service.ts │ ├── spec-generator-service.ts │ ├── deepgram-service.ts │ ├── web-audio-service.ts │ ├── dictation-service.ts │ ├── prompt-management-service.ts │ ├── command-registry-service.ts │ ├── llm-service.ts │ ├── agent-panel.ts │ └── voice-agent-service.ts ├── webview │ ├── audio-webview-provider.ts │ └── mic-permission-webview-provider.ts ├── utils │ ├── binary-loader.ts │ └── native-module-wrapper.ts └── extension.ts ├── vibe-coder-logo.png ├── vibe-coder-0.0.1.vsix ├── vibe-coder-0.0.2.vsix ├── .gitignore ├── .vscode-test.mjs ├── .vscodeignore ├── CHANGELOG.md ├── .vscode ├── extensions.json ├── launch.json ├── settings.json └── tasks.json ├── tsconfig.json ├── .eslintrc.json ├── LICENSE ├── esbuild.js ├── vsc-extension-quickstart.md ├── CONTRIBUTING.md ├── webview-plan.md ├── MICROPHONE_TESTING.md ├── package.json ├── .github └── workflows │ └── build-natives.yml ├── README.md └── media └── audioInterface.html /scripts/prepare-native-modules.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /prebuilds/darwin-arm64/node-microphone.node: -------------------------------------------------------------------------------- 1 | ELF -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/vibe_coder/HEAD/.DS_Store -------------------------------------------------------------------------------- /src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/vibe_coder/HEAD/src/.DS_Store -------------------------------------------------------------------------------- /vibe-coder-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/vibe_coder/HEAD/vibe-coder-logo.png -------------------------------------------------------------------------------- /vibe-coder-0.0.1.vsix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/vibe_coder/HEAD/vibe-coder-0.0.1.vsix -------------------------------------------------------------------------------- /vibe-coder-0.0.2.vsix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/vibe_coder/HEAD/vibe-coder-0.0.2.vsix -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | dist 3 | node_modules 4 | .vscode-test/ 5 | .vscode/ 6 | license-server/* 7 | .vercel 8 | -------------------------------------------------------------------------------- /src/test/extension.test.ts: -------------------------------------------------------------------------------- 1 | // This file has been intentionally removed as per user request. 2 | // Tests have been removed from the project. 
3 | -------------------------------------------------------------------------------- /.vscode-test.mjs: -------------------------------------------------------------------------------- 1 | import { defineConfig } from '@vscode/test-cli'; 2 | 3 | export default defineConfig({ 4 | files: 'out/test/**/*.test.js', 5 | }); 6 | -------------------------------------------------------------------------------- /.vscodeignore: -------------------------------------------------------------------------------- 1 | .vscode/** 2 | .vscode-test/** 3 | out/** 4 | node_modules/** 5 | src/** 6 | .gitignore 7 | .yarnrc 8 | esbuild.js 9 | vsc-extension-quickstart.md 10 | **/tsconfig.json 11 | **/.eslintrc.json 12 | **/*.map 13 | **/*.ts 14 | **/.vscode-test.* 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All notable changes to the "vibe-coder" extension will be documented in this file. 4 | 5 | Check [Keep a Changelog](http://keepachangelog.com/) for recommendations on how to structure this file. 6 | 7 | ## [Unreleased] 8 | 9 | - Initial release -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | // See http://go.microsoft.com/fwlink/?LinkId=827846 3 | // for the documentation about the extensions.json format 4 | "recommendations": ["dbaeumer.vscode-eslint", "connor4312.esbuild-problem-matchers", "ms-vscode.extension-test-runner"] 5 | } 6 | -------------------------------------------------------------------------------- /src/types/node-microphone.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'node-microphone' { 2 | import { Readable } from 'stream' 3 | 4 | class Microphone { 5 | constructor(options?: { 6 | rate?: number 7 | channels?: number 8 | device?: string 9 | }) 10 | 11 | startRecording(): Readable 12 | stopRecording(): void 13 | } 14 | 15 | export default Microphone 16 | } -------------------------------------------------------------------------------- /prebuilds/darwin-arm64/node-microphone.js: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * This is a marker file for the node-microphone module. 4 | * 5 | * node-microphone is not a native module with a .node binary. 6 | * It's a JavaScript wrapper that uses command-line tools: 7 | * - macOS: 'rec' (part of SoX) 8 | * - Windows: 'sox' 9 | * - Linux: 'arecord' 10 | * 11 | * Required command-line tool available: true 12 | * Platform: darwin 13 | * Architecture: arm64 14 | */ 15 | module.exports = { 16 | isJsWrapper: true, 17 | commandAvailable: true, 18 | platform: 'darwin', 19 | architecture: 'arm64' 20 | }; 21 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "Node16", 4 | "target": "ES2022", 5 | "lib": [ 6 | "ES2022", 7 | "DOM", 8 | "DOM.Iterable" 9 | ], 10 | "sourceMap": true, 11 | "rootDir": "src", 12 | "strict": true /* enable all strict type-checking options */ 13 | /* Additional Checks */ 14 | // "noImplicitReturns": true, /* Report error when not all code paths in function return a value. 
*/ 15 | // "noFallthroughCasesInSwitch": true, /* Report errors for fallthrough cases in switch statement. */ 16 | // "noUnusedParameters": true, /* Report errors on unused parameters. */ 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | // A launch configuration that compiles the extension and then opens it inside a new window 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | { 6 | "version": "0.2.0", 7 | "configurations": [ 8 | { 9 | "name": "Run Extension", 10 | "type": "extensionHost", 11 | "request": "launch", 12 | "args": [ 13 | "--extensionDevelopmentPath=${workspaceFolder}", 14 | "--disable-extensions" 15 | ], 16 | "outFiles": [ 17 | "${workspaceFolder}/dist/**/*.js" 18 | ], 19 | "preLaunchTask": "${defaultBuildTask}" 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "parser": "@typescript-eslint/parser", 4 | "parserOptions": { 5 | "ecmaVersion": 6, 6 | "sourceType": "module" 7 | }, 8 | "plugins": [ 9 | "@typescript-eslint" 10 | ], 11 | "rules": { 12 | "@typescript-eslint/naming-convention": [ 13 | "warn", 14 | { 15 | "selector": "import", 16 | "format": [ "camelCase", "PascalCase" ] 17 | } 18 | ], 19 | "@typescript-eslint/semi": "warn", 20 | "curly": "warn", 21 | "eqeqeq": "warn", 22 | "no-throw-literal": "warn", 23 | "semi": "off" 24 | }, 25 | "ignorePatterns": [ 26 | "out", 27 | "dist", 28 | "**/*.d.ts" 29 | ] 30 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | // Place your settings in this file to overwrite default and user settings. 
2 | { 3 | "files.exclude": { 4 | "out": false, // set this to true to hide the "out" folder with the compiled JS files 5 | "dist": false // set this to true to hide the "dist" folder with the compiled JS files 6 | }, 7 | "search.exclude": { 8 | "out": true, // set this to false to include "out" folder in search results 9 | "dist": true // set this to false to include "dist" folder in search results 10 | }, 11 | // Turn off tsc task auto detection since we have the necessary tasks as npm scripts 12 | "typescript.tsc.autoDetect": "off", 13 | "debug.javascript.autoAttachFilter": "onlyWithFlag", 14 | "debug.javascript.terminalOptions": { 15 | "skipFiles": [ 16 | "<node_internals>/**" 17 | ] 18 | } 19 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [copyright holders] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /.vscode/tasks.json: -------------------------------------------------------------------------------- 1 | // See https://go.microsoft.com/fwlink/?LinkId=733558 2 | // for the documentation about the tasks.json format 3 | { 4 | "version": "2.0.0", 5 | "tasks": [ 6 | { 7 | "type": "npm", 8 | "script": "watch", 9 | "problemMatcher": "$tsc-watch", 10 | "isBackground": true, 11 | "presentation": { 12 | "reveal": "never" 13 | }, 14 | "group": { 15 | "kind": "build", 16 | "isDefault": true 17 | } 18 | }, 19 | { 20 | "type": "npm", 21 | "script": "watch:esbuild", 22 | "group": "build", 23 | "problemMatcher": "$esbuild-watch", 24 | "isBackground": true, 25 | "label": "npm: watch:esbuild", 26 | "presentation": { 27 | "group": "watch", 28 | "reveal": "never" 29 | } 30 | }, 31 | { 32 | "type": "npm", 33 | "script": "watch:tsc", 34 | "group": "build", 35 | "problemMatcher": "$tsc-watch", 36 | "isBackground": true, 37 | "label": "npm: watch:tsc", 38 | "presentation": { 39 | "group": "watch", 40 | "reveal": "never" 41 | } 42 | }, 43 | { 44 | "type": "npm", 45 | "script": "watch-tests", 46 | "problemMatcher": "$tsc-watch", 47 | "isBackground": true, 48 | "presentation": { 49 | "reveal": "never", 50 | "group": "watchers" 51 | }, 52 | "group": "build" 53 | }, 54 | { 55 | "label": "tasks: watch-tests", 56 | "dependsOn": [ 57 | "npm: watch", 58 | "npm: watch-tests" 59 | ], 60 | "problemMatcher": [] 61 | } 62 | ] 63 | } 64 | -------------------------------------------------------------------------------- /esbuild.js: -------------------------------------------------------------------------------- 1 | const esbuild = require("esbuild"); 2 | 3 | const isProduction = process.argv.includes('--production'); 4 | const isWatch = process.argv.includes('--watch'); 5 | 6 | /** 7 | * @type {import('esbuild').Plugin} 8 | */ 9 | const esbuildProblemMatcherPlugin = { 10 | name: 'esbuild-problem-matcher', 11 | 12 | setup(build) { 13 | build.onStart(() => { 14 | console.log('[watch] build started'); 15 | }); 16 | build.onEnd((result) => { 17 | result.errors.forEach(({ text, location }) => { 18 | console.error(`✘ [ERROR] ${text}`); 19 | console.error(` ${location.file}:${location.line}:${location.column}:`); 20 | }); 21 | console.log('[watch] build finished'); 22 | }); 23 | }, 24 | }; 25 | 26 | /** @type {import('esbuild').BuildOptions} */ 27 | const buildOptions = { 28 | entryPoints: ['./src/extension.ts'], 29 | bundle: true, 30 | outfile: 'dist/extension.js', 31 | external: [ 32 | 'vscode' 33 | // speaker has been removed as we're using browser audio APIs now 34 | ], 35 | format: 'cjs', 36 | platform: 'node', 37 | target: 'node16', 38 | sourcemap: !isProduction, 39 | minify: isProduction, 40 | sourcesContent: false, 41 | logLevel: 'info', 42 | define: { 43 | 'process.env.NODE_ENV': isProduction ? 
'"production"' : '"development"' 44 | }, 45 | plugins: [ 46 | esbuildProblemMatcherPlugin, 47 | ], 48 | }; 49 | 50 | if (isWatch) { 51 | // Watch mode 52 | esbuild.context(buildOptions) 53 | .then(ctx => ctx.watch()) 54 | .catch(() => process.exit(1)); 55 | } else { 56 | // Single build 57 | esbuild.build(buildOptions) 58 | .catch(() => process.exit(1)); 59 | } 60 | -------------------------------------------------------------------------------- /src/services/conversation-logger-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import * as fs from 'fs' 3 | import * as path from 'path' 4 | 5 | interface ConversationEntry { 6 | timestamp: number 7 | role: 'user' | 'assistant' 8 | content: string 9 | sessionId: string 10 | } 11 | 12 | export class ConversationLoggerService { 13 | // Make these accessible for debugging 14 | public get logDir(): string { 15 | return this._logDir 16 | } 17 | 18 | public get currentSessionId(): string { 19 | return this._currentSessionId 20 | } 21 | 22 | private _logDir: string 23 | private _currentSessionId: string 24 | 25 | constructor(private context: vscode.ExtensionContext) { 26 | this._currentSessionId = new Date().toISOString().replace(/[:.]/g, '-') 27 | this._logDir = path.join(context.globalStorageUri.fsPath, 'conversations') 28 | if (!fs.existsSync(this._logDir)) fs.mkdirSync(this._logDir, { recursive: true }) 29 | console.log('ConversationLogger: Initialized with directory:', this._logDir) 30 | } 31 | 32 | logEntry({ role, content }: { role: 'user' | 'assistant', content: string }) { 33 | console.log('ConversationLogger: Logging entry:', { role, content }) 34 | console.log('ConversationLogger: Current session ID:', this._currentSessionId) 35 | 36 | const entry: ConversationEntry = { 37 | timestamp: Date.now(), 38 | role, 39 | content, 40 | sessionId: this._currentSessionId 41 | } 42 | 43 | const logFile = path.join(this._logDir, `${this._currentSessionId}.json`) 44 | console.log('ConversationLogger: Writing to log file:', logFile) 45 | 46 | let entries: ConversationEntry[] = [] 47 | if (fs.existsSync(logFile)) { 48 | entries = JSON.parse(fs.readFileSync(logFile, 'utf8')) 49 | console.log('ConversationLogger: Existing entries:', entries.length) 50 | } 51 | 52 | entries.push(entry) 53 | fs.writeFileSync(logFile, JSON.stringify(entries, null, 2)) 54 | console.log('ConversationLogger: Successfully wrote entry') 55 | } 56 | 57 | getLatestSession(): ConversationEntry[] { 58 | const logFile = path.join(this._logDir, `${this._currentSessionId}.json`) 59 | console.log('ConversationLogger: Getting latest session from:', logFile) 60 | console.log('ConversationLogger: Current session ID:', this._currentSessionId) 61 | 62 | if (!fs.existsSync(logFile)) { 63 | console.log('ConversationLogger: No log file found') 64 | return [] 65 | } 66 | 67 | const entries = JSON.parse(fs.readFileSync(logFile, 'utf8')) 68 | console.log('ConversationLogger: Found entries:', entries.length) 69 | return entries 70 | } 71 | } -------------------------------------------------------------------------------- /src/services/workspace-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import * as path from 'path' 3 | 4 | export interface FileTreeNode { 5 | name: string 6 | type: 'file' | 'directory' 7 | path: string 8 | children?: FileTreeNode[] 9 | } 10 | 11 | export class WorkspaceService { 12 | // Add an array of directories 
to ignore 13 | private readonly ignoredDirectories = [ 14 | 'node_modules', 15 | 'venv', 16 | '.venv', 17 | 'env', 18 | '.env', 19 | 'dist', 20 | 'build', 21 | '.git', 22 | '.github', 23 | '.idea', 24 | '.vscode', 25 | '__pycache__', 26 | 'coverage', 27 | '.next', 28 | '.nuxt', 29 | 'out', 30 | 'target', 31 | 'vendor', 32 | 'tmp', 33 | 'temp', 34 | '.DS_Store' 35 | ] 36 | 37 | async getFileTree(): Promise<vscode.Uri[]> { 38 | const workspaceRoot = vscode.workspace.workspaceFolders?.[0] 39 | if (!workspaceRoot) return [] 40 | 41 | const pattern = new vscode.RelativePattern(workspaceRoot, '**/*') 42 | const files = await vscode.workspace.findFiles(pattern) 43 | 44 | // Filter out files from ignored directories 45 | return files.filter(file => { 46 | const relativePath = vscode.workspace.asRelativePath(file) 47 | return !this.ignoredDirectories.some(dir => 48 | relativePath.startsWith(dir + '/') || relativePath === dir 49 | ) 50 | }) 51 | } 52 | 53 | formatFileTree(files: vscode.Uri[]): string { 54 | if (!files.length) return 'No files found' 55 | 56 | const workspaceRoot = vscode.workspace.workspaceFolders?.[0] 57 | if (!workspaceRoot) return 'No workspace root found' 58 | 59 | // Create a tree structure 60 | const tree: { [key: string]: any } = {} 61 | 62 | files.forEach(file => { 63 | const relativePath = vscode.workspace.asRelativePath(file) 64 | const parts = relativePath.split('/') 65 | let current = tree 66 | 67 | parts.forEach((part, i) => { 68 | if (i === parts.length - 1) { 69 | current[part] = null // leaf node 70 | } else { 71 | current[part] = current[part] || {} 72 | current = current[part] 73 | } 74 | }) 75 | }) 76 | 77 | // Format the tree as a string 78 | const formatNode = (node: any, prefix = ''): string => { 79 | if (!node) return '' 80 | 81 | return Object.entries(node).map(([name, children]) => { 82 | if (children === null) { 83 | return `${prefix}${name}` 84 | } 85 | return `${prefix}${name}/\n${formatNode(children, prefix + ' ')}` 86 | }).join('\n') 87 | } 88 | 89 | return formatNode(tree) 90 | } 91 | } -------------------------------------------------------------------------------- /src/services/spec-generator-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import * as fs from 'fs' 3 | import * as path from 'path' 4 | import { LLMService } from './llm-service' 5 | import { ConversationLoggerService } from './conversation-logger-service' 6 | 7 | export class SpecGeneratorService { 8 | constructor( 9 | private llmService: LLMService, 10 | private conversationLogger: ConversationLoggerService 11 | ) {} 12 | 13 | async generateSpec(): Promise<void> { 14 | console.log('SpecGenerator: Starting spec generation') 15 | try { 16 | const conversation = this.conversationLogger.getLatestSession() 17 | console.log('SpecGenerator: Got conversation entries:', conversation.length) 18 | console.log('SpecGenerator: Log directory:', this.conversationLogger.logDir) 19 | console.log('SpecGenerator: Current session ID:', this.conversationLogger.currentSessionId) 20 | 21 | if (!conversation.length) { 22 | console.log('SpecGenerator: No conversation found') 23 | throw new Error('No conversation found to generate spec from') 24 | } 25 | 26 | console.log('SpecGenerator: First few conversation entries:', conversation.slice(0, 3)) 27 | 28 | const prompt = ` 29 | Based on the following conversation, create a clear and structured project specification in markdown format.
30 | Include these sections: 31 | - Project Overview 32 | - Requirements 33 | - Technical Architecture 34 | - Implementation Details 35 | - Next Steps 36 | 37 | Format the output as clean markdown with proper headers and bullet points. 38 | 39 | Conversation: 40 | ${conversation.map(entry => `${entry.role}: ${entry.content}`).join('\n')} 41 | ` 42 | 43 | const response = await this.llmService.streamProcessText({ 44 | text: prompt, 45 | prompt: { 46 | id: 'spec-generator', 47 | name: 'Spec Generator', 48 | prompt: 'Generate a project specification' 49 | }, 50 | onToken: () => {} 51 | }) 52 | 53 | if (response.error) throw new Error(response.error) 54 | 55 | const workspaceFolder = vscode.workspace.workspaceFolders?.[0] 56 | if (!workspaceFolder) throw new Error('No workspace folder found') 57 | 58 | const specUri = vscode.Uri.joinPath(workspaceFolder.uri, 'project_spec.md') 59 | 60 | // Use VS Code's filesystem API 61 | await vscode.workspace.fs.writeFile( 62 | specUri, 63 | Buffer.from(response.text, 'utf8') 64 | ) 65 | 66 | // Use "vscode.open" command so it's consistent with other commands 67 | await vscode.commands.executeCommand('vscode.open', specUri) 68 | 69 | return 70 | } catch (error) { 71 | console.error('SpecGenerator: Error during spec generation:', error) 72 | throw error // Re-throw to maintain error handling chain 73 | } 74 | } 75 | } -------------------------------------------------------------------------------- /vsc-extension-quickstart.md: -------------------------------------------------------------------------------- 1 | # Welcome to your VS Code Extension 2 | 3 | ## What's in the folder 4 | 5 | * This folder contains all of the files necessary for your extension. 6 | * `package.json` - this is the manifest file in which you declare your extension and command. 7 | * The sample plugin registers a command and defines its title and command name. With this information VS Code can show the command in the command palette. It doesn’t yet need to load the plugin. 8 | * `src/extension.ts` - this is the main file where you will provide the implementation of your command. 9 | * The file exports one function, `activate`, which is called the very first time your extension is activated (in this case by executing the command). Inside the `activate` function we call `registerCommand`. 10 | * We pass the function containing the implementation of the command as the second parameter to `registerCommand`. 11 | 12 | ## Setup 13 | 14 | * install the recommended extensions (amodio.tsl-problem-matcher, ms-vscode.extension-test-runner, and dbaeumer.vscode-eslint) 15 | 16 | 17 | ## Get up and running straight away 18 | 19 | * Press `F5` to open a new window with your extension loaded. 20 | * Run your command from the command palette by pressing (`Ctrl+Shift+P` or `Cmd+Shift+P` on Mac) and typing `Hello World`. 21 | * Set breakpoints in your code inside `src/extension.ts` to debug your extension. 22 | * Find output from your extension in the debug console. 23 | 24 | ## Make changes 25 | 26 | * You can relaunch the extension from the debug toolbar after changing code in `src/extension.ts`. 27 | * You can also reload (`Ctrl+R` or `Cmd+R` on Mac) the VS Code window with your extension to load your changes. 28 | 29 | 30 | ## Explore the API 31 | 32 | * You can open the full set of our API when you open the file `node_modules/@types/vscode/index.d.ts`. 
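* For instance, the `activate`/`registerCommand` flow described in "What's in the folder" above boils down to just a few lines. A minimal sketch (the `sample.helloWorld` command ID is illustrative, not one of this project's commands):

```typescript
import * as vscode from 'vscode';

export function activate(context: vscode.ExtensionContext) {
  // Called the first time the extension is activated.
  // The second argument to registerCommand is the command's implementation.
  const disposable = vscode.commands.registerCommand('sample.helloWorld', () => {
    vscode.window.showInformationMessage('Hello World!');
  });
  // Push the disposable so the command is cleaned up on deactivation.
  context.subscriptions.push(disposable);
}

export function deactivate() {}
```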
33 | 34 | ## Run tests 35 | 36 | * Install the [Extension Test Runner](https://marketplace.visualstudio.com/items?itemName=ms-vscode.extension-test-runner) 37 | * Run the "watch" task via the **Tasks: Run Task** command. Make sure this is running, or tests might not be discovered. 38 | * Open the Testing view from the activity bar and click the "Run Test" button, or use the hotkey `Ctrl/Cmd + ; A` 39 | * See the output of the test result in the Test Results view. 40 | * Make changes to `src/test/extension.test.ts` or create new test files inside the `test` folder. 41 | * The provided test runner will only consider files matching the name pattern `**.test.ts`. 42 | * You can create folders inside the `test` folder to structure your tests any way you want. 43 | 44 | ## Go further 45 | 46 | * Reduce the extension size and improve the startup time by [bundling your extension](https://code.visualstudio.com/api/working-with-extensions/bundling-extension). 47 | * [Publish your extension](https://code.visualstudio.com/api/working-with-extensions/publishing-extension) on the VS Code extension marketplace. 48 | * Automate builds by setting up [Continuous Integration](https://code.visualstudio.com/api/working-with-extensions/continuous-integration). 49 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Vibe Coder 2 | 3 | Thank you for your interest in contributing to Vibe Coder! This guide will help you get started with contributing to the project. 4 | 5 | ## Code of Conduct 6 | 7 | By participating in this project, you agree to abide by our Code of Conduct. Please be respectful and considerate of others when participating in discussions, submitting issues, or contributing code. 8 | 9 | ## Getting Started 10 | 11 | 1. **Fork the repository** on GitHub 12 | 2. **Clone your fork** to your local machine 13 | 3. **Create a new branch** for your feature or bugfix 14 | 4. **Make your changes** following our coding standards 15 | 5. **Test your changes** thoroughly 16 | 6. **Commit your changes** with clear, descriptive commit messages 17 | 7. **Push your branch** to your fork on GitHub 18 | 8. **Submit a pull request** to the main repository 19 | 20 | ## Reporting Issues 21 | 22 | When reporting issues, please include: 23 | 24 | - A clear and descriptive title 25 | - A detailed description of the issue 26 | - Steps to reproduce the problem 27 | - Expected behavior 28 | - Actual behavior 29 | - Screenshots or error logs if applicable 30 | - Your environment details (OS, VS Code version, etc.) 31 | 32 | Use the issue templates provided in the repository when available. 33 | 34 | ## Pull Request Process 35 | 36 | 1. Ensure your code follows the project's coding standards 37 | 2. Update documentation as necessary 38 | 3. Include tests for new features or bug fixes 39 | 4. Link any relevant issues in your pull request description 40 | 5. Wait for a maintainer to review your pull request 41 | 6. Address any feedback from code reviews 42 | 7.
Once approved, a maintainer will merge your pull request 43 | 44 | ## Development Setup 45 | 46 | To set up the development environment: 47 | 48 | ```bash 49 | # Clone the repository 50 | git clone https://github.com/yourusername/vibe-coder.git 51 | cd vibe-coder 52 | 53 | # Install dependencies 54 | npm install 55 | 56 | # Compile and watch for changes 57 | npm run watch 58 | ``` 59 | 60 | To test the extension in VS Code: 61 | 1. Press F5 to open a new window with your extension loaded 62 | 2. Run your commands from the command palette by pressing (Ctrl+Shift+P or Cmd+Shift+P on Mac) 63 | 3. Set breakpoints in your code for debugging 64 | 65 | ## Coding Standards 66 | 67 | - Use TypeScript for all code 68 | - Follow the existing code style and formatting 69 | - Write descriptive variable and function names 70 | - Include JSDoc comments for public APIs 71 | - Write unit tests for new functionality 72 | - Ensure your code passes linting (`npm run lint`) 73 | 74 | ## Working with Deepgram API 75 | 76 | When working with the Deepgram API: 77 | - Never commit API keys to the repository 78 | - Use the extension's secret storage for API keys 79 | - Test your changes with your own Deepgram API key 80 | 81 | ## Documentation 82 | 83 | - Update the README.md with details of changes to the interface 84 | - Update any relevant documentation in the `/docs` directory 85 | - Include code examples for new features 86 | 87 | ## Release Process 88 | 89 | The maintainers will handle the release process, which includes: 90 | 1. Updating the version number 91 | 2. Creating release notes 92 | 3. Publishing to the VS Code Marketplace 93 | 94 | ## Questions? 95 | 96 | If you have any questions about contributing, please open an issue with the "question" label or reach out to the maintainers. 97 | 98 | Thank you for contributing to Vibe Coder! -------------------------------------------------------------------------------- /webview-plan.md: -------------------------------------------------------------------------------- 1 | # Webview Audio Playback Plan 2 | 3 | This document outlines a step-by-step plan to remove native speaker references from the extension and implement audio playback using browser APIs in the webview panel. 4 | 5 | ## 1. Remove Native Speaker References 6 | 7 | - **1.1. Identify and Remove SpeakerWrapper Usage** 8 | - Search for all references to `SpeakerWrapper` and any direct interactions with the native speaker module in the codebase (e.g., in `VoiceAgentService`, `AudioPlayer`, etc.). 9 | - Comment out and remove those sections that attempt to load and use the native `speaker.node` module. 10 | 11 | - **1.2. Clean up Imports** 12 | - Remove the import of `SpeakerWrapper` from files such as `voice-agent-service.ts`, `dictation-service.ts`, and others that reference it. 13 | - Update any related type or interface declarations, if necessary. 14 | 15 | ## 2. Set Up Audio Playback in the Webview Panel 16 | 17 | - **2.1. Send Audio Data to the Webview** 18 | - Modify the Voice Agent service so that instead of playing audio using the native speaker, it sends the raw audio data (e.g., PCM data from Deepgram) to the webview using the existing message passing mechanism. 19 | - Define a new message type (e.g., `playAudio`) that includes the audio data (likely in a suitable format such as a base64-encoded string or Blob URL). 20 | 21 | - **2.2. Implement Audio Playback in the Webview** 22 | - In the webview HTML/JavaScript: 23 | - Set up a Web Audio API context. 
24 | - Listen for incoming `playAudio` messages. 25 | - When receiving the message, convert the raw audio data to an appropriate format. 26 | - Use the `AudioContext.decodeAudioData` method to decode the PCM data if necessary. (Note: This may require converting the raw PCM into an ArrayBuffer if it isn't already.) 27 | - Create an `AudioBufferSourceNode` and connect it to the audio context destination. 28 | - Start playback of the audio buffer. 29 | 30 | - **2.3. Fallback Option** 31 | - Alternatively, if converting PCM data is complex, consider streaming the audio data into a `Blob` and creating an object URL from it. Then, use an HTML `audio` element to play back the audio. 32 | 33 | ## 3. Update the Communication Between Extension and Webview 34 | 35 | - **3.1. Modify Message Handlers** 36 | - Update the webview's `window.addEventListener('message', ...)` handler to handle the new `playAudio` message. 37 | - Ensure that existing messages (like transcript updates) remain unaffected. 38 | 39 | - **3.2. Test Audio Sending and Reception** 40 | - Add logging in both the extension and webview code to verify that audio data is correctly sent and received. 41 | - Test with small chunks of audio data first. 42 | 43 | ## 4. Testing and Validation 44 | 45 | - **4.1. Unit Test** 46 | - Test the modified VoiceAgentService to ensure that all audio data is forwarded to the webview as expected. 47 | 48 | - **4.2. Webview Testing** 49 | - Verify that the Web Audio API correctly decodes and plays back the audio data without noticeable latency or quality issues. 50 | - Test in multiple environments (macOS, Windows, Linux) to ensure consistent behavior. 51 | 52 | ## 5. Final Clean Up 53 | 54 | - **5.1. Remove Native Module Fallbacks** 55 | - Once the browser-based playback works reliably, remove the dummy speaker or any fallback code referencing the native `speaker` module. 56 | 57 | - **5.2. Update Documentation** 58 | - Update the README or any developer documentation to reflect the changes in audio playback implementation. 59 | - Ensure that future development follows the webview-based playback approach for audio output. 60 | 61 | ## Additional Considerations 62 | 63 | - **Audio Data Format**: Decide early on the format in which audio data will be sent from the extension (e.g., base64, ArrayBuffer) to minimize conversion work in the webview. 64 | - **Latency**: Test for potential latency issues and optimize the decoding and playback process as needed. 65 | - **Fallback UI**: Consider adding UI indicators in the webview in case audio playback fails. 66 | 67 | This plan should guide the removal of native speaker dependencies and transition to a robust browser-based audio playback solution in the webview panel. 68 | -------------------------------------------------------------------------------- /MICROPHONE_TESTING.md: -------------------------------------------------------------------------------- 1 | # Microphone Testing Guide 2 | 3 | This guide provides instructions for testing the microphone functionality across different operating systems. 4 | 5 | ## Prerequisites 6 | 7 | Before testing, ensure you have the required command-line tools installed: 8 | 9 | - **macOS**: SoX (`rec` command) 10 | ``` 11 | brew install sox 12 | ``` 13 | 14 | - **Windows**: SoX 15 | Download from [SourceForge](https://sourceforge.net/projects/sox/) 16 | 17 | - **Linux**: ALSA tools 18 | ``` 19 | sudo apt-get install alsa-utils 20 | ``` 21 | 22 | ## Testing Steps 23 | 24 | ### 1. Basic Functionality Test 25 | 26 | 1. 
Open VS Code with the Vibe-Coder extension installed 27 | 2. Run the command: `Vibe-Coder: List Available Microphone Devices` 28 | - This should display available microphone devices in the output panel 29 | - If you see an error, check that the required command-line tool is installed 30 | 31 | 3. Start dictation or voice agent mode 32 | - If the microphone works, you should see audio data being processed 33 | - Check the Output panel (select "Vibe-Coder" from the dropdown) for logs 34 | 35 | ### 2. Device Configuration Test 36 | 37 | 1. Run the command: `Vibe-Coder: List Available Microphone Devices` 38 | 2. Note a specific device ID from the list 39 | 3. Open VS Code settings (File > Preferences > Settings) 40 | 4. Search for "vibeCoder.microphone" 41 | 5. Set the appropriate device setting for your platform: 42 | - macOS: `vibeCoder.microphone.deviceMacOS` 43 | - Windows: `vibeCoder.microphone.deviceWindows` 44 | - Linux: `vibeCoder.microphone.deviceLinux` 45 | 6. Start dictation or voice agent mode again 46 | 7. Verify in the logs that the specified device is being used 47 | 48 | ## OS-Specific Testing Notes 49 | 50 | ### macOS 51 | 52 | - The `rec` command should be available after installing SoX 53 | - If you installed SoX but `rec` is not found, try running: 54 | ``` 55 | brew link --force sox 56 | ``` 57 | - Common device names: "default", specific device names from `system_profiler SPAudioDataType` 58 | 59 | ### Windows 60 | 61 | - After installing SoX, ensure it's in your PATH 62 | - You may need to restart VS Code after installation 63 | - Common device names: "default", numeric indices (0, 1, 2) 64 | 65 | ### Linux 66 | 67 | - The `arecord` command should be available after installing ALSA tools 68 | - Common device formats: 69 | - `plughw:0,0` (first card, first device) 70 | - `plughw:1,0` (second card, first device) 71 | - `default` 72 | - Device names from `arecord -L` output 73 | 74 | ## Troubleshooting 75 | 76 | ### Command Not Found 77 | 78 | If you see "Command not found" errors: 79 | 80 | 1. Verify the tool is installed using terminal: 81 | - macOS: `which rec` 82 | - Windows: `where sox` 83 | - Linux: `which arecord` 84 | 2. If installed but not found, check your PATH environment variable 85 | 3. For macOS, try `brew link --force sox` 86 | 87 | ### Device Errors 88 | 89 | If you see device-related errors: 90 | 91 | 1. List available devices using `Vibe-Coder: List Available Microphone Devices` 92 | 2. Try using "default" as the device name 93 | 3. Check system permissions for microphone access 94 | 4. Try different device names/IDs from the list 95 | 96 | ### Audio Format Errors 97 | 98 | If you encounter audio format errors: 99 | 100 | 1. Check the logs for specific error messages 101 | 2. Try modifying the audio format settings in code if necessary 102 | 3. Ensure your microphone supports the requested format 103 | 104 | ## Reporting Issues 105 | 106 | When reporting issues, please include: 107 | 108 | 1. Your operating system version 109 | 2. The command-line tool version: 110 | - macOS/Windows: `sox --version` 111 | - Linux: `arecord --version` 112 | 3. The exact error message from the Output panel 113 | 4. Steps to reproduce the issue 114 | 5. Any custom configuration you've applied 115 | 116 | ## Advanced Testing 117 | 118 | For developers wanting to test changes to the microphone wrapper: 119 | 120 | 1. Enable verbose logging by adding `console.log` statements 121 | 2. Test with different audio formats by modifying the options 122 | 3. 
Test error handling by intentionally using invalid device names 123 | 4. Test with different microphone hardware if available -------------------------------------------------------------------------------- /src/services/deepgram-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import { 3 | createClient, 4 | LiveTranscriptionEvents, 5 | ListenLiveClient 6 | } from '@deepgram/sdk' 7 | import Microphone from 'node-microphone' 8 | import WebSocket from 'ws' 9 | import { EventEmitter } from 'events' 10 | import { DictationService } from './dictation-service' 11 | 12 | /** 13 | * If you have extension-specific config, define it here. 14 | */ 15 | export interface DeepgramConfig { 16 | apiKey: string 17 | } 18 | 19 | interface DictationState { 20 | isActive: boolean 21 | mic: Microphone | null 22 | wsConnection: ListenLiveClient | null 23 | statusBarItem: vscode.StatusBarItem 24 | } 25 | 26 | export class DeepgramService { 27 | private client!: ReturnType<typeof createClient> 28 | private isInitialized = false 29 | private dictationService: DictationService | null = null 30 | 31 | constructor(private context: vscode.ExtensionContext) { 32 | console.log('DeepgramService constructor') 33 | } 34 | 35 | async initialize(): Promise<void> { 36 | console.log('DeepgramService initializing...') 37 | const apiKey = await this.context.secrets.get('deepgram.apiKey') 38 | 39 | try { 40 | // Initialize with empty key if not available, but mark as not fully initialized 41 | this.client = createClient(apiKey || 'dummy-key-for-initialization') 42 | this.dictationService = new DictationService(this.client, this.context) 43 | 44 | // Only mark as fully initialized if we have an API key 45 | this.isInitialized = !!apiKey 46 | console.log('DeepgramService initialized successfully, API key available:', !!apiKey) 47 | } catch (error) { 48 | console.warn('Failed to initialize Deepgram client, will prompt for key when needed:', error) 49 | // Create a placeholder client that will be replaced when a key is provided 50 | this.dictationService = new DictationService(null as any, this.context) 51 | this.isInitialized = false 52 | } 53 | } 54 | 55 | /** 56 | * Update the API key and reinitialize the client 57 | */ 58 | updateApiKey(apiKey: string): void { 59 | this.client = createClient(apiKey) 60 | if (this.dictationService) { 61 | this.dictationService.updateClient(this.client) 62 | } else { 63 | this.dictationService = new DictationService(this.client, this.context) 64 | } 65 | this.isInitialized = true 66 | } 67 | 68 | async startAgent(): Promise<void> { 69 | vscode.window.showInformationMessage('Agent mode coming soon!') 70 | } 71 | 72 | async startDictation(): Promise<void> { 73 | if (!this.dictationService) 74 | throw new Error('Dictation service not initialized') 75 | 76 | // Check for API key and prompt if needed 77 | const apiKey = await this.context.secrets.get('deepgram.apiKey') 78 | if (!apiKey) { 79 | // Show a message with a button to open the command 80 | const action = await vscode.window.showErrorMessage( 81 | 'Deepgram API key is required for dictation', 82 | 'Configure API Key' 83 | ) 84 | 85 | if (action === 'Configure API Key') { 86 | await vscode.commands.executeCommand('vibe-coder.configureDeepgramApiKey') 87 | } 88 | 89 | throw new Error('Deepgram API key is required') 90 | } else if (!this.isInitialized) { 91 | // If we have a key but aren't initialized, update the key 92 | this.updateApiKey(apiKey) 93 | } 94 | 95 | await
this.dictationService.startDictation() 96 | } 97 | 98 | async stopDictation(): Promise<void> { 99 | if (!this.dictationService) 100 | throw new Error('Dictation service not initialized') 101 | 102 | await this.dictationService.stopDictation() 103 | } 104 | 105 | dispose(): void { 106 | this.dictationService?.stopDictation() 107 | } 108 | 109 | /** 110 | * Provide a callback that receives (text, isFinal). 111 | */ 112 | onTranscript(callback: (text: string, isFinal: boolean) => void) { 113 | console.log('Setting up transcript listener') 114 | if (!this.dictationService) { 115 | console.warn('Dictation service not fully initialized in onTranscript, creating empty listener') 116 | // Return a no-op function that can be called later when dictation is properly initialized 117 | return () => { 118 | console.log('Transcript listener called but dictation service not initialized') 119 | } 120 | } 121 | return this.dictationService.onTranscript(callback) 122 | } 123 | } -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "vibe-coder", 3 | "displayName": "vibe-coder", 4 | "description": "Voice-first coding assistant powered by Deepgram", 5 | "version": "0.0.1", 6 | "engines": { 7 | "vscode": "^1.92.0", 8 | "node": ">=16.0.0" 9 | }, 10 | "type": "commonjs", 11 | "publisher": "Deepgram", 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/deepgram/vibe_coder" 15 | }, 16 | "categories": [ 17 | "Other" 18 | ], 19 | "activationEvents": [ 20 | "*" 21 | ], 22 | "extensionDependencies": [], 23 | "extensionPack": [], 24 | "extensionKind": [ 25 | "workspace" 26 | ], 27 | "capabilities": { 28 | "virtualWorkspaces": true, 29 | "untrustedWorkspaces": { 30 | "supported": true 31 | } 32 | }, 33 | "main": "./dist/extension.js", 34 | "contributes": { 35 | "commands": [ 36 | { 37 | "command": "vibe-coder.test", 38 | "title": "Vibe Coder: Test Command" 39 | }, 40 | { 41 | "command": "vibe-coder.startAgent", 42 | "title": "Vibe Coder: Start Voice Agent" 43 | }, 44 | { 45 | "command": "vibe-coder.startDictation", 46 | "title": "Vibe Coder: Toggle Dictation" 47 | }, 48 | { 49 | "command": "vibe-coder.openPanel", 50 | "title": "Vibe Coder: Open Panel" 51 | }, 52 | { 53 | "command": "vibe-coder.managePrompts", 54 | "title": "Vibe Coder: Manage Dictation Prompts" 55 | }, 56 | { 57 | "command": "vibe-coder.configureDeepgramApiKey", 58 | "title": "Vibe Coder: Configure Deepgram API Key" 59 | }, 60 | { 61 | "command": "vibeCoder.listMicrophoneDevices", 62 | "title": "Vibe-Coder: List Available Microphone Devices" 63 | }, 64 | { 65 | "command": "vibeCoder.testMicrophone", 66 | "title": "Vibe-Coder: Test Microphone" 67 | }, 68 | { 69 | "command": "vibe-coder.clearPromptState", 70 | "title": "Vibe Coder: Clear Prompt State" 71 | } 72 | ], 73 | "keybindings": [ 74 | { 75 | "command": "vibe-coder.startAgent", 76 | "key": "ctrl+shift+a", 77 | "mac": "cmd+shift+a" 78 | }, 79 | { 80 | "command": "vibe-coder.startDictation", 81 | "key": "ctrl+shift+d", 82 | "mac": "cmd+shift+d", 83 | "when": "!inDebugRepl" 84 | }, 85 | { 86 | "command": "vibe-coder.openPanel", 87 | "key": "ctrl+shift+v", 88 | "mac": "cmd+shift+v" 89 | }, 90 | { 91 | "command": "vibe-coder.startPTT", 92 | "key": "alt", 93 | "mac": "alt" 94 | }, 95 | { 96 | "command": "vibe-coder.endPTT", 97 | "key": "alt", 98 | "mac": "alt", 99 | "when": "!altKey" 100 | } 101 | ], 102 | "configuration": { 103 | "title":
"Vibe-Coder", 104 | "properties": { 105 | "vibeCoder.microphone.deviceMacOS": { 106 | "type": "string", 107 | "default": "", 108 | "description": "Microphone device to use on macOS (leave empty for default)" 109 | }, 110 | "vibeCoder.microphone.deviceWindows": { 111 | "type": "string", 112 | "default": "", 113 | "description": "Microphone device to use on Windows (leave empty for default)" 114 | }, 115 | "vibeCoder.microphone.deviceLinux": { 116 | "type": "string", 117 | "default": "", 118 | "description": "Microphone device to use on Linux (leave empty for default, or specify like 'plughw:1,0')" 119 | } 120 | } 121 | } 122 | }, 123 | "scripts": { 124 | "vscode:prepublish": "npm run package", 125 | "compile": "npm run check-types && npm run lint && node esbuild.js", 126 | "watch": "npm-run-all -p watch:*", 127 | "watch:esbuild": "node esbuild.js --watch", 128 | "watch:tsc": "tsc --noEmit --watch --project tsconfig.json", 129 | "package": "npm run check-types && npm run lint && node esbuild.js --production", 130 | "check-types": "tsc --noEmit", 131 | "lint": "eslint src --ext ts", 132 | "create-prebuild": "node ./scripts/create-prebuild.js" 133 | }, 134 | "devDependencies": { 135 | "@types/node": "20.x", 136 | "@types/readable-stream": "^4.0.18", 137 | "@types/vscode": "^1.92.0", 138 | "@types/ws": "^8.5.10", 139 | "@typescript-eslint/eslint-plugin": "^7.14.1", 140 | "@typescript-eslint/parser": "^7.11.0", 141 | "esbuild": "^0.21.5", 142 | "eslint": "^8.57.0", 143 | "node-fetch": "^2.7.0", 144 | "npm-run-all": "^4.1.5", 145 | "typescript": "^5.4.5" 146 | }, 147 | "dependencies": { 148 | "@deepgram/sdk": "^3.0.0", 149 | "code": "^5.2.4", 150 | "speex-resampler": "^3.0.1", 151 | "ws": "^8.16.0", 152 | "zod": "^3.22.4" 153 | }, 154 | "license": "MIT", 155 | "files": [ 156 | "out/**/*", 157 | "prebuilds/**/*", 158 | "README.md" 159 | ], 160 | "icon": "vibe-coder-logo.png" 161 | } 162 | -------------------------------------------------------------------------------- /prebuilds/darwin-arm64/node-microphone-original.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | const isMac = require('os').type() == 'Darwin'; 3 | const isWin = require('os').type().indexOf('Windows') > -1; 4 | const spawn = require('child_process').spawn; 5 | const EventEmitter = require('events'); 6 | 7 | class Microphone extends EventEmitter { 8 | constructor(options) { 9 | super(); 10 | this.ps = null; 11 | 12 | options = options || {}; 13 | this.endian = options.endian || 'little'; 14 | this.bitwidth = options.bitwidth || '16'; 15 | this.encoding = options.encoding || 'signed-integer'; 16 | this.rate = options.rate || '16000'; 17 | this.channels = options.channels || '1'; 18 | this.additionalParameters = options.additionalParameters || false; 19 | this.useDataEmitter = !!options.useDataEmitter; 20 | if (isWin) { 21 | this.device = options.device || 'default'; 22 | } 23 | if (!isWin && !isMac) { 24 | this.device = options.device || 'plughw:1,0'; 25 | this.format = undefined; 26 | this.formatEndian = undefined; 27 | this.formatEncoding = undefined; 28 | 29 | if (this.encoding === 'unsigned-integer') { 30 | this.formatEncoding = 'U'; 31 | } else { 32 | this.formatEncoding = 'S'; 33 | } 34 | if (this.endian === 'big') { 35 | this.formatEndian = 'BE'; 36 | } else { 37 | this.formatEndian = 'LE'; 38 | } 39 | this.format = 40 | this.formatEncoding + this.bitwidth + '_' + this.formatEndian; 41 | } 42 | } 43 | 44 | // end on silence - default threshold 0.5 45 | //'silence', 
'1', '0.1', options.threshold + '%', 46 | //'1', '1.0', options.threshold + '%' 47 | 48 | startRecording() { 49 | let audioOptions; 50 | if (this.ps === null) { 51 | if (isWin) { 52 | audioOptions = [ 53 | '-b', 54 | this.bitwidth, 55 | '--endian', 56 | this.endian, 57 | '-c', 58 | this.channels, 59 | '-r', 60 | this.rate, 61 | '-e', 62 | this.encoding, 63 | '-t', 64 | 'waveaudio', 65 | this.device, 66 | '-p', 67 | ]; 68 | if (this.additionalParameters) { 69 | audioOptions = audioOptions.concat( 70 | this.additionalParameters 71 | ); 72 | } 73 | this.ps = spawn('sox', audioOptions); 74 | } else if (isMac) { 75 | audioOptions = [ 76 | '-q', 77 | '-b', 78 | this.bitwidth, 79 | '-c', 80 | this.channels, 81 | '-r', 82 | this.rate, 83 | '-e', 84 | this.encoding, 85 | '-t', 86 | 'wav', 87 | '-', 88 | ]; 89 | if (this.additionalParameters) { 90 | audioOptions = audioOptions.concat( 91 | this.additionalParameters 92 | ); 93 | } 94 | this.ps = spawn('rec', audioOptions); 95 | } else { 96 | audioOptions = [ 97 | '-c', 98 | this.channels, 99 | '-r', 100 | this.rate, 101 | '-f', 102 | this.format, 103 | '-D', 104 | this.device, 105 | ]; 106 | if (this.additionalParameters) { 107 | audioOptions = audioOptions.concat( 108 | this.additionalParameters 109 | ); 110 | } 111 | this.ps = spawn('arecord', audioOptions); 112 | } 113 | this.ps.on('error', (error) => { 114 | this.emit('error', error); 115 | }); 116 | this.ps.stderr.on('error', (error) => { 117 | this.emit('error', error); 118 | }); 119 | this.ps.stderr.on('data', (info) => { 120 | this.emit('info', info); 121 | }); 122 | if (this.useDataEmitter) { 123 | this.ps.stdout.on('data', (data) => { 124 | this.emit('data', data); 125 | }); 126 | } 127 | return this.ps.stdout; 128 | } 129 | } 130 | 131 | stopRecording() { 132 | if (this.ps) { 133 | this.ps.kill(); 134 | this.ps = null; 135 | } 136 | } 137 | } 138 | 139 | module.exports = Microphone; 140 | -------------------------------------------------------------------------------- /src/services/web-audio-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode'; 2 | import { AudioWebViewProvider } from '../webview/audio-webview-provider'; 3 | 4 | export interface MessageHandler { 5 | postMessage(message: unknown): Thenable<boolean> | undefined; 6 | ensureVisible(): Promise<void>; 7 | } 8 | 9 | export class WebAudioService { 10 | private messageHandler: MessageHandler | undefined; 11 | private audioProvider: AudioWebViewProvider | undefined; 12 | private eventEmitter = new vscode.EventEmitter(); 13 | 14 | constructor(audioProvider?: AudioWebViewProvider) { 15 | console.log('WebAudioService: Initializing'); 16 | 17 | if (audioProvider) { 18 | this.audioProvider = audioProvider; 19 | 20 | // Listen for messages from the audio interface 21 | this.audioProvider.onMessage(message => { 22 | this.handleMessage(message); 23 | }); 24 | } 25 | } 26 | 27 | // Set the message handler that will receive events 28 | public setMessageHandler(handler: MessageHandler) { 29 | console.log('WebAudioService: Setting message handler'); 30 | this.messageHandler = handler; 31 | } 32 | 33 | // Handle messages from the audio interface 34 | public handleMessage(message: any) { 35 | console.log('WebAudioService: Received message:', message.type); 36 | 37 | // Forward the message to the handler if set 38 | if (this.messageHandler) { 39 | this.messageHandler.postMessage(message); 40 | } 41 | 42 | // Also emit the message for direct listeners 43 | if (message.type ===
'audioData') { 44 | this.eventEmitter.fire({ type: 'audioData', data: message.data }); 45 | } 46 | } 47 | 48 | /** 49 | * Register a callback to receive audio data 50 | * @param callback Function to call when audio data is received 51 | * @returns A function to remove the listener 52 | */ 53 | public onAudioData(callback: (data: string) => void): () => void { 54 | console.log('WebAudioService: Registering onAudioData listener'); 55 | 56 | const listener = (event: any) => { 57 | if (event.type === 'audioData') { 58 | callback(event.data); 59 | } 60 | }; 61 | 62 | const subscription = this.eventEmitter.event(listener); 63 | 64 | return () => { 65 | console.log('WebAudioService: Removing onAudioData listener'); 66 | subscription.dispose(); 67 | }; 68 | } 69 | 70 | // Start recording audio 71 | public async startRecording(): Promise<void> { 72 | console.log('WebAudioService: Starting recording'); 73 | 74 | try { 75 | // Check if we have an audio provider 76 | if (!this.audioProvider) { 77 | throw new Error('No audio provider available'); 78 | } 79 | 80 | // Ensure the WebView is visible before requesting microphone access 81 | await this.audioProvider.ensureWebViewIsVisible(); 82 | 83 | // Start recording 84 | this.audioProvider.startRecording(); 85 | } catch (error) { 86 | console.error('WebAudioService: Error starting recording:', error); 87 | throw new Error(`Failed to start recording: ${(error as Error).message}`); 88 | } 89 | } 90 | 91 | // Stop recording audio 92 | public stopRecording(): Promise<void> { 93 | console.log('WebAudioService: Stopping recording'); 94 | 95 | if (!this.audioProvider) { 96 | console.log('WebAudioService: No audio provider available'); 97 | return Promise.resolve(); 98 | } 99 | 100 | this.audioProvider.stopRecording(); 101 | return Promise.resolve(); 102 | } 103 | 104 | // Play audio from base64 data 105 | public playAudio(data: string, format: string = 'audio/wav'): void { 106 | console.log('WebAudioService: Playing audio'); 107 | this.audioProvider?.playAudio(data, format); 108 | } 109 | 110 | /** 111 | * Check microphone permission status 112 | * @returns Promise<'granted' | 'denied' | 'prompt' | 'error'> 113 | */ 114 | public async checkMicrophonePermission(): Promise<'granted' | 'denied' | 'prompt' | 'error'> { 115 | console.log('WebAudioService: Checking microphone permission'); 116 | 117 | try { 118 | // If no audio provider, return error 119 | if (!this.audioProvider) { 120 | console.error('WebAudioService: No audio provider available for permission check'); 121 | return 'error'; 122 | } 123 | 124 | // Ensure the WebView is visible to check permissions 125 | await this.audioProvider.ensureWebViewIsVisible(); 126 | 127 | // Create a promise that will resolve with the permission status 128 | return new Promise((resolve) => { 129 | // Set up a one-time listener for the permission status 130 | const disposable = this.audioProvider!.onMessage((message) => { 131 | if (message.type === 'permissionStatus') { 132 | console.log('WebAudioService: Received permission status:', message.status); 133 | disposable.dispose(); 134 | resolve(message.status); 135 | } 136 | }); 137 | 138 | // Send a message to the WebView to check permissions 139 | this.audioProvider!.startRecording(); 140 | 141 | // Set a timeout in case we don't get a response 142 | setTimeout(() => { 143 | disposable.dispose(); 144 | console.log('WebAudioService: Permission check timed out'); 145 | resolve('error'); 146 | }, 5000); 147 | }); 148 | } catch (error) { 149 | console.error('WebAudioService: Error checking
microphone permission:', error); 150 | return 'error'; 151 | } 152 | } 153 | } -------------------------------------------------------------------------------- /src/services/dictation-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import { EventEmitter } from 'events' 3 | import { createClient, LiveTranscriptionEvents, ListenLiveClient } from '@deepgram/sdk' 4 | import { MicrophoneWrapper } from '../utils/native-module-wrapper' 5 | 6 | interface DictationState { 7 | isActive: boolean 8 | mic: MicrophoneWrapper | null 9 | wsConnection: ListenLiveClient | null 10 | statusBarItem: vscode.StatusBarItem 11 | } 12 | 13 | export class DictationService { 14 | private state: DictationState 15 | private eventEmitter = new EventEmitter() 16 | 17 | constructor( 18 | private deepgramClient: ReturnType<typeof createClient> | null, 19 | private context: vscode.ExtensionContext 20 | ) { 21 | this.state = { 22 | isActive: false, 23 | mic: null, 24 | wsConnection: null, 25 | statusBarItem: vscode.window.createStatusBarItem(vscode.StatusBarAlignment.Right) 26 | } 27 | this.state.statusBarItem.text = '$(unmute) Dictation: Off' 28 | this.state.statusBarItem.show() 29 | } 30 | 31 | async startDictation(): Promise<void> { 32 | console.log('DictationService.startDictation called') 33 | if (this.state.isActive) { 34 | console.log('Dictation already active, stopping first...') 35 | await this.stopDictation() 36 | console.log('Previous dictation stopped') 37 | } 38 | 39 | if (!this.deepgramClient) { 40 | throw new Error('Deepgram client not initialized. Please provide an API key.') 41 | } 42 | 43 | try { 44 | console.log('Creating microphone wrapper...') 45 | const mic = new MicrophoneWrapper() 46 | console.log('Microphone wrapper instance created') 47 | 48 | const audioStream = mic.startRecording() 49 | console.log('Microphone recording started') 50 | 51 | console.log('Creating Deepgram connection...') 52 | const connection = this.deepgramClient.listen.live({ 53 | model: 'nova-2', 54 | smart_format: true, 55 | punctuate: true, 56 | interim_results: true, 57 | encoding: 'linear16', 58 | sample_rate: 16000 59 | }) 60 | console.log('Deepgram connection created') 61 | 62 | connection.on(LiveTranscriptionEvents.Open, () => { 63 | console.log('Deepgram connection opened') 64 | this.state.isActive = true 65 | this.state.statusBarItem.text = '$(megaphone) Dictation: On' 66 | }) 67 | 68 | // Store references before setting up other handlers 69 | this.state.mic = mic 70 | this.state.wsConnection = connection 71 | console.log('References stored') 72 | 73 | connection.on(LiveTranscriptionEvents.Error, (error) => { 74 | console.error('Deepgram connection error:', error) 75 | console.error('Connection state:', { 76 | isConnected: connection?.isConnected(), 77 | error: error 78 | }) 79 | }) 80 | 81 | audioStream.on('error', (error: any) => { 82 | console.error('Microphone stream error:', error) 83 | console.error('Microphone state:', { 84 | isActive: this.state.isActive, 85 | hasStream: !!audioStream, 86 | error: error 87 | }) 88 | 89 | // Show error message to user if it's a command not found error 90 | if (error.message?.includes('command') && error.message?.includes('not')) { 91 | vscode.window.showErrorMessage(error.message) 92 | } 93 | }) 94 | 95 | audioStream.on('data', (chunk: Buffer) => { 96 | if (connection?.isConnected()) { 97 | connection.send(chunk) 98 | } else { 99 | console.log('Connection not ready, chunk dropped') 100 | } 101 | }) 102 | 103 | // Handle
transcripts with isFinal flag 104 | connection.on(LiveTranscriptionEvents.Transcript, (data: any) => { 105 | const transcript = data?.channel?.alternatives?.[0]?.transcript || '' 106 | const isFinal = data?.is_final || false 107 | 108 | if (transcript) { 109 | console.log('Processing transcript:', transcript, 'isFinal:', isFinal) 110 | this.eventEmitter.emit('transcript', transcript, isFinal) 111 | } 112 | }) 113 | 114 | console.log('All handlers set up successfully') 115 | } catch (error) { 116 | console.error('Error in startDictation:', error) 117 | // Show error message to user 118 | if (error instanceof Error) { 119 | vscode.window.showErrorMessage(`Failed to start dictation: ${error.message}`) 120 | } 121 | throw error 122 | } 123 | } 124 | 125 | async stopDictation(): Promise { 126 | console.log('stopDictation called') 127 | 128 | if (this.state.mic) { 129 | this.state.mic.stopRecording() 130 | this.state.mic = null 131 | } 132 | 133 | if (this.state.wsConnection) { 134 | this.state.wsConnection.disconnect() 135 | this.state.wsConnection = null 136 | } 137 | 138 | this.state.isActive = false 139 | this.state.statusBarItem.text = '$(unmute) Dictation: Off' 140 | } 141 | 142 | onTranscript(callback: (text: string, isFinal: boolean) => void) { 143 | if (!this.deepgramClient) { 144 | console.warn('Cannot set up transcript listener: Deepgram client not initialized') 145 | // Return a no-op function 146 | return () => {} 147 | } 148 | 149 | this.eventEmitter.on('transcript', callback) 150 | return () => { 151 | this.eventEmitter.removeListener('transcript', callback) 152 | } 153 | } 154 | 155 | /** 156 | * Update the Deepgram client instance 157 | */ 158 | updateClient(client: ReturnType): void { 159 | this.deepgramClient = client 160 | } 161 | } -------------------------------------------------------------------------------- /src/services/prompt-management-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | 3 | export interface DictationPrompt { 4 | id: string 5 | name: string 6 | prompt: string 7 | description?: string 8 | contextRules?: { 9 | fileTypes?: string[] 10 | languages?: string[] 11 | } 12 | } 13 | 14 | export class PromptManagementService { 15 | private prompts: DictationPrompt[] = [] 16 | private readonly storageKey = 'dictation.prompts' 17 | private readonly currentPromptKey = 'dictation.currentPrompt' 18 | private readonly DEFAULT_PROMPT: DictationPrompt = { 19 | id: 'default', 20 | name: 'Basic Prompt', 21 | prompt: `You are an AI assistant taking dictation from a user. Your job is to correct grammar, punctuation, and spelling, and return the corrected dictation. Do not add additional context. Questions from the user are not directed towards you, so do not answer them. Return the corrected dictation only. 
22 | ` 23 | } 24 | 25 | private currentPromptId: string = 'default' 26 | private onPromptsChanged?: () => void 27 | 28 | constructor(private context: vscode.ExtensionContext) { 29 | this.loadPrompts() 30 | this.currentPromptId = this.context.globalState.get(this.currentPromptKey, 'default') 31 | } 32 | 33 | private async loadPrompts() { 34 | const savedPrompts = await this.context.globalState.get(this.storageKey) 35 | if (savedPrompts) this.prompts = savedPrompts 36 | else this.initializeDefaultPrompts() 37 | } 38 | 39 | private initializeDefaultPrompts() { 40 | this.prompts = [ 41 | { 42 | id: 'detailed Prompt', 43 | name: 'Detailed Prompt', 44 | description: 'Takes a basic description of what the user wants to do and provides a detailed prompt that will help AI assistants understand the user\'s intent and write the code to accomplish the task.', 45 | prompt: `You are an expert prompt engineer, helping developers create clear, detailed prompts for AI coding assistants. 46 | 47 | When you receive dictated text from a developer, your job is to: 48 | 49 | 1. Understand the core intent of their request 50 | 2. Transform it into a structured, detailed prompt that: 51 | - Breaks down complex requirements into clear steps 52 | - Adds necessary technical context and constraints 53 | - Specifies expected inputs, outputs, and error cases 54 | - Includes relevant best practices and patterns 55 | - Maintains language-specific idioms (TypeScript, React, etc.) 56 | 57 | 3. Format the prompt in a clear, hierarchical structure 58 | 59 | Example: 60 | User: "make a hook that fetches user data and handles loading and error states" 61 | 62 | Your response: 63 | "Create a custom React hook 'useUserData' that: 64 | - Accepts a userId parameter 65 | - Uses React Query for data fetching 66 | - Implements proper TypeScript types for all states 67 | - Handles loading, error, and success states 68 | - Includes retry logic for failed requests 69 | - Returns a strongly-typed result object 70 | - Follows React hooks best practices 71 | - Includes proper cleanup on unmount 72 | 73 | The hook should provide: 74 | - Loading state indicator 75 | - Error handling with user-friendly messages 76 | - Cached data management 77 | - Automatic background refetching 78 | - Type-safe access to user data" 79 | 80 | Focus on being specific and technical, while keeping the prompt clear and actionable. 81 | 82 | You are not having a conversation with the user, you are taking the user's request and turning it into a prompt for an LLM. 83 | 84 | Do not return anything other than the prompt itself. 
85 | ` 86 | } 87 | ] 88 | this.savePrompts() 89 | } 90 | 91 | private async savePrompts() { 92 | await this.context.globalState.update(this.storageKey, this.prompts) 93 | } 94 | 95 | getDefaultPrompt(): DictationPrompt { 96 | return this.DEFAULT_PROMPT 97 | } 98 | 99 | getPromptById(id: string): DictationPrompt | undefined { 100 | if (id === 'default') return this.DEFAULT_PROMPT 101 | return this.prompts.find(p => p.id === id) 102 | } 103 | 104 | getAllPrompts(): DictationPrompt[] { 105 | return [...this.prompts] 106 | } 107 | 108 | getCurrentPrompt(): DictationPrompt { 109 | const currentPrompt = this.prompts.find(p => p.id === this.currentPromptId) 110 | return currentPrompt || this.DEFAULT_PROMPT 111 | } 112 | 113 | setOnPromptsChanged(callback: () => void) { 114 | this.onPromptsChanged = callback 115 | } 116 | 117 | async addPrompt(name: string, prompt: string): Promise { 118 | const id = Date.now().toString() 119 | this.prompts.push({ id, name, prompt }) 120 | await this.savePrompts() 121 | this.onPromptsChanged?.() 122 | } 123 | 124 | async updatePrompt(id: string, updates: Partial): Promise { 125 | const index = this.prompts.findIndex(p => p.id === id) 126 | if (index === -1) throw new Error('Prompt not found') 127 | this.prompts[index] = { ...this.prompts[index], ...updates } 128 | await this.savePrompts() 129 | this.onPromptsChanged?.() 130 | } 131 | 132 | async deletePrompt(id: string): Promise { 133 | if (id === 'default') return 134 | this.prompts = this.prompts.filter(p => p.id !== id) 135 | if (this.currentPromptId === id) { 136 | this.currentPromptId = 'default' 137 | } 138 | await this.savePrompts() 139 | this.onPromptsChanged?.() 140 | } 141 | 142 | async setCurrentPrompt(id: string): Promise { 143 | this.currentPromptId = id 144 | await this.context.globalState.update(this.currentPromptKey, id) 145 | const prompt = this.getCurrentPrompt() 146 | vscode.window.showInformationMessage(`Active prompt set to: ${prompt.name}`) 147 | this.onPromptsChanged?.() 148 | } 149 | } -------------------------------------------------------------------------------- /.github/workflows/build-natives.yml: -------------------------------------------------------------------------------- 1 | name: Build Native Modules 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | tags: 7 | - 'v*' 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | os: [ubuntu-latest, macos-latest, windows-latest] 16 | node: [16] 17 | include: 18 | # Add specific configurations for macOS ARM64 19 | - os: macos-latest 20 | node: 16 21 | architecture: arm64 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | 26 | - name: Setup Node.js 27 | uses: actions/setup-node@v3 28 | with: 29 | node-version: ${{ matrix.node }} 30 | architecture: ${{ matrix.architecture || 'x64' }} 31 | 32 | # Platform-specific setup steps 33 | - name: Install Linux dependencies 34 | if: runner.os == 'Linux' 35 | run: | 36 | sudo apt-get update 37 | sudo apt-get install -y libasound2-dev 38 | 39 | - name: Install macOS dependencies 40 | if: runner.os == 'macOS' 41 | run: | 42 | brew install portaudio 43 | 44 | - name: Install Windows dependencies 45 | if: runner.os == 'Windows' 46 | run: | 47 | npm install --global --production windows-build-tools 48 | 49 | # Install dependencies and build native modules 50 | - name: Install Dependencies 51 | run: npm install 52 | 53 | # Create prebuilds directory structure 54 | - name: Prepare Prebuilds Directory 55 | run: | 56 | mkdir -p prebuilds/${{ runner.os 
== 'Windows' && 'win32' || runner.os == 'macOS' && 'darwin' || 'linux' }}-${{ matrix.architecture || 'x64' }} 57 | 58 | # Extract built binaries 59 | - name: Extract Native Modules 60 | run: | 61 | # Command to find and copy built .node files to prebuilds directory 62 | # This will need to be adjusted based on where node-gyp places the binaries 63 | if [ "${{ runner.os }}" == "Windows" ]; then 64 | cp -v node_modules/speaker/build/Release/*.node prebuilds/win32-${{ matrix.architecture || 'x64' }}/speaker.node || echo "speaker.node not found" 65 | cp -v node_modules/node-microphone/build/Release/*.node prebuilds/win32-${{ matrix.architecture || 'x64' }}/node-microphone.node || echo "node-microphone.node not found" 66 | elif [ "${{ runner.os }}" == "macOS" ]; then 67 | cp -v node_modules/speaker/build/Release/*.node prebuilds/darwin-${{ matrix.architecture || 'x64' }}/speaker.node || echo "speaker.node not found" 68 | cp -v node_modules/node-microphone/build/Release/*.node prebuilds/darwin-${{ matrix.architecture || 'x64' }}/node-microphone.node || echo "node-microphone.node not found" 69 | else 70 | cp -v node_modules/speaker/build/Release/*.node prebuilds/linux-${{ matrix.architecture || 'x64' }}/speaker.node || echo "speaker.node not found" 71 | cp -v node_modules/node-microphone/build/Release/*.node prebuilds/linux-${{ matrix.architecture || 'x64' }}/node-microphone.node || echo "node-microphone.node not found" 72 | fi 73 | shell: bash 74 | 75 | # Upload prebuilds as artifacts 76 | - name: Upload Prebuilds 77 | uses: actions/upload-artifact@v4 78 | with: 79 | name: prebuilds-${{ runner.os == 'Windows' && 'win32' || runner.os == 'macOS' && 'darwin' || 'linux' }}-${{ matrix.architecture || 'x64' }} 80 | path: prebuilds/${{ runner.os == 'Windows' && 'win32' || runner.os == 'macOS' && 'darwin' || 'linux' }}-${{ matrix.architecture || 'x64' }} 81 | 82 | - name: Copy prebuilt binaries 83 | run: | 84 | mkdir -p prebuilds/win32-${{ matrix.architecture || 'x64' }} 85 | mkdir -p prebuilds/darwin-${{ matrix.architecture || 'x64' }} 86 | mkdir -p prebuilds/linux-${{ matrix.architecture || 'x64' }} 87 | 88 | # Windows 89 | touch prebuilds/win32-${{ matrix.architecture || 'x64' }}/node-microphone.marker 90 | 91 | # macOS 92 | touch prebuilds/darwin-${{ matrix.architecture || 'x64' }}/node-microphone.marker 93 | 94 | # Linux 95 | touch prebuilds/linux-${{ matrix.architecture || 'x64' }}/node-microphone.marker 96 | 97 | package: 98 | needs: build 99 | runs-on: ubuntu-latest 100 | steps: 101 | - uses: actions/checkout@v3 102 | 103 | - name: Setup Node.js 104 | uses: actions/setup-node@v3 105 | with: 106 | node-version: 16 107 | 108 | - name: Install Dependencies 109 | run: npm install 110 | 111 | # Download all prebuilds 112 | - name: Download All Prebuilds 113 | uses: actions/download-artifact@v4 114 | with: 115 | path: prebuilds-temp 116 | 117 | # Organize prebuilds 118 | - name: Organize Prebuilds 119 | run: | 120 | mkdir -p prebuilds 121 | cp -R prebuilds-temp/*/* prebuilds/ 122 | ls -la prebuilds/ 123 | 124 | # Package VSIX 125 | - name: Package Extension 126 | run: | 127 | npm run package 128 | 129 | # Upload VSIX 130 | - name: Upload VSIX 131 | uses: actions/upload-artifact@v4 132 | with: 133 | name: vibe-coder-extension 134 | path: "*.vsix" 135 | 136 | # If this is a tag, create a GitHub release 137 | - name: Release 138 | uses: softprops/action-gh-release@v1 139 | if: startsWith(github.ref, 'refs/tags/') 140 | with: 141 | files: | 142 | *.vsix 143 | prebuilds/**/* 144 | env: 145 | GITHUB_TOKEN: 
${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /src/services/command-registry-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | 3 | export type VSCodeCommandCategory = 4 | | 'navigation' 5 | | 'editing' 6 | | 'view' 7 | | 'workspace' 8 | | 'debug' 9 | | 'git' 10 | 11 | export interface CommandDefinition { 12 | name: string 13 | command: string 14 | category: VSCodeCommandCategory 15 | description: string 16 | args?: { 17 | name: string 18 | type: string 19 | description: string 20 | required: boolean 21 | }[] 22 | parameters?: { 23 | type: string 24 | properties: Record 25 | required?: string[] 26 | } 27 | } 28 | 29 | export class CommandRegistryService { 30 | private commands: CommandDefinition[] = [ 31 | { 32 | name: "Open File", 33 | command: "vscode.open", 34 | category: "navigation", 35 | description: "Opens a file in the editor", 36 | args: [{ 37 | name: "path", 38 | type: "string", 39 | description: "Path to the file", 40 | required: true 41 | }] 42 | }, 43 | { 44 | name: "Find in Files", 45 | command: "workbench.action.findInFiles", 46 | category: "navigation", 47 | description: "Search across all files", 48 | args: [{ 49 | name: "query", 50 | type: "string", 51 | description: "Search term", 52 | required: true 53 | }] 54 | }, 55 | { 56 | name: "Toggle Terminal", 57 | command: "workbench.action.terminal.toggleTerminal", 58 | category: "view", 59 | description: "Show or hide the terminal" 60 | }, 61 | { 62 | name: "Split Editor", 63 | command: "workbench.action.splitEditor", 64 | category: "view", 65 | description: "Split the editor" 66 | }, 67 | { 68 | name: "New File", 69 | command: "workbench.action.files.newUntitledFile", 70 | category: "workspace", 71 | description: "Create a new file" 72 | }, 73 | { 74 | name: "New Folder", 75 | command: "workbench.action.files.newFolder", 76 | category: "workspace", 77 | description: "Create a new folder" 78 | }, 79 | { 80 | name: "Save", 81 | command: "workbench.action.files.save", 82 | category: "workspace", 83 | description: "Save the current file" 84 | }, 85 | { 86 | name: "Save All", 87 | command: "workbench.action.files.saveAll", 88 | category: "workspace", 89 | description: "Save all open files" 90 | }, 91 | { 92 | name: "Go to File", 93 | command: "workbench.action.quickOpen", 94 | category: "navigation", 95 | description: "Quick open file by name" 96 | }, 97 | { 98 | name: "Go to Line", 99 | command: "workbench.action.gotoLine", 100 | category: "navigation", 101 | description: "Go to a specific line number" 102 | }, 103 | { 104 | name: "Split Editor Right", 105 | command: "workbench.action.splitEditorRight", 106 | category: "view", 107 | description: "Split the editor to the right" 108 | }, 109 | { 110 | name: "Split Editor Down", 111 | command: "workbench.action.splitEditorDown", 112 | category: "view", 113 | description: "Split the editor down" 114 | }, 115 | { 116 | name: "New Terminal", 117 | command: "workbench.action.terminal.new", 118 | category: "view", 119 | description: "Create a new terminal" 120 | }, 121 | { 122 | name: "Show Source Control", 123 | command: "workbench.view.scm", 124 | category: "git", 125 | description: "Open the source control panel" 126 | }, 127 | { 128 | name: 'generateProjectSpec', 129 | command: 'vibe-coder.generateProjectSpec', 130 | category: 'workspace', 131 | description: 'Generate a structured project specification from our conversation', 132 | 
parameters: { 133 | type: 'object', 134 | properties: { 135 | format: { 136 | type: 'string', 137 | enum: ['markdown'], 138 | description: 'Output format (currently only supports markdown)' 139 | } 140 | }, 141 | required: ['format'] 142 | } 143 | } 144 | ] 145 | 146 | async executeCommand(name: string, args?: any[]): Promise { 147 | console.log('Executing command:', name, 'with args:', args) 148 | 149 | // First try exact match 150 | let command = this.commands.find(c => c.name.toLowerCase() === name.toLowerCase()) 151 | 152 | // If no exact match, try matching the command ID directly 153 | if (!command) { 154 | command = this.commands.find(c => c.command.toLowerCase() === name.toLowerCase()) 155 | } 156 | 157 | // If still no match, try fuzzy matching 158 | if (!command) { 159 | command = this.commands.find(c => 160 | c.command.toLowerCase().includes(name.toLowerCase()) || 161 | name.toLowerCase().includes(c.command.toLowerCase()) 162 | ) 163 | } 164 | 165 | if (!command) { 166 | console.error('Available commands:', this.commands.map(c => ({name: c.name, command: c.command}))) 167 | throw new Error(`Command "${name}" not found`) 168 | } 169 | 170 | try { 171 | console.log('Executing VS Code command:', command.command) 172 | 173 | // Special handling for vscode.open command 174 | if (command.command === 'vscode.open' && args?.[0]) { 175 | // Convert file path to VS Code URI 176 | const uri = vscode.Uri.file(args[0]) 177 | await vscode.commands.executeCommand(command.command, uri) 178 | } else { 179 | await vscode.commands.executeCommand(command.command, ...(args || [])) 180 | } 181 | } catch (error) { 182 | console.error(`Failed to execute command ${name}:`, error) 183 | throw new Error(`Failed to execute command "${name}": ${(error as Error).message}`) 184 | } 185 | } 186 | 187 | getCommandDefinitions(): CommandDefinition[] { 188 | return this.commands 189 | } 190 | 191 | public registerCommand(command: CommandDefinition) { 192 | this.commands.push(command) 193 | } 194 | } -------------------------------------------------------------------------------- /scripts/create-prebuild.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Script to create prebuilds from locally built native modules 3 | */ 4 | const fs = require('fs'); 5 | const path = require('path'); 6 | const os = require('os'); 7 | const { execSync } = require('child_process'); 8 | 9 | // Determine platform and architecture 10 | const platform = os.platform(); 11 | const arch = os.arch(); 12 | 13 | // Map platform to directory name 14 | const platformMap = { 15 | 'win32': 'win32', 16 | 'darwin': 'darwin', 17 | 'linux': 'linux' 18 | }; 19 | 20 | // Create prebuilds directory structure 21 | const platformDir = `${platformMap[platform]}-${arch}`; 22 | const prebuildsDir = path.join(__dirname, '..', 'prebuilds', platformDir); 23 | 24 | console.log(`Creating prebuilds for ${platformDir}...`); 25 | 26 | // Create directory if it doesn't exist 27 | if (!fs.existsSync(prebuildsDir)) { 28 | fs.mkdirSync(prebuildsDir, { recursive: true }); 29 | console.log(`Created directory: ${prebuildsDir}`); 30 | } 31 | 32 | // Ensure modules are built 33 | console.log('Ensuring native modules are built...'); 34 | try { 35 | // Attempt to rebuild the modules to ensure they exist 36 | execSync('npm rebuild', { stdio: 'inherit' }); 37 | console.log('Native modules rebuilt successfully'); 38 | } catch (error) { 39 | console.error('Error rebuilding native modules:', error.message); 40 | console.log('Will 
attempt to continue with existing builds if available...'); 41 | } 42 | 43 | // Process each module 44 | let success = true; 45 | 46 | // 1. Handle speaker (true native module) 47 | try { 48 | console.log('Processing speaker module (native)...'); 49 | const speakerPath = path.join(__dirname, '..', 'node_modules', 'speaker'); 50 | const speakerBuildPath = path.join(speakerPath, 'build', 'Release'); 51 | 52 | if (fs.existsSync(speakerBuildPath)) { 53 | const files = fs.readdirSync(speakerBuildPath); 54 | const nodeFile = files.find(file => file.endsWith('.node')); 55 | 56 | if (nodeFile) { 57 | const sourcePath = path.join(speakerBuildPath, nodeFile); 58 | const destPath = path.join(prebuildsDir, 'speaker.node'); 59 | fs.copyFileSync(sourcePath, destPath); 60 | console.log(`Copied ${sourcePath} to ${destPath}`); 61 | } else { 62 | console.error('Could not find .node file for speaker'); 63 | success = false; 64 | } 65 | } else { 66 | console.error('Speaker build directory not found'); 67 | success = false; 68 | } 69 | } catch (error) { 70 | console.error('Error processing speaker module:', error.message); 71 | success = false; 72 | } 73 | 74 | // 2. Handle node-microphone (JavaScript wrapper, not a native module) 75 | try { 76 | console.log('Processing node-microphone module (JS wrapper)...'); 77 | const microphonePath = path.join(__dirname, '..', 'node_modules', 'node-microphone'); 78 | 79 | // Check if the required command-line tools are installed 80 | let commandAvailable = false; 81 | 82 | if (platform === 'darwin') { 83 | try { 84 | execSync('which rec', { stdio: 'ignore' }); 85 | commandAvailable = true; 86 | console.log('Found "rec" command for macOS'); 87 | } catch (e) { 88 | console.warn('The "rec" command is not available. Install SoX: brew install sox'); 89 | } 90 | } else if (platform === 'win32') { 91 | try { 92 | execSync('where sox', { stdio: 'ignore' }); 93 | commandAvailable = true; 94 | console.log('Found "sox" command for Windows'); 95 | } catch (e) { 96 | console.warn('The "sox" command is not available. Install SoX for Windows.'); 97 | } 98 | } else { 99 | try { 100 | execSync('which arecord', { stdio: 'ignore' }); 101 | commandAvailable = true; 102 | console.log('Found "arecord" command for Linux'); 103 | } catch (e) { 104 | console.warn('The "arecord" command is not available. Install ALSA tools: sudo apt-get install alsa-utils'); 105 | } 106 | } 107 | 108 | // Create a special marker file for node-microphone 109 | const destPath = path.join(prebuildsDir, 'node-microphone.js'); 110 | 111 | // Create a simple JS file that will be used to indicate this is a JS module 112 | const jsContent = ` 113 | /** 114 | * This is a marker file for the node-microphone module. 115 | * 116 | * node-microphone is not a native module with a .node binary. 
117 | * It's a JavaScript wrapper that uses command-line tools: 118 | * - macOS: 'rec' (part of SoX) 119 | * - Windows: 'sox' 120 | * - Linux: 'arecord' 121 | * 122 | * Required command-line tool available: ${commandAvailable} 123 | * Platform: ${platform} 124 | * Architecture: ${arch} 125 | */ 126 | module.exports = { 127 | isJsWrapper: true, 128 | commandAvailable: ${commandAvailable}, 129 | platform: '${platform}', 130 | architecture: '${arch}' 131 | }; 132 | `; 133 | 134 | fs.writeFileSync(destPath, jsContent); 135 | console.log(`Created marker file for node-microphone at ${destPath}`); 136 | 137 | // Also copy the actual index.js file for reference 138 | const indexPath = path.join(microphonePath, 'index.js'); 139 | if (fs.existsSync(indexPath)) { 140 | const indexDestPath = path.join(prebuildsDir, 'node-microphone-original.js'); 141 | fs.copyFileSync(indexPath, indexDestPath); 142 | console.log(`Copied original node-microphone implementation to ${indexDestPath}`); 143 | } 144 | 145 | // Check for command-line tool and warn if not available 146 | if (!commandAvailable) { 147 | console.warn(`WARNING: The required command-line tool for node-microphone is not available on this system.`); 148 | console.warn(`Audio input will not work without installing the appropriate tool.`); 149 | 150 | if (platform === 'darwin') { 151 | console.warn('Install SoX on macOS: brew install sox'); 152 | } else if (platform === 'win32') { 153 | console.warn('Install SoX for Windows: https://sourceforge.net/projects/sox/'); 154 | } else { 155 | console.warn('Install ALSA tools on Linux: sudo apt-get install alsa-utils'); 156 | } 157 | } 158 | } catch (error) { 159 | console.error('Error processing node-microphone module:', error.message); 160 | // Don't mark as failure since this is a JS module 161 | } 162 | 163 | if (success) { 164 | console.log(`Successfully created prebuilds for ${platformDir}`); 165 | } else { 166 | console.error('There were errors creating prebuilds'); 167 | console.log('Continuing anyway to allow partial testing... 
Some features may not work.');
168 | }
--------------------------------------------------------------------------------
/src/services/llm-service.ts:
--------------------------------------------------------------------------------
1 | import * as vscode from 'vscode'
2 | import { env } from 'vscode'
3 | import { DictationPrompt } from './prompt-management-service'
4 | 
5 | interface LLMConfig {
6 |   apiKey: string
7 |   model: string
8 |   baseUrl: string
9 | }
10 | 
11 | interface LLMResponse {
12 |   text: string
13 |   error?: string
14 | }
15 | 
16 | interface LLMClient {
17 |   complete(messages: Array<{ role: string, content: string }>): Promise<string>
18 | }
19 | 
20 | export interface ILLMService {
21 |   processText(params: { text: string, prompt: DictationPrompt }): Promise<LLMResponse>
22 | }
23 | 
24 | // OpenAI implementation of LLMClient
25 | class OpenAIClient implements LLMClient {
26 |   constructor(private config: LLMConfig) {}
27 | 
28 |   updateApiKey(apiKey: string) {
29 |     this.config = { ...this.config, apiKey }
30 |   }
31 | 
32 |   async complete(messages: Array<{ role: string, content: string }>): Promise<string> {
33 |     const response = await fetch(`${this.config.baseUrl}/chat/completions`, {
34 |       method: 'POST',
35 |       headers: {
36 |         'Content-Type': 'application/json',
37 |         'Authorization': `Bearer ${this.config.apiKey}`
38 |       },
39 |       body: JSON.stringify({
40 |         model: this.config.model,
41 |         messages
42 |       })
43 |     })
44 | 
45 |     if (!response.ok) {
46 |       const error = await response.text()
47 |       throw new Error(`LLM API error: ${error}`)
48 |     }
49 | 
50 |     const data = await response.json()
51 |     return data.choices[0].message.content
52 |   }
53 | }
54 | 
55 | // Add new streaming interfaces
56 | interface StreamProcessParams {
57 |   text: string
58 |   prompt: DictationPrompt
59 |   onToken: (token: string) => void
60 | }
61 | 
62 | interface StreamResponse extends LLMResponse {
63 |   text: string
64 |   error?: string
65 | }
66 | 
67 | export class LLMService implements ILLMService {
68 |   private client: OpenAIClient
69 | 
70 |   constructor(private context: vscode.ExtensionContext) {
71 |     this.client = new OpenAIClient({
72 |       apiKey: '',
73 |       model: 'gpt-4o',
74 |       baseUrl: 'https://api.openai.com/v1'
75 |     })
76 |   }
77 | 
78 |   // Add new streaming method
79 |   async streamProcessText(params: StreamProcessParams): Promise<StreamResponse> {
80 |     try {
81 |       const apiKey = await this.getApiKey()
82 |       if (!apiKey) {
83 |         return {
84 |           text: '',
85 |           error: 'OpenAI API key is required. Please add it in settings.'
86 | } 87 | } 88 | 89 | this.client.updateApiKey(apiKey) 90 | 91 | const messages = [ 92 | { role: 'system', content: params.prompt.prompt }, 93 | { role: 'user', content: params.text } 94 | ] 95 | 96 | const response = await fetch('https://api.openai.com/v1/chat/completions', { 97 | method: 'POST', 98 | headers: { 99 | 'Content-Type': 'application/json', 100 | 'Authorization': `Bearer ${apiKey}` 101 | }, 102 | body: JSON.stringify({ 103 | model: 'gpt-4-turbo-preview', 104 | messages, 105 | stream: true 106 | }) 107 | }) 108 | 109 | if (!response.ok) { 110 | throw new Error(`OpenAI API error: ${response.statusText}`) 111 | } 112 | 113 | const reader = response.body?.getReader() 114 | if (!reader) throw new Error('Failed to get response reader') 115 | 116 | let fullText = '' 117 | 118 | while (true) { 119 | const { done, value } = await reader.read() 120 | if (done) break 121 | 122 | // Parse the SSE data 123 | const chunk = new TextDecoder().decode(value) 124 | const lines = chunk.split('\n') 125 | 126 | for (const line of lines) { 127 | if (line.startsWith('data: ')) { 128 | const data = line.slice(6) 129 | if (data === '[DONE]') continue 130 | 131 | try { 132 | const json = JSON.parse(data) 133 | const token = json.choices[0]?.delta?.content || '' 134 | if (token) { 135 | fullText += token 136 | params.onToken(token) 137 | } 138 | } catch (e) { 139 | console.error('Failed to parse streaming response:', e) 140 | } 141 | } 142 | } 143 | } 144 | 145 | return { text: fullText } 146 | } catch (error) { 147 | console.error('Stream processing error:', error) 148 | if ((error as Error).message.includes('API key')) { 149 | await this.context.secrets.delete('openai.apiKey') 150 | } 151 | return { 152 | text: '', 153 | error: `Failed to process text: ${(error as Error).message}` 154 | } 155 | } 156 | } 157 | 158 | async processText({ text, prompt }: { 159 | text: string, 160 | prompt: DictationPrompt 161 | }): Promise { 162 | try { 163 | const apiKey = await this.getApiKey() 164 | if (!apiKey) { 165 | return { 166 | text: text, 167 | error: 'OpenAI API key is required. Please add it in settings.' 
168 |         }
169 |       }
170 | 
171 |       // Update client config with API key
172 |       if (this.client instanceof OpenAIClient) {
173 |         this.client.updateApiKey(apiKey)
174 |       }
175 | 
176 |       const result = await this.client.complete([
177 |         { role: 'system', content: prompt.prompt },
178 |         { role: 'user', content: text }
179 |       ])
180 | 
181 |       return { text: result }
182 |     } catch (error) {
183 |       if ((error as Error).message.includes('API key')) {
184 |         // If API key error, clear the stored key so it will be requested again
185 |         await this.context.secrets.delete('openai.apiKey')
186 |       }
187 |       return {
188 |         text: text,
189 |         error: `Failed to process text: ${(error as Error).message}`
190 |       }
191 |     }
192 |   }
193 | 
194 |   private async getApiKey(): Promise<string | null> {
195 |     const apiKey = await this.context.secrets.get('openai.apiKey')
196 |     if (!apiKey) {
197 |       const key = await vscode.window.showInputBox({
198 |         prompt: 'Enter your OpenAI API key',
199 |         password: true,
200 |         placeHolder: 'sk-...',
201 |         ignoreFocusOut: true, // Keep the input box open when focus is lost
202 |         validateInput: (value) => {
203 |           if (!value) return 'API key is required'
204 |           if (!value.startsWith('sk-')) return 'Invalid API key format'
205 |           return null
206 |         }
207 |       })
208 |       if (!key) return null // User cancelled
209 |       await this.context.secrets.store('openai.apiKey', key)
210 |       return key
211 |     }
212 |     return apiKey
213 |   }
214 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Vibe-Coder
2 | 
3 | A voice-powered coding assistant for AI-enabled VS Code forks that helps you navigate, control, and code through natural voice commands. This is a first cut at Deepgram's vision of the future: an early experiment in voice-first programming. As such, expect bugs and rapid change. We'd love to hear how you're using it and your ideas for making it better.
4 | 
5 | ## Getting Started
6 | 
7 | 1. Install the extension*
8 | 2. Press Cmd/Ctrl+Shift+V to open the extension
9 | 3. Enter your Deepgram API key if needed
10 | 4. Choose to either Vibe or Code!
11 | 
12 | *Windsurf does not use the standard VS Code Marketplace; therefore, you must download the VSIX from this repo and install it manually.
13 | 
14 | ## Features
15 | 
16 | ### Vibe Mode
17 | - Integrated with Deepgram's Voice Agent API
18 | - Brainstorm new ideas, validate opinions, get in the flow
19 | - Ask it to generate a product spec at the end of your conversation to guide your development work
20 | 
21 | ### Code Mode
22 | - Voice dictation with customizable AI rewrite prompts
23 | - Generate a prompt for any scenario: Typo Corrections, Debugging, Language/Project Specific (see the example below)
24 | - The rewrite gets automatically copied to your clipboard, which you can then paste anywhere you want
25 | - This is particularly suited for vibe-coding with AI IDEs such as Cursor or Windsurf, with a separate AI chat panel
26 | - Start and stop dictation with Cmd/Ctrl+Shift+D. Stopping the dictation triggers the rewrite
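To make that concrete, here is the kind of custom rewrite prompt you might create. This example, for dictating Git commit messages, is purely illustrative and does not ship with the extension:

```
You are taking dictation for a Git commit message. Correct grammar,
punctuation, and spelling, then rewrite the dictated text as a short
imperative subject line followed by an optional concise body.
Return only the commit message, nothing else.
```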
27 | 
28 | ## Future Improvements
29 | 
30 | In no particular order:
31 | - Add/improve VS Code command recognition in Vibe mode
32 | - Add additional function calling capability in Vibe mode
33 | - Bug quashing
34 | - Implement Microphone settings in the webview panel menu
35 | - Explore accessibility controls on each platform to allow automatic pasting of transcripts into AI chat panel
36 | - Give the Vibe mode agent full context of your project
37 | - Add memory to the Vibe mode agent
38 | - Add MCP capability to the Vibe mode agent
39 | - Echo cancellation
40 | 
41 | 
42 | ## Setup
43 | 
44 | ##### API Keys
45 | 
46 | You will need a Deepgram API key and an OpenAI API key.
47 | 
48 | For Deepgram:
49 | 1. Sign up at [Deepgram](https://console.deepgram.com/signup)
50 | 2. Create a new project
51 | 3. Generate an API key with the appropriate permissions
52 | 4. When you first start Vibe-Coder, you'll be prompted to enter your API key
53 | Note: new signups will get $200 in free credit automatically. If you burn through that while vibe coding, let us know so we can arrange more credits!
54 | 
55 | 
56 | ### 1. Install Required Dependencies
57 | 
58 | Vibe-Coder requires specific command-line tools for audio capture on each platform:
59 | 
60 | #### macOS
61 | ```bash
62 | # Install SoX (Sound eXchange) using Homebrew
63 | brew install sox
64 | 
65 | # Verify installation
66 | which rec
67 | rec --version
68 | ```
69 | 
70 | If the `rec` command is not found after installing SoX, run:
71 | ```bash
72 | brew link --force sox
73 | ```
74 | 
75 | #### Windows
76 | 1. Download SoX from [SourceForge](https://sourceforge.net/projects/sox/files/sox/)
77 | 2. Run the installer and follow the instructions
78 | 3. Ensure SoX is added to your PATH during installation
79 | 4. Verify installation by opening Command Prompt and running:
80 | ```cmd
81 | where sox
82 | sox --version
83 | ```
84 | 
85 | #### Linux (Ubuntu/Debian)
86 | ```bash
87 | # Install ALSA utilities
88 | sudo apt-get update
89 | sudo apt-get install alsa-utils
90 | 
91 | # Verify installation
92 | which arecord
93 | arecord --version
94 | ```
95 | 
96 | For other Linux distributions, use the appropriate package manager.
97 | 
98 | ### 2. Install the Extension
99 | 
100 | #### From VS Code Marketplace
101 | 1. Open VS Code
102 | 2. Go to Extensions (Ctrl+Shift+X)
103 | 3. Search for "Vibe-Coder"
104 | 4. Click Install
105 | 
106 | #### Manual Installation (VSIX)
107 | 1. Download the latest .vsix file from the [Releases page](https://github.com/deepgram/vibe_coder/releases)
108 | 2. In VS Code, go to Extensions (Ctrl+Shift+X)
109 | 3. Click on the "..." menu (top-right)
110 | 4. Select "Install from VSIX..."
111 | 5. Choose the downloaded .vsix file
112 | 
113 | ### 3. Configure Microphone Settings
114 | 
115 | (Note: the Microphone settings in the settings menu are a placeholder for now)
116 | 
117 | 1. Test your microphone:
118 |    - Open the Command Palette (Ctrl+Shift+P or Cmd+Shift+P on macOS)
119 |    - Run "Vibe-Coder: Test Microphone"
120 |    - If successful, you'll see a confirmation message
121 | 
122 | 2.
If the test fails or you want to use a specific microphone: 123 | - Run "Vibe-Coder: List Available Microphone Devices" 124 | - Note the device ID/name you want to use 125 | - Open VS Code Settings (File > Preferences > Settings) 126 | - Search for "vibeCoder.microphone" 127 | - Set the appropriate device setting for your platform: 128 | - macOS: `vibeCoder.microphone.deviceMacOS` 129 | - Windows: `vibeCoder.microphone.deviceWindows` 130 | - Linux: `vibeCoder.microphone.deviceLinux` 131 | 132 | #### Platform-Specific Device Settings 133 | 134 | - **macOS**: Usually "default" works, but you can specify a device name from the list 135 | - **Windows**: Use "default" or a numeric index (0, 1, 2) from the device list 136 | - **Linux**: Common formats are "default", "plughw:0,0" (first card, first device), or "plughw:1,0" (second card, first device) 137 | 138 | ### 4. Verify Setup 139 | 140 | 1. Open the Command Palette (Ctrl+Shift+P or Cmd+Shift+P on macOS) 141 | 2. Run "Vibe-Coder: Start Voice Agent" 142 | 3. If everything is set up correctly, you should see a notification that the agent is connected 143 | 4. Start speaking to interact with the voice agent 144 | 145 | ### Troubleshooting Setup Issues 146 | 147 | If you encounter issues during setup: 148 | 149 | 1. **Command Not Found**: 150 | - Ensure the required command-line tool is installed and in your PATH 151 | - For macOS, try `brew link --force sox` 152 | - For Windows, restart your computer after installing SoX 153 | - For Linux, ensure your user has permission to access audio devices 154 | 155 | 2. **Permission Issues**: 156 | - Ensure VS Code has permission to access your microphone 157 | - On macOS, check System Preferences > Security & Privacy > Microphone 158 | - On Windows, check Settings > Privacy > Microphone 159 | - On Linux, ensure your user is in the `audio` group: `sudo usermod -a -G audio $USER` 160 | 161 | 3. **Device Selection Issues**: 162 | - Run "Vibe-Coder: List Available Microphone Devices" to see available devices 163 | - Try using "default" as the device name 164 | - On Linux, try different device formats (e.g., "plughw:0,0", "hw:0,0", "default") 165 | 166 | For more detailed troubleshooting, see [MICROPHONE_TESTING.md](MICROPHONE_TESTING.md). 167 | 168 | ## Prerequisites 169 | 170 | Vibe-Coder requires the following external tools for audio input: 171 | 172 | - **macOS**: SoX - Install with `brew install sox` 173 | - **Windows**: SoX - Download from [SourceForge](https://sourceforge.net/projects/sox/) 174 | - **Linux**: ALSA tools - Install with `sudo apt-get install alsa-utils` 175 | 176 | These tools are used by the microphone component to capture audio input. The extension will still install without them, but voice input functionality will not work. 177 | 178 | ## Supported Platforms 179 | 180 | The extension includes pre-compiled binaries for the following platforms: 181 | 182 | - Windows 10/11 (x64) 183 | - macOS (Intel x64 and Apple Silicon arm64) 184 | - Linux (Ubuntu/Debian x64) 185 | 186 | If your platform is not listed, the extension will try to compile the native modules automatically, which requires additional development tools. See the Troubleshooting section for more information. 
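If you are unsure which pre-compiled binary folder applies to your machine, you can ask Node.js directly; the output matches the `prebuilds/<platform>-<arch>` directory names the extension ships (for example, `darwin-arm64` for Apple Silicon Macs):

```bash
node -e "console.log(process.platform + '-' + process.arch)"
```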
187 | 188 | 189 | ## Voice Commands 190 | 191 | Here are some example commands you can use: 192 | 193 | - "Open the file index.js" 194 | - "Create a new file" 195 | - "Show me the explorer view" 196 | - "Generate a project specification" 197 | - "Tell me about this codebase" 198 | 199 | ## Troubleshooting 200 | 201 | ### Missing Platform Support 202 | 203 | If you see an error about missing native modules for your platform: 204 | 205 | 1. Ensure you have the following installed: 206 | - Node.js and npm 207 | - Python 2.7 or 3.x 208 | - C++ build tools 209 | 210 | 2. For Windows: Visual Studio Build Tools with C++ workload 211 | 3. For macOS: Xcode Command Line Tools and Homebrew 212 | 4. For Linux: build-essential and libasound2-dev 213 | 214 | ### Microphone Configuration 215 | 216 | Vibe-Coder now supports configuring microphone devices for each operating system: 217 | 218 | 1. To list available microphone devices, run the command: 219 | `Vibe-Coder: List Available Microphone Devices` 220 | 221 | 2. Configure your preferred microphone device in VS Code settings: 222 | - Open Settings (File > Preferences > Settings) 223 | - Search for "vibeCoder.microphone" 224 | - Set the appropriate device setting for your platform: 225 | - macOS: `vibeCoder.microphone.deviceMacOS` 226 | - Windows: `vibeCoder.microphone.deviceWindows` 227 | - Linux: `vibeCoder.microphone.deviceLinux` 228 | 229 | 3. Restart any active recording sessions for the changes to take effect 230 | 231 | ### Microphone Not Working 232 | 233 | If you see an error about the microphone not working: 234 | 235 | 1. Ensure you have installed the required command-line tool for your platform: 236 | - macOS: Install SoX with `brew install sox` 237 | - Windows: Install SoX from [SourceForge](https://sourceforge.net/projects/sox/) 238 | - Linux: Install ALSA tools with `sudo apt-get install alsa-utils` 239 | 240 | 2. After installing the required tool, restart VS Code 241 | 242 | 3. Ensure your system's microphone is working and VS Code has permission to access it 243 | 244 | 4. If you're still having issues, try listing available devices and configuring a specific device in settings 245 | 246 | For more detailed troubleshooting, see [MICROPHONE_TESTING.md](MICROPHONE_TESTING.md) 247 | 248 | ### Other Issues 249 | 250 | If you encounter other issues: 251 | 252 | 1. Check the Output panel in VS Code (select "Vibe-Coder" from the dropdown) 253 | 2. Check the Developer Tools console (Help > Toggle Developer Tools) 254 | 3. File an issue on our [GitHub repository](https://github.com/deepgram/vibe_coder/issues) 255 | 256 | ## Privacy 257 | 258 | Vibe-Coder sends audio data to Deepgram for processing. Your conversations are processed according to Deepgram's privacy policy. No audio data is stored by the extension itself. 259 | 260 | ## License 261 | 262 | [MIT License](LICENSE) 263 | 264 | ## Contributing 265 | 266 | We welcome contributions to Vibe Coder! If you're interested in helping improve this extension, please check out our [contribution guidelines](CONTRIBUTING.md) for information on how to get started, report issues, and submit pull requests. 
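If you'd like to build the extension from source instead, the CI workflow suggests a sequence along these lines (assuming Node.js and the platform audio tools described above are installed; `npm run package` is the same script the workflow uses to produce the VSIX):

```bash
git clone https://github.com/deepgram/vibe_coder.git
cd vibe_coder
npm install
npm run package
```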
267 | -------------------------------------------------------------------------------- /src/webview/audio-webview-provider.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode'; 2 | import * as path from 'path'; 3 | import * as fs from 'fs'; 4 | 5 | export class AudioWebViewProvider implements vscode.WebviewViewProvider { 6 | public static readonly viewType = 'vibe-coder.audioInterface'; 7 | private _view?: vscode.WebviewView; 8 | private _eventEmitter = new vscode.EventEmitter(); 9 | private _audioInterfaceHtml: string = ''; 10 | 11 | constructor( 12 | private readonly _extensionUri: vscode.Uri 13 | ) { 14 | console.log('AudioWebViewProvider: Initializing'); 15 | // Load the audio interface HTML content 16 | this._loadAudioInterfaceHtml(); 17 | } 18 | 19 | private _loadAudioInterfaceHtml() { 20 | try { 21 | const filePath = path.join(this._extensionUri.fsPath, 'media', 'audioInterface.html'); 22 | this._audioInterfaceHtml = fs.readFileSync(filePath, 'utf8'); 23 | console.log('AudioWebViewProvider: Loaded audio interface HTML'); 24 | } catch (error) { 25 | console.error('AudioWebViewProvider: Failed to load audio interface HTML:', error); 26 | this._audioInterfaceHtml = ` 27 | 28 | 29 | 30 | 31 | Audio Interface 32 | 33 | 34 |
Failed to load audio interface. Please restart VS Code.
35 | 36 | 37 | `; 38 | } 39 | } 40 | 41 | public get onMessage() { 42 | return this._eventEmitter.event; 43 | } 44 | 45 | public resolveWebviewView( 46 | webviewView: vscode.WebviewView, 47 | context: vscode.WebviewViewResolveContext, 48 | _token: vscode.CancellationToken, 49 | ) { 50 | console.log('AudioWebViewProvider: Resolving webview view'); 51 | this._view = webviewView; 52 | 53 | webviewView.webview.options = { 54 | enableScripts: true, 55 | localResourceRoots: [ 56 | this._extensionUri 57 | ] 58 | }; 59 | 60 | webviewView.webview.html = this._getHtmlForWebview(webviewView.webview); 61 | 62 | webviewView.webview.onDidReceiveMessage( 63 | message => { 64 | console.log('AudioWebViewProvider: Received message from webview:', message.type); 65 | this._eventEmitter.fire(message); 66 | } 67 | ); 68 | 69 | console.log('AudioWebViewProvider: Webview view resolved'); 70 | } 71 | 72 | /** 73 | * Ensures the WebView is visible to the user, which is required for microphone permissions 74 | */ 75 | public async ensureWebViewIsVisible(): Promise { 76 | console.log('AudioWebViewProvider: Ensuring WebView is visible'); 77 | 78 | // Try multiple approaches to make the WebView visible 79 | if (!this._view) { 80 | console.log('AudioWebViewProvider: WebView not yet created, trying to show it'); 81 | 82 | try { 83 | // First try the focus command 84 | await vscode.commands.executeCommand('vibe-coder.audioInterface.focus'); 85 | 86 | // Wait a bit for the view to be created and initialized 87 | await new Promise(resolve => setTimeout(resolve, 1000)); 88 | 89 | // If still not created, try alternative approach 90 | if (!this._view) { 91 | console.log('AudioWebViewProvider: First attempt failed, trying alternative approach'); 92 | 93 | // Try to show the view container directly 94 | await vscode.commands.executeCommand('workbench.view.extension.vibe-coder-audio-container'); 95 | 96 | // Wait again 97 | await new Promise(resolve => setTimeout(resolve, 1000)); 98 | 99 | // If still not created, create a temporary WebView panel as fallback 100 | if (!this._view) { 101 | console.log('AudioWebViewProvider: Creating fallback WebView panel'); 102 | this._createFallbackWebView(); 103 | 104 | // Wait for the fallback to initialize 105 | await new Promise(resolve => setTimeout(resolve, 500)); 106 | } 107 | } 108 | } catch (error) { 109 | console.error('AudioWebViewProvider: Error showing WebView:', error); 110 | } 111 | 112 | // Final check if we have a view 113 | if (!this._view) { 114 | console.error('AudioWebViewProvider: Failed to create WebView after multiple attempts'); 115 | throw new Error('Failed to create audio interface. 
Please try restarting VS Code or check the VS Code logs.'); 116 | } 117 | } 118 | 119 | // Make sure the view is visible 120 | if (!this._view.visible) { 121 | console.log('AudioWebViewProvider: WebView not visible, showing it now'); 122 | try { 123 | this._view.show(true); // true = preserve focus 124 | } catch (error) { 125 | console.error('AudioWebViewProvider: Error showing WebView:', error); 126 | } 127 | 128 | // Give the browser a moment to show the view 129 | await new Promise(resolve => setTimeout(resolve, 500)); 130 | } 131 | 132 | console.log('AudioWebViewProvider: WebView is now visible'); 133 | } 134 | 135 | /** 136 | * Creates a fallback WebView panel if the regular WebView can't be created 137 | */ 138 | private _createFallbackWebView() { 139 | // Create a temporary panel as a fallback 140 | const panel = vscode.window.createWebviewPanel( 141 | 'vibe-coder.audioInterfaceFallback', 142 | 'Audio Interface', 143 | vscode.ViewColumn.Beside, 144 | { 145 | enableScripts: true, 146 | retainContextWhenHidden: true, 147 | localResourceRoots: [this._extensionUri] 148 | } 149 | ); 150 | 151 | panel.webview.html = this._getHtmlForWebview(panel.webview); 152 | 153 | panel.webview.onDidReceiveMessage(message => { 154 | console.log('AudioWebViewProvider: Received message from fallback webview:', message.type); 155 | this._eventEmitter.fire(message); 156 | }); 157 | 158 | // Create a proxy object that mimics the WebviewView interface 159 | this._view = { 160 | webview: panel.webview, 161 | visible: true, 162 | show: () => panel.reveal(), 163 | // Add other required properties/methods 164 | description: 'Fallback Audio Interface', 165 | title: 'Audio Interface', 166 | badge: undefined, 167 | onDidChangeVisibility: panel.onDidChangeViewState, 168 | onDidDispose: panel.onDidDispose, 169 | dispose: () => panel.dispose() 170 | } as unknown as vscode.WebviewView; 171 | 172 | console.log('AudioWebViewProvider: Created fallback WebView panel'); 173 | } 174 | 175 | public startRecording() { 176 | console.log('AudioWebViewProvider: Starting recording'); 177 | if (this._view) { 178 | this._view.webview.postMessage({ command: 'startRecording' }); 179 | } else { 180 | console.error('AudioWebViewProvider: Cannot start recording, WebView not initialized'); 181 | this._eventEmitter.fire({ 182 | type: 'error', 183 | message: 'Cannot start recording, audio interface not initialized' 184 | }); 185 | } 186 | } 187 | 188 | public stopRecording() { 189 | console.log('AudioWebViewProvider: Stopping recording'); 190 | if (this._view) { 191 | this._view.webview.postMessage({ command: 'stopRecording' }); 192 | } else { 193 | console.log('AudioWebViewProvider: Cannot stop recording, WebView not initialized'); 194 | } 195 | } 196 | 197 | public playAudio(data: string, format: string) { 198 | console.log('AudioWebViewProvider: Playing audio'); 199 | if (this._view) { 200 | this._view.webview.postMessage({ 201 | command: 'playAudio', 202 | data, 203 | format 204 | }); 205 | } else { 206 | console.error('AudioWebViewProvider: Cannot play audio, WebView not initialized'); 207 | this._eventEmitter.fire({ 208 | type: 'error', 209 | message: 'Cannot play audio, audio interface not initialized' 210 | }); 211 | } 212 | } 213 | 214 | public async checkMicrophonePermission(): Promise<'granted' | 'denied' | 'prompt' | 'error'> { 215 | console.log('AudioWebViewProvider: Checking microphone permission'); 216 | 217 | try { 218 | // Ensure the WebView is visible 219 | await this.ensureWebViewIsVisible(); 220 | 221 | // Create a 
promise that will resolve with the permission status 222 | return new Promise((resolve) => { 223 | // Set up a one-time listener for the permission status 224 | const disposable = this._eventEmitter.event((message) => { 225 | if (message.type === 'permissionStatus') { 226 | console.log('AudioWebViewProvider: Received permission status:', message.status); 227 | disposable.dispose(); 228 | resolve(message.status); 229 | } 230 | }); 231 | 232 | // Send a message to check permissions 233 | if (this._view) { 234 | this._view.webview.postMessage({ command: 'checkPermission' }); 235 | } else { 236 | console.error('AudioWebViewProvider: Cannot check permissions, WebView not initialized'); 237 | resolve('error'); 238 | } 239 | 240 | // Set a timeout in case we don't get a response 241 | setTimeout(() => { 242 | disposable.dispose(); 243 | console.log('AudioWebViewProvider: Permission check timed out'); 244 | resolve('error'); 245 | }, 5000); 246 | }); 247 | } catch (error) { 248 | console.error('AudioWebViewProvider: Error checking microphone permission:', error); 249 | return 'error'; 250 | } 251 | } 252 | 253 | private _getHtmlForWebview(webview: vscode.Webview) { 254 | // Extract the content from the audio interface HTML 255 | // Remove the DOCTYPE, html, head, and body tags 256 | let content = this._audioInterfaceHtml; 257 | 258 | // Extract styles 259 | const styleMatch = content.match(/ 277 | 278 | 279 | 280 | 281 | 284 | 285 | `; 286 | } 287 | } -------------------------------------------------------------------------------- /src/services/agent-panel.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | 3 | export class AgentPanel { 4 | private panel: vscode.WebviewPanel | null = null 5 | private isListening = false 6 | 7 | constructor(private context: vscode.ExtensionContext) { 8 | this.createPanel() 9 | } 10 | 11 | private createPanel() { 12 | this.panel = vscode.window.createWebviewPanel( 13 | 'vibeCoder.agent', 14 | 'Voice Agent', 15 | vscode.ViewColumn.Beside, 16 | { 17 | enableScripts: true, 18 | retainContextWhenHidden: true 19 | } 20 | ) 21 | 22 | this.panel.webview.html = this.getWebviewContent() 23 | this.setupMessageHandling() 24 | } 25 | 26 | private getWebviewContent() { 27 | return ` 28 | 29 | 30 | 31 | 147 | 148 | 149 |
150 |
151 |
152 |
153 |    .-""""""-.
154 |  .'          '.
155 | /              \\
156 | |              |
157 | \\              /
158 |  '.          .'
159 |    '-......-'
160 |             
161 |
162 |
Ready
163 |
164 |
165 |
166 | 167 | 168 | 169 | 227 | 228 | 229 | ` 230 | } 231 | 232 | updateStatus(text: string) { 233 | if (this.panel) { 234 | this.panel.webview.postMessage({ 235 | type: 'updateStatus', 236 | text 237 | }) 238 | } 239 | } 240 | 241 | updateTranscript(text: string) { 242 | if (this.panel) { 243 | this.panel.webview.postMessage({ 244 | type: 'updateTranscript', 245 | text 246 | }) 247 | } 248 | } 249 | 250 | setListening(value: boolean) { 251 | this.isListening = value 252 | if (this.panel) { 253 | this.panel.webview.postMessage({ 254 | type: 'setListening', 255 | value 256 | }) 257 | } 258 | } 259 | 260 | dispose() { 261 | if (this.panel) { 262 | this.panel.dispose() 263 | this.panel = null 264 | } 265 | } 266 | 267 | private setupMessageHandling() { 268 | if (this.panel) { 269 | this.panel.webview.onDidReceiveMessage(async message => { 270 | switch (message.type) { 271 | // Handle any UI interactions here 272 | // For now, we'll just have the agent listening 273 | // We can add controls later if needed 274 | } 275 | }) 276 | } 277 | } 278 | 279 | // Add WAV header helper 280 | private createWavHeader(sampleRate: number, bitsPerSample: number, dataLength: number): Buffer { 281 | const buffer = Buffer.alloc(44) 282 | 283 | // "RIFF" 284 | buffer.write('RIFF', 0) 285 | // File size 286 | buffer.writeUInt32LE(36 + dataLength, 4) 287 | // "WAVE" 288 | buffer.write('WAVE', 8) 289 | // "fmt " 290 | buffer.write('fmt ', 12) 291 | // Chunk size (16) 292 | buffer.writeUInt32LE(16, 16) 293 | // Audio format (1 for PCM) 294 | buffer.writeUInt16LE(1, 20) 295 | // Number of channels (1) 296 | buffer.writeUInt16LE(1, 22) 297 | // Sample rate 298 | buffer.writeUInt32LE(sampleRate, 24) 299 | // Byte rate 300 | buffer.writeUInt32LE(sampleRate * (bitsPerSample / 8), 28) 301 | // Block align 302 | buffer.writeUInt16LE(bitsPerSample / 8, 32) 303 | // Bits per sample 304 | buffer.writeUInt16LE(bitsPerSample, 34) 305 | // "data" 306 | buffer.write('data', 36) 307 | // Data length 308 | buffer.writeUInt32LE(dataLength, 40) 309 | 310 | return buffer 311 | } 312 | 313 | playAudio(audio: { data: string, encoding: string, sample_rate: number }) { 314 | if (this.panel) { 315 | this.panel.webview.postMessage({ 316 | type: 'playAudio', 317 | audio: audio.data, // Already base64 encoded WAV 318 | encoding: audio.encoding, 319 | sampleRate: audio.sample_rate 320 | }) 321 | } 322 | } 323 | 324 | updateProcessedText(text: string) { 325 | if (this.panel) { 326 | this.panel.webview.postMessage({ 327 | type: 'updateProcessedText', 328 | text 329 | }) 330 | this.panel.webview.postMessage({ 331 | type: 'showSuccess', 332 | text: 'Copied to clipboard! 
✨' 333 | }) 334 | } 335 | } 336 | 337 | public postMessage(message: unknown): Thenable | undefined { 338 | return this.panel?.webview.postMessage(message) 339 | } 340 | } -------------------------------------------------------------------------------- /src/webview/mic-permission-webview-provider.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode'; 2 | 3 | export class MicPermissionWebViewProvider { 4 | private _panel: vscode.WebviewPanel | undefined; 5 | private _extensionUri: vscode.Uri; 6 | 7 | constructor(extensionUri: vscode.Uri) { 8 | this._extensionUri = extensionUri; 9 | } 10 | 11 | public async showPermissionCheck(): Promise { 12 | // Create and show a new webview panel 13 | this._panel = vscode.window.createWebviewPanel( 14 | 'vibe-coder.micPermissionCheck', 15 | 'Microphone Permission Check', 16 | vscode.ViewColumn.One, 17 | { 18 | enableScripts: true, 19 | retainContextWhenHidden: false, 20 | localResourceRoots: [this._extensionUri] 21 | } 22 | ); 23 | 24 | // Set the HTML content 25 | this._panel.webview.html = this._getHtmlForWebview(); 26 | 27 | // Handle messages from the webview 28 | this._panel.webview.onDidReceiveMessage( 29 | message => { 30 | switch (message.type) { 31 | case 'permissionStatus': 32 | // Log the permission status 33 | console.log(`Microphone permission status: ${message.status}`); 34 | 35 | // Show a notification with the permission status 36 | const statusText = this._getStatusText(message.status); 37 | vscode.window.showInformationMessage(`Microphone permission: ${statusText}`); 38 | break; 39 | 40 | case 'error': 41 | // Show an error message 42 | vscode.window.showErrorMessage(`Error checking microphone permission: ${message.message}`); 43 | break; 44 | } 45 | } 46 | ); 47 | } 48 | 49 | private _getStatusText(status: string): string { 50 | switch (status) { 51 | case 'granted': 52 | return 'Granted ✅'; 53 | case 'denied': 54 | return 'Denied ❌'; 55 | case 'prompt': 56 | return 'Not yet requested (will prompt)'; 57 | default: 58 | return status; 59 | } 60 | } 61 | 62 | private _getHtmlForWebview(): string { 63 | return ` 64 | 65 | 66 | 67 | 68 | 69 | Microphone Permission Check 70 | 135 | 136 | 137 |
138 |

Microphone Permission Check

139 | 140 |
141 |
Checking permission status...
142 |
143 | 144 |
145 |
146 | 147 | 159 |
160 | 161 | 282 | 283 | `; 284 | } 285 | } -------------------------------------------------------------------------------- /src/utils/binary-loader.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import * as os from 'os' 3 | import * as path from 'path' 4 | import * as fs from 'fs' 5 | import * as cp from 'child_process' 6 | 7 | // Update the extension ID to match your actual extension ID 8 | const EXTENSION_ID = 'deepgram.vibe-coder' 9 | 10 | /** 11 | * Loads a native module from pre-compiled binaries based on the current platform 12 | * @param moduleName The name of the native module to load 13 | * @returns The loaded native module 14 | */ 15 | export function loadNativeModule(moduleName: string) { 16 | const platform = os.platform() 17 | const arch = os.arch() 18 | 19 | console.log(`loadNativeModule: Loading ${moduleName} for ${platform}-${arch}`) 20 | 21 | // Special case for node-microphone which is not a native module 22 | if (moduleName === 'node-microphone') { 23 | console.log('loadNativeModule: Using special loader for node-microphone') 24 | return loadNodeMicrophone() 25 | } 26 | 27 | // Note: Speaker module loading has been removed as part of the migration to browser-based audio playback 28 | console.warn(`loadNativeModule: Native module ${moduleName} is not supported. Using browser-based audio playback instead.`) 29 | throw new Error(`Native module ${moduleName} is not supported. Using browser-based audio playback instead.`) 30 | } 31 | 32 | /** 33 | * Special loader for node-microphone which is a JavaScript wrapper around command-line tools 34 | * @returns A compatible node-microphone implementation 35 | */ 36 | function loadNodeMicrophone() { 37 | // First check if the required command-line tool is available 38 | const platform = os.platform() 39 | let commandName = '' 40 | 41 | if (platform === 'darwin') { 42 | commandName = 'rec' 43 | } else if (platform === 'win32') { 44 | commandName = 'sox' 45 | } else { 46 | commandName = 'arecord' 47 | } 48 | 49 | let commandAvailable = false 50 | try { 51 | if (platform === 'win32') { 52 | cp.execSync(`where ${commandName}`, { stdio: 'ignore' }) 53 | } else { 54 | cp.execSync(`which ${commandName}`, { stdio: 'ignore' }) 55 | } 56 | commandAvailable = true 57 | console.log(`Found ${commandName} command for ${platform}`) 58 | } catch (e) { 59 | console.warn(`The "${commandName}" command required by node-microphone is not available`) 60 | 61 | // On macOS, try to check if SoX is installed but in a different location 62 | if (platform === 'darwin') { 63 | try { 64 | // Check common Homebrew locations 65 | if (fs.existsSync('/usr/local/bin/sox') || fs.existsSync('/opt/homebrew/bin/sox')) { 66 | console.log('Found sox command, but rec command is missing. SoX might be installed but rec is not in PATH') 67 | 68 | // Show a more helpful message 69 | vscode.window.showWarningMessage( 70 | 'SoX is installed but the "rec" command is not available. 
Try running "brew link --force sox" in Terminal.', 71 | 'Open Terminal' 72 | ).then(selection => { 73 | if (selection === 'Open Terminal') { 74 | cp.exec('open -a Terminal'); 75 | } 76 | }); 77 | } 78 | } catch (err) { 79 | // Ignore errors in this additional check 80 | } 81 | } 82 | } 83 | 84 | // IMPORTANT: Don't try to require node-microphone directly 85 | // Instead, always use our own implementation or dummy 86 | console.log('Using custom microphone implementation') 87 | 88 | // If command is not available, return a dummy implementation 89 | if (!commandAvailable) { 90 | return createDummyMicrophone() 91 | } 92 | 93 | // If command is available, use our own implementation 94 | return createMicrophoneImplementation(commandName) 95 | } 96 | 97 | /** 98 | * Creates a dummy microphone implementation for when the command-line tool is not available 99 | * @returns A dummy microphone implementation 100 | */ 101 | function createDummyMicrophone() { 102 | const EventEmitter = require('events') 103 | const platform = os.platform() 104 | let commandName = '' 105 | 106 | if (platform === 'darwin') { 107 | commandName = 'rec' 108 | } else if (platform === 'win32') { 109 | commandName = 'sox' 110 | } else { 111 | commandName = 'arecord' 112 | } 113 | 114 | return new class DummyMicrophone extends EventEmitter { 115 | constructor() { 116 | super() 117 | console.log('Initialized dummy microphone') 118 | } 119 | 120 | startRecording() { 121 | console.log('Dummy microphone startRecording called') 122 | const stream = new EventEmitter() 123 | setTimeout(() => { 124 | const error = new Error(`Microphone requires the "${commandName}" command which is not installed`) 125 | this.emit('error', error) 126 | stream.emit('error', error) 127 | }, 500) 128 | return stream 129 | } 130 | 131 | stopRecording() { 132 | console.log('Dummy microphone stopRecording called') 133 | // No-op 134 | } 135 | }() 136 | } 137 | 138 | /** 139 | * Creates a microphone implementation using the specified command-line tool 140 | * @param commandName The name of the command-line tool to use 141 | * @returns A microphone implementation 142 | */ 143 | function createMicrophoneImplementation(commandName: string) { 144 | const EventEmitter = require('events') 145 | 146 | return new class MicrophoneImplementation extends EventEmitter { 147 | private ps: cp.ChildProcess | null = null 148 | private options: any = {} 149 | 150 | constructor(options?: any) { 151 | super() 152 | this.ps = null 153 | this.options = options || {} 154 | } 155 | 156 | startRecording() { 157 | if (this.ps === null) { 158 | let audioOptions: string[] = [] 159 | 160 | if (commandName === 'rec') { 161 | // macOS 162 | audioOptions = [ 163 | '-q', 164 | '-b', this.options.bitwidth || '16', 165 | '-c', this.options.channels || '1', 166 | '-r', this.options.rate || '16000', 167 | '-e', this.options.encoding || 'signed-integer', 168 | '-t', 'wav', 169 | '-', 170 | ] 171 | } else if (commandName === 'sox') { 172 | // Windows 173 | audioOptions = [ 174 | '-b', this.options.bitwidth || '16', 175 | '--endian', this.options.endian || 'little', 176 | '-c', this.options.channels || '1', 177 | '-r', this.options.rate || '16000', 178 | '-e', this.options.encoding || 'signed-integer', 179 | '-t', 'waveaudio', 180 | this.options.device || 'default', 181 | '-p', 182 | ] 183 | } else { 184 | // Linux 185 | const formatEncoding = this.options.encoding === 'unsigned-integer' ? 'U' : 'S' 186 | const formatEndian = this.options.endian === 'big' ? 
'BE' : 'LE' 187 | const format = `${formatEncoding}${this.options.bitwidth || '16'}_${formatEndian}` 188 | 189 | audioOptions = [ 190 | '-c', this.options.channels || '1', 191 | '-r', this.options.rate || '16000', 192 | '-f', format, 193 | '-D', this.options.device || 'plughw:1,0', 194 | ] 195 | } 196 | 197 | if (this.options.additionalParameters) { 198 | audioOptions = audioOptions.concat(this.options.additionalParameters) 199 | } 200 | 201 | try { 202 | this.ps = cp.spawn(commandName, audioOptions) 203 | 204 | if (this.ps) { 205 | this.ps.on('error', (error) => { 206 | this.emit('error', error) 207 | }) 208 | 209 | if (this.ps.stderr) { 210 | this.ps.stderr.on('error', (error) => { 211 | this.emit('error', error) 212 | }) 213 | 214 | this.ps.stderr.on('data', (info) => { 215 | this.emit('info', info) 216 | }) 217 | } 218 | 219 | if (this.ps.stdout) { 220 | if (this.options.useDataEmitter) { 221 | this.ps.stdout.on('data', (data) => { 222 | this.emit('data', data) 223 | }) 224 | } 225 | 226 | return this.ps.stdout 227 | } 228 | } 229 | 230 | throw new Error(`Failed to start ${commandName} process`) 231 | } catch (error) { 232 | this.emit('error', error) 233 | throw error 234 | } 235 | } 236 | 237 | return this.ps?.stdout || null 238 | } 239 | 240 | stopRecording() { 241 | if (this.ps) { 242 | this.ps.kill() 243 | this.ps = null 244 | } 245 | } 246 | }() 247 | } 248 | 249 | /** 250 | * Checks if the required native modules are available for the current platform 251 | * @returns An object indicating compatibility status 252 | */ 253 | export function checkNativeModulesCompatibility() { 254 | const platform = os.platform() 255 | const arch = os.arch() 256 | 257 | // Define required modules 258 | const requiredModules = ['node-microphone'] 259 | 260 | // Initialize arrays for missing modules and warnings 261 | const missingModules: string[] = [] 262 | const warnings: string[] = [] 263 | 264 | // Get the extension path 265 | const extensionPath = vscode.extensions.getExtension(EXTENSION_ID)?.extensionPath 266 | 267 | if (!extensionPath) { 268 | console.warn(`Could not determine extension path for ${EXTENSION_ID}`) 269 | const allExtensions = vscode.extensions.all 270 | console.log(`Available extensions: ${allExtensions.map(ext => ext.id).join(', ')}`) 271 | return { 272 | compatible: false, 273 | platform, 274 | arch, 275 | missingModules: requiredModules, 276 | warnings: [`Could not determine extension path for ${EXTENSION_ID}`], 277 | message: `Could not determine extension path for ${EXTENSION_ID}. Available extensions: ${allExtensions.map(ext => ext.id).join(', ')}` 278 | } 279 | } 280 | 281 | // Check for node-microphone command-line tool 282 | let microphoneCommandName = '' 283 | if (platform === 'darwin') { 284 | microphoneCommandName = 'rec' 285 | } else if (platform === 'win32') { 286 | microphoneCommandName = 'sox' 287 | } else { 288 | microphoneCommandName = 'arecord' 289 | } 290 | 291 | try { 292 | if (platform === 'win32') { 293 | cp.execSync(`where ${microphoneCommandName}`, { stdio: 'ignore' }) 294 | } else { 295 | cp.execSync(`which ${microphoneCommandName}`, { stdio: 'ignore' }) 296 | } 297 | } catch (e) { 298 | missingModules.push('node-microphone') 299 | 300 | // Add platform-specific installation instructions 301 | if (platform === 'darwin') { 302 | warnings.push(`The "${microphoneCommandName}" command required by node-microphone is not available. 
Install SoX with: brew install sox`) 303 | } else if (platform === 'win32') { 304 | warnings.push(`The "${microphoneCommandName}" command required by node-microphone is not available. Install SoX from: https://sourceforge.net/projects/sox/`) 305 | } else { 306 | warnings.push(`The "${microphoneCommandName}" command required by node-microphone is not available. Install ALSA tools with: sudo apt-get install alsa-utils`) 307 | } 308 | } 309 | 310 | // Determine overall compatibility 311 | const compatible = missingModules.length === 0 312 | 313 | // Create a message summarizing the compatibility status 314 | let message = '' 315 | if (compatible) { 316 | message = `All required modules are available for ${platform}-${arch}` 317 | } else { 318 | message = `Some required modules are missing for ${platform}-${arch}: ${missingModules.join(', ')}` 319 | if (warnings.length > 0) { 320 | message += `. ${warnings.join(' ')}` 321 | } 322 | } 323 | 324 | return { 325 | compatible, 326 | platform, 327 | arch, 328 | missingModules, 329 | warnings, 330 | message 331 | } 332 | } -------------------------------------------------------------------------------- /src/extension.ts: -------------------------------------------------------------------------------- 1 | process.env.DEBUG = '*'; 2 | console.log('=== VIBE CODER EXTENSION ==='); 3 | console.log('Extension loading at:', __dirname); 4 | console.log('Extension file:', __filename); 5 | console.log('Process cwd:', process.cwd()); 6 | console.log('Environment:', process.env.VSCODE_ENV); 7 | console.log('=== END VIBE CODER INFO ==='); 8 | 9 | console.log('Loading Vibe Coder extension...'); 10 | 11 | // The module 'vscode' contains the VS Code extensibility API 12 | // Import the module and reference it with the alias vscode in your code below 13 | import * as vscode from 'vscode'; 14 | import { DeepgramService } from './services/deepgram-service' 15 | import { VoiceAgentService } from './services/voice-agent-service' 16 | import { ModeManagerService } from './services/mode-manager-service' 17 | import { MicrophoneWrapper } from './utils/native-module-wrapper'; 18 | 19 | // Add type for QuickPick items 20 | interface PromptQuickPickItem extends vscode.QuickPickItem { 21 | id: 'new' | 'select' | 'modify' | 'delete' 22 | } 23 | 24 | interface PromptSelectItem extends vscode.QuickPickItem { 25 | id: string 26 | } 27 | 28 | // This method is called when your extension is activated 29 | // Your extension is activated the very first time the command is executed 30 | export async function activate(context: vscode.ExtensionContext) { 31 | console.log('Activating Vibe Coder extension...') 32 | 33 | // Register the API key command first, before any initialization 34 | context.subscriptions.push( 35 | vscode.commands.registerCommand('vibe-coder.configureDeepgramApiKey', async () => { 36 | try { 37 | const key = await vscode.window.showInputBox({ 38 | prompt: 'Enter your Deepgram API key', 39 | password: true, 40 | placeHolder: 'Deepgram API key is required for voice features', 41 | ignoreFocusOut: true 42 | }) 43 | 44 | if (key) { 45 | await context.secrets.store('deepgram.apiKey', key) 46 | vscode.window.showInformationMessage('Deepgram API key saved successfully') 47 | 48 | // Reload the window to apply the new API key 49 | const reload = await vscode.window.showInformationMessage( 50 | 'API key saved. 
Reload window to apply changes?', 51 | 'Reload Window' 52 | ) 53 | 54 | if (reload === 'Reload Window') { 55 | await vscode.commands.executeCommand('workbench.action.reloadWindow') 56 | } 57 | } 58 | } catch (error) { 59 | console.error('Failed to save API key:', error) 60 | vscode.window.showErrorMessage('Failed to save API key: ' + (error as Error).message) 61 | } 62 | }) 63 | ) 64 | 65 | const modeManager = new ModeManagerService(context) 66 | 67 | try { 68 | console.log('Initializing mode manager...') 69 | await modeManager.initialize() 70 | console.log('Mode manager initialized successfully') 71 | } catch (error) { 72 | console.error('Failed to initialize services:', error) 73 | vscode.window.showErrorMessage( 74 | 'Failed to initialize Vibe Coder: ' + (error as Error).message + 75 | '. Please set your Deepgram API key using the "Configure Deepgram API Key" command.' 76 | ) 77 | 78 | // Show the command palette with our command pre-filled 79 | vscode.commands.executeCommand('workbench.action.quickOpen', '>Configure Deepgram API Key') 80 | return 81 | } 82 | 83 | // Use the console to output diagnostic information (console.log) and errors (console.error) 84 | // This line of code will only be executed once when your extension is activated 85 | console.log('Congratulations, your extension "vibe-coder" is now active!'); 86 | 87 | // The command has been defined in the package.json file 88 | // Now provide the implementation of the command with registerCommand 89 | // The commandId parameter must match the command field in package.json 90 | context.subscriptions.push( 91 | vscode.commands.registerCommand('vibe-coder.openPanel', async () => { 92 | try { 93 | console.log('Opening panel...') 94 | modeManager.show() 95 | } catch (error) { 96 | console.error('Failed to open panel:', error) 97 | vscode.window.showErrorMessage('Failed to open panel: ' + (error as Error).message) 98 | } 99 | }), 100 | 101 | vscode.commands.registerCommand('vibe-coder.startAgent', async () => { 102 | modeManager.show() 103 | await modeManager.setMode('vibe') 104 | }), 105 | 106 | vscode.commands.registerCommand('vibe-coder.startDictation', async () => { 107 | try { 108 | modeManager.show() 109 | if (modeManager.currentMode !== 'code') { 110 | await modeManager.setMode('code') 111 | } 112 | await modeManager.toggleDictation() 113 | } catch (error) { 114 | console.error('Failed to toggle dictation:', error) 115 | vscode.window.showErrorMessage('Failed to toggle dictation: ' + (error as Error).message) 116 | } 117 | }), 118 | 119 | vscode.commands.registerCommand('vibe-coder.test', () => { 120 | console.log('Test command executed') 121 | vscode.window.showInformationMessage('Vibe Coder test command works!') 122 | }), 123 | 124 | vscode.commands.registerCommand('vibe-coder.start', async () => { 125 | try { 126 | await modeManager.initialize() 127 | modeManager.show() 128 | } catch (error) { 129 | vscode.window.showErrorMessage(`Failed to start Vibe Coder: ${error}`) 130 | } 131 | }), 132 | 133 | vscode.commands.registerCommand('vibe-coder.clearPromptState', async () => { 134 | await context.globalState.update('dictation.prompts', undefined); 135 | await context.globalState.update('dictation.currentPrompt', undefined); 136 | vscode.window.showInformationMessage('Prompt state cleared. 
Please reload the window.'); 137 | }), 138 | 139 | vscode.commands.registerCommand('vibe-coder.managePrompts', async (options?: { action: string, id?: string }) => { 140 | const items: PromptQuickPickItem[] = [ 141 | { label: '$(add) Create New Prompt', id: 'new' }, 142 | { label: '$(list-selection) Select Active Prompt', id: 'select' }, 143 | { label: '$(edit) Modify Prompt', id: 'modify' }, 144 | { label: '$(trash) Delete Prompt', id: 'delete' } 145 | ] 146 | 147 | const choice = await vscode.window.showQuickPick(items, { 148 | placeHolder: 'Manage Dictation Prompts' 149 | }) 150 | 151 | if (!choice) return 152 | 153 | switch (choice.id) { 154 | case 'new': { 155 | const name = await vscode.window.showInputBox({ 156 | prompt: 'Enter a name for the new prompt' 157 | }) 158 | if (!name) return 159 | 160 | // Create a temp file for the new prompt 161 | const tmpFile = vscode.Uri.file( 162 | `${context.globalStorageUri.fsPath}/prompt-new.md` 163 | ) 164 | 165 | // Ensure the directory exists 166 | await vscode.workspace.fs.createDirectory(context.globalStorageUri) 167 | 168 | // Write initial content with template 169 | await vscode.workspace.fs.writeFile(tmpFile, Buffer.from( 170 | `// Prompt: ${name} 171 | // Edit the prompt below and save to create 172 | // Lines starting with // are ignored 173 | 174 | You are an AI assistant helping with... 175 | 176 | Key responsibilities: 177 | 1. 178 | 2. 179 | 3. 180 | 181 | Guidelines: 182 | - 183 | - 184 | - 185 | 186 | Example input: "..." 187 | Example output: "..." 188 | ` 189 | )) 190 | 191 | const doc = await vscode.workspace.openTextDocument(tmpFile) 192 | const editor = await vscode.window.showTextDocument(doc, { 193 | preview: false, 194 | viewColumn: vscode.ViewColumn.Beside 195 | }) 196 | 197 | // Add save handler 198 | const disposable = vscode.workspace.onDidSaveTextDocument(async (savedDoc) => { 199 | if (savedDoc.uri.toString() === tmpFile.toString()) { 200 | // Extract prompt content (ignore comment lines) 201 | const content = savedDoc.getText() 202 | .split('\n') 203 | .filter(line => !line.trim().startsWith('//')) 204 | .join('\n') 205 | .trim() 206 | 207 | // Create the new prompt 208 | await modeManager.promptManager.addPrompt(name, content) 209 | 210 | vscode.window.showInformationMessage(`Prompt "${name}" created successfully`) 211 | 212 | // Clean up 213 | disposable.dispose() 214 | await vscode.workspace.fs.delete(tmpFile) 215 | } 216 | }) 217 | 218 | // Also clean up if the editor is closed without saving 219 | const closeDisposable = vscode.workspace.onDidCloseTextDocument(async (closedDoc) => { 220 | if (closedDoc.uri.toString() === tmpFile.toString()) { 221 | closeDisposable.dispose() 222 | try { 223 | await vscode.workspace.fs.delete(tmpFile) 224 | } catch (e) { 225 | // File might already be deleted, ignore 226 | } 227 | } 228 | }) 229 | break 230 | } 231 | 232 | case 'select': { 233 | const prompts = modeManager.promptManager.getAllPrompts() 234 | const selected = await vscode.window.showQuickPick( 235 | prompts.map(p => ({ label: p.name, id: p.id })), 236 | { placeHolder: 'Select prompt to use' } 237 | ) 238 | if (selected) { 239 | await modeManager.promptManager.setCurrentPrompt(selected.id) 240 | } 241 | break 242 | } 243 | 244 | case 'modify': { 245 | const prompts = modeManager.promptManager.getAllPrompts() 246 | const selected = await vscode.window.showQuickPick( 247 | prompts.map(p => ({ label: p.name, id: p.id })), 248 | { placeHolder: 'Select prompt to modify' } 249 | ) 250 | 251 | if (selected) { 252 | 
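// The 'modify' flow below mirrors the 'new' flow above: write the prompt to a
// temp markdown file under globalStorageUri, open it beside the active editor,
// apply the edit on save (lines starting with // are filtered out), and clean
// the temp file up whether the document is saved or simply closed.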
const prompt = prompts.find(p => p.id === selected.id) 253 | if (prompt) { 254 | // Create a temp file in the system temp directory 255 | const tmpFile = vscode.Uri.file( 256 | `${context.globalStorageUri.fsPath}/prompt-${prompt.id}.md` 257 | ) 258 | 259 | // Ensure the directory exists 260 | await vscode.workspace.fs.createDirectory(context.globalStorageUri) 261 | 262 | // Write initial content 263 | await vscode.workspace.fs.writeFile(tmpFile, Buffer.from( 264 | `// Prompt: ${prompt.name} 265 | // ID: ${prompt.id} 266 | // Edit the prompt below and save to update 267 | // Lines starting with // are ignored 268 | 269 | ${prompt.prompt}` 270 | )) 271 | 272 | const doc = await vscode.workspace.openTextDocument(tmpFile) 273 | const editor = await vscode.window.showTextDocument(doc, { 274 | preview: false, 275 | viewColumn: vscode.ViewColumn.Beside 276 | }) 277 | 278 | // Add save handler 279 | const disposable = vscode.workspace.onDidSaveTextDocument(async (savedDoc) => { 280 | if (savedDoc.uri.toString() === tmpFile.toString()) { 281 | // Extract prompt content (ignore comment lines) 282 | const content = savedDoc.getText() 283 | .split('\n') 284 | .filter(line => !line.trim().startsWith('//')) 285 | .join('\n') 286 | .trim() 287 | 288 | // Update the prompt 289 | await modeManager.promptManager.updatePrompt(prompt.id, { 290 | ...prompt, 291 | prompt: content 292 | }) 293 | 294 | vscode.window.showInformationMessage(`Prompt "${prompt.name}" updated successfully`) 295 | 296 | // Clean up 297 | disposable.dispose() 298 | await vscode.workspace.fs.delete(tmpFile) 299 | } 300 | }) 301 | 302 | // Also clean up if the editor is closed without saving 303 | const closeDisposable = vscode.workspace.onDidCloseTextDocument(async (closedDoc) => { 304 | if (closedDoc.uri.toString() === tmpFile.toString()) { 305 | closeDisposable.dispose() 306 | try { 307 | await vscode.workspace.fs.delete(tmpFile) 308 | } catch (e) { 309 | // File might already be deleted, ignore 310 | } 311 | } 312 | }) 313 | } 314 | } 315 | break 316 | } 317 | 318 | case 'delete': { 319 | const prompts = modeManager.promptManager.getAllPrompts() 320 | .filter(p => p.id !== 'default') 321 | const selected = await vscode.window.showQuickPick( 322 | prompts.map(p => ({ label: p.name, id: p.id })), 323 | { placeHolder: 'Select prompt to delete' } 324 | ) 325 | if (selected) { 326 | await modeManager.promptManager.deletePrompt(selected.id) 327 | } 328 | break 329 | } 330 | } 331 | }), 332 | 333 | vscode.commands.registerCommand('vibeCoder.listMicrophoneDevices', async () => { 334 | try { 335 | const micWrapper = new MicrophoneWrapper() 336 | await micWrapper.listAvailableDevices() 337 | } catch (error) { 338 | vscode.window.showErrorMessage( 339 | `Failed to list microphone devices: ${error instanceof Error ? error.message : String(error)}` 340 | ) 341 | } 342 | }), 343 | 344 | vscode.commands.registerCommand('vibeCoder.testMicrophone', async () => { 345 | try { 346 | vscode.window.withProgress({ 347 | location: vscode.ProgressLocation.Notification, 348 | title: 'Testing microphone...', 349 | cancellable: false 350 | }, async (progress) => { 351 | const micWrapper = new MicrophoneWrapper() 352 | 353 | progress.report({ message: 'Recording test audio...' }) 354 | 355 | try { 356 | await micWrapper.testMicrophone() 357 | vscode.window.showInformationMessage('Microphone test successful! Audio is being captured correctly.') 358 | } catch (error) { 359 | vscode.window.showErrorMessage( 360 | `Microphone test failed: ${error instanceof Error ? 
error.message : String(error)}` 361 | ) 362 | 363 | // Check if this might be a device-related error 364 | const errorMessage = error instanceof Error ? error.message : String(error) 365 | if (errorMessage.includes('device') || errorMessage.includes('Device')) { 366 | vscode.window.showInformationMessage( 367 | 'This might be a microphone device issue. Would you like to list available devices?', 368 | 'List Devices' 369 | ).then(selection => { 370 | if (selection === 'List Devices') { 371 | micWrapper.listAvailableDevices() 372 | } 373 | }) 374 | } 375 | } finally { 376 | micWrapper.dispose() 377 | } 378 | }) 379 | } catch (error) { 380 | vscode.window.showErrorMessage( 381 | `Failed to test microphone: ${error instanceof Error ? error.message : String(error)}` 382 | ) 383 | } 384 | }) 385 | ) 386 | 387 | // Add service to subscriptions for cleanup 388 | context.subscriptions.push({ 389 | dispose: () => { 390 | modeManager.dispose() 391 | } 392 | }) 393 | 394 | console.log('Vibe Coder extension activated successfully') 395 | } 396 | 397 | // This method is called when your extension is deactivated 398 | export function deactivate() { 399 | console.log('Deactivating Vibe Coder extension') 400 | } 401 | -------------------------------------------------------------------------------- /media/audioInterface.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Audio Interface 8 | 66 | 67 | 68 |
69 | Vibe Coder Audio Interface
70 | This interface requires microphone access to enable voice commands and dictation.
71 | 
72 | 
73 | 
74 | Microphone permission: unknown
75 | 
76 | 
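// A minimal sketch of the playback logic this page's script needs, using the
// Web Audio API and assuming raw linear16 PCM as sent by handleRawAudio in
// voice-agent-service.ts; all names here are illustrative, not the originals.
const audioCtx = new AudioContext()
window.addEventListener('message', event => {
  const msg = event.data
  if (msg.type === 'playAudio' && msg.audio) {
    playLinear16(msg.audio.data, msg.audio.sampleRate)
  }
})
function playLinear16(base64, sampleRate) {
  const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0))
  const pcm = new Int16Array(bytes.buffer)          // 16-bit little-endian samples
  const buffer = audioCtx.createBuffer(1, pcm.length, sampleRate)
  const channel = buffer.getChannelData(0)
  for (let i = 0; i < pcm.length; i++) channel[i] = pcm[i] / 32768 // scale to [-1, 1)
  const source = audioCtx.createBufferSource()
  source.buffer = buffer
  source.connect(audioCtx.destination)
  source.start()
}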
77 | 78 | 431 | 432 | -------------------------------------------------------------------------------- /src/utils/native-module-wrapper.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import { loadNativeModule } from './binary-loader' 3 | import * as cp from 'child_process' 4 | import * as os from 'os' 5 | import * as fs from 'fs' 6 | import * as path from 'path' 7 | import { execSync } from 'child_process' 8 | 9 | interface MicrophoneCommandInfo { 10 | commandName: 'rec' | 'sox' | 'arecord' | null 11 | commandAvailable: boolean 12 | installInstructions: string 13 | defaultDevice: string 14 | deviceListCommand?: string 15 | } 16 | 17 | interface MicrophoneOptions { 18 | // Audio format options 19 | bitwidth?: string 20 | channels?: string 21 | rate?: string 22 | encoding?: 'signed-integer' | 'unsigned-integer' 23 | endian?: 'little' | 'big' 24 | 25 | // Device selection 26 | device?: string 27 | 28 | // Additional options 29 | additionalParameters?: string[] 30 | useDataEmitter?: boolean 31 | } 32 | 33 | function detectMicrophoneCommand(): MicrophoneCommandInfo { 34 | const currentPlatform = os.platform() 35 | let commandName: MicrophoneCommandInfo['commandName'] = null 36 | let commandAvailable = false 37 | let installInstructions = '' 38 | let defaultDevice = '' 39 | let deviceListCommand = '' 40 | 41 | try { 42 | switch (currentPlatform) { 43 | case 'darwin': { 44 | commandName = 'rec' 45 | defaultDevice = 'default' 46 | deviceListCommand = 'system_profiler SPAudioDataType | grep "Input Sources:"' 47 | try { 48 | execSync('which rec', { stdio: 'ignore' }) 49 | commandAvailable = true 50 | } catch { 51 | installInstructions = 'Install SoX on macOS using: brew install sox' 52 | } 53 | break 54 | } 55 | case 'win32': { 56 | commandName = 'sox' 57 | defaultDevice = 'default' 58 | deviceListCommand = 'sox -h' 59 | try { 60 | execSync('where sox', { stdio: 'ignore' }) 61 | commandAvailable = true 62 | } catch { 63 | installInstructions = 'Install SoX for Windows from: https://sourceforge.net/projects/sox/' 64 | } 65 | break 66 | } 67 | default: { 68 | // Linux and other Unix-like systems 69 | commandName = 'arecord' 70 | defaultDevice = 'plughw:1,0' 71 | deviceListCommand = 'arecord -L' 72 | try { 73 | execSync('which arecord', { stdio: 'ignore' }) 74 | commandAvailable = true 75 | } catch { 76 | installInstructions = 'Install ALSA tools using: sudo apt-get install alsa-utils' 77 | } 78 | } 79 | } 80 | } catch (error) { 81 | console.error('Error detecting microphone command:', error) 82 | } 83 | 84 | return { 85 | commandName, 86 | commandAvailable, 87 | installInstructions, 88 | defaultDevice, 89 | deviceListCommand 90 | } 91 | } 92 | 93 | /** 94 | * A wrapper for the node-microphone module 95 | * Note: node-microphone is not a native module but a JavaScript wrapper around command-line tools 96 | */ 97 | export class MicrophoneWrapper { 98 | private microphone: any | null = null 99 | private ps: any | null = null 100 | private options: MicrophoneOptions 101 | private commandInfo: MicrophoneCommandInfo 102 | private platform: string = os.platform() 103 | private EventEmitter = require('events') 104 | 105 | // Add proper type declarations for previously missing properties 106 | private commandName: 'rec' | 'sox' | 'arecord' | null = null 107 | private commandAvailable: boolean = false 108 | 109 | constructor(options: MicrophoneOptions = {}) { 110 | this.options = options 111 | this.commandInfo = 
detectMicrophoneCommand() 112 | 113 | // Use the commandInfo instead of re-detecting 114 | this.commandName = this.commandInfo.commandName 115 | this.commandAvailable = this.commandInfo.commandAvailable 116 | 117 | // If no device is specified, use the default for this platform 118 | if (!this.options.device) { 119 | this.options.device = this.getConfiguredDevice() || this.commandInfo.defaultDevice 120 | console.log(`MicrophoneWrapper: Using device: ${this.options.device}`) 121 | } 122 | 123 | if (!this.commandAvailable) { 124 | console.warn(`Microphone command '${this.commandName}' not found.`) 125 | console.warn(`Installation instructions: ${this.commandInfo.installInstructions}`) 126 | 127 | // On macOS, check if SoX is installed but rec is not in PATH 128 | if (this.platform === 'darwin') { 129 | try { 130 | // Check common Homebrew locations for SoX 131 | if (fs.existsSync('/usr/local/bin/sox') || fs.existsSync('/opt/homebrew/bin/sox')) { 132 | console.log('MicrophoneWrapper: Found sox command, but rec command is missing') 133 | vscode.window.showWarningMessage( 134 | 'SoX is installed but the "rec" command is not available. Try running "brew link --force sox" in Terminal.', 135 | 'Open Terminal' 136 | ).then(selection => { 137 | if (selection === 'Open Terminal') { 138 | cp.exec('open -a Terminal') 139 | } 140 | }) 141 | return 142 | } 143 | } catch (err) { 144 | // Ignore errors in this additional check 145 | } 146 | } 147 | 148 | // Show installation instructions 149 | this.showInstallationInstructions() 150 | } 151 | 152 | // Create microphone implementation based on command availability 153 | if (this.commandAvailable) { 154 | console.log('MicrophoneWrapper: Command available, creating custom microphone implementation') 155 | this.microphone = this.createMicrophoneImplementation() 156 | } else { 157 | console.log('MicrophoneWrapper: Command not available, creating dummy microphone') 158 | this.microphone = this.createDummyMicrophone() 159 | } 160 | } 161 | 162 | /** 163 | * Get the configured microphone device from VS Code settings 164 | */ 165 | private getConfiguredDevice(): string | undefined { 166 | const config = vscode.workspace.getConfiguration('vibeCoder.microphone') 167 | 168 | switch (this.platform) { 169 | case 'darwin': 170 | return config.get('deviceMacOS') 171 | case 'win32': 172 | return config.get('deviceWindows') 173 | default: 174 | return config.get('deviceLinux') 175 | } 176 | } 177 | 178 | /** 179 | * List available microphone devices 180 | * This is an async operation that will show the devices in the output channel 181 | */ 182 | public async listAvailableDevices(): Promise { 183 | if (!this.commandAvailable || !this.commandInfo.deviceListCommand) { 184 | vscode.window.showErrorMessage( 185 | `Cannot list devices: ${this.commandName} command not available.` 186 | ) 187 | return 188 | } 189 | 190 | try { 191 | const outputChannel = vscode.window.createOutputChannel('Vibe-Coder Microphone Devices') 192 | outputChannel.show() 193 | outputChannel.appendLine(`Listing available microphone devices for ${this.platform}...`) 194 | outputChannel.appendLine('Command: ' + this.commandInfo.deviceListCommand) 195 | outputChannel.appendLine('-------------------------------------------') 196 | 197 | const output = cp.execSync(this.commandInfo.deviceListCommand, { encoding: 'utf-8' }) 198 | outputChannel.appendLine(output) 199 | 200 | outputChannel.appendLine('-------------------------------------------') 201 | outputChannel.appendLine('To configure your microphone device, 
update settings:') 202 | outputChannel.appendLine('1. Open VS Code settings (File > Preferences > Settings)') 203 | outputChannel.appendLine('2. Search for "vibeCoder.microphone"') 204 | outputChannel.appendLine('3. Set the appropriate device for your platform') 205 | 206 | } catch (error) { 207 | console.error('Error listing microphone devices:', error) 208 | vscode.window.showErrorMessage( 209 | `Failed to list microphone devices: ${error instanceof Error ? error.message : String(error)}` 210 | ) 211 | } 212 | } 213 | 214 | private createDummyMicrophone() { 215 | const self = this 216 | return new class DummyMicrophone extends this.EventEmitter { 217 | constructor() { 218 | super() 219 | console.log('MicrophoneWrapper: Initialized dummy microphone') 220 | } 221 | 222 | startRecording() { 223 | console.log('MicrophoneWrapper: Dummy microphone startRecording called') 224 | const stream = new self.EventEmitter() 225 | setTimeout(() => { 226 | const error = new Error(self.commandInfo.installInstructions) 227 | this.emit('error', error) 228 | stream.emit('error', error) 229 | }, 500) 230 | return stream 231 | } 232 | 233 | stopRecording() { 234 | console.log('MicrophoneWrapper: Dummy microphone stopRecording called') 235 | // No-op 236 | } 237 | }() 238 | } 239 | 240 | private createMicrophoneImplementation() { 241 | const self = this 242 | 243 | if (!this.commandName) { 244 | throw new Error('Cannot create microphone implementation: command not available') 245 | } 246 | 247 | return new class MicrophoneImplementation extends this.EventEmitter { 248 | private ps: cp.ChildProcess | null = null 249 | private options: MicrophoneOptions = {} 250 | 251 | constructor(options?: MicrophoneOptions) { 252 | super() 253 | this.ps = null 254 | this.options = options || self.options || {} 255 | } 256 | 257 | startRecording() { 258 | if (this.ps === null) { 259 | let audioOptions: string[] = [] 260 | 261 | // Ensure we have a command name 262 | if (!self.commandName) { 263 | throw new Error('Cannot start recording: command not available') 264 | } 265 | 266 | switch (self.commandName) { 267 | case 'rec': { 268 | // macOS 269 | audioOptions = [ 270 | '-q', 271 | '-b', this.options.bitwidth || '16', 272 | '-c', this.options.channels || '1', 273 | '-r', this.options.rate || '16000', 274 | '-e', this.options.encoding || 'signed-integer', 275 | '-t', 'wav', 276 | '-', 277 | ] 278 | break 279 | } 280 | case 'sox': { 281 | // Windows 282 | audioOptions = [ 283 | '-b', this.options.bitwidth || '16', 284 | '--endian', this.options.endian || 'little', 285 | '-c', this.options.channels || '1', 286 | '-r', this.options.rate || '16000', 287 | '-e', this.options.encoding || 'signed-integer', 288 | '-t', 'waveaudio', 289 | this.options.device || self.commandInfo.defaultDevice, 290 | '-p', 291 | ] 292 | break 293 | } 294 | case 'arecord': { 295 | // Linux 296 | const formatEncoding = this.options.encoding === 'unsigned-integer' ? 'U' : 'S' 297 | const formatEndian = this.options.endian === 'big' ? 
'BE' : 'LE' 298 | const format = `${formatEncoding}${this.options.bitwidth || '16'}_${formatEndian}` 299 | 300 | audioOptions = [ 301 | '-c', this.options.channels || '1', 302 | '-r', this.options.rate || '16000', 303 | '-f', format, 304 | '-D', this.options.device || self.commandInfo.defaultDevice, 305 | ] 306 | break 307 | } 308 | default: 309 | throw new Error(`Unsupported command: ${self.commandName}`) 310 | } 311 | 312 | if (this.options.additionalParameters) { 313 | audioOptions = audioOptions.concat(this.options.additionalParameters) 314 | } 315 | 316 | try { 317 | console.log(`MicrophoneWrapper: Starting ${self.commandName} with device: ${this.options.device || self.commandInfo.defaultDevice}`) 318 | 319 | // We can safely assert commandName is string here due to the check above 320 | this.ps = cp.spawn(self.commandName, audioOptions) 321 | 322 | if (!this.ps) { 323 | throw new Error(`Failed to start ${self.commandName} process`) 324 | } 325 | 326 | this.ps.on('error', (error) => { 327 | console.error(`MicrophoneWrapper: Process error: ${error.message}`) 328 | this.emit('error', error) 329 | }) 330 | 331 | if (this.ps.stderr) { 332 | this.ps.stderr.on('error', (error) => { 333 | console.error(`MicrophoneWrapper: stderr error: ${error.message}`) 334 | this.emit('error', error) 335 | }) 336 | 337 | this.ps.stderr.on('data', (info) => { 338 | const infoStr = info.toString().trim() 339 | if (infoStr) { 340 | console.log(`MicrophoneWrapper: Process info: ${infoStr}`) 341 | this.emit('info', info) 342 | } 343 | }) 344 | } 345 | 346 | if (this.ps.stdout) { 347 | if (this.options.useDataEmitter) { 348 | this.ps.stdout.on('data', (data) => { 349 | this.emit('data', data) 350 | }) 351 | } 352 | 353 | return this.ps.stdout 354 | } 355 | 356 | throw new Error('No stdout available from microphone process') 357 | } catch (error) { 358 | console.error(`MicrophoneWrapper: Failed to start recording: ${error instanceof Error ? error.message : String(error)}`) 359 | this.emit('error', error) 360 | throw error 361 | } 362 | } 363 | 364 | return this.ps?.stdout || null 365 | } 366 | 367 | stopRecording() { 368 | if (this.ps) { 369 | console.log('MicrophoneWrapper: Stopping recording') 370 | this.ps.kill() 371 | this.ps = null 372 | } 373 | } 374 | }() 375 | } 376 | 377 | private showInstallationInstructions() { 378 | // Use the installation instructions from commandInfo 379 | const message = `Microphone requires ${this.commandName}. 
${this.commandInfo.installInstructions}` 380 | 381 | if (this.platform === 'darwin') { 382 | vscode.window.showWarningMessage( 383 | message, 384 | 'Install SoX', 385 | 'Copy Command' 386 | ).then(selection => { 387 | if (selection === 'Install SoX') { 388 | vscode.env.openExternal(vscode.Uri.parse('https://brew.sh/')) 389 | } else if (selection === 'Copy Command') { 390 | vscode.env.clipboard.writeText('brew install sox') 391 | vscode.window.showInformationMessage('Command copied to clipboard: brew install sox') 392 | } 393 | }) 394 | } else if (this.platform === 'win32') { 395 | vscode.window.showWarningMessage( 396 | message, 397 | 'Download SoX', 398 | 'Learn More' 399 | ).then(selection => { 400 | if (selection === 'Download SoX') { 401 | vscode.env.openExternal(vscode.Uri.parse('https://sourceforge.net/projects/sox/')) 402 | } else if (selection === 'Learn More') { 403 | vscode.env.openExternal(vscode.Uri.parse('https://github.com/deepgram/vibe_coder#prerequisites')) 404 | } 405 | }) 406 | } else { 407 | vscode.window.showWarningMessage( 408 | message, 409 | 'Copy Command', 410 | 'Learn More' 411 | ).then(selection => { 412 | if (selection === 'Copy Command') { 413 | vscode.env.clipboard.writeText('sudo apt-get install alsa-utils') 414 | vscode.window.showInformationMessage('Command copied to clipboard: sudo apt-get install alsa-utils') 415 | } else if (selection === 'Learn More') { 416 | vscode.env.openExternal(vscode.Uri.parse('https://wiki.archlinux.org/title/Advanced_Linux_Sound_Architecture')) 417 | } 418 | }) 419 | } 420 | } 421 | 422 | /** 423 | * Start recording from the microphone 424 | * @returns A readable stream of audio data 425 | * @throws Error if the microphone is not available or fails to start 426 | */ 427 | startRecording() { 428 | console.log('MicrophoneWrapper: startRecording called') 429 | if (!this.microphone) { 430 | console.error('MicrophoneWrapper: Microphone is not available') 431 | throw new Error('Microphone is not available on this platform') 432 | } 433 | 434 | try { 435 | console.log('MicrophoneWrapper: Calling microphone.startRecording()') 436 | const stream = this.microphone.startRecording() 437 | console.log('MicrophoneWrapper: Got stream from microphone.startRecording()') 438 | return stream 439 | } catch (error) { 440 | console.error('MicrophoneWrapper: Microphone start recording error:', error) 441 | 442 | if (!this.commandAvailable) { 443 | vscode.window.showErrorMessage(`Microphone requires the "${this.commandName}" command which is not installed.`) 444 | this.showInstallationInstructions() 445 | } else { 446 | const errorMessage = error instanceof Error ? error.message : String(error) 447 | vscode.window.showErrorMessage(`Failed to start recording from microphone: ${errorMessage}`) 448 | 449 | // Check if this might be a device-related error 450 | if (errorMessage.includes('device') || errorMessage.includes('Device')) { 451 | vscode.window.showInformationMessage( 452 | 'This might be a microphone device issue. 
Would you like to list available devices?', 453 | 'List Devices' 454 | ).then(selection => { 455 | if (selection === 'List Devices') { 456 | this.listAvailableDevices() 457 | } 458 | }) 459 | } 460 | } 461 | 462 | throw error 463 | } 464 | } 465 | 466 | /** 467 | * Stop recording from the microphone 468 | */ 469 | stopRecording() { 470 | console.log('MicrophoneWrapper: stopRecording called') 471 | if (!this.microphone) { 472 | console.log('MicrophoneWrapper: No microphone to stop') 473 | return 474 | } 475 | 476 | try { 477 | console.log('MicrophoneWrapper: Calling microphone.stopRecording()') 478 | this.microphone.stopRecording() 479 | console.log('MicrophoneWrapper: microphone.stopRecording() completed') 480 | } catch (error) { 481 | console.error('MicrophoneWrapper: Microphone stop recording error:', error) 482 | } 483 | } 484 | 485 | /** 486 | * Test the microphone by attempting to record a short sample 487 | * This is useful for verifying that the microphone is working 488 | * @returns A promise that resolves if the test is successful, or rejects with an error 489 | */ 490 | public async testMicrophone(): Promise { 491 | if (!this.commandAvailable) { 492 | throw new Error(`Microphone command '${this.commandName}' not found. ${this.commandInfo.installInstructions}`) 493 | } 494 | 495 | return new Promise((resolve, reject) => { 496 | try { 497 | console.log('MicrophoneWrapper: Testing microphone...') 498 | const stream = this.startRecording() 499 | 500 | // Set up a timeout to stop the test after 2 seconds 501 | const timeout = setTimeout(() => { 502 | console.log('MicrophoneWrapper: Microphone test completed successfully') 503 | this.stopRecording() 504 | resolve() 505 | }, 2000) 506 | 507 | // Listen for data to confirm we're getting audio 508 | let dataReceived = false 509 | 510 | stream.on('data', (chunk: Buffer) => { 511 | if (!dataReceived) { 512 | console.log(`MicrophoneWrapper: Received first audio chunk (${chunk.length} bytes)`) 513 | dataReceived = true 514 | } 515 | }) 516 | 517 | // Listen for errors 518 | stream.on('error', (error: Error) => { 519 | clearTimeout(timeout) 520 | this.stopRecording() 521 | console.error('MicrophoneWrapper: Microphone test failed:', error) 522 | reject(error) 523 | }) 524 | 525 | } catch (error) { 526 | console.error('MicrophoneWrapper: Microphone test failed:', error) 527 | reject(error) 528 | } 529 | }) 530 | } 531 | 532 | /** 533 | * Dispose of resources used by the microphone wrapper 534 | * This should be called when the wrapper is no longer needed 535 | */ 536 | public dispose(): void { 537 | console.log('MicrophoneWrapper: Disposing resources') 538 | try { 539 | this.stopRecording() 540 | } catch (error) { 541 | console.error('MicrophoneWrapper: Error during disposal:', error) 542 | } 543 | } 544 | } -------------------------------------------------------------------------------- /src/services/voice-agent-service.ts: -------------------------------------------------------------------------------- 1 | import * as vscode from 'vscode' 2 | import WebSocket from 'ws' 3 | import { createClient } from '@deepgram/sdk' 4 | // import { AgentPanel } from './agent-panel' 5 | // Remove these direct imports 6 | // import Microphone from 'node-microphone' 7 | // import Speaker from 'speaker' 8 | // Import our wrappers instead 9 | import { MicrophoneWrapper } from '../utils/native-module-wrapper' 10 | import { PromptManagementService } from './prompt-management-service' 11 | import { env, window, workspace } from 'vscode' 12 | import { LLMService } 
from './llm-service' 13 | import { EventEmitter } from 'events' 14 | import { CommandRegistryService } from './command-registry-service' 15 | import { WorkspaceService } from './workspace-service' 16 | import { ConversationLoggerService } from './conversation-logger-service' 17 | import { SpecGeneratorService } from './spec-generator-service' 18 | import { checkNativeModulesCompatibility } from '../utils/binary-loader' 19 | 20 | interface AgentConfig { 21 | type: 'SettingsConfiguration' 22 | audio: { 23 | input: { 24 | encoding: string 25 | sample_rate: number 26 | } 27 | output: { 28 | encoding: string 29 | sample_rate: number 30 | container: string 31 | } 32 | } 33 | agent: { 34 | listen: { 35 | model: string 36 | } 37 | think: { 38 | provider: { 39 | type: string 40 | } 41 | model: string 42 | instructions: string 43 | functions: Array<{ 44 | name: string 45 | description: string 46 | parameters: { 47 | type: 'object' 48 | properties: { 49 | name?: { 50 | type: string 51 | description: string 52 | } 53 | args?: { 54 | type: 'array' 55 | description: string 56 | items: { 57 | type: string 58 | } 59 | } 60 | format?: { 61 | type: string 62 | enum?: string[] 63 | description: string 64 | } 65 | } 66 | required: string[] 67 | } 68 | }> 69 | } 70 | speak: { 71 | model: string 72 | temp?: number 73 | rep_penalty?: number 74 | } 75 | } 76 | } 77 | 78 | interface AgentMessage { 79 | type: 80 | | 'Welcome' 81 | | 'Ready' 82 | | 'Speech' 83 | | 'AgentResponse' 84 | | 'FunctionCallRequest' 85 | | 'FunctionCalling' 86 | | 'ConversationText' 87 | | 'UserStartedSpeaking' 88 | | 'AgentStartedSpeaking' 89 | | 'AgentAudioDone' 90 | | 'Error' 91 | | 'Close' 92 | | 'SettingsApplied' 93 | session_id?: string 94 | text?: string 95 | is_final?: boolean 96 | role?: 'assistant' | 'user' 97 | content?: string 98 | audio?: { 99 | data: string 100 | encoding: string 101 | sample_rate: number 102 | container?: string 103 | bitrate?: number 104 | } 105 | message?: string 106 | function_name?: string 107 | function_call_id?: string 108 | input?: any 109 | tts_latency?: number 110 | ttt_latency?: number 111 | total_latency?: number 112 | } 113 | 114 | export interface MessageHandler { 115 | postMessage(message: unknown): Thenable 116 | } 117 | 118 | export class VoiceAgentService { 119 | private ws: WebSocket | null = null 120 | private isInitialized = false 121 | private keepAliveInterval: NodeJS.Timeout | null = null 122 | private audioBuffers: Buffer[] = [] 123 | private readonly AGENT_SAMPLE_RATE = 24000 124 | private promptManager: PromptManagementService 125 | private llmService: LLMService 126 | private eventEmitter = new EventEmitter() 127 | private commandRegistry: CommandRegistryService 128 | private workspaceService: WorkspaceService 129 | private agentPanel: MessageHandler | undefined = undefined 130 | private conversationLogger: ConversationLoggerService 131 | private specGenerator: SpecGeneratorService 132 | private context: vscode.ExtensionContext 133 | private updateStatus: (status: string) => void 134 | private updateTranscript: (text: string) => void 135 | 136 | constructor({ 137 | context, 138 | updateStatus, 139 | updateTranscript, 140 | conversationLogger 141 | }: { 142 | context: vscode.ExtensionContext 143 | updateStatus: (status: string) => void 144 | updateTranscript: (text: string) => void 145 | conversationLogger: ConversationLoggerService 146 | }) { 147 | this.context = context 148 | this.updateStatus = updateStatus 149 | this.updateTranscript = updateTranscript 150 | 
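// Transcript, status, and audio updates flow out through the optional
// MessageHandler (agentPanel); until something assigns a panel, the
// optional-chained postMessage calls throughout this class are silent no-ops.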
this.conversationLogger = conversationLogger 151 | 152 | // Assign llmService first 153 | this.llmService = new LLMService(context) 154 | 155 | // Then create specGenerator 156 | this.specGenerator = new SpecGeneratorService(this.llmService, this.conversationLogger) 157 | 158 | this.promptManager = new PromptManagementService(context) 159 | this.commandRegistry = new CommandRegistryService() 160 | this.workspaceService = new WorkspaceService() 161 | 162 | // Comment out or remove agentPanel if unused 163 | // this.agentPanel = agentPanel 164 | 165 | // Use the public method to register the command 166 | this.commandRegistry.registerCommand({ 167 | name: 'generateProjectSpec', 168 | command: 'vibe-coder.generateProjectSpec', 169 | category: 'workspace', 170 | description: 'Generate a structured project specification from our conversation', 171 | parameters: { 172 | type: 'object', 173 | properties: { 174 | format: { 175 | type: 'string', 176 | enum: ['markdown'], 177 | description: 'Output format (currently only supports markdown)' 178 | } 179 | }, 180 | required: ['format'] 181 | } 182 | }) 183 | } 184 | 185 | async initialize(): Promise { 186 | // Check if native modules are available for this platform 187 | const compatibility = checkNativeModulesCompatibility() 188 | if (!compatibility.compatible) { 189 | vscode.window.showWarningMessage(compatibility.message) 190 | console.warn('Native module compatibility check failed:', compatibility) 191 | } 192 | 193 | // Check for API key but don't require it for initialization 194 | const apiKey = await this.context.secrets.get('deepgram.apiKey') 195 | // We'll mark as initialized even without an API key 196 | this.isInitialized = true 197 | } 198 | 199 | async startAgent(): Promise { 200 | if (!this.isInitialized) 201 | throw new Error('Voice Agent not initialized') 202 | 203 | // Ensure cleanup of any existing connection first 204 | this.cleanup() 205 | 206 | try { 207 | this.updateStatus('Connecting to agent...') 208 | 209 | const apiKey = await this.context.secrets.get('deepgram.apiKey') 210 | if (!apiKey) { 211 | const key = await vscode.window.showInputBox({ 212 | prompt: 'Enter your Deepgram API key', 213 | password: true, 214 | placeHolder: 'Deepgram API key is required for voice agent', 215 | ignoreFocusOut: true 216 | }) 217 | if (!key) { 218 | this.updateStatus('API key required') 219 | vscode.window.showErrorMessage('Deepgram API key is required for voice agent') 220 | throw new Error('Deepgram API key is required') 221 | } 222 | await this.context.secrets.store('deepgram.apiKey', key) 223 | } 224 | 225 | this.ws = new WebSocket('wss://agent.deepgram.com/agent', 226 | ['token'], 227 | { 228 | headers: { 229 | 'Authorization': `Token ${apiKey || ''}` 230 | } 231 | } 232 | ) 233 | 234 | // Wait for connection to be established 235 | await new Promise((resolve, reject) => { 236 | if (!this.ws) return reject(new Error('WebSocket not initialized')) 237 | 238 | this.ws.on('open', () => { 239 | console.log('WebSocket connection opened') 240 | resolve() 241 | }) 242 | 243 | this.ws.on('error', (error) => { 244 | console.error('WebSocket connection error:', error) 245 | reject(error) 246 | }) 247 | }) 248 | 249 | // Set up message handler after connection is established 250 | this.ws.on('message', async (data: WebSocket.Data) => { 251 | try { 252 | const message = JSON.parse(data.toString()) as AgentMessage 253 | console.log('WebSocket received message:', message) 254 | 255 | switch (message.type) { 256 | case 'Welcome': 257 | 
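// Handshake order: the server opens with Welcome, the client replies with a
// SettingsConfiguration (assembled by getAgentConfig), the server confirms via
// SettingsApplied, and only then is the microphone attached and audio streamed.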
console.log('Received Welcome, sending configuration...') 258 | const config = await this.getAgentConfig() 259 | console.log('Sending configuration:', JSON.stringify(config, null, 2)) 260 | this.ws?.send(JSON.stringify(config)) 261 | this.updateStatus('Configuring agent...') 262 | break 263 | 264 | case 'SettingsApplied': 265 | console.log('Settings applied, setting up microphone...') 266 | this.setupMicrophone() 267 | this.updateStatus('Connected! Start speaking...') 268 | break 269 | 270 | case 'Ready': 271 | console.log('Agent ready to receive audio') 272 | this.updateStatus('Ready! Start speaking...') 273 | break 274 | case 'Speech': 275 | this.updateTranscript(message.text || '') 276 | break 277 | case 'AgentResponse': 278 | console.log('Agent response received:', message) 279 | if (message.text) { 280 | console.log('Logging agent response from WebSocket') 281 | this.conversationLogger.logEntry({ 282 | role: 'assistant', 283 | content: message.text 284 | }) 285 | } 286 | this.updateTranscript(message.text || '') 287 | if (message.audio) { 288 | this.playAudioResponse(message.audio) 289 | } 290 | break 291 | case 'FunctionCallRequest': 292 | console.log('Function call requested:', message) 293 | try { 294 | const result = await this.handleFunctionCall( 295 | message.function_call_id!, 296 | { 297 | name: message.function_name, 298 | arguments: JSON.stringify(message.input) 299 | } 300 | ) 301 | 302 | const response = { 303 | type: 'FunctionCallResponse', 304 | function_call_id: message.function_call_id, 305 | output: JSON.stringify(result) 306 | } 307 | console.log('Sending function call response:', response) 308 | this.ws?.send(JSON.stringify(response)) 309 | } catch (error) { 310 | console.error('Function call failed:', error) 311 | const errorResponse = { 312 | type: 'FunctionCallResponse', 313 | function_call_id: message.function_call_id, 314 | output: JSON.stringify({ 315 | success: false, 316 | error: (error as Error).message 317 | }) 318 | } 319 | this.ws?.send(JSON.stringify(errorResponse)) 320 | } 321 | break 322 | case 'FunctionCalling': 323 | // Debug message from server about function calling workflow 324 | console.log('Function calling debug:', message) 325 | break 326 | case 'ConversationText': 327 | console.log('Conversation text received:', message) 328 | if (message.role && message.content) { 329 | console.log('Logging conversation entry from WebSocket') 330 | this.conversationLogger.logEntry({ 331 | role: message.role, 332 | content: message.content 333 | }) 334 | } 335 | if (message.role === 'assistant') { 336 | this.agentPanel?.postMessage({ 337 | type: 'updateTranscript', 338 | text: message.content || '', 339 | target: 'agent-transcript', 340 | animate: true 341 | }) 342 | } 343 | this.eventEmitter.emit('transcript', message.content || '') 344 | break 345 | case 'UserStartedSpeaking': 346 | console.log('User started speaking, stopping audio playback') 347 | 348 | // Send a message to the webview to stop all audio playback 349 | if (this.agentPanel) { 350 | this.agentPanel.postMessage({ 351 | type: 'stopAudio' 352 | }); 353 | } 354 | 355 | this.sendSpeakingStateUpdate('idle') 356 | break 357 | case 'AgentStartedSpeaking': 358 | console.log('Agent started speaking') 359 | this.sendSpeakingStateUpdate('speaking') 360 | break 361 | case 'AgentAudioDone': 362 | console.log('Agent audio done') 363 | this.sendSpeakingStateUpdate('idle') 364 | break 365 | case 'Error': 366 | console.error('Agent error:', message) 367 | vscode.window.showErrorMessage(`Agent error: 
${message.message}`) 368 | this.updateStatus('Error occurred') 369 | break 370 | case 'Close': 371 | console.log('Agent requested close') 372 | this.cleanup() 373 | break 374 | default: 375 | console.log('Unknown message type:', message.type) 376 | } 377 | } catch (e) { 378 | console.error('Error handling WebSocket message:', e) 379 | // If it's not JSON, it's raw audio data 380 | if (data instanceof Buffer) { 381 | this.handleRawAudio(data) 382 | } 383 | } 384 | }) 385 | 386 | // Set up keep-alive interval 387 | this.keepAliveInterval = setInterval(() => { 388 | if (this.ws?.readyState === WebSocket.OPEN) { 389 | this.ws.ping() 390 | } 391 | }, 30000) 392 | 393 | } catch (error) { 394 | console.error('Failed to start agent:', error) 395 | this.cleanup() // Cleanup on error 396 | throw error 397 | } 398 | } 399 | 400 | private setupMicrophone() { 401 | // Use our wrapper instead of direct microphone 402 | const mic = new MicrophoneWrapper() 403 | try { 404 | const audioStream = mic.startRecording() 405 | 406 | audioStream.on('data', (chunk: Buffer) => { 407 | if (this.ws?.readyState === WebSocket.OPEN) { 408 | this.ws.send(chunk) 409 | } 410 | }) 411 | 412 | audioStream.on('error', (error: Error) => { 413 | vscode.window.showErrorMessage(`Microphone error: ${error.message}`) 414 | this.cleanup() 415 | }) 416 | } catch (error) { 417 | vscode.window.showErrorMessage(`Failed to start microphone: ${error instanceof Error ? error.message : String(error)}`) 418 | this.cleanup() 419 | } 420 | } 421 | 422 | private async playAudioResponse(audio: { data: string, encoding: string, sample_rate: number }) { 423 | // Instead of playing audio through the native speaker, send it to the webview 424 | if (!this.agentPanel) { 425 | console.warn('No agent panel available for audio playback') 426 | return 427 | } 428 | 429 | console.log('Sending audio data to webview for playback') 430 | 431 | // Send the audio data to the webview 432 | this.agentPanel.postMessage({ 433 | type: 'playAudio', 434 | audio: { 435 | data: audio.data, 436 | encoding: audio.encoding, 437 | sampleRate: audio.sample_rate 438 | } 439 | }) 440 | 441 | // Also update the transcript if available 442 | this.updateTranscript(audio.data) 443 | } 444 | 445 | public cleanup(): void { 446 | console.log('Cleaning up voice agent...') 447 | 448 | // Close WebSocket connection 449 | if (this.ws) { 450 | console.log('Closing WebSocket connection...') 451 | this.ws.removeAllListeners() // Remove all event listeners 452 | this.ws.close() 453 | this.ws = null 454 | } 455 | 456 | // Clear keep-alive interval 457 | if (this.keepAliveInterval) { 458 | clearInterval(this.keepAliveInterval) 459 | this.keepAliveInterval = null 460 | } 461 | 462 | // Update UI status 463 | this.updateStatus('Disconnected') 464 | 465 | console.log('Voice agent cleanup complete') 466 | } 467 | 468 | async updateInstructions(instructions: string) { 469 | if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { 470 | throw new Error('Agent not connected') 471 | } 472 | 473 | const updateMessage = { 474 | type: 'UpdateInstructions', 475 | instructions 476 | } 477 | 478 | this.ws.send(JSON.stringify(updateMessage)) 479 | } 480 | 481 | async updateSpeakModel(model: string) { 482 | if (!this.ws || this.ws.readyState !== WebSocket.OPEN) { 483 | throw new Error('Agent not connected') 484 | } 485 | 486 | const updateMessage = { 487 | type: 'UpdateSpeak', 488 | model 489 | } 490 | 491 | this.ws.send(JSON.stringify(updateMessage)) 492 | } 493 | 494 | async injectAgentMessage(message: 

  async injectAgentMessage(message: string) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('Agent not connected')
    }

    console.log('Injecting agent message:', message)

    const injectMessage = {
      type: 'InjectAgentMessage',
      message
    }

    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(injectMessage))
    } else {
      console.warn('VoiceAgentService: Cannot send message - WebSocket is not open')
    }
  }

  async handleFunctionCall(functionCallId: string, func: any) {
    console.log('Handling function call:', { functionCallId, func })

    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('Agent not connected')
    }

    // Inject a message to let the user know we're working on their request
    try {
      await this.injectAgentMessage("Let me work on that, standby.")
      console.log('Injected standby message')
    } catch (error) {
      console.warn('Could not inject standby message:', error)
      // Continue with function execution even if message injection fails
    }

    if (func.name === 'generateProjectSpec') {
      console.log('Generating project spec...')
      try {
        await this.specGenerator.generateSpec()
        const successMessage = 'Project specification has been generated and saved to project_spec.md'
        this.updateTranscript(successMessage)
        this.conversationLogger.logEntry({
          role: 'assistant',
          content: successMessage
        })
        return {
          success: true,
          message: successMessage
        }
      } catch (error) {
        console.error('Failed to generate project spec:', error)
        const errorMessage = `Failed to generate project specification: ${(error as Error).message}`
        this.updateTranscript(errorMessage)
        this.conversationLogger.logEntry({
          role: 'assistant',
          content: errorMessage
        })
        return {
          success: false,
          error: errorMessage
        }
      }
    }

    if (func.name === 'execute_command') {
      console.log('Handling execute_command function call:', func)
      const args = JSON.parse(func.arguments)
      try {
        await this.commandRegistry.executeCommand(args.name, args.args)
        return { success: true }
      } catch (error) {
        console.error('Command execution failed:', error)
        return {
          success: false,
          error: (error as Error).message
        }
      }
    }

    console.error('Unknown function:', func.name)
    return {
      success: false,
      error: `Unknown function: ${func.name}`
    }
  }
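
  // Function calls round-trip over the socket: the server emits
  // 'FunctionCallRequest', handleFunctionCall() above runs the matching local
  // action, and the message handler stringifies the returned object into the
  // 'output' field of a 'FunctionCallResponse'. The { success, message?/error? }
  // result shape is this extension's own convention, not something required
  // by the wire protocol.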

  private handleRawAudio(data: Buffer) {
    console.log('Received raw audio data, length:', data.length,
      'sample rate:', this.AGENT_SAMPLE_RATE)

    // Instead of using the native speaker, send the raw audio data to the webview
    if (!this.agentPanel) {
      console.warn('No agent panel available for audio playback')
      return
    }

    // Convert the raw PCM buffer to base64 for sending to the webview
    const base64Audio = data.toString('base64')

    // Send the audio data to the webview with explicit sample rate info
    this.agentPanel.postMessage({
      type: 'playAudio',
      audio: {
        data: base64Audio,
        encoding: 'linear16',
        sampleRate: this.AGENT_SAMPLE_RATE,
        isRaw: true
      }
    })

    // Update speaking state
    this.sendSpeakingStateUpdate('speaking')

    // Set a timeout to update the speaking state back to idle
    // This is a simple approach; a more sophisticated approach would track when playback ends
    setTimeout(() => {
      this.sendSpeakingStateUpdate('idle')
    }, 1000) // Adjust timeout based on typical audio length
  }
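
  // Raw frames are assumed to be 16-bit linear PCM ('linear16') at
  // AGENT_SAMPLE_RATE, matching the 24000 Hz output negotiated in
  // getAgentConfig() below. If the output settings change, AGENT_SAMPLE_RATE
  // must be kept in sync or webview playback will be pitch-shifted.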

  private async getAgentConfig(): Promise<object> {
    const commands = this.commandRegistry.getCommandDefinitions()
    const fileTree = await this.workspaceService.getFileTree()
    const formattedTree = this.workspaceService.formatFileTree(fileTree)

    return {
      type: 'SettingsConfiguration',
      audio: {
        input: {
          encoding: 'linear16',
          sample_rate: 16000
        },
        output: {
          encoding: 'linear16',
          sample_rate: 24000,
          container: 'none'
        }
      },
      agent: {
        listen: {
          model: 'nova-2'
        },
        think: {
          provider: {
            type: 'open_ai'
          },
          model: 'gpt-4o-mini',
          instructions: `You are a coding mentor and VS Code assistant. You help users navigate and control VS Code through voice commands. You also help users think through their product and application. You ask questions, one at a time, using the Socratic method to help the user think critically, unless the user explicitly asks you for suggestions or ideas.

Everything you say will be spoken out loud through a TTS system, so do not use markdown or other formatting, and keep your responses concise.

Current Workspace Structure:
${formattedTree}

When a user requests an action that matches a VS Code command, use the execute_command function.
You can help users navigate the file structure and open files using the paths shown above.
You can also generate project specifications from our conversation using the generateProjectSpec function.
Provide helpful feedback about what you're doing and guide users if they need help.`,
          functions: [
            {
              name: 'execute_command',
              description: 'Execute a VS Code command',
              parameters: {
                type: 'object',
                properties: {
                  name: {
                    type: 'string',
                    description: 'Name of the command to execute'
                  },
                  args: {
                    type: 'array',
                    description: 'Arguments for the command',
                    items: {
                      type: 'string'
                    }
                  }
                },
                required: ['name']
              }
            },
            {
              name: 'generateProjectSpec',
              description: 'Generate a project specification document from the conversation history',
              parameters: {
                type: 'object',
                properties: {
                  format: {
                    type: 'string',
                    enum: ['markdown'],
                    description: 'Output format (currently only supports markdown)'
                  }
                },
                required: ['format']
              }
            }
          ]
        },
        speak: {
          model: 'aura-2-speaker-180',
          temp: 0.45,
          rep_penalty: 2.0
        }
      }
    }
  }
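
  // This SettingsConfiguration object is the first frame sent back after the
  // server's 'Welcome' message (see the handler above). The 'functions'
  // entries use the JSON-Schema-style parameters format familiar from OpenAI
  // function calling. Note that 'commands' fetched from the registry is not
  // referenced below; it appears to be retained for future use.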

  dispose(): void {
    this.cleanup()
  }

  onTranscript(callback: (text: string) => void) {
    this.eventEmitter.on('transcript', callback)
    return () => this.eventEmitter.off('transcript', callback)
  }

  private sendSpeakingStateUpdate(state: 'speaking' | 'idle') {
    console.log('Sending speaking state update:', state)
    if (!this.agentPanel) {
      console.warn('No agent panel available for state update')
      return
    }

    // Send status text instead of animation state
    this.agentPanel.postMessage({
      type: 'updateStatus',
      text: state === 'speaking' ? 'Agent Speaking...' : 'Ready',
      target: 'vibe-status'
    })
  }

  public setAgentPanel(handler: MessageHandler | undefined) {
    this.agentPanel = handler
  }

  private async handleMessage(message: any) {
    if (message.type === 'text') {
      console.log('VoiceAgent: Received user message:', message.text)

      // Log user messages
      this.conversationLogger.logEntry({
        role: 'user',
        content: message.text
      })
      console.log('VoiceAgent: Logged user message to conversation')

      // Send to agent and handle response
      const response = await this.sendToAgent(message.text)
      console.log('VoiceAgent: Got response from agent:', response)

      // Log assistant responses
      if (response.text) {
        console.log('VoiceAgent: Logging assistant response')
        this.conversationLogger.logEntry({
          role: 'assistant',
          content: response.text
        })
      }

      // Update UI with response
      this.updateTranscript(response.text || 'No response from agent')
    }
    // ... rest of the message handling
  }

  private async handleAgentResponse(response: any) {
    console.log('VoiceAgent: Handling agent response:', response)

    // Log the agent's response before handling function calls
    if (response.text) {
      console.log('VoiceAgent: Logging agent response to conversation')
      this.conversationLogger.logEntry({
        role: 'assistant',
        content: response.text
      })
    }

    if (response.function_call?.name === 'generateProjectSpec') {
      try {
        await this.specGenerator.generateSpec()
        const successMessage = 'Project specification has been generated and saved to project_spec.md'
        this.updateTranscript(successMessage)
        // Log the success message as well
        this.conversationLogger.logEntry({
          role: 'assistant',
          content: successMessage
        })
      } catch (err) {
        const error = err as Error
        const errorMessage = `Error generating spec: ${error?.message || 'Unknown error'}`
        this.updateTranscript(errorMessage)
        // Log the error message
        this.conversationLogger.logEntry({
          role: 'assistant',
          content: errorMessage
        })
      }
    }
  }

  private async sendToAgent(text: string) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new Error('Agent not connected')
    }

    const message = {
      type: 'UserText',
      text
    }

    this.ws.send(JSON.stringify(message))

    // Return a promise that resolves when we get a response
    return new Promise<{ text?: string }>((resolve) => {
      const messageHandler = (data: WebSocket.Data) => {
        // Binary audio frames also arrive on 'message'; skip anything that is
        // not parseable JSON rather than letting JSON.parse throw
        let response: any
        try {
          response = JSON.parse(data.toString())
        } catch {
          return
        }
        if (response.type === 'AgentResponse') {
          // Use optional chaining and ensure ws exists before removing listener
          this.ws?.off('message', messageHandler)
          resolve({ text: response.text })
        }
      }
      // Add null check before adding event listener
      if (this.ws) {
        this.ws.on('message', messageHandler)
      } else {
        resolve({ text: 'Error: WebSocket connection lost' })
      }
    })
  }
}
--------------------------------------------------------------------------------