├── .env.example
├── .gitattributes
├── .gitignore
├── LICENSE
├── README.md
├── assets
    └── demo.gif
├── eslint.config.mjs
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── src
    ├── config
    │   ├── constants.ts
    │   └── prompts.ts
    ├── handlers
    │   └── openai.handler.ts
    ├── servers
    │   ├── mcp.server.ts
    │   └── voice.server.ts
    ├── services
    │   ├── openai
    │   │   ├── context.service.ts
    │   │   ├── event.service.ts
    │   │   └── ws.service.ts
    │   ├── session-manager.service.ts
    │   └── twilio
    │   │   ├── call.service.ts
    │   │   ├── event.service.ts
    │   │   └── ws.service.ts
    ├── start-all.ts
    ├── types.ts
    └── utils
    │   ├── call-utils.ts
    │   └── execution-utils.ts
└── tsconfig.json


/.env.example:
--------------------------------------------------------------------------------
 1 | # Server configuration
 2 | PORT=3004
 3 | 
 4 | # Twilio API credentials
 5 | TWILIO_ACCOUNT_SID=your_twilio_account_sid
 6 | TWILIO_AUTH_TOKEN=your_twilio_auth_token
 7 | TWILIO_NUMBER=your_twilio_number
 8 | # OpenAI API key
 9 | OPENAI_API_KEY=your_openai_api_key
10 | OPENAI_WEBSOCKET_URL=wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview
11 | 
12 | # Ngrok authentication token
13 | NGROK_AUTHTOKEN=your_ngrok_authtoken
14 | 
15 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependency directories
 2 | node_modules/
 3 | jspm_packages/
 4 | 
 5 | # Build outputs
 6 | dist/
 7 | build/
 8 | out/
 9 | *.tsbuildinfo
10 | 
11 | # Environment variables
12 | .env
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | 
18 | # Logs
19 | logs
20 | *.log
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 | lerna-debug.log*
25 | 
26 | # Coverage directory used by tools like istanbul
27 | coverage/
28 | *.lcov
29 | 
30 | # Cache directories
31 | .npm
32 | .eslintcache
33 | .stylelintcache
34 | .rpt2_cache/
35 | .rts2_cache_cjs/
36 | .rts2_cache_es/
37 | .rts2_cache_umd/
38 | 
39 | # Runtime data
40 | pids
41 | *.pid
42 | *.seed
43 | *.pid.lock
44 | 
45 | # IDE and editor folders
46 | .idea/
47 | .vscode/
48 | *.swp
49 | *.swo
50 | .DS_Store
51 | .directory
52 | .project
53 | .settings/
54 | .classpath
55 | .c9/
56 | *.launch
57 | .settings/
58 | *.sublime-workspace
59 | 
60 | 
61 | # ngrok
62 | .ngrok/
63 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 LukasK
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Voice Call MCP Server
  2 | 
  3 | A Model Context Protocol (MCP) server that enables Claude and other AI assistants to initiate and manage voice calls using Twilio and OpenAI (GPT-4o Realtime model).
  4 | 
  5 | Use it as a starting point for your own AI-powered voice-calling experiments: it saves setup time and gives you a base on which to build additional functionality.
  6 | 
  7 | ![Demo](./assets/demo.gif)
  8 | 
  9 | 
 10 | ## Sequence Diagram
 11 | 
 12 | ```mermaid
 13 | sequenceDiagram
 14 |     participant AI as AI Assistant (e.g., Claude)
 15 |     participant MCP as MCP Server
 16 |     participant Twilio as Twilio
 17 |     participant Phone as Destination Phone
 18 |     participant OpenAI as OpenAI
 19 |     
 20 |     AI->>MCP: 1) Initiate outbound call request <br>(MCP tool trigger-call)
 21 |     MCP->>Twilio: 2) Place outbound call via Twilio API
 22 |     Twilio->>Phone: 3) Ring the destination phone
 23 |     Twilio->>MCP: 4) Call status updates & audio callbacks (webhooks)
 24 |     MCP->>OpenAI: 5) Forward real-time audio to OpenAI's realtime model
 25 |     OpenAI->>MCP: 6) Return voice stream
 26 |     MCP->>Twilio: 7) Send voice stream
 27 |     Twilio->>Phone: 8) Forward voice stream
 28 |     Note over Phone: Two-way conversation continues <br>until the call ends
 29 | ```
 30 | 
 31 | 
 32 | ## Features
 33 | 
 34 | - Make outbound phone calls via Twilio 📞
 35 | - Process call audio in real-time with GPT-4o Realtime model 🎙️
 36 | - Real-time language switching during calls 🌐
 37 | - Pre-built prompts for common calling scenarios (like restaurant reservations) 🍽️
 38 | - Automatic public URL tunneling with ngrok 🔄
 39 | - Secure handling of credentials 🔒
 40 | 
 41 | ## Why MCP?
 42 | 
 43 | The Model Context Protocol (MCP) bridges the gap between AI assistants and real-world actions. By implementing MCP, this server allows AI models like Claude to:
 44 | 
 45 | 1. Initiate actual phone calls on behalf of users
 46 | 2. Process and respond to real-time audio conversations
 47 | 3. Execute complex tasks requiring voice communication
 48 | 
 49 | This open-source implementation provides transparency and customizability, allowing developers to extend functionality while maintaining control over their data and privacy.
 50 | 
 51 | ## Requirements
 52 | 
 53 | - Node.js >= 22
 54 |   - If you need to update Node.js, we recommend using `nvm` (Node Version Manager):
 55 |     ```bash
 56 |     nvm install 22
 57 |     nvm use 22
 58 |     ```
 59 | - Twilio account with API credentials
 60 | - OpenAI API key
 61 | - Ngrok Authtoken
 62 | 
 63 | ## Installation
 64 | 
 65 | ### Manual Installation
 66 | 
 67 | 1. Clone the repository
 68 |    ```bash
 69 |    git clone https://github.com/lukaskai/voice-call-mcp-server.git
 70 |    cd voice-call-mcp-server
 71 |    ```
 72 | 
 73 | 2. Install dependencies and build
 74 |    ```bash
 75 |    npm install
 76 |    npm run build
 77 |    ```
 78 | 
 79 | ## Configuration
 80 | 
 81 | The server requires several environment variables:
 82 | 
 83 | - `TWILIO_ACCOUNT_SID`: Your Twilio account SID
 84 | - `TWILIO_AUTH_TOKEN`: Your Twilio auth token
 85 | - `TWILIO_NUMBER`: Your Twilio number
 86 | - `OPENAI_API_KEY`: Your OpenAI API key
 87 | - `NGROK_AUTHTOKEN`: Your ngrok authtoken
 88 | - `RECORD_CALLS`: Set to "true" to record calls (optional)
 89 | 
 90 | ### Claude Desktop Configuration
 91 | 
 92 | To use this server with Claude Desktop, add the following to your configuration file:
 93 | 
 94 | **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
 95 | 
 96 | **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
 97 | 
 98 | ```json
 99 | {
100 |   "mcpServers": {
101 |     "voice-call": {
102 |       "command": "node",
103 |       "args": ["/path/to/your/voice-call-mcp-server/dist/start-all.cjs"],
104 |       "env": {
105 |         "TWILIO_ACCOUNT_SID": "your_account_sid",
106 |         "TWILIO_AUTH_TOKEN": "your_auth_token",
107 |         "TWILIO_NUMBER": "your_e.164_format_number",
108 |         "OPENAI_API_KEY": "your_openai_api_key",
109 |         "NGROK_AUTHTOKEN": "your_ngrok_authtoken"
110 |       }
111 |     }
112 |   }
113 | }
114 | ```
115 | 
116 | After that, restart Claude Desktop to reload the configuration. 
117 | If connected, you should see Voice Call under the 🔨 menu.
118 | 
119 | ## Example Interactions with Claude
120 | 
121 | Here are some natural ways to interact with the server through Claude:
122 | 
123 | 1. Simple call:
124 | ```
125 | Can you call +1-123-456-7890 and let them know I'll be 15 minutes late for our meeting?
126 | ```
127 | 
128 | 2. Restaurant reservation:
129 | ```
130 | Please call Delicious Restaurant at +1-123-456-7890 and make a reservation for 4 people tonight at 7:30 PM. Please speak in German.
131 | ```
132 | 
133 | 3. Appointment scheduling:
134 | ```
135 | Please call Expert Dental NYC (+1-123-456-7899) and reschedule my Monday appointment to next Friday between 4–6pm.
136 | ```
137 | 
138 | ## Important Notes
139 | 
140 | 1. **Phone Number Format**: All phone numbers must be in E.164 format (e.g., +11234567890)
141 | 2. **Rate Limits**: Be aware of your Twilio and OpenAI account's rate limits and pricing
142 | 3. **Voice Conversations**: The AI will handle natural conversations in real-time
143 | 4. **Call Duration**: Be mindful of call durations as they affect OpenAI API and Twilio costs
144 | 5. **Public Exposure**: Be aware that the ngrok tunnel exposes your server publicly for Twilio to reach it (though with a random URL and protected by a random secret)
145 | 
146 | ## Troubleshooting
147 | 
148 | Common error messages and solutions:
149 | 
150 | 1. "Phone number must be in E.164 format"
151 |    - Make sure the phone number starts with "+" and the country code
152 | 
153 | 2. "Invalid credentials"
154 |    - Double-check your TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN. You can copy them from the [Twilio Console](https://console.twilio.com)
155 | 
156 | 3. "OpenAI API error"
157 |    - Verify your OPENAI_API_KEY is correct and has sufficient credits
158 | 
159 | 4. "Ngrok tunnel failed to start"
160 |    - Ensure your NGROK_AUTHTOKEN is valid and not expired
161 | 
162 | 5. "OpenAI Realtime does not detect the end of voice input, or is lagging."
163 |    - Sometimes, there might be voice encoding issues between Twilio and the receiver's network operator. Try using a different receiver.
164 | 
165 | ## Contributing
166 | 
167 | Contributions are welcome! Here are some areas we're looking to improve:
168 | 
169 | - Implement support for multiple AI models beyond the current implementation
170 | - Add database integration to store conversation history locally and make it accessible for AI context
171 | - Improve latency and response times to enhance call experiences
172 | - Enhance error handling and recovery mechanisms
173 | - Add more pre-built conversation templates for common scenarios
174 | - Implement improved call monitoring and analytics
175 | 
176 | If you'd like to contribute, please open an issue to discuss your ideas before submitting a pull request.
177 | 
178 | ## License
179 | 
180 | This project is licensed under the MIT License - see the LICENSE file for details.
181 | 
182 | ## Security
183 | 
184 | Please do not include any sensitive information (like phone numbers or API credentials) in GitHub issues or pull requests. This server handles sensitive communications; deploy it responsibly and ensure all credentials are kept secure.
185 | 
186 | 
187 | ## Time For a New Mission?
188 | 
189 | We’re hiring engineers to build at the frontier of voice AI — and bake it into a next-gen telco.
190 | 
191 | Curious? Head to [careers.popcorn.space](https://careers.popcorn.space/apply) 🍿 !
192 | 


--------------------------------------------------------------------------------
/assets/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/popcornspace/voice-call-mcp-server/13e3bd99ed2d6859a18dba9c030564da90986894/assets/demo.gif


--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
 1 | import eslint from '@eslint/js';
 2 | import tseslint from 'typescript-eslint';
 3 | // Flat ESLint config: ESLint's recommended rules, typescript-eslint's recommended and stylistic presets, and the project overrides below.
 4 | export default tseslint.config(
 5 |     eslint.configs.recommended,
 6 |     ...tseslint.configs.recommended,
 7 |     ...tseslint.configs.stylistic,
 8 |     { // Project-specific overrides; listed last among the config entries
 9 |         plugins: { // No additional plugins are registered
10 |         },
11 |         rules: {
12 |             '@typescript-eslint/no-explicit-any': 'off', // Explicit `any` is permitted
13 |             '@typescript-eslint/prefer-for-of': 'off', // Index-based for loops are permitted
14 |             'no-trailing-spaces': 'error', // Disallow trailing spaces
15 |             'eol-last': ['error', 'always'], // Enforce newline at end of file
16 |             'indent': ['error', 4], // Enforce 4 spaces for indentation
17 |             'quotes': ['error', 'single'], // Enforce single quotes
18 |             'semi': ['error', 'always'], // Enforce semicolons
19 |         },
20 |     }
21 | );
22 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "voice-call-mcp-server",
 3 |   "main": "dist/start-all.js",
 4 |   "type": "module",
 5 |   "scripts": {
 6 |     "start-all": "tsx src/start-all.ts",
 7 |     "start": "node dist/start-all.cjs",
 8 |     "build": "npm-run-all clean build:app",
 9 |     "build:app": "tsup src/start-all.ts",
10 |     "clean": "rimraf dist"
11 |   },
12 |   "dependencies": {
13 |     "@modelcontextprotocol/sdk": "1.8.0",
14 |     "@ngrok/ngrok": "^1.4.1",
15 |     "axios": "^1.6.8",
16 |     "body-parser": "^1.20.2",
17 |     "colors": "^1.4.0",
18 |     "cors": "^2.8.5",
19 |     "dotenv": "^16.4.5",
20 |     "eslint-plugin-simple-import-sort": "^12.1.1",
21 |     "express": "^4.18.3",
22 |     "express-ws": "^5.0.2",
23 |     "form-data": "^4.0.0",
24 |     "google-protobuf": "^3.21.4",
25 |     "httpdispatcher": "^2.2.0",
26 |     "ngrok": "5.0.0-beta.2",
27 |     "node-fetch": "^2.7.0",
28 |     "node-vad": "^1.1.4",
29 |     "openai": "^4.85.1",
30 |     "peerjs": "^1.5.4",
31 |     "perf_hooks": "^0.0.1",
32 |     "protobufjs": "^7.4.0",
33 |     "twilio": "^5.0.1",
34 |     "uuid": "^9.0.1",
35 |     "websocket": "^1.0.28",
36 |     "zod": "^3.22.4"
37 |   },
38 |   "devDependencies": {
39 |     "@eslint/js": "^9.21.0",
40 |     "@types/cors": "^2.8.17",
41 |     "@types/express": "^4.17.21",
42 |     "@types/express-ws": "^3.0.4",
43 |     "@types/node": "^20.11.30",
44 |     "@types/uuid": "^9.0.8",
45 |     "@types/websocket": "^1.0.10",
46 |     "@types/ws": "^8.5.10",
47 |     "dotenv": "^16.4.5",
48 |     "eslint": "^9.21.0",
49 |     "globals": "^16.0.0",
50 |     "npm-run-all": "^4.1.5",
51 |     "rimraf": "^5.0.5",
52 |     "tsup": "^8.0.2",
53 |     "tsx": "^4.7.1",
54 |     "typescript": "^5.4.2",
55 |     "typescript-eslint": "^8.24.1"
56 |   },
57 |   "author": "Popcorn",
58 |   "license": "MIT",
59 |   "packageManager": "pnpm@10.7.0+sha512.6b865ad4b62a1d9842b61d674a393903b871d9244954f652b8842c2b553c72176b278f64c463e52d40fff8aba385c235c8c9ecf5cc7de4fd78b8bb6d49633ab6"
60 | }
61 | 


--------------------------------------------------------------------------------
/src/config/constants.ts:
--------------------------------------------------------------------------------
 1 | export const LOG_EVENT_TYPES = [ // OpenAI Realtime event type names (presumably the ones the OpenAI event service logs; usage is not shown here)
 2 |     'error',
 3 |     'session.created',
 4 |     'response.audio.delta',
 5 |     'response.audio_transcript.done',
 6 |     'conversation.item.input_audio_transcription.completed',
 7 | ];
 8 | // Per-process secret required by the voice server's webhook and WebSocket routes. Taken from Web Crypto (global in Node >= 19) because Math.random() is not a cryptographically secure source.
 9 | export const DYNAMIC_API_SECRET = globalThis.crypto.randomUUID().replace(/-/g, ''); // 32 hex characters, safe in URL paths and query strings
10 | export const SHOW_TIMING_MATH = false; // Debug switch; its consumer is not shown here
11 | export const VOICE = 'sage'; // Voice name placed into the OpenAI session config by OpenAICallHandler
12 | export const RECORD_CALLS = process.env.RECORD_CALLS === 'true' || process.env.RECORD === 'true'; // RECORD_CALLS is the documented name; the legacy RECORD variable is still honoured
13 | export const GOODBYE_PHRASES = ['bye', 'goodbye', 'have a nice day', 'see you', 'take care']; // Presumably matched by checkForGoodbye in call-utils (not shown here)
14 | 


--------------------------------------------------------------------------------
/src/config/prompts.ts:
--------------------------------------------------------------------------------
 1 | import { CallState } from '../types.js';
 2 | /** Builds the system prompt for an outbound call: fixed behaviour rules, the assistant's own number (callState.fromNumber) and the optional task text (callContext; empty when omitted). */
 3 | export const generateOutboundCallContext = (callState: CallState, callContext?: string): string => {
 4 |     return `Please refer to phone call transcripts. 
 5 |     Stay concise and short. 
 6 |     You are an assistant (if asked, your phone number with country code is: ${callState.fromNumber}). You are making an outbound call.
 7 |     Be friendly and speak in human short sentences. Start conversation with how are you. Do not speak in bullet points. Ask one question at a time, tell one sentence at a time.
 8 |     After successful task completion, say goodbye and end the conversation.
 9 |      You ARE NOT a receptionist, NOT an administrator, NOT a person taking reservations. 
10 |      You do not provide any other info, which is not related to the goal. You are calling solely to achieve your task.
11 |     You are the customer making a request, not the restaurant staff. 
12 |     YOU ARE STRICTLY THE ONE MAKING THE REQUEST (and not the one receiving). YOU MUST ACHIEVE YOUR GOAL AS AN ASSISTANT AND PERFORM TASK.
13 |      Be focused solely on your task: 
14 |         ${callContext ? callContext : ''}`;
15 | };
16 | 


--------------------------------------------------------------------------------
/src/handlers/openai.handler.ts:
--------------------------------------------------------------------------------
  1 | import { WebSocket } from 'ws';
  2 | import twilio from 'twilio';
  3 | import dotenv from 'dotenv';
  4 | import { CallState, CallType, OpenAIConfig } from '../types.js';
  5 | import { VOICE } from '../config/constants.js';
  6 | import { OpenAIContextService } from '../services/openai/context.service.js';
  7 | import { OpenAIWsService } from '../services/openai/ws.service.js';
  8 | import { TwilioWsService } from '../services/twilio/ws.service.js';
  9 | import { OpenAIEventService } from '../services/openai/event.service.js';
 10 | import { TwilioEventService } from '../services/twilio/event.service.js';
 11 | import { SessionManagerService } from '../services/session-manager.service.js';
 12 | import { TwilioCallService } from '../services/twilio/call.service.js';
 13 | 
 14 | dotenv.config();
 15 | 
 16 | /**
 17 |  * Bridges one Twilio media-stream WebSocket and one OpenAI WebSocket for a single call, using a shared CallState.
 18 |  */
 19 | export class OpenAICallHandler {
 20 |     private readonly twilioStream: TwilioWsService;
 21 |     private readonly openAIService: OpenAIWsService;
 22 |     private readonly openAIEventProcessor: OpenAIEventService;
 23 |     private readonly twilioEventProcessor: TwilioEventService;
 24 |     private readonly twilioCallService: TwilioCallService;
 25 |     private readonly callState: CallState;
 26 |     /** Creates the per-call state, both socket services and both event processors, registers the Twilio socket handlers and starts the OpenAI connection. */
 27 |     constructor(ws: WebSocket, callType: CallType, twilioClient: twilio.Twilio, contextService: OpenAIContextService) {
 28 |         this.callState = new CallState(callType);
 29 | 
 30 |         // Initialize Twilio services
 31 |         this.twilioStream = new TwilioWsService(ws, this.callState);
 32 |         this.twilioCallService = new TwilioCallService(twilioClient);
 33 | 
 34 |         // Initialize OpenAI service
 35 |         const openAIConfig: OpenAIConfig = {
 36 |             apiKey: process.env.OPENAI_API_KEY || '', // NOTE(review): an unset key becomes '' here instead of failing fast
 37 |             websocketUrl: process.env.OPENAI_WEBSOCKET_URL || 'wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview',
 38 |             voice: VOICE,
 39 |             temperature: 0.6
 40 |         };
 41 |         this.openAIService = new OpenAIWsService(openAIConfig);
 42 | 
 43 |         // Initialize event processors
 44 |         this.openAIEventProcessor = new OpenAIEventService(
 45 |             this.callState,
 46 |             () => this.endCall(), // callback: hang up and schedule socket shutdown
 47 |             (payload) => this.twilioStream.sendAudio(payload), // callback: pass a payload to the Twilio stream
 48 |             () => this.handleSpeechStartedEvent() // callback: interrupt the assistant response in progress
 49 |         );
 50 | 
 51 |         this.twilioEventProcessor = new TwilioEventService(
 52 |             this.callState,
 53 |             this.twilioCallService, // passed by value, so it must already be constructed at this point
 54 |             contextService,
 55 |             (payload) => this.openAIService.sendAudio(payload), // callback: pass a payload from the Twilio event processor to the OpenAI socket
 56 |         );
 57 | 
 58 |         this.setupEventHandlers();
 59 |         this.initializeOpenAI();
 60 |     }
 61 |     /** Asks Twilio to end the call when a call SID is known, then closes both WebSockets after a fixed delay. */
 62 |     private endCall(): void {
 63 |         if (this.callState.callSid) {
 64 |             this.twilioCallService.endCall(this.callState.callSid);
 65 |         }
 66 | 
 67 |         setTimeout(() => {
 68 |             this.closeWebSockets();
 69 |         }, 5000); // 5 s delay; presumably lets the hang-up and any remaining audio finish (TODO confirm)
 70 |     }
 71 |     /** Closes the Twilio stream socket and the OpenAI socket. */
 72 |     private closeWebSockets(): void {
 73 |         this.twilioStream.close();
 74 |         this.openAIService.close();
 75 |     }
 76 |     /** Starts the OpenAI socket service with three callbacks (presumably onMessage / onOpen / onError; the signature is not shown here). */
 77 |     private initializeOpenAI(): void {
 78 |         this.openAIService.initialize(
 79 |             (data) => this.openAIEventProcessor.processMessage(data),
 80 |             () => {
 81 |                 setTimeout(() => this.openAIService.initializeSession(this.callState.callContext), 100); // session setup is deferred by 100 ms and reads the call context at that moment
 82 |             },
 83 |             (error) => console.error('Error in the OpenAI WebSocket:', error)
 84 |         );
 85 |     }
 86 |     /** Interrupts the current assistant response: truncates it on the OpenAI side, clears the Twilio stream and resets response tracking. No-op when nothing is being tracked. */
 87 |     private handleSpeechStartedEvent(): void {
 88 |         if (this.callState.markQueue.length === 0 || this.callState.responseStartTimestampTwilio === null || !this.callState.lastAssistantItemId) {
 89 |             return;
 90 |         }
 91 | 
 92 |         const elapsedTime = this.callState.latestMediaTimestamp - this.callState.responseStartTimestampTwilio; // latest media timestamp minus response start; presumably milliseconds (TODO confirm)
 93 | 
 94 |         this.openAIService.truncateAssistantResponse(this.callState.lastAssistantItemId, elapsedTime);
 95 |         this.twilioStream.clearStream();
 96 |         this.resetResponseState();
 97 |     }
 98 |     /** Clears the mark queue, the last assistant item id and the response start timestamp on the call state. */
 99 |     private resetResponseState(): void {
100 |         this.callState.markQueue = [];
101 |         this.callState.lastAssistantItemId = null;
102 |         this.callState.responseStartTimestampTwilio = null;
103 |     }
104 |     /** Routes Twilio socket messages to the Twilio event processor; the second callback closes the OpenAI socket (presumably on Twilio socket close; confirm in TwilioWsService). */
105 |     private setupEventHandlers(): void {
106 |         this.twilioStream.setupEventHandlers(
107 |             async (message) => await this.twilioEventProcessor.processMessage(message),
108 |             async () => {
109 |                 this.openAIService.close();
110 |             }
111 |         );
112 |     }
113 | }
114 | 
115 | /**
116 |  * Thin facade over SessionManagerService; the voice server calls createSession for each authorised media-stream connection.
117 |  */
118 | export class CallSessionManager {
119 |     private readonly sessionManager: SessionManagerService;
120 |     /** @param twilioClient Twilio client handed to the underlying SessionManagerService */
121 |     constructor(twilioClient: twilio.Twilio) {
122 |         this.sessionManager = new SessionManagerService(twilioClient);
123 |     }
124 | 
125 |     /**
126 |      * Delegates session creation to the underlying SessionManagerService
127 |      * @param ws The WebSocket connection the session will use
128 |      * @param callType The type of call, forwarded unchanged
129 |      */
130 |     public createSession(ws: WebSocket, callType: CallType): void {
131 |         this.sessionManager.createSession(ws, callType);
132 |     }
133 | }
134 | 


--------------------------------------------------------------------------------
/src/servers/mcp.server.ts:
--------------------------------------------------------------------------------
  1 | import { McpServer, ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
  2 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
  3 | import { z } from 'zod';
  4 | import { TwilioCallService } from '../services/twilio/call.service.js';
  5 | /** MCP server that registers one tool (trigger-call), one resource (get-latest-call) and one prompt (make-restaurant-reservation), served over stdio. */
  6 | export class VoiceCallMcpServer {
  7 |     private server: McpServer;
  8 |     private twilioCallService: TwilioCallService;
  9 |     private twilioCallbackUrl: string;
 10 |     /** Stores the call service and callback URL, creates the McpServer and registers the tool, resource and prompt. */
 11 |     constructor(twilioCallService: TwilioCallService, twilioCallbackUrl: string) {
 12 |         this.twilioCallbackUrl = twilioCallbackUrl;
 13 |         this.twilioCallService = twilioCallService;
 14 | 
 15 |         this.server = new McpServer({
 16 |             name: 'Voice Call MCP Server',
 17 |             version: '1.0.0',
 18 |             description: 'MCP server that provides tools for initiating phone calls via Twilio'
 19 |         });
 20 | 
 21 |         this.registerTools();
 22 |         this.registerResources();
 23 |         this.registerPrompts();
 24 |     }
 25 |     /** Registers `trigger-call`: calls makeCall and returns JSON text with the call SID, or an `isError` result carrying the failure message (the handler does not rethrow). */
 26 |     private registerTools(): void {
 27 |         this.server.tool(
 28 |             'trigger-call',
 29 |             'Trigger an outbound phone call via Twilio',
 30 |             {
 31 |                 toNumber: z.string().describe('The phone number to call'), // NOTE(review): any string is accepted here; E.164 validation, if any, happens elsewhere
 32 |                 callContext: z.string().describe('Context for the call')
 33 |             },
 34 |             async ({ toNumber, callContext }) => {
 35 |                 try {
 36 |                     const callSid = await this.twilioCallService.makeCall(this.twilioCallbackUrl, toNumber, callContext);
 37 | 
 38 |                     return {
 39 |                         content: [{
 40 |                             type: 'text',
 41 |                             text: JSON.stringify({
 42 |                                 status: 'success',
 43 |                                 message: 'Call triggered successfully',
 44 |                                 callSid: callSid
 45 |                             })
 46 |                         }]
 47 |                     };
 48 |                 } catch (error) {
 49 |                     const errorMessage = error instanceof Error ? error.message : String(error); // non-Error throwables are stringified
 50 | 
 51 |                     return {
 52 |                         content: [{
 53 |                             type: 'text',
 54 |                             text: JSON.stringify({
 55 |                                 status: 'error',
 56 |                                 message: `Failed to trigger call: ${errorMessage}`
 57 |                             })
 58 |                         }],
 59 |                         isError: true
 60 |                     };
 61 |                 }
 62 |             }
 63 |         );
 64 |     }
 65 |     /** Registers the `get-latest-call` resource; it currently returns a fixed placeholder payload. */
 66 |     private registerResources(): void {
 67 |         this.server.resource(
 68 |             'get-latest-call',
 69 |             new ResourceTemplate('call://transcriptions', { list: undefined }),
 70 |             async () => {
 71 |                 // TODO: get call transcription (until then the transcription is the literal string '{}')
 72 |                 return {
 73 |                     contents: [{
 74 |                         text: JSON.stringify({
 75 |                             transcription: '{}',
 76 |                             status: 'completed',
 77 |                         }),
 78 |                         uri: 'call://transcriptions/latest', // NOTE(review): differs from the registered 'call://transcriptions'; confirm which URI clients should see
 79 |                         mimeType: 'application/json'
 80 |                     }]
 81 |                 };
 82 |             }
 83 |         );
 84 |     }
 85 |     /** Registers the `make-restaurant-reservation` prompt, which renders its four string arguments into a single user message. */
 86 |     private registerPrompts(): void {
 87 |         this.server.prompt(
 88 |             'make-restaurant-reservation',
 89 |             'Create a prompt for making a restaurant reservation by phone',
 90 |             {
 91 |                 restaurantNumber: z.string().describe('The phone number of the restaurant'),
 92 |                 peopleNumber: z.string().describe('The number of people in the party'),
 93 |                 date: z.string().describe('Date of the reservation'),
 94 |                 time: z.string().describe('Preferred time for the reservation')
 95 |             },
 96 |             ({ restaurantNumber, peopleNumber, date, time }) => {
 97 |                 return {
 98 |                     messages: [{
 99 |                         role: 'user',
100 |                         content: {
101 |                             type: 'text', // NOTE(review): the text below reads TWILIO_NUMBER when the prompt is rendered and shows "undefined" if it is unset
102 |                             text: `You are calling a restaurant to book a table for ${peopleNumber} people on ${date} at ${time}. Call the restaurant at ${restaurantNumber} from ${process.env.TWILIO_NUMBER}.`
103 |                         }
104 |                     }]
105 |                 };
106 |             }
107 |         );
108 |     }
109 |     /** Connects the MCP server to a new stdio transport and resolves once connected. */
110 |     public async start(): Promise<void> {
111 |         const transport = new StdioServerTransport();
112 |         await this.server.connect(transport);
113 |     }
114 | }
115 | 


--------------------------------------------------------------------------------
/src/servers/voice.server.ts:
--------------------------------------------------------------------------------
 1 | import dotenv from 'dotenv';
 2 | import express, { Response } from 'express';
 3 | import VoiceResponse from 'twilio/lib/twiml/VoiceResponse.js';
 4 | import ExpressWs from 'express-ws';
 5 | import { WebSocket } from 'ws';
 6 | import { CallType } from '../types.js';
 7 | import { DYNAMIC_API_SECRET } from '../config/constants.js';
 8 | import { CallSessionManager } from '../handlers/openai.handler.js';
 9 | dotenv.config();
10 | /** Express + express-ws server for Twilio callbacks: one HTTP webhook that answers with TwiML and one WebSocket route for the call's media stream. */
11 | export class VoiceServer {
12 |     private app: express.Application & { ws: any };
13 |     private port: number;
14 |     private sessionManager: CallSessionManager;
15 |     private callbackUrl: string;
16 |     /** @param callbackUrl base URL used to build the stream URL (presumably the public tunnel URL); @param sessionManager receives every authorised media-stream socket. */
17 |     constructor(callbackUrl: string, sessionManager: CallSessionManager) {
18 |         this.callbackUrl = callbackUrl;
19 |         this.port = parseInt(process.env.PORT || '3004', 10); // explicit radix; falls back to 3004 when PORT is unset or empty
20 |         this.app = ExpressWs(express()).app;
21 |         this.sessionManager = sessionManager;
22 |         this.configureMiddleware();
23 |         this.setupRoutes();
24 |     }
25 |     /** Enables JSON and URL-encoded body parsing; the webhook handler reads From/To from req.body. */
26 |     private configureMiddleware(): void {
27 |         this.app.use(express.json());
28 |         this.app.use(express.urlencoded({ extended: false }));
29 |     }
30 |     /** Registers the TwiML webhook and the secret-protected media-stream WebSocket route. */
31 |     private setupRoutes(): void {
32 |         this.app.post('/call/outgoing', this.handleOutgoingCall.bind(this));
33 |         this.app.ws('/call/connection-outgoing/:secret', this.handleOutgoingConnection.bind(this));
34 |     }
35 |     /** Webhook handler: responds 401 unless the `apiSecret` query value matches, otherwise returns TwiML connecting the call to the WebSocket route with the numbers and call context as stream parameters. */
36 |     private async handleOutgoingCall(req: express.Request, res: Response): Promise<void> {
37 |         const apiSecret = req.query.apiSecret?.toString();
38 | 
39 |         if (apiSecret !== DYNAMIC_API_SECRET) { // the value read above is reused instead of querying the request a second time
40 |             res.status(401).json({ error: 'Unauthorized: Invalid or missing API secret' });
41 |             return;
42 |         }
43 | 
44 |         const fromNumber = req.body.From;
45 |         const toNumber = req.body.To;
46 |         const callContext = req.query.callContext?.toString();
47 | 
48 |         const twiml = new VoiceResponse();
49 |         const connect = twiml.connect();
50 |         // Rewrite the scheme for WebSocket use: https -> wss and, for plain-HTTP callback URLs, http -> ws.
51 |         const stream = connect.stream({
52 |             url: `${this.callbackUrl.replace(/^http/, 'ws')}/call/connection-outgoing/${apiSecret}`,
53 |         });
54 | 
55 |         stream.parameter({ name: 'fromNumber', value: fromNumber });
56 |         stream.parameter({ name: 'toNumber', value: toNumber });
57 |         stream.parameter({ name: 'callContext', value: callContext });
58 | 
59 |         res.writeHead(200, { 'Content-Type': 'text/xml' });
60 |         res.end(twiml.toString());
61 |     }
62 |     /** WebSocket handler: closes the socket with code 1008 when the path secret does not match, otherwise starts an outbound call session on it. */
63 |     private handleOutgoingConnection(ws: WebSocket, req: express.Request): void {
64 |         if (req.params.secret !== DYNAMIC_API_SECRET) {
65 |             ws.close(1008, 'Unauthorized: Invalid or missing API secret');
66 |             return;
67 |         }
68 | 
69 |         this.sessionManager.createSession(ws, CallType.OUTBOUND);
70 |     }
71 |     /** Starts listening on the configured port. */
72 |     public start(): void {
73 |         this.app.listen(this.port);
74 |     }
75 | }
76 | 


--------------------------------------------------------------------------------
/src/services/openai/context.service.ts:
--------------------------------------------------------------------------------
 1 | import { generateOutboundCallContext } from '../../config/prompts.js';
 2 | import { CallState, ConversationMessage } from '../../types.js';
 3 | 
 4 | export class OpenAIContextService {
 5 | 
 6 |     public initializeCallState(callState: CallState, fromNumber: string, toNumber: string): void {
 7 |         callState.fromNumber = fromNumber;
 8 |         callState.toNumber = toNumber;
 9 |     }
10 | 
11 |     public setupConversationContext(callState: CallState, callContext?: string): void {
12 |         callState.initialMessage = 'Hello!';
13 |         callState.callContext = generateOutboundCallContext(callState, callContext);
14 | 
15 |         const systemMessage: ConversationMessage = {
16 |             role: 'system',
17 |             content: callState.callContext
18 |         };
19 | 
20 |         callState.conversationHistory = [systemMessage];
21 | 
22 |         const initialMessage: ConversationMessage = {
23 |             role: 'user',
24 |             content: callState.initialMessage
25 |         };
26 | 
27 |         callState.conversationHistory.push(initialMessage);
28 |     }
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/src/services/openai/event.service.ts:
--------------------------------------------------------------------------------
  1 | import { WebSocket } from 'ws';
  2 | import { CallState } from '../../types.js';
  3 | import { LOG_EVENT_TYPES, SHOW_TIMING_MATH } from '../../config/constants.js';
  4 | import { checkForGoodbye } from '../../utils/call-utils.js';
  5 | 
  6 | /**
  7 |  * Service for processing OpenAI events
  8 |  */
  9 | export class OpenAIEventService {
 10 |     private readonly callState: CallState;
 11 |     private readonly onEndCall: () => void;
 12 |     private readonly onSendAudioToTwilio: (payload: string) => void;
 13 |     private readonly onTruncateResponse: () => void;
 14 | 
 15 |     /**
 16 |      * Create a new OpenAI event processor
 17 |      * @param callState The state of the call
 18 |      * @param onEndCall Callback for ending the call
 19 |      * @param onSendAudioToTwilio Callback for sending audio to Twilio
 20 |      * @param onTruncateResponse Callback for truncating the response
 21 |      */
 22 |     constructor(
 23 |         callState: CallState,
 24 |         onEndCall: () => void,
 25 |         onSendAudioToTwilio: (payload: string) => void,
 26 |         onTruncateResponse: () => void
 27 |     ) {
 28 |         this.callState = callState;
 29 |         this.onEndCall = onEndCall;
 30 |         this.onSendAudioToTwilio = onSendAudioToTwilio;
 31 |         this.onTruncateResponse = onTruncateResponse;
 32 |     }
 33 | 
 34 |     /**
 35 |      * Process an OpenAI message
 36 |      * @param data The message data
 37 |      */
 38 |     public processMessage(data: WebSocket.Data): void {
 39 |         try {
 40 |             const response = JSON.parse(data.toString());
 41 | 
 42 |             if (LOG_EVENT_TYPES.includes(response.type)) {
 43 |                 // console.log(`Received event: ${response.type}`, response);
 44 |             }
 45 | 
 46 |             this.processEvent(response);
 47 |         } catch (error) {
 48 |             console.error('Error processing OpenAI message:', error, 'Raw message:', data);
 49 |         }
 50 |     }
 51 | 
 52 |     /**
 53 |      * Process an OpenAI event
 54 |      * @param response The event data
 55 |      */
 56 |     private processEvent(response: any): void {
 57 |         switch (response.type) {
 58 |         case 'conversation.item.input_audio_transcription.completed':
 59 |             this.handleTranscriptionCompleted(response.transcript);
 60 |             break;
 61 |         case 'response.audio_transcript.done':
 62 |             this.handleAudioTranscriptDone(response.transcript);
 63 |             break;
 64 |         case 'response.audio.delta':
 65 |             if (response.delta) {
 66 |                 this.handleAudioDelta(response);
 67 |             }
 68 |             break;
 69 |         case 'input_audio_buffer.speech_started':
 70 |             this.onTruncateResponse();
 71 |             break;
 72 |         }
 73 |     }
 74 | 
 75 |     /**
 76 |      * Handle a transcription completed event
 77 |      * @param transcription The transcription text
 78 |      */
 79 |     private handleTranscriptionCompleted(transcription: string): void {
 80 |         if (!transcription) {
 81 |             return;
 82 |         }
 83 | 
 84 |         this.callState.conversationHistory.push({
 85 |             role: 'user',
 86 |             content: transcription
 87 |         });
 88 | 
 89 |         if (checkForGoodbye(transcription)) {
 90 |             this.onEndCall();
 91 |         }
 92 |     }
 93 | 
 94 |     /**
 95 |      * Handle an audio transcript done event
 96 |      * @param transcript The transcript text
 97 |      */
 98 |     private handleAudioTranscriptDone(transcript: string): void {
 99 |         if (!transcript) {
100 |             return;
101 |         }
102 | 
103 |         this.callState.conversationHistory.push({
104 |             role: 'assistant',
105 |             content: transcript
106 |         });
107 |     }
108 | 
109 |     /**
110 |      * Handle an audio delta event
111 |      * @param response The event data
112 |      */
113 |     private handleAudioDelta(response: any): void {
114 |         this.onSendAudioToTwilio(response.delta);
115 | 
116 |         if (!this.callState.responseStartTimestampTwilio) {
117 |             this.callState.responseStartTimestampTwilio = this.callState.latestMediaTimestamp;
118 |             if (SHOW_TIMING_MATH) {
119 |                 // console.log(`Setting start timestamp for new response: ${this.callState.responseStartTimestampTwilio}ms`);
120 |             }
121 |         }
122 | 
123 |         if (response.item_id) {
124 |             this.callState.lastAssistantItemId = response.item_id;
125 |         }
126 |     }
127 | }
128 | 


--------------------------------------------------------------------------------
/src/services/openai/ws.service.ts:
--------------------------------------------------------------------------------
  1 | import { WebSocket } from 'ws';
  2 | import { OpenAIConfig } from '../../types.js';
  3 | import { SHOW_TIMING_MATH } from '../../config/constants.js';
  4 | 
  5 | /**
  6 |  * Service for handling OpenAI API interactions
  7 |  */
  8 | export class OpenAIWsService {
  9 |     private webSocket: WebSocket | null = null;
 10 |     private readonly config: OpenAIConfig;
 11 | 
 12 |     /**
 13 |      * Create a new OpenAI service
 14 |      * @param config Configuration for the OpenAI API
 15 |      */
 16 |     constructor(config: OpenAIConfig) {
 17 |         this.config = config;
 18 |     }
 19 | 
 20 |     /**
 21 |      * Initialize the WebSocket connection to OpenAI
 22 |      * @param onMessage Callback for handling messages from OpenAI
 23 |      * @param onOpen Callback for when the connection is opened
 24 |      * @param onError Callback for handling errors
 25 |      */
 26 |     public initialize(
 27 |         onMessage: (data: WebSocket.Data) => void,
 28 |         onOpen: () => void,
 29 |         onError: (error: Error) => void
 30 |     ): void {
 31 |         this.webSocket = new WebSocket(this.config.websocketUrl, {
 32 |             headers: {
 33 |                 Authorization: `Bearer ${this.config.apiKey}`,
 34 |                 'OpenAI-Beta': 'realtime=v1'
 35 |             }
 36 |         });
 37 | 
 38 |         this.webSocket.on('open', onOpen);
 39 |         this.webSocket.on('message', onMessage);
 40 |         this.webSocket.on('error', onError);
 41 |     }
 42 | 
 43 |     /**
 44 |      * Initialize the session with OpenAI
 45 |      * @param callContext The context for the call
 46 |      */
 47 |     public initializeSession(callContext: string): void {
 48 |         if (!this.webSocket || this.webSocket.readyState !== WebSocket.OPEN) {
 49 |             return;
 50 |         }
 51 | 
 52 |         const sessionUpdate = {
 53 |             type: 'session.update',
 54 |             session: {
 55 |                 turn_detection: { type: 'server_vad' },
 56 |                 input_audio_format: 'g711_ulaw',
 57 |                 output_audio_format: 'g711_ulaw',
 58 |                 voice: this.config.voice,
 59 |                 instructions: callContext,
 60 |                 modalities: ['text', 'audio'],
 61 |                 temperature: this.config.temperature,
 62 |                 'input_audio_transcription': {
 63 |                     'model': 'whisper-1'
 64 |                 },
 65 |             }
 66 |         };
 67 | 
 68 |         this.webSocket.send(JSON.stringify(sessionUpdate));
 69 |     }
 70 | 
 71 |     /**
 72 |      * Close the WebSocket connection
 73 |      */
 74 |     public close(): void {
 75 |         if (this.webSocket && this.webSocket.readyState === WebSocket.OPEN) {
 76 |             this.webSocket.close();
 77 |         }
 78 |     }
 79 | 
 80 |     /**
 81 |      * Forward audio data to OpenAI
 82 |      * @param audioPayload The audio payload to forward
 83 |      */
 84 |     public sendAudio(audioPayload: string): void {
 85 |         if (!this.webSocket || this.webSocket.readyState !== WebSocket.OPEN) {
 86 |             return;
 87 |         }
 88 | 
 89 |         const audioAppend = {
 90 |             type: 'input_audio_buffer.append',
 91 |             audio: audioPayload
 92 |         };
 93 | 
 94 |         this.webSocket.send(JSON.stringify(audioAppend));
 95 |     }
 96 | 
 97 |     /**
 98 |      * Truncate the assistant's response
 99 |      * @param itemId The ID of the assistant's response
100 |      * @param elapsedTime The time elapsed since the response started
101 |      */
102 |     public truncateAssistantResponse(itemId: string, elapsedTime: number): void {
103 |         if (!this.webSocket || this.webSocket.readyState !== WebSocket.OPEN) {
104 |             return;
105 |         }
106 | 
107 |         const truncateEvent = {
108 |             type: 'conversation.item.truncate',
109 |             item_id: itemId,
110 |             content_index: 0,
111 |             audio_end_ms: elapsedTime
112 |         };
113 | 
114 |         if (SHOW_TIMING_MATH) {
115 |             console.error('Sending truncation event:', JSON.stringify(truncateEvent));
116 |         }
117 | 
118 |         this.webSocket.send(JSON.stringify(truncateEvent));
119 |     }
120 | 
121 |     /**
122 |      * Check if the WebSocket is connected
123 |      */
124 |     public isConnected(): boolean {
125 |         return this.webSocket !== null && this.webSocket.readyState === WebSocket.OPEN;
126 |     }
127 | }
128 | 


--------------------------------------------------------------------------------
/src/services/session-manager.service.ts:
--------------------------------------------------------------------------------
 1 | import { WebSocket } from 'ws';
 2 | import twilio from 'twilio';
 3 | import { CallType } from '../types.js';
 4 | import { OpenAIContextService } from './openai/context.service.js';
 5 | import { OpenAICallHandler } from '../handlers/openai.handler.js';
 6 | 
 7 | /**
 8 |  * Manages multiple concurrent call sessions
 9 |  */
10 | export class SessionManagerService {
11 |     private readonly activeSessions: Map<string, OpenAICallHandler>;
12 |     private readonly twilioClient: twilio.Twilio;
13 |     private readonly contextService: OpenAIContextService;
14 | 
15 |     /**
16 |      * Create a new session manager
17 |      * @param twilioConfig Configuration for the Twilio client
18 |      */
19 |     constructor(twilioClient: twilio.Twilio) {
20 |         this.activeSessions = new Map();
21 |         this.twilioClient = twilioClient;
22 |         this.contextService = new OpenAIContextService();
23 |     }
24 | 
25 |     /**
26 |      * Creates a new call session and adds it to the active sessions
27 |      * @param ws The WebSocket connection
28 |      * @param callType The type of call
29 |      */
30 |     public createSession(ws: WebSocket, callType: CallType): void {
31 |         const handler = new OpenAICallHandler(ws, callType, this.twilioClient, this.contextService);
32 |         this.registerSessionCleanup(ws);
33 |         this.addSession(ws, handler);
34 |     }
35 | 
36 |     /**
37 |      * Register cleanup for a session
38 |      * @param ws The WebSocket connection
39 |      */
40 |     private registerSessionCleanup(ws: WebSocket): void {
41 |         ws.on('close', () => {
42 |             this.removeSession(ws);
43 |         });
44 |     }
45 | 
46 |     /**
47 |      * Add a session to active sessions
48 |      * @param ws The WebSocket connection
49 |      * @param handler The OpenAI call handler
50 |      */
51 |     private addSession(ws: WebSocket, handler: OpenAICallHandler): void {
52 |         this.activeSessions.set(this.getSessionKey(ws), handler);
53 |     }
54 | 
55 |     /**
56 |      * Removes a session from active sessions
57 |      * @param ws The WebSocket connection
58 |      */
59 |     private removeSession(ws: WebSocket): void {
60 |         const sessionKey = this.getSessionKey(ws);
61 |         if (this.activeSessions.has(sessionKey)) {
62 |             this.activeSessions.delete(sessionKey);
63 |         }
64 |     }
65 | 
66 |     /**
67 |      * Generates a unique key for a session based on the WebSocket object
68 |      * @param ws The WebSocket connection
69 |      * @returns A unique key for the session
70 |      */
71 |     private getSessionKey(ws: WebSocket): string {
72 |         return ws.url || ws.toString();
73 |     }
74 | 
75 |     /**
76 |      * Get the Twilio client
77 |      * @returns The Twilio client
78 |      */
79 |     public getTwilioClient(): twilio.Twilio {
80 |         return this.twilioClient;
81 |     }
82 | 
83 |     /**
84 |      * Get the context service
85 |      * @returns The context service
86 |      */
87 |     public getContextService(): OpenAIContextService {
88 |         return this.contextService;
89 |     }
90 | }
91 | 


--------------------------------------------------------------------------------
/src/services/twilio/call.service.ts:
--------------------------------------------------------------------------------
 1 | import twilio from 'twilio';
 2 | import { DYNAMIC_API_SECRET, RECORD_CALLS } from '../../config/constants.js';
 3 | 
 4 | /**
 5 |  * Service for handling Twilio call operations
 6 |  */
 7 | export class TwilioCallService {
 8 |     private readonly twilioClient: twilio.Twilio;
 9 | 
10 |     /**
11 |      * Create a new Twilio call service
12 |      * @param twilioClient The Twilio client
13 |      */
14 |     constructor(twilioClient: twilio.Twilio) {
15 |         this.twilioClient = twilioClient;
16 |     }
17 | 
18 |     /**
19 |      * Start recording a call
20 |      * @param callSid The SID of the call to record
21 |      */
22 |     public async startRecording(callSid: string): Promise<void> {
23 |         if (!RECORD_CALLS || !callSid) {
24 |             return;
25 |         }
26 | 
27 |         try {
28 |             await this.twilioClient.calls(callSid)
29 |                 .recordings
30 |                 .create();
31 |         } catch (error) {
32 |             console.error(`Failed to start recording for call ${callSid}:`, error);
33 |         }
34 |     }
35 | 
36 |     /**
37 |      * End a call
38 |      * @param callSid The SID of the call to end
39 |      */
40 |     public async endCall(callSid: string): Promise<void> {
41 |         if (!callSid) {
42 |             return;
43 |         }
44 | 
45 |         try {
46 |             await this.twilioClient.calls(callSid)
47 |                 .update({ status: 'completed' });
48 |         } catch (error) {
49 |             console.error(`Failed to end call ${callSid}:`, error);
50 |         }
51 |     }
52 | 
53 | 
54 |     public async makeCall(twilioCallbackUrl: string, toNumber: string, callContext = ''): Promise<string> {
55 |         try {
56 |             const twilioClient = twilio(process.env.TWILIO_ACCOUNT_SID, process.env.TWILIO_AUTH_TOKEN);
57 | 
58 |             const callContextEncoded =  encodeURIComponent(callContext);
59 | 
60 |             const call = await twilioClient.calls.create({
61 |                 to: toNumber,
62 |                 from: process.env.TWILIO_NUMBER || '',
63 |                 url: `${twilioCallbackUrl}/call/outgoing?apiSecret=${DYNAMIC_API_SECRET}&callType=outgoing&callContext=${callContextEncoded}`,
64 |             });
65 | 
66 |             return call.sid;
67 |         } catch (error) {
68 |             console.error(`Error making call: ${error}`);
69 |             throw error;
70 |         }
71 |     }
72 | }
73 | 


--------------------------------------------------------------------------------
/src/services/twilio/event.service.ts:
--------------------------------------------------------------------------------
  1 | import { CallState } from '../../types.js';
  2 | import { OpenAIContextService } from '../openai/context.service.js';
  3 | import { RECORD_CALLS, SHOW_TIMING_MATH } from '../../config/constants.js';
  4 | import { TwilioCallService } from './call.service.js';
  5 | 
  6 | /**
  7 |  * Service for processing Twilio events
  8 |  */
  9 | export class TwilioEventService {
 10 |     private readonly callState: CallState;
 11 |     private readonly twilioCallService: TwilioCallService;
 12 |     private readonly contextService: OpenAIContextService;
 13 |     private readonly onForwardAudioToOpenAI: (payload: string) => void;
 14 | 
 15 |     /**
 16 |      * Create a new Twilio event processor
 17 |      * @param callState The state of the call
 18 |      * @param twilioCallService The Twilio call service
 19 |      * @param contextService The context service
 20 |      * @param onForwardAudioToOpenAI Callback for forwarding audio to OpenAI
 21 |      */
 22 |     constructor(
 23 |         callState: CallState,
 24 |         twilioCallService: TwilioCallService,
 25 |         contextService: OpenAIContextService,
 26 |         onForwardAudioToOpenAI: (payload: string) => void,
 27 |     ) {
 28 |         this.callState = callState;
 29 |         this.twilioCallService = twilioCallService;
 30 |         this.contextService = contextService;
 31 |         this.onForwardAudioToOpenAI = onForwardAudioToOpenAI;
 32 |     }
 33 | 
 34 |     /**
 35 |      * Process a Twilio message
 36 |      * @param message The message data
 37 |      */
 38 |     public async processMessage(message: Buffer | string): Promise<void> {
 39 |         try {
 40 |             const data = JSON.parse(message.toString());
 41 |             await this.processEvent(data);
 42 |         } catch (error) {
 43 |             console.error('Error parsing message:', error, 'Message:', message);
 44 |         }
 45 |     }
 46 | 
 47 |     /**
 48 |      * Process a Twilio event
 49 |      * @param data The event data
 50 |      */
 51 |     private async processEvent(data: any): Promise<void> {
 52 |         switch (data.event) {
 53 |         case 'media':
 54 |             await this.handleMediaEvent(data);
 55 |             break;
 56 |         case 'start':
 57 |             await this.handleStartEvent(data);
 58 |             break;
 59 |         case 'mark':
 60 |             this.handleMarkEvent();
 61 |             break;
 62 |         default:
 63 |             console.error('Received non-media event:', data.event);
 64 |             break;
 65 |         }
 66 |     }
 67 | 
 68 |     /**
 69 |      * Handle a Twilio media event
 70 |      * @param data The event data
 71 |      */
 72 |     private async handleMediaEvent(data: any): Promise<void> {
 73 |         this.callState.latestMediaTimestamp = data.media.timestamp;
 74 |         if (SHOW_TIMING_MATH) {
 75 |             // console.log(`Received media message with timestamp: ${this.callState.latestMediaTimestamp}ms`);
 76 |         }
 77 | 
 78 |         await this.handleFirstMediaEventIfNeeded();
 79 |         this.onForwardAudioToOpenAI(data.media.payload);
 80 |     }
 81 | 
 82 |     /**
 83 |      * Handle the first media event if it hasn't been handled yet
 84 |      */
 85 |     private async handleFirstMediaEventIfNeeded(): Promise<void> {
 86 |         if (this.callState.hasSeenMedia) {
 87 |             return;
 88 |         }
 89 | 
 90 |         this.callState.hasSeenMedia = true;
 91 | 
 92 |         if (RECORD_CALLS && this.callState.callSid) {
 93 |             await this.startCallRecording();
 94 |         }
 95 |     }
 96 | 
 97 |     /**
 98 |      * Start recording the call
 99 |      */
100 |     private async startCallRecording(): Promise<void> {
101 |         await this.twilioCallService.startRecording(this.callState.callSid);
102 |     }
103 | 
104 |     /**
105 |      * Handle a Twilio start event
106 |      * @param data The event data
107 |      */
108 |     private async handleStartEvent(data: any): Promise<void> {
109 |         this.callState.streamSid = data.start.streamSid;
110 |         this.callState.responseStartTimestampTwilio = null;
111 |         this.callState.latestMediaTimestamp = 0;
112 | 
113 |         this.contextService.initializeCallState(this.callState, data.start.customParameters.fromNumber, data.start.customParameters.toNumber);
114 |         this.contextService.setupConversationContext(this.callState, data.start.customParameters.callContext);
115 |         this.callState.callSid = data.start.callSid;
116 |     }
117 | 
118 |     /**
119 |      * Handle a Twilio mark event
120 |      */
121 |     private handleMarkEvent(): void {
122 |         if (this.callState.markQueue.length > 0) {
123 |             this.callState.markQueue.shift();
124 |         }
125 |     }
126 | }
127 | 


--------------------------------------------------------------------------------
/src/services/twilio/ws.service.ts:
--------------------------------------------------------------------------------
  1 | import { WebSocket } from 'ws';
  2 | import { CallState } from '../../types.js';
  3 | import { SHOW_TIMING_MATH } from '../../config/constants.js';
  4 | 
  5 | /**
  6 |  * Service for handling Twilio WebSocket streams
  7 |  */
  8 | export class TwilioWsService {
  9 |     private readonly webSocket: WebSocket;
 10 |     private readonly callState: CallState;
 11 | 
 12 |     /**
 13 |      * Create a new Twilio stream service
 14 |      * @param webSocket The Twilio WebSocket connection
 15 |      * @param callState The state of the call
 16 |      */
 17 |     constructor(webSocket: WebSocket, callState: CallState) {
 18 |         this.webSocket = webSocket;
 19 |         this.callState = callState;
 20 |     }
 21 | 
 22 |     /**
 23 |      * Close the WebSocket connection
 24 |      */
 25 |     public close(): void {
 26 |         if (this.webSocket.readyState === WebSocket.OPEN) {
 27 |             this.webSocket.close();
 28 |         }
 29 |     }
 30 | 
 31 |     /**
 32 |      * Send a mark event to Twilio
 33 |      */
 34 |     public sendMark(): void {
 35 |         if (!this.callState.streamSid) {
 36 |             return;
 37 |         }
 38 | 
 39 |         const markEvent = {
 40 |             event: 'mark',
 41 |             streamSid: this.callState.streamSid,
 42 |             mark: { name: 'responsePart' }
 43 |         };
 44 |         this.webSocket.send(JSON.stringify(markEvent));
 45 |         this.callState.markQueue.push('responsePart');
 46 |     }
 47 | 
 48 |     /**
 49 |      * Send audio data to Twilio
 50 |      * @param payload The audio payload to send
 51 |      */
 52 |     public sendAudio(payload: string): void {
 53 |         if (!this.callState.streamSid) {
 54 |             return;
 55 |         }
 56 | 
 57 |         const audioDelta = {
 58 |             event: 'media',
 59 |             streamSid: this.callState.streamSid,
 60 |             media: { payload }
 61 |         };
 62 |         this.webSocket.send(JSON.stringify(audioDelta));
 63 |     }
 64 | 
 65 |     /**
 66 |      * Clear the Twilio stream
 67 |      */
 68 |     public clearStream(): void {
 69 |         if (!this.callState.streamSid) {
 70 |             return;
 71 |         }
 72 | 
 73 |         this.webSocket.send(JSON.stringify({
 74 |             event: 'clear',
 75 |             streamSid: this.callState.streamSid
 76 |         }));
 77 |     }
 78 | 
 79 |     /**
 80 |      * Set up event handlers for the Twilio WebSocket
 81 |      * @param onMessage Callback for handling messages from Twilio
 82 |      * @param onClose Callback for when the connection is closed
 83 |      */
 84 |     public setupEventHandlers(
 85 |         onMessage: (message: Buffer | string) => void,
 86 |         onClose: () => void
 87 |     ): void {
 88 |         this.webSocket.on('message', onMessage);
 89 |         this.webSocket.on('close', onClose);
 90 |     }
 91 | 
 92 |     /**
 93 |      * Process a Twilio start event
 94 |      * @param data The start event data
 95 |      */
 96 |     public processStartEvent(data: any): void {
 97 |         this.callState.streamSid = data.start.streamSid;
 98 |         this.callState.responseStartTimestampTwilio = null;
 99 |         this.callState.latestMediaTimestamp = 0;
100 |         this.callState.callSid = data.start.callSid;
101 |     }
102 | 
103 |     /**
104 |      * Process a Twilio mark event
105 |      */
106 |     public processMarkEvent(): void {
107 |         if (this.callState.markQueue.length > 0) {
108 |             this.callState.markQueue.shift();
109 |         }
110 |     }
111 | 
112 |     /**
113 |      * Process a Twilio media event
114 |      * @param data The media event data
115 |      */
116 |     public processMediaEvent(data: any): void {
117 |         this.callState.latestMediaTimestamp = data.media.timestamp;
118 |         if (SHOW_TIMING_MATH) {
119 |             // console.log(`Received media message with timestamp: ${this.callState.latestMediaTimestamp}ms`);
120 |         }
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/src/start-all.ts:
--------------------------------------------------------------------------------
  1 | import dotenv from 'dotenv';
  2 | import ngrok from '@ngrok/ngrok';
  3 | import { isPortInUse } from './utils/execution-utils.js';
  4 | import { VoiceCallMcpServer } from './servers/mcp.server.js';
  5 | import { TwilioCallService } from './services/twilio/call.service.js';
  6 | import { VoiceServer } from './servers/voice.server.js';
  7 | import twilio from 'twilio';
  8 | import { CallSessionManager } from './handlers/openai.handler.js';
  9 | 
 10 | // Load environment variables
 11 | dotenv.config();
 12 | 
 13 | // Define required environment variables
 14 | const REQUIRED_ENV_VARS = [
 15 |     'TWILIO_ACCOUNT_SID',
 16 |     'TWILIO_AUTH_TOKEN',
 17 |     'OPENAI_API_KEY',
 18 |     'NGROK_AUTHTOKEN',
 19 |     'TWILIO_NUMBER'
 20 | ] as const;
 21 | 
 22 | /**
 23 |  * Validates that all required environment variables are present
 24 |  * @returns true if all variables are present, exits process otherwise
 25 |  */
 26 | function validateEnvironmentVariables(): boolean {
 27 |     for (const envVar of REQUIRED_ENV_VARS) {
 28 |         if (!process.env[envVar]) {
 29 |             console.error(`Error: ${envVar} environment variable is required`);
 30 |             process.exit(1);
 31 |         }
 32 |     }
 33 |     return true;
 34 | }
 35 | 
 36 | /**
 37 |  * Sets up the port for the application
 38 |  */
 39 | function setupPort(): number {
 40 |     const PORT = process.env.PORT || '3004';
 41 |     process.env.PORT = PORT;
 42 |     return parseInt(PORT);
 43 | }
 44 | 
 45 | /**
 46 |  * Establishes ngrok tunnel for external access
 47 |  * @param portNumber - The port number to forward
 48 |  * @returns The public URL provided by ngrok
 49 |  */
 50 | async function setupNgrokTunnel(portNumber: number): Promise<string> {
 51 |     const listener = await ngrok.forward({
 52 |         addr: portNumber,
 53 |         authtoken_from_env: true
 54 |     });
 55 | 
 56 |     const twilioCallbackUrl = listener.url();
 57 |     if (!twilioCallbackUrl) {
 58 |         throw new Error('Failed to obtain ngrok URL');
 59 |     }
 60 | 
 61 |     return twilioCallbackUrl;
 62 | }
 63 | 
 64 | /**
 65 |  * Sets up graceful shutdown handlers
 66 |  */
 67 | function setupShutdownHandlers(): void {
 68 |     process.on('SIGINT', async () => {
 69 |         try {
 70 |             await ngrok.disconnect();
 71 |         } catch (err) {
 72 |             console.error('Error killing ngrok:', err);
 73 |         }
 74 |         process.exit(0);
 75 |     });
 76 | }
 77 | 
 78 | /**
 79 |  * Retries starting the server when the port is in use
 80 |  * @param portNumber - The port number to check
 81 |  */
 82 | function scheduleServerRetry(portNumber: number): void {
 83 |     console.error(`Port ${portNumber} is already in use. Server may already be running.`);
 84 |     console.error('Will retry in 15 seconds...');
 85 | 
 86 |     const RETRY_INTERVAL_MS = 15000;
 87 | 
 88 |     const retryInterval = setInterval(async () => {
 89 |         const stillInUse = await isPortInUse(portNumber);
 90 | 
 91 |         if (!stillInUse) {
 92 |             clearInterval(retryInterval);
 93 |             main();
 94 |         } else {
 95 |             console.error(`Port ${portNumber} is still in use. Will retry in 15 seconds...`);
 96 |         }
 97 |     }, RETRY_INTERVAL_MS);
 98 | }
 99 | 
100 | 
/**
 * Application entry point.
 *
 * Validates the environment, resolves the port and builds the Twilio client
 * and the services that depend on it. If the port is already taken, a retry
 * is scheduled and startup stops here. Otherwise the ngrok tunnel is opened,
 * the voice server and the MCP server are started, and the shutdown handlers
 * are installed. Any startup error is logged and terminates the process
 * with code 1.
 */
async function main(): Promise<void> {
    try {
        validateEnvironmentVariables();
        const port = setupPort();

        const client = twilio(process.env.TWILIO_ACCOUNT_SID, process.env.TWILIO_AUTH_TOKEN);
        const sessions = new CallSessionManager(client);
        const callService = new TwilioCallService(client);

        // Another instance may already own the port; poll until it is free.
        if (await isPortInUse(port)) {
            scheduleServerRetry(port);
            return;
        }

        // Public URL used for Twilio callbacks
        const publicUrl = await setupNgrokTunnel(port);

        const voiceServer = new VoiceServer(publicUrl, sessions);
        voiceServer.start();

        const mcpServer = new VoiceCallMcpServer(callService, publicUrl);
        await mcpServer.start();

        setupShutdownHandlers();
    } catch (error) {
        console.error('Error starting services:', error);
        process.exit(1);
    }
}

// Kick off startup
main();
138 | 


--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
 1 | // state.ts - Shared state variables
 2 | export enum CallType {
 3 |     OUTBOUND = 'OUTBOUND',
 4 | }
 5 | 
 6 | export interface ConversationMessage {
 7 |     role: 'system' | 'user' | 'assistant';
 8 |     content: string;
 9 |     name?: string;
10 | }
11 | 
/**
 * Mutable state of a single call, shared by the services that handle it.
 */
export class CallState {
    // Identifiers taken from the Twilio stream's start event
    streamSid = '';
    callSid = '';

    // Direction of the call
    callType: CallType = CallType.OUTBOUND;

    // Caller and callee numbers
    fromNumber = '';
    toNumber = '';

    // Generated context, first user message and the running transcript
    callContext = '';
    initialMessage = '';
    conversationHistory: ConversationMessage[] = [];

    // Speech state
    speaking = false;

    // Timing and processing state
    llmStart = 0;
    firstByte = true;
    sendFirstSentenceInputTime: number | null = null;

    // Media processing state
    /** Timestamp of the most recent media event */
    latestMediaTimestamp = 0;
    /** Media timestamp at which the current response started, or null when none is in progress */
    responseStartTimestampTwilio: number | null = null;
    /** Item id of the most recent assistant audio, or null before any audio */
    lastAssistantItemId: string | null = null;
    /** Names of marks sent to Twilio that have not been acknowledged yet */
    markQueue: string[] = [];
    /** Whether at least one media event has been handled */
    hasSeenMedia = false;

    /**
     * @param callType Direction of the call; defaults to outbound
     */
    constructor(callType: CallType = CallType.OUTBOUND) {
        this.callType = callType;
    }
}
48 | 
/**
 * Configuration for the OpenAI WebSocket connection
 */
export interface OpenAIConfig {
    /** API key used to authenticate with OpenAI. */
    apiKey: string;
    /** URL of the OpenAI WebSocket endpoint to connect to. */
    websocketUrl: string;
    /** Voice identifier. NOTE(review): accepted values are not visible in this file; confirm against the OpenAI session setup. */
    voice: string;
    /** Temperature setting. NOTE(review): presumably the model sampling temperature; valid range not visible here. */
    temperature: number;
}
58 | 
/**
 * Configuration for Twilio client
 */
export interface TwilioConfig {
    /** Twilio account SID. */
    accountSid: string;
    /** Twilio auth token paired with the account SID. */
    authToken: string;
    /** Whether calls should be recorded. NOTE(review): where this flag is consumed is not visible in this file. */
    recordCalls: boolean;
}
67 | 


--------------------------------------------------------------------------------
/src/utils/call-utils.ts:
--------------------------------------------------------------------------------
 1 | import { WebSocket } from 'ws';
 2 | import { GOODBYE_PHRASES } from '../config/constants.js';
 3 | 
 4 | export const checkForGoodbye = (text: string): boolean => {
 5 |     const lowercaseText = text.toLowerCase();
 6 |     return GOODBYE_PHRASES.some(phrase => lowercaseText.includes(phrase));
 7 | };
 8 | 
 9 | export const endCall = (ws: WebSocket, openAiWs: WebSocket): void => {
10 |     setTimeout(() => {
11 |         if (ws.readyState === WebSocket.OPEN) {
12 |             ws.close();
13 |         }
14 |         if (openAiWs.readyState === WebSocket.OPEN) {
15 |             openAiWs.close();
16 |         }
17 |     }, 5000);
18 | };
19 | 


--------------------------------------------------------------------------------
/src/utils/execution-utils.ts:
--------------------------------------------------------------------------------
 1 | import net from 'net';
 2 | 
 3 | export async function isPortInUse(port: number): Promise<boolean> {
 4 |     return new Promise((resolve) => {
 5 |         const server = net.createServer()
 6 |             .once('error', (err: NodeJS.ErrnoException) => {
 7 |                 if (err.code === 'EADDRINUSE') {
 8 |                     resolve(true);
 9 |                 } else {
10 |                     resolve(false);
11 |                 }
12 |             })
13 |             .once('listening', () => {
14 |                 server.close();
15 |                 resolve(false);
16 |             })
17 |             .listen(port);
18 |     });
19 | }
20 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "target": "es2020",
 4 |     "module": "NodeNext",
 5 |     "moduleResolution": "NodeNext",
 6 |     "declaration": true,
 7 |     "declarationMap": true,
 8 |     "sourceMap": true,
 9 |     "outDir": "./dist",
10 |     "strict": true,
11 |     "esModuleInterop": true,
12 |     "forceConsistentCasingInFileNames": true,
13 |     "resolveJsonModule": true,
14 |     "isolatedModules": true,
15 |     "skipLibCheck": true,
16 |     "lib": ["es2020", "DOM"],
17 |     "allowSyntheticDefaultImports": true
18 |   },
19 |   "include": ["src/**/*", "openai-realtime-handler.ts"],
20 |   "exclude": ["node_modules", "dist"]
21 | }


--------------------------------------------------------------------------------