├── .gitignore ├── Dockerfile ├── README.md ├── package.json ├── smithery.yaml ├── src └── server.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist/ 3 | .env 4 | *.log -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Generated by https://smithery.ai. See: https://smithery.ai/docs/config#dockerfile 2 | # Use the official Node.js image with npm 3 | FROM node:22.12-alpine AS builder 4 | 5 | # Set working directory 6 | WORKDIR /app 7 | 8 | # Copy package.json and tsconfig.json to the working directory 9 | COPY package.json tsconfig.json ./ 10 | 11 | # Install dependencies 12 | RUN npm install --ignore-scripts 13 | 14 | # Copy the TypeScript source files 15 | COPY src/ ./src 16 | 17 | # Compile TypeScript to JavaScript 18 | RUN npx tsc 19 | 20 | # Use a smaller Node.js image for the final build 21 | FROM node:22-alpine AS release 22 | 23 | # Set the working directory 24 | WORKDIR /app 25 | 26 | # Copy the compiled files and node_modules from the builder stage 27 | COPY --from=builder /app/dist /app/dist 28 | COPY --from=builder /app/node_modules /app/node_modules 29 | COPY --from=builder /app/package.json /app/package.json 30 | 31 | # Set environment variables required for the server 32 | ENV NODE_ENV=production 33 | 34 | # Expose any required ports (replace 3000 with the actual port if different) 35 | EXPOSE 3000 36 | 37 | # Run the server 38 | ENTRYPOINT ["node", "dist/server.js"] 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![MseeP.ai Security Assessment Badge](https://mseep.net/pr/phialsbasement-zonos-tts-mcp-badge.png)](https://mseep.ai/app/phialsbasement-zonos-tts-mcp) 2 | 3 | # Zonos MCP Integration 4 | 
[![smithery badge](https://smithery.ai/badge/@PhialsBasement/zonos-tts-mcp)](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp) 5 | 6 | A Model Context Protocol integration for Zonos TTS, allowing Claude to generate speech directly. 7 | 8 | ## Setup 9 | 10 | ### Installing via Smithery 11 | 12 | To install Zonos TTS Integration for Claude Desktop automatically via [Smithery](https://smithery.ai/server/@PhialsBasement/zonos-tts-mcp): 13 | 14 | ```bash 15 | npx -y @smithery/cli install @PhialsBasement/zonos-tts-mcp --client claude 16 | ``` 17 | 18 | ### Manual installation 19 | 20 | 1. Make sure you have Zonos running with our API implementation ([PhialsBasement/zonos-api](https://github.com/PhialsBasement/Zonos-API)) 21 | 22 | 2. Install dependencies: 23 | ```bash 24 | npm install @modelcontextprotocol/sdk axios 25 | ``` 26 | 27 | 3. Configure PulseAudio access: 28 | ```bash 29 | # PulseAudio must be properly configured for audio playback 30 | # The MCP server will automatically try to connect to your PulseAudio server 31 | ``` 32 | 33 | 4. Build the MCP server: 34 | ```bash 35 | npm run build 36 | # This will create the dist folder with the compiled server 37 | ``` 38 | 39 | 5. Add to Claude's config file: 40 | Edit your Claude config file (usually in `~/.config/claude/config.json`) and add this to the `mcpServers` section: 41 | 42 | ```json 43 | "zonos-tts": { 44 | "command": "node", 45 | "args": [ 46 | "/path/to/your/zonos-mcp/dist/server.js" 47 | ] 48 | } 49 | ``` 50 | 51 | Replace `/path/to/your/zonos-mcp` with the actual path where you installed the MCP server. 
52 | 53 | ## Using with Claude 54 | 55 | Once configured, Claude automatically knows how to use the `speak_response` tool: 56 | 57 | ```python 58 | speak_response( 59 | text="Your text here", 60 | language="en-us", # optional, defaults to en-us 61 | emotion="happy" # optional: "neutral", "happy", "sad", "angry" 62 | ) 63 | ``` 64 | 65 | ## Features 66 | 67 | - Text-to-speech through Claude 68 | - Multiple emotions support 69 | - Multi-language support 70 | - Proper audio playback through PulseAudio 71 | 72 | ## Requirements 73 | 74 | - Node.js 75 | - PulseAudio setup 76 | - Running instance of Zonos API (PhialsBasement/zonos-api) 77 | - Working audio output device 78 | 79 | ## Notes 80 | 81 | - Make sure both the Zonos API server and this MCP server are running 82 | - Audio playback requires proper PulseAudio configuration 83 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mcp-tts", 3 | "version": "1.0.0", 4 | "type": "module", 5 | "scripts": { 6 | "build": "tsc", 7 | "start": "node dist/server.js", 8 | "dev": "tsc --watch" 9 | }, 10 | "dependencies": { 11 | "@gradio/client": "^0.12.1", 12 | "@modelcontextprotocol/sdk": "^1.0.0", 13 | "axios": "^1.7.9", 14 | "zod": "^3.22.4" 15 | }, 16 | "devDependencies": { 17 | "@types/node": "^20.0.0", 18 | "typescript": "^5.0.0" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | startCommand: 4 | type: stdio 5 | configSchema: 6 | # JSON Schema defining the configuration options for the MCP. 7 | type: object 8 | required: [] 9 | properties: {} 10 | commandFunction: 11 | # A function that produces the CLI command to start the MCP on stdio. 
12 | |- 13 | (config) => ({ command: 'node', args: ['dist/server.js'] }) 14 | -------------------------------------------------------------------------------- /src/server.ts: --------------------------------------------------------------------------------
// Polyfill a minimal global 'window' for Node.js.
// NOTE: static `import` declarations are hoisted and evaluated before this
// statement runs, so the polyfill is only visible to code that reads `window`
// lazily (after module evaluation), not to code that reads it at import time.
if (typeof global.window === "undefined") {
  (global as any).window = {
    location: {
      protocol: "http:",
      hostname: "localhost",
      port: "8000",
      href: "http://localhost:8000/"
    }
  };
}

import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import { execFile } from "child_process";
import { randomUUID } from "crypto";
import { rm, writeFile } from "fs/promises";
import { homedir, tmpdir } from "os";
import { join } from "path";
import { promisify } from "util";
import axios from 'axios';

// execFile (not exec): arguments are passed as an array, so no shell parses the path.
const execFileAsync = promisify(execFile);
const API_BASE_URL = 'http://localhost:8000';

type Emotion = "neutral" | "happy" | "sad" | "angry";

/** Per-emotion weights sent in the `emotion` field of the speech request. */
interface EmotionParameters {
  happiness: number;
  sadness: number;
  disgust: number;
  fear: number;
  surprise: number;
  anger: number;
  other: number;
  neutral: number;
}

/** Arguments accepted by the `speak_response` tool. */
interface ZonosRequestParams {
  text: string;
  language: string;
  emotion: Emotion;
}

/** One weight preset for every supported emotion; a missing key is a compile error. */
type EmotionMap = Record<Emotion, EmotionParameters>;

/**
 * Turns an HTTP error body into something readable in a log line.
 * Requests are made with `responseType: 'arraybuffer'`, so error bodies arrive
 * as raw bytes rather than text.
 */
function describeResponseBody(data: unknown): unknown {
  if (Buffer.isBuffer(data)) {
    return data.toString("utf8");
  }
  if (data instanceof ArrayBuffer) {
    return Buffer.from(data).toString("utf8");
  }
  return data;
}

/**
 * MCP server exposing a single `speak_response` tool: it requests synthesized
 * speech from the HTTP API at API_BASE_URL and plays the returned WAV locally.
 *
 * All diagnostics go to stderr. The server talks MCP over stdio, so anything
 * written to stdout would be interleaved with the JSON-RPC messages.
 */
class TTSServer {
  private readonly mcp: McpServer;
  private readonly emotionMap: EmotionMap;

  constructor() {
    this.mcp = new McpServer({
      name: "TTS MCP Server",
      version: "1.0.0",
    });

    this.emotionMap = {
      neutral: {
        happiness: 0.2,
        sadness: 0.2,
        anger: 0.2,
        disgust: 0.05,
        fear: 0.05,
        surprise: 0.1,
        other: 0.1,
        neutral: 0.8,
      },
      happy: {
        happiness: 1,
        sadness: 0.05,
        anger: 0.05,
        disgust: 0.05,
        fear: 0.05,
        surprise: 0.2,
        other: 0.1,
        neutral: 0.2,
      },
      sad: {
        happiness: 0.05,
        sadness: 1,
        anger: 0.05,
        disgust: 0.2,
        fear: 0.2,
        surprise: 0.05,
        other: 0.1,
        neutral: 0.2,
      },
      angry: {
        happiness: 0.05,
        sadness: 0.2,
        anger: 1,
        disgust: 0.4,
        fear: 0.2,
        surprise: 0.2,
        other: 0.1,
        neutral: 0.1,
      },
    };

    this.setupTools();
  }

  /**
   * Registers the `speak_response` tool.
   *
   * The handler posts the text to the speech endpoint, writes the returned
   * audio to a uniquely named temp file, plays it, and always removes the
   * file afterwards.
   *
   * @throws Error with a `TTS failed: …` message when the request, the file
   *         write, or playback fails.
   */
  private setupTools(): void {
    this.mcp.tool(
      "speak_response",
      {
        text: z.string(),
        language: z.string().default("en-us"),
        emotion: z.enum(["neutral", "happy", "sad", "angry"]).default("neutral"),
      },
      async ({ text, language, emotion }: ZonosRequestParams) => {
        try {
          const emotionParams = this.emotionMap[emotion];
          // stderr, not stdout: stdout is the MCP transport channel.
          console.error(`Converting to speech: "${text}" with ${emotion} emotion`);

          // Use new OpenAI-style endpoint
          const response = await axios.post(`${API_BASE_URL}/v1/audio/speech`, {
            model: "Zyphra/Zonos-v0.1-transformer",
            input: text,
            language: language,
            emotion: emotionParams,
            speed: 1.0,
            response_format: "wav" // Using WAV for better compatibility
          }, {
            responseType: 'arraybuffer'
          });

          // os.tmpdir() works on every platform handled by playAudio; the UUID
          // keeps concurrent calls in the same millisecond from colliding.
          const tempAudioPath = join(
            tmpdir(),
            `tts_output_${Date.now()}_${randomUUID()}.wav`
          );

          try {
            await writeFile(tempAudioPath, response.data);
            await this.playAudio(tempAudioPath);
          } finally {
            // Runs on success and on failure. `force` ignores a file that was
            // never created; any other cleanup error is logged but must not
            // mask the original failure.
            await rm(tempAudioPath, { force: true }).catch((cleanupError: unknown) => {
              console.error("Failed to remove temp file:", tempAudioPath, cleanupError);
            });
          }

          return {
            content: [
              {
                type: "text",
                text: `Successfully spoke: "${text}" with ${emotion} emotion`,
              },
            ],
          };
        } catch (error) {
          const errorMessage = error instanceof Error ? error.message : "Unknown error";
          console.error("TTS Error:", errorMessage);
          if (axios.isAxiosError(error) && error.response) {
            console.error("API Response:", describeResponseBody(error.response.data));
          }
          throw new Error(`TTS failed: ${errorMessage}`);
        }
      }
    );
  }

  /**
   * Builds the environment for `paplay`.
   *
   * PULSE_SERVER / PULSE_COOKIE already set by the user are respected;
   * otherwise they default to the per-user socket under XDG_RUNTIME_DIR and
   * the cookie under the user's home directory.
   */
  private pulseAudioEnv(): NodeJS.ProcessEnv {
    const uid = process.getuid?.() ?? 1000;
    const runtimeDir = process.env.XDG_RUNTIME_DIR || `/run/user/${uid}`;

    return {
      ...process.env,
      PULSE_SERVER: process.env.PULSE_SERVER || `unix:${runtimeDir}/pulse/native`,
      // homedir() still resolves when HOME is unset, unlike `${process.env.HOME}`.
      PULSE_COOKIE: process.env.PULSE_COOKIE || join(homedir(), ".config", "pulse", "cookie"),
    };
  }

  /**
   * Plays a WAV file with the platform's command-line player and resolves
   * once playback has finished.
   *
   * @param audioPath Path of the WAV file to play.
   * @throws Error with an `Audio playback failed: …` message when the player
   *         fails or the platform is unsupported.
   */
  private async playAudio(audioPath: string): Promise<void> {
    try {
      console.error("Playing audio from:", audioPath);

      switch (process.platform) {
        case "darwin":
          await execFileAsync("afplay", [audioPath]);
          break;
        case "linux":
          // Try paplay for PulseAudio
          await execFileAsync("paplay", [audioPath], { env: this.pulseAudioEnv() });
          break;
        case "win32": {
          // Inside a PowerShell single-quoted string a quote is escaped by doubling it.
          const quotedPath = audioPath.replace(/'/g, "''");
          await execFileAsync("powershell", [
            "-NoProfile",
            "-Command",
            `(New-Object Media.SoundPlayer '${quotedPath}').PlaySync()`,
          ]);
          break;
        }
        default:
          throw new Error(`Unsupported platform: ${process.platform}`);
      }
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : "Unknown error";
      console.error("Audio playback error:", errorMessage);
      throw new Error(`Audio playback failed: ${errorMessage}`);
    }
  }

  /** Connects the MCP server to a stdio transport. */
  public async start(): Promise<void> {
    const transport = new StdioServerTransport();
    await this.mcp.connect(transport);
  }
}

const server = new TTSServer();
await server.start();
-------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ESNext", 5 | "moduleResolution": "node", 6 | "esModuleInterop": true, 7 | "strict": true, 8 | "skipLibCheck": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "outDir": "./dist", 11 | "rootDir": "./src" 12 | }, 13 | "include": ["src/**/*"], 14 | "exclude": ["node_modules", "dist"] 15 | } --------------------------------------------------------------------------------