├── bunfig.toml ├── .size-limit.json ├── tsconfig.json ├── examples ├── listVoices.ts ├── streaming.ts ├── simple.ts ├── webworker-example │ ├── worker.ts │ └── main.ts ├── simple-api.ts ├── NAMING_EXAMPLES.md ├── simple-vs-advanced.ts ├── naming-compatibility.ts ├── universal-detection.ts ├── universal-api.ts ├── isomorphic-example.ts └── cdn-example.html ├── .npmignore ├── typedoc.json ├── tests ├── deno │ ├── isomorphic.deno.ts │ └── voices.deno.ts ├── voices.test.js ├── communicate.test.js ├── types.test.js ├── isomorphic.test.js └── simple-api.test.js ├── schemas └── communicate-options.schema.json ├── .github └── workflows │ ├── sync-proxy-package.yml │ ├── test.yml │ └── publish.yml ├── .eslintrc.json ├── tsup.config.ts ├── src ├── browser-drm.ts ├── constants.ts ├── browser-entry.ts ├── exceptions.ts ├── isomorphic-entry.ts ├── isomorphic-drm.ts ├── drm.ts ├── tts_config.ts ├── types.ts ├── submaker.ts ├── voices.ts ├── webworker-entry.ts ├── index.ts ├── runtime-detection.ts ├── browser-voices.ts ├── browser-utils.ts ├── isomorphic-voices.ts ├── simple.ts ├── isomorphic-utils.ts ├── browser-simple.ts ├── isomorphic-simple.ts ├── utils.ts ├── communicate.ts └── browser.ts ├── deno.json ├── .gitignore ├── PERFORMANCE_OPTIMIZATIONS.md ├── package.json └── FEATURES.md /bunfig.toml: -------------------------------------------------------------------------------- 1 | [test] 2 | # Exclude Deno-only tests when running `bun test` locally. 3 | # CI runs Deno tests with `deno test` (see .github/workflows/test.yml). 
4 | coveragePathIgnorePatterns = ["tests/deno/**"] 5 | -------------------------------------------------------------------------------- /.size-limit.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Browser", 4 | "path": "dist/browser.js", 5 | "limit": "35 KB" 6 | }, 7 | { 8 | "name": "Web Worker", 9 | "path": "dist/webworker.js", 10 | "limit": "40 KB" 11 | } 12 | ] -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ES2022", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "moduleResolution": "node", 7 | "allowSyntheticDefaultImports": true, 8 | "esModuleInterop": true, 9 | 10 | "forceConsistentCasingInFileNames": true, 11 | "strict": true, 12 | "skipLibCheck": true, 13 | "resolveJsonModule": true, 14 | "declaration": true, 15 | "outDir": "./dist" 16 | }, 17 | "include": ["src"], 18 | "exclude": ["node_modules", "dist", "examples"] 19 | } 20 | -------------------------------------------------------------------------------- /examples/listVoices.ts: -------------------------------------------------------------------------------- 1 | import { VoicesManager } from "../dist/index.js"; 2 | 3 | async function main() { 4 | const voicesManager = await VoicesManager.create(); 5 | 6 | // Find all English voices 7 | const voices = voicesManager.find({ Language: "en" }); 8 | console.log("English voices:", voices.map(v => v.ShortName)); 9 | 10 | // Find a specific voice 11 | const femaleUsVoices = voicesManager.find({ Gender: "Female", Locale: "en-US" }); 12 | console.log("Female US voices:", femaleUsVoices.map(v => v.ShortName)); 13 | } 14 | 15 | main().catch(console.error); -------------------------------------------------------------------------------- /.npmignore: 
-------------------------------------------------------------------------------- 1 | # Source files - only ship built dist/ files 2 | src/ 3 | examples/ 4 | schemas/ 5 | 6 | # Development files 7 | tsconfig.json 8 | tsup.config.ts 9 | typedoc.json 10 | .gitignore 11 | .eslintrc* 12 | .prettierrc* 13 | 14 | # Documentation (keep README.md but exclude others) 15 | FEATURES.md 16 | API.md 17 | 18 | # Test files 19 | test/ 20 | tests/ 21 | *.test.ts 22 | *.test.js 23 | *.spec.ts 24 | *.spec.js 25 | 26 | # Development dependencies 27 | node_modules/ 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | 32 | # IDE files 33 | .vscode/ 34 | .idea/ 35 | *.swp 36 | *.swo 37 | 38 | # OS files 39 | .DS_Store 40 | Thumbs.db 41 | 42 | # Build artifacts that aren't needed 43 | *.tsbuildinfo -------------------------------------------------------------------------------- /typedoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "entryPoints": ["src/index.ts"], 3 | "out": "docs", 4 | "name": "edge-tts-universal", 5 | "includeVersion": true, 6 | "excludePrivate": true, 7 | "excludeProtected": true, 8 | "excludeInternal": true, 9 | "readme": "README.md", 10 | "theme": "default", 11 | "hideGenerator": false, 12 | "sort": ["source-order"], 13 | "kindSortOrder": [ 14 | "Document", 15 | "Project", 16 | "Module", 17 | "Namespace", 18 | "Enum", 19 | "EnumMember", 20 | "Class", 21 | "Interface", 22 | "TypeAlias", 23 | "Constructor", 24 | "Property", 25 | "Variable", 26 | "Function", 27 | "Method", 28 | "Accessor", 29 | "Parameter", 30 | "TypeParameter", 31 | "Reference" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /examples/streaming.ts: -------------------------------------------------------------------------------- 1 | import { Communicate, SubMaker } from "../dist/index.js"; 2 | 3 | const TEXT = "This is a test of the streaming functionality, with subtitles."; 4 | const 
VOICE = "en-GB-SoniaNeural"; 5 | 6 | async function main() { 7 | const communicate = new Communicate(TEXT, { voice: VOICE }); 8 | const subMaker = new SubMaker(); 9 | 10 | for await (const chunk of communicate.stream()) { 11 | if (chunk.type === "audio" && chunk.data) { 12 | // Do something with the audio data, e.g., stream it to a client. 13 | // For this example, we'll just log its size. 14 | console.log(`Received audio chunk of size: ${chunk.data.length}`); 15 | } else if (chunk.type === "WordBoundary") { 16 | subMaker.feed(chunk); 17 | } 18 | } 19 | 20 | // Get the subtitles in SRT format. 21 | const srt = subMaker.getSrt(); 22 | console.log("\nGenerated Subtitles (SRT):\n", srt); 23 | } 24 | 25 | main().catch(console.error); -------------------------------------------------------------------------------- /examples/simple.ts: -------------------------------------------------------------------------------- 1 | import { Communicate } from "../dist/index.js"; 2 | import { promises as fs } from "fs"; 3 | import path from "path"; 4 | import { fileURLToPath } from 'url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | 9 | const TEXT = "Hello, world! This is a test of the new edge-tts Node.js library."; 10 | const VOICE = "en-US-SteffanNeural"; 11 | const OUTPUT_FILE = path.join(__dirname, "test.mp3"); 12 | 13 | async function main() { 14 | const communicate = new Communicate(TEXT, { voice: VOICE }); 15 | 16 | // The stream() method returns an async generator that yields audio chunks. 
17 | const audioStream = communicate.stream(); 18 | 19 | const buffers: Buffer[] = []; 20 | for await (const chunk of audioStream) { 21 | if (chunk.type === "audio" && chunk.data) { 22 | buffers.push(chunk.data); 23 | } 24 | } 25 | 26 | const finalBuffer = Buffer.concat(buffers); 27 | await fs.writeFile(OUTPUT_FILE, finalBuffer); 28 | 29 | console.log(`Audio saved to ${OUTPUT_FILE}`); 30 | } 31 | 32 | main().catch(console.error); -------------------------------------------------------------------------------- /examples/webworker-example/worker.ts: -------------------------------------------------------------------------------- 1 | // Web Worker for background TTS processing 2 | import { EdgeTTS, postAudioMessage } from '../../src/webworker-entry'; 3 | 4 | // Listen for messages from main thread 5 | self.onmessage = async function (e) { 6 | const { type, text, voice, options } = e.data; 7 | 8 | if (type === 'synthesize') { 9 | try { 10 | console.log('Worker: Starting TTS synthesis...'); 11 | 12 | const tts = new EdgeTTS(text, voice, options); 13 | const result = await tts.synthesize(); 14 | 15 | console.log(`Worker: Generated ${result.audio.size} bytes of audio`); 16 | 17 | // Post result back to main thread 18 | postAudioMessage(result.audio, result.subtitle); 19 | 20 | } catch (error) { 21 | console.error('Worker: TTS synthesis failed:', error); 22 | 23 | // Post error back to main thread 24 | self.postMessage({ 25 | type: 'error', 26 | error: error instanceof Error ? 
error.message : 'Unknown error' 27 | }); 28 | } 29 | } 30 | } 31 | 32 | // Signal that worker is ready 33 | self.postMessage({ 34 | type: 'ready', 35 | message: 'TTS Worker ready for synthesis requests' 36 | }); -------------------------------------------------------------------------------- /tests/deno/isomorphic.deno.ts: -------------------------------------------------------------------------------- 1 | import { assertEquals, assert } from "https://deno.land/std@0.208.0/assert/mod.ts"; 2 | import { Communicate } from "../../dist/isomorphic.js"; 3 | 4 | Deno.test("Isomorphic API - Communicate can be instantiated", () => { 5 | const communicate = new Communicate('Hello, world!', { 6 | voice: 'en-US-EmmaMultilingualNeural' 7 | }); 8 | assert(communicate instanceof Communicate, 'Should create Communicate instance'); 9 | }); 10 | 11 | Deno.test("Isomorphic API - Communicate stream method exists", () => { 12 | const communicate = new Communicate('Test', { 13 | voice: 'en-US-EmmaMultilingualNeural' 14 | }); 15 | 16 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 17 | 18 | const stream = communicate.stream(); 19 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 20 | }); 21 | 22 | Deno.test("Isomorphic API - Communicate accepts configuration options", () => { 23 | const communicate = new Communicate('Test text', { 24 | voice: 'en-US-EmmaMultilingualNeural', 25 | rate: '+20%', 26 | volume: '+10%', 27 | pitch: '+5Hz', 28 | connectionTimeout: 5000 29 | }); 30 | 31 | assert(communicate instanceof Communicate, 'Should create Communicate instance with options'); 32 | }); 33 | 34 | -------------------------------------------------------------------------------- /schemas/communicate-options.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": 
"https://schemas.travisvn.com/edge-tts-universal/communicate-options.json", 4 | "title": "CommunicateOptions", 5 | "description": "Configuration options for the Communicate class", 6 | "type": "object", 7 | "properties": { 8 | "voice": { 9 | "type": "string", 10 | "description": "Voice to use for synthesis", 11 | "default": "en-US-EmmaMultilingualNeural", 12 | "examples": ["en-US-EmmaMultilingualNeural", "en-GB-SoniaNeural"] 13 | }, 14 | "rate": { 15 | "type": "string", 16 | "description": "Speech rate adjustment", 17 | "pattern": "^[+-]\\d+%$", 18 | "default": "+0%", 19 | "examples": ["+20%", "-10%", "+0%"] 20 | }, 21 | "volume": { 22 | "type": "string", 23 | "description": "Volume level adjustment", 24 | "pattern": "^[+-]\\d+%$", 25 | "default": "+0%", 26 | "examples": ["+50%", "-25%", "+0%"] 27 | }, 28 | "pitch": { 29 | "type": "string", 30 | "description": "Pitch adjustment in Hz", 31 | "pattern": "^[+-]\\d+Hz$", 32 | "default": "+0Hz", 33 | "examples": ["+5Hz", "-10Hz", "+0Hz"] 34 | }, 35 | "proxy": { 36 | "type": "string", 37 | "description": "Proxy URL for requests", 38 | "format": "uri", 39 | "examples": ["http://proxy:8080", "https://user:pass@proxy:3128"] 40 | }, 41 | "connectionTimeout": { 42 | "type": "number", 43 | "description": "WebSocket connection timeout in milliseconds", 44 | "minimum": 0, 45 | "examples": [10000, 30000] 46 | } 47 | }, 48 | "additionalProperties": false 49 | } 50 | -------------------------------------------------------------------------------- /tests/voices.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { VoicesManager, listVoices } from '../dist/index.js'; 4 | 5 | describe('Voice Management', () => { 6 | test('listVoices returns array of voices', async () => { 7 | const voices = await listVoices(); 8 | assert(Array.isArray(voices), 'listVoices should return an array'); 9 | 
assert(voices.length > 0, 'Should have at least one voice'); 10 | 11 | // Check voice structure 12 | const voice = voices[0]; 13 | assert(typeof voice.Name === 'string', 'Voice should have Name property'); 14 | assert(typeof voice.ShortName === 'string', 'Voice should have ShortName property'); 15 | assert(['Female', 'Male'].includes(voice.Gender), 'Voice should have valid Gender'); 16 | assert(typeof voice.Locale === 'string', 'Voice should have Locale property'); 17 | }); 18 | 19 | test('VoicesManager can filter voices', async () => { 20 | const voicesManager = await VoicesManager.create(); 21 | 22 | // Test finding English voices 23 | const englishVoices = voicesManager.find({ Language: 'en' }); 24 | assert(Array.isArray(englishVoices), 'Should return array'); 25 | assert(englishVoices.length > 0, 'Should find English voices'); 26 | 27 | for (const voice of englishVoices) { 28 | assert(voice.Language === 'en', 'All returned voices should be English'); 29 | } 30 | 31 | // Test finding female voices 32 | const femaleVoices = voicesManager.find({ Gender: 'Female' }); 33 | assert(Array.isArray(femaleVoices), 'Should return array'); 34 | 35 | for (const voice of femaleVoices) { 36 | assert(voice.Gender === 'Female', 'All returned voices should be Female'); 37 | } 38 | }); 39 | }); -------------------------------------------------------------------------------- /examples/simple-api.ts: -------------------------------------------------------------------------------- 1 | import { EdgeTTS, createVTT, createSRT } from '../dist/index.js'; 2 | import { promises as fs } from 'fs'; 3 | import path from 'path'; 4 | import { fileURLToPath } from 'url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | 9 | const TEXT = 'Hello, world! 
This is a test of the simple edge-tts API.'; 10 | const VOICE = 'en-US-EmmaMultilingualNeural'; 11 | const OUTPUT_FILE = path.join(__dirname, 'simple-test.mp3'); 12 | 13 | async function main() { 14 | // Create TTS instance with prosody options 15 | const tts = new EdgeTTS(TEXT, VOICE, { 16 | rate: '+10%', 17 | volume: '+0%', 18 | pitch: '+0Hz' 19 | }); 20 | 21 | try { 22 | // Synthesize speech (one-shot) 23 | const result = await tts.synthesize(); 24 | 25 | // Save audio file 26 | const audioBuffer = Buffer.from(await result.audio.arrayBuffer()); 27 | await fs.writeFile(OUTPUT_FILE, audioBuffer); 28 | 29 | // Generate subtitle files 30 | const vttContent = createVTT(result.subtitle); 31 | const srtContent = createSRT(result.subtitle); 32 | 33 | await fs.writeFile(path.join(__dirname, 'subtitles.vtt'), vttContent); 34 | await fs.writeFile(path.join(__dirname, 'subtitles.srt'), srtContent); 35 | 36 | console.log(`Audio saved to ${OUTPUT_FILE}`); 37 | console.log(`Generated ${result.subtitle.length} word boundaries`); 38 | console.log('VTT preview:', vttContent.substring(0, 200) + '...'); 39 | console.log('SRT preview:', srtContent.substring(0, 200) + '...'); 40 | } catch (error) { 41 | console.error('Synthesis failed:', error); 42 | } 43 | } 44 | 45 | // ESM equivalent of require.main === module 46 | if (import.meta.url === `file://${process.argv[1]}`) { 47 | main().catch(console.error); 48 | } -------------------------------------------------------------------------------- /.github/workflows/sync-proxy-package.yml: -------------------------------------------------------------------------------- 1 | # Add this file to your edge-tts-universal repo at: 2 | # .github/workflows/sync-proxy-package.yml 3 | 4 | name: Sync Proxy Package Version 5 | 6 | on: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | inputs: 11 | version: 12 | description: 'Version to sync to proxy (e.g., 1.2.3)' 13 | required: true 14 | type: string 15 | 16 | jobs: 17 | 
trigger-proxy-sync: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Extract version 22 | id: version 23 | run: | 24 | if [ "${{ github.event_name }}" = "release" ]; then 25 | # Extract version from release tag (v1.2.3 -> 1.2.3) 26 | VERSION="${{ github.event.release.tag_name }}" 27 | VERSION=${VERSION#v} # Remove 'v' prefix if present 28 | else 29 | VERSION="${{ github.event.inputs.version }}" 30 | fi 31 | 32 | echo "version=$VERSION" >> $GITHUB_OUTPUT 33 | echo "Syncing version: $VERSION" 34 | 35 | - name: Trigger proxy repo sync 36 | uses: peter-evans/repository-dispatch@v3 37 | with: 38 | token: ${{ secrets.PROXY_REPO_TOKEN }} 39 | repository: travisvn/universal-edge-tts 40 | event-type: sync-version 41 | client-payload: | 42 | { 43 | "version": "${{ steps.version.outputs.version }}", 44 | "main_version": "${{ steps.version.outputs.version }}", 45 | "triggered_by": "${{ github.repository }}", 46 | "trigger_event": "${{ github.event_name }}" 47 | } 48 | 49 | - name: Log trigger 50 | run: | 51 | echo "✅ Triggered proxy repo sync for version ${{ steps.version.outputs.version }}" 52 | echo "📦 Repository: travisvn/universal-edge-tts" 53 | echo "🔄 Event: sync-version" 54 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "env": { 4 | "es2022": true, 5 | "node": true 6 | }, 7 | "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"], 8 | "parser": "@typescript-eslint/parser", 9 | "parserOptions": { 10 | "ecmaVersion": 2022, 11 | "sourceType": "module" 12 | }, 13 | "plugins": ["@typescript-eslint"], 14 | "rules": { 15 | "@typescript-eslint/no-unused-vars": "off", 16 | "@typescript-eslint/no-explicit-any": "off", 17 | "@typescript-eslint/explicit-function-return-type": "off", 18 | "no-case-declarations": "off", 19 | "prefer-const": "error", 20 | "no-var": "error" 21 | }, 22 | 
"overrides": [ 23 | { 24 | "files": ["src/browser*.ts", "src/*browser*"], 25 | "env": { 26 | "browser": true, 27 | "node": false 28 | }, 29 | "globals": { 30 | "WebSocket": "readonly", 31 | "Blob": "readonly", 32 | "URL": "readonly", 33 | "TextEncoder": "readonly", 34 | "crypto": "readonly" 35 | } 36 | }, 37 | { 38 | "files": ["src/webworker*.ts", "src/*worker*"], 39 | "env": { 40 | "worker": true, 41 | "node": false, 42 | "browser": false 43 | }, 44 | "globals": { 45 | "importScripts": "readonly", 46 | "WorkerGlobalScope": "readonly", 47 | "postMessage": "readonly", 48 | "self": "readonly" 49 | } 50 | }, 51 | { 52 | "files": ["src/isomorphic*.ts", "src/*isomorphic*"], 53 | "env": { 54 | "node": true, 55 | "browser": true 56 | }, 57 | "rules": { 58 | "@typescript-eslint/no-explicit-any": "off" 59 | } 60 | }, 61 | { 62 | "files": ["examples/**/*.ts"], 63 | "env": { 64 | "node": true, 65 | "browser": true 66 | }, 67 | "rules": { 68 | "@typescript-eslint/no-explicit-any": "off", 69 | "no-console": "off" 70 | } 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup' 2 | 3 | export default defineConfig([ 4 | // Node.js builds only 5 | { 6 | entry: { 7 | index: 'src/index.ts', 8 | 'runtime-detection': 'src/runtime-detection.ts', 9 | }, 10 | format: ['cjs', 'esm'], 11 | dts: true, 12 | sourcemap: true, 13 | clean: true, 14 | splitting: false, 15 | treeshake: true, 16 | minify: false, 17 | target: 'es2020', 18 | outDir: 'dist', 19 | platform: 'node', 20 | external: [ 21 | 'axios', 22 | 'https-proxy-agent', 23 | 'uuid', 24 | 'ws', 25 | 'xml-escape', 26 | 'isomorphic-ws', 27 | 'cross-fetch', 28 | 'buffer' 29 | ] 30 | }, 31 | // Universal/Isomorphic builds - truly universal with all dependencies bundled 32 | { 33 | entry: { 34 | isomorphic: 'src/isomorphic-entry.ts', 35 | }, 36 | format: 
['cjs', 'esm'], 37 | dts: true, 38 | sourcemap: true, 39 | clean: false, 40 | splitting: false, 41 | treeshake: true, 42 | minify: false, 43 | target: 'es2020', 44 | outDir: 'dist', 45 | platform: 'neutral', // Neither node nor browser - truly universal 46 | external: [ 47 | // Bundle everything for universal compatibility 48 | ], 49 | define: { 50 | 'process.env.NODE_ENV': '"production"' 51 | } 52 | }, 53 | // Browser builds (separate config to avoid conflicts) 54 | { 55 | entry: { 56 | browser: 'src/browser-entry.ts', 57 | webworker: 'src/webworker-entry.ts', 58 | }, 59 | format: ['cjs', 'esm'], 60 | dts: true, 61 | sourcemap: true, 62 | clean: false, // Don't clean since previous builds already ran 63 | splitting: false, 64 | treeshake: true, 65 | minify: false, 66 | target: 'es2020', 67 | outDir: 'dist', 68 | platform: 'browser', 69 | external: [ 70 | // No external dependencies for browser builds - all bundled with browser-native implementations 71 | ] 72 | } 73 | ]) -------------------------------------------------------------------------------- /tests/communicate.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { Communicate, SubMaker } from '../dist/index.js'; 4 | 5 | describe('Streaming API', () => { 6 | test('Communicate can be instantiated', () => { 7 | const communicate = new Communicate('Hello, world!', { 8 | voice: 'en-US-EmmaMultilingualNeural' 9 | }); 10 | assert(communicate instanceof Communicate, 'Should create Communicate instance'); 11 | }); 12 | 13 | test('Communicate accepts configuration options', () => { 14 | const communicate = new Communicate('Test text', { 15 | voice: 'en-US-EmmaMultilingualNeural', 16 | rate: '+20%', 17 | volume: '+10%', 18 | pitch: '+5Hz', 19 | connectionTimeout: 5000 20 | }); 21 | 22 | assert(communicate instanceof Communicate, 'Should create Communicate instance with options'); 23 | 
}); 24 | 25 | test('SubMaker can process word boundary events', () => { 26 | const subMaker = new SubMaker(); 27 | 28 | // Mock word boundary event 29 | const wordBoundary = { 30 | type: 'WordBoundary', 31 | offset: 0, 32 | duration: 1000000, 33 | text: 'Hello' 34 | }; 35 | 36 | subMaker.feed(wordBoundary); 37 | const srt = subMaker.getSrt(); 38 | 39 | assert(typeof srt === 'string', 'SubMaker should return SRT string'); 40 | assert(srt.includes('Hello'), 'SRT should contain the word'); 41 | }); 42 | 43 | test('Communicate stream method exists and is async iterable', async () => { 44 | const communicate = new Communicate('Test', { 45 | voice: 'en-US-EmmaMultilingualNeural' 46 | }); 47 | 48 | // Check that stream method exists 49 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 50 | 51 | // Check that it returns an async iterable 52 | const stream = communicate.stream(); 53 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 54 | }); 55 | }); -------------------------------------------------------------------------------- /tests/deno/voices.deno.ts: -------------------------------------------------------------------------------- 1 | import { assertEquals, assert } from "https://deno.land/std@0.208.0/assert/mod.ts"; 2 | import { VoicesManager, listVoices } from "../../dist/isomorphic.js"; 3 | 4 | Deno.test("Voice Management - listVoices returns array of voices", async () => { 5 | try { 6 | const voices = await listVoices(); 7 | assert(Array.isArray(voices), 'listVoices should return an array'); 8 | assert(voices.length > 0, 'Should have at least one voice'); 9 | 10 | // Check voice structure 11 | const voice = voices[0]; 12 | assert(typeof voice.Name === 'string', 'Voice should have Name property'); 13 | assert(typeof voice.ShortName === 'string', 'Voice should have ShortName property'); 14 | assert(['Female', 'Male'].includes(voice.Gender), 'Voice should have valid Gender'); 15 | assert(typeof 
voice.Locale === 'string', 'Voice should have Locale property'); 16 | } catch (error) { 17 | // If network/service is unavailable, just check that error is reasonable 18 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 19 | } 20 | }); 21 | 22 | Deno.test("Voice Management - VoicesManager can filter voices", async () => { 23 | try { 24 | const voicesManager = await VoicesManager.create(); 25 | 26 | // Test finding English voices 27 | const englishVoices = voicesManager.find({ Language: 'en' }); 28 | assert(Array.isArray(englishVoices), 'Should return array'); 29 | assert(englishVoices.length > 0, 'Should find English voices'); 30 | 31 | for (const voice of englishVoices) { 32 | assertEquals(voice.Language, 'en', 'All returned voices should be English'); 33 | } 34 | 35 | // Test finding female voices 36 | const femaleVoices = voicesManager.find({ Gender: 'Female' }); 37 | assert(Array.isArray(femaleVoices), 'Should return array'); 38 | 39 | for (const voice of femaleVoices) { 40 | assertEquals(voice.Gender, 'Female', 'All returned voices should be Female'); 41 | } 42 | } catch (error) { 43 | // If network/service is unavailable, just check that error is reasonable 44 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 45 | } 46 | }); 47 | 48 | -------------------------------------------------------------------------------- /tests/types.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { 4 | NoAudioReceived, 5 | UnexpectedResponse, 6 | UnknownResponse, 7 | WebSocketError 8 | } from '../dist/index.js'; 9 | 10 | describe('Types and Exceptions', () => { 11 | test('Exception classes can be instantiated', () => { 12 | const noAudio = new NoAudioReceived('No audio received'); 13 | const unexpected = new UnexpectedResponse('Unexpected response'); 14 | const unknown = new 
UnknownResponse('Unknown response'); 15 | const wsError = new WebSocketError('WebSocket error'); 16 | 17 | assert(noAudio instanceof Error, 'NoAudioReceived should extend Error'); 18 | assert(unexpected instanceof Error, 'UnexpectedResponse should extend Error'); 19 | assert(unknown instanceof Error, 'UnknownResponse should extend Error'); 20 | assert(wsError instanceof Error, 'WebSocketError should extend Error'); 21 | 22 | assert(noAudio instanceof NoAudioReceived, 'Should be instance of NoAudioReceived'); 23 | assert(unexpected instanceof UnexpectedResponse, 'Should be instance of UnexpectedResponse'); 24 | assert(unknown instanceof UnknownResponse, 'Should be instance of UnknownResponse'); 25 | assert(wsError instanceof WebSocketError, 'Should be instance of WebSocketError'); 26 | }); 27 | 28 | test('Exception classes have proper names', () => { 29 | const noAudio = new NoAudioReceived('test'); 30 | const unexpected = new UnexpectedResponse('test'); 31 | const unknown = new UnknownResponse('test'); 32 | const wsError = new WebSocketError('test'); 33 | 34 | assert(noAudio.name === 'NoAudioReceived', 'Should have correct name'); 35 | assert(unexpected.name === 'UnexpectedResponse', 'Should have correct name'); 36 | assert(unknown.name === 'UnknownResponse', 'Should have correct name'); 37 | assert(wsError.name === 'WebSocketError', 'Should have correct name'); 38 | }); 39 | 40 | test('Exception classes preserve error messages', () => { 41 | const message = 'Test error message'; 42 | const error = new NoAudioReceived(message); 43 | 44 | assert(error.message === message, 'Should preserve error message'); 45 | }); 46 | }); -------------------------------------------------------------------------------- /src/browser-drm.ts: -------------------------------------------------------------------------------- 1 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 2 | import { SkewAdjustmentError } from "./exceptions"; 3 | 4 | const WIN_EPOCH = 11644473600; 5 | const 
S_TO_NS = 1e9; 6 | 7 | /** 8 | * Browser-specific DRM class that uses only Web APIs. 9 | * Uses the Web Crypto API instead of Node.js crypto module. 10 | */ 11 | export class BrowserDRM { 12 | private static clockSkewSeconds = 0.0; 13 | 14 | static adjClockSkewSeconds(skewSeconds: number) { 15 | BrowserDRM.clockSkewSeconds += skewSeconds; 16 | } 17 | 18 | static getUnixTimestamp(): number { 19 | return Date.now() / 1000 + BrowserDRM.clockSkewSeconds; 20 | } 21 | 22 | static parseRfc2616Date(date: string): number | null { 23 | try { 24 | return new Date(date).getTime() / 1000; 25 | } catch (e) { 26 | return null; 27 | } 28 | } 29 | 30 | static handleClientResponseError(response: { status: number; headers: Record }) { 31 | if (!response.headers) { 32 | throw new SkewAdjustmentError("No headers in response."); 33 | } 34 | const serverDate = response.headers["date"] || response.headers["Date"]; 35 | if (!serverDate) { 36 | throw new SkewAdjustmentError("No server date in headers."); 37 | } 38 | const serverDateParsed = BrowserDRM.parseRfc2616Date(serverDate); 39 | if (serverDateParsed === null) { 40 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 41 | } 42 | const clientDate = BrowserDRM.getUnixTimestamp(); 43 | BrowserDRM.adjClockSkewSeconds(serverDateParsed - clientDate); 44 | } 45 | 46 | static async generateSecMsGec(): Promise { 47 | let ticks = BrowserDRM.getUnixTimestamp(); 48 | ticks += WIN_EPOCH; 49 | ticks -= ticks % 300; 50 | ticks *= S_TO_NS / 100; 51 | 52 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 53 | 54 | // Use Web Crypto API - guaranteed to be available in browsers 55 | const encoder = new TextEncoder(); 56 | const data = encoder.encode(strToHash); 57 | const hashBuffer = await crypto.subtle.digest('SHA-256', data); 58 | const hashArray = Array.from(new Uint8Array(hashBuffer)); 59 | return hashArray.map(b => b.toString(16).padStart(2, '0')).join('').toUpperCase(); 60 | } 61 | } 
-------------------------------------------------------------------------------- /src/constants.ts: -------------------------------------------------------------------------------- 1 | /** Base URL for Microsoft Edge TTS service endpoints */ 2 | export const BASE_URL = "speech.platform.bing.com/consumer/speech/synthesize/readaloud"; 3 | 4 | /** Trusted client token used for authentication with the TTS service */ 5 | export const TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"; 6 | 7 | /** WebSocket URL for TTS streaming synthesis */ 8 | export const WSS_URL = `wss://${BASE_URL}/edge/v1?TrustedClientToken=${TRUSTED_CLIENT_TOKEN}`; 9 | 10 | /** HTTP URL for fetching available voices list */ 11 | export const VOICE_LIST_URL = `https://${BASE_URL}/voices/list?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`; 12 | 13 | /** Default voice to use when none is specified */ 14 | export const DEFAULT_VOICE = "en-US-EmmaMultilingualNeural"; 15 | 16 | /** Version string for Chromium browser emulation */ 17 | export const CHROMIUM_FULL_VERSION = "130.0.2849.68"; 18 | 19 | /** Major version number extracted from the full Chromium version */ 20 | export const CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.split(".")[0]; 21 | 22 | /** Security token version for API authentication */ 23 | export const SEC_MS_GEC_VERSION = `1-${CHROMIUM_FULL_VERSION}`; 24 | 25 | /** Base HTTP headers for API requests, mimicking a real browser */ 26 | export const BASE_HEADERS = { 27 | "User-Agent": `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROMIUM_MAJOR_VERSION}.0.0.0 Safari/537.36 Edg/${CHROMIUM_MAJOR_VERSION}.0.0.0`, 28 | "Accept-Encoding": "gzip, deflate, br", 29 | "Accept-Language": "en-US,en;q=0.9", 30 | }; 31 | 32 | /** HTTP headers specific to WebSocket connection requests */ 33 | export const WSS_HEADERS = { 34 | ...BASE_HEADERS, 35 | "Pragma": "no-cache", 36 | "Cache-Control": "no-cache", 37 | "Origin": 
"chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", 38 | }; 39 | 40 | /** HTTP headers specific to voice list API requests */ 41 | export const VOICE_HEADERS = { 42 | ...BASE_HEADERS, 43 | "Authority": "speech.platform.bing.com", 44 | "Sec-CH-UA": `" Not;A Brand";v="99", "Microsoft Edge";v="${CHROMIUM_MAJOR_VERSION}", "Chromium";v="${CHROMIUM_MAJOR_VERSION}"`, 45 | "Sec-CH-UA-Mobile": "?0", 46 | "Accept": "*/*", 47 | "Sec-Fetch-Site": "none", 48 | "Sec-Fetch-Mode": "cors", 49 | "Sec-Fetch-Dest": "empty", 50 | }; -------------------------------------------------------------------------------- /deno.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@edge-tts/universal", 3 | "version": "1.3.3", 4 | "description": "Universal text-to-speech library using Microsoft Edge's online TTS service. Works in Node.js and browsers WITHOUT needing Microsoft Edge, Windows, or an API key", 5 | "license": "AGPL-3.0", 6 | "author": "Travis ", 7 | "homepage": "https://github.com/travisvn/edge-tts-universal", 8 | "repository": { 9 | "type": "git", 10 | "url": "git+https://github.com/travisvn/edge-tts-universal.git" 11 | }, 12 | "exports": { 13 | ".": "./src/isomorphic-entry.ts", 14 | "./browser": "./src/browser-entry.ts", 15 | "./isomorphic": "./src/isomorphic-entry.ts", 16 | "./webworker": "./src/webworker-entry.ts" 17 | }, 18 | "imports": { 19 | "crypto": "node:crypto", 20 | "uuid": "npm:uuid@^11.1.0", 21 | "axios": "npm:axios@^1.12.1", 22 | "cross-fetch": "npm:cross-fetch@^4.1.0", 23 | "https-proxy-agent": "npm:https-proxy-agent@^7.0.6", 24 | "isomorphic-ws": "npm:isomorphic-ws@^5.0.0", 25 | "ws": "npm:ws@^8.18.3", 26 | "xml-escape": "npm:xml-escape@^1.1.0" 27 | }, 28 | "tasks": { 29 | "dev": "deno run --allow-net --allow-read examples/isomorphic-example.ts", 30 | "test": "deno test --allow-net --allow-env --no-check --no-lock tests/deno/*.ts", 31 | "example:simple": "deno run --allow-net examples/simple-api.ts", 32 | 
"example:streaming": "deno run --allow-net examples/streaming.ts", 33 | "example:voices": "deno run --allow-net examples/listVoices.ts", 34 | "example:universal": "deno run --allow-net examples/universal-detection.ts", 35 | "example:isomorphic": "deno run --allow-net examples/isomorphic-example.ts" 36 | }, 37 | "compilerOptions": { 38 | "lib": ["deno.window", "deno.worker"], 39 | "strict": true 40 | }, 41 | "unstable": ["sloppy-imports"], 42 | "fmt": { 43 | "files": { 44 | "include": ["src/", "examples/"], 45 | "exclude": ["dist/", "node_modules/"] 46 | } 47 | }, 48 | "lint": { 49 | "files": { 50 | "include": ["src/", "examples/"], 51 | "exclude": ["dist/", "node_modules/"] 52 | } 53 | }, 54 | "publish": { 55 | "exclude": [ 56 | "node_modules/", 57 | "dist/", 58 | "examples/", 59 | "docs/", 60 | ".git/", 61 | "*.log" 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tests/isomorphic.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { IsomorphicCommunicate, IsomorphicVoicesManager, listVoicesIsomorphic } from '../dist/index.js'; 4 | 5 | describe('Isomorphic API', () => { 6 | test('IsomorphicCommunicate can be instantiated', () => { 7 | const communicate = new IsomorphicCommunicate('Hello, world!', { 8 | voice: 'en-US-EmmaMultilingualNeural' 9 | }); 10 | assert(communicate instanceof IsomorphicCommunicate, 'Should create IsomorphicCommunicate instance'); 11 | }); 12 | 13 | test('listVoicesIsomorphic returns array of voices', async () => { 14 | try { 15 | const voices = await listVoicesIsomorphic(); 16 | assert(Array.isArray(voices), 'Should return array'); 17 | assert(voices.length > 0, 'Should have voices'); 18 | 19 | const voice = voices[0]; 20 | assert(typeof voice.Name === 'string', 'Voice should have Name'); 21 | assert(typeof voice.ShortName === 'string', 'Voice should 
have ShortName'); 22 | } catch (error) { 23 | // If network/service is unavailable, just check error type 24 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 25 | } 26 | }); 27 | 28 | test('IsomorphicVoicesManager can filter voices', async () => { 29 | try { 30 | const voicesManager = await IsomorphicVoicesManager.create(); 31 | 32 | const englishVoices = voicesManager.find({ Language: 'en' }); 33 | assert(Array.isArray(englishVoices), 'Should return array'); 34 | 35 | for (const voice of englishVoices) { 36 | assert(voice.Language === 'en', 'All voices should be English'); 37 | } 38 | } catch (error) { 39 | // If network/service is unavailable, just check error type 40 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 41 | } 42 | }); 43 | 44 | test('IsomorphicCommunicate stream method exists', () => { 45 | const communicate = new IsomorphicCommunicate('Test', { 46 | voice: 'en-US-EmmaMultilingualNeural' 47 | }); 48 | 49 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 50 | 51 | const stream = communicate.stream(); 52 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 53 | }); 54 | }); -------------------------------------------------------------------------------- /src/browser-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Browser-specific entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs optimized specifically for browser environments, avoiding Node.js 5 | * dependencies and providing browser-native implementations where possible. 
6 | * 7 | * Key features: 8 | * - Browser-optimized implementations 9 | * - No Node.js dependencies 10 | * - Web API compatibility 11 | * - Smaller bundle size compared to isomorphic entry 12 | * - Support for Web Workers and main thread 13 | * 14 | * Note: This entry point is subject to browser CORS policies when making requests 15 | * to the Microsoft Edge TTS service. Consider using a proxy server for production 16 | * applications if CORS becomes an issue. 17 | * 18 | * @example 19 | * ```typescript 20 | * import { EdgeTTS, listVoices } from '@edge-tts/universal/browser'; 21 | * 22 | * // Browser-optimized TTS 23 | * const tts = new EdgeTTS('Hello from the browser!', 'en-US-EmmaMultilingualNeural'); 24 | * const result = await tts.synthesize(); 25 | * 26 | * // Play the audio 27 | * const audio = new Audio(URL.createObjectURL(result.audio)); 28 | * audio.play(); 29 | * ``` 30 | * 31 | * @module BrowserEntry 32 | */ 33 | 34 | // Browser-only entry point - exports only browser-compatible APIs 35 | // Use this in environments where Node.js dependencies are not available 36 | 37 | // Export both the old EdgeTTSBrowser class and the new simplified browser API 38 | export { EdgeTTSBrowser } from './browser'; 39 | 40 | // Export the new simplified browser-specific API as the main EdgeTTS 41 | export { 42 | BrowserEdgeTTS as EdgeTTS, 43 | ProsodyOptions, 44 | WordBoundary, 45 | SynthesisResult, 46 | createVTT, 47 | createSRT 48 | } from './browser-simple'; 49 | 50 | // Export browser-specific implementations to avoid Node.js dependencies 51 | export { 52 | BrowserCommunicate as Communicate, 53 | BrowserCommunicateOptions as CommunicateOptions 54 | } from './browser-communicate'; 55 | 56 | export { 57 | BrowserVoicesManager as VoicesManager, 58 | listVoices, 59 | BrowserFetchError as FetchError 60 | } from './browser-voices'; 61 | 62 | export { BrowserDRM as DRM } from './browser-drm'; 63 | 64 | // SubMaker works everywhere as it doesn't have environment dependencies 65 
| export { SubMaker } from './submaker'; 66 | 67 | // Common types and exceptions 68 | export * from './exceptions'; 69 | export * from './types'; -------------------------------------------------------------------------------- /src/exceptions.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Base exception class for all Edge TTS related errors. 3 | */ 4 | export class EdgeTTSException extends Error { 5 | constructor(message: string) { 6 | super(message); 7 | this.name = "EdgeTTSException"; 8 | } 9 | } 10 | 11 | /** 12 | * Exception raised when there's an error adjusting clock skew for API requests. 13 | * This typically occurs when the client and server clocks are significantly out of sync. 14 | */ 15 | export class SkewAdjustmentError extends EdgeTTSException { 16 | constructor(message: string) { 17 | super(message); 18 | this.name = "SkewAdjustmentError"; 19 | } 20 | } 21 | 22 | /** 23 | * Exception raised when an unknown response is received from the TTS service. 24 | * This indicates an unexpected message type or format that the client cannot handle. 25 | */ 26 | export class UnknownResponse extends EdgeTTSException { 27 | constructor(message: string) { 28 | super(message); 29 | this.name = "UnknownResponse"; 30 | } 31 | } 32 | 33 | /** 34 | * Exception raised when an unexpected response is received from the TTS service. 35 | * This indicates a response that doesn't match the expected protocol flow. 36 | */ 37 | export class UnexpectedResponse extends EdgeTTSException { 38 | constructor(message: string) { 39 | super(message); 40 | this.name = "UnexpectedResponse"; 41 | } 42 | } 43 | 44 | /** 45 | * Exception raised when no audio data is received during synthesis. 46 | * This typically indicates a problem with the synthesis request or service. 
47 | */ 48 | export class NoAudioReceived extends EdgeTTSException { 49 | constructor(message: string) { 50 | super(message); 51 | this.name = "NoAudioReceived"; 52 | } 53 | } 54 | 55 | /** 56 | * Exception raised when there's an error with the WebSocket connection. 57 | * This can occur during connection establishment, data transmission, or connection closure. 58 | */ 59 | export class WebSocketError extends EdgeTTSException { 60 | constructor(message: string) { 61 | super(message); 62 | this.name = "WebSocketError"; 63 | } 64 | } 65 | 66 | /** 67 | * Exception raised when an invalid value is provided to a function or method. 68 | * This is typically used for input validation errors. 69 | */ 70 | export class ValueError extends EdgeTTSException { 71 | constructor(message: string) { 72 | super(message); 73 | this.name = "ValueError"; 74 | } 75 | } -------------------------------------------------------------------------------- /examples/NAMING_EXAMPLES.md: -------------------------------------------------------------------------------- 1 | # API Naming Examples 2 | 3 | This library supports both "Isomorphic" and "Universal" naming conventions for cross-platform APIs. **Universal is the preferred naming**. 
4 | 5 | ## Preferred Universal Naming 6 | 7 | ```typescript 8 | // ✅ Preferred: Universal naming (clear and descriptive) 9 | import { 10 | UniversalEdgeTTS, 11 | UniversalCommunicate, 12 | UniversalVoicesManager, 13 | listVoicesUniversal 14 | } from 'edge-tts-universal'; 15 | 16 | // Simple API 17 | const tts = new UniversalEdgeTTS('Hello world', 'en-US-EmmaMultilingualNeural'); 18 | const result = await tts.synthesize(); 19 | 20 | // Streaming API 21 | const communicate = new UniversalCommunicate('Hello world'); 22 | for await (const chunk of communicate.stream()) { 23 | if (chunk.type === 'audio') { 24 | // Handle audio data 25 | } 26 | } 27 | 28 | // Voice management 29 | const voices = await listVoicesUniversal(); 30 | const voicesManager = await UniversalVoicesManager.create(); 31 | ``` 32 | 33 | ## Legacy Isomorphic Naming 34 | 35 | ```typescript 36 | // ⚠️ Legacy: Still supported but not recommended for new code 37 | import { 38 | IsomorphicEdgeTTS, 39 | IsomorphicCommunicate, 40 | IsomorphicVoicesManager, 41 | listVoicesIsomorphic 42 | } from 'edge-tts-universal'; 43 | 44 | // Same functionality, different naming 45 | const tts = new IsomorphicEdgeTTS('Hello world', 'en-US-EmmaMultilingualNeural'); 46 | const communicate = new IsomorphicCommunicate('Hello world'); 47 | const voices = await listVoicesIsomorphic(); 48 | ``` 49 | 50 | ## Platform-Specific APIs 51 | 52 | For platform-optimized code, you can still use platform-specific exports: 53 | 54 | ```typescript 55 | // Node.js optimized (with proxy support, etc.) 
56 | import { EdgeTTS, Communicate, VoicesManager } from 'edge-tts-universal'; 57 | 58 | // Browser optimized (smaller bundle size) 59 | import { EdgeTTSBrowser, BrowserCommunicate } from 'edge-tts-universal/browser'; 60 | ``` 61 | 62 | ## Migration Guide 63 | 64 | If you're currently using Isomorphic naming, you can easily migrate: 65 | 66 | ```typescript 67 | // Old code 68 | import { IsomorphicCommunicate } from 'edge-tts-universal'; 69 | 70 | // New code (just change the import) 71 | import { UniversalCommunicate as Communicate } from 'edge-tts-universal'; 72 | // OR 73 | import { UniversalCommunicate } from 'edge-tts-universal'; 74 | ``` 75 | 76 | Both naming conventions will continue to work, but new projects should prefer the "Universal" naming for clarity. -------------------------------------------------------------------------------- /examples/simple-vs-advanced.ts: -------------------------------------------------------------------------------- 1 | import { EdgeTTS, createVTT, createSRT, Communicate } from '../dist/index.js'; 2 | 3 | async function simpleApiExample() { 4 | console.log('=== Simple API Example (like code 54.ts) ==='); 5 | 6 | // Simple one-shot synthesis 7 | const tts = new EdgeTTS( 8 | 'Hello, this is a simple text-to-speech example!', 9 | 'Microsoft Server Speech Text to Speech Voice (en-US, EmmaMultilingualNeural)', 10 | { 11 | rate: '+10%', 12 | volume: '+0%', 13 | pitch: '+0Hz' 14 | } 15 | ); 16 | 17 | try { 18 | const result = await tts.synthesize(); 19 | 20 | console.log('Audio generated:', result.audio.size, 'bytes'); 21 | console.log('Word boundaries:', result.subtitle.length); 22 | 23 | // Generate subtitles 24 | const vttSubtitles = createVTT(result.subtitle); 25 | const srtSubtitles = createSRT(result.subtitle); 26 | 27 | console.log('VTT Subtitles:\n', vttSubtitles.substring(0, 200) + '...'); 28 | console.log('SRT Subtitles:\n', srtSubtitles.substring(0, 200) + '...'); 29 | 30 | } catch (error) { 31 | console.error('Simple API 
Error:', error); 32 | } 33 | } 34 | 35 | async function advancedApiExample() { 36 | console.log('\n=== Advanced Streaming API Example ==='); 37 | 38 | // Advanced streaming synthesis with real-time processing 39 | const communicate = new Communicate( 40 | 'This is an advanced streaming example with real-time processing capabilities.', 41 | { 42 | voice: 'en-US-EmmaMultilingualNeural', 43 | rate: '+10%', 44 | volume: '+0%', 45 | pitch: '+0Hz' 46 | } 47 | ); 48 | 49 | try { 50 | const audioChunks: Buffer[] = []; 51 | let wordCount = 0; 52 | 53 | for await (const chunk of communicate.stream()) { 54 | if (chunk.type === 'audio' && chunk.data) { 55 | audioChunks.push(chunk.data); 56 | console.log(`Received audio chunk: ${chunk.data.length} bytes`); 57 | } else if (chunk.type === 'WordBoundary') { 58 | wordCount++; 59 | console.log(`Word ${wordCount}: "${chunk.text}" at ${chunk.offset}ns`); 60 | } 61 | } 62 | 63 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 64 | console.log(`Total audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 65 | 66 | } catch (error) { 67 | console.error('Advanced API Error:', error); 68 | } 69 | } 70 | 71 | // Run both examples 72 | async function main() { 73 | await simpleApiExample(); 74 | await advancedApiExample(); 75 | } 76 | 77 | if (require.main === module) { 78 | main().catch(console.error); 79 | } -------------------------------------------------------------------------------- /src/isomorphic-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Isomorphic/Universal entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs that work consistently across both Node.js and browser environments, 5 | * providing maximum compatibility for text-to-speech functionality using Microsoft Edge's TTS service. 
6 | * 7 | * Key features: 8 | * - Universal compatibility (Node.js, browsers, web workers) 9 | * - No platform-specific dependencies in the API surface 10 | * - Consistent behavior across environments 11 | * - Built-in proxy support for Node.js 12 | * - CORS-aware browser implementation 13 | * 14 | * @example 15 | * ```typescript 16 | * import { EdgeTTS, listVoices } from '@edge-tts/universal'; 17 | * 18 | * // Works in both Node.js and browsers 19 | * const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 20 | * const result = await tts.synthesize(); 21 | * 22 | * // Get available voices 23 | * const voices = await listVoices(); 24 | * ``` 25 | * 26 | * @module IsomorphicEntry 27 | */ 28 | 29 | // Isomorphic/Universal entry point - exports only APIs that work in both Node.js and browsers 30 | // Use this for maximum compatibility across environments 31 | 32 | export { 33 | IsomorphicCommunicate as Communicate, 34 | IsomorphicCommunicateOptions as CommunicateOptions 35 | } from './isomorphic-communicate'; 36 | 37 | export { 38 | IsomorphicVoicesManager as VoicesManager, 39 | listVoices, 40 | FetchError 41 | } from './isomorphic-voices'; 42 | 43 | export { IsomorphicDRM as DRM } from './isomorphic-drm'; 44 | 45 | // Simple API using isomorphic backend (all from isomorphic-simple to avoid Node.js deps) 46 | export { 47 | IsomorphicEdgeTTS as EdgeTTS, 48 | ProsodyOptions, 49 | WordBoundary, 50 | SynthesisResult, 51 | createVTT, 52 | createSRT 53 | } from './isomorphic-simple'; 54 | 55 | // Utility for creating subtitles (works everywhere) 56 | export { SubMaker } from './submaker'; 57 | 58 | // Common types and exceptions 59 | export * from './exceptions'; 60 | export * from './types'; 61 | 62 | // Universal aliases (preferred naming convention) 63 | export { 64 | IsomorphicCommunicate as UniversalCommunicate, 65 | IsomorphicCommunicateOptions as UniversalCommunicateOptions 66 | } from './isomorphic-communicate'; 67 | 68 | export { 69 | 
IsomorphicVoicesManager as UniversalVoicesManager, 70 | listVoices as listVoicesUniversal, 71 | FetchError as UniversalFetchError 72 | } from './isomorphic-voices'; 73 | 74 | export { IsomorphicDRM as UniversalDRM } from './isomorphic-drm'; 75 | 76 | export { 77 | IsomorphicEdgeTTS as UniversalEdgeTTS 78 | } from './isomorphic-simple'; -------------------------------------------------------------------------------- /src/isomorphic-drm.ts: -------------------------------------------------------------------------------- 1 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 2 | import { SkewAdjustmentError } from "./exceptions"; 3 | 4 | const WIN_EPOCH = 11644473600; 5 | const S_TO_NS = 1e9; 6 | 7 | /** 8 | * Isomorphic DRM class that works in both Node.js and browsers. 9 | * Uses appropriate crypto APIs based on the environment. 10 | */ 11 | export class IsomorphicDRM { 12 | private static clockSkewSeconds = 0.0; 13 | 14 | static adjClockSkewSeconds(skewSeconds: number) { 15 | IsomorphicDRM.clockSkewSeconds += skewSeconds; 16 | } 17 | 18 | static getUnixTimestamp(): number { 19 | return Date.now() / 1000 + IsomorphicDRM.clockSkewSeconds; 20 | } 21 | 22 | static parseRfc2616Date(date: string): number | null { 23 | try { 24 | return new Date(date).getTime() / 1000; 25 | } catch (e) { 26 | return null; 27 | } 28 | } 29 | 30 | static handleClientResponseError(response: { status: number; headers: any }) { 31 | let serverDate: string | null = null; 32 | 33 | if ('headers' in response && typeof response.headers === 'object') { 34 | if ('get' in response.headers && typeof response.headers.get === 'function') { 35 | // Fetch Response object 36 | serverDate = response.headers.get("date"); 37 | } else { 38 | // Plain object with headers 39 | const headers = response.headers as Record; 40 | serverDate = headers["date"] || headers["Date"]; 41 | } 42 | } 43 | 44 | if (!serverDate) { 45 | throw new SkewAdjustmentError("No server date in headers."); 46 | } 47 | const 
serverDateParsed = IsomorphicDRM.parseRfc2616Date(serverDate); 48 | if (serverDateParsed === null) { 49 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 50 | } 51 | const clientDate = IsomorphicDRM.getUnixTimestamp(); 52 | IsomorphicDRM.adjClockSkewSeconds(serverDateParsed - clientDate); 53 | } 54 | 55 | static async generateSecMsGec(): Promise { 56 | let ticks = IsomorphicDRM.getUnixTimestamp(); 57 | ticks += WIN_EPOCH; 58 | ticks -= ticks % 300; 59 | ticks *= S_TO_NS / 100; 60 | 61 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 62 | 63 | // Use Web Crypto API directly - available in both Node.js 16+ and browsers 64 | if (!globalThis.crypto || !globalThis.crypto.subtle) { 65 | throw new Error('Web Crypto API not available'); 66 | } 67 | 68 | const encoder = new TextEncoder(); 69 | const data = encoder.encode(strToHash); 70 | const hashBuffer = await globalThis.crypto.subtle.digest('SHA-256', data); 71 | const hashArray = Array.from(new Uint8Array(hashBuffer)); 72 | return hashArray.map(b => b.toString(16).padStart(2, '0')).join('').toUpperCase(); 73 | } 74 | } -------------------------------------------------------------------------------- /tests/simple-api.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { EdgeTTS, createVTT, createSRT } from '../dist/index.js'; 4 | 5 | describe('Simple API', () => { 6 | test('EdgeTTS can be instantiated with text and voice', () => { 7 | const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 8 | assert(tts instanceof EdgeTTS, 'Should create EdgeTTS instance'); 9 | }); 10 | 11 | test('EdgeTTS synthesize method returns proper result structure', async () => { 12 | const tts = new EdgeTTS('Hello, test!', 'en-US-EmmaMultilingualNeural'); 13 | 14 | try { 15 | const result = await tts.synthesize(); 16 | 17 | // Check result structure 
18 | assert(typeof result === 'object', 'Result should be an object'); 19 | assert(result.audio instanceof Blob, 'Result should have audio Blob'); 20 | assert(Array.isArray(result.subtitle), 'Result should have subtitle array'); 21 | 22 | // Check audio 23 | assert(result.audio.size > 0, 'Audio should have content'); 24 | assert(result.audio.type.includes('audio'), 'Audio should have audio mime type'); 25 | 26 | // Check subtitles structure if present 27 | if (result.subtitle.length > 0) { 28 | const sub = result.subtitle[0]; 29 | assert(typeof sub.offset === 'number', 'Subtitle should have offset'); 30 | assert(typeof sub.duration === 'number', 'Subtitle should have duration'); 31 | assert(typeof sub.text === 'string', 'Subtitle should have text'); 32 | } 33 | } catch (error) { 34 | // If network/service is unavailable, just check that error is reasonable 35 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 36 | } 37 | }); 38 | 39 | test('createVTT and createSRT work with subtitle data', () => { 40 | const mockSubtitles = [ 41 | { offset: 0, duration: 1000000, text: 'Hello' }, 42 | { offset: 1000000, duration: 1000000, text: 'world' } 43 | ]; 44 | 45 | const vtt = createVTT(mockSubtitles); 46 | const srt = createSRT(mockSubtitles); 47 | 48 | assert(typeof vtt === 'string', 'createVTT should return string'); 49 | assert(typeof srt === 'string', 'createSRT should return string'); 50 | assert(vtt.includes('WEBVTT'), 'VTT should contain WEBVTT header'); 51 | assert(srt.includes('Hello'), 'SRT should contain subtitle text'); 52 | }); 53 | 54 | test('EdgeTTS accepts prosody options', () => { 55 | const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural', { 56 | rate: '+10%', 57 | volume: '+0%', 58 | pitch: '+5Hz' 59 | }); 60 | 61 | assert(tts instanceof EdgeTTS, 'Should create EdgeTTS instance with prosody options'); 62 | }); 63 | }); -------------------------------------------------------------------------------- 
/src/drm.ts: -------------------------------------------------------------------------------- 1 | import { createHash } from 'crypto'; 2 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 3 | import { SkewAdjustmentError } from "./exceptions"; 4 | import { AxiosError } from "axios"; 5 | 6 | const WIN_EPOCH = 11644473600; 7 | const S_TO_NS = 1e9; 8 | 9 | /** 10 | * Digital Rights Management (DRM) class for handling authentication with Microsoft Edge TTS service. 11 | * Manages clock synchronization and security token generation for Node.js environments. 12 | */ 13 | export class DRM { 14 | private static clockSkewSeconds = 0.0; 15 | 16 | /** 17 | * Adjusts the clock skew to synchronize with server time. 18 | * @param skewSeconds - Number of seconds to adjust the clock by 19 | */ 20 | static adjClockSkewSeconds(skewSeconds: number) { 21 | DRM.clockSkewSeconds += skewSeconds; 22 | } 23 | 24 | /** 25 | * Gets the current Unix timestamp adjusted for clock skew. 26 | * @returns Unix timestamp in seconds 27 | */ 28 | static getUnixTimestamp(): number { 29 | return Date.now() / 1000 + DRM.clockSkewSeconds; 30 | } 31 | 32 | /** 33 | * Parses an RFC 2616 date string into a Unix timestamp. 34 | * @param date - RFC 2616 formatted date string 35 | * @returns Unix timestamp in seconds, or null if parsing fails 36 | */ 37 | static parseRfc2616Date(date: string): number | null { 38 | try { 39 | // The python version uses strptime with %Z, but it mentions it's not quite right. 40 | // JS's Date parsing is generally good with RFC 2616 dates. 41 | // And since it's UTC, it should be fine. 42 | return new Date(date).getTime() / 1000; 43 | } catch (e) { 44 | return null; 45 | } 46 | } 47 | 48 | /** 49 | * Handles client response errors by adjusting clock skew based on server date. 
50 | * @param e - Axios error containing server response headers 51 | * @throws {SkewAdjustmentError} If server date is missing or invalid 52 | */ 53 | static handleClientResponseError(e: AxiosError) { 54 | if (!e.response || !e.response.headers) { 55 | throw new SkewAdjustmentError("No server date in headers."); 56 | } 57 | const serverDate = e.response.headers["date"]; 58 | if (!serverDate || typeof serverDate !== 'string') { 59 | throw new SkewAdjustmentError("No server date in headers."); 60 | } 61 | const serverDateParsed = DRM.parseRfc2616Date(serverDate); 62 | if (serverDateParsed === null) { 63 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 64 | } 65 | const clientDate = DRM.getUnixTimestamp(); 66 | DRM.adjClockSkewSeconds(serverDateParsed - clientDate); 67 | } 68 | 69 | /** 70 | * Generates the Sec-MS-GEC security token required for API authentication. 71 | * @returns Uppercase hexadecimal SHA-256 hash string 72 | */ 73 | static generateSecMsGec(): string { 74 | let ticks = DRM.getUnixTimestamp(); 75 | ticks += WIN_EPOCH; 76 | ticks -= ticks % 300; 77 | ticks *= S_TO_NS / 100; 78 | 79 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 80 | return createHash('sha256').update(strToHash, 'ascii').digest('hex').toUpperCase(); 81 | } 82 | } -------------------------------------------------------------------------------- /src/tts_config.ts: -------------------------------------------------------------------------------- 1 | import { ValueError } from "./exceptions"; 2 | 3 | /** 4 | * Interface defining the configuration options for TTS synthesis. 
5 | */ 6 | export interface ITTSConfig { 7 | /** Voice name to use for synthesis */ 8 | voice: string; 9 | /** Speech rate adjustment (e.g., "+20%", "-10%") */ 10 | rate: string; 11 | /** Volume level adjustment (e.g., "+50%", "-25%") */ 12 | volume: string; 13 | /** Pitch adjustment in Hz (e.g., "+5Hz", "-10Hz") */ 14 | pitch: string; 15 | } 16 | 17 | /** 18 | * Configuration class for TTS synthesis parameters. 19 | * Handles voice name normalization and parameter validation. 20 | * 21 | * @example 22 | * ```typescript 23 | * const config = new TTSConfig({ 24 | * voice: 'en-US-EmmaMultilingualNeural', 25 | * rate: '+20%', 26 | * volume: '+10%', 27 | * pitch: '+5Hz' 28 | * }); 29 | * ``` 30 | */ 31 | export class TTSConfig implements ITTSConfig { 32 | public voice: string; 33 | public rate: string; 34 | public volume: string; 35 | public pitch: string; 36 | 37 | /** 38 | * Creates a new TTSConfig instance with the specified parameters. 39 | * 40 | * @param options - Configuration options 41 | * @param options.voice - Voice name (supports both short and full formats) 42 | * @param options.rate - Speech rate adjustment (default: "+0%") 43 | * @param options.volume - Volume adjustment (default: "+0%") 44 | * @param options.pitch - Pitch adjustment (default: "+0Hz") 45 | * @throws {ValueError} If any parameter has an invalid format 46 | */ 47 | constructor({ 48 | voice, 49 | rate = "+0%", 50 | volume = "+0%", 51 | pitch = "+0Hz", 52 | }: { 53 | voice: string, 54 | rate?: string, 55 | volume?: string, 56 | pitch?: string, 57 | }) { 58 | this.voice = voice; 59 | this.rate = rate; 60 | this.volume = volume; 61 | this.pitch = pitch; 62 | 63 | this.validate(); 64 | } 65 | 66 | private validate() { 67 | // Voice validation and transformation 68 | const match = /^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$/.exec(this.voice); 69 | if (match) { 70 | const [, lang] = match; 71 | let [, , region, name] = match; 72 | if (name.includes('-')) { 73 | const parts = name.split('-'); 74 | 
region += `-${parts[0]}`; 75 | name = parts[1]; 76 | } 77 | this.voice = `Microsoft Server Speech Text to Speech Voice (${lang}-${region}, ${name})`; 78 | } 79 | 80 | TTSConfig.validateStringParam( 81 | "voice", 82 | this.voice, 83 | /^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$/ 84 | ); 85 | TTSConfig.validateStringParam("rate", this.rate, /^[+-]\d+%$/); 86 | TTSConfig.validateStringParam("volume", this.volume, /^[+-]\d+%$/); 87 | TTSConfig.validateStringParam("pitch", this.pitch, /^[+-]\d+Hz$/); 88 | } 89 | 90 | private static validateStringParam(paramName: string, paramValue: string, pattern: RegExp) { 91 | if (typeof paramValue !== 'string') { 92 | throw new TypeError(`${paramName} must be a string`); 93 | } 94 | if (!pattern.test(paramValue)) { 95 | throw new ValueError(`Invalid ${paramName} '${paramValue}'.`); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # 
Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # vitepress build output 108 | **/.vitepress/dist 109 | 110 | # vitepress cache directory 111 | **/.vitepress/cache 112 | 113 | # Docusaurus cache and generated files 114 | .docusaurus 115 | 116 | # Serverless directories 117 | .serverless/ 118 | 119 | # FuseBox cache 120 | .fusebox/ 121 | 122 | # DynamoDB Local files 123 | .dynamodb/ 124 | 125 | # TernJS port file 126 | .tern-port 127 | 128 | # Stores VSCode versions used for testing VSCode extensions 129 | .vscode-test 130 | 131 | # yarn v2 132 | .yarn/cache 133 | .yarn/unplugged 134 | .yarn/build-state.yml 135 | .yarn/install-state.gz 136 | .pnp.* 137 | 138 | node_modules/ 139 | dist/ 140 | *.log 141 | *.mp3 142 | *.wav 143 | *.srt 144 | .env 145 | .DS_Store 146 | coverage/ 147 | 
.nyc_output/ 148 | *.tgz 149 | 150 | 151 | *.mp3 152 | bin/ 153 | 154 | docs/ 155 | .notes/ 156 | 157 | CLAUDE.md 158 | .claude/ 159 | .cursorignore 160 | .cursor/ 161 | 162 | # Deno lockfile - let CI generate fresh 163 | deno.lock 164 | 165 | *.vtt -------------------------------------------------------------------------------- /examples/naming-compatibility.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Naming Compatibility Test 3 | * Verifies that both Universal and Isomorphic naming work identically 4 | */ 5 | 6 | import { 7 | // Universal naming (preferred) 8 | UniversalCommunicate, 9 | UniversalVoicesManager, 10 | UniversalEdgeTTS, 11 | listVoicesUniversal, 12 | 13 | // Isomorphic naming (legacy) 14 | IsomorphicCommunicate, 15 | IsomorphicVoicesManager, 16 | IsomorphicEdgeTTS, 17 | listVoicesIsomorphic 18 | } from '../dist/index.js'; 19 | 20 | async function testNamingCompatibility() { 21 | console.log('🔄 Testing naming compatibility between Universal and Isomorphic APIs...'); 22 | 23 | try { 24 | // Test that both naming conventions are available 25 | console.log('✅ Universal imports available:', { 26 | UniversalCommunicate: typeof UniversalCommunicate, 27 | UniversalVoicesManager: typeof UniversalVoicesManager, 28 | UniversalEdgeTTS: typeof UniversalEdgeTTS, 29 | listVoicesUniversal: typeof listVoicesUniversal 30 | }); 31 | 32 | console.log('✅ Isomorphic imports available:', { 33 | IsomorphicCommunicate: typeof IsomorphicCommunicate, 34 | IsomorphicVoicesManager: typeof IsomorphicVoicesManager, 35 | IsomorphicEdgeTTS: typeof IsomorphicEdgeTTS, 36 | listVoicesIsomorphic: typeof listVoicesIsomorphic 37 | }); 38 | 39 | // Verify they are the same underlying classes 40 | console.log('🔍 Verifying aliases point to same implementations...'); 41 | console.log('UniversalCommunicate === IsomorphicCommunicate:', UniversalCommunicate === IsomorphicCommunicate); 42 | console.log('UniversalVoicesManager === 
IsomorphicVoicesManager:', UniversalVoicesManager === IsomorphicVoicesManager); 43 | console.log('UniversalEdgeTTS === IsomorphicEdgeTTS:', UniversalEdgeTTS === IsomorphicEdgeTTS); 44 | console.log('listVoicesUniversal === listVoicesIsomorphic:', listVoicesUniversal === listVoicesIsomorphic); 45 | 46 | // Test instantiation with both naming conventions 47 | const universalTTS = new UniversalEdgeTTS('Test', 'en-US-EmmaMultilingualNeural'); 48 | const isomorphicTTS = new IsomorphicEdgeTTS('Test', 'en-US-EmmaMultilingualNeural'); 49 | 50 | console.log('✅ Both naming conventions can be instantiated'); 51 | console.log('Universal TTS instance:', universalTTS.constructor.name); 52 | console.log('Isomorphic TTS instance:', isomorphicTTS.constructor.name); 53 | 54 | // Test that they behave identically 55 | const universalComm = new UniversalCommunicate('Test'); 56 | const isomorphicComm = new IsomorphicCommunicate('Test'); 57 | 58 | console.log('✅ Both communication classes instantiated'); 59 | console.log('Universal Communicate:', universalComm.constructor.name); 60 | console.log('Isomorphic Communicate:', isomorphicComm.constructor.name); 61 | 62 | console.log('🎉 All naming compatibility tests passed!'); 63 | console.log('💡 Recommendation: Use Universal naming for new projects'); 64 | 65 | } catch (error) { 66 | console.error('❌ Naming compatibility test failed:', error); 67 | } 68 | } 69 | 70 | // ESM equivalent check 71 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 72 | testNamingCompatibility().catch(console.error); 73 | } else if (typeof globalThis !== 'undefined') { 74 | (globalThis as any).runNamingCompatibilityTest = testNamingCompatibility; 75 | } 76 | 77 | export { testNamingCompatibility }; -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Represents a chunk of data received 
during TTS streaming. 3 | * Can contain either audio data or word boundary metadata. 4 | */ 5 | export type TTSChunk = { 6 | /** The type of chunk - either audio data or word boundary metadata */ 7 | type: "audio" | "WordBoundary"; 8 | /** Raw audio data buffer (present for audio chunks) */ 9 | data?: Buffer; 10 | /** Duration of the word in 100-nanosecond units (present for WordBoundary chunks) */ 11 | duration?: number; 12 | /** Offset from the beginning in 100-nanosecond units (present for WordBoundary chunks) */ 13 | offset?: number; 14 | /** The spoken text (present for WordBoundary chunks) */ 15 | text?: string; 16 | }; 17 | 18 | /** 19 | * Voice characteristics and personality tags from the Microsoft Edge TTS service. 20 | */ 21 | export type VoiceTag = { 22 | /** Content categories that the voice is optimized for */ 23 | ContentCategories: ( 24 | | "Cartoon" 25 | | "Conversation" 26 | | "Copilot" 27 | | "Dialect" 28 | | "General" 29 | | "News" 30 | | "Novel" 31 | | "Sports" 32 | )[]; 33 | /** Personality traits that describe the voice's characteristics */ 34 | VoicePersonalities: ( 35 | | "Approachable" 36 | | "Authentic" 37 | | "Authority" 38 | | "Bright" 39 | | "Caring" 40 | | "Casual" 41 | | "Cheerful" 42 | | "Clear" 43 | | "Comfort" 44 | | "Confident" 45 | | "Considerate" 46 | | "Conversational" 47 | | "Cute" 48 | | "Expressive" 49 | | "Friendly" 50 | | "Honest" 51 | | "Humorous" 52 | | "Lively" 53 | | "Passion" 54 | | "Pleasant" 55 | | "Positive" 56 | | "Professional" 57 | | "Rational" 58 | | "Reliable" 59 | | "Sincere" 60 | | "Sunshine" 61 | | "Warm" 62 | )[]; 63 | }; 64 | 65 | /** 66 | * Complete voice definition as returned by the Microsoft Edge TTS service. 
67 | */ 68 | export type Voice = { 69 | /** Full voice name identifier */ 70 | Name: string; 71 | /** Short name for the voice */ 72 | ShortName: string; 73 | /** Gender of the voice */ 74 | Gender: "Female" | "Male"; 75 | /** Locale code (e.g., "en-US", "zh-CN") */ 76 | Locale: string; 77 | /** Recommended audio codec for this voice */ 78 | SuggestedCodec: "audio-24khz-48kbitrate-mono-mp3"; 79 | /** Human-readable friendly name */ 80 | FriendlyName: string; 81 | /** Voice availability status */ 82 | Status: "GA"; 83 | /** Voice characteristics and personality traits */ 84 | VoiceTag: VoiceTag; 85 | }; 86 | 87 | /** 88 | * Extended voice type with language information for the VoicesManager. 89 | */ 90 | export type VoicesManagerVoice = Voice & { 91 | /** Language code extracted from the locale (e.g., "en" from "en-US") */ 92 | Language: string; 93 | }; 94 | 95 | /** 96 | * Filter criteria for finding voices using the VoicesManager. 97 | */ 98 | export type VoicesManagerFind = { 99 | /** Filter by voice gender */ 100 | Gender?: "Female" | "Male"; 101 | /** Filter by locale code */ 102 | Locale?: string; 103 | /** Filter by language code */ 104 | Language?: string; 105 | } 106 | 107 | /** 108 | * Internal state tracking for the Communicate class during streaming. 
109 | */ 110 | export type CommunicateState = { 111 | /** Buffer for partial text data */ 112 | partialText: Buffer; 113 | /** Timing offset compensation for multi-request scenarios */ 114 | offsetCompensation: number; 115 | /** Last recorded duration offset for timing calculations */ 116 | lastDurationOffset: number; 117 | /** Flag indicating if the stream method has been called */ 118 | streamWasCalled: boolean; 119 | }; -------------------------------------------------------------------------------- /src/submaker.ts: -------------------------------------------------------------------------------- 1 | import { TTSChunk } from "./types"; 2 | import { ValueError } from "./exceptions"; 3 | 4 | interface Cue { 5 | index: number; 6 | start: number; // in seconds 7 | end: number; // in seconds 8 | content: string; 9 | } 10 | 11 | function formatTime(seconds: number): string { 12 | const h = Math.floor(seconds / 3600); 13 | const m = Math.floor((seconds % 3600) / 60); 14 | const s = Math.floor(seconds % 60); 15 | const ms = Math.round((seconds - Math.floor(seconds)) * 1000); 16 | 17 | const pad = (num: number, size = 2) => num.toString().padStart(size, '0'); 18 | 19 | return `${pad(h)}:${pad(m)}:${pad(s)},${pad(ms, 3)}`; 20 | } 21 | 22 | /** 23 | * Utility class for generating SRT subtitles from WordBoundary events. 24 | * 25 | * @example 26 | * ```typescript 27 | * const subMaker = new SubMaker(); 28 | * 29 | * for await (const chunk of communicate.stream()) { 30 | * if (chunk.type === 'WordBoundary') { 31 | * subMaker.feed(chunk); 32 | * } 33 | * } 34 | * 35 | * const srt = subMaker.getSrt(); 36 | * ``` 37 | */ 38 | export class SubMaker { 39 | private cues: Cue[] = []; 40 | 41 | /** 42 | * Adds a WordBoundary chunk to the subtitle maker. 
43 | * 44 | * @param msg - Must be a WordBoundary type chunk with offset, duration, and text 45 | * @throws {ValueError} If chunk is not a WordBoundary with required fields 46 | */ 47 | feed(msg: TTSChunk): void { 48 | if (msg.type !== 'WordBoundary' || msg.offset === undefined || msg.duration === undefined || msg.text === undefined) { 49 | throw new ValueError("Invalid message type, expected 'WordBoundary' with offset, duration and text"); 50 | } 51 | 52 | // offset and duration are in 100-nanosecond intervals. 53 | // srt timestamps are in seconds. 1s = 10^7 * 100ns 54 | const start = msg.offset / 1e7; 55 | const end = (msg.offset + msg.duration) / 1e7; 56 | 57 | this.cues.push({ 58 | index: this.cues.length + 1, 59 | start: start, 60 | end: end, 61 | content: msg.text, 62 | }); 63 | } 64 | 65 | /** 66 | * Merges consecutive cues to create subtitle entries with multiple words. 67 | * This is useful for creating more readable subtitles instead of word-by-word display. 68 | * 69 | * @param words - Maximum number of words per merged cue 70 | * @throws {ValueError} If words parameter is invalid 71 | */ 72 | mergeCues(words: number): void { 73 | if (words <= 0) { 74 | throw new ValueError("Invalid number of words to merge, expected > 0"); 75 | } 76 | if (this.cues.length === 0) { 77 | return; 78 | } 79 | 80 | const newCues: Cue[] = []; 81 | let currentCue: Cue = this.cues[0]; 82 | 83 | for (const cue of this.cues.slice(1)) { 84 | if (currentCue.content.split(' ').length < words) { 85 | currentCue = { 86 | ...currentCue, 87 | end: cue.end, 88 | content: `${currentCue.content} ${cue.content}`, 89 | }; 90 | } else { 91 | newCues.push(currentCue); 92 | currentCue = cue; 93 | } 94 | } 95 | newCues.push(currentCue); 96 | 97 | // re-index 98 | this.cues = newCues.map((cue, i) => ({ ...cue, index: i + 1 })); 99 | } 100 | 101 | /** 102 | * Returns the subtitles in SRT format. 
103 | * 104 | * @returns SRT formatted subtitles 105 | */ 106 | getSrt(): string { 107 | return this.cues.map(cue => { 108 | return `${cue.index}\r\n${formatTime(cue.start)} --> ${formatTime(cue.end)}\r\n${cue.content}\r\n`; 109 | }).join('\r\n'); 110 | } 111 | 112 | toString(): string { 113 | return this.getSrt(); 114 | } 115 | } -------------------------------------------------------------------------------- /src/voices.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosError, AxiosProxyConfig } from 'axios'; 2 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 3 | import { DRM } from './drm'; 4 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 5 | 6 | function buildProxyConfig(proxy: string): AxiosProxyConfig | false { 7 | try { 8 | const proxyUrl = new URL(proxy); 9 | return { 10 | host: proxyUrl.hostname, 11 | port: parseInt(proxyUrl.port), 12 | protocol: proxyUrl.protocol, 13 | }; 14 | } catch (e) { 15 | // if proxy is not a valid URL, just ignore it. 16 | return false; 17 | } 18 | } 19 | 20 | async function _listVoices(proxy?: string): Promise { 21 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${DRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 22 | const response = await axios.get(url, { 23 | headers: VOICE_HEADERS, 24 | proxy: proxy ? buildProxyConfig(proxy) : false, 25 | }); 26 | 27 | const data = response.data; 28 | 29 | for (const voice of data) { 30 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 31 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 32 | } 33 | 34 | return data; 35 | } 36 | 37 | /** 38 | * Fetches all available voices from the Microsoft Edge TTS service. 
39 | * 40 | * @param proxy - Optional proxy URL for the request 41 | * @returns Promise resolving to array of available voices 42 | */ 43 | export async function listVoices(proxy?: string): Promise { 44 | try { 45 | return await _listVoices(proxy); 46 | } catch (e) { 47 | if (e instanceof AxiosError && e.response?.status === 403) { 48 | DRM.handleClientResponseError(e); 49 | return await _listVoices(proxy); 50 | } 51 | throw e; 52 | } 53 | } 54 | 55 | /** 56 | * Utility class for finding and filtering available voices. 57 | * 58 | * @example 59 | * ```typescript 60 | * const voicesManager = await VoicesManager.create(); 61 | * const englishVoices = voicesManager.find({ Language: 'en' }); 62 | * ``` 63 | */ 64 | export class VoicesManager { 65 | private voices: VoicesManagerVoice[] = []; 66 | private calledCreate = false; 67 | 68 | /** 69 | * Creates a new VoicesManager instance. 70 | * 71 | * @param customVoices - Optional custom voice list instead of fetching from API 72 | * @param proxy - Optional proxy URL for API requests 73 | * @returns Promise resolving to VoicesManager instance 74 | */ 75 | public static async create(customVoices?: Voice[], proxy?: string): Promise { 76 | const manager = new VoicesManager(); 77 | const voices = customVoices ?? await listVoices(proxy); 78 | manager.voices = voices.map(voice => ({ 79 | ...voice, 80 | Language: voice.Locale.split('-')[0], 81 | })); 82 | manager.calledCreate = true; 83 | return manager; 84 | } 85 | 86 | /** 87 | * Finds voices matching the specified criteria. 
88 | * 89 | * @param filter - Filter criteria for voice selection 90 | * @returns Array of voices matching the filter 91 | * @throws {Error} If called before create() 92 | */ 93 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 94 | if (!this.calledCreate) { 95 | throw new Error('VoicesManager.find() called before VoicesManager.create()'); 96 | } 97 | 98 | return this.voices.filter(voice => { 99 | return Object.entries(filter).every(([key, value]) => { 100 | return voice[key as keyof VoicesManagerFind] === value; 101 | }); 102 | }); 103 | } 104 | } -------------------------------------------------------------------------------- /src/webworker-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Web Worker entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs specifically designed for Web Worker environments, providing 5 | * text-to-speech functionality that works in background threads without blocking the main UI. 
6 | * 7 | * Key features: 8 | * - Web Worker compatibility 9 | * - No DOM dependencies 10 | * - Background processing capabilities 11 | * - Message passing utilities for TTS results 12 | * - Isomorphic APIs that work in worker contexts 13 | * 14 | * Web Workers provide an ideal environment for TTS processing as they: 15 | * - Don't block the main UI thread 16 | * - Have access to fetch and WebSocket APIs 17 | * - Can handle large audio data without freezing the page 18 | * - Support streaming TTS processing 19 | * 20 | * @example 21 | * ```typescript 22 | * // In a Web Worker file 23 | * import { EdgeTTS, postAudioMessage, isWebWorker } from '@edge-tts/universal/webworker'; 24 | * 25 | * if (isWebWorker()) { 26 | * self.addEventListener('message', async (event) => { 27 | * if (event.data.type === 'synthesize') { 28 | * const tts = new EdgeTTS(event.data.text, event.data.voice); 29 | * const result = await tts.synthesize(); 30 | * postAudioMessage(result.audio, result.subtitle); 31 | * } 32 | * }); 33 | * } 34 | * ``` 35 | * 36 | * @module WebWorkerEntry 37 | */ 38 | 39 | // Web Worker entry point - exports only APIs that work in Web Worker environments 40 | // Web Workers don't have access to DOM but do have Web APIs like fetch and WebSocket 41 | 42 | export { 43 | IsomorphicCommunicate as Communicate, 44 | IsomorphicCommunicateOptions as CommunicateOptions 45 | } from './isomorphic-communicate'; 46 | 47 | export { 48 | IsomorphicVoicesManager as VoicesManager, 49 | listVoices, 50 | FetchError 51 | } from './isomorphic-voices'; 52 | 53 | export { IsomorphicDRM as DRM } from './isomorphic-drm'; 54 | 55 | // Simple API using isomorphic backend (works in Web Workers) 56 | // Import everything from isomorphic-simple to avoid Node.js dependencies 57 | export { 58 | IsomorphicEdgeTTS as EdgeTTS, 59 | ProsodyOptions, 60 | WordBoundary, 61 | SynthesisResult, 62 | createVTT, 63 | createSRT 64 | } from './isomorphic-simple'; 65 | 66 | // Utility for creating subtitles (works 
everywhere)
export { SubMaker } from './submaker';

// Common types and exceptions
export * from './exceptions';
export * from './types';

// Web Worker specific utilities
/**
 * Detects if the current environment is a Web Worker.
 *
 * @returns True if running in a Web Worker context, false otherwise
 */
export function isWebWorker(): boolean {
  return typeof (globalThis as any).importScripts === 'function' &&
    typeof (globalThis as any).WorkerGlobalScope !== 'undefined';
}

/**
 * Posts a TTS result message to the main thread from a Web Worker.
 * This is a convenience function for sending audio and subtitle data
 * back to the main thread after TTS processing is complete.
 *
 * @param audio - The synthesized audio as a Blob
 * @param subtitle - Array of subtitle/word boundary data
 * @remarks Never throws. Outside a Web Worker context it is a no-op that
 * only logs a console warning; inside a worker the payload is sent via
 * `postMessage` as `{ type: 'tts-result', audio, subtitle }`.
 */
export function postAudioMessage(audio: Blob, subtitle: any[]) {
  if (isWebWorker()) {
    // In a Web Worker, post message to main thread
    (globalThis as any).postMessage({
      type: 'tts-result',
      audio: audio,
      subtitle: subtitle
    });
  } else {
    console.warn('postAudioMessage should only be called in Web Worker context');
  }
}
7 | * 8 | * Key features: 9 | * - Node.js optimized implementations with full feature set 10 | * - Proxy support for enterprise environments 11 | * - Comprehensive voice management 12 | * - Streaming and simple APIs 13 | * - Subtitle generation utilities 14 | * - Cross-platform compatibility layers 15 | * 16 | * @example 17 | * ```typescript 18 | * import { EdgeTTS, listVoices, Communicate } from 'edge-tts-universal'; 19 | * 20 | * // Simple API 21 | * const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 22 | * const result = await tts.synthesize(); 23 | * 24 | * // Streaming API 25 | * const communicate = new Communicate('Hello, world!'); 26 | * for await (const chunk of communicate.stream()) { 27 | * if (chunk.type === 'audio') { 28 | * // Handle audio data 29 | * } 30 | * } 31 | * 32 | * // Voice management 33 | * const voices = await listVoices(); 34 | * ``` 35 | * 36 | * @module MainEntry 37 | */ 38 | 39 | // Node.js-specific API (uses axios, Node.js crypto, etc.) 
40 | export { Communicate, CommunicateOptions } from './communicate'; 41 | export { SubMaker } from './submaker'; 42 | export { VoicesManager, listVoices } from './voices'; 43 | 44 | // Simple API (works in both Node.js and browsers when using appropriate exports) 45 | export { EdgeTTS, ProsodyOptions, WordBoundary, SynthesisResult, createVTT, createSRT, UniversalEdgeTTS } from './simple'; 46 | 47 | // Universal/Isomorphic API (works in both Node.js and browsers) 48 | export { 49 | IsomorphicCommunicate, 50 | IsomorphicCommunicateOptions 51 | } from './isomorphic-communicate'; 52 | export { 53 | IsomorphicVoicesManager, 54 | listVoices as listVoicesIsomorphic, 55 | FetchError 56 | } from './isomorphic-voices'; 57 | export { IsomorphicDRM } from './isomorphic-drm'; 58 | 59 | // Simple isomorphic API 60 | export { 61 | IsomorphicEdgeTTS, 62 | ProsodyOptions as IsomorphicProsodyOptions, 63 | WordBoundary as IsomorphicWordBoundary, 64 | SynthesisResult as IsomorphicSynthesisResult, 65 | createVTT as createVTTIsomorphic, 66 | createSRT as createSRTIsomorphic 67 | } from './isomorphic-simple'; 68 | 69 | // Universal aliases (preferred naming) 70 | export { 71 | IsomorphicCommunicate as UniversalCommunicate, 72 | IsomorphicCommunicateOptions as UniversalCommunicateOptions 73 | } from './isomorphic-communicate'; 74 | export { 75 | IsomorphicVoicesManager as UniversalVoicesManager, 76 | listVoices as listVoicesUniversal, 77 | FetchError as UniversalFetchError 78 | } from './isomorphic-voices'; 79 | export { IsomorphicDRM as UniversalDRM } from './isomorphic-drm'; 80 | export { 81 | IsomorphicEdgeTTS as UniversalEdgeTTS_Isomorphic, 82 | ProsodyOptions as UniversalProsodyOptions_Isomorphic, 83 | WordBoundary as UniversalWordBoundary_Isomorphic, 84 | SynthesisResult as UniversalSynthesisResult_Isomorphic, 85 | createVTT as createVTTUniversal_Isomorphic, 86 | createSRT as createSRTUniversal_Isomorphic 87 | } from './isomorphic-simple'; 88 | 89 | // Browser-specific API (uses 
native browser APIs only) 90 | export { 91 | EdgeTTSBrowser, 92 | ProsodyOptions as BrowserProsodyOptions, 93 | WordBoundary as BrowserWordBoundary, 94 | SynthesisResult as BrowserSynthesisResult, 95 | createVTT as createVTTBrowser, 96 | createSRT as createSRTBrowser 97 | } from './browser'; 98 | 99 | // Common types and exceptions 100 | export * from './exceptions'; 101 | export * from './types'; -------------------------------------------------------------------------------- /examples/webworker-example/main.ts: -------------------------------------------------------------------------------- 1 | // Main thread - manages Web Worker for TTS processing 2 | // Run this example in a browser environment 3 | 4 | async function runWebWorkerExample() { 5 | console.log('🔄 Starting Web Worker TTS example...'); 6 | 7 | // Check if we're in a browser 8 | if (typeof Worker === 'undefined') { 9 | console.error('❌ Web Workers not supported in this environment'); 10 | return; 11 | } 12 | 13 | try { 14 | // Create Web Worker (you'll need to build worker.ts to worker.js first) 15 | const worker = new Worker('./worker.js'); 16 | 17 | // Listen for messages from worker 18 | worker.onmessage = function (e) { 19 | const { type, audio, subtitle, error, message } = e.data; 20 | 21 | switch (type) { 22 | case 'ready': 23 | console.log('✅ Worker ready:', message); 24 | 25 | // Send synthesis request to worker 26 | worker.postMessage({ 27 | type: 'synthesize', 28 | text: 'Hello from a Web Worker! 
This text-to-speech synthesis is happening in the background.', 29 | voice: 'en-US-EmmaMultilingualNeural', 30 | options: { 31 | rate: '+10%', 32 | volume: '+0%', 33 | pitch: '+0Hz' 34 | } 35 | }); 36 | break; 37 | 38 | case 'tts-result': 39 | console.log(`🎵 Audio generated in worker: ${audio.size} bytes`); 40 | console.log(`📝 Subtitle words: ${subtitle.length}`); 41 | 42 | // Create audio element to play the result 43 | const audioUrl = URL.createObjectURL(audio); 44 | const audioElement = new Audio(audioUrl); 45 | audioElement.controls = true; 46 | 47 | // Add to page if in browser 48 | if (typeof document !== 'undefined') { 49 | const container = document.getElementById('audio-container') || document.body; 50 | const label = document.createElement('p'); 51 | label.textContent = 'Generated audio from Web Worker:'; 52 | container.appendChild(label); 53 | container.appendChild(audioElement); 54 | 55 | // Add download link 56 | const downloadLink = document.createElement('a'); 57 | downloadLink.href = audioUrl; 58 | downloadLink.download = 'webworker-tts-output.mp3'; 59 | downloadLink.textContent = 'Download Audio'; 60 | downloadLink.style.display = 'block'; 61 | downloadLink.style.marginTop = '10px'; 62 | container.appendChild(downloadLink); 63 | } 64 | 65 | // Terminate worker when done 66 | worker.terminate(); 67 | console.log('✅ Web Worker TTS example completed!'); 68 | break; 69 | 70 | case 'error': 71 | console.error('❌ Worker error:', error); 72 | worker.terminate(); 73 | break; 74 | } 75 | }; 76 | 77 | worker.onerror = function (error) { 78 | console.error('❌ Worker failed:', error); 79 | }; 80 | 81 | } catch (error) { 82 | console.error('❌ Failed to create Web Worker:', error); 83 | } 84 | } 85 | 86 | // Export for use in different environments 87 | export { runWebWorkerExample }; 88 | 89 | // Auto-run in browser if this script is loaded directly 90 | if (typeof window !== 'undefined' && typeof document !== 'undefined') { 91 | // Make function available 
globally 92 | (window as any).runWebWorkerExample = runWebWorkerExample; 93 | 94 | // Auto-run if there's a button or when DOM is ready 95 | document.addEventListener('DOMContentLoaded', () => { 96 | const button = document.getElementById('run-webworker-example'); 97 | if (button) { 98 | button.addEventListener('click', runWebWorkerExample); 99 | } 100 | }); 101 | } -------------------------------------------------------------------------------- /src/runtime-detection.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Runtime detection utilities for cross-platform compatibility 3 | */ 4 | 5 | export interface RuntimeInfo { 6 | name: 'node' | 'deno' | 'bun' | 'browser' | 'webworker' | 'unknown'; 7 | version?: string; 8 | isNode: boolean; 9 | isDeno: boolean; 10 | isBun: boolean; 11 | isBrowser: boolean; 12 | isWebWorker: boolean; 13 | } 14 | 15 | /** 16 | * Detect the current JavaScript runtime environment 17 | */ 18 | export function detectRuntime(): RuntimeInfo { 19 | const info: RuntimeInfo = { 20 | name: 'unknown', 21 | isNode: false, 22 | isDeno: false, 23 | isBun: false, 24 | isBrowser: false, 25 | isWebWorker: false, 26 | }; 27 | 28 | // Check for Deno 29 | if (typeof (globalThis as any).Deno !== 'undefined') { 30 | info.name = 'deno'; 31 | info.isDeno = true; 32 | info.version = (globalThis as any).Deno.version?.deno; 33 | return info; 34 | } 35 | 36 | // Check for Bun 37 | if (typeof (globalThis as any).Bun !== 'undefined') { 38 | info.name = 'bun'; 39 | info.isBun = true; 40 | info.version = (globalThis as any).Bun.version; 41 | return info; 42 | } 43 | 44 | // Check for Node.js 45 | if (typeof process !== 'undefined' && process.versions && process.versions.node) { 46 | info.name = 'node'; 47 | info.isNode = true; 48 | info.version = process.versions.node; 49 | return info; 50 | } 51 | 52 | // Check for Web Worker 53 | if (typeof (globalThis as any).importScripts === 'function' && typeof (globalThis as 
any).WorkerGlobalScope !== 'undefined') { 54 | info.name = 'webworker'; 55 | info.isWebWorker = true; 56 | return info; 57 | } 58 | 59 | // Check for Browser 60 | if (typeof window !== 'undefined') { 61 | info.name = 'browser'; 62 | info.isBrowser = true; 63 | return info; 64 | } 65 | 66 | return info; 67 | } 68 | 69 | /** 70 | * Get the appropriate fetch implementation for the current runtime 71 | */ 72 | export function getFetch(): typeof fetch { 73 | const runtime = detectRuntime(); 74 | 75 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 76 | return globalThis.fetch; 77 | } 78 | 79 | if (runtime.isNode || runtime.isBun) { 80 | try { 81 | // Try using built-in fetch first (Node 18+, Bun) 82 | if (typeof globalThis.fetch !== 'undefined') { 83 | return globalThis.fetch; 84 | } 85 | // Fallback to cross-fetch for older Node versions 86 | return require('cross-fetch'); 87 | } catch { 88 | throw new Error('No fetch implementation available. Please install cross-fetch.'); 89 | } 90 | } 91 | 92 | throw new Error('Unsupported runtime environment'); 93 | } 94 | 95 | /** 96 | * Get the appropriate WebSocket implementation for the current runtime 97 | */ 98 | export function getWebSocket(): any { 99 | const runtime = detectRuntime(); 100 | 101 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 102 | return globalThis.WebSocket; 103 | } 104 | 105 | if (runtime.isNode || runtime.isBun) { 106 | try { 107 | return require('isomorphic-ws'); 108 | } catch { 109 | throw new Error('No WebSocket implementation available. 
Please install isomorphic-ws.'); 110 | } 111 | } 112 | 113 | throw new Error('Unsupported runtime environment'); 114 | } 115 | 116 | /** 117 | * Get runtime-specific crypto implementation 118 | * Note: Node.js 16+ (and our minimum version of 18.17+) has native globalThis.crypto support 119 | */ 120 | export function getCrypto(): Crypto { 121 | const runtime = detectRuntime(); 122 | 123 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 124 | return globalThis.crypto; 125 | } 126 | 127 | if (runtime.isNode || runtime.isBun) { 128 | // Node.js 18.17+ and Bun have built-in crypto 129 | if (typeof globalThis.crypto !== 'undefined') { 130 | return globalThis.crypto; 131 | } 132 | throw new Error('No crypto implementation available. Please upgrade to Node.js 18.17+.'); 133 | } 134 | 135 | throw new Error('Unsupported runtime environment'); 136 | } -------------------------------------------------------------------------------- /src/browser-voices.ts: -------------------------------------------------------------------------------- 1 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 2 | import { BrowserDRM } from './browser-drm'; 3 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 4 | 5 | /** 6 | * Error class for fetch-related errors (browser-specific) 7 | */ 8 | export class BrowserFetchError extends Error { 9 | response?: { 10 | status: number; 11 | headers: Record; 12 | }; 13 | 14 | constructor(message: string, response?: { status: number; headers: Record }) { 15 | super(message); 16 | this.name = 'BrowserFetchError'; 17 | this.response = response; 18 | } 19 | } 20 | 21 | async function _listVoices(): Promise { 22 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${await BrowserDRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 23 | 24 | try { 25 | const response = await fetch(url, { 26 | headers: VOICE_HEADERS, 27 | }); 28 | 29 | if (!response.ok) { 30 | const headers: Record = {}; 31 
| response.headers.forEach((value, key) => { 32 | headers[key] = value; 33 | }); 34 | 35 | throw new BrowserFetchError(`HTTP ${response.status}`, { 36 | status: response.status, 37 | headers 38 | }); 39 | } 40 | 41 | const data: Voice[] = await response.json(); 42 | 43 | for (const voice of data) { 44 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 45 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 46 | } 47 | 48 | return data; 49 | } catch (error) { 50 | if (error instanceof BrowserFetchError) { 51 | throw error; 52 | } 53 | // Convert other fetch errors to our BrowserFetchError format 54 | throw new BrowserFetchError(error instanceof Error ? error.message : 'Unknown fetch error'); 55 | } 56 | } 57 | 58 | /** 59 | * Fetches all available voices from the Microsoft Edge TTS service (browser version). 60 | * Uses native browser fetch API and Web Crypto. 61 | * 62 | * @returns Promise resolving to array of available voices 63 | */ 64 | export async function listVoices(): Promise { 65 | try { 66 | return await _listVoices(); 67 | } catch (e) { 68 | if (e instanceof BrowserFetchError && e.response?.status === 403) { 69 | BrowserDRM.handleClientResponseError(e.response); 70 | return await _listVoices(); 71 | } 72 | throw e; 73 | } 74 | } 75 | 76 | /** 77 | * Browser-specific utility class for finding and filtering available voices. 78 | * Uses only browser-native APIs. 79 | * 80 | * @example 81 | * ```typescript 82 | * const voicesManager = await BrowserVoicesManager.create(); 83 | * const englishVoices = voicesManager.find({ Language: 'en' }); 84 | * ``` 85 | */ 86 | export class BrowserVoicesManager { 87 | private voices: VoicesManagerVoice[] = []; 88 | private calledCreate = false; 89 | 90 | /** 91 | * Creates a new BrowserVoicesManager instance. 
92 | * 93 | * @param customVoices - Optional custom voice list instead of fetching from API 94 | * @returns Promise resolving to BrowserVoicesManager instance 95 | */ 96 | public static async create(customVoices?: Voice[]): Promise { 97 | const manager = new BrowserVoicesManager(); 98 | const voices = customVoices ?? await listVoices(); 99 | manager.voices = voices.map(voice => ({ 100 | ...voice, 101 | Language: voice.Locale.split('-')[0], 102 | })); 103 | manager.calledCreate = true; 104 | return manager; 105 | } 106 | 107 | /** 108 | * Finds voices matching the specified criteria. 109 | * 110 | * @param filter - Filter criteria for voice selection 111 | * @returns Array of voices matching the filter 112 | * @throws {Error} If called before create() 113 | */ 114 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 115 | if (!this.calledCreate) { 116 | throw new Error('BrowserVoicesManager.find() called before BrowserVoicesManager.create()'); 117 | } 118 | 119 | return this.voices.filter(voice => { 120 | return Object.entries(filter).every(([key, value]) => { 121 | return voice[key as keyof VoicesManagerFind] === value; 122 | }); 123 | }); 124 | } 125 | } -------------------------------------------------------------------------------- /examples/universal-detection.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Universal Environment Detection Example 3 | * This example shows how to detect the environment and use appropriate APIs 4 | */ 5 | 6 | // Environment detection function 7 | function detectEnvironment() { 8 | // Check for Node.js 9 | const isNode = typeof process !== 'undefined' && 10 | process.versions && 11 | process.versions.node; 12 | 13 | // Check for browser 14 | const isBrowser = typeof window !== 'undefined' && 15 | typeof window.document !== 'undefined'; 16 | 17 | // Check for Web Workers 18 | const isWebWorker = typeof importScripts === 'function' && 19 | typeof (globalThis as 
any).WorkerGlobalScope !== 'undefined'; 20 | 21 | // Check for Deno 22 | const isDeno = typeof (globalThis as any).Deno !== 'undefined'; 23 | 24 | // Check for Bun 25 | const isBun = typeof (globalThis as any).Bun !== 'undefined'; 26 | 27 | return { 28 | isNode, 29 | isBrowser, 30 | isWebWorker, 31 | isDeno, 32 | isBun, 33 | platform: isNode ? 'node' : 34 | isBrowser ? 'browser' : 35 | isWebWorker ? 'webworker' : 36 | isDeno ? 'deno' : 37 | isBun ? 'bun' : 38 | 'unknown' 39 | }; 40 | } 41 | 42 | // Dynamic import based on environment 43 | async function createTTSInstance(text: string, voice?: string) { 44 | const env = detectEnvironment(); 45 | console.log('Detected environment:', env.platform); 46 | 47 | switch (env.platform) { 48 | case 'node': 49 | // Use full Node.js API with all features 50 | const { EdgeTTS } = await import('../dist/index.js'); 51 | return new EdgeTTS(text, voice); 52 | 53 | case 'browser': 54 | // Use browser-specific API for optimal bundle size 55 | const { EdgeTTS: BrowserEdgeTTS } = await import('../dist/browser.js'); 56 | return new BrowserEdgeTTS(text, voice); 57 | 58 | case 'deno': 59 | case 'bun': 60 | default: 61 | // Use isomorphic API for maximum compatibility 62 | const { EdgeTTS: IsomorphicTTS } = await import('../dist/isomorphic.js'); 63 | return new IsomorphicTTS(text, voice); 64 | } 65 | } 66 | 67 | // Universal synthesis function 68 | async function universalSynthesis() { 69 | const text = 'Hello from a universal text-to-speech library!'; 70 | const voice = 'en-US-EmmaMultilingualNeural'; 71 | 72 | try { 73 | const tts = await createTTSInstance(text, voice); 74 | const result = await tts.synthesize(); 75 | 76 | console.log(`✅ Generated audio: ${result.audio.size} bytes`); 77 | console.log(`📝 Word boundaries: ${result.subtitle.length}`); 78 | 79 | const env = detectEnvironment(); 80 | 81 | // Handle result based on environment 82 | if (env.isNode) { 83 | // Node.js - save to file 84 | const fs = await import('fs/promises'); 85 
| await fs.writeFile('universal-output.mp3', Buffer.from(await result.audio.arrayBuffer())); 86 | console.log('💾 Audio saved to universal-output.mp3'); 87 | } else if (env.isBrowser) { 88 | // Browser - create downloadable link 89 | const url = URL.createObjectURL(result.audio); 90 | console.log(`🔗 Audio URL: ${url}`); 91 | 92 | // Create download link 93 | const a = document.createElement('a'); 94 | a.href = url; 95 | a.download = 'universal-output.mp3'; 96 | a.textContent = 'Download Audio'; 97 | document.body.appendChild(a); 98 | } else { 99 | // Other environments 100 | console.log('🌐 Audio ready for processing in your environment'); 101 | } 102 | 103 | } catch (error) { 104 | console.error('❌ Universal synthesis failed:', error); 105 | 106 | if (error instanceof Error && error.message.includes('CORS')) { 107 | console.log('💡 Tip: Use a proxy server for browser applications'); 108 | } 109 | } 110 | } 111 | 112 | // Export for use in different environments 113 | export { detectEnvironment, createTTSInstance, universalSynthesis }; 114 | 115 | // Auto-run in appropriate environments 116 | // ESM equivalent check 117 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 118 | // Node.js 119 | universalSynthesis().catch(console.error); 120 | } else if (typeof globalThis !== 'undefined') { 121 | // Global scope - make function available 122 | (globalThis as any).runUniversalExample = universalSynthesis; 123 | } -------------------------------------------------------------------------------- /examples/universal-api.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Universal API Example (Preferred Naming Convention) 3 | * This example demonstrates the preferred "Universal" naming instead of "Isomorphic" 4 | */ 5 | 6 | import { 7 | UniversalCommunicate, 8 | UniversalVoicesManager, 9 | listVoicesUniversal, 10 | UniversalEdgeTTS 11 | } from '../dist/index.js'; 12 | 13 | const TEXT = 
'Hello! This demonstrates the preferred Universal API naming.'; 14 | const VOICE = 'en-US-EmmaMultilingualNeural'; 15 | 16 | async function universalApiExample() { 17 | console.log('🌍 Running Universal API example (preferred naming)...'); 18 | 19 | try { 20 | // Test voice listing with Universal naming 21 | console.log('📋 Fetching available voices...'); 22 | const voices = await listVoicesUniversal(); 23 | console.log(`✅ Found ${voices.length} voices`); 24 | 25 | // Test voice manager with Universal naming 26 | console.log('🔍 Testing Universal voice manager...'); 27 | const voicesManager = await UniversalVoicesManager.create(); 28 | const englishVoices = voicesManager.find({ Language: 'en' }); 29 | console.log(`✅ Found ${englishVoices.length} English voices`); 30 | 31 | // Test simple Universal API 32 | console.log('🎤 Testing Universal EdgeTTS (simple API)...'); 33 | const tts = new UniversalEdgeTTS(TEXT, VOICE, { 34 | rate: '+15%', 35 | volume: '+0%', 36 | pitch: '+5Hz' 37 | }); 38 | 39 | const result = await tts.synthesize(); 40 | console.log(`✅ Simple synthesis: ${result.audio.size} bytes, ${result.subtitle.length} words`); 41 | 42 | // Test streaming Universal API 43 | console.log('🎵 Testing Universal streaming API...'); 44 | const communicate = new UniversalCommunicate(TEXT, { 45 | voice: VOICE, 46 | rate: '+10%', 47 | volume: '+0%', 48 | pitch: '+0Hz' 49 | }); 50 | 51 | const audioChunks: Uint8Array[] = []; 52 | let wordCount = 0; 53 | 54 | for await (const chunk of communicate.stream()) { 55 | if (chunk.type === 'audio' && chunk.data) { 56 | audioChunks.push(chunk.data); 57 | } else if (chunk.type === 'WordBoundary') { 58 | wordCount++; 59 | console.log(`📝 Word ${wordCount}: "${chunk.text}"`); 60 | } 61 | } 62 | 63 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 64 | console.log(`✅ Universal streaming complete! 
Audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 65 | 66 | // Environment-specific handling 67 | if (typeof process !== 'undefined' && process.versions?.node) { 68 | // Node.js - save to file 69 | const fs = await import('fs/promises'); 70 | const path = await import('path'); 71 | 72 | const outputFile = path.join(process.cwd(), 'universal-api-output.mp3'); 73 | 74 | // Concatenate Uint8Arrays 75 | const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 76 | const concatenated = new Uint8Array(totalLength); 77 | let offset = 0; 78 | for (const chunk of audioChunks) { 79 | concatenated.set(chunk, offset); 80 | offset += chunk.length; 81 | } 82 | 83 | await fs.writeFile(outputFile, concatenated); 84 | console.log(`💾 Node.js: Audio saved to ${outputFile}`); 85 | } else { 86 | // Browser or other environments 87 | console.log('🌐 Audio data ready for browser processing'); 88 | } 89 | 90 | } catch (error) { 91 | console.error('❌ Error:', error); 92 | 93 | if (error instanceof Error) { 94 | if (error.message.includes('CORS') || error.message.includes('cross-origin')) { 95 | console.log(` 96 | 🚫 CORS Error Detected! 97 | This is expected when running in a browser due to Microsoft's CORS policy. 98 | 99 | Solutions: 100 | 1. Use a proxy server on your backend 101 | 2. Deploy as a browser extension 102 | 3. Use Microsoft's official Speech SDK for browser apps 103 | 4. 
Run this example in Node.js where CORS doesn't apply 104 | `); 105 | } 106 | } 107 | } 108 | } 109 | 110 | // ESM equivalent check 111 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 112 | // Node.js 113 | universalApiExample().catch(console.error); 114 | } else if (typeof globalThis !== 'undefined') { 115 | // Browser - expose function globally 116 | (globalThis as any).runUniversalApiExample = universalApiExample; 117 | } 118 | 119 | export { universalApiExample }; -------------------------------------------------------------------------------- /examples/isomorphic-example.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Isomorphic Edge TTS Example 3 | * This example works in both Node.js and browsers (subject to CORS policy) 4 | */ 5 | 6 | import { 7 | IsomorphicCommunicate, 8 | IsomorphicVoicesManager, 9 | listVoicesIsomorphic 10 | } from '../dist/index.js'; 11 | 12 | const TEXT = 'Hello! This is an isomorphic text-to-speech example that works in both Node.js and browsers.'; 13 | const VOICE = 'en-US-EmmaMultilingualNeural'; 14 | 15 | async function isomorphicExample() { 16 | console.log('🌐 Running isomorphic Edge TTS example...'); 17 | 18 | try { 19 | // Test voice listing (isomorphic) 20 | console.log('📋 Fetching available voices...'); 21 | const voices = await listVoicesIsomorphic(); 22 | console.log(`✅ Found ${voices.length} voices`); 23 | 24 | // Test voice manager (isomorphic) 25 | console.log('🔍 Testing voice manager...'); 26 | const voicesManager = await IsomorphicVoicesManager.create(); 27 | const englishVoices = voicesManager.find({ Language: 'en' }); 28 | console.log(`✅ Found ${englishVoices.length} English voices`); 29 | 30 | // Test TTS synthesis (isomorphic) 31 | console.log('🎤 Starting TTS synthesis...'); 32 | const communicate = new IsomorphicCommunicate(TEXT, { 33 | voice: VOICE, 34 | rate: '+10%', 35 | volume: '+0%', 36 | pitch: '+0Hz' 37 | }); 38 | 39 
| const audioChunks: Uint8Array[] = []; 40 | let wordCount = 0; 41 | 42 | for await (const chunk of communicate.stream()) { 43 | if (chunk.type === 'audio' && chunk.data) { 44 | audioChunks.push(chunk.data); 45 | console.log(`🔊 Audio chunk: ${chunk.data.length} bytes`); 46 | } else if (chunk.type === 'WordBoundary') { 47 | wordCount++; 48 | console.log(`📝 Word ${wordCount}: "${chunk.text}" at ${chunk.offset}ns`); 49 | } 50 | } 51 | 52 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 53 | console.log(`✅ Synthesis complete! Audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 54 | 55 | // Environment-specific handling 56 | const isNode = typeof globalThis !== 'undefined' 57 | ? globalThis.process?.versions?.node !== undefined 58 | : typeof process !== 'undefined' && process.versions?.node !== undefined; 59 | 60 | if (isNode) { 61 | // Node.js - save to file 62 | const fs = await import('fs/promises'); 63 | const path = await import('path'); 64 | 65 | const outputFile = path.join(process.cwd(), 'isomorphic-output.mp3'); 66 | // Concatenate Uint8Arrays 67 | const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 68 | const concatenated = new Uint8Array(totalLength); 69 | let offset = 0; 70 | for (const chunk of audioChunks) { 71 | concatenated.set(chunk, offset); 72 | offset += chunk.length; 73 | } 74 | await fs.writeFile(outputFile, concatenated); 75 | console.log(`💾 Node.js: Audio saved to ${outputFile}`); 76 | } else { 77 | // Browser - create audio element 78 | const audioBlob = new Blob(audioChunks, { type: 'audio/mpeg' }); 79 | const audioUrl = URL.createObjectURL(audioBlob); 80 | 81 | console.log(`🌐 Browser: Audio Blob created (${audioBlob.size} bytes)`); 82 | console.log(`🔗 Audio URL: ${audioUrl}`); 83 | 84 | // If running in actual browser, you could: 85 | // const audio = new Audio(audioUrl); 86 | // audio.play(); 87 | } 88 | 89 | } catch (error) { 90 | console.error('❌ Error:', error); 91 | 92 | if 
(error instanceof Error) { 93 | if (error.message.includes('CORS') || error.message.includes('cross-origin')) { 94 | console.log(` 95 | 🚫 CORS Error Detected! 96 | This is expected when running in a browser due to Microsoft's CORS policy. 97 | 98 | Solutions: 99 | 1. Use a proxy server on your backend 100 | 2. Deploy as a browser extension 101 | 3. Use Microsoft's official Speech SDK instead 102 | 4. Run this example in Node.js where CORS doesn't apply 103 | `); 104 | } 105 | } 106 | } 107 | } 108 | 109 | // Universal module pattern - works in both Node.js and browsers 110 | // ESM equivalent check 111 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 112 | // Node.js 113 | isomorphicExample().catch(console.error); 114 | } else if (typeof globalThis !== 'undefined') { 115 | // Browser - expose function globally 116 | (globalThis as any).runIsomorphicExample = isomorphicExample; 117 | } 118 | 119 | export { isomorphicExample }; -------------------------------------------------------------------------------- /PERFORMANCE_OPTIMIZATIONS.md: -------------------------------------------------------------------------------- 1 | # Performance Optimizations for Edge TTS Universal 2 | 3 | ## Overview 4 | 5 | This document outlines the performance optimizations made to improve audio quality and reduce delays in the Edge TTS Universal library. 6 | 7 | ## Key Issues Addressed 8 | 9 | ### 1. Hard-coded Delay Removal 10 | 11 | **Problem**: A hard-coded delay of 8,750,000 nanoseconds (0.875 seconds) was being added to `offsetCompensation` on every `turn.end` event. 12 | 13 | **Solution**: Removed the line `this.state.offsetCompensation += 8_750_000;` from all communicate implementations. 14 | 15 | **Impact**: Eliminates the artificial 0.875-second delay between audio segments, providing seamless audio playback. 16 | 17 | ### 2. 
Audio Quality Enhancement 18 | 19 | **Problem**: Low bitrate audio format (24kHz, 48kbps) was causing audio quality issues and potential smoothness problems. 20 | 21 | **Attempted Solution**: Upgraded audio output format from `audio-24khz-48kbitrate-mono-mp3` to `audio-48khz-192kbitrate-mono-mp3`. 22 | 23 | **Issue**: The higher quality format caused "NoAudioReceived" errors as Microsoft's Edge TTS service doesn't support the 48kHz/192kbps format. 24 | 25 | **Final Solution**: Reverted back to the original `audio-24khz-48kbitrate-mono-mp3` format to maintain compatibility while keeping other optimizations. 26 | 27 | ### 3. Improved Audio Concatenation 28 | 29 | **Problem**: Simple array concatenation could introduce gaps or inconsistencies in audio data. 30 | 31 | **Solution**: Enhanced the `concatUint8Arrays` function with: 32 | 33 | - Proper empty array handling 34 | - Single array optimization 35 | - Explicit length validation 36 | - Better memory allocation 37 | 38 | ### 4. Message Processing Optimization 39 | 40 | **Problem**: Indefinite waiting in message processing loops could cause delays. 
41 | 42 | **Solution**: Added timeout mechanism to message processing: 43 | 44 | - Added 50ms timeout to prevent indefinite waiting 45 | - More responsive message handling 46 | - Better error recovery 47 | 48 | ## Configuration Changes 49 | 50 | ### Audio Format Configuration 51 | 52 | ```json 53 | { 54 | "context": { 55 | "synthesis": { 56 | "audio": { 57 | "metadataoptions": { 58 | "sentenceBoundaryEnabled": false, 59 | "wordBoundaryEnabled": true 60 | }, 61 | "outputFormat": "audio-24khz-48kbitrate-mono-mp3" 62 | } 63 | } 64 | } 65 | } 66 | ``` 67 | 68 | _Note: Higher quality formats (48kHz/192kbps) were tested but are not supported by Microsoft's Edge TTS service._ 69 | 70 | ### Timing Compensation 71 | 72 | The `offsetCompensation` now only uses actual audio timing data without artificial delays: 73 | 74 | ```typescript 75 | // Before (with artificial delay) 76 | this.state.offsetCompensation = this.state.lastDurationOffset; 77 | this.state.offsetCompensation += 8_750_000; // Removed this line 78 | 79 | // After (natural timing) 80 | this.state.offsetCompensation = this.state.lastDurationOffset; 81 | ``` 82 | 83 | ## Performance Benefits 84 | 85 | 1. **Reduced Latency**: Elimination of 0.875s artificial delay 86 | 2. **Maintained Compatibility**: Kept original audio format that works reliably 87 | 3. **Smoother Playback**: Improved audio chunk concatenation 88 | 4. **More Responsive**: Better message processing with timeout handling 89 | 5. **Cross-Platform**: Optimizations applied to all implementations (Browser, Node.js, Isomorphic) 90 | 91 | ## Files Modified 92 | 93 | - `src/browser-communicate.ts` 94 | - `src/communicate.ts` 95 | - `src/isomorphic-communicate.ts` 96 | - `src/browser.ts` 97 | - `src/browser-simple.ts` 98 | - `src/isomorphic-simple.ts` 99 | - `examples/browser-example.html` 100 | 101 | ## Testing Recommendations 102 | 103 | After implementing these changes, test with: 104 | 105 | 1. 
**Long text passages** to verify no delays between sentences
2. **Multiple paragraphs** to ensure smooth transitions
3. **Different voice types** to confirm quality improvements
4. **Browser playback** to verify smoothness in web environments
5. **Streaming scenarios** to test real-time performance

## Future Optimization Opportunities

1. **Audio Preloading**: Implement audio chunk preloading for even smoother playback
2. **Adaptive Bitrate**: Dynamic quality adjustment based on network conditions
3. **Audio Compression**: Consider WebM or other formats for better compression
4. **Buffer Management**: Implement smart buffering strategies for large texts
5. **WebAudio API**: Use WebAudio API for advanced audio processing in browsers

## Compatibility Notes

- All changes maintain backward compatibility
- Because the audio format was reverted to 24 kHz / 48 kbps, bandwidth requirements are unchanged
- Older browsers continue to work with the retained original audio format
- The timeout mechanism adds resilience without breaking existing functionality
-------------------------------------------------------------------------------- /src/browser-utils.ts: --------------------------------------------------------------------------------
/**
 * Browser-specific utility functions that avoid Node.js dependencies.
 * Provides browser-native implementations of UUID generation and XML escaping.
 */

/**
 * Generates a UUID v4 string without hyphens using browser's crypto API.
8 | * @returns UUID string with hyphens removed 9 | */ 10 | export function browserConnectId(): string { 11 | // Use crypto.getRandomValues for browser-native UUID generation 12 | const array = new Uint8Array(16); 13 | crypto.getRandomValues(array); 14 | 15 | // Set version (4) and variant bits according to RFC 4122 16 | array[6] = (array[6] & 0x0f) | 0x40; 17 | array[8] = (array[8] & 0x3f) | 0x80; 18 | 19 | // Convert to hex string and format as UUID, then remove hyphens 20 | const hex = Array.from(array, byte => byte.toString(16).padStart(2, '0')).join(''); 21 | const uuid = `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`; 22 | 23 | return uuid.replace(/-/g, ''); 24 | } 25 | 26 | /** 27 | * Browser-native XML escaping function. 28 | * @param text - Text to escape 29 | * @returns XML-escaped text 30 | */ 31 | export function browserEscape(text: string): string { 32 | return text 33 | .replace(/&/g, '&') 34 | .replace(//g, '>') 36 | .replace(/"/g, '"') 37 | .replace(/'/g, '''); 38 | } 39 | 40 | /** 41 | * Unescapes XML entities in text. 42 | * @param text - Text containing XML entities to unescape 43 | * @returns Text with XML entities converted back to their original characters 44 | */ 45 | export function browserUnescape(text: string): string { 46 | return text 47 | .replace(/"/g, '"') 48 | .replace(/'/g, "'") 49 | .replace(/</g, '<') 50 | .replace(/>/g, '>') 51 | .replace(/&/g, '&'); // Do & last to avoid double unescaping 52 | } 53 | 54 | /** 55 | * Removes control characters that are incompatible with TTS processing. 
56 | * @param text - Input text to clean 57 | * @returns Text with control characters replaced by spaces 58 | */ 59 | export function browserRemoveIncompatibleCharacters(text: string): string { 60 | // Remove control characters (U+0000 to U+001F except \t, \n, \r) 61 | // eslint-disable-next-line no-control-regex 62 | return text.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, ' '); 63 | } 64 | 65 | /** 66 | * Formats the current date as a string in the format expected by the TTS service. 67 | * @returns Formatted date string 68 | */ 69 | export function browserDateToString(): string { 70 | return new Date().toUTCString().replace("GMT", "GMT+0000 (Coordinated Universal Time)"); 71 | } 72 | 73 | /** 74 | * Creates SSML (Speech Synthesis Markup Language) from text and voice configuration. 75 | * @param voice - Voice name 76 | * @param rate - Speech rate (e.g., "+0%") 77 | * @param volume - Speech volume (e.g., "+0%") 78 | * @param pitch - Speech pitch (e.g., "+0Hz") 79 | * @param escapedText - Text content (should be XML-escaped) 80 | * @returns Complete SSML document string 81 | */ 82 | export function browserMkssml(voice: string, rate: string, volume: string, pitch: string, escapedText: string): string { 83 | return ( 84 | "" 85 | + `` 86 | + `` 87 | + `${escapedText}` 88 | + "" 89 | + "" 90 | + "" 91 | ); 92 | } 93 | 94 | /** 95 | * Creates a complete WebSocket message with headers and SSML data. 96 | * @param requestId - Unique request identifier 97 | * @param timestamp - Timestamp string for the request 98 | * @param ssml - SSML content to include in the message 99 | * @returns Complete WebSocket message string with headers and data 100 | */ 101 | export function browserSsmlHeadersPlusData(requestId: string, timestamp: string, ssml: string): string { 102 | return ( 103 | `X-RequestId:${requestId}\r\n` 104 | + "Content-Type:application/ssml+xml\r\n" 105 | + `X-Timestamp:${timestamp}Z\r\n` // This is not a mistake, Microsoft Edge bug. 
106 | + "Path:ssml\r\n\r\n" 107 | + `${ssml}` 108 | ); 109 | } 110 | 111 | /** 112 | * Calculates the maximum message size for text chunks based on WebSocket limits. 113 | * @param voice - Voice name 114 | * @param rate - Speech rate 115 | * @param volume - Speech volume 116 | * @param pitch - Speech pitch 117 | * @returns Maximum byte size for text content in a single message 118 | */ 119 | export function browserCalcMaxMesgSize(voice: string, rate: string, volume: string, pitch: string): number { 120 | const websocketMaxSize = 2 ** 16; 121 | const overheadPerMessage = browserSsmlHeadersPlusData( 122 | browserConnectId(), 123 | browserDateToString(), 124 | browserMkssml(voice, rate, volume, pitch, ""), 125 | ).length + 50; // margin of error 126 | return websocketMaxSize - overheadPerMessage; 127 | } -------------------------------------------------------------------------------- /src/isomorphic-voices.ts: -------------------------------------------------------------------------------- 1 | // Use native fetch API available in both Node.js 18+ and browsers 2 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 3 | import { IsomorphicDRM } from './isomorphic-drm'; 4 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 5 | 6 | /** 7 | * Error class for fetch-related errors (isomorphic equivalent of AxiosError) 8 | */ 9 | export class FetchError extends Error { 10 | response?: { 11 | status: number; 12 | headers: Record; 13 | }; 14 | 15 | constructor(message: string, response?: { status: number; headers: Record }) { 16 | super(message); 17 | this.name = 'FetchError'; 18 | this.response = response; 19 | } 20 | } 21 | 22 | async function _listVoices(proxy?: string): Promise { 23 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${await IsomorphicDRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 24 | 25 | const fetchOptions: RequestInit = { 26 | headers: VOICE_HEADERS, 27 | }; 28 | 29 | // Note: Proxy support 
in browsers is limited and handled differently 30 | // In Node.js, we could potentially use a proxy agent with fetch 31 | if (proxy) { 32 | console.warn('Proxy support in isomorphic environment is limited. Consider using a backend proxy.'); 33 | } 34 | 35 | try { 36 | const response = await fetch(url, fetchOptions); 37 | 38 | if (!response.ok) { 39 | const headers: Record = {}; 40 | response.headers.forEach((value, key) => { 41 | headers[key] = value; 42 | }); 43 | 44 | throw new FetchError(`HTTP ${response.status}`, { 45 | status: response.status, 46 | headers 47 | }); 48 | } 49 | 50 | const data: Voice[] = await response.json(); 51 | 52 | for (const voice of data) { 53 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 54 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 55 | } 56 | 57 | return data; 58 | } catch (error) { 59 | if (error instanceof FetchError) { 60 | throw error; 61 | } 62 | // Convert other fetch errors to our FetchError format 63 | throw new FetchError(error instanceof Error ? error.message : 'Unknown fetch error'); 64 | } 65 | } 66 | 67 | /** 68 | * Fetches all available voices from the Microsoft Edge TTS service (isomorphic version). 69 | * Works in both Node.js and browsers (subject to CORS policy). 70 | * 71 | * @param proxy - Optional proxy URL for the request (limited browser support) 72 | * @returns Promise resolving to array of available voices 73 | */ 74 | export async function listVoices(proxy?: string): Promise { 75 | try { 76 | return await _listVoices(proxy); 77 | } catch (e) { 78 | if (e instanceof FetchError && e.response?.status === 403) { 79 | IsomorphicDRM.handleClientResponseError(e.response); 80 | return await _listVoices(proxy); 81 | } 82 | throw e; 83 | } 84 | } 85 | 86 | /** 87 | * Isomorphic utility class for finding and filtering available voices. 88 | * Works in both Node.js and browsers (subject to CORS policy). 
89 | * 90 | * @example 91 | * ```typescript 92 | * const voicesManager = await IsomorphicVoicesManager.create(); 93 | * const englishVoices = voicesManager.find({ Language: 'en' }); 94 | * ``` 95 | */ 96 | export class IsomorphicVoicesManager { 97 | private voices: VoicesManagerVoice[] = []; 98 | private calledCreate = false; 99 | 100 | /** 101 | * Creates a new IsomorphicVoicesManager instance. 102 | * 103 | * @param customVoices - Optional custom voice list instead of fetching from API 104 | * @param proxy - Optional proxy URL for API requests (limited browser support) 105 | * @returns Promise resolving to IsomorphicVoicesManager instance 106 | */ 107 | public static async create(customVoices?: Voice[], proxy?: string): Promise { 108 | const manager = new IsomorphicVoicesManager(); 109 | const voices = customVoices ?? await listVoices(proxy); 110 | manager.voices = voices.map(voice => ({ 111 | ...voice, 112 | Language: voice.Locale.split('-')[0], 113 | })); 114 | manager.calledCreate = true; 115 | return manager; 116 | } 117 | 118 | /** 119 | * Finds voices matching the specified criteria. 
120 | * 121 | * @param filter - Filter criteria for voice selection 122 | * @returns Array of voices matching the filter 123 | * @throws {Error} If called before create() 124 | */ 125 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 126 | if (!this.calledCreate) { 127 | throw new Error('IsomorphicVoicesManager.find() called before IsomorphicVoicesManager.create()'); 128 | } 129 | 130 | return this.voices.filter(voice => { 131 | return Object.entries(filter).every(([key, value]) => { 132 | return voice[key as keyof VoicesManagerFind] === value; 133 | }); 134 | }); 135 | } 136 | } -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test & Build 2 | 3 | on: 4 | push: 5 | branches: [main, master, develop] 6 | pull_request: 7 | branches: [main, master, develop] 8 | 9 | jobs: 10 | test: 11 | name: Test on ${{ matrix.runtime }} 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | include: 16 | - runtime: node 17 | node-version: '18' 18 | - runtime: node 19 | node-version: '20' 20 | - runtime: node 21 | node-version: '22' 22 | - runtime: deno 23 | deno-version: 'v1.x' 24 | - runtime: bun 25 | bun-version: 'latest' 26 | 27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v4 30 | 31 | - name: Setup Node.js 32 | if: matrix.runtime == 'node' 33 | uses: actions/setup-node@v4 34 | with: 35 | node-version: ${{ matrix.node-version }} 36 | cache: 'npm' 37 | 38 | - name: Setup Deno 39 | if: matrix.runtime == 'deno' 40 | uses: denoland/setup-deno@v1 41 | with: 42 | deno-version: ${{ matrix.deno-version }} 43 | 44 | - name: Setup Node.js (for Deno build) 45 | if: matrix.runtime == 'deno' 46 | uses: actions/setup-node@v4 47 | with: 48 | node-version: '22' 49 | cache: 'npm' 50 | 51 | - name: Install dependencies (Deno build) 52 | if: matrix.runtime == 'deno' 53 | run: npm ci 54 | 55 | - name: Setup Bun 
56 | if: matrix.runtime == 'bun' 57 | uses: oven-sh/setup-bun@v1 58 | with: 59 | bun-version: ${{ matrix.bun-version }} 60 | 61 | - name: Install dependencies (Node.js) 62 | if: matrix.runtime == 'node' 63 | run: npm ci 64 | 65 | - name: Install dependencies (Bun) 66 | if: matrix.runtime == 'bun' 67 | run: bun install 68 | 69 | - name: Type check (Node.js) 70 | if: matrix.runtime == 'node' 71 | run: npm run type-check 72 | 73 | - name: Lint (Node.js) 74 | if: matrix.runtime == 'node' && matrix.node-version == '22' 75 | run: npm run lint 76 | 77 | - name: Build (Node.js) 78 | if: matrix.runtime == 'node' 79 | run: npm run build 80 | 81 | - name: Build (Bun) 82 | if: matrix.runtime == 'bun' 83 | run: bun run build 84 | 85 | - name: Build (Deno) 86 | if: matrix.runtime == 'deno' 87 | run: npm run build 88 | 89 | - name: Test Node.js 90 | if: matrix.runtime == 'node' 91 | run: npm test 92 | continue-on-error: true 93 | 94 | - name: Test Deno 95 | if: matrix.runtime == 'deno' 96 | run: deno test --allow-net --allow-env --no-check --no-lock tests/deno/*.ts 97 | continue-on-error: true 98 | 99 | - name: Test Bun 100 | if: matrix.runtime == 'bun' 101 | run: bun run test 102 | continue-on-error: true 103 | 104 | - name: Check bundle sizes 105 | if: matrix.runtime == 'node' && matrix.node-version == '22' 106 | run: ls -la dist/ 107 | continue-on-error: true 108 | 109 | build-matrix: 110 | name: Build for ${{ matrix.target }} 111 | runs-on: ubuntu-latest 112 | strategy: 113 | matrix: 114 | target: [node, browser, isomorphic, webworker] 115 | 116 | steps: 117 | - name: Checkout code 118 | uses: actions/checkout@v4 119 | 120 | - name: Setup Node.js 121 | uses: actions/setup-node@v4 122 | with: 123 | node-version: '22' 124 | cache: 'npm' 125 | 126 | - name: Install dependencies 127 | run: npm ci 128 | 129 | - name: Build ${{ matrix.target }} 130 | run: npm run build 131 | 132 | - name: Upload build artifacts 133 | uses: actions/upload-artifact@v4 134 | with: 135 | name: 
build-${{ matrix.target }} 136 | path: dist/ 137 | retention-days: 7 138 | 139 | compatibility-test: 140 | name: Test Cross-Runtime Compatibility 141 | runs-on: ubuntu-latest 142 | needs: [test] 143 | if: github.event_name == 'pull_request' 144 | 145 | steps: 146 | - name: Checkout code 147 | uses: actions/checkout@v4 148 | 149 | - name: Setup Node.js 150 | uses: actions/setup-node@v4 151 | with: 152 | node-version: '22' 153 | cache: 'npm' 154 | 155 | - name: Setup Deno 156 | uses: denoland/setup-deno@v1 157 | with: 158 | deno-version: v1.x 159 | 160 | - name: Setup Bun 161 | uses: oven-sh/setup-bun@v1 162 | with: 163 | bun-version: latest 164 | 165 | - name: Install dependencies 166 | run: npm ci 167 | 168 | - name: Build package 169 | run: npm run build 170 | 171 | - name: Test runtime detection (Node.js) 172 | run: node -e "const { detectRuntime } = require('./dist/runtime-detection.cjs'); const runtime = detectRuntime(); console.log('Node.js runtime detection:', runtime); if (runtime.name !== 'node') process.exit(1);" 173 | 174 | - name: Test runtime detection (Deno) 175 | run: deno eval "import { detectRuntime } from './dist/runtime-detection.js'; const runtime = detectRuntime(); console.log('Deno runtime detection:', runtime); if (runtime.name !== 'deno') Deno.exit(1);" 176 | 177 | - name: Test runtime detection (Bun) 178 | run: bun run -e "import { detectRuntime } from './dist/runtime-detection.js'; const runtime = detectRuntime(); console.log('Bun runtime detection:', runtime); if (runtime.name !== 'bun') process.exit(1);" 179 | 180 | - name: Test package imports (Node.js) 181 | run: node -e "const main = require('./dist/index.cjs'); const browser = require('./dist/browser.cjs'); const isomorphic = require('./dist/isomorphic.cjs'); console.log('✅ All Node.js imports work');" 182 | 183 | - name: Test package imports (Deno) 184 | run: deno eval "import * as main from './dist/index.js'; import * as browser from './dist/browser.js'; import * as isomorphic from 
'./dist/isomorphic.js'; console.log('✅ All Deno imports work');" 185 | 186 | - name: Test package imports (Bun) 187 | run: bun run -e "import * as main from './dist/index.js'; import * as browser from './dist/browser.js'; import * as isomorphic from './dist/isomorphic.js'; console.log('✅ All Bun imports work');" 188 | -------------------------------------------------------------------------------- /src/simple.ts: -------------------------------------------------------------------------------- 1 | import { Communicate } from './communicate'; 2 | 3 | /** 4 | * Options for controlling the voice prosody (rate, pitch, volume). 5 | */ 6 | export interface ProsodyOptions { 7 | /** 8 | * The speaking rate of the voice. 9 | * Examples: "+10.00%", "-20.00%" 10 | */ 11 | rate?: string; 12 | /** 13 | * The speaking volume of the voice. 14 | * Examples: "+15.00%", "-10.00%" 15 | */ 16 | volume?: string; 17 | /** 18 | * The speaking pitch of the voice. 19 | * Examples: "+20Hz", "-10Hz" 20 | */ 21 | pitch?: string; 22 | } 23 | 24 | /** 25 | * Represents a single word boundary with its timing and text. 26 | * The API provides timing in 100-nanosecond units. 27 | */ 28 | export interface WordBoundary { 29 | /** 30 | * The offset from the beginning of the audio stream in 100-nanosecond units. 31 | */ 32 | offset: number; 33 | /** 34 | * The duration of the word in 100-nanosecond units. 35 | */ 36 | duration: number; 37 | /** 38 | * The text of the spoken word. 39 | */ 40 | text: string; 41 | } 42 | 43 | /** 44 | * The final result of the synthesis process. 45 | */ 46 | export interface SynthesisResult { 47 | /** 48 | * The generated audio as a Blob, which can be used in an