├── bunfig.toml ├── .size-limit.json ├── tsconfig.json ├── examples ├── listVoices.ts ├── streaming.ts ├── simple.ts ├── webworker-example │ ├── worker.ts │ └── main.ts ├── simple-api.ts ├── NAMING_EXAMPLES.md ├── simple-vs-advanced.ts ├── naming-compatibility.ts ├── universal-detection.ts ├── universal-api.ts ├── isomorphic-example.ts └── cdn-example.html ├── .npmignore ├── typedoc.json ├── tests ├── deno │ ├── isomorphic.deno.ts │ └── voices.deno.ts ├── voices.test.js ├── communicate.test.js ├── types.test.js ├── isomorphic.test.js └── simple-api.test.js ├── schemas └── communicate-options.schema.json ├── .github └── workflows │ ├── sync-proxy-package.yml │ ├── test.yml │ └── publish.yml ├── .eslintrc.json ├── tsup.config.ts ├── src ├── browser-drm.ts ├── constants.ts ├── browser-entry.ts ├── exceptions.ts ├── isomorphic-entry.ts ├── isomorphic-drm.ts ├── drm.ts ├── tts_config.ts ├── types.ts ├── submaker.ts ├── voices.ts ├── webworker-entry.ts ├── index.ts ├── runtime-detection.ts ├── browser-voices.ts ├── browser-utils.ts ├── isomorphic-voices.ts ├── simple.ts ├── isomorphic-utils.ts ├── browser-simple.ts ├── isomorphic-simple.ts ├── utils.ts ├── communicate.ts └── browser.ts ├── deno.json ├── .gitignore ├── PERFORMANCE_OPTIMIZATIONS.md ├── package.json └── FEATURES.md /bunfig.toml: -------------------------------------------------------------------------------- 1 | [test] 2 | # Exclude Deno-only tests when running `bun test` locally. 3 | # CI runs Deno tests with `deno test` (see .github/workflows/test.yml). 
4 | coveragePathIgnorePatterns = ["tests/deno/**"] 5 | -------------------------------------------------------------------------------- /.size-limit.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "Browser", 4 | "path": "dist/browser.js", 5 | "limit": "35 KB" 6 | }, 7 | { 8 | "name": "Web Worker", 9 | "path": "dist/webworker.js", 10 | "limit": "40 KB" 11 | } 12 | ] -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ES2022", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "moduleResolution": "node", 7 | "allowSyntheticDefaultImports": true, 8 | "esModuleInterop": true, 9 | 10 | "forceConsistentCasingInFileNames": true, 11 | "strict": true, 12 | "skipLibCheck": true, 13 | "resolveJsonModule": true, 14 | "declaration": true, 15 | "outDir": "./dist" 16 | }, 17 | "include": ["src"], 18 | "exclude": ["node_modules", "dist", "examples"] 19 | } 20 | -------------------------------------------------------------------------------- /examples/listVoices.ts: -------------------------------------------------------------------------------- 1 | import { VoicesManager } from "../dist/index.js"; 2 | 3 | async function main() { 4 | const voicesManager = await VoicesManager.create(); 5 | 6 | // Find all English voices 7 | const voices = voicesManager.find({ Language: "en" }); 8 | console.log("English voices:", voices.map(v => v.ShortName)); 9 | 10 | // Find a specific voice 11 | const femaleUsVoices = voicesManager.find({ Gender: "Female", Locale: "en-US" }); 12 | console.log("Female US voices:", femaleUsVoices.map(v => v.ShortName)); 13 | } 14 | 15 | main().catch(console.error); -------------------------------------------------------------------------------- /.npmignore: 
-------------------------------------------------------------------------------- 1 | # Source files - only ship built dist/ files 2 | src/ 3 | examples/ 4 | schemas/ 5 | 6 | # Development files 7 | tsconfig.json 8 | tsup.config.ts 9 | typedoc.json 10 | .gitignore 11 | .eslintrc* 12 | .prettierrc* 13 | 14 | # Documentation (keep README.md but exclude others) 15 | FEATURES.md 16 | API.md 17 | 18 | # Test files 19 | test/ 20 | tests/ 21 | *.test.ts 22 | *.test.js 23 | *.spec.ts 24 | *.spec.js 25 | 26 | # Development dependencies 27 | node_modules/ 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | 32 | # IDE files 33 | .vscode/ 34 | .idea/ 35 | *.swp 36 | *.swo 37 | 38 | # OS files 39 | .DS_Store 40 | Thumbs.db 41 | 42 | # Build artifacts that aren't needed 43 | *.tsbuildinfo -------------------------------------------------------------------------------- /typedoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "entryPoints": ["src/index.ts"], 3 | "out": "docs", 4 | "name": "edge-tts-universal", 5 | "includeVersion": true, 6 | "excludePrivate": true, 7 | "excludeProtected": true, 8 | "excludeInternal": true, 9 | "readme": "README.md", 10 | "theme": "default", 11 | "hideGenerator": false, 12 | "sort": ["source-order"], 13 | "kindSortOrder": [ 14 | "Document", 15 | "Project", 16 | "Module", 17 | "Namespace", 18 | "Enum", 19 | "EnumMember", 20 | "Class", 21 | "Interface", 22 | "TypeAlias", 23 | "Constructor", 24 | "Property", 25 | "Variable", 26 | "Function", 27 | "Method", 28 | "Accessor", 29 | "Parameter", 30 | "TypeParameter", 31 | "Reference" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /examples/streaming.ts: -------------------------------------------------------------------------------- 1 | import { Communicate, SubMaker } from "../dist/index.js"; 2 | 3 | const TEXT = "This is a test of the streaming functionality, with subtitles."; 4 | const 
VOICE = "en-GB-SoniaNeural"; 5 | 6 | async function main() { 7 | const communicate = new Communicate(TEXT, { voice: VOICE }); 8 | const subMaker = new SubMaker(); 9 | 10 | for await (const chunk of communicate.stream()) { 11 | if (chunk.type === "audio" && chunk.data) { 12 | // Do something with the audio data, e.g., stream it to a client. 13 | // For this example, we'll just log its size. 14 | console.log(`Received audio chunk of size: ${chunk.data.length}`); 15 | } else if (chunk.type === "WordBoundary") { 16 | subMaker.feed(chunk); 17 | } 18 | } 19 | 20 | // Get the subtitles in SRT format. 21 | const srt = subMaker.getSrt(); 22 | console.log("\nGenerated Subtitles (SRT):\n", srt); 23 | } 24 | 25 | main().catch(console.error); -------------------------------------------------------------------------------- /examples/simple.ts: -------------------------------------------------------------------------------- 1 | import { Communicate } from "../dist/index.js"; 2 | import { promises as fs } from "fs"; 3 | import path from "path"; 4 | import { fileURLToPath } from 'url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | 9 | const TEXT = "Hello, world! This is a test of the new edge-tts Node.js library."; 10 | const VOICE = "en-US-SteffanNeural"; 11 | const OUTPUT_FILE = path.join(__dirname, "test.mp3"); 12 | 13 | async function main() { 14 | const communicate = new Communicate(TEXT, { voice: VOICE }); 15 | 16 | // The stream() method returns an async generator that yields audio chunks. 
17 | const audioStream = communicate.stream(); 18 | 19 | const buffers: Buffer[] = []; 20 | for await (const chunk of audioStream) { 21 | if (chunk.type === "audio" && chunk.data) { 22 | buffers.push(chunk.data); 23 | } 24 | } 25 | 26 | const finalBuffer = Buffer.concat(buffers); 27 | await fs.writeFile(OUTPUT_FILE, finalBuffer); 28 | 29 | console.log(`Audio saved to ${OUTPUT_FILE}`); 30 | } 31 | 32 | main().catch(console.error); -------------------------------------------------------------------------------- /examples/webworker-example/worker.ts: -------------------------------------------------------------------------------- 1 | // Web Worker for background TTS processing 2 | import { EdgeTTS, postAudioMessage } from '../../src/webworker-entry'; 3 | 4 | // Listen for messages from main thread 5 | self.onmessage = async function (e) { 6 | const { type, text, voice, options } = e.data; 7 | 8 | if (type === 'synthesize') { 9 | try { 10 | console.log('Worker: Starting TTS synthesis...'); 11 | 12 | const tts = new EdgeTTS(text, voice, options); 13 | const result = await tts.synthesize(); 14 | 15 | console.log(`Worker: Generated ${result.audio.size} bytes of audio`); 16 | 17 | // Post result back to main thread 18 | postAudioMessage(result.audio, result.subtitle); 19 | 20 | } catch (error) { 21 | console.error('Worker: TTS synthesis failed:', error); 22 | 23 | // Post error back to main thread 24 | self.postMessage({ 25 | type: 'error', 26 | error: error instanceof Error ? 
error.message : 'Unknown error' 27 | }); 28 | } 29 | } 30 | } 31 | 32 | // Signal that worker is ready 33 | self.postMessage({ 34 | type: 'ready', 35 | message: 'TTS Worker ready for synthesis requests' 36 | }); -------------------------------------------------------------------------------- /tests/deno/isomorphic.deno.ts: -------------------------------------------------------------------------------- 1 | import { assertEquals, assert } from "https://deno.land/std@0.208.0/assert/mod.ts"; 2 | import { Communicate } from "../../dist/isomorphic.js"; 3 | 4 | Deno.test("Isomorphic API - Communicate can be instantiated", () => { 5 | const communicate = new Communicate('Hello, world!', { 6 | voice: 'en-US-EmmaMultilingualNeural' 7 | }); 8 | assert(communicate instanceof Communicate, 'Should create Communicate instance'); 9 | }); 10 | 11 | Deno.test("Isomorphic API - Communicate stream method exists", () => { 12 | const communicate = new Communicate('Test', { 13 | voice: 'en-US-EmmaMultilingualNeural' 14 | }); 15 | 16 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 17 | 18 | const stream = communicate.stream(); 19 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 20 | }); 21 | 22 | Deno.test("Isomorphic API - Communicate accepts configuration options", () => { 23 | const communicate = new Communicate('Test text', { 24 | voice: 'en-US-EmmaMultilingualNeural', 25 | rate: '+20%', 26 | volume: '+10%', 27 | pitch: '+5Hz', 28 | connectionTimeout: 5000 29 | }); 30 | 31 | assert(communicate instanceof Communicate, 'Should create Communicate instance with options'); 32 | }); 33 | 34 | -------------------------------------------------------------------------------- /schemas/communicate-options.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "$id": 
"https://schemas.travisvn.com/edge-tts-universal/communicate-options.json", 4 | "title": "CommunicateOptions", 5 | "description": "Configuration options for the Communicate class", 6 | "type": "object", 7 | "properties": { 8 | "voice": { 9 | "type": "string", 10 | "description": "Voice to use for synthesis", 11 | "default": "en-US-EmmaMultilingualNeural", 12 | "examples": ["en-US-EmmaMultilingualNeural", "en-GB-SoniaNeural"] 13 | }, 14 | "rate": { 15 | "type": "string", 16 | "description": "Speech rate adjustment", 17 | "pattern": "^[+-]\\d+%$", 18 | "default": "+0%", 19 | "examples": ["+20%", "-10%", "+0%"] 20 | }, 21 | "volume": { 22 | "type": "string", 23 | "description": "Volume level adjustment", 24 | "pattern": "^[+-]\\d+%$", 25 | "default": "+0%", 26 | "examples": ["+50%", "-25%", "+0%"] 27 | }, 28 | "pitch": { 29 | "type": "string", 30 | "description": "Pitch adjustment in Hz", 31 | "pattern": "^[+-]\\d+Hz$", 32 | "default": "+0Hz", 33 | "examples": ["+5Hz", "-10Hz", "+0Hz"] 34 | }, 35 | "proxy": { 36 | "type": "string", 37 | "description": "Proxy URL for requests", 38 | "format": "uri", 39 | "examples": ["http://proxy:8080", "https://user:pass@proxy:3128"] 40 | }, 41 | "connectionTimeout": { 42 | "type": "number", 43 | "description": "WebSocket connection timeout in milliseconds", 44 | "minimum": 0, 45 | "examples": [10000, 30000] 46 | } 47 | }, 48 | "additionalProperties": false 49 | } 50 | -------------------------------------------------------------------------------- /tests/voices.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { VoicesManager, listVoices } from '../dist/index.js'; 4 | 5 | describe('Voice Management', () => { 6 | test('listVoices returns array of voices', async () => { 7 | const voices = await listVoices(); 8 | assert(Array.isArray(voices), 'listVoices should return an array'); 9 | 
assert(voices.length > 0, 'Should have at least one voice'); 10 | 11 | // Check voice structure 12 | const voice = voices[0]; 13 | assert(typeof voice.Name === 'string', 'Voice should have Name property'); 14 | assert(typeof voice.ShortName === 'string', 'Voice should have ShortName property'); 15 | assert(['Female', 'Male'].includes(voice.Gender), 'Voice should have valid Gender'); 16 | assert(typeof voice.Locale === 'string', 'Voice should have Locale property'); 17 | }); 18 | 19 | test('VoicesManager can filter voices', async () => { 20 | const voicesManager = await VoicesManager.create(); 21 | 22 | // Test finding English voices 23 | const englishVoices = voicesManager.find({ Language: 'en' }); 24 | assert(Array.isArray(englishVoices), 'Should return array'); 25 | assert(englishVoices.length > 0, 'Should find English voices'); 26 | 27 | for (const voice of englishVoices) { 28 | assert(voice.Language === 'en', 'All returned voices should be English'); 29 | } 30 | 31 | // Test finding female voices 32 | const femaleVoices = voicesManager.find({ Gender: 'Female' }); 33 | assert(Array.isArray(femaleVoices), 'Should return array'); 34 | 35 | for (const voice of femaleVoices) { 36 | assert(voice.Gender === 'Female', 'All returned voices should be Female'); 37 | } 38 | }); 39 | }); -------------------------------------------------------------------------------- /examples/simple-api.ts: -------------------------------------------------------------------------------- 1 | import { EdgeTTS, createVTT, createSRT } from '../dist/index.js'; 2 | import { promises as fs } from 'fs'; 3 | import path from 'path'; 4 | import { fileURLToPath } from 'url'; 5 | 6 | const __filename = fileURLToPath(import.meta.url); 7 | const __dirname = path.dirname(__filename); 8 | 9 | const TEXT = 'Hello, world! 
This is a test of the simple edge-tts API.'; 10 | const VOICE = 'en-US-EmmaMultilingualNeural'; 11 | const OUTPUT_FILE = path.join(__dirname, 'simple-test.mp3'); 12 | 13 | async function main() { 14 | // Create TTS instance with prosody options 15 | const tts = new EdgeTTS(TEXT, VOICE, { 16 | rate: '+10%', 17 | volume: '+0%', 18 | pitch: '+0Hz' 19 | }); 20 | 21 | try { 22 | // Synthesize speech (one-shot) 23 | const result = await tts.synthesize(); 24 | 25 | // Save audio file 26 | const audioBuffer = Buffer.from(await result.audio.arrayBuffer()); 27 | await fs.writeFile(OUTPUT_FILE, audioBuffer); 28 | 29 | // Generate subtitle files 30 | const vttContent = createVTT(result.subtitle); 31 | const srtContent = createSRT(result.subtitle); 32 | 33 | await fs.writeFile(path.join(__dirname, 'subtitles.vtt'), vttContent); 34 | await fs.writeFile(path.join(__dirname, 'subtitles.srt'), srtContent); 35 | 36 | console.log(`Audio saved to ${OUTPUT_FILE}`); 37 | console.log(`Generated ${result.subtitle.length} word boundaries`); 38 | console.log('VTT preview:', vttContent.substring(0, 200) + '...'); 39 | console.log('SRT preview:', srtContent.substring(0, 200) + '...'); 40 | } catch (error) { 41 | console.error('Synthesis failed:', error); 42 | } 43 | } 44 | 45 | // ESM equivalent of require.main === module 46 | if (import.meta.url === `file://${process.argv[1]}`) { 47 | main().catch(console.error); 48 | } -------------------------------------------------------------------------------- /.github/workflows/sync-proxy-package.yml: -------------------------------------------------------------------------------- 1 | # Add this file to your edge-tts-universal repo at: 2 | # .github/workflows/sync-proxy-package.yml 3 | 4 | name: Sync Proxy Package Version 5 | 6 | on: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | inputs: 11 | version: 12 | description: 'Version to sync to proxy (e.g., 1.2.3)' 13 | required: true 14 | type: string 15 | 16 | jobs: 17 | 
trigger-proxy-sync: 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - name: Extract version 22 | id: version 23 | run: | 24 | if [ "${{ github.event_name }}" = "release" ]; then 25 | # Extract version from release tag (v1.2.3 -> 1.2.3) 26 | VERSION="${{ github.event.release.tag_name }}" 27 | VERSION=${VERSION#v} # Remove 'v' prefix if present 28 | else 29 | VERSION="${{ github.event.inputs.version }}" 30 | fi 31 | 32 | echo "version=$VERSION" >> $GITHUB_OUTPUT 33 | echo "Syncing version: $VERSION" 34 | 35 | - name: Trigger proxy repo sync 36 | uses: peter-evans/repository-dispatch@v3 37 | with: 38 | token: ${{ secrets.PROXY_REPO_TOKEN }} 39 | repository: travisvn/universal-edge-tts 40 | event-type: sync-version 41 | client-payload: | 42 | { 43 | "version": "${{ steps.version.outputs.version }}", 44 | "main_version": "${{ steps.version.outputs.version }}", 45 | "triggered_by": "${{ github.repository }}", 46 | "trigger_event": "${{ github.event_name }}" 47 | } 48 | 49 | - name: Log trigger 50 | run: | 51 | echo "✅ Triggered proxy repo sync for version ${{ steps.version.outputs.version }}" 52 | echo "📦 Repository: travisvn/universal-edge-tts" 53 | echo "🔄 Event: sync-version" 54 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "env": { 4 | "es2022": true, 5 | "node": true 6 | }, 7 | "extends": ["eslint:recommended", "plugin:@typescript-eslint/recommended"], 8 | "parser": "@typescript-eslint/parser", 9 | "parserOptions": { 10 | "ecmaVersion": 2022, 11 | "sourceType": "module" 12 | }, 13 | "plugins": ["@typescript-eslint"], 14 | "rules": { 15 | "@typescript-eslint/no-unused-vars": "off", 16 | "@typescript-eslint/no-explicit-any": "off", 17 | "@typescript-eslint/explicit-function-return-type": "off", 18 | "no-case-declarations": "off", 19 | "prefer-const": "error", 20 | "no-var": "error" 21 | }, 22 | 
"overrides": [ 23 | { 24 | "files": ["src/browser*.ts", "src/*browser*"], 25 | "env": { 26 | "browser": true, 27 | "node": false 28 | }, 29 | "globals": { 30 | "WebSocket": "readonly", 31 | "Blob": "readonly", 32 | "URL": "readonly", 33 | "TextEncoder": "readonly", 34 | "crypto": "readonly" 35 | } 36 | }, 37 | { 38 | "files": ["src/webworker*.ts", "src/*worker*"], 39 | "env": { 40 | "worker": true, 41 | "node": false, 42 | "browser": false 43 | }, 44 | "globals": { 45 | "importScripts": "readonly", 46 | "WorkerGlobalScope": "readonly", 47 | "postMessage": "readonly", 48 | "self": "readonly" 49 | } 50 | }, 51 | { 52 | "files": ["src/isomorphic*.ts", "src/*isomorphic*"], 53 | "env": { 54 | "node": true, 55 | "browser": true 56 | }, 57 | "rules": { 58 | "@typescript-eslint/no-explicit-any": "off" 59 | } 60 | }, 61 | { 62 | "files": ["examples/**/*.ts"], 63 | "env": { 64 | "node": true, 65 | "browser": true 66 | }, 67 | "rules": { 68 | "@typescript-eslint/no-explicit-any": "off", 69 | "no-console": "off" 70 | } 71 | } 72 | ] 73 | } 74 | -------------------------------------------------------------------------------- /tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup' 2 | 3 | export default defineConfig([ 4 | // Node.js builds only 5 | { 6 | entry: { 7 | index: 'src/index.ts', 8 | 'runtime-detection': 'src/runtime-detection.ts', 9 | }, 10 | format: ['cjs', 'esm'], 11 | dts: true, 12 | sourcemap: true, 13 | clean: true, 14 | splitting: false, 15 | treeshake: true, 16 | minify: false, 17 | target: 'es2020', 18 | outDir: 'dist', 19 | platform: 'node', 20 | external: [ 21 | 'axios', 22 | 'https-proxy-agent', 23 | 'uuid', 24 | 'ws', 25 | 'xml-escape', 26 | 'isomorphic-ws', 27 | 'cross-fetch', 28 | 'buffer' 29 | ] 30 | }, 31 | // Universal/Isomorphic builds - truly universal with all dependencies bundled 32 | { 33 | entry: { 34 | isomorphic: 'src/isomorphic-entry.ts', 35 | }, 36 | format: 
['cjs', 'esm'], 37 | dts: true, 38 | sourcemap: true, 39 | clean: false, 40 | splitting: false, 41 | treeshake: true, 42 | minify: false, 43 | target: 'es2020', 44 | outDir: 'dist', 45 | platform: 'neutral', // Neither node nor browser - truly universal 46 | external: [ 47 | // Bundle everything for universal compatibility 48 | ], 49 | define: { 50 | 'process.env.NODE_ENV': '"production"' 51 | } 52 | }, 53 | // Browser builds (separate config to avoid conflicts) 54 | { 55 | entry: { 56 | browser: 'src/browser-entry.ts', 57 | webworker: 'src/webworker-entry.ts', 58 | }, 59 | format: ['cjs', 'esm'], 60 | dts: true, 61 | sourcemap: true, 62 | clean: false, // Don't clean since previous builds already ran 63 | splitting: false, 64 | treeshake: true, 65 | minify: false, 66 | target: 'es2020', 67 | outDir: 'dist', 68 | platform: 'browser', 69 | external: [ 70 | // No external dependencies for browser builds - all bundled with browser-native implementations 71 | ] 72 | } 73 | ]) -------------------------------------------------------------------------------- /tests/communicate.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { Communicate, SubMaker } from '../dist/index.js'; 4 | 5 | describe('Streaming API', () => { 6 | test('Communicate can be instantiated', () => { 7 | const communicate = new Communicate('Hello, world!', { 8 | voice: 'en-US-EmmaMultilingualNeural' 9 | }); 10 | assert(communicate instanceof Communicate, 'Should create Communicate instance'); 11 | }); 12 | 13 | test('Communicate accepts configuration options', () => { 14 | const communicate = new Communicate('Test text', { 15 | voice: 'en-US-EmmaMultilingualNeural', 16 | rate: '+20%', 17 | volume: '+10%', 18 | pitch: '+5Hz', 19 | connectionTimeout: 5000 20 | }); 21 | 22 | assert(communicate instanceof Communicate, 'Should create Communicate instance with options'); 23 | 
}); 24 | 25 | test('SubMaker can process word boundary events', () => { 26 | const subMaker = new SubMaker(); 27 | 28 | // Mock word boundary event 29 | const wordBoundary = { 30 | type: 'WordBoundary', 31 | offset: 0, 32 | duration: 1000000, 33 | text: 'Hello' 34 | }; 35 | 36 | subMaker.feed(wordBoundary); 37 | const srt = subMaker.getSrt(); 38 | 39 | assert(typeof srt === 'string', 'SubMaker should return SRT string'); 40 | assert(srt.includes('Hello'), 'SRT should contain the word'); 41 | }); 42 | 43 | test('Communicate stream method exists and is async iterable', async () => { 44 | const communicate = new Communicate('Test', { 45 | voice: 'en-US-EmmaMultilingualNeural' 46 | }); 47 | 48 | // Check that stream method exists 49 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 50 | 51 | // Check that it returns an async iterable 52 | const stream = communicate.stream(); 53 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 54 | }); 55 | }); -------------------------------------------------------------------------------- /tests/deno/voices.deno.ts: -------------------------------------------------------------------------------- 1 | import { assertEquals, assert } from "https://deno.land/std@0.208.0/assert/mod.ts"; 2 | import { VoicesManager, listVoices } from "../../dist/isomorphic.js"; 3 | 4 | Deno.test("Voice Management - listVoices returns array of voices", async () => { 5 | try { 6 | const voices = await listVoices(); 7 | assert(Array.isArray(voices), 'listVoices should return an array'); 8 | assert(voices.length > 0, 'Should have at least one voice'); 9 | 10 | // Check voice structure 11 | const voice = voices[0]; 12 | assert(typeof voice.Name === 'string', 'Voice should have Name property'); 13 | assert(typeof voice.ShortName === 'string', 'Voice should have ShortName property'); 14 | assert(['Female', 'Male'].includes(voice.Gender), 'Voice should have valid Gender'); 15 | assert(typeof 
voice.Locale === 'string', 'Voice should have Locale property'); 16 | } catch (error) { 17 | // If network/service is unavailable, just check that error is reasonable 18 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 19 | } 20 | }); 21 | 22 | Deno.test("Voice Management - VoicesManager can filter voices", async () => { 23 | try { 24 | const voicesManager = await VoicesManager.create(); 25 | 26 | // Test finding English voices 27 | const englishVoices = voicesManager.find({ Language: 'en' }); 28 | assert(Array.isArray(englishVoices), 'Should return array'); 29 | assert(englishVoices.length > 0, 'Should find English voices'); 30 | 31 | for (const voice of englishVoices) { 32 | assertEquals(voice.Language, 'en', 'All returned voices should be English'); 33 | } 34 | 35 | // Test finding female voices 36 | const femaleVoices = voicesManager.find({ Gender: 'Female' }); 37 | assert(Array.isArray(femaleVoices), 'Should return array'); 38 | 39 | for (const voice of femaleVoices) { 40 | assertEquals(voice.Gender, 'Female', 'All returned voices should be Female'); 41 | } 42 | } catch (error) { 43 | // If network/service is unavailable, just check that error is reasonable 44 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 45 | } 46 | }); 47 | 48 | -------------------------------------------------------------------------------- /tests/types.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { 4 | NoAudioReceived, 5 | UnexpectedResponse, 6 | UnknownResponse, 7 | WebSocketError 8 | } from '../dist/index.js'; 9 | 10 | describe('Types and Exceptions', () => { 11 | test('Exception classes can be instantiated', () => { 12 | const noAudio = new NoAudioReceived('No audio received'); 13 | const unexpected = new UnexpectedResponse('Unexpected response'); 14 | const unknown = new 
UnknownResponse('Unknown response'); 15 | const wsError = new WebSocketError('WebSocket error'); 16 | 17 | assert(noAudio instanceof Error, 'NoAudioReceived should extend Error'); 18 | assert(unexpected instanceof Error, 'UnexpectedResponse should extend Error'); 19 | assert(unknown instanceof Error, 'UnknownResponse should extend Error'); 20 | assert(wsError instanceof Error, 'WebSocketError should extend Error'); 21 | 22 | assert(noAudio instanceof NoAudioReceived, 'Should be instance of NoAudioReceived'); 23 | assert(unexpected instanceof UnexpectedResponse, 'Should be instance of UnexpectedResponse'); 24 | assert(unknown instanceof UnknownResponse, 'Should be instance of UnknownResponse'); 25 | assert(wsError instanceof WebSocketError, 'Should be instance of WebSocketError'); 26 | }); 27 | 28 | test('Exception classes have proper names', () => { 29 | const noAudio = new NoAudioReceived('test'); 30 | const unexpected = new UnexpectedResponse('test'); 31 | const unknown = new UnknownResponse('test'); 32 | const wsError = new WebSocketError('test'); 33 | 34 | assert(noAudio.name === 'NoAudioReceived', 'Should have correct name'); 35 | assert(unexpected.name === 'UnexpectedResponse', 'Should have correct name'); 36 | assert(unknown.name === 'UnknownResponse', 'Should have correct name'); 37 | assert(wsError.name === 'WebSocketError', 'Should have correct name'); 38 | }); 39 | 40 | test('Exception classes preserve error messages', () => { 41 | const message = 'Test error message'; 42 | const error = new NoAudioReceived(message); 43 | 44 | assert(error.message === message, 'Should preserve error message'); 45 | }); 46 | }); -------------------------------------------------------------------------------- /src/browser-drm.ts: -------------------------------------------------------------------------------- 1 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 2 | import { SkewAdjustmentError } from "./exceptions"; 3 | 4 | const WIN_EPOCH = 11644473600; 5 | const 
S_TO_NS = 1e9; 6 | 7 | /** 8 | * Browser-specific DRM class that uses only Web APIs. 9 | * Uses the Web Crypto API instead of Node.js crypto module. 10 | */ 11 | export class BrowserDRM { 12 | private static clockSkewSeconds = 0.0; 13 | 14 | static adjClockSkewSeconds(skewSeconds: number) { 15 | BrowserDRM.clockSkewSeconds += skewSeconds; 16 | } 17 | 18 | static getUnixTimestamp(): number { 19 | return Date.now() / 1000 + BrowserDRM.clockSkewSeconds; 20 | } 21 | 22 | static parseRfc2616Date(date: string): number | null { 23 | try { 24 | return new Date(date).getTime() / 1000; 25 | } catch (e) { 26 | return null; 27 | } 28 | } 29 | 30 | static handleClientResponseError(response: { status: number; headers: Record }) { 31 | if (!response.headers) { 32 | throw new SkewAdjustmentError("No headers in response."); 33 | } 34 | const serverDate = response.headers["date"] || response.headers["Date"]; 35 | if (!serverDate) { 36 | throw new SkewAdjustmentError("No server date in headers."); 37 | } 38 | const serverDateParsed = BrowserDRM.parseRfc2616Date(serverDate); 39 | if (serverDateParsed === null) { 40 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 41 | } 42 | const clientDate = BrowserDRM.getUnixTimestamp(); 43 | BrowserDRM.adjClockSkewSeconds(serverDateParsed - clientDate); 44 | } 45 | 46 | static async generateSecMsGec(): Promise { 47 | let ticks = BrowserDRM.getUnixTimestamp(); 48 | ticks += WIN_EPOCH; 49 | ticks -= ticks % 300; 50 | ticks *= S_TO_NS / 100; 51 | 52 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 53 | 54 | // Use Web Crypto API - guaranteed to be available in browsers 55 | const encoder = new TextEncoder(); 56 | const data = encoder.encode(strToHash); 57 | const hashBuffer = await crypto.subtle.digest('SHA-256', data); 58 | const hashArray = Array.from(new Uint8Array(hashBuffer)); 59 | return hashArray.map(b => b.toString(16).padStart(2, '0')).join('').toUpperCase(); 60 | } 61 | } 
-------------------------------------------------------------------------------- /src/constants.ts: -------------------------------------------------------------------------------- 1 | /** Base URL for Microsoft Edge TTS service endpoints */ 2 | export const BASE_URL = "speech.platform.bing.com/consumer/speech/synthesize/readaloud"; 3 | 4 | /** Trusted client token used for authentication with the TTS service */ 5 | export const TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"; 6 | 7 | /** WebSocket URL for TTS streaming synthesis */ 8 | export const WSS_URL = `wss://${BASE_URL}/edge/v1?TrustedClientToken=${TRUSTED_CLIENT_TOKEN}`; 9 | 10 | /** HTTP URL for fetching available voices list */ 11 | export const VOICE_LIST_URL = `https://${BASE_URL}/voices/list?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`; 12 | 13 | /** Default voice to use when none is specified */ 14 | export const DEFAULT_VOICE = "en-US-EmmaMultilingualNeural"; 15 | 16 | /** Version string for Chromium browser emulation */ 17 | export const CHROMIUM_FULL_VERSION = "130.0.2849.68"; 18 | 19 | /** Major version number extracted from the full Chromium version */ 20 | export const CHROMIUM_MAJOR_VERSION = CHROMIUM_FULL_VERSION.split(".")[0]; 21 | 22 | /** Security token version for API authentication */ 23 | export const SEC_MS_GEC_VERSION = `1-${CHROMIUM_FULL_VERSION}`; 24 | 25 | /** Base HTTP headers for API requests, mimicking a real browser */ 26 | export const BASE_HEADERS = { 27 | "User-Agent": `Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${CHROMIUM_MAJOR_VERSION}.0.0.0 Safari/537.36 Edg/${CHROMIUM_MAJOR_VERSION}.0.0.0`, 28 | "Accept-Encoding": "gzip, deflate, br", 29 | "Accept-Language": "en-US,en;q=0.9", 30 | }; 31 | 32 | /** HTTP headers specific to WebSocket connection requests */ 33 | export const WSS_HEADERS = { 34 | ...BASE_HEADERS, 35 | "Pragma": "no-cache", 36 | "Cache-Control": "no-cache", 37 | "Origin": 
"chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold", 38 | }; 39 | 40 | /** HTTP headers specific to voice list API requests */ 41 | export const VOICE_HEADERS = { 42 | ...BASE_HEADERS, 43 | "Authority": "speech.platform.bing.com", 44 | "Sec-CH-UA": `" Not;A Brand";v="99", "Microsoft Edge";v="${CHROMIUM_MAJOR_VERSION}", "Chromium";v="${CHROMIUM_MAJOR_VERSION}"`, 45 | "Sec-CH-UA-Mobile": "?0", 46 | "Accept": "*/*", 47 | "Sec-Fetch-Site": "none", 48 | "Sec-Fetch-Mode": "cors", 49 | "Sec-Fetch-Dest": "empty", 50 | }; -------------------------------------------------------------------------------- /deno.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@edge-tts/universal", 3 | "version": "1.3.3", 4 | "description": "Universal text-to-speech library using Microsoft Edge's online TTS service. Works in Node.js and browsers WITHOUT needing Microsoft Edge, Windows, or an API key", 5 | "license": "AGPL-3.0", 6 | "author": "Travis ", 7 | "homepage": "https://github.com/travisvn/edge-tts-universal", 8 | "repository": { 9 | "type": "git", 10 | "url": "git+https://github.com/travisvn/edge-tts-universal.git" 11 | }, 12 | "exports": { 13 | ".": "./src/isomorphic-entry.ts", 14 | "./browser": "./src/browser-entry.ts", 15 | "./isomorphic": "./src/isomorphic-entry.ts", 16 | "./webworker": "./src/webworker-entry.ts" 17 | }, 18 | "imports": { 19 | "crypto": "node:crypto", 20 | "uuid": "npm:uuid@^11.1.0", 21 | "axios": "npm:axios@^1.12.1", 22 | "cross-fetch": "npm:cross-fetch@^4.1.0", 23 | "https-proxy-agent": "npm:https-proxy-agent@^7.0.6", 24 | "isomorphic-ws": "npm:isomorphic-ws@^5.0.0", 25 | "ws": "npm:ws@^8.18.3", 26 | "xml-escape": "npm:xml-escape@^1.1.0" 27 | }, 28 | "tasks": { 29 | "dev": "deno run --allow-net --allow-read examples/isomorphic-example.ts", 30 | "test": "deno test --allow-net --allow-env --no-check --no-lock tests/deno/*.ts", 31 | "example:simple": "deno run --allow-net examples/simple-api.ts", 32 | 
"example:streaming": "deno run --allow-net examples/streaming.ts", 33 | "example:voices": "deno run --allow-net examples/listVoices.ts", 34 | "example:universal": "deno run --allow-net examples/universal-detection.ts", 35 | "example:isomorphic": "deno run --allow-net examples/isomorphic-example.ts" 36 | }, 37 | "compilerOptions": { 38 | "lib": ["deno.window", "deno.worker"], 39 | "strict": true 40 | }, 41 | "unstable": ["sloppy-imports"], 42 | "fmt": { 43 | "files": { 44 | "include": ["src/", "examples/"], 45 | "exclude": ["dist/", "node_modules/"] 46 | } 47 | }, 48 | "lint": { 49 | "files": { 50 | "include": ["src/", "examples/"], 51 | "exclude": ["dist/", "node_modules/"] 52 | } 53 | }, 54 | "publish": { 55 | "exclude": [ 56 | "node_modules/", 57 | "dist/", 58 | "examples/", 59 | "docs/", 60 | ".git/", 61 | "*.log" 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tests/isomorphic.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { IsomorphicCommunicate, IsomorphicVoicesManager, listVoicesIsomorphic } from '../dist/index.js'; 4 | 5 | describe('Isomorphic API', () => { 6 | test('IsomorphicCommunicate can be instantiated', () => { 7 | const communicate = new IsomorphicCommunicate('Hello, world!', { 8 | voice: 'en-US-EmmaMultilingualNeural' 9 | }); 10 | assert(communicate instanceof IsomorphicCommunicate, 'Should create IsomorphicCommunicate instance'); 11 | }); 12 | 13 | test('listVoicesIsomorphic returns array of voices', async () => { 14 | try { 15 | const voices = await listVoicesIsomorphic(); 16 | assert(Array.isArray(voices), 'Should return array'); 17 | assert(voices.length > 0, 'Should have voices'); 18 | 19 | const voice = voices[0]; 20 | assert(typeof voice.Name === 'string', 'Voice should have Name'); 21 | assert(typeof voice.ShortName === 'string', 'Voice should 
have ShortName'); 22 | } catch (error) { 23 | // If network/service is unavailable, just check error type 24 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 25 | } 26 | }); 27 | 28 | test('IsomorphicVoicesManager can filter voices', async () => { 29 | try { 30 | const voicesManager = await IsomorphicVoicesManager.create(); 31 | 32 | const englishVoices = voicesManager.find({ Language: 'en' }); 33 | assert(Array.isArray(englishVoices), 'Should return array'); 34 | 35 | for (const voice of englishVoices) { 36 | assert(voice.Language === 'en', 'All voices should be English'); 37 | } 38 | } catch (error) { 39 | // If network/service is unavailable, just check error type 40 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 41 | } 42 | }); 43 | 44 | test('IsomorphicCommunicate stream method exists', () => { 45 | const communicate = new IsomorphicCommunicate('Test', { 46 | voice: 'en-US-EmmaMultilingualNeural' 47 | }); 48 | 49 | assert(typeof communicate.stream === 'function', 'Should have stream method'); 50 | 51 | const stream = communicate.stream(); 52 | assert(typeof stream[Symbol.asyncIterator] === 'function', 'Should return async iterable'); 53 | }); 54 | }); -------------------------------------------------------------------------------- /src/browser-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Browser-specific entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs optimized specifically for browser environments, avoiding Node.js 5 | * dependencies and providing browser-native implementations where possible. 
6 | * 7 | * Key features: 8 | * - Browser-optimized implementations 9 | * - No Node.js dependencies 10 | * - Web API compatibility 11 | * - Smaller bundle size compared to isomorphic entry 12 | * - Support for Web Workers and main thread 13 | * 14 | * Note: This entry point is subject to browser CORS policies when making requests 15 | * to the Microsoft Edge TTS service. Consider using a proxy server for production 16 | * applications if CORS becomes an issue. 17 | * 18 | * @example 19 | * ```typescript 20 | * import { EdgeTTS, listVoices } from '@edge-tts/universal/browser'; 21 | * 22 | * // Browser-optimized TTS 23 | * const tts = new EdgeTTS('Hello from the browser!', 'en-US-EmmaMultilingualNeural'); 24 | * const result = await tts.synthesize(); 25 | * 26 | * // Play the audio 27 | * const audio = new Audio(URL.createObjectURL(result.audio)); 28 | * audio.play(); 29 | * ``` 30 | * 31 | * @module BrowserEntry 32 | */ 33 | 34 | // Browser-only entry point - exports only browser-compatible APIs 35 | // Use this in environments where Node.js dependencies are not available 36 | 37 | // Export both the old EdgeTTSBrowser class and the new simplified browser API 38 | export { EdgeTTSBrowser } from './browser'; 39 | 40 | // Export the new simplified browser-specific API as the main EdgeTTS 41 | export { 42 | BrowserEdgeTTS as EdgeTTS, 43 | ProsodyOptions, 44 | WordBoundary, 45 | SynthesisResult, 46 | createVTT, 47 | createSRT 48 | } from './browser-simple'; 49 | 50 | // Export browser-specific implementations to avoid Node.js dependencies 51 | export { 52 | BrowserCommunicate as Communicate, 53 | BrowserCommunicateOptions as CommunicateOptions 54 | } from './browser-communicate'; 55 | 56 | export { 57 | BrowserVoicesManager as VoicesManager, 58 | listVoices, 59 | BrowserFetchError as FetchError 60 | } from './browser-voices'; 61 | 62 | export { BrowserDRM as DRM } from './browser-drm'; 63 | 64 | // SubMaker works everywhere as it doesn't have environment dependencies 65 
| export { SubMaker } from './submaker'; 66 | 67 | // Common types and exceptions 68 | export * from './exceptions'; 69 | export * from './types'; -------------------------------------------------------------------------------- /src/exceptions.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Base exception class for all Edge TTS related errors. 3 | */ 4 | export class EdgeTTSException extends Error { 5 | constructor(message: string) { 6 | super(message); 7 | this.name = "EdgeTTSException"; 8 | } 9 | } 10 | 11 | /** 12 | * Exception raised when there's an error adjusting clock skew for API requests. 13 | * This typically occurs when the client and server clocks are significantly out of sync. 14 | */ 15 | export class SkewAdjustmentError extends EdgeTTSException { 16 | constructor(message: string) { 17 | super(message); 18 | this.name = "SkewAdjustmentError"; 19 | } 20 | } 21 | 22 | /** 23 | * Exception raised when an unknown response is received from the TTS service. 24 | * This indicates an unexpected message type or format that the client cannot handle. 25 | */ 26 | export class UnknownResponse extends EdgeTTSException { 27 | constructor(message: string) { 28 | super(message); 29 | this.name = "UnknownResponse"; 30 | } 31 | } 32 | 33 | /** 34 | * Exception raised when an unexpected response is received from the TTS service. 35 | * This indicates a response that doesn't match the expected protocol flow. 36 | */ 37 | export class UnexpectedResponse extends EdgeTTSException { 38 | constructor(message: string) { 39 | super(message); 40 | this.name = "UnexpectedResponse"; 41 | } 42 | } 43 | 44 | /** 45 | * Exception raised when no audio data is received during synthesis. 46 | * This typically indicates a problem with the synthesis request or service. 
47 | */ 48 | export class NoAudioReceived extends EdgeTTSException { 49 | constructor(message: string) { 50 | super(message); 51 | this.name = "NoAudioReceived"; 52 | } 53 | } 54 | 55 | /** 56 | * Exception raised when there's an error with the WebSocket connection. 57 | * This can occur during connection establishment, data transmission, or connection closure. 58 | */ 59 | export class WebSocketError extends EdgeTTSException { 60 | constructor(message: string) { 61 | super(message); 62 | this.name = "WebSocketError"; 63 | } 64 | } 65 | 66 | /** 67 | * Exception raised when an invalid value is provided to a function or method. 68 | * This is typically used for input validation errors. 69 | */ 70 | export class ValueError extends EdgeTTSException { 71 | constructor(message: string) { 72 | super(message); 73 | this.name = "ValueError"; 74 | } 75 | } -------------------------------------------------------------------------------- /examples/NAMING_EXAMPLES.md: -------------------------------------------------------------------------------- 1 | # API Naming Examples 2 | 3 | This library supports both "Isomorphic" and "Universal" naming conventions for cross-platform APIs. **Universal is the preferred naming**. 
4 | 5 | ## Preferred Universal Naming 6 | 7 | ```typescript 8 | // ✅ Preferred: Universal naming (clear and descriptive) 9 | import { 10 | UniversalEdgeTTS, 11 | UniversalCommunicate, 12 | UniversalVoicesManager, 13 | listVoicesUniversal 14 | } from 'edge-tts-universal'; 15 | 16 | // Simple API 17 | const tts = new UniversalEdgeTTS('Hello world', 'en-US-EmmaMultilingualNeural'); 18 | const result = await tts.synthesize(); 19 | 20 | // Streaming API 21 | const communicate = new UniversalCommunicate('Hello world'); 22 | for await (const chunk of communicate.stream()) { 23 | if (chunk.type === 'audio') { 24 | // Handle audio data 25 | } 26 | } 27 | 28 | // Voice management 29 | const voices = await listVoicesUniversal(); 30 | const voicesManager = await UniversalVoicesManager.create(); 31 | ``` 32 | 33 | ## Legacy Isomorphic Naming 34 | 35 | ```typescript 36 | // ⚠️ Legacy: Still supported but not recommended for new code 37 | import { 38 | IsomorphicEdgeTTS, 39 | IsomorphicCommunicate, 40 | IsomorphicVoicesManager, 41 | listVoicesIsomorphic 42 | } from 'edge-tts-universal'; 43 | 44 | // Same functionality, different naming 45 | const tts = new IsomorphicEdgeTTS('Hello world', 'en-US-EmmaMultilingualNeural'); 46 | const communicate = new IsomorphicCommunicate('Hello world'); 47 | const voices = await listVoicesIsomorphic(); 48 | ``` 49 | 50 | ## Platform-Specific APIs 51 | 52 | For platform-optimized code, you can still use platform-specific exports: 53 | 54 | ```typescript 55 | // Node.js optimized (with proxy support, etc.) 
56 | import { EdgeTTS, Communicate, VoicesManager } from 'edge-tts-universal'; 57 | 58 | // Browser optimized (smaller bundle size) 59 | import { EdgeTTSBrowser, BrowserCommunicate } from 'edge-tts-universal/browser'; 60 | ``` 61 | 62 | ## Migration Guide 63 | 64 | If you're currently using Isomorphic naming, you can easily migrate: 65 | 66 | ```typescript 67 | // Old code 68 | import { IsomorphicCommunicate } from 'edge-tts-universal'; 69 | 70 | // New code (just change the import) 71 | import { UniversalCommunicate as Communicate } from 'edge-tts-universal'; 72 | // OR 73 | import { UniversalCommunicate } from 'edge-tts-universal'; 74 | ``` 75 | 76 | Both naming conventions will continue to work, but new projects should prefer the "Universal" naming for clarity. -------------------------------------------------------------------------------- /examples/simple-vs-advanced.ts: -------------------------------------------------------------------------------- 1 | import { EdgeTTS, createVTT, createSRT, Communicate } from '../dist/index.js'; 2 | 3 | async function simpleApiExample() { 4 | console.log('=== Simple API Example (like code 54.ts) ==='); 5 | 6 | // Simple one-shot synthesis 7 | const tts = new EdgeTTS( 8 | 'Hello, this is a simple text-to-speech example!', 9 | 'Microsoft Server Speech Text to Speech Voice (en-US, EmmaMultilingualNeural)', 10 | { 11 | rate: '+10%', 12 | volume: '+0%', 13 | pitch: '+0Hz' 14 | } 15 | ); 16 | 17 | try { 18 | const result = await tts.synthesize(); 19 | 20 | console.log('Audio generated:', result.audio.size, 'bytes'); 21 | console.log('Word boundaries:', result.subtitle.length); 22 | 23 | // Generate subtitles 24 | const vttSubtitles = createVTT(result.subtitle); 25 | const srtSubtitles = createSRT(result.subtitle); 26 | 27 | console.log('VTT Subtitles:\n', vttSubtitles.substring(0, 200) + '...'); 28 | console.log('SRT Subtitles:\n', srtSubtitles.substring(0, 200) + '...'); 29 | 30 | } catch (error) { 31 | console.error('Simple API 
Error:', error); 32 | } 33 | } 34 | 35 | async function advancedApiExample() { 36 | console.log('\n=== Advanced Streaming API Example ==='); 37 | 38 | // Advanced streaming synthesis with real-time processing 39 | const communicate = new Communicate( 40 | 'This is an advanced streaming example with real-time processing capabilities.', 41 | { 42 | voice: 'en-US-EmmaMultilingualNeural', 43 | rate: '+10%', 44 | volume: '+0%', 45 | pitch: '+0Hz' 46 | } 47 | ); 48 | 49 | try { 50 | const audioChunks: Buffer[] = []; 51 | let wordCount = 0; 52 | 53 | for await (const chunk of communicate.stream()) { 54 | if (chunk.type === 'audio' && chunk.data) { 55 | audioChunks.push(chunk.data); 56 | console.log(`Received audio chunk: ${chunk.data.length} bytes`); 57 | } else if (chunk.type === 'WordBoundary') { 58 | wordCount++; 59 | console.log(`Word ${wordCount}: "${chunk.text}" at ${chunk.offset}ns`); 60 | } 61 | } 62 | 63 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 64 | console.log(`Total audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 65 | 66 | } catch (error) { 67 | console.error('Advanced API Error:', error); 68 | } 69 | } 70 | 71 | // Run both examples 72 | async function main() { 73 | await simpleApiExample(); 74 | await advancedApiExample(); 75 | } 76 | 77 | if (require.main === module) { 78 | main().catch(console.error); 79 | } -------------------------------------------------------------------------------- /src/isomorphic-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Isomorphic/Universal entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs that work consistently across both Node.js and browser environments, 5 | * providing maximum compatibility for text-to-speech functionality using Microsoft Edge's TTS service. 
6 | * 7 | * Key features: 8 | * - Universal compatibility (Node.js, browsers, web workers) 9 | * - No platform-specific dependencies in the API surface 10 | * - Consistent behavior across environments 11 | * - Built-in proxy support for Node.js 12 | * - CORS-aware browser implementation 13 | * 14 | * @example 15 | * ```typescript 16 | * import { EdgeTTS, listVoices } from '@edge-tts/universal'; 17 | * 18 | * // Works in both Node.js and browsers 19 | * const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 20 | * const result = await tts.synthesize(); 21 | * 22 | * // Get available voices 23 | * const voices = await listVoices(); 24 | * ``` 25 | * 26 | * @module IsomorphicEntry 27 | */ 28 | 29 | // Isomorphic/Universal entry point - exports only APIs that work in both Node.js and browsers 30 | // Use this for maximum compatibility across environments 31 | 32 | export { 33 | IsomorphicCommunicate as Communicate, 34 | IsomorphicCommunicateOptions as CommunicateOptions 35 | } from './isomorphic-communicate'; 36 | 37 | export { 38 | IsomorphicVoicesManager as VoicesManager, 39 | listVoices, 40 | FetchError 41 | } from './isomorphic-voices'; 42 | 43 | export { IsomorphicDRM as DRM } from './isomorphic-drm'; 44 | 45 | // Simple API using isomorphic backend (all from isomorphic-simple to avoid Node.js deps) 46 | export { 47 | IsomorphicEdgeTTS as EdgeTTS, 48 | ProsodyOptions, 49 | WordBoundary, 50 | SynthesisResult, 51 | createVTT, 52 | createSRT 53 | } from './isomorphic-simple'; 54 | 55 | // Utility for creating subtitles (works everywhere) 56 | export { SubMaker } from './submaker'; 57 | 58 | // Common types and exceptions 59 | export * from './exceptions'; 60 | export * from './types'; 61 | 62 | // Universal aliases (preferred naming convention) 63 | export { 64 | IsomorphicCommunicate as UniversalCommunicate, 65 | IsomorphicCommunicateOptions as UniversalCommunicateOptions 66 | } from './isomorphic-communicate'; 67 | 68 | export { 69 | 
IsomorphicVoicesManager as UniversalVoicesManager, 70 | listVoices as listVoicesUniversal, 71 | FetchError as UniversalFetchError 72 | } from './isomorphic-voices'; 73 | 74 | export { IsomorphicDRM as UniversalDRM } from './isomorphic-drm'; 75 | 76 | export { 77 | IsomorphicEdgeTTS as UniversalEdgeTTS 78 | } from './isomorphic-simple'; -------------------------------------------------------------------------------- /src/isomorphic-drm.ts: -------------------------------------------------------------------------------- 1 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 2 | import { SkewAdjustmentError } from "./exceptions"; 3 | 4 | const WIN_EPOCH = 11644473600; 5 | const S_TO_NS = 1e9; 6 | 7 | /** 8 | * Isomorphic DRM class that works in both Node.js and browsers. 9 | * Uses appropriate crypto APIs based on the environment. 10 | */ 11 | export class IsomorphicDRM { 12 | private static clockSkewSeconds = 0.0; 13 | 14 | static adjClockSkewSeconds(skewSeconds: number) { 15 | IsomorphicDRM.clockSkewSeconds += skewSeconds; 16 | } 17 | 18 | static getUnixTimestamp(): number { 19 | return Date.now() / 1000 + IsomorphicDRM.clockSkewSeconds; 20 | } 21 | 22 | static parseRfc2616Date(date: string): number | null { 23 | try { 24 | return new Date(date).getTime() / 1000; 25 | } catch (e) { 26 | return null; 27 | } 28 | } 29 | 30 | static handleClientResponseError(response: { status: number; headers: any }) { 31 | let serverDate: string | null = null; 32 | 33 | if ('headers' in response && typeof response.headers === 'object') { 34 | if ('get' in response.headers && typeof response.headers.get === 'function') { 35 | // Fetch Response object 36 | serverDate = response.headers.get("date"); 37 | } else { 38 | // Plain object with headers 39 | const headers = response.headers as Record; 40 | serverDate = headers["date"] || headers["Date"]; 41 | } 42 | } 43 | 44 | if (!serverDate) { 45 | throw new SkewAdjustmentError("No server date in headers."); 46 | } 47 | const 
serverDateParsed = IsomorphicDRM.parseRfc2616Date(serverDate); 48 | if (serverDateParsed === null) { 49 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 50 | } 51 | const clientDate = IsomorphicDRM.getUnixTimestamp(); 52 | IsomorphicDRM.adjClockSkewSeconds(serverDateParsed - clientDate); 53 | } 54 | 55 | static async generateSecMsGec(): Promise { 56 | let ticks = IsomorphicDRM.getUnixTimestamp(); 57 | ticks += WIN_EPOCH; 58 | ticks -= ticks % 300; 59 | ticks *= S_TO_NS / 100; 60 | 61 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 62 | 63 | // Use Web Crypto API directly - available in both Node.js 16+ and browsers 64 | if (!globalThis.crypto || !globalThis.crypto.subtle) { 65 | throw new Error('Web Crypto API not available'); 66 | } 67 | 68 | const encoder = new TextEncoder(); 69 | const data = encoder.encode(strToHash); 70 | const hashBuffer = await globalThis.crypto.subtle.digest('SHA-256', data); 71 | const hashArray = Array.from(new Uint8Array(hashBuffer)); 72 | return hashArray.map(b => b.toString(16).padStart(2, '0')).join('').toUpperCase(); 73 | } 74 | } -------------------------------------------------------------------------------- /tests/simple-api.test.js: -------------------------------------------------------------------------------- 1 | import { test, describe } from 'node:test'; 2 | import assert from 'node:assert'; 3 | import { EdgeTTS, createVTT, createSRT } from '../dist/index.js'; 4 | 5 | describe('Simple API', () => { 6 | test('EdgeTTS can be instantiated with text and voice', () => { 7 | const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 8 | assert(tts instanceof EdgeTTS, 'Should create EdgeTTS instance'); 9 | }); 10 | 11 | test('EdgeTTS synthesize method returns proper result structure', async () => { 12 | const tts = new EdgeTTS('Hello, test!', 'en-US-EmmaMultilingualNeural'); 13 | 14 | try { 15 | const result = await tts.synthesize(); 16 | 17 | // Check result structure 
18 | assert(typeof result === 'object', 'Result should be an object'); 19 | assert(result.audio instanceof Blob, 'Result should have audio Blob'); 20 | assert(Array.isArray(result.subtitle), 'Result should have subtitle array'); 21 | 22 | // Check audio 23 | assert(result.audio.size > 0, 'Audio should have content'); 24 | assert(result.audio.type.includes('audio'), 'Audio should have audio mime type'); 25 | 26 | // Check subtitles structure if present 27 | if (result.subtitle.length > 0) { 28 | const sub = result.subtitle[0]; 29 | assert(typeof sub.offset === 'number', 'Subtitle should have offset'); 30 | assert(typeof sub.duration === 'number', 'Subtitle should have duration'); 31 | assert(typeof sub.text === 'string', 'Subtitle should have text'); 32 | } 33 | } catch (error) { 34 | // If network/service is unavailable, just check that error is reasonable 35 | assert(error instanceof Error, 'Should throw proper Error if service unavailable'); 36 | } 37 | }); 38 | 39 | test('createVTT and createSRT work with subtitle data', () => { 40 | const mockSubtitles = [ 41 | { offset: 0, duration: 1000000, text: 'Hello' }, 42 | { offset: 1000000, duration: 1000000, text: 'world' } 43 | ]; 44 | 45 | const vtt = createVTT(mockSubtitles); 46 | const srt = createSRT(mockSubtitles); 47 | 48 | assert(typeof vtt === 'string', 'createVTT should return string'); 49 | assert(typeof srt === 'string', 'createSRT should return string'); 50 | assert(vtt.includes('WEBVTT'), 'VTT should contain WEBVTT header'); 51 | assert(srt.includes('Hello'), 'SRT should contain subtitle text'); 52 | }); 53 | 54 | test('EdgeTTS accepts prosody options', () => { 55 | const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural', { 56 | rate: '+10%', 57 | volume: '+0%', 58 | pitch: '+5Hz' 59 | }); 60 | 61 | assert(tts instanceof EdgeTTS, 'Should create EdgeTTS instance with prosody options'); 62 | }); 63 | }); -------------------------------------------------------------------------------- 
/src/drm.ts: -------------------------------------------------------------------------------- 1 | import { createHash } from 'crypto'; 2 | import { TRUSTED_CLIENT_TOKEN } from './constants'; 3 | import { SkewAdjustmentError } from "./exceptions"; 4 | import { AxiosError } from "axios"; 5 | 6 | const WIN_EPOCH = 11644473600; 7 | const S_TO_NS = 1e9; 8 | 9 | /** 10 | * Digital Rights Management (DRM) class for handling authentication with Microsoft Edge TTS service. 11 | * Manages clock synchronization and security token generation for Node.js environments. 12 | */ 13 | export class DRM { 14 | private static clockSkewSeconds = 0.0; 15 | 16 | /** 17 | * Adjusts the clock skew to synchronize with server time. 18 | * @param skewSeconds - Number of seconds to adjust the clock by 19 | */ 20 | static adjClockSkewSeconds(skewSeconds: number) { 21 | DRM.clockSkewSeconds += skewSeconds; 22 | } 23 | 24 | /** 25 | * Gets the current Unix timestamp adjusted for clock skew. 26 | * @returns Unix timestamp in seconds 27 | */ 28 | static getUnixTimestamp(): number { 29 | return Date.now() / 1000 + DRM.clockSkewSeconds; 30 | } 31 | 32 | /** 33 | * Parses an RFC 2616 date string into a Unix timestamp. 34 | * @param date - RFC 2616 formatted date string 35 | * @returns Unix timestamp in seconds, or null if parsing fails 36 | */ 37 | static parseRfc2616Date(date: string): number | null { 38 | try { 39 | // The python version uses strptime with %Z, but it mentions it's not quite right. 40 | // JS's Date parsing is generally good with RFC 2616 dates. 41 | // And since it's UTC, it should be fine. 42 | return new Date(date).getTime() / 1000; 43 | } catch (e) { 44 | return null; 45 | } 46 | } 47 | 48 | /** 49 | * Handles client response errors by adjusting clock skew based on server date. 
50 | * @param e - Axios error containing server response headers 51 | * @throws {SkewAdjustmentError} If server date is missing or invalid 52 | */ 53 | static handleClientResponseError(e: AxiosError) { 54 | if (!e.response || !e.response.headers) { 55 | throw new SkewAdjustmentError("No server date in headers."); 56 | } 57 | const serverDate = e.response.headers["date"]; 58 | if (!serverDate || typeof serverDate !== 'string') { 59 | throw new SkewAdjustmentError("No server date in headers."); 60 | } 61 | const serverDateParsed = DRM.parseRfc2616Date(serverDate); 62 | if (serverDateParsed === null) { 63 | throw new SkewAdjustmentError(`Failed to parse server date: ${serverDate}`); 64 | } 65 | const clientDate = DRM.getUnixTimestamp(); 66 | DRM.adjClockSkewSeconds(serverDateParsed - clientDate); 67 | } 68 | 69 | /** 70 | * Generates the Sec-MS-GEC security token required for API authentication. 71 | * @returns Uppercase hexadecimal SHA-256 hash string 72 | */ 73 | static generateSecMsGec(): string { 74 | let ticks = DRM.getUnixTimestamp(); 75 | ticks += WIN_EPOCH; 76 | ticks -= ticks % 300; 77 | ticks *= S_TO_NS / 100; 78 | 79 | const strToHash = `${ticks.toFixed(0)}${TRUSTED_CLIENT_TOKEN}`; 80 | return createHash('sha256').update(strToHash, 'ascii').digest('hex').toUpperCase(); 81 | } 82 | } -------------------------------------------------------------------------------- /src/tts_config.ts: -------------------------------------------------------------------------------- 1 | import { ValueError } from "./exceptions"; 2 | 3 | /** 4 | * Interface defining the configuration options for TTS synthesis. 
5 | */ 6 | export interface ITTSConfig { 7 | /** Voice name to use for synthesis */ 8 | voice: string; 9 | /** Speech rate adjustment (e.g., "+20%", "-10%") */ 10 | rate: string; 11 | /** Volume level adjustment (e.g., "+50%", "-25%") */ 12 | volume: string; 13 | /** Pitch adjustment in Hz (e.g., "+5Hz", "-10Hz") */ 14 | pitch: string; 15 | } 16 | 17 | /** 18 | * Configuration class for TTS synthesis parameters. 19 | * Handles voice name normalization and parameter validation. 20 | * 21 | * @example 22 | * ```typescript 23 | * const config = new TTSConfig({ 24 | * voice: 'en-US-EmmaMultilingualNeural', 25 | * rate: '+20%', 26 | * volume: '+10%', 27 | * pitch: '+5Hz' 28 | * }); 29 | * ``` 30 | */ 31 | export class TTSConfig implements ITTSConfig { 32 | public voice: string; 33 | public rate: string; 34 | public volume: string; 35 | public pitch: string; 36 | 37 | /** 38 | * Creates a new TTSConfig instance with the specified parameters. 39 | * 40 | * @param options - Configuration options 41 | * @param options.voice - Voice name (supports both short and full formats) 42 | * @param options.rate - Speech rate adjustment (default: "+0%") 43 | * @param options.volume - Volume adjustment (default: "+0%") 44 | * @param options.pitch - Pitch adjustment (default: "+0Hz") 45 | * @throws {ValueError} If any parameter has an invalid format 46 | */ 47 | constructor({ 48 | voice, 49 | rate = "+0%", 50 | volume = "+0%", 51 | pitch = "+0Hz", 52 | }: { 53 | voice: string, 54 | rate?: string, 55 | volume?: string, 56 | pitch?: string, 57 | }) { 58 | this.voice = voice; 59 | this.rate = rate; 60 | this.volume = volume; 61 | this.pitch = pitch; 62 | 63 | this.validate(); 64 | } 65 | 66 | private validate() { 67 | // Voice validation and transformation 68 | const match = /^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$/.exec(this.voice); 69 | if (match) { 70 | const [, lang] = match; 71 | let [, , region, name] = match; 72 | if (name.includes('-')) { 73 | const parts = name.split('-'); 74 | 
region += `-${parts[0]}`; 75 | name = parts[1]; 76 | } 77 | this.voice = `Microsoft Server Speech Text to Speech Voice (${lang}-${region}, ${name})`; 78 | } 79 | 80 | TTSConfig.validateStringParam( 81 | "voice", 82 | this.voice, 83 | /^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$/ 84 | ); 85 | TTSConfig.validateStringParam("rate", this.rate, /^[+-]\d+%$/); 86 | TTSConfig.validateStringParam("volume", this.volume, /^[+-]\d+%$/); 87 | TTSConfig.validateStringParam("pitch", this.pitch, /^[+-]\d+Hz$/); 88 | } 89 | 90 | private static validateStringParam(paramName: string, paramValue: string, pattern: RegExp) { 91 | if (typeof paramValue !== 'string') { 92 | throw new TypeError(`${paramName} must be a string`); 93 | } 94 | if (!pattern.test(paramValue)) { 95 | throw new ValueError(`Invalid ${paramName} '${paramValue}'.`); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # 
Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # vitepress build output 108 | **/.vitepress/dist 109 | 110 | # vitepress cache directory 111 | **/.vitepress/cache 112 | 113 | # Docusaurus cache and generated files 114 | .docusaurus 115 | 116 | # Serverless directories 117 | .serverless/ 118 | 119 | # FuseBox cache 120 | .fusebox/ 121 | 122 | # DynamoDB Local files 123 | .dynamodb/ 124 | 125 | # TernJS port file 126 | .tern-port 127 | 128 | # Stores VSCode versions used for testing VSCode extensions 129 | .vscode-test 130 | 131 | # yarn v2 132 | .yarn/cache 133 | .yarn/unplugged 134 | .yarn/build-state.yml 135 | .yarn/install-state.gz 136 | .pnp.* 137 | 138 | node_modules/ 139 | dist/ 140 | *.log 141 | *.mp3 142 | *.wav 143 | *.srt 144 | .env 145 | .DS_Store 146 | coverage/ 147 | 
.nyc_output/ 148 | *.tgz 149 | 150 | 151 | *.mp3 152 | bin/ 153 | 154 | docs/ 155 | .notes/ 156 | 157 | CLAUDE.md 158 | .claude/ 159 | .cursorignore 160 | .cursor/ 161 | 162 | # Deno lockfile - let CI generate fresh 163 | deno.lock 164 | 165 | *.vtt -------------------------------------------------------------------------------- /examples/naming-compatibility.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Naming Compatibility Test 3 | * Verifies that both Universal and Isomorphic naming work identically 4 | */ 5 | 6 | import { 7 | // Universal naming (preferred) 8 | UniversalCommunicate, 9 | UniversalVoicesManager, 10 | UniversalEdgeTTS, 11 | listVoicesUniversal, 12 | 13 | // Isomorphic naming (legacy) 14 | IsomorphicCommunicate, 15 | IsomorphicVoicesManager, 16 | IsomorphicEdgeTTS, 17 | listVoicesIsomorphic 18 | } from '../dist/index.js'; 19 | 20 | async function testNamingCompatibility() { 21 | console.log('🔄 Testing naming compatibility between Universal and Isomorphic APIs...'); 22 | 23 | try { 24 | // Test that both naming conventions are available 25 | console.log('✅ Universal imports available:', { 26 | UniversalCommunicate: typeof UniversalCommunicate, 27 | UniversalVoicesManager: typeof UniversalVoicesManager, 28 | UniversalEdgeTTS: typeof UniversalEdgeTTS, 29 | listVoicesUniversal: typeof listVoicesUniversal 30 | }); 31 | 32 | console.log('✅ Isomorphic imports available:', { 33 | IsomorphicCommunicate: typeof IsomorphicCommunicate, 34 | IsomorphicVoicesManager: typeof IsomorphicVoicesManager, 35 | IsomorphicEdgeTTS: typeof IsomorphicEdgeTTS, 36 | listVoicesIsomorphic: typeof listVoicesIsomorphic 37 | }); 38 | 39 | // Verify they are the same underlying classes 40 | console.log('🔍 Verifying aliases point to same implementations...'); 41 | console.log('UniversalCommunicate === IsomorphicCommunicate:', UniversalCommunicate === IsomorphicCommunicate); 42 | console.log('UniversalVoicesManager === 
IsomorphicVoicesManager:', UniversalVoicesManager === IsomorphicVoicesManager); 43 | console.log('UniversalEdgeTTS === IsomorphicEdgeTTS:', UniversalEdgeTTS === IsomorphicEdgeTTS); 44 | console.log('listVoicesUniversal === listVoicesIsomorphic:', listVoicesUniversal === listVoicesIsomorphic); 45 | 46 | // Test instantiation with both naming conventions 47 | const universalTTS = new UniversalEdgeTTS('Test', 'en-US-EmmaMultilingualNeural'); 48 | const isomorphicTTS = new IsomorphicEdgeTTS('Test', 'en-US-EmmaMultilingualNeural'); 49 | 50 | console.log('✅ Both naming conventions can be instantiated'); 51 | console.log('Universal TTS instance:', universalTTS.constructor.name); 52 | console.log('Isomorphic TTS instance:', isomorphicTTS.constructor.name); 53 | 54 | // Test that they behave identically 55 | const universalComm = new UniversalCommunicate('Test'); 56 | const isomorphicComm = new IsomorphicCommunicate('Test'); 57 | 58 | console.log('✅ Both communication classes instantiated'); 59 | console.log('Universal Communicate:', universalComm.constructor.name); 60 | console.log('Isomorphic Communicate:', isomorphicComm.constructor.name); 61 | 62 | console.log('🎉 All naming compatibility tests passed!'); 63 | console.log('💡 Recommendation: Use Universal naming for new projects'); 64 | 65 | } catch (error) { 66 | console.error('❌ Naming compatibility test failed:', error); 67 | } 68 | } 69 | 70 | // ESM equivalent check 71 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 72 | testNamingCompatibility().catch(console.error); 73 | } else if (typeof globalThis !== 'undefined') { 74 | (globalThis as any).runNamingCompatibilityTest = testNamingCompatibility; 75 | } 76 | 77 | export { testNamingCompatibility }; -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Represents a chunk of data received 
during TTS streaming. 3 | * Can contain either audio data or word boundary metadata. 4 | */ 5 | export type TTSChunk = { 6 | /** The type of chunk - either audio data or word boundary metadata */ 7 | type: "audio" | "WordBoundary"; 8 | /** Raw audio data buffer (present for audio chunks) */ 9 | data?: Buffer; 10 | /** Duration of the word in 100-nanosecond units (present for WordBoundary chunks) */ 11 | duration?: number; 12 | /** Offset from the beginning in 100-nanosecond units (present for WordBoundary chunks) */ 13 | offset?: number; 14 | /** The spoken text (present for WordBoundary chunks) */ 15 | text?: string; 16 | }; 17 | 18 | /** 19 | * Voice characteristics and personality tags from the Microsoft Edge TTS service. 20 | */ 21 | export type VoiceTag = { 22 | /** Content categories that the voice is optimized for */ 23 | ContentCategories: ( 24 | | "Cartoon" 25 | | "Conversation" 26 | | "Copilot" 27 | | "Dialect" 28 | | "General" 29 | | "News" 30 | | "Novel" 31 | | "Sports" 32 | )[]; 33 | /** Personality traits that describe the voice's characteristics */ 34 | VoicePersonalities: ( 35 | | "Approachable" 36 | | "Authentic" 37 | | "Authority" 38 | | "Bright" 39 | | "Caring" 40 | | "Casual" 41 | | "Cheerful" 42 | | "Clear" 43 | | "Comfort" 44 | | "Confident" 45 | | "Considerate" 46 | | "Conversational" 47 | | "Cute" 48 | | "Expressive" 49 | | "Friendly" 50 | | "Honest" 51 | | "Humorous" 52 | | "Lively" 53 | | "Passion" 54 | | "Pleasant" 55 | | "Positive" 56 | | "Professional" 57 | | "Rational" 58 | | "Reliable" 59 | | "Sincere" 60 | | "Sunshine" 61 | | "Warm" 62 | )[]; 63 | }; 64 | 65 | /** 66 | * Complete voice definition as returned by the Microsoft Edge TTS service. 
67 | */ 68 | export type Voice = { 69 | /** Full voice name identifier */ 70 | Name: string; 71 | /** Short name for the voice */ 72 | ShortName: string; 73 | /** Gender of the voice */ 74 | Gender: "Female" | "Male"; 75 | /** Locale code (e.g., "en-US", "zh-CN") */ 76 | Locale: string; 77 | /** Recommended audio codec for this voice */ 78 | SuggestedCodec: "audio-24khz-48kbitrate-mono-mp3"; 79 | /** Human-readable friendly name */ 80 | FriendlyName: string; 81 | /** Voice availability status */ 82 | Status: "GA"; 83 | /** Voice characteristics and personality traits */ 84 | VoiceTag: VoiceTag; 85 | }; 86 | 87 | /** 88 | * Extended voice type with language information for the VoicesManager. 89 | */ 90 | export type VoicesManagerVoice = Voice & { 91 | /** Language code extracted from the locale (e.g., "en" from "en-US") */ 92 | Language: string; 93 | }; 94 | 95 | /** 96 | * Filter criteria for finding voices using the VoicesManager. 97 | */ 98 | export type VoicesManagerFind = { 99 | /** Filter by voice gender */ 100 | Gender?: "Female" | "Male"; 101 | /** Filter by locale code */ 102 | Locale?: string; 103 | /** Filter by language code */ 104 | Language?: string; 105 | } 106 | 107 | /** 108 | * Internal state tracking for the Communicate class during streaming. 
109 | */ 110 | export type CommunicateState = { 111 | /** Buffer for partial text data */ 112 | partialText: Buffer; 113 | /** Timing offset compensation for multi-request scenarios */ 114 | offsetCompensation: number; 115 | /** Last recorded duration offset for timing calculations */ 116 | lastDurationOffset: number; 117 | /** Flag indicating if the stream method has been called */ 118 | streamWasCalled: boolean; 119 | }; -------------------------------------------------------------------------------- /src/submaker.ts: -------------------------------------------------------------------------------- 1 | import { TTSChunk } from "./types"; 2 | import { ValueError } from "./exceptions"; 3 | 4 | interface Cue { 5 | index: number; 6 | start: number; // in seconds 7 | end: number; // in seconds 8 | content: string; 9 | } 10 | 11 | function formatTime(seconds: number): string { 12 | const h = Math.floor(seconds / 3600); 13 | const m = Math.floor((seconds % 3600) / 60); 14 | const s = Math.floor(seconds % 60); 15 | const ms = Math.round((seconds - Math.floor(seconds)) * 1000); 16 | 17 | const pad = (num: number, size = 2) => num.toString().padStart(size, '0'); 18 | 19 | return `${pad(h)}:${pad(m)}:${pad(s)},${pad(ms, 3)}`; 20 | } 21 | 22 | /** 23 | * Utility class for generating SRT subtitles from WordBoundary events. 24 | * 25 | * @example 26 | * ```typescript 27 | * const subMaker = new SubMaker(); 28 | * 29 | * for await (const chunk of communicate.stream()) { 30 | * if (chunk.type === 'WordBoundary') { 31 | * subMaker.feed(chunk); 32 | * } 33 | * } 34 | * 35 | * const srt = subMaker.getSrt(); 36 | * ``` 37 | */ 38 | export class SubMaker { 39 | private cues: Cue[] = []; 40 | 41 | /** 42 | * Adds a WordBoundary chunk to the subtitle maker. 
43 | * 44 | * @param msg - Must be a WordBoundary type chunk with offset, duration, and text 45 | * @throws {ValueError} If chunk is not a WordBoundary with required fields 46 | */ 47 | feed(msg: TTSChunk): void { 48 | if (msg.type !== 'WordBoundary' || msg.offset === undefined || msg.duration === undefined || msg.text === undefined) { 49 | throw new ValueError("Invalid message type, expected 'WordBoundary' with offset, duration and text"); 50 | } 51 | 52 | // offset and duration are in 100-nanosecond intervals. 53 | // srt timestamps are in seconds. 1s = 10^7 * 100ns 54 | const start = msg.offset / 1e7; 55 | const end = (msg.offset + msg.duration) / 1e7; 56 | 57 | this.cues.push({ 58 | index: this.cues.length + 1, 59 | start: start, 60 | end: end, 61 | content: msg.text, 62 | }); 63 | } 64 | 65 | /** 66 | * Merges consecutive cues to create subtitle entries with multiple words. 67 | * This is useful for creating more readable subtitles instead of word-by-word display. 68 | * 69 | * @param words - Maximum number of words per merged cue 70 | * @throws {ValueError} If words parameter is invalid 71 | */ 72 | mergeCues(words: number): void { 73 | if (words <= 0) { 74 | throw new ValueError("Invalid number of words to merge, expected > 0"); 75 | } 76 | if (this.cues.length === 0) { 77 | return; 78 | } 79 | 80 | const newCues: Cue[] = []; 81 | let currentCue: Cue = this.cues[0]; 82 | 83 | for (const cue of this.cues.slice(1)) { 84 | if (currentCue.content.split(' ').length < words) { 85 | currentCue = { 86 | ...currentCue, 87 | end: cue.end, 88 | content: `${currentCue.content} ${cue.content}`, 89 | }; 90 | } else { 91 | newCues.push(currentCue); 92 | currentCue = cue; 93 | } 94 | } 95 | newCues.push(currentCue); 96 | 97 | // re-index 98 | this.cues = newCues.map((cue, i) => ({ ...cue, index: i + 1 })); 99 | } 100 | 101 | /** 102 | * Returns the subtitles in SRT format. 
103 | * 104 | * @returns SRT formatted subtitles 105 | */ 106 | getSrt(): string { 107 | return this.cues.map(cue => { 108 | return `${cue.index}\r\n${formatTime(cue.start)} --> ${formatTime(cue.end)}\r\n${cue.content}\r\n`; 109 | }).join('\r\n'); 110 | } 111 | 112 | toString(): string { 113 | return this.getSrt(); 114 | } 115 | } -------------------------------------------------------------------------------- /src/voices.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosError, AxiosProxyConfig } from 'axios'; 2 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 3 | import { DRM } from './drm'; 4 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 5 | 6 | function buildProxyConfig(proxy: string): AxiosProxyConfig | false { 7 | try { 8 | const proxyUrl = new URL(proxy); 9 | return { 10 | host: proxyUrl.hostname, 11 | port: parseInt(proxyUrl.port), 12 | protocol: proxyUrl.protocol, 13 | }; 14 | } catch (e) { 15 | // if proxy is not a valid URL, just ignore it. 16 | return false; 17 | } 18 | } 19 | 20 | async function _listVoices(proxy?: string): Promise { 21 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${DRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 22 | const response = await axios.get(url, { 23 | headers: VOICE_HEADERS, 24 | proxy: proxy ? buildProxyConfig(proxy) : false, 25 | }); 26 | 27 | const data = response.data; 28 | 29 | for (const voice of data) { 30 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 31 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 32 | } 33 | 34 | return data; 35 | } 36 | 37 | /** 38 | * Fetches all available voices from the Microsoft Edge TTS service. 
39 | * 40 | * @param proxy - Optional proxy URL for the request 41 | * @returns Promise resolving to array of available voices 42 | */ 43 | export async function listVoices(proxy?: string): Promise { 44 | try { 45 | return await _listVoices(proxy); 46 | } catch (e) { 47 | if (e instanceof AxiosError && e.response?.status === 403) { 48 | DRM.handleClientResponseError(e); 49 | return await _listVoices(proxy); 50 | } 51 | throw e; 52 | } 53 | } 54 | 55 | /** 56 | * Utility class for finding and filtering available voices. 57 | * 58 | * @example 59 | * ```typescript 60 | * const voicesManager = await VoicesManager.create(); 61 | * const englishVoices = voicesManager.find({ Language: 'en' }); 62 | * ``` 63 | */ 64 | export class VoicesManager { 65 | private voices: VoicesManagerVoice[] = []; 66 | private calledCreate = false; 67 | 68 | /** 69 | * Creates a new VoicesManager instance. 70 | * 71 | * @param customVoices - Optional custom voice list instead of fetching from API 72 | * @param proxy - Optional proxy URL for API requests 73 | * @returns Promise resolving to VoicesManager instance 74 | */ 75 | public static async create(customVoices?: Voice[], proxy?: string): Promise { 76 | const manager = new VoicesManager(); 77 | const voices = customVoices ?? await listVoices(proxy); 78 | manager.voices = voices.map(voice => ({ 79 | ...voice, 80 | Language: voice.Locale.split('-')[0], 81 | })); 82 | manager.calledCreate = true; 83 | return manager; 84 | } 85 | 86 | /** 87 | * Finds voices matching the specified criteria. 
88 | * 89 | * @param filter - Filter criteria for voice selection 90 | * @returns Array of voices matching the filter 91 | * @throws {Error} If called before create() 92 | */ 93 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 94 | if (!this.calledCreate) { 95 | throw new Error('VoicesManager.find() called before VoicesManager.create()'); 96 | } 97 | 98 | return this.voices.filter(voice => { 99 | return Object.entries(filter).every(([key, value]) => { 100 | return voice[key as keyof VoicesManagerFind] === value; 101 | }); 102 | }); 103 | } 104 | } -------------------------------------------------------------------------------- /src/webworker-entry.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Web Worker entry point for edge-tts-universal. 3 | * 4 | * This module exports APIs specifically designed for Web Worker environments, providing 5 | * text-to-speech functionality that works in background threads without blocking the main UI. 
6 | * 7 | * Key features: 8 | * - Web Worker compatibility 9 | * - No DOM dependencies 10 | * - Background processing capabilities 11 | * - Message passing utilities for TTS results 12 | * - Isomorphic APIs that work in worker contexts 13 | * 14 | * Web Workers provide an ideal environment for TTS processing as they: 15 | * - Don't block the main UI thread 16 | * - Have access to fetch and WebSocket APIs 17 | * - Can handle large audio data without freezing the page 18 | * - Support streaming TTS processing 19 | * 20 | * @example 21 | * ```typescript 22 | * // In a Web Worker file 23 | * import { EdgeTTS, postAudioMessage, isWebWorker } from '@edge-tts/universal/webworker'; 24 | * 25 | * if (isWebWorker()) { 26 | * self.addEventListener('message', async (event) => { 27 | * if (event.data.type === 'synthesize') { 28 | * const tts = new EdgeTTS(event.data.text, event.data.voice); 29 | * const result = await tts.synthesize(); 30 | * postAudioMessage(result.audio, result.subtitle); 31 | * } 32 | * }); 33 | * } 34 | * ``` 35 | * 36 | * @module WebWorkerEntry 37 | */ 38 | 39 | // Web Worker entry point - exports only APIs that work in Web Worker environments 40 | // Web Workers don't have access to DOM but do have Web APIs like fetch and WebSocket 41 | 42 | export { 43 | IsomorphicCommunicate as Communicate, 44 | IsomorphicCommunicateOptions as CommunicateOptions 45 | } from './isomorphic-communicate'; 46 | 47 | export { 48 | IsomorphicVoicesManager as VoicesManager, 49 | listVoices, 50 | FetchError 51 | } from './isomorphic-voices'; 52 | 53 | export { IsomorphicDRM as DRM } from './isomorphic-drm'; 54 | 55 | // Simple API using isomorphic backend (works in Web Workers) 56 | // Import everything from isomorphic-simple to avoid Node.js dependencies 57 | export { 58 | IsomorphicEdgeTTS as EdgeTTS, 59 | ProsodyOptions, 60 | WordBoundary, 61 | SynthesisResult, 62 | createVTT, 63 | createSRT 64 | } from './isomorphic-simple'; 65 | 66 | // Utility for creating subtitles (works 
everywhere)
export { SubMaker } from './submaker';

// Common types and exceptions
export * from './exceptions';
export * from './types';

// Web Worker specific utilities
/**
 * Detects if the current environment is a Web Worker.
 *
 * @returns True if running in a Web Worker context, false otherwise
 */
export function isWebWorker(): boolean {
  return typeof (globalThis as any).importScripts === 'function' &&
    typeof (globalThis as any).WorkerGlobalScope !== 'undefined';
}

/**
 * Posts a TTS result message to the main thread from a Web Worker.
 * This is a convenience function for sending audio and subtitle data
 * back to the main thread after TTS processing is complete.
 *
 * @param audio - The synthesized audio as a Blob
 * @param subtitle - Array of subtitle/word boundary data
 * @remarks Never throws. Outside a Web Worker context it is a no-op that
 * only logs a console warning; inside a worker the payload is sent via
 * `postMessage` as `{ type: 'tts-result', audio, subtitle }`.
 */
export function postAudioMessage(audio: Blob, subtitle: any[]) {
  if (isWebWorker()) {
    // In a Web Worker, post message to main thread
    (globalThis as any).postMessage({
      type: 'tts-result',
      audio: audio,
      subtitle: subtitle
    });
  } else {
    console.warn('postAudioMessage should only be called in Web Worker context');
  }
}
7 | * 8 | * Key features: 9 | * - Node.js optimized implementations with full feature set 10 | * - Proxy support for enterprise environments 11 | * - Comprehensive voice management 12 | * - Streaming and simple APIs 13 | * - Subtitle generation utilities 14 | * - Cross-platform compatibility layers 15 | * 16 | * @example 17 | * ```typescript 18 | * import { EdgeTTS, listVoices, Communicate } from 'edge-tts-universal'; 19 | * 20 | * // Simple API 21 | * const tts = new EdgeTTS('Hello, world!', 'en-US-EmmaMultilingualNeural'); 22 | * const result = await tts.synthesize(); 23 | * 24 | * // Streaming API 25 | * const communicate = new Communicate('Hello, world!'); 26 | * for await (const chunk of communicate.stream()) { 27 | * if (chunk.type === 'audio') { 28 | * // Handle audio data 29 | * } 30 | * } 31 | * 32 | * // Voice management 33 | * const voices = await listVoices(); 34 | * ``` 35 | * 36 | * @module MainEntry 37 | */ 38 | 39 | // Node.js-specific API (uses axios, Node.js crypto, etc.) 
40 | export { Communicate, CommunicateOptions } from './communicate'; 41 | export { SubMaker } from './submaker'; 42 | export { VoicesManager, listVoices } from './voices'; 43 | 44 | // Simple API (works in both Node.js and browsers when using appropriate exports) 45 | export { EdgeTTS, ProsodyOptions, WordBoundary, SynthesisResult, createVTT, createSRT, UniversalEdgeTTS } from './simple'; 46 | 47 | // Universal/Isomorphic API (works in both Node.js and browsers) 48 | export { 49 | IsomorphicCommunicate, 50 | IsomorphicCommunicateOptions 51 | } from './isomorphic-communicate'; 52 | export { 53 | IsomorphicVoicesManager, 54 | listVoices as listVoicesIsomorphic, 55 | FetchError 56 | } from './isomorphic-voices'; 57 | export { IsomorphicDRM } from './isomorphic-drm'; 58 | 59 | // Simple isomorphic API 60 | export { 61 | IsomorphicEdgeTTS, 62 | ProsodyOptions as IsomorphicProsodyOptions, 63 | WordBoundary as IsomorphicWordBoundary, 64 | SynthesisResult as IsomorphicSynthesisResult, 65 | createVTT as createVTTIsomorphic, 66 | createSRT as createSRTIsomorphic 67 | } from './isomorphic-simple'; 68 | 69 | // Universal aliases (preferred naming) 70 | export { 71 | IsomorphicCommunicate as UniversalCommunicate, 72 | IsomorphicCommunicateOptions as UniversalCommunicateOptions 73 | } from './isomorphic-communicate'; 74 | export { 75 | IsomorphicVoicesManager as UniversalVoicesManager, 76 | listVoices as listVoicesUniversal, 77 | FetchError as UniversalFetchError 78 | } from './isomorphic-voices'; 79 | export { IsomorphicDRM as UniversalDRM } from './isomorphic-drm'; 80 | export { 81 | IsomorphicEdgeTTS as UniversalEdgeTTS_Isomorphic, 82 | ProsodyOptions as UniversalProsodyOptions_Isomorphic, 83 | WordBoundary as UniversalWordBoundary_Isomorphic, 84 | SynthesisResult as UniversalSynthesisResult_Isomorphic, 85 | createVTT as createVTTUniversal_Isomorphic, 86 | createSRT as createSRTUniversal_Isomorphic 87 | } from './isomorphic-simple'; 88 | 89 | // Browser-specific API (uses 
native browser APIs only) 90 | export { 91 | EdgeTTSBrowser, 92 | ProsodyOptions as BrowserProsodyOptions, 93 | WordBoundary as BrowserWordBoundary, 94 | SynthesisResult as BrowserSynthesisResult, 95 | createVTT as createVTTBrowser, 96 | createSRT as createSRTBrowser 97 | } from './browser'; 98 | 99 | // Common types and exceptions 100 | export * from './exceptions'; 101 | export * from './types'; -------------------------------------------------------------------------------- /examples/webworker-example/main.ts: -------------------------------------------------------------------------------- 1 | // Main thread - manages Web Worker for TTS processing 2 | // Run this example in a browser environment 3 | 4 | async function runWebWorkerExample() { 5 | console.log('🔄 Starting Web Worker TTS example...'); 6 | 7 | // Check if we're in a browser 8 | if (typeof Worker === 'undefined') { 9 | console.error('❌ Web Workers not supported in this environment'); 10 | return; 11 | } 12 | 13 | try { 14 | // Create Web Worker (you'll need to build worker.ts to worker.js first) 15 | const worker = new Worker('./worker.js'); 16 | 17 | // Listen for messages from worker 18 | worker.onmessage = function (e) { 19 | const { type, audio, subtitle, error, message } = e.data; 20 | 21 | switch (type) { 22 | case 'ready': 23 | console.log('✅ Worker ready:', message); 24 | 25 | // Send synthesis request to worker 26 | worker.postMessage({ 27 | type: 'synthesize', 28 | text: 'Hello from a Web Worker! 
This text-to-speech synthesis is happening in the background.', 29 | voice: 'en-US-EmmaMultilingualNeural', 30 | options: { 31 | rate: '+10%', 32 | volume: '+0%', 33 | pitch: '+0Hz' 34 | } 35 | }); 36 | break; 37 | 38 | case 'tts-result': 39 | console.log(`🎵 Audio generated in worker: ${audio.size} bytes`); 40 | console.log(`📝 Subtitle words: ${subtitle.length}`); 41 | 42 | // Create audio element to play the result 43 | const audioUrl = URL.createObjectURL(audio); 44 | const audioElement = new Audio(audioUrl); 45 | audioElement.controls = true; 46 | 47 | // Add to page if in browser 48 | if (typeof document !== 'undefined') { 49 | const container = document.getElementById('audio-container') || document.body; 50 | const label = document.createElement('p'); 51 | label.textContent = 'Generated audio from Web Worker:'; 52 | container.appendChild(label); 53 | container.appendChild(audioElement); 54 | 55 | // Add download link 56 | const downloadLink = document.createElement('a'); 57 | downloadLink.href = audioUrl; 58 | downloadLink.download = 'webworker-tts-output.mp3'; 59 | downloadLink.textContent = 'Download Audio'; 60 | downloadLink.style.display = 'block'; 61 | downloadLink.style.marginTop = '10px'; 62 | container.appendChild(downloadLink); 63 | } 64 | 65 | // Terminate worker when done 66 | worker.terminate(); 67 | console.log('✅ Web Worker TTS example completed!'); 68 | break; 69 | 70 | case 'error': 71 | console.error('❌ Worker error:', error); 72 | worker.terminate(); 73 | break; 74 | } 75 | }; 76 | 77 | worker.onerror = function (error) { 78 | console.error('❌ Worker failed:', error); 79 | }; 80 | 81 | } catch (error) { 82 | console.error('❌ Failed to create Web Worker:', error); 83 | } 84 | } 85 | 86 | // Export for use in different environments 87 | export { runWebWorkerExample }; 88 | 89 | // Auto-run in browser if this script is loaded directly 90 | if (typeof window !== 'undefined' && typeof document !== 'undefined') { 91 | // Make function available 
globally 92 | (window as any).runWebWorkerExample = runWebWorkerExample; 93 | 94 | // Auto-run if there's a button or when DOM is ready 95 | document.addEventListener('DOMContentLoaded', () => { 96 | const button = document.getElementById('run-webworker-example'); 97 | if (button) { 98 | button.addEventListener('click', runWebWorkerExample); 99 | } 100 | }); 101 | } -------------------------------------------------------------------------------- /src/runtime-detection.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Runtime detection utilities for cross-platform compatibility 3 | */ 4 | 5 | export interface RuntimeInfo { 6 | name: 'node' | 'deno' | 'bun' | 'browser' | 'webworker' | 'unknown'; 7 | version?: string; 8 | isNode: boolean; 9 | isDeno: boolean; 10 | isBun: boolean; 11 | isBrowser: boolean; 12 | isWebWorker: boolean; 13 | } 14 | 15 | /** 16 | * Detect the current JavaScript runtime environment 17 | */ 18 | export function detectRuntime(): RuntimeInfo { 19 | const info: RuntimeInfo = { 20 | name: 'unknown', 21 | isNode: false, 22 | isDeno: false, 23 | isBun: false, 24 | isBrowser: false, 25 | isWebWorker: false, 26 | }; 27 | 28 | // Check for Deno 29 | if (typeof (globalThis as any).Deno !== 'undefined') { 30 | info.name = 'deno'; 31 | info.isDeno = true; 32 | info.version = (globalThis as any).Deno.version?.deno; 33 | return info; 34 | } 35 | 36 | // Check for Bun 37 | if (typeof (globalThis as any).Bun !== 'undefined') { 38 | info.name = 'bun'; 39 | info.isBun = true; 40 | info.version = (globalThis as any).Bun.version; 41 | return info; 42 | } 43 | 44 | // Check for Node.js 45 | if (typeof process !== 'undefined' && process.versions && process.versions.node) { 46 | info.name = 'node'; 47 | info.isNode = true; 48 | info.version = process.versions.node; 49 | return info; 50 | } 51 | 52 | // Check for Web Worker 53 | if (typeof (globalThis as any).importScripts === 'function' && typeof (globalThis as 
any).WorkerGlobalScope !== 'undefined') { 54 | info.name = 'webworker'; 55 | info.isWebWorker = true; 56 | return info; 57 | } 58 | 59 | // Check for Browser 60 | if (typeof window !== 'undefined') { 61 | info.name = 'browser'; 62 | info.isBrowser = true; 63 | return info; 64 | } 65 | 66 | return info; 67 | } 68 | 69 | /** 70 | * Get the appropriate fetch implementation for the current runtime 71 | */ 72 | export function getFetch(): typeof fetch { 73 | const runtime = detectRuntime(); 74 | 75 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 76 | return globalThis.fetch; 77 | } 78 | 79 | if (runtime.isNode || runtime.isBun) { 80 | try { 81 | // Try using built-in fetch first (Node 18+, Bun) 82 | if (typeof globalThis.fetch !== 'undefined') { 83 | return globalThis.fetch; 84 | } 85 | // Fallback to cross-fetch for older Node versions 86 | return require('cross-fetch'); 87 | } catch { 88 | throw new Error('No fetch implementation available. Please install cross-fetch.'); 89 | } 90 | } 91 | 92 | throw new Error('Unsupported runtime environment'); 93 | } 94 | 95 | /** 96 | * Get the appropriate WebSocket implementation for the current runtime 97 | */ 98 | export function getWebSocket(): any { 99 | const runtime = detectRuntime(); 100 | 101 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 102 | return globalThis.WebSocket; 103 | } 104 | 105 | if (runtime.isNode || runtime.isBun) { 106 | try { 107 | return require('isomorphic-ws'); 108 | } catch { 109 | throw new Error('No WebSocket implementation available. 
Please install isomorphic-ws.'); 110 | } 111 | } 112 | 113 | throw new Error('Unsupported runtime environment'); 114 | } 115 | 116 | /** 117 | * Get runtime-specific crypto implementation 118 | * Note: Node.js 16+ (and our minimum version of 18.17+) has native globalThis.crypto support 119 | */ 120 | export function getCrypto(): Crypto { 121 | const runtime = detectRuntime(); 122 | 123 | if (runtime.isDeno || runtime.isBrowser || runtime.isWebWorker) { 124 | return globalThis.crypto; 125 | } 126 | 127 | if (runtime.isNode || runtime.isBun) { 128 | // Node.js 18.17+ and Bun have built-in crypto 129 | if (typeof globalThis.crypto !== 'undefined') { 130 | return globalThis.crypto; 131 | } 132 | throw new Error('No crypto implementation available. Please upgrade to Node.js 18.17+.'); 133 | } 134 | 135 | throw new Error('Unsupported runtime environment'); 136 | } -------------------------------------------------------------------------------- /src/browser-voices.ts: -------------------------------------------------------------------------------- 1 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 2 | import { BrowserDRM } from './browser-drm'; 3 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 4 | 5 | /** 6 | * Error class for fetch-related errors (browser-specific) 7 | */ 8 | export class BrowserFetchError extends Error { 9 | response?: { 10 | status: number; 11 | headers: Record; 12 | }; 13 | 14 | constructor(message: string, response?: { status: number; headers: Record }) { 15 | super(message); 16 | this.name = 'BrowserFetchError'; 17 | this.response = response; 18 | } 19 | } 20 | 21 | async function _listVoices(): Promise { 22 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${await BrowserDRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 23 | 24 | try { 25 | const response = await fetch(url, { 26 | headers: VOICE_HEADERS, 27 | }); 28 | 29 | if (!response.ok) { 30 | const headers: Record = {}; 31 
| response.headers.forEach((value, key) => { 32 | headers[key] = value; 33 | }); 34 | 35 | throw new BrowserFetchError(`HTTP ${response.status}`, { 36 | status: response.status, 37 | headers 38 | }); 39 | } 40 | 41 | const data: Voice[] = await response.json(); 42 | 43 | for (const voice of data) { 44 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 45 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 46 | } 47 | 48 | return data; 49 | } catch (error) { 50 | if (error instanceof BrowserFetchError) { 51 | throw error; 52 | } 53 | // Convert other fetch errors to our BrowserFetchError format 54 | throw new BrowserFetchError(error instanceof Error ? error.message : 'Unknown fetch error'); 55 | } 56 | } 57 | 58 | /** 59 | * Fetches all available voices from the Microsoft Edge TTS service (browser version). 60 | * Uses native browser fetch API and Web Crypto. 61 | * 62 | * @returns Promise resolving to array of available voices 63 | */ 64 | export async function listVoices(): Promise { 65 | try { 66 | return await _listVoices(); 67 | } catch (e) { 68 | if (e instanceof BrowserFetchError && e.response?.status === 403) { 69 | BrowserDRM.handleClientResponseError(e.response); 70 | return await _listVoices(); 71 | } 72 | throw e; 73 | } 74 | } 75 | 76 | /** 77 | * Browser-specific utility class for finding and filtering available voices. 78 | * Uses only browser-native APIs. 79 | * 80 | * @example 81 | * ```typescript 82 | * const voicesManager = await BrowserVoicesManager.create(); 83 | * const englishVoices = voicesManager.find({ Language: 'en' }); 84 | * ``` 85 | */ 86 | export class BrowserVoicesManager { 87 | private voices: VoicesManagerVoice[] = []; 88 | private calledCreate = false; 89 | 90 | /** 91 | * Creates a new BrowserVoicesManager instance. 
92 | * 93 | * @param customVoices - Optional custom voice list instead of fetching from API 94 | * @returns Promise resolving to BrowserVoicesManager instance 95 | */ 96 | public static async create(customVoices?: Voice[]): Promise { 97 | const manager = new BrowserVoicesManager(); 98 | const voices = customVoices ?? await listVoices(); 99 | manager.voices = voices.map(voice => ({ 100 | ...voice, 101 | Language: voice.Locale.split('-')[0], 102 | })); 103 | manager.calledCreate = true; 104 | return manager; 105 | } 106 | 107 | /** 108 | * Finds voices matching the specified criteria. 109 | * 110 | * @param filter - Filter criteria for voice selection 111 | * @returns Array of voices matching the filter 112 | * @throws {Error} If called before create() 113 | */ 114 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 115 | if (!this.calledCreate) { 116 | throw new Error('BrowserVoicesManager.find() called before BrowserVoicesManager.create()'); 117 | } 118 | 119 | return this.voices.filter(voice => { 120 | return Object.entries(filter).every(([key, value]) => { 121 | return voice[key as keyof VoicesManagerFind] === value; 122 | }); 123 | }); 124 | } 125 | } -------------------------------------------------------------------------------- /examples/universal-detection.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Universal Environment Detection Example 3 | * This example shows how to detect the environment and use appropriate APIs 4 | */ 5 | 6 | // Environment detection function 7 | function detectEnvironment() { 8 | // Check for Node.js 9 | const isNode = typeof process !== 'undefined' && 10 | process.versions && 11 | process.versions.node; 12 | 13 | // Check for browser 14 | const isBrowser = typeof window !== 'undefined' && 15 | typeof window.document !== 'undefined'; 16 | 17 | // Check for Web Workers 18 | const isWebWorker = typeof importScripts === 'function' && 19 | typeof (globalThis as 
any).WorkerGlobalScope !== 'undefined'; 20 | 21 | // Check for Deno 22 | const isDeno = typeof (globalThis as any).Deno !== 'undefined'; 23 | 24 | // Check for Bun 25 | const isBun = typeof (globalThis as any).Bun !== 'undefined'; 26 | 27 | return { 28 | isNode, 29 | isBrowser, 30 | isWebWorker, 31 | isDeno, 32 | isBun, 33 | platform: isNode ? 'node' : 34 | isBrowser ? 'browser' : 35 | isWebWorker ? 'webworker' : 36 | isDeno ? 'deno' : 37 | isBun ? 'bun' : 38 | 'unknown' 39 | }; 40 | } 41 | 42 | // Dynamic import based on environment 43 | async function createTTSInstance(text: string, voice?: string) { 44 | const env = detectEnvironment(); 45 | console.log('Detected environment:', env.platform); 46 | 47 | switch (env.platform) { 48 | case 'node': 49 | // Use full Node.js API with all features 50 | const { EdgeTTS } = await import('../dist/index.js'); 51 | return new EdgeTTS(text, voice); 52 | 53 | case 'browser': 54 | // Use browser-specific API for optimal bundle size 55 | const { EdgeTTS: BrowserEdgeTTS } = await import('../dist/browser.js'); 56 | return new BrowserEdgeTTS(text, voice); 57 | 58 | case 'deno': 59 | case 'bun': 60 | default: 61 | // Use isomorphic API for maximum compatibility 62 | const { EdgeTTS: IsomorphicTTS } = await import('../dist/isomorphic.js'); 63 | return new IsomorphicTTS(text, voice); 64 | } 65 | } 66 | 67 | // Universal synthesis function 68 | async function universalSynthesis() { 69 | const text = 'Hello from a universal text-to-speech library!'; 70 | const voice = 'en-US-EmmaMultilingualNeural'; 71 | 72 | try { 73 | const tts = await createTTSInstance(text, voice); 74 | const result = await tts.synthesize(); 75 | 76 | console.log(`✅ Generated audio: ${result.audio.size} bytes`); 77 | console.log(`📝 Word boundaries: ${result.subtitle.length}`); 78 | 79 | const env = detectEnvironment(); 80 | 81 | // Handle result based on environment 82 | if (env.isNode) { 83 | // Node.js - save to file 84 | const fs = await import('fs/promises'); 85 
| await fs.writeFile('universal-output.mp3', Buffer.from(await result.audio.arrayBuffer())); 86 | console.log('💾 Audio saved to universal-output.mp3'); 87 | } else if (env.isBrowser) { 88 | // Browser - create downloadable link 89 | const url = URL.createObjectURL(result.audio); 90 | console.log(`🔗 Audio URL: ${url}`); 91 | 92 | // Create download link 93 | const a = document.createElement('a'); 94 | a.href = url; 95 | a.download = 'universal-output.mp3'; 96 | a.textContent = 'Download Audio'; 97 | document.body.appendChild(a); 98 | } else { 99 | // Other environments 100 | console.log('🌐 Audio ready for processing in your environment'); 101 | } 102 | 103 | } catch (error) { 104 | console.error('❌ Universal synthesis failed:', error); 105 | 106 | if (error instanceof Error && error.message.includes('CORS')) { 107 | console.log('💡 Tip: Use a proxy server for browser applications'); 108 | } 109 | } 110 | } 111 | 112 | // Export for use in different environments 113 | export { detectEnvironment, createTTSInstance, universalSynthesis }; 114 | 115 | // Auto-run in appropriate environments 116 | // ESM equivalent check 117 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 118 | // Node.js 119 | universalSynthesis().catch(console.error); 120 | } else if (typeof globalThis !== 'undefined') { 121 | // Global scope - make function available 122 | (globalThis as any).runUniversalExample = universalSynthesis; 123 | } -------------------------------------------------------------------------------- /examples/universal-api.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Universal API Example (Preferred Naming Convention) 3 | * This example demonstrates the preferred "Universal" naming instead of "Isomorphic" 4 | */ 5 | 6 | import { 7 | UniversalCommunicate, 8 | UniversalVoicesManager, 9 | listVoicesUniversal, 10 | UniversalEdgeTTS 11 | } from '../dist/index.js'; 12 | 13 | const TEXT = 
'Hello! This demonstrates the preferred Universal API naming.'; 14 | const VOICE = 'en-US-EmmaMultilingualNeural'; 15 | 16 | async function universalApiExample() { 17 | console.log('🌍 Running Universal API example (preferred naming)...'); 18 | 19 | try { 20 | // Test voice listing with Universal naming 21 | console.log('📋 Fetching available voices...'); 22 | const voices = await listVoicesUniversal(); 23 | console.log(`✅ Found ${voices.length} voices`); 24 | 25 | // Test voice manager with Universal naming 26 | console.log('🔍 Testing Universal voice manager...'); 27 | const voicesManager = await UniversalVoicesManager.create(); 28 | const englishVoices = voicesManager.find({ Language: 'en' }); 29 | console.log(`✅ Found ${englishVoices.length} English voices`); 30 | 31 | // Test simple Universal API 32 | console.log('🎤 Testing Universal EdgeTTS (simple API)...'); 33 | const tts = new UniversalEdgeTTS(TEXT, VOICE, { 34 | rate: '+15%', 35 | volume: '+0%', 36 | pitch: '+5Hz' 37 | }); 38 | 39 | const result = await tts.synthesize(); 40 | console.log(`✅ Simple synthesis: ${result.audio.size} bytes, ${result.subtitle.length} words`); 41 | 42 | // Test streaming Universal API 43 | console.log('🎵 Testing Universal streaming API...'); 44 | const communicate = new UniversalCommunicate(TEXT, { 45 | voice: VOICE, 46 | rate: '+10%', 47 | volume: '+0%', 48 | pitch: '+0Hz' 49 | }); 50 | 51 | const audioChunks: Uint8Array[] = []; 52 | let wordCount = 0; 53 | 54 | for await (const chunk of communicate.stream()) { 55 | if (chunk.type === 'audio' && chunk.data) { 56 | audioChunks.push(chunk.data); 57 | } else if (chunk.type === 'WordBoundary') { 58 | wordCount++; 59 | console.log(`📝 Word ${wordCount}: "${chunk.text}"`); 60 | } 61 | } 62 | 63 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 64 | console.log(`✅ Universal streaming complete! 
Audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 65 | 66 | // Environment-specific handling 67 | if (typeof process !== 'undefined' && process.versions?.node) { 68 | // Node.js - save to file 69 | const fs = await import('fs/promises'); 70 | const path = await import('path'); 71 | 72 | const outputFile = path.join(process.cwd(), 'universal-api-output.mp3'); 73 | 74 | // Concatenate Uint8Arrays 75 | const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 76 | const concatenated = new Uint8Array(totalLength); 77 | let offset = 0; 78 | for (const chunk of audioChunks) { 79 | concatenated.set(chunk, offset); 80 | offset += chunk.length; 81 | } 82 | 83 | await fs.writeFile(outputFile, concatenated); 84 | console.log(`💾 Node.js: Audio saved to ${outputFile}`); 85 | } else { 86 | // Browser or other environments 87 | console.log('🌐 Audio data ready for browser processing'); 88 | } 89 | 90 | } catch (error) { 91 | console.error('❌ Error:', error); 92 | 93 | if (error instanceof Error) { 94 | if (error.message.includes('CORS') || error.message.includes('cross-origin')) { 95 | console.log(` 96 | 🚫 CORS Error Detected! 97 | This is expected when running in a browser due to Microsoft's CORS policy. 98 | 99 | Solutions: 100 | 1. Use a proxy server on your backend 101 | 2. Deploy as a browser extension 102 | 3. Use Microsoft's official Speech SDK for browser apps 103 | 4. 
Run this example in Node.js where CORS doesn't apply 104 | `); 105 | } 106 | } 107 | } 108 | } 109 | 110 | // ESM equivalent check 111 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 112 | // Node.js 113 | universalApiExample().catch(console.error); 114 | } else if (typeof globalThis !== 'undefined') { 115 | // Browser - expose function globally 116 | (globalThis as any).runUniversalApiExample = universalApiExample; 117 | } 118 | 119 | export { universalApiExample }; -------------------------------------------------------------------------------- /examples/isomorphic-example.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Isomorphic Edge TTS Example 3 | * This example works in both Node.js and browsers (subject to CORS policy) 4 | */ 5 | 6 | import { 7 | IsomorphicCommunicate, 8 | IsomorphicVoicesManager, 9 | listVoicesIsomorphic 10 | } from '../dist/index.js'; 11 | 12 | const TEXT = 'Hello! This is an isomorphic text-to-speech example that works in both Node.js and browsers.'; 13 | const VOICE = 'en-US-EmmaMultilingualNeural'; 14 | 15 | async function isomorphicExample() { 16 | console.log('🌐 Running isomorphic Edge TTS example...'); 17 | 18 | try { 19 | // Test voice listing (isomorphic) 20 | console.log('📋 Fetching available voices...'); 21 | const voices = await listVoicesIsomorphic(); 22 | console.log(`✅ Found ${voices.length} voices`); 23 | 24 | // Test voice manager (isomorphic) 25 | console.log('🔍 Testing voice manager...'); 26 | const voicesManager = await IsomorphicVoicesManager.create(); 27 | const englishVoices = voicesManager.find({ Language: 'en' }); 28 | console.log(`✅ Found ${englishVoices.length} English voices`); 29 | 30 | // Test TTS synthesis (isomorphic) 31 | console.log('🎤 Starting TTS synthesis...'); 32 | const communicate = new IsomorphicCommunicate(TEXT, { 33 | voice: VOICE, 34 | rate: '+10%', 35 | volume: '+0%', 36 | pitch: '+0Hz' 37 | }); 38 | 39 
| const audioChunks: Uint8Array[] = []; 40 | let wordCount = 0; 41 | 42 | for await (const chunk of communicate.stream()) { 43 | if (chunk.type === 'audio' && chunk.data) { 44 | audioChunks.push(chunk.data); 45 | console.log(`🔊 Audio chunk: ${chunk.data.length} bytes`); 46 | } else if (chunk.type === 'WordBoundary') { 47 | wordCount++; 48 | console.log(`📝 Word ${wordCount}: "${chunk.text}" at ${chunk.offset}ns`); 49 | } 50 | } 51 | 52 | const totalAudioSize = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 53 | console.log(`✅ Synthesis complete! Audio: ${totalAudioSize} bytes, Words: ${wordCount}`); 54 | 55 | // Environment-specific handling 56 | const isNode = typeof globalThis !== 'undefined' 57 | ? globalThis.process?.versions?.node !== undefined 58 | : typeof process !== 'undefined' && process.versions?.node !== undefined; 59 | 60 | if (isNode) { 61 | // Node.js - save to file 62 | const fs = await import('fs/promises'); 63 | const path = await import('path'); 64 | 65 | const outputFile = path.join(process.cwd(), 'isomorphic-output.mp3'); 66 | // Concatenate Uint8Arrays 67 | const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0); 68 | const concatenated = new Uint8Array(totalLength); 69 | let offset = 0; 70 | for (const chunk of audioChunks) { 71 | concatenated.set(chunk, offset); 72 | offset += chunk.length; 73 | } 74 | await fs.writeFile(outputFile, concatenated); 75 | console.log(`💾 Node.js: Audio saved to ${outputFile}`); 76 | } else { 77 | // Browser - create audio element 78 | const audioBlob = new Blob(audioChunks, { type: 'audio/mpeg' }); 79 | const audioUrl = URL.createObjectURL(audioBlob); 80 | 81 | console.log(`🌐 Browser: Audio Blob created (${audioBlob.size} bytes)`); 82 | console.log(`🔗 Audio URL: ${audioUrl}`); 83 | 84 | // If running in actual browser, you could: 85 | // const audio = new Audio(audioUrl); 86 | // audio.play(); 87 | } 88 | 89 | } catch (error) { 90 | console.error('❌ Error:', error); 91 | 92 | if 
(error instanceof Error) { 93 | if (error.message.includes('CORS') || error.message.includes('cross-origin')) { 94 | console.log(` 95 | 🚫 CORS Error Detected! 96 | This is expected when running in a browser due to Microsoft's CORS policy. 97 | 98 | Solutions: 99 | 1. Use a proxy server on your backend 100 | 2. Deploy as a browser extension 101 | 3. Use Microsoft's official Speech SDK instead 102 | 4. Run this example in Node.js where CORS doesn't apply 103 | `); 104 | } 105 | } 106 | } 107 | } 108 | 109 | // Universal module pattern - works in both Node.js and browsers 110 | // ESM equivalent check 111 | if (typeof process !== 'undefined' && import.meta.url === `file://${process.argv[1]}`) { 112 | // Node.js 113 | isomorphicExample().catch(console.error); 114 | } else if (typeof globalThis !== 'undefined') { 115 | // Browser - expose function globally 116 | (globalThis as any).runIsomorphicExample = isomorphicExample; 117 | } 118 | 119 | export { isomorphicExample }; -------------------------------------------------------------------------------- /PERFORMANCE_OPTIMIZATIONS.md: -------------------------------------------------------------------------------- 1 | # Performance Optimizations for Edge TTS Universal 2 | 3 | ## Overview 4 | 5 | This document outlines the performance optimizations made to improve audio quality and reduce delays in the Edge TTS Universal library. 6 | 7 | ## Key Issues Addressed 8 | 9 | ### 1. Hard-coded Delay Removal 10 | 11 | **Problem**: A hard-coded delay of 8,750,000 nanoseconds (0.875 seconds) was being added to `offsetCompensation` on every `turn.end` event. 12 | 13 | **Solution**: Removed the line `this.state.offsetCompensation += 8_750_000;` from all communicate implementations. 14 | 15 | **Impact**: Eliminates the artificial 0.875-second delay between audio segments, providing seamless audio playback. 16 | 17 | ### 2. 
Audio Quality Enhancement 18 | 19 | **Problem**: Low bitrate audio format (24kHz, 48kbps) was causing audio quality issues and potential smoothness problems. 20 | 21 | **Attempted Solution**: Upgraded audio output format from `audio-24khz-48kbitrate-mono-mp3` to `audio-48khz-192kbitrate-mono-mp3`. 22 | 23 | **Issue**: The higher quality format caused "NoAudioReceived" errors as Microsoft's Edge TTS service doesn't support the 48kHz/192kbps format. 24 | 25 | **Final Solution**: Reverted back to the original `audio-24khz-48kbitrate-mono-mp3` format to maintain compatibility while keeping other optimizations. 26 | 27 | ### 3. Improved Audio Concatenation 28 | 29 | **Problem**: Simple array concatenation could introduce gaps or inconsistencies in audio data. 30 | 31 | **Solution**: Enhanced the `concatUint8Arrays` function with: 32 | 33 | - Proper empty array handling 34 | - Single array optimization 35 | - Explicit length validation 36 | - Better memory allocation 37 | 38 | ### 4. Message Processing Optimization 39 | 40 | **Problem**: Indefinite waiting in message processing loops could cause delays. 
41 | 42 | **Solution**: Added timeout mechanism to message processing: 43 | 44 | - Added 50ms timeout to prevent indefinite waiting 45 | - More responsive message handling 46 | - Better error recovery 47 | 48 | ## Configuration Changes 49 | 50 | ### Audio Format Configuration 51 | 52 | ```json 53 | { 54 | "context": { 55 | "synthesis": { 56 | "audio": { 57 | "metadataoptions": { 58 | "sentenceBoundaryEnabled": false, 59 | "wordBoundaryEnabled": true 60 | }, 61 | "outputFormat": "audio-24khz-48kbitrate-mono-mp3" 62 | } 63 | } 64 | } 65 | } 66 | ``` 67 | 68 | _Note: Higher quality formats (48kHz/192kbps) were tested but are not supported by Microsoft's Edge TTS service._ 69 | 70 | ### Timing Compensation 71 | 72 | The `offsetCompensation` now only uses actual audio timing data without artificial delays: 73 | 74 | ```typescript 75 | // Before (with artificial delay) 76 | this.state.offsetCompensation = this.state.lastDurationOffset; 77 | this.state.offsetCompensation += 8_750_000; // Removed this line 78 | 79 | // After (natural timing) 80 | this.state.offsetCompensation = this.state.lastDurationOffset; 81 | ``` 82 | 83 | ## Performance Benefits 84 | 85 | 1. **Reduced Latency**: Elimination of 0.875s artificial delay 86 | 2. **Maintained Compatibility**: Kept original audio format that works reliably 87 | 3. **Smoother Playback**: Improved audio chunk concatenation 88 | 4. **More Responsive**: Better message processing with timeout handling 89 | 5. **Cross-Platform**: Optimizations applied to all implementations (Browser, Node.js, Isomorphic) 90 | 91 | ## Files Modified 92 | 93 | - `src/browser-communicate.ts` 94 | - `src/communicate.ts` 95 | - `src/isomorphic-communicate.ts` 96 | - `src/browser.ts` 97 | - `src/browser-simple.ts` 98 | - `src/isomorphic-simple.ts` 99 | - `examples/browser-example.html` 100 | 101 | ## Testing Recommendations 102 | 103 | After implementing these changes, test with: 104 | 105 | 1. 
**Long text passages** to verify no delays between sentences
2. **Multiple paragraphs** to ensure smooth transitions
3. **Different voice types** to confirm quality improvements
4. **Browser playback** to verify smoothness in web environments
5. **Streaming scenarios** to test real-time performance

## Future Optimization Opportunities

1. **Audio Preloading**: Implement audio chunk preloading for even smoother playback
2. **Adaptive Bitrate**: Dynamic quality adjustment based on network conditions
3. **Audio Compression**: Consider WebM or other formats for better compression
4. **Buffer Management**: Implement smart buffering strategies for large texts
5. **WebAudio API**: Use WebAudio API for advanced audio processing in browsers

## Compatibility Notes

- All changes maintain backward compatibility
- Because the audio format was reverted to 24 kHz / 48 kbps, bandwidth requirements are unchanged
- Older browsers continue to work with the retained original audio format
- The timeout mechanism adds resilience without breaking existing functionality
-------------------------------------------------------------------------------- /src/browser-utils.ts: --------------------------------------------------------------------------------
/**
 * Browser-specific utility functions that avoid Node.js dependencies.
 * Provides browser-native implementations of UUID generation and XML escaping.
 */

/**
 * Generates a UUID v4 string without hyphens using browser's crypto API.
8 | * @returns UUID string with hyphens removed 9 | */ 10 | export function browserConnectId(): string { 11 | // Use crypto.getRandomValues for browser-native UUID generation 12 | const array = new Uint8Array(16); 13 | crypto.getRandomValues(array); 14 | 15 | // Set version (4) and variant bits according to RFC 4122 16 | array[6] = (array[6] & 0x0f) | 0x40; 17 | array[8] = (array[8] & 0x3f) | 0x80; 18 | 19 | // Convert to hex string and format as UUID, then remove hyphens 20 | const hex = Array.from(array, byte => byte.toString(16).padStart(2, '0')).join(''); 21 | const uuid = `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`; 22 | 23 | return uuid.replace(/-/g, ''); 24 | } 25 | 26 | /** 27 | * Browser-native XML escaping function. 28 | * @param text - Text to escape 29 | * @returns XML-escaped text 30 | */ 31 | export function browserEscape(text: string): string { 32 | return text 33 | .replace(/&/g, '&') 34 | .replace(//g, '>') 36 | .replace(/"/g, '"') 37 | .replace(/'/g, '''); 38 | } 39 | 40 | /** 41 | * Unescapes XML entities in text. 42 | * @param text - Text containing XML entities to unescape 43 | * @returns Text with XML entities converted back to their original characters 44 | */ 45 | export function browserUnescape(text: string): string { 46 | return text 47 | .replace(/"/g, '"') 48 | .replace(/'/g, "'") 49 | .replace(/</g, '<') 50 | .replace(/>/g, '>') 51 | .replace(/&/g, '&'); // Do & last to avoid double unescaping 52 | } 53 | 54 | /** 55 | * Removes control characters that are incompatible with TTS processing. 
56 | * @param text - Input text to clean 57 | * @returns Text with control characters replaced by spaces 58 | */ 59 | export function browserRemoveIncompatibleCharacters(text: string): string { 60 | // Remove control characters (U+0000 to U+001F except \t, \n, \r) 61 | // eslint-disable-next-line no-control-regex 62 | return text.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F]/g, ' '); 63 | } 64 | 65 | /** 66 | * Formats the current date as a string in the format expected by the TTS service. 67 | * @returns Formatted date string 68 | */ 69 | export function browserDateToString(): string { 70 | return new Date().toUTCString().replace("GMT", "GMT+0000 (Coordinated Universal Time)"); 71 | } 72 | 73 | /** 74 | * Creates SSML (Speech Synthesis Markup Language) from text and voice configuration. 75 | * @param voice - Voice name 76 | * @param rate - Speech rate (e.g., "+0%") 77 | * @param volume - Speech volume (e.g., "+0%") 78 | * @param pitch - Speech pitch (e.g., "+0Hz") 79 | * @param escapedText - Text content (should be XML-escaped) 80 | * @returns Complete SSML document string 81 | */ 82 | export function browserMkssml(voice: string, rate: string, volume: string, pitch: string, escapedText: string): string { 83 | return ( 84 | "" 85 | + `` 86 | + `` 87 | + `${escapedText}` 88 | + "" 89 | + "" 90 | + "" 91 | ); 92 | } 93 | 94 | /** 95 | * Creates a complete WebSocket message with headers and SSML data. 96 | * @param requestId - Unique request identifier 97 | * @param timestamp - Timestamp string for the request 98 | * @param ssml - SSML content to include in the message 99 | * @returns Complete WebSocket message string with headers and data 100 | */ 101 | export function browserSsmlHeadersPlusData(requestId: string, timestamp: string, ssml: string): string { 102 | return ( 103 | `X-RequestId:${requestId}\r\n` 104 | + "Content-Type:application/ssml+xml\r\n" 105 | + `X-Timestamp:${timestamp}Z\r\n` // This is not a mistake, Microsoft Edge bug. 
106 | + "Path:ssml\r\n\r\n" 107 | + `${ssml}` 108 | ); 109 | } 110 | 111 | /** 112 | * Calculates the maximum message size for text chunks based on WebSocket limits. 113 | * @param voice - Voice name 114 | * @param rate - Speech rate 115 | * @param volume - Speech volume 116 | * @param pitch - Speech pitch 117 | * @returns Maximum byte size for text content in a single message 118 | */ 119 | export function browserCalcMaxMesgSize(voice: string, rate: string, volume: string, pitch: string): number { 120 | const websocketMaxSize = 2 ** 16; 121 | const overheadPerMessage = browserSsmlHeadersPlusData( 122 | browserConnectId(), 123 | browserDateToString(), 124 | browserMkssml(voice, rate, volume, pitch, ""), 125 | ).length + 50; // margin of error 126 | return websocketMaxSize - overheadPerMessage; 127 | } -------------------------------------------------------------------------------- /src/isomorphic-voices.ts: -------------------------------------------------------------------------------- 1 | // Use native fetch API available in both Node.js 18+ and browsers 2 | import { SEC_MS_GEC_VERSION, VOICE_HEADERS, VOICE_LIST_URL } from './constants'; 3 | import { IsomorphicDRM } from './isomorphic-drm'; 4 | import { Voice, VoicesManagerFind, VoicesManagerVoice } from './types'; 5 | 6 | /** 7 | * Error class for fetch-related errors (isomorphic equivalent of AxiosError) 8 | */ 9 | export class FetchError extends Error { 10 | response?: { 11 | status: number; 12 | headers: Record; 13 | }; 14 | 15 | constructor(message: string, response?: { status: number; headers: Record }) { 16 | super(message); 17 | this.name = 'FetchError'; 18 | this.response = response; 19 | } 20 | } 21 | 22 | async function _listVoices(proxy?: string): Promise { 23 | const url = `${VOICE_LIST_URL}&Sec-MS-GEC=${await IsomorphicDRM.generateSecMsGec()}&Sec-MS-GEC-Version=${SEC_MS_GEC_VERSION}`; 24 | 25 | const fetchOptions: RequestInit = { 26 | headers: VOICE_HEADERS, 27 | }; 28 | 29 | // Note: Proxy support 
in browsers is limited and handled differently 30 | // In Node.js, we could potentially use a proxy agent with fetch 31 | if (proxy) { 32 | console.warn('Proxy support in isomorphic environment is limited. Consider using a backend proxy.'); 33 | } 34 | 35 | try { 36 | const response = await fetch(url, fetchOptions); 37 | 38 | if (!response.ok) { 39 | const headers: Record = {}; 40 | response.headers.forEach((value, key) => { 41 | headers[key] = value; 42 | }); 43 | 44 | throw new FetchError(`HTTP ${response.status}`, { 45 | status: response.status, 46 | headers 47 | }); 48 | } 49 | 50 | const data: Voice[] = await response.json(); 51 | 52 | for (const voice of data) { 53 | voice.VoiceTag.ContentCategories = voice.VoiceTag.ContentCategories.map(c => c.trim() as any); 54 | voice.VoiceTag.VoicePersonalities = voice.VoiceTag.VoicePersonalities.map(p => p.trim() as any); 55 | } 56 | 57 | return data; 58 | } catch (error) { 59 | if (error instanceof FetchError) { 60 | throw error; 61 | } 62 | // Convert other fetch errors to our FetchError format 63 | throw new FetchError(error instanceof Error ? error.message : 'Unknown fetch error'); 64 | } 65 | } 66 | 67 | /** 68 | * Fetches all available voices from the Microsoft Edge TTS service (isomorphic version). 69 | * Works in both Node.js and browsers (subject to CORS policy). 70 | * 71 | * @param proxy - Optional proxy URL for the request (limited browser support) 72 | * @returns Promise resolving to array of available voices 73 | */ 74 | export async function listVoices(proxy?: string): Promise { 75 | try { 76 | return await _listVoices(proxy); 77 | } catch (e) { 78 | if (e instanceof FetchError && e.response?.status === 403) { 79 | IsomorphicDRM.handleClientResponseError(e.response); 80 | return await _listVoices(proxy); 81 | } 82 | throw e; 83 | } 84 | } 85 | 86 | /** 87 | * Isomorphic utility class for finding and filtering available voices. 88 | * Works in both Node.js and browsers (subject to CORS policy). 
89 | * 90 | * @example 91 | * ```typescript 92 | * const voicesManager = await IsomorphicVoicesManager.create(); 93 | * const englishVoices = voicesManager.find({ Language: 'en' }); 94 | * ``` 95 | */ 96 | export class IsomorphicVoicesManager { 97 | private voices: VoicesManagerVoice[] = []; 98 | private calledCreate = false; 99 | 100 | /** 101 | * Creates a new IsomorphicVoicesManager instance. 102 | * 103 | * @param customVoices - Optional custom voice list instead of fetching from API 104 | * @param proxy - Optional proxy URL for API requests (limited browser support) 105 | * @returns Promise resolving to IsomorphicVoicesManager instance 106 | */ 107 | public static async create(customVoices?: Voice[], proxy?: string): Promise { 108 | const manager = new IsomorphicVoicesManager(); 109 | const voices = customVoices ?? await listVoices(proxy); 110 | manager.voices = voices.map(voice => ({ 111 | ...voice, 112 | Language: voice.Locale.split('-')[0], 113 | })); 114 | manager.calledCreate = true; 115 | return manager; 116 | } 117 | 118 | /** 119 | * Finds voices matching the specified criteria. 
120 | * 121 | * @param filter - Filter criteria for voice selection 122 | * @returns Array of voices matching the filter 123 | * @throws {Error} If called before create() 124 | */ 125 | public find(filter: VoicesManagerFind): VoicesManagerVoice[] { 126 | if (!this.calledCreate) { 127 | throw new Error('IsomorphicVoicesManager.find() called before IsomorphicVoicesManager.create()'); 128 | } 129 | 130 | return this.voices.filter(voice => { 131 | return Object.entries(filter).every(([key, value]) => { 132 | return voice[key as keyof VoicesManagerFind] === value; 133 | }); 134 | }); 135 | } 136 | } -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test & Build 2 | 3 | on: 4 | push: 5 | branches: [main, master, develop] 6 | pull_request: 7 | branches: [main, master, develop] 8 | 9 | jobs: 10 | test: 11 | name: Test on ${{ matrix.runtime }} 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | include: 16 | - runtime: node 17 | node-version: '18' 18 | - runtime: node 19 | node-version: '20' 20 | - runtime: node 21 | node-version: '22' 22 | - runtime: deno 23 | deno-version: 'v1.x' 24 | - runtime: bun 25 | bun-version: 'latest' 26 | 27 | steps: 28 | - name: Checkout code 29 | uses: actions/checkout@v4 30 | 31 | - name: Setup Node.js 32 | if: matrix.runtime == 'node' 33 | uses: actions/setup-node@v4 34 | with: 35 | node-version: ${{ matrix.node-version }} 36 | cache: 'npm' 37 | 38 | - name: Setup Deno 39 | if: matrix.runtime == 'deno' 40 | uses: denoland/setup-deno@v1 41 | with: 42 | deno-version: ${{ matrix.deno-version }} 43 | 44 | - name: Setup Node.js (for Deno build) 45 | if: matrix.runtime == 'deno' 46 | uses: actions/setup-node@v4 47 | with: 48 | node-version: '22' 49 | cache: 'npm' 50 | 51 | - name: Install dependencies (Deno build) 52 | if: matrix.runtime == 'deno' 53 | run: npm ci 54 | 55 | - name: Setup Bun 
56 | if: matrix.runtime == 'bun' 57 | uses: oven-sh/setup-bun@v1 58 | with: 59 | bun-version: ${{ matrix.bun-version }} 60 | 61 | - name: Install dependencies (Node.js) 62 | if: matrix.runtime == 'node' 63 | run: npm ci 64 | 65 | - name: Install dependencies (Bun) 66 | if: matrix.runtime == 'bun' 67 | run: bun install 68 | 69 | - name: Type check (Node.js) 70 | if: matrix.runtime == 'node' 71 | run: npm run type-check 72 | 73 | - name: Lint (Node.js) 74 | if: matrix.runtime == 'node' && matrix.node-version == '22' 75 | run: npm run lint 76 | 77 | - name: Build (Node.js) 78 | if: matrix.runtime == 'node' 79 | run: npm run build 80 | 81 | - name: Build (Bun) 82 | if: matrix.runtime == 'bun' 83 | run: bun run build 84 | 85 | - name: Build (Deno) 86 | if: matrix.runtime == 'deno' 87 | run: npm run build 88 | 89 | - name: Test Node.js 90 | if: matrix.runtime == 'node' 91 | run: npm test 92 | continue-on-error: true 93 | 94 | - name: Test Deno 95 | if: matrix.runtime == 'deno' 96 | run: deno test --allow-net --allow-env --no-check --no-lock tests/deno/*.ts 97 | continue-on-error: true 98 | 99 | - name: Test Bun 100 | if: matrix.runtime == 'bun' 101 | run: bun run test 102 | continue-on-error: true 103 | 104 | - name: Check bundle sizes 105 | if: matrix.runtime == 'node' && matrix.node-version == '22' 106 | run: ls -la dist/ 107 | continue-on-error: true 108 | 109 | build-matrix: 110 | name: Build for ${{ matrix.target }} 111 | runs-on: ubuntu-latest 112 | strategy: 113 | matrix: 114 | target: [node, browser, isomorphic, webworker] 115 | 116 | steps: 117 | - name: Checkout code 118 | uses: actions/checkout@v4 119 | 120 | - name: Setup Node.js 121 | uses: actions/setup-node@v4 122 | with: 123 | node-version: '22' 124 | cache: 'npm' 125 | 126 | - name: Install dependencies 127 | run: npm ci 128 | 129 | - name: Build ${{ matrix.target }} 130 | run: npm run build 131 | 132 | - name: Upload build artifacts 133 | uses: actions/upload-artifact@v4 134 | with: 135 | name: 
build-${{ matrix.target }} 136 | path: dist/ 137 | retention-days: 7 138 | 139 | compatibility-test: 140 | name: Test Cross-Runtime Compatibility 141 | runs-on: ubuntu-latest 142 | needs: [test] 143 | if: github.event_name == 'pull_request' 144 | 145 | steps: 146 | - name: Checkout code 147 | uses: actions/checkout@v4 148 | 149 | - name: Setup Node.js 150 | uses: actions/setup-node@v4 151 | with: 152 | node-version: '22' 153 | cache: 'npm' 154 | 155 | - name: Setup Deno 156 | uses: denoland/setup-deno@v1 157 | with: 158 | deno-version: v1.x 159 | 160 | - name: Setup Bun 161 | uses: oven-sh/setup-bun@v1 162 | with: 163 | bun-version: latest 164 | 165 | - name: Install dependencies 166 | run: npm ci 167 | 168 | - name: Build package 169 | run: npm run build 170 | 171 | - name: Test runtime detection (Node.js) 172 | run: node -e "const { detectRuntime } = require('./dist/runtime-detection.cjs'); const runtime = detectRuntime(); console.log('Node.js runtime detection:', runtime); if (runtime.name !== 'node') process.exit(1);" 173 | 174 | - name: Test runtime detection (Deno) 175 | run: deno eval "import { detectRuntime } from './dist/runtime-detection.js'; const runtime = detectRuntime(); console.log('Deno runtime detection:', runtime); if (runtime.name !== 'deno') Deno.exit(1);" 176 | 177 | - name: Test runtime detection (Bun) 178 | run: bun run -e "import { detectRuntime } from './dist/runtime-detection.js'; const runtime = detectRuntime(); console.log('Bun runtime detection:', runtime); if (runtime.name !== 'bun') process.exit(1);" 179 | 180 | - name: Test package imports (Node.js) 181 | run: node -e "const main = require('./dist/index.cjs'); const browser = require('./dist/browser.cjs'); const isomorphic = require('./dist/isomorphic.cjs'); console.log('✅ All Node.js imports work');" 182 | 183 | - name: Test package imports (Deno) 184 | run: deno eval "import * as main from './dist/index.js'; import * as browser from './dist/browser.js'; import * as isomorphic from 
'./dist/isomorphic.js'; console.log('✅ All Deno imports work');" 185 | 186 | - name: Test package imports (Bun) 187 | run: bun run -e "import * as main from './dist/index.js'; import * as browser from './dist/browser.js'; import * as isomorphic from './dist/isomorphic.js'; console.log('✅ All Bun imports work');" 188 | -------------------------------------------------------------------------------- /src/simple.ts: -------------------------------------------------------------------------------- 1 | import { Communicate } from './communicate'; 2 | 3 | /** 4 | * Options for controlling the voice prosody (rate, pitch, volume). 5 | */ 6 | export interface ProsodyOptions { 7 | /** 8 | * The speaking rate of the voice. 9 | * Examples: "+10.00%", "-20.00%" 10 | */ 11 | rate?: string; 12 | /** 13 | * The speaking volume of the voice. 14 | * Examples: "+15.00%", "-10.00%" 15 | */ 16 | volume?: string; 17 | /** 18 | * The speaking pitch of the voice. 19 | * Examples: "+20Hz", "-10Hz" 20 | */ 21 | pitch?: string; 22 | } 23 | 24 | /** 25 | * Represents a single word boundary with its timing and text. 26 | * The API provides timing in 100-nanosecond units. 27 | */ 28 | export interface WordBoundary { 29 | /** 30 | * The offset from the beginning of the audio stream in 100-nanosecond units. 31 | */ 32 | offset: number; 33 | /** 34 | * The duration of the word in 100-nanosecond units. 35 | */ 36 | duration: number; 37 | /** 38 | * The text of the spoken word. 39 | */ 40 | text: string; 41 | } 42 | 43 | /** 44 | * The final result of the synthesis process. 45 | */ 46 | export interface SynthesisResult { 47 | /** 48 | * The generated audio as a Blob, which can be used in an