├── turn-go ├── .gitignore ├── README.md └── go.mod ├── video-to-jpeg ├── public │ └── .gitkeep ├── src │ ├── html.d.ts │ ├── packet.proto │ ├── index.ts │ ├── player.html │ ├── packet.ts │ ├── shared │ │ └── sfu-utils.ts │ └── web │ │ └── app.ts ├── tsconfig.web.json ├── tsconfig.json ├── package.json ├── wrangler.jsonc ├── .gitignore ├── ARCHITECTURE.md └── README.md ├── sfu-turn-go ├── .gitignore ├── README.md └── go.mod ├── tts-ws ├── .vscode │ └── settings.json ├── src │ ├── html.d.ts │ ├── packet.proto │ └── packet.ts ├── .prettierrc ├── .editorconfig ├── package.json ├── wrangler.jsonc ├── tsconfig.json ├── .gitignore └── README.md ├── ai-tts-stt ├── src │ ├── html.d.ts │ ├── wasm.d.ts │ ├── wasm │ │ └── speexdsp.wasm │ ├── packet.proto │ ├── web │ │ ├── ui │ │ │ ├── debugLog.ts │ │ │ ├── generateControls.ts │ │ │ ├── transcriptionList.ts │ │ │ ├── listenerControls.ts │ │ │ ├── publisherControls.ts │ │ │ ├── statusIndicator.ts │ │ │ ├── sttControls.ts │ │ │ └── dom.ts │ │ ├── types.ts │ │ ├── services │ │ │ ├── subtitles.ts │ │ │ ├── api.ts │ │ │ ├── webrtc.ts │ │ │ └── stt.ts │ │ └── state.ts │ ├── shared │ │ ├── config.ts │ │ ├── log.ts │ │ ├── ws-connection.ts │ │ ├── state-store.ts │ │ ├── audio-utils.ts │ │ ├── do-utils.ts │ │ └── sfu-utils.ts │ ├── index.ts │ ├── audio-processor.ts │ ├── packet.ts │ ├── speex-resampler.ts │ └── player.html ├── .prettierrc ├── tsconfig.web.json ├── package.json ├── wrangler.jsonc ├── tsconfig.json ├── .gitignore └── scripts │ ├── bootstrap.sh │ └── build-speexdsp-wasm.sh ├── whip-whep-server ├── wish-whep-00-player │ ├── wrangler.toml │ ├── package.json │ └── static │ │ └── index.html ├── .prettierrc ├── .editorconfig ├── worker-configuration.d.ts ├── wrangler.toml ├── package.json ├── README.md ├── .gitignore └── src │ └── index.ts ├── openai-webrtc-relay ├── .prettierrc ├── worker-configuration.d.ts ├── .editorconfig ├── test │ ├── tsconfig.json │ └── index.spec.ts ├── vitest.config.mts ├── package.json ├── 
wrangler.toml ├── README.md ├── tsconfig.json ├── .gitignore ├── public │ └── index.html └── src │ └── index.ts ├── echo └── README.md ├── echo-simulcast └── README.md ├── echo-datachannels └── README.md └── .github └── workflows └── semgrep.yml /turn-go/.gitignore: -------------------------------------------------------------------------------- 1 | turn-go 2 | -------------------------------------------------------------------------------- /video-to-jpeg/public/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sfu-turn-go/.gitignore: -------------------------------------------------------------------------------- 1 | sfu-turn-go 2 | -------------------------------------------------------------------------------- /tts-ws/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.associations": { 3 | "wrangler.json": "jsonc" 4 | } 5 | } -------------------------------------------------------------------------------- /tts-ws/src/html.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.html' { 2 | const content: string; 3 | export default content; 4 | } 5 | -------------------------------------------------------------------------------- /ai-tts-stt/src/html.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.html' { 2 | const content: string; 3 | export default content; 4 | } 5 | -------------------------------------------------------------------------------- /whip-whep-server/wish-whep-00-player/wrangler.toml: -------------------------------------------------------------------------------- 1 | name = "whep-00-player" 2 | pages_build_output_dir = "./static" 3 | -------------------------------------------------------------------------------- /tts-ws/.prettierrc: 
-------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 140, 3 | "singleQuote": true, 4 | "semi": true, 5 | "useTabs": true 6 | } 7 | -------------------------------------------------------------------------------- /video-to-jpeg/src/html.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.html' { 2 | const content: string; 3 | export default content; 4 | } 5 | -------------------------------------------------------------------------------- /ai-tts-stt/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 140, 3 | "singleQuote": true, 4 | "semi": true, 5 | "useTabs": true 6 | } 7 | -------------------------------------------------------------------------------- /ai-tts-stt/src/wasm.d.ts: -------------------------------------------------------------------------------- 1 | declare module '*.wasm' { 2 | const content: WebAssembly.Module; 3 | export default content; 4 | } 5 | -------------------------------------------------------------------------------- /ai-tts-stt/src/wasm/speexdsp.wasm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cloudflare/realtime-examples/HEAD/ai-tts-stt/src/wasm/speexdsp.wasm -------------------------------------------------------------------------------- /openai-webrtc-relay/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 140, 3 | "singleQuote": true, 4 | "semi": true, 5 | "useTabs": true 6 | } 7 | -------------------------------------------------------------------------------- /whip-whep-server/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 140, 3 | "singleQuote": true, 4 | "semi": true, 5 | "useTabs": true 6 | } 7 | 
-------------------------------------------------------------------------------- /openai-webrtc-relay/worker-configuration.d.ts: -------------------------------------------------------------------------------- 1 | // Generated by Wrangler 2 | // After adding bindings to `wrangler.toml`, regenerate this interface via `npm run cf-typegen` 3 | interface Env { 4 | } 5 | -------------------------------------------------------------------------------- /echo/README.md: -------------------------------------------------------------------------------- 1 | # Echo example 2 | 3 | In order to run this example, you'll need to update `APP_ID` and `APP_TOKEN` after creating an SFU Application in the [Cloudflare dashboard](https://dash.cloudflare.com/?to=/:account/calls). -------------------------------------------------------------------------------- /ai-tts-stt/src/packet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Packet { 4 | uint32 sequenceNumber = 1; // not used in Buffer mode 5 | uint32 timestamp = 2; // not used in Buffer mode 6 | 7 | bytes payload = 5; 8 | } 9 | -------------------------------------------------------------------------------- /tts-ws/src/packet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Packet { 4 | uint32 sequenceNumber = 1; // not used in Buffer mode 5 | uint32 timestamp = 2; // not used in Buffer mode 6 | 7 | bytes payload = 5; 8 | } 9 | -------------------------------------------------------------------------------- /echo-simulcast/README.md: -------------------------------------------------------------------------------- 1 | # Echo example 2 | 3 | In order to run this example, you'll need to update `APP_ID` and `APP_TOKEN` after creating an SFU Application in the [Cloudflare dashboard](https://dash.cloudflare.com/?to=/:account/calls). 
-------------------------------------------------------------------------------- /echo-datachannels/README.md: -------------------------------------------------------------------------------- 1 | # Echo with DataChannels 2 | 3 | In order to run this example, you'll need to update `APP_ID` and `APP_TOKEN` after creating an SFU Application in the [Cloudflare dashboard](https://dash.cloudflare.com/?to=/:account/calls). 4 | -------------------------------------------------------------------------------- /tts-ws/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = tab 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | -------------------------------------------------------------------------------- /whip-whep-server/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = tab 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | -------------------------------------------------------------------------------- /openai-webrtc-relay/.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | indent_style = tab 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.yml] 12 | indent_style = space 13 | -------------------------------------------------------------------------------- /openai-webrtc-relay/test/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "compilerOptions": { 4 | 
"types": ["@cloudflare/workers-types/experimental", "@cloudflare/vitest-pool-workers"] 5 | }, 6 | "include": ["./**/*.ts", "../worker-configuration.d.ts"], 7 | "exclude": [] 8 | } 9 | -------------------------------------------------------------------------------- /openai-webrtc-relay/vitest.config.mts: -------------------------------------------------------------------------------- 1 | import { defineWorkersConfig } from '@cloudflare/vitest-pool-workers/config'; 2 | 3 | export default defineWorkersConfig({ 4 | test: { 5 | poolOptions: { 6 | workers: { 7 | wrangler: { configPath: './wrangler.toml' }, 8 | }, 9 | }, 10 | }, 11 | }); 12 | -------------------------------------------------------------------------------- /video-to-jpeg/src/packet.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message Packet { 4 | uint32 sequenceNumber = 1; // sequence number (used for audio; may be unset for video) 5 | uint32 timestamp = 2; // timestamp for synchronization 6 | 7 | bytes payload = 5; // media payload (PCM audio or JPEG video) 8 | } 9 | -------------------------------------------------------------------------------- /whip-whep-server/worker-configuration.d.ts: -------------------------------------------------------------------------------- 1 | // Generated by Wrangler on Thu May 09 2024 10:37:45 GMT-0500 (Central Daylight Time) 2 | // by running `wrangler types` 3 | 4 | interface Env { 5 | CALLS_API: "http://localhost:8888"; 6 | CALLS_APP_ID: "f0ea263009299383d09d44b9fad5316c"; 7 | LIVE_STORE: DurableObjectNamespace; 8 | } 9 | -------------------------------------------------------------------------------- /whip-whep-server/wish-whep-00-player/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whep-00-player", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test 
specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "devDependencies": { 12 | "wrangler": "^3.56.0" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tts-ws/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tts-ws", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "deploy": "wrangler deploy", 7 | "dev": "wrangler dev", 8 | "start": "wrangler dev", 9 | "cf-typegen": "wrangler types", 10 | "proto": "protoc --ts_out src --proto_path src src/packet.proto" 11 | }, 12 | "devDependencies": { 13 | "@protobuf-ts/plugin": "^2.11.1", 14 | "typescript": "^5.5.2", 15 | "wrangler": "^4.28.0" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /whip-whep-server/wrangler.toml: -------------------------------------------------------------------------------- 1 | #:schema node_modules/wrangler/config-schema.json 2 | name = "whipwhep-proxy" 3 | main = "src/index.ts" 4 | compatibility_date = "2024-04-03" 5 | 6 | [vars] 7 | CALLS_API = "https://rtc.live.cloudflare.com" 8 | CALLS_APP_ID = "" 9 | CALLS_APP_SECRET = "" 10 | 11 | [[durable_objects.bindings]] 12 | name = "LIVE_STORE" 13 | class_name = "LiveStore" 14 | 15 | [[migrations]] 16 | tag = "v1" 17 | new_classes = ["LiveStore"] 18 | -------------------------------------------------------------------------------- /whip-whep-server/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whipwhep-proxy", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "deploy": "wrangler deploy", 7 | "dev": "wrangler dev", 8 | "start": "wrangler dev", 9 | "cf-typegen": "wrangler types" 10 | }, 11 | "devDependencies": { 12 | "typescript": "^5.0.4", 13 | "wrangler": "^3.0.0" 14 | }, 15 | "dependencies": { 16 | "@cloudflare/workers-types": "^4.20240502.0" 17 | } 18 | } 19 | 
-------------------------------------------------------------------------------- /openai-webrtc-relay/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-demo", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "deploy": "wrangler deploy", 7 | "dev": "wrangler dev", 8 | "start": "wrangler dev", 9 | "test": "vitest", 10 | "cf-typegen": "wrangler types" 11 | }, 12 | "devDependencies": { 13 | "@cloudflare/vitest-pool-workers": "^0.5.2", 14 | "@cloudflare/workers-types": "^4.20241205.0", 15 | "typescript": "^5.5.2", 16 | "vitest": "2.1.8", 17 | "wrangler": "^3.95.0" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /ai-tts-stt/tsconfig.web.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "moduleResolution": "node", 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "skipLibCheck": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "noEmit": true 14 | }, 15 | "include": [ 16 | "src/web/**/*" 17 | ], 18 | "exclude": [ 19 | "node_modules" 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /video-to-jpeg/tsconfig.web.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "ESNext", 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "moduleResolution": "node", 7 | "strict": true, 8 | "esModuleInterop": true, 9 | "skipLibCheck": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "resolveJsonModule": true, 12 | "isolatedModules": true, 13 | "noEmit": true 14 | }, 15 | "include": [ 16 | "src/web/**/*" 17 | ], 18 | "exclude": [ 19 | "node_modules" 20 | ] 21 | 
} 22 | -------------------------------------------------------------------------------- /openai-webrtc-relay/wrangler.toml: -------------------------------------------------------------------------------- 1 | #:schema node_modules/wrangler/config-schema.json 2 | name = "openai-demo" 3 | main = "src/index.ts" 4 | compatibility_date = "2024-12-05" 5 | compatibility_flags = ["nodejs_compat"] 6 | assets = { directory = "./public" } 7 | 8 | [vars] 9 | OPENAI_API_KEY = "" 10 | OPENAI_MODEL_ENDPOINT = "https://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01" 11 | CALLS_BASE_URL = "https://rtc.live.cloudflare.com/v1/apps" 12 | CALLS_APP_ID = "" 13 | CALLS_APP_TOKEN = "" 14 | -------------------------------------------------------------------------------- /video-to-jpeg/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2021", 4 | "lib": ["es2021"], 5 | "jsx": "react-jsx", 6 | "module": "es2022", 7 | "moduleResolution": "node", 8 | "resolveJsonModule": true, 9 | "allowJs": true, 10 | "checkJs": false, 11 | "noEmit": true, 12 | "isolatedModules": true, 13 | "allowSyntheticDefaultImports": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "strict": true, 16 | "skipLibCheck": true, 17 | "types": [ 18 | "./worker-configuration.d.ts" 19 | ] 20 | }, 21 | "exclude": [ 22 | "src/web/**" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/debugLog.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Debug log UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements } from './dom'; 7 | 8 | export class DebugLog { 9 | constructor(private elements: UIElements) {} 10 | 11 | update(state: AppState) { 12 | const { debugArea } = this.elements; 13 | 14 | // Clear and rebuild log (simple approach) 15 | debugArea.value = 
state.debugLogs.map((entry) => `[${entry.timestamp.toLocaleTimeString()}] ${entry.message}`).join('\n'); 16 | 17 | // Auto-scroll to bottom 18 | debugArea.scrollTop = debugArea.scrollHeight; 19 | } 20 | 21 | clear() { 22 | this.elements.debugArea.value = ''; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /turn-go/README.md: -------------------------------------------------------------------------------- 1 | # TURN to TURN example in Go 2 | 3 | This command line example written in Go shows how to fetch TURN credentials from the Cloudflare API for two PeerConnections. 4 | Then it configures two PeerConnections in Pion with the TURN credentials, both set with the relay only policy. 5 | Then it connects the two PeerConnections and establishes a data channel between the two peers. 6 | 7 | ## Building 8 | 9 | Running `go build` should result in a binary called `turn-go` getting built. 10 | 11 | ## Executing 12 | 13 | Simply invoke the `turn-go` binary with two arguments: the API token and the TURN token. 14 | You get these two parameters when you create a new TURN application on your Cloudflare dashboard. -------------------------------------------------------------------------------- /sfu-turn-go/README.md: -------------------------------------------------------------------------------- 1 | # TURN to TURN example in Go 2 | 3 | This command line example written in Go shows how to fetch TURN credentials from the Cloudflare API for two PeerConnections. 4 | Then it configures two PeerConnections in Pion with the TURN credentials, both set with the relay only policy. 5 | Then it connects the two PeerConnections and establishes a data channel between the two peers. 6 | 7 | ## Building 8 | 9 | Running `go build` should result in a binary called `sfu-turn-go` getting built. 10 | 11 | ## Executing 12 | 13 | Simply invoke the `sfu-turn-go` binary with two arguments: the API token and the TURN token.
14 | You get these two parameters when you create a new TURN application on your Cloudflare dashboard. -------------------------------------------------------------------------------- /.github/workflows/semgrep.yml: -------------------------------------------------------------------------------- 1 | on: 2 | pull_request: {} 3 | workflow_dispatch: {} 4 | push: 5 | branches: 6 | - main 7 | - master 8 | schedule: 9 | - cron: '0 0 * * *' 10 | name: Semgrep config 11 | jobs: 12 | semgrep: 13 | name: semgrep/ci 14 | runs-on: ubuntu-latest 15 | env: 16 | SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }} 17 | SEMGREP_URL: https://cloudflare.semgrep.dev 18 | SEMGREP_APP_URL: https://cloudflare.semgrep.dev 19 | SEMGREP_VERSION_CHECK_URL: https://cloudflare.semgrep.dev/api/check-version 20 | container: 21 | image: returntocorp/semgrep 22 | steps: 23 | - uses: actions/checkout@v4 24 | - run: semgrep ci 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /video-to-jpeg/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cf-realtime-video-to-jpeg", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "deploy": "wrangler deploy", 7 | "dev": "wrangler dev", 8 | "start": "wrangler dev", 9 | "cf-typegen": "wrangler types", 10 | "build:web": "npm run build:web:js", 11 | "build:web:js": "esbuild src/web/app.ts --bundle --minify --sourcemap --outfile=public/assets/app.js", 12 | "watch:web": "esbuild src/web/app.ts --bundle --sourcemap --outfile=public/assets/app.js --watch" 13 | }, 14 | "dependencies": { 15 | "@protobuf-ts/runtime": "^2.11.1" 16 | }, 17 | "devDependencies": { 18 | "typescript": "^5.5.2", 19 | "wrangler": "^4.28.0", 20 | "esbuild": "^0.19.0" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /openai-webrtc-relay/README.md: 
-------------------------------------------------------------------------------- 1 | # Calls - OpenAI demo 2 | 3 | This is a simple example of how you can set up OpenAI's WebRTC realtime API with Cloudflare Calls. 4 | 5 | ## Configuration 6 | 7 | Please update the environment variables in wrangler.toml 8 | ``` 9 | OPENAI_API_KEY = "" 10 | OPENAI_MODEL_ENDPOINT = "https://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01" 11 | CALLS_BASE_URL = "https://rtc.live.cloudflare.com/v1/apps" 12 | CALLS_APP_ID = "" 13 | CALLS_APP_TOKEN = "" 14 | ``` 15 | 16 | ## How to run it 17 | Install dependencies if you run this for the first time: 18 | ``` 19 | npm install --include=dev 20 | ``` 21 | Once everything is in place, run the dev server: 22 | ``` 23 | npm start -- --port 7878 24 | ``` -------------------------------------------------------------------------------- /video-to-jpeg/wrangler.jsonc: -------------------------------------------------------------------------------- 1 | /** 2 | * Wrangler configuration for the WebRTC Video → JPEG demo. 
3 | */ 4 | { 5 | "$schema": "node_modules/wrangler/config-schema.json", 6 | "name": "cf-realtime-video-to-jpeg", 7 | "main": "src/index.ts", 8 | "compatibility_date": "2025-08-06", 9 | "migrations": [ 10 | { 11 | "new_sqlite_classes": [ 12 | "VideoAdapter" 13 | ], 14 | "tag": "v1" 15 | } 16 | ], 17 | "assets": { 18 | "directory": "public" 19 | }, 20 | "durable_objects": { 21 | "bindings": [ 22 | { 23 | "class_name": "VideoAdapter", 24 | "name": "VIDEO_ADAPTER" 25 | } 26 | ] 27 | }, 28 | "observability": { 29 | "enabled": true 30 | }, 31 | "vars": { 32 | "SFU_API_BASE": "https://rtc.live.cloudflare.com/v1", 33 | "REALTIME_SFU_APP_ID": "006c9db1a46b95b1b0cec2d9fc63c8d7", 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /tts-ws/wrangler.jsonc: -------------------------------------------------------------------------------- 1 | /** 2 | * For more details on how to configure Wrangler, refer to: 3 | * https://developers.cloudflare.com/workers/wrangler/configuration/ 4 | */ 5 | { 6 | "$schema": "node_modules/wrangler/config-schema.json", 7 | "name": "tts-ws", 8 | "main": "src/index.ts", 9 | "compatibility_date": "2025-08-06", 10 | "migrations": [ 11 | { 12 | "new_sqlite_classes": [ 13 | "AudioSession" 14 | ], 15 | "tag": "v1" 16 | } 17 | ], 18 | "durable_objects": { 19 | "bindings": [ 20 | { 21 | "class_name": "AudioSession", 22 | "name": "AUDIO_SESSION" 23 | } 24 | ] 25 | }, 26 | "observability": { 27 | "enabled": true 28 | }, 29 | "vars": { 30 | "SFU_API_BASE": "https://rtc.live.cloudflare.com/v1", 31 | "ELEVENLABS_VOICE_ID": "21m00Tcm4TlvDq8ikWAM", 32 | "REALTIME_SFU_APP_ID": "c57b0ed7694514a0088a41fb53218173", 33 | } 34 | } -------------------------------------------------------------------------------- /whip-whep-server/README.md: -------------------------------------------------------------------------------- 1 | # WHIP-WHEP Server 2 | 3 | WHIP and WHEP server implemented on top of Calls API 4 | 5 | ## Usage 6 | ### 
Configuration 7 | The following environment variables must be set in wrangler.toml before running it: 8 | 9 | * CALLS_APP_ID 10 | * CALLS_APP_SECRET 11 | 12 | ### Install dependencies 13 | 14 | ``` 15 | npm install --include=dev 16 | ``` 17 | 18 | ### Run it locally or deploy it to Earth 19 | 20 | To run it locally: 21 | 22 | ``` 23 | npx wrangler dev --local 24 | ``` 25 | 26 | If you want it to run on the Cloudflare network: 27 | 28 | ``` 29 | npx wrangler deploy 30 | ``` 31 | 32 | ### Ingest 33 | The ingest endpoint will look like \/ingest/\ 34 | 35 | Example: http://your-domain.com/ingest/my-live 36 | 37 | ### Play 38 | 39 | The play endpoint will look like \/play/\ 40 | 41 | Example: http://your-domain.com/play/my-live 42 | 43 | ## Bonus: WHEP player 44 | A basic WHEP player can be found under the directory wish-whep-00-player/ 45 | -------------------------------------------------------------------------------- /ai-tts-stt/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cf-realtime-audio", 3 | "version": "1.0.0", 4 | "private": true, 5 | "scripts": { 6 | "deploy": "wrangler deploy", 7 | "dev": "wrangler dev", 8 | "start": "wrangler dev", 9 | "cf-typegen": "wrangler types", 10 | "proto": "protoc --ts_out src --proto_path src src/packet.proto", 11 | "tooling:bootstrap": "bash scripts/bootstrap.sh", 12 | "wasm:build": "bash scripts/build-speexdsp-wasm.sh", 13 | "build:web": "npm run build:web:js && npm run build:web:css", 14 | "build:web:js": "esbuild src/web/app.ts --bundle --minify --sourcemap --outfile=public/assets/player.js", 15 | "build:web:css": "esbuild src/web/styles/player.css --bundle --minify --outfile=public/assets/player.css", 16 | "watch:web": "esbuild src/web/app.ts src/web/styles/player.css --bundle --sourcemap --outdir=public/assets --entry-names=player --watch" 17 | }, 18 | "devDependencies": { 19 | "@protobuf-ts/plugin": "^2.11.1", 20 | "esbuild": "^0.19.0", 21 | "typescript": 
"^5.5.2", 22 | "wrangler": "^4.28.0" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/generateControls.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Generate (TTS) controls UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements, setButtonLoading, setEnabled } from './dom'; 7 | 8 | export class GenerateControls { 9 | constructor(private elements: UIElements, private onGenerate: () => void) { 10 | this.bindEvents(); 11 | } 12 | 13 | private bindEvents() { 14 | this.elements.generateBtn.addEventListener('click', this.onGenerate); 15 | } 16 | 17 | update(state: AppState) { 18 | const { generateSection, generateBtn } = this.elements; 19 | 20 | // Enable generate section only when published or connected 21 | const isEnabled = state.connectionState === 'published' || state.connectionState === 'connected'; 22 | 23 | setEnabled(generateSection, isEnabled); 24 | 25 | // Reset button loading state 26 | setButtonLoading(generateBtn, false); 27 | } 28 | 29 | getText(): string { 30 | return this.elements.ttsText.value.trim(); 31 | } 32 | 33 | setLoading(loading: boolean) { 34 | setButtonLoading(this.elements.generateBtn, loading); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /ai-tts-stt/wrangler.jsonc: -------------------------------------------------------------------------------- 1 | /** 2 | * For more details on how to configure Wrangler, refer to: 3 | * https://developers.cloudflare.com/workers/wrangler/configuration/ 4 | */ 5 | { 6 | "$schema": "node_modules/wrangler/config-schema.json", 7 | "name": "cf-realtime-audio", 8 | "main": "src/index.ts", 9 | "compatibility_date": "2025-08-06", 10 | "migrations": [ 11 | { 12 | "new_sqlite_classes": [ 13 | "TTSAdapter" 14 | ], 15 | "tag": "v1" 16 | }, 17 | { 18 | "new_sqlite_classes": [ 19 | "STTAdapter" 20 | ], 21 | 
"tag": "v2" 22 | }, 23 | ], 24 | "assets": { 25 | "directory": "public" 26 | }, 27 | "durable_objects": { 28 | "bindings": [ 29 | { 30 | "class_name": "TTSAdapter", 31 | "name": "TTS_ADAPTER" 32 | }, 33 | { 34 | "class_name": "STTAdapter", 35 | "name": "STT_ADAPTER" 36 | } 37 | ] 38 | }, 39 | "observability": { 40 | "enabled": true 41 | }, 42 | "vars": { 43 | "SFU_API_BASE": "https://rtc.live.cloudflare.com/v1", 44 | "CF_ACCOUNT": "8477399eb04accc1792af96aeaa25222", 45 | "REALTIME_SFU_APP_ID": "66e52edd9e080a5c01c0b4d559fd0e3c", 46 | } 47 | } -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/config.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Shared configuration constants for TTS and STT adapters 3 | */ 4 | 5 | // Model names 6 | export const TTS_MODEL = '@cf/deepgram/aura-1'; 7 | export const STT_MODEL = '@cf/deepgram/nova-3'; 8 | 9 | // Timeouts and intervals 10 | export const DEFAULT_INACTIVITY_TIMEOUT_MS = 10 * 60 * 1000; // 10 minutes 11 | export const DEFAULT_CLEANUP_GRACE_MS = 100; // 100ms delay for last-client cleanup 12 | export const DEFAULT_MAX_RECONNECT_ATTEMPTS = 5; 13 | export const DEFAULT_MAX_RECONNECT_DELAY_MS = 30000; // 30 seconds 14 | 15 | // TTS-specific 16 | export const TTS_BUFFER_CHUNK_SIZE = 16 * 1024; // 16KB - must be less than SFU's 32KB WebSocket message limit 17 | 18 | // STT-specific 19 | export const STT_DEBUG_GRACE_MS = 30 * 1000; // 30 seconds grace when debug-restart with no clients 20 | export const STT_NOVA_KEEPALIVE_MS = 5 * 1000; // 5 seconds 21 | export const STT_MAX_QUEUE_BYTES = 2 * 1024 * 1024; // 2MB safety cap 22 | export const STT_MIN_BATCH_BYTES = 3200; // ~100ms @16kHz mono 16-bit 23 | export const STT_MAX_BATCH_BYTES = 16000; // 500ms @16kHz mono 16-bit 24 | export const STT_MAX_DRAIN_BATCHES_PER_TURN = 8; 25 | export const STT_MAX_DRAIN_SLICE_MS = 10; 26 | 
-------------------------------------------------------------------------------- /openai-webrtc-relay/test/index.spec.ts: -------------------------------------------------------------------------------- 1 | // test/index.spec.ts 2 | import { env, createExecutionContext, waitOnExecutionContext, SELF } from 'cloudflare:test'; 3 | import { describe, it, expect } from 'vitest'; 4 | import worker from '../src/index'; 5 | 6 | // For now, you'll need to do something like this to get a correctly-typed 7 | // `Request` to pass to `worker.fetch()`. 8 | const IncomingRequest = Request; 9 | 10 | describe('Hello World worker', () => { 11 | it('responds with Hello World! (unit style)', async () => { 12 | const request = new IncomingRequest('http://example.com'); 13 | // Create an empty context to pass to `worker.fetch()`. 14 | const ctx = createExecutionContext(); 15 | const response = await worker.fetch(request, env, ctx); 16 | // Wait for all `Promise`s passed to `ctx.waitUntil()` to settle before running test assertions 17 | await waitOnExecutionContext(ctx); 18 | expect(await response.text()).toMatchInlineSnapshot(`"Hello World!"`); 19 | }); 20 | 21 | it('responds with Hello World! 
(integration style)', async () => { 22 | const response = await SELF.fetch('https://example.com'); 23 | expect(await response.text()).toMatchInlineSnapshot(`"Hello World!"`); 24 | }); 25 | }); 26 | -------------------------------------------------------------------------------- /turn-go/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/cloudflare/calls-examples/turn-go 2 | 3 | go 1.24.3 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.1 // indirect 7 | github.com/google/uuid v1.3.1 // indirect 8 | github.com/pion/datachannel v1.5.8 // indirect 9 | github.com/pion/dtls/v2 v2.2.12 // indirect 10 | github.com/pion/ice/v2 v2.3.36 // indirect 11 | github.com/pion/interceptor v0.1.29 // indirect 12 | github.com/pion/logging v0.2.2 // indirect 13 | github.com/pion/mdns v0.0.12 // indirect 14 | github.com/pion/randutil v0.1.0 // indirect 15 | github.com/pion/rtcp v1.2.14 // indirect 16 | github.com/pion/rtp v1.8.7 // indirect 17 | github.com/pion/sctp v1.8.19 // indirect 18 | github.com/pion/sdp/v3 v3.0.9 // indirect 19 | github.com/pion/srtp/v2 v2.0.20 // indirect 20 | github.com/pion/stun v0.6.1 // indirect 21 | github.com/pion/transport/v2 v2.2.10 // indirect 22 | github.com/pion/turn/v2 v2.1.6 // indirect 23 | github.com/pion/webrtc/v3 v3.3.5 // indirect 24 | github.com/pmezard/go-difflib v1.0.0 // indirect 25 | github.com/stretchr/testify v1.9.0 // indirect 26 | github.com/wlynxg/anet v0.0.3 // indirect 27 | golang.org/x/crypto v0.21.0 // indirect 28 | golang.org/x/net v0.22.0 // indirect 29 | golang.org/x/sys v0.18.0 // indirect 30 | gopkg.in/yaml.v3 v3.0.1 // indirect 31 | ) 32 | -------------------------------------------------------------------------------- /sfu-turn-go/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/cloudflare/calls-examples/sfu-turn-go 2 | 3 | go 1.24.3 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.1 // 
indirect 7 | github.com/google/uuid v1.3.1 // indirect 8 | github.com/pion/datachannel v1.5.8 // indirect 9 | github.com/pion/dtls/v2 v2.2.12 // indirect 10 | github.com/pion/ice/v2 v2.3.36 // indirect 11 | github.com/pion/interceptor v0.1.29 // indirect 12 | github.com/pion/logging v0.2.2 // indirect 13 | github.com/pion/mdns v0.0.12 // indirect 14 | github.com/pion/randutil v0.1.0 // indirect 15 | github.com/pion/rtcp v1.2.14 // indirect 16 | github.com/pion/rtp v1.8.7 // indirect 17 | github.com/pion/sctp v1.8.19 // indirect 18 | github.com/pion/sdp/v3 v3.0.9 // indirect 19 | github.com/pion/srtp/v2 v2.0.20 // indirect 20 | github.com/pion/stun v0.6.1 // indirect 21 | github.com/pion/transport/v2 v2.2.10 // indirect 22 | github.com/pion/turn/v2 v2.1.6 // indirect 23 | github.com/pion/webrtc/v3 v3.3.5 // indirect 24 | github.com/pmezard/go-difflib v1.0.0 // indirect 25 | github.com/stretchr/testify v1.9.0 // indirect 26 | github.com/wlynxg/anet v0.0.3 // indirect 27 | golang.org/x/crypto v0.21.0 // indirect 28 | golang.org/x/net v0.22.0 // indirect 29 | golang.org/x/sys v0.18.0 // indirect 30 | gopkg.in/yaml.v3 v3.0.1 // indirect 31 | ) 32 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/types.ts: -------------------------------------------------------------------------------- 1 | export type UserRole = 'player' | 'publisher'; 2 | 3 | export type ConnectionState = 'initial' | 'publishing' | 'published' | 'unpublishing' | 'connecting' | 'connected' | 'disconnected'; 4 | 5 | export interface AppState { 6 | // Core session info 7 | sessionId: string; 8 | userRole: UserRole; 9 | 10 | // Connection state 11 | connectionState: ConnectionState; 12 | isPublished: boolean; 13 | 14 | // TTS settings 15 | selectedSpeaker: string; 16 | 17 | // Publisher UI 18 | publisherTab?: 'tts' | 'stt'; 19 | 20 | // STT state 21 | sttState: { 22 | isMicActive: boolean; 23 | isForwarding: boolean; 24 | pcConnected: boolean; 25 | 
startTime: number | null; 26 | }; 27 | 28 | // Transcripts 29 | transcripts: Transcript[]; 30 | 31 | // Debug logs 32 | debugLogs: LogEntry[]; 33 | } 34 | 35 | export interface Transcript { 36 | start: number; 37 | text: string; 38 | timestamp: number; 39 | isFinal: boolean; 40 | } 41 | 42 | export interface LogEntry { 43 | timestamp: Date; 44 | message: string; 45 | } 46 | 47 | export interface TranscriptionMessage { 48 | type?: 'transcription' | 'stt_done'; 49 | data?: { 50 | channel?: { 51 | alternatives?: Array<{ 52 | transcript: string; 53 | }>; 54 | }; 55 | is_final?: boolean; 56 | }; 57 | timestamp?: number; 58 | } 59 | 60 | export type StateListener = (state: AppState) => void; 61 | export type StateUpdater = (updates: Partial) => void; 62 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/transcriptionList.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Transcription list UI component 3 | */ 4 | 5 | import { AppState, Transcript } from '../types'; 6 | import { UIElements } from './dom'; 7 | 8 | export class TranscriptionList { 9 | constructor(private elements: UIElements) {} 10 | 11 | update(state: AppState) { 12 | // Only render new transcripts (optimization for large lists) 13 | const container = this.elements.transcriptionContent; 14 | const currentCount = container.children.length; 15 | const newTranscripts = state.transcripts.slice(currentCount); 16 | 17 | newTranscripts.forEach((transcript) => { 18 | this.renderTranscript(transcript); 19 | }); 20 | 21 | // Auto-scroll to bottom 22 | container.scrollTop = container.scrollHeight; 23 | } 24 | 25 | private renderTranscript(transcript: Transcript) { 26 | const line = document.createElement('div'); 27 | line.className = 'transcript-line'; 28 | 29 | const time = new Date(transcript.timestamp); 30 | const timeStr = time.toLocaleTimeString('en-US', { 31 | hour12: false, 32 | hour: '2-digit', 33 | minute: 
'2-digit', 34 | second: '2-digit', 35 | }); 36 | 37 | line.innerHTML = ` 38 | ${timeStr} 39 | ${transcript.text} 40 | `; 41 | 42 | this.elements.transcriptionContent.appendChild(line); 43 | } 44 | 45 | clear() { 46 | this.elements.transcriptionContent.innerHTML = ''; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/log.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Logging utilities for consistent log formatting across adapters 3 | */ 4 | 5 | // Use types from global scope instead of importing 6 | 7 | /** 8 | * Creates an adapter logger with support for aliasing the DO id to a human-readable session name. 9 | * - aliasOnce(name): logs a one-time mapping and switches future logs to use the name 10 | * - setAliasSilently(name): switches to the name without emitting the mapping log (useful on restore) 11 | */ 12 | export function createAdapterLogger(adapter: 'TTS' | 'STT', id: DurableObjectId) { 13 | const adapterName = `${adapter}Adapter`; 14 | const originalId = id.toString(); 15 | let currentLabel = originalId; 16 | 17 | const format = (message: string) => `[${adapterName}:${currentLabel}] ${message}`; 18 | 19 | return { 20 | log: (message: string, ...args: any[]) => console.log(format(message), ...args), 21 | warn: (message: string, ...args: any[]) => console.warn(format(message), ...args), 22 | error: (message: string, ...args: any[]) => console.error(format(message), ...args), 23 | aliasOnce: (name: string): boolean => { 24 | if (!name || name === currentLabel) return false; 25 | // Announce mapping from the long id to the human-readable session name once 26 | console.log(`[${adapterName}:${originalId}] Durable Object is now known as "${name}"`); 27 | currentLabel = name; 28 | return true; 29 | }, 30 | setAliasSilently: (name: string): boolean => { 31 | if (!name || name === currentLabel) return false; 32 | currentLabel = name; 33 | 
return true; 34 | }, 35 | } as const; 36 | } 37 | -------------------------------------------------------------------------------- /tts-ws/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | 5 | /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 6 | "target": "es2021", 7 | /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 8 | "lib": ["es2021"], 9 | /* Specify what JSX code is generated. */ 10 | "jsx": "react-jsx", 11 | 12 | /* Specify what module code is generated. */ 13 | "module": "es2022", 14 | /* Specify how TypeScript looks up a file from a given module specifier. */ 15 | "moduleResolution": "node", 16 | /* Enable importing .json files */ 17 | "resolveJsonModule": true, 18 | 19 | /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ 20 | "allowJs": true, 21 | /* Enable error reporting in type-checked JavaScript files. */ 22 | "checkJs": false, 23 | 24 | /* Disable emitting files from a compilation. */ 25 | "noEmit": true, 26 | 27 | /* Ensure that each file can be safely transpiled without relying on other imports. */ 28 | "isolatedModules": true, 29 | /* Allow 'import x from y' when a module doesn't have a default export. */ 30 | "allowSyntheticDefaultImports": true, 31 | /* Ensure that casing is correct in imports. */ 32 | "forceConsistentCasingInFileNames": true, 33 | 34 | /* Enable all strict type-checking options. */ 35 | "strict": true, 36 | 37 | /* Skip type checking all .d.ts files. 
*/ 38 | "skipLibCheck": true, 39 | "types": [ 40 | "./worker-configuration.d.ts" 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /ai-tts-stt/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | 5 | /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 6 | "target": "es2021", 7 | /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 8 | "lib": ["es2021"], 9 | /* Specify what JSX code is generated. */ 10 | "jsx": "react-jsx", 11 | 12 | /* Specify what module code is generated. */ 13 | "module": "es2022", 14 | /* Specify how TypeScript looks up a file from a given module specifier. */ 15 | "moduleResolution": "node", 16 | /* Enable importing .json files */ 17 | "resolveJsonModule": true, 18 | 19 | /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ 20 | "allowJs": true, 21 | /* Enable error reporting in type-checked JavaScript files. */ 22 | "checkJs": false, 23 | 24 | /* Disable emitting files from a compilation. */ 25 | "noEmit": true, 26 | 27 | /* Ensure that each file can be safely transpiled without relying on other imports. */ 28 | "isolatedModules": true, 29 | /* Allow 'import x from y' when a module doesn't have a default export. */ 30 | "allowSyntheticDefaultImports": true, 31 | /* Ensure that casing is correct in imports. */ 32 | "forceConsistentCasingInFileNames": true, 33 | 34 | /* Enable all strict type-checking options. */ 35 | "strict": true, 36 | 37 | /* Skip type checking all .d.ts files. 
*/ 38 | "skipLibCheck": true, 39 | "types": [ 40 | "./worker-configuration.d.ts" 41 | ] 42 | }, 43 | "exclude": [ 44 | "src/web/**" 45 | ] 46 | } 47 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/listenerControls.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Listener controls UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements, setButtonLoading, setVisible } from './dom'; 7 | 8 | export class ListenerControls { 9 | constructor(private elements: UIElements, private onConnect: () => void, private onDisconnect: () => void) { 10 | this.bindEvents(); 11 | } 12 | 13 | private bindEvents() { 14 | this.elements.connectBtn.addEventListener('click', this.onConnect); 15 | this.elements.disconnectBtn.addEventListener('click', this.onDisconnect); 16 | } 17 | 18 | update(state: AppState) { 19 | const { connectBtn, disconnectBtn, listenerTitle } = this.elements; 20 | 21 | // Update title based on role and active tab (publisher only) 22 | if (state.userRole === 'publisher') { 23 | const activeTab = state.publisherTab ?? 'tts'; 24 | listenerTitle.textContent = activeTab === 'tts' ? 
'Step 2: Audio Stream' : 'Audio Stream'; 25 | } else { 26 | listenerTitle.textContent = 'Audio Stream'; 27 | } 28 | 29 | // Reset loading states 30 | setButtonLoading(connectBtn, false); 31 | 32 | switch (state.connectionState) { 33 | case 'initial': 34 | case 'published': 35 | setVisible(connectBtn, true); 36 | setVisible(disconnectBtn, false); 37 | connectBtn.disabled = false; 38 | break; 39 | 40 | case 'connecting': 41 | setVisible(connectBtn, true); 42 | setVisible(disconnectBtn, false); 43 | setButtonLoading(connectBtn, true); 44 | break; 45 | 46 | case 'connected': 47 | setVisible(connectBtn, false); 48 | setVisible(disconnectBtn, true); 49 | break; 50 | 51 | case 'publishing': 52 | case 'unpublishing': 53 | setVisible(connectBtn, true); 54 | setVisible(disconnectBtn, false); 55 | connectBtn.disabled = true; 56 | break; 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /openai-webrtc-relay/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Visit https://aka.ms/tsconfig.json to read more about this file */ 4 | 5 | /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ 6 | "target": "es2021", 7 | /* Specify a set of bundled library declaration files that describe the target runtime environment. */ 8 | "lib": ["es2021"], 9 | /* Specify what JSX code is generated. */ 10 | "jsx": "react-jsx", 11 | 12 | /* Specify what module code is generated. */ 13 | "module": "es2022", 14 | /* Specify how TypeScript looks up a file from a given module specifier. */ 15 | "moduleResolution": "Bundler", 16 | /* Specify type package names to be included without being referenced in a source file. 
*/ 17 | "types": [ 18 | "@cloudflare/workers-types/2023-07-01" 19 | ], 20 | /* Enable importing .json files */ 21 | "resolveJsonModule": true, 22 | 23 | /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ 24 | "allowJs": true, 25 | /* Enable error reporting in type-checked JavaScript files. */ 26 | "checkJs": false, 27 | 28 | /* Disable emitting files from a compilation. */ 29 | "noEmit": true, 30 | 31 | /* Ensure that each file can be safely transpiled without relying on other imports. */ 32 | "isolatedModules": true, 33 | /* Allow 'import x from y' when a module doesn't have a default export. */ 34 | "allowSyntheticDefaultImports": true, 35 | /* Ensure that casing is correct in imports. */ 36 | "forceConsistentCasingInFileNames": true, 37 | 38 | /* Enable all strict type-checking options. */ 39 | "strict": true, 40 | 41 | /* Skip type checking all .d.ts files. */ 42 | "skipLibCheck": true 43 | }, 44 | "exclude": ["test"], 45 | "include": ["worker-configuration.d.ts", "src/**/*.ts"] 46 | } 47 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/ws-connection.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * WebSocket connection management utilities 3 | * Provides deduplication to prevent concurrent connection attempts 4 | */ 5 | 6 | export interface DedupedConnectionParams { 7 | getCurrent: () => T | null; 8 | setCurrent: (ws: T | null) => void; 9 | getCurrentPromise: () => Promise | null; 10 | setCurrentPromise: (promise: Promise | null) => void; 11 | connectFn: () => Promise; 12 | onConnected?: (ws: T) => void; 13 | } 14 | 15 | /** 16 | * Ensures only one WebSocket connection attempt is in flight at a time 17 | * Returns existing open connection or waits for in-flight attempt 18 | */ 19 | export async function dedupedConnect(params: DedupedConnectionParams): Promise { 20 | const { getCurrent, 
setCurrent, getCurrentPromise, setCurrentPromise, connectFn, onConnected } = params; 21 | 22 | // Check if WebSocket is already open 23 | const current = getCurrent(); 24 | if (current?.readyState === WebSocket.OPEN) { 25 | return current; 26 | } 27 | 28 | // If WebSocket is connecting, wait for the existing connection promise 29 | if (current?.readyState === WebSocket.CONNECTING) { 30 | const currentPromise = getCurrentPromise(); 31 | if (currentPromise) { 32 | return await currentPromise; 33 | } 34 | } 35 | 36 | // Check if there's already a connection promise in flight 37 | const existingPromise = getCurrentPromise(); 38 | if (existingPromise) { 39 | return await existingPromise; 40 | } 41 | 42 | // No connection in progress, create a new one 43 | const promise = connectFn(); 44 | setCurrentPromise(promise); 45 | 46 | try { 47 | const ws = await promise; 48 | setCurrent(ws); 49 | setCurrentPromise(null); 50 | if (onConnected) { 51 | onConnected(ws); 52 | } 53 | return ws; 54 | } catch (error) { 55 | setCurrentPromise(null); 56 | throw error; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/publisherControls.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Publisher controls UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements, setButtonLoading, setVisible } from './dom'; 7 | 8 | export class PublisherControls { 9 | constructor(private elements: UIElements, private onPublish: () => void, private onUnpublish: () => void) { 10 | this.bindEvents(); 11 | } 12 | 13 | private bindEvents() { 14 | this.elements.publishBtn.addEventListener('click', this.onPublish); 15 | this.elements.unpublishBtn.addEventListener('click', this.onUnpublish); 16 | } 17 | 18 | update(state: AppState) { 19 | const { publishBtn, unpublishBtn, speakerSelect } = this.elements; 20 | 21 | // Reset loading states 22 | 
setButtonLoading(publishBtn, false); 23 | setButtonLoading(unpublishBtn, false); 24 | 25 | switch (state.connectionState) { 26 | case 'initial': 27 | setVisible(publishBtn, true); 28 | setVisible(unpublishBtn, false); 29 | publishBtn.disabled = false; 30 | speakerSelect.disabled = false; 31 | break; 32 | 33 | case 'publishing': 34 | setButtonLoading(publishBtn, true); 35 | setVisible(unpublishBtn, false); 36 | speakerSelect.disabled = true; 37 | break; 38 | 39 | case 'published': 40 | case 'connected': 41 | setVisible(publishBtn, false); 42 | setVisible(unpublishBtn, true); 43 | unpublishBtn.disabled = false; 44 | speakerSelect.disabled = true; 45 | break; 46 | 47 | case 'unpublishing': 48 | setVisible(publishBtn, false); 49 | setVisible(unpublishBtn, true); 50 | setButtonLoading(unpublishBtn, true); 51 | break; 52 | 53 | case 'connecting': 54 | setVisible(publishBtn, false); 55 | setVisible(unpublishBtn, true); 56 | unpublishBtn.disabled = true; 57 | break; 58 | } 59 | } 60 | 61 | getSelectedSpeaker(): string { 62 | return this.elements.speakerSelect.value; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/statusIndicator.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Status indicator UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements } from './dom'; 7 | 8 | export class StatusIndicator { 9 | constructor(private elements: UIElements) {} 10 | 11 | update(state: AppState) { 12 | const { statusIndicator } = this.elements; 13 | 14 | // Remove all status classes 15 | statusIndicator.className = ''; 16 | 17 | // Set status based on state 18 | switch (state.connectionState) { 19 | case 'initial': 20 | statusIndicator.className = 'disconnected'; 21 | statusIndicator.textContent = state.userRole === 'player' ? 
'Ready to connect' : 'Not Published'; 22 | break; 23 | case 'publishing': 24 | statusIndicator.className = 'connecting'; 25 | statusIndicator.textContent = 'Publishing...'; 26 | break; 27 | case 'published': 28 | statusIndicator.className = 'disconnected'; 29 | statusIndicator.textContent = 'Published'; 30 | break; 31 | case 'unpublishing': 32 | statusIndicator.className = 'connecting'; 33 | statusIndicator.textContent = 'Unpublishing...'; 34 | break; 35 | case 'connecting': 36 | statusIndicator.className = 'connecting'; 37 | statusIndicator.textContent = 'Connecting...'; 38 | break; 39 | case 'connected': 40 | statusIndicator.className = 'live'; 41 | statusIndicator.textContent = 'Live'; 42 | break; 43 | case 'disconnected': 44 | if (state.isPublished) { 45 | statusIndicator.className = 'disconnected'; 46 | statusIndicator.textContent = 'Published'; 47 | } else { 48 | statusIndicator.className = 'disconnected'; 49 | statusIndicator.textContent = state.userRole === 'player' ? 'Ready to connect' : 'Not Published'; 50 | } 51 | break; 52 | } 53 | 54 | // Add the base id back 55 | statusIndicator.id = 'statusIndicator'; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /video-to-jpeg/src/index.ts: -------------------------------------------------------------------------------- 1 | import playerHtml from "./player.html"; 2 | 3 | // Export Durable Objects from their modules 4 | export { VideoAdapter } from "./video-adapter"; 5 | 6 | /** 7 | * Main Worker handler for the video-to-jpeg demo. 8 | * 9 | * Routes incoming HTTP requests to the appropriate Durable Object instance 10 | * based on the session name (the first URL path segment). 
11 | */ 12 | export default { 13 | async fetch(request, env, ctx): Promise { 14 | const url = new URL(request.url); 15 | const pathParts = url.pathname 16 | .substring(1) 17 | .split("/") 18 | .filter((p) => p); 19 | 20 | // Root request handler 21 | if (pathParts.length === 0) { 22 | return new Response( 23 | "Welcome! Use //publisher to publish video or //viewer to see JPEG snapshots.", 24 | { status: 200 } 25 | ); 26 | } 27 | 28 | const sessionName = pathParts[0]; 29 | const action = pathParts.length > 1 ? pathParts[1] : null; 30 | 31 | // Serve UI shell: GET //(publisher|viewer) 32 | if (action && ["publisher", "viewer"].includes(action) && request.method === "GET") { 33 | return new Response(playerHtml, { 34 | headers: { "Content-Type": "text/html;charset=UTF-8" }, 35 | }); 36 | } 37 | 38 | // Debug cleanup: DELETE / 39 | if (!action && request.method === "DELETE") { 40 | const videoId = env.VIDEO_ADAPTER.idFromName(sessionName); 41 | const videoStub = env.VIDEO_ADAPTER.get(videoId); 42 | 43 | ctx.waitUntil( 44 | Promise.allSettled([ 45 | // Durable Object RPC to class method 46 | videoStub.destroy(), 47 | ]) 48 | ); 49 | 50 | return new Response(`Session ${sessionName} destroy signal sent.`, { status: 202 }); 51 | } 52 | 53 | // Route: //video/* - video adapter endpoints (HTTP + WebSocket) 54 | if (action === "video") { 55 | const videoId = env.VIDEO_ADAPTER.idFromName(sessionName); 56 | const videoStub = env.VIDEO_ADAPTER.get(videoId); 57 | return videoStub.fetch(request); 58 | } 59 | 60 | return new Response("Not Found", { status: 404 }); 61 | }, 62 | } satisfies ExportedHandler; 63 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/sttControls.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * STT controls UI component 3 | */ 4 | 5 | import { AppState } from '../types'; 6 | import { UIElements, setButtonLoading, setVisible } from './dom'; 7 | 8 | export 
class STTControls { 9 | constructor( 10 | private elements: UIElements, 11 | private onStartRecording: () => void, 12 | private onStopRecording: () => void, 13 | private onStartForwarding: () => void, 14 | private onStopForwarding: () => void, 15 | private onClear: () => void, 16 | private onExport: (format: 'vtt' | 'srt') => void, 17 | private onRestartNova: () => void 18 | ) { 19 | this.bindEvents(); 20 | } 21 | 22 | private bindEvents() { 23 | const { startSTTBtn, stopSTTBtn, startForwardingBtn, stopForwardingBtn, clearTranscriptionBtn, exportSubtitlesBtn, restartNovaBtn } = 24 | this.elements; 25 | 26 | startSTTBtn.addEventListener('click', this.onStartRecording); 27 | stopSTTBtn.addEventListener('click', this.onStopRecording); 28 | startForwardingBtn.addEventListener('click', this.onStartForwarding); 29 | stopForwardingBtn.addEventListener('click', this.onStopForwarding); 30 | clearTranscriptionBtn.addEventListener('click', this.onClear); 31 | 32 | exportSubtitlesBtn.addEventListener('click', () => { 33 | const format = confirm('Export as SRT? (Cancel for WebVTT)') ? 
'srt' : 'vtt'; 34 | this.onExport(format); 35 | }); 36 | 37 | restartNovaBtn.addEventListener('click', this.onRestartNova); 38 | } 39 | 40 | update(state: AppState) { 41 | const { startSTTBtn, stopSTTBtn, startForwardingBtn, stopForwardingBtn } = this.elements; 42 | 43 | const { isMicActive, isForwarding, pcConnected } = state.sttState; 44 | 45 | // Mic buttons 46 | setVisible(startSTTBtn, !isMicActive); 47 | setVisible(stopSTTBtn, isMicActive); 48 | stopSTTBtn.disabled = isForwarding; // Cannot stop mic while forwarding 49 | 50 | // Forwarding buttons 51 | startForwardingBtn.disabled = !(isMicActive && pcConnected) || isForwarding; 52 | setVisible(startForwardingBtn, !isForwarding); 53 | setVisible(stopForwardingBtn, isForwarding); 54 | } 55 | 56 | setStartRecordingLoading(loading: boolean) { 57 | setButtonLoading(this.elements.startSTTBtn, loading); 58 | } 59 | 60 | setStartForwardingLoading(loading: boolean) { 61 | setButtonLoading(this.elements.startForwardingBtn, loading); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/state-store.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Generic state management utilities for Durable Objects 3 | */ 4 | 5 | // DurableObjectState is available globally 6 | 7 | export interface StateStore { 8 | state: T; 9 | save(): Promise; 10 | restore(): Promise; 11 | update(updates: Partial, skipAlarmReschedule?: boolean): Promise; 12 | deleteKeys(keys: (keyof T)[], skipAlarmReschedule?: boolean): Promise; 13 | } 14 | 15 | /** 16 | * Creates a state store with batched updates and alarm management 17 | */ 18 | export function createStateStore>( 19 | ctx: DurableObjectState, 20 | storageKey: string, 21 | initialState: T, 22 | getDeadlines?: (state: T) => number[] 23 | ): StateStore & { rescheduleAlarm(): Promise } { 24 | const store = { 25 | state: { ...initialState }, 26 | 27 | async save(): Promise { 28 | await 
ctx.storage.put(storageKey, store.state); 29 | }, 30 | 31 | async restore(): Promise { 32 | const savedState = await ctx.storage.get(storageKey); 33 | if (savedState) { 34 | store.state = { ...initialState, ...savedState }; 35 | } 36 | }, 37 | 38 | /** 39 | * Updates state and optionally reschedules alarm 40 | * @param updates Partial state updates to apply 41 | * @param skipAlarmReschedule Skip alarm rescheduling if true 42 | */ 43 | async update(updates: Partial, skipAlarmReschedule = false): Promise { 44 | Object.assign(store.state, updates); 45 | await store.save(); 46 | if (!skipAlarmReschedule && getDeadlines) { 47 | await store.rescheduleAlarm(); 48 | } 49 | }, 50 | 51 | /** 52 | * Deletes specified keys from state 53 | * @param keys Keys to delete from state 54 | * @param skipAlarmReschedule Skip alarm rescheduling if true 55 | */ 56 | async deleteKeys(keys: (keyof T)[], skipAlarmReschedule = false): Promise { 57 | for (const key of keys) { 58 | delete store.state[key]; 59 | } 60 | await store.save(); 61 | if (!skipAlarmReschedule && getDeadlines) { 62 | await store.rescheduleAlarm(); 63 | } 64 | }, 65 | 66 | async rescheduleAlarm(): Promise { 67 | if (!getDeadlines) return; 68 | 69 | const deadlines = getDeadlines(store.state); 70 | const validDeadlines = deadlines.filter((d) => typeof d === 'number' && d > 0); 71 | 72 | if (validDeadlines.length > 0) { 73 | await ctx.storage.setAlarm(Math.min(...validDeadlines)); 74 | } else { 75 | await ctx.storage.deleteAlarm(); 76 | } 77 | }, 78 | }; 79 | 80 | return store; 81 | } 82 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/services/subtitles.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Subtitle export utilities for VTT and SRT formats 3 | */ 4 | 5 | import { Transcript } from '../types'; 6 | 7 | export class SubtitleExporter { 8 | /** 9 | * Format time for subtitles 10 | */ 11 | private static 
formatTime(seconds: number, format: 'vtt' | 'srt'): string { 12 | const h = Math.floor(seconds / 3600); 13 | const m = Math.floor((seconds % 3600) / 60); 14 | const s = Math.floor(seconds % 60); 15 | const ms = Math.floor((seconds % 1) * 1000); 16 | 17 | const separator = format === 'srt' ? ',' : '.'; 18 | return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.toString().padStart(2, '0')}${separator}${ms 19 | .toString() 20 | .padStart(3, '0')}`; 21 | } 22 | 23 | /** 24 | * Export transcripts as WebVTT 25 | */ 26 | static exportVTT(transcripts: Transcript[], sessionId: string): void { 27 | const finalTranscripts = transcripts.filter((t) => t.isFinal); 28 | 29 | if (finalTranscripts.length === 0) { 30 | throw new Error('No transcriptions to export'); 31 | } 32 | 33 | let content = 'WEBVTT\n\n'; 34 | 35 | finalTranscripts.forEach((transcript) => { 36 | const start = this.formatTime(transcript.start, 'vtt'); 37 | const end = this.formatTime(transcript.start + 3, 'vtt'); // 3 second duration 38 | 39 | content += `${start} --> ${end}\n`; 40 | content += `${transcript.text}\n\n`; 41 | }); 42 | 43 | this.downloadFile(content, `transcription-${sessionId}.vtt`); 44 | } 45 | 46 | /** 47 | * Export transcripts as SRT 48 | */ 49 | static exportSRT(transcripts: Transcript[], sessionId: string): void { 50 | const finalTranscripts = transcripts.filter((t) => t.isFinal); 51 | 52 | if (finalTranscripts.length === 0) { 53 | throw new Error('No transcriptions to export'); 54 | } 55 | 56 | let content = ''; 57 | 58 | finalTranscripts.forEach((transcript, index) => { 59 | const start = this.formatTime(transcript.start, 'srt'); 60 | const end = this.formatTime(transcript.start + 3, 'srt'); // 3 second duration 61 | 62 | content += `${index + 1}\n`; 63 | content += `${start} --> ${end}\n`; 64 | content += `${transcript.text}\n\n`; 65 | }); 66 | 67 | this.downloadFile(content, `transcription-${sessionId}.srt`); 68 | } 69 | 70 | /** 71 | * Download file to user's 
computer 72 | */ 73 | private static downloadFile(content: string, filename: string): void { 74 | const blob = new Blob([content], { type: 'text/plain' }); 75 | const url = URL.createObjectURL(blob); 76 | const a = document.createElement('a'); 77 | a.href = url; 78 | a.download = filename; 79 | a.click(); 80 | URL.revokeObjectURL(url); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/audio-utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Audio processing utilities shared between STT and TTS adapters 3 | */ 4 | 5 | import { SpeexResampler } from '../speex-resampler'; 6 | import { AudioProcessor } from '../audio-processor'; 7 | 8 | /** 9 | * Ensures buffer has even byte length for 16-bit PCM 10 | */ 11 | export function ensureEvenBytes(buf: ArrayBuffer): ArrayBuffer { 12 | if (buf.byteLength % 2 !== 0) { 13 | return buf.slice(0, buf.byteLength - 1); 14 | } 15 | return buf; 16 | } 17 | 18 | /** 19 | * Safely initializes a Speex resampler 20 | */ 21 | export function initSpeexResampler(channels: number, inputRate: number, outputRate: number): SpeexResampler | null { 22 | try { 23 | SpeexResampler.ensureWasm(); 24 | return SpeexResampler.tryCreate(channels, inputRate, outputRate); 25 | } catch (e) { 26 | console.warn('Speex resampler init failed:', e); 27 | return null; 28 | } 29 | } 30 | 31 | /** 32 | * STT Helper: Convert stereo 48kHz to mono 16kHz with Speex preference 33 | */ 34 | export function toMono16kFromStereo48k(input48kStereo: ArrayBuffer, speexResampler?: SpeexResampler | null): ArrayBuffer { 35 | if (input48kStereo.byteLength === 0) return input48kStereo; 36 | 37 | // Convert stereo to mono first 38 | let mono48k = AudioProcessor.stereoToMono(input48kStereo); 39 | 40 | // Ensure even byte length 41 | mono48k = ensureEvenBytes(mono48k); 42 | 43 | // Try Speex resampling if available 44 | if (speexResampler) { 45 | try { 46 | 
const inView = new Int16Array(mono48k); 47 | const outView = speexResampler.processInterleavedInt(inView); 48 | if (outView.length > 0) { 49 | return outView.buffer as ArrayBuffer; 50 | } 51 | } catch (e) { 52 | console.warn('Speex resample failed, using JS fallback:', e); 53 | } 54 | } 55 | 56 | // Fallback to JS downsampler 57 | return AudioProcessor.downsample48kHzTo16kHz(mono48k); 58 | } 59 | 60 | /** 61 | * TTS Helper: Resample mono 24kHz to stereo 48kHz with Speex preference 62 | */ 63 | export function resample24kToStereo48k(input24kMono: ArrayBuffer, speexResampler?: SpeexResampler | null): ArrayBuffer { 64 | if (input24kMono.byteLength === 0) return input24kMono; 65 | 66 | // Ensure even byte length 67 | const evenInput = ensureEvenBytes(input24kMono); 68 | 69 | // Try Speex resampling if available 70 | if (speexResampler) { 71 | try { 72 | const inView = new Int16Array(evenInput); 73 | const outView = speexResampler.processInterleavedInt(inView); 74 | if (outView.length > 0) { 75 | // Convert mono 48kHz to stereo 48kHz 76 | return AudioProcessor.monoToStereo(outView.buffer as ArrayBuffer); 77 | } 78 | } catch (e) { 79 | console.warn('Speex resample failed, using JS fallback:', e); 80 | } 81 | } 82 | 83 | // Fallback to JS pipeline (24k -> 48k resample + mono -> stereo) 84 | return AudioProcessor.processForTTS(evenInput); 85 | } 86 | -------------------------------------------------------------------------------- /tts-ws/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | 3 | logs 4 | _.log 5 | npm-debug.log_ 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | 13 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 14 | 15 | # Runtime data 16 | 17 | pids 18 | _.pid 19 | _.seed 20 | \*.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | 24 | lib-cov 25 | 26 | # Coverage 
directory used by tools like istanbul 27 | 28 | coverage 29 | \*.lcov 30 | 31 | # nyc test coverage 32 | 33 | .nyc_output 34 | 35 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 36 | 37 | .grunt 38 | 39 | # Bower dependency directory (https://bower.io/) 40 | 41 | bower_components 42 | 43 | # node-waf configuration 44 | 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | 49 | build/Release 50 | 51 | # Dependency directories 52 | 53 | node_modules/ 54 | jspm_packages/ 55 | 56 | # Snowpack dependency directory (https://snowpack.dev/) 57 | 58 | web_modules/ 59 | 60 | # TypeScript cache 61 | 62 | \*.tsbuildinfo 63 | 64 | # Optional npm cache directory 65 | 66 | .npm 67 | 68 | # Optional eslint cache 69 | 70 | .eslintcache 71 | 72 | # Optional stylelint cache 73 | 74 | .stylelintcache 75 | 76 | # Microbundle cache 77 | 78 | .rpt2_cache/ 79 | .rts2_cache_cjs/ 80 | .rts2_cache_es/ 81 | .rts2_cache_umd/ 82 | 83 | # Optional REPL history 84 | 85 | .node_repl_history 86 | 87 | # Output of 'npm pack' 88 | 89 | \*.tgz 90 | 91 | # Yarn Integrity file 92 | 93 | .yarn-integrity 94 | 95 | # parcel-bundler cache (https://parceljs.org/) 96 | 97 | .cache 98 | .parcel-cache 99 | 100 | # Next.js build output 101 | 102 | .next 103 | out 104 | 105 | # Nuxt.js build / generate output 106 | 107 | .nuxt 108 | dist 109 | 110 | # Gatsby files 111 | 112 | .cache/ 113 | 114 | # Comment in the public line in if your project uses Gatsby and not Next.js 115 | 116 | # https://nextjs.org/blog/next-9-1#public-directory-support 117 | 118 | # public 119 | 120 | # vuepress build output 121 | 122 | .vuepress/dist 123 | 124 | # vuepress v2.x temp and cache directory 125 | 126 | .temp 127 | .cache 128 | 129 | # Docusaurus cache and generated files 130 | 131 | .docusaurus 132 | 133 | # Serverless directories 134 | 135 | .serverless/ 136 | 137 | # FuseBox cache 138 | 139 | .fusebox/ 140 | 141 | # DynamoDB Local files 142 | 
143 | .dynamodb/ 144 | 145 | # TernJS port file 146 | 147 | .tern-port 148 | 149 | # Stores VSCode versions used for testing VSCode extensions 150 | 151 | .vscode-test 152 | 153 | # yarn v2 154 | 155 | .yarn/cache 156 | .yarn/unplugged 157 | .yarn/build-state.yml 158 | .yarn/install-state.gz 159 | .pnp.\* 160 | 161 | # wrangler project 162 | 163 | .dev.vars* 164 | !.dev.vars.example 165 | .env* 166 | !.env.example 167 | .wrangler/ 168 | -------------------------------------------------------------------------------- /ai-tts-stt/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | 3 | logs 4 | _.log 5 | npm-debug.log_ 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | 13 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 14 | 15 | # Runtime data 16 | 17 | pids 18 | _.pid 19 | _.seed 20 | \*.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | 28 | coverage 29 | \*.lcov 30 | 31 | # nyc test coverage 32 | 33 | .nyc_output 34 | 35 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 36 | 37 | .grunt 38 | 39 | # Bower dependency directory (https://bower.io/) 40 | 41 | bower_components 42 | 43 | # node-waf configuration 44 | 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | 49 | build/Release 50 | 51 | # Dependency directories 52 | 53 | node_modules/ 54 | jspm_packages/ 55 | .wrangler/ 56 | dist/ 57 | .DS_Store 58 | public/assets/ 59 | web_modules/ 60 | 61 | # TypeScript cache 62 | 63 | \*.tsbuildinfo 64 | 65 | # Optional npm cache directory 66 | 67 | .npm 68 | 69 | # Optional eslint cache 70 | 71 | .eslintcache 72 | 73 | # Optional stylelint cache 74 | 75 | .stylelintcache 76 | 77 | # Microbundle cache 78 | 79 | .rpt2_cache/ 80 | 
.rts2_cache_cjs/ 81 | .rts2_cache_es/ 82 | .rts2_cache_umd/ 83 | 84 | # Optional REPL history 85 | 86 | .node_repl_history 87 | 88 | # Output of 'npm pack' 89 | 90 | \*.tgz 91 | 92 | # Yarn Integrity file 93 | 94 | .yarn-integrity 95 | 96 | # parcel-bundler cache (https://parceljs.org/) 97 | 98 | .cache 99 | .parcel-cache 100 | 101 | # Next.js build output 102 | 103 | .next 104 | out 105 | 106 | # Nuxt.js build / generate output 107 | 108 | .nuxt 109 | dist 110 | 111 | # Gatsby files 112 | 113 | .cache/ 114 | 115 | # Comment in the public line in if your project uses Gatsby and not Next.js 116 | 117 | # https://nextjs.org/blog/next-9-1#public-directory-support 118 | 119 | # public 120 | 121 | # vuepress build output 122 | 123 | .vuepress/dist 124 | 125 | # vuepress v2.x temp and cache directory 126 | 127 | .temp 128 | .cache 129 | 130 | # Docusaurus cache and generated files 131 | 132 | .docusaurus 133 | 134 | # Serverless directories 135 | 136 | .serverless/ 137 | 138 | # FuseBox cache 139 | 140 | .fusebox/ 141 | 142 | # DynamoDB Local files 143 | 144 | .dynamodb/ 145 | 146 | # TernJS port file 147 | 148 | .tern-port 149 | 150 | # Stores VSCode versions used for testing VSCode extensions 151 | 152 | .vscode-test 153 | 154 | # yarn v2 155 | 156 | .yarn/cache 157 | .yarn/unplugged 158 | .yarn/build-state.yml 159 | .yarn/install-state.gz 160 | .pnp.\* 161 | 162 | # wrangler project 163 | 164 | .dev.vars* 165 | !.dev.vars.example 166 | .env* 167 | !.env.example 168 | .wrangler/ 169 | 170 | # Local tooling (emsdk) installed by scripts/bootstrap.sh 171 | .tooling/ 172 | -------------------------------------------------------------------------------- /video-to-jpeg/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | 3 | logs 4 | _.log 5 | npm-debug.log_ 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | 13 | 
report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 14 | 15 | # Runtime data 16 | 17 | pids 18 | _.pid 19 | _.seed 20 | \*.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | 28 | coverage 29 | \*.lcov 30 | 31 | # nyc test coverage 32 | 33 | .nyc_output 34 | 35 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 36 | 37 | .grunt 38 | 39 | # Bower dependency directory (https://bower.io/) 40 | 41 | bower_components 42 | 43 | # node-waf configuration 44 | 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | 49 | build/Release 50 | 51 | # Dependency directories 52 | 53 | node_modules/ 54 | jspm_packages/ 55 | .wrangler/ 56 | dist/ 57 | .DS_Store 58 | public/assets/ 59 | web_modules/ 60 | 61 | # TypeScript cache 62 | 63 | \*.tsbuildinfo 64 | 65 | # Optional npm cache directory 66 | 67 | .npm 68 | 69 | # Optional eslint cache 70 | 71 | .eslintcache 72 | 73 | # Optional stylelint cache 74 | 75 | .stylelintcache 76 | 77 | # Microbundle cache 78 | 79 | .rpt2_cache/ 80 | .rts2_cache_cjs/ 81 | .rts2_cache_es/ 82 | .rts2_cache_umd/ 83 | 84 | # Optional REPL history 85 | 86 | .node_repl_history 87 | 88 | # Output of 'npm pack' 89 | 90 | \*.tgz 91 | 92 | # Yarn Integrity file 93 | 94 | .yarn-integrity 95 | 96 | # parcel-bundler cache (https://parceljs.org/) 97 | 98 | .cache 99 | .parcel-cache 100 | 101 | # Next.js build output 102 | 103 | .next 104 | out 105 | 106 | # Nuxt.js build / generate output 107 | 108 | .nuxt 109 | dist 110 | 111 | # Gatsby files 112 | 113 | .cache/ 114 | 115 | # Comment in the public line in if your project uses Gatsby and not Next.js 116 | 117 | # https://nextjs.org/blog/next-9-1#public-directory-support 118 | 119 | # public 120 | 121 | # vuepress build output 122 | 123 | .vuepress/dist 124 | 125 | # vuepress v2.x temp and cache directory 126 | 127 | .temp 128 | .cache 129 
| 130 | # Docusaurus cache and generated files 131 | 132 | .docusaurus 133 | 134 | # Serverless directories 135 | 136 | .serverless/ 137 | 138 | # FuseBox cache 139 | 140 | .fusebox/ 141 | 142 | # DynamoDB Local files 143 | 144 | .dynamodb/ 145 | 146 | # TernJS port file 147 | 148 | .tern-port 149 | 150 | # Stores VSCode versions used for testing VSCode extensions 151 | 152 | .vscode-test 153 | 154 | # yarn v2 155 | 156 | .yarn/cache 157 | .yarn/unplugged 158 | .yarn/build-state.yml 159 | .yarn/install-state.gz 160 | .pnp.\* 161 | 162 | # wrangler project 163 | 164 | .dev.vars* 165 | !.dev.vars.example 166 | .env* 167 | !.env.example 168 | .wrangler/ 169 | 170 | # Local tooling (emsdk) installed by scripts/bootstrap.sh 171 | .tooling/ 172 | -------------------------------------------------------------------------------- /ai-tts-stt/src/index.ts: -------------------------------------------------------------------------------- 1 | import playerHtml from './player.html'; 2 | 3 | // Export Durable Objects from their separate modules 4 | export { TTSAdapter } from './tts-adapter'; 5 | export { STTAdapter } from './stt-adapter'; 6 | 7 | /** 8 | * Main Worker Handler 9 | * 10 | * Routes incoming HTTP requests to the appropriate Durable Object instance based on the session name 11 | * (the first URL path segment). This ensures that multiple requests with the same session name are handled 12 | * by the same instance, allowing session-specific requests to the correct Durable Object instance. 13 | */ 14 | export default { 15 | async fetch(request, env, ctx): Promise { 16 | const url = new URL(request.url); 17 | const pathParts = url.pathname 18 | .substring(1) 19 | .split('/') 20 | .filter((p) => p); 21 | 22 | // Root request handler. 23 | if (pathParts.length === 0) { 24 | return new Response('Welcome! Use //publisher to control or //player to listen.', { status: 200 }); 25 | } 26 | 27 | const sessionName = pathParts[0]; 28 | const action = pathParts.length > 1 ? 
pathParts[1] : null; 29 | 30 | // Route: GET //player OR GET //publisher 31 | // These are stateless requests to serve the UI. Both routes serve the same HTML file. 32 | if (action && ['player', 'publisher'].includes(action) && request.method === 'GET') { 33 | return new Response(playerHtml, { 34 | headers: { 'Content-Type': 'text/html;charset=UTF-8' }, 35 | }); 36 | } 37 | 38 | // Route: DELETE / 39 | // Forcibly terminates a session across both TTS and STT adapters and wipes state. 40 | if (!action && request.method === 'DELETE') { 41 | const ttsId = env.TTS_ADAPTER.idFromName(sessionName); 42 | const ttsStub = env.TTS_ADAPTER.get(ttsId); 43 | const sttId = env.STT_ADAPTER.idFromName(sessionName); 44 | const sttStub = env.STT_ADAPTER.get(sttId); 45 | 46 | // Run both destroys concurrently in the background 47 | ctx.waitUntil( 48 | Promise.allSettled([ 49 | // These are Durable Object RPC calls to class methods 50 | // They will create/wake the instances as needed 51 | ttsStub.destroy(), 52 | sttStub.destroy(), 53 | ]) 54 | ); 55 | return new Response(`Session ${sessionName} destroy signal sent.`, { status: 202 }); 56 | } 57 | 58 | // Route: //stt/* - STT endpoints 59 | if (action === 'stt' && pathParts.length > 2) { 60 | const sttAdapterId = env.STT_ADAPTER.idFromName(sessionName); 61 | const sttAdapterStub = env.STT_ADAPTER.get(sttAdapterId); 62 | return await sttAdapterStub.fetch(request); 63 | } 64 | 65 | // All other actions are stateful and must be forwarded to the Durable Object. 
66 | if (action && ['publish', 'unpublish', 'connect', 'generate', 'subscribe'].includes(action)) { 67 | const id = env.TTS_ADAPTER.idFromName(sessionName); 68 | const stub = env.TTS_ADAPTER.get(id); 69 | return stub.fetch(request); 70 | } 71 | 72 | return new Response('Not Found', { status: 404 }); 73 | }, 74 | } satisfies ExportedHandler; 75 | -------------------------------------------------------------------------------- /whip-whep-server/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | 3 | logs 4 | _.log 5 | npm-debug.log_ 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | 13 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 14 | 15 | # Runtime data 16 | 17 | pids 18 | _.pid 19 | _.seed 20 | \*.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | 28 | coverage 29 | \*.lcov 30 | 31 | # nyc test coverage 32 | 33 | .nyc_output 34 | 35 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 36 | 37 | .grunt 38 | 39 | # Bower dependency directory (https://bower.io/) 40 | 41 | bower_components 42 | 43 | # node-waf configuration 44 | 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | 49 | build/Release 50 | 51 | # Dependency directories 52 | 53 | node_modules/ 54 | jspm_packages/ 55 | 56 | # Snowpack dependency directory (https://snowpack.dev/) 57 | 58 | web_modules/ 59 | 60 | # TypeScript cache 61 | 62 | \*.tsbuildinfo 63 | 64 | # Optional npm cache directory 65 | 66 | .npm 67 | 68 | # Optional eslint cache 69 | 70 | .eslintcache 71 | 72 | # Optional stylelint cache 73 | 74 | .stylelintcache 75 | 76 | # Microbundle cache 77 | 78 | .rpt2_cache/ 79 | .rts2_cache_cjs/ 80 | .rts2_cache_es/ 81 | .rts2_cache_umd/ 82 | 83 | # 
Optional REPL history 84 | 85 | .node_repl_history 86 | 87 | # Output of 'npm pack' 88 | 89 | \*.tgz 90 | 91 | # Yarn Integrity file 92 | 93 | .yarn-integrity 94 | 95 | # dotenv environment variable files 96 | 97 | .env 98 | .env.development.local 99 | .env.test.local 100 | .env.production.local 101 | .env.local 102 | 103 | # parcel-bundler cache (https://parceljs.org/) 104 | 105 | .cache 106 | .parcel-cache 107 | 108 | # Next.js build output 109 | 110 | .next 111 | out 112 | 113 | # Nuxt.js build / generate output 114 | 115 | .nuxt 116 | dist 117 | 118 | # Gatsby files 119 | 120 | .cache/ 121 | 122 | # Comment in the public line in if your project uses Gatsby and not Next.js 123 | 124 | # https://nextjs.org/blog/next-9-1#public-directory-support 125 | 126 | # public 127 | 128 | # vuepress build output 129 | 130 | .vuepress/dist 131 | 132 | # vuepress v2.x temp and cache directory 133 | 134 | .temp 135 | .cache 136 | 137 | # Docusaurus cache and generated files 138 | 139 | .docusaurus 140 | 141 | # Serverless directories 142 | 143 | .serverless/ 144 | 145 | # FuseBox cache 146 | 147 | .fusebox/ 148 | 149 | # DynamoDB Local files 150 | 151 | .dynamodb/ 152 | 153 | # TernJS port file 154 | 155 | .tern-port 156 | 157 | # Stores VSCode versions used for testing VSCode extensions 158 | 159 | .vscode-test 160 | 161 | # yarn v2 162 | 163 | .yarn/cache 164 | .yarn/unplugged 165 | .yarn/build-state.yml 166 | .yarn/install-state.gz 167 | .pnp.\* 168 | 169 | # wrangler project 170 | 171 | .dev.vars 172 | .wrangler/ 173 | -------------------------------------------------------------------------------- /openai-webrtc-relay/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | 3 | logs 4 | _.log 5 | npm-debug.log_ 6 | yarn-debug.log* 7 | yarn-error.log* 8 | lerna-debug.log* 9 | .pnpm-debug.log* 10 | 11 | # Diagnostic reports (https://nodejs.org/api/report.html) 12 | 13 | report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json 14 | 15 | # 
Runtime data 16 | 17 | pids 18 | _.pid 19 | _.seed 20 | \*.pid.lock 21 | 22 | # Directory for instrumented libs generated by jscoverage/JSCover 23 | 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | 28 | coverage 29 | \*.lcov 30 | 31 | # nyc test coverage 32 | 33 | .nyc_output 34 | 35 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 36 | 37 | .grunt 38 | 39 | # Bower dependency directory (https://bower.io/) 40 | 41 | bower_components 42 | 43 | # node-waf configuration 44 | 45 | .lock-wscript 46 | 47 | # Compiled binary addons (https://nodejs.org/api/addons.html) 48 | 49 | build/Release 50 | 51 | # Dependency directories 52 | 53 | node_modules/ 54 | jspm_packages/ 55 | 56 | # Snowpack dependency directory (https://snowpack.dev/) 57 | 58 | web_modules/ 59 | 60 | # TypeScript cache 61 | 62 | \*.tsbuildinfo 63 | 64 | # Optional npm cache directory 65 | 66 | .npm 67 | 68 | # Optional eslint cache 69 | 70 | .eslintcache 71 | 72 | # Optional stylelint cache 73 | 74 | .stylelintcache 75 | 76 | # Microbundle cache 77 | 78 | .rpt2_cache/ 79 | .rts2_cache_cjs/ 80 | .rts2_cache_es/ 81 | .rts2_cache_umd/ 82 | 83 | # Optional REPL history 84 | 85 | .node_repl_history 86 | 87 | # Output of 'npm pack' 88 | 89 | \*.tgz 90 | 91 | # Yarn Integrity file 92 | 93 | .yarn-integrity 94 | 95 | # dotenv environment variable files 96 | 97 | .env 98 | .env.development.local 99 | .env.test.local 100 | .env.production.local 101 | .env.local 102 | 103 | # parcel-bundler cache (https://parceljs.org/) 104 | 105 | .cache 106 | .parcel-cache 107 | 108 | # Next.js build output 109 | 110 | .next 111 | out 112 | 113 | # Nuxt.js build / generate output 114 | 115 | .nuxt 116 | dist 117 | 118 | # Gatsby files 119 | 120 | .cache/ 121 | 122 | # Comment in the public line in if your project uses Gatsby and not Next.js 123 | 124 | # https://nextjs.org/blog/next-9-1#public-directory-support 125 | 126 | # public 127 | 128 | # vuepress build 
output 129 | 130 | .vuepress/dist 131 | 132 | # vuepress v2.x temp and cache directory 133 | 134 | .temp 135 | .cache 136 | 137 | # Docusaurus cache and generated files 138 | 139 | .docusaurus 140 | 141 | # Serverless directories 142 | 143 | .serverless/ 144 | 145 | # FuseBox cache 146 | 147 | .fusebox/ 148 | 149 | # DynamoDB Local files 150 | 151 | .dynamodb/ 152 | 153 | # TernJS port file 154 | 155 | .tern-port 156 | 157 | # Stores VSCode versions used for testing VSCode extensions 158 | 159 | .vscode-test 160 | 161 | # yarn v2 162 | 163 | .yarn/cache 164 | .yarn/unplugged 165 | .yarn/build-state.yml 166 | .yarn/install-state.gz 167 | .pnp.\* 168 | 169 | # wrangler project 170 | 171 | .dev.vars 172 | .wrangler/ 173 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/services/api.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * API client for TTS and STT endpoints 3 | */ 4 | 5 | export class ApiClient { 6 | constructor(private sessionId: string) {} 7 | 8 | // TTS endpoints 9 | async publish(speaker: string): Promise { 10 | const res = await fetch(`/${this.sessionId}/publish`, { 11 | method: 'POST', 12 | headers: { 'Content-Type': 'application/json' }, 13 | body: JSON.stringify({ speaker }), 14 | }); 15 | if (!res.ok) { 16 | throw new Error(`Failed to publish: ${res.status} ${await res.text()}`); 17 | } 18 | } 19 | 20 | async unpublish(): Promise { 21 | const res = await fetch(`/${this.sessionId}/unpublish`, { 22 | method: 'POST', 23 | }); 24 | if (!res.ok) { 25 | throw new Error(`Failed to unpublish: ${res.status} ${await res.text()}`); 26 | } 27 | } 28 | 29 | async connect(sessionDescription: RTCSessionDescriptionInit): Promise { 30 | const res = await fetch(`/${this.sessionId}/connect`, { 31 | method: 'POST', 32 | headers: { 'Content-Type': 'application/json' }, 33 | body: JSON.stringify({ sessionDescription }), 34 | }); 35 | 36 | if (res.status === 400) { 37 | 
throw new Error('Session has not been published yet.'); 38 | } 39 | if (!res.ok) { 40 | throw new Error(`Failed to connect: ${res.status} ${await res.text()}`); 41 | } 42 | 43 | const answer = await res.json(); 44 | return answer.sessionDescription; 45 | } 46 | 47 | async generate(text: string): Promise { 48 | const res = await fetch(`/${this.sessionId}/generate`, { 49 | method: 'POST', 50 | headers: { 'Content-Type': 'application/json' }, 51 | body: JSON.stringify({ text }), 52 | }); 53 | if (res.status !== 202) { 54 | throw new Error(`Failed to generate audio: ${res.status} ${await res.text()}`); 55 | } 56 | } 57 | 58 | // STT endpoints 59 | async sttConnect(sessionDescription: RTCSessionDescriptionInit): Promise { 60 | const res = await fetch(`/${this.sessionId}/stt/connect`, { 61 | method: 'POST', 62 | headers: { 'Content-Type': 'application/json' }, 63 | body: JSON.stringify({ sessionDescription }), 64 | }); 65 | 66 | if (!res.ok) { 67 | throw new Error(`STT connection failed: ${res.status} ${await res.text()}`); 68 | } 69 | 70 | const answer = await res.json(); 71 | return answer.sessionDescription; 72 | } 73 | 74 | async sttStartForwarding(): Promise { 75 | const res = await fetch(`/${this.sessionId}/stt/start-forwarding`, { 76 | method: 'POST', 77 | headers: { 'Content-Type': 'application/json' }, 78 | }); 79 | if (!res.ok) { 80 | throw new Error(`Failed to start forwarding: ${await res.text()}`); 81 | } 82 | } 83 | 84 | async sttStopForwarding(): Promise { 85 | const res = await fetch(`/${this.sessionId}/stt/stop-forwarding`, { 86 | method: 'POST', 87 | }); 88 | if (!res.ok) { 89 | throw new Error(`Failed to stop forwarding: ${await res.text()}`); 90 | } 91 | } 92 | 93 | async sttReconnectNova(): Promise { 94 | const res = await fetch(`/${this.sessionId}/stt/reconnect-nova`, { 95 | method: 'POST', 96 | }); 97 | if (!res.ok) { 98 | throw new Error(await res.text()); 99 | } 100 | } 101 | 102 | getTranscriptionStreamUrl(): string { 103 | const protocol = 
window.location.protocol === 'https:' ? 'wss:' : 'ws:'; 104 | return `${protocol}//${window.location.host}/${this.sessionId}/stt/transcription-stream`; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/do-utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Shared utilities for Durable Objects 3 | */ 4 | 5 | import { StateStore } from './state-store'; 6 | 7 | /** 8 | * Builds a deadline aggregator function for use with StateStore 9 | * @param keys Array of state keys that contain deadline values 10 | * @returns Function that extracts all non-undefined deadline values from state 11 | */ 12 | export function buildDeadlineAggregator(keys: (keyof T)[]): (state: T) => number[] { 13 | return (state: T) => { 14 | const deadlines: number[] = []; 15 | for (const key of keys) { 16 | const value = state[key]; 17 | if (typeof value === 'number') { 18 | deadlines.push(value); 19 | } 20 | } 21 | return deadlines; 22 | }; 23 | } 24 | 25 | /** 26 | * Schedules a deferred cleanup check via alarm 27 | * Used to handle DO timing issues where getWebSockets() may include closing sockets 28 | * @param stateStore State store with cleanupDeadline property 29 | * @param graceMs Grace period in milliseconds (default 100ms) 30 | */ 31 | export async function scheduleDeferredCleanup( 32 | stateStore: StateStore, 33 | graceMs: number = 100 34 | ): Promise { 35 | const target = Date.now() + graceMs; 36 | const currentCleanup = stateStore.state.cleanupDeadline; 37 | 38 | // Only update if we don't have a deadline or ours is earlier (with churn guard) 39 | if (!currentCleanup || currentCleanup > target + 250) { 40 | await stateStore.update({ cleanupDeadline: target } as Partial); 41 | } 42 | } 43 | 44 | /** 45 | * Gets WebSockets that are actually open (readyState === OPEN) 46 | * @param ctx Durable Object state context 47 | * @param predicate Optional predicate to 
filter sockets 48 | * @returns Array of open WebSocket instances 49 | */ 50 | export function getOpenSockets(ctx: DurableObjectState, predicate?: (ws: WebSocket) => boolean): WebSocket[] { 51 | const allSockets = ctx.getWebSockets(); 52 | const openSockets = allSockets.filter((ws) => ws.readyState === WebSocket.OPEN); 53 | 54 | if (predicate) { 55 | return openSockets.filter(predicate); 56 | } 57 | 58 | return openSockets; 59 | } 60 | 61 | /** 62 | * Schedules a reconnection attempt with exponential backoff 63 | * @param stateStore State store with reconnection properties 64 | * @param options Reconnection options 65 | */ 66 | export async function scheduleReconnect< 67 | T extends { 68 | allowReconnect?: boolean; 69 | reconnectAttempts?: number; 70 | reconnectType?: string; 71 | reconnectDeadline?: number; 72 | } 73 | >( 74 | stateStore: StateStore, 75 | options: { 76 | type: string; 77 | maxAttempts?: number; 78 | maxDelayMs?: number; 79 | } 80 | ): Promise { 81 | const { type, maxAttempts = 5, maxDelayMs = 30000 } = options; 82 | 83 | if (!stateStore.state.allowReconnect) { 84 | return; 85 | } 86 | 87 | const currentAttempts = stateStore.state.reconnectAttempts || 0; 88 | const newAttempts = currentAttempts + 1; 89 | 90 | if (newAttempts > maxAttempts) { 91 | return; 92 | } 93 | 94 | const delay = Math.min(1000 * Math.pow(2, newAttempts - 1), maxDelayMs); 95 | const target = Date.now() + delay; 96 | 97 | const updates: Partial = { 98 | reconnectAttempts: newAttempts, 99 | reconnectType: type, 100 | } as Partial; 101 | 102 | // Only update deadline if needed (with churn guard) 103 | const currentDeadline = stateStore.state.reconnectDeadline; 104 | if (!currentDeadline || currentDeadline > target + 250) { 105 | (updates as any).reconnectDeadline = target; 106 | } 107 | 108 | await stateStore.update(updates); 109 | } 110 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/state.ts: 
-------------------------------------------------------------------------------- 1 | import { AppState, StateListener, StateUpdater } from './types'; 2 | 3 | class StateStore extends EventTarget { 4 | private state: AppState; 5 | private listeners = new Set(); 6 | 7 | constructor(initialState: AppState) { 8 | super(); 9 | this.state = initialState; 10 | } 11 | 12 | getState(): AppState { 13 | return this.state; 14 | } 15 | 16 | setState(updates: Partial) { 17 | const oldState = this.state; 18 | this.state = { ...this.state, ...updates }; 19 | 20 | // Notify listeners 21 | this.listeners.forEach((listener) => listener(this.state)); 22 | 23 | // Emit custom event for specific state changes 24 | this.dispatchEvent( 25 | new CustomEvent('statechange', { 26 | detail: { newState: this.state, oldState, updates }, 27 | }) 28 | ); 29 | } 30 | 31 | subscribe(listener: StateListener): () => void { 32 | this.listeners.add(listener); 33 | // Return unsubscribe function 34 | return () => this.listeners.delete(listener); 35 | } 36 | 37 | log(message: string) { 38 | this.setState({ 39 | debugLogs: [ 40 | ...this.state.debugLogs, 41 | { 42 | timestamp: new Date(), 43 | message, 44 | }, 45 | ], 46 | }); 47 | } 48 | 49 | clearLogs() { 50 | this.setState({ debugLogs: [] }); 51 | } 52 | 53 | addTranscript(transcript: string, isFinal: boolean, timestamp?: number) { 54 | const now = timestamp || Date.now(); 55 | const relativeTime = this.state.sttState.startTime ? 
(now - this.state.sttState.startTime) / 1000 : 0; 56 | 57 | if (isFinal && transcript.trim()) { 58 | this.setState({ 59 | transcripts: [ 60 | ...this.state.transcripts, 61 | { 62 | start: Math.max(0, relativeTime), 63 | text: transcript.trim(), 64 | timestamp: now, 65 | isFinal: true, 66 | }, 67 | ], 68 | }); 69 | } else { 70 | // Handle interim transcripts differently if needed 71 | this.setState({ 72 | transcripts: [ 73 | ...this.state.transcripts, 74 | { 75 | start: Math.max(0, relativeTime), 76 | text: transcript, 77 | timestamp: now, 78 | isFinal: false, 79 | }, 80 | ], 81 | }); 82 | } 83 | } 84 | 85 | clearTranscripts() { 86 | this.setState({ transcripts: [] }); 87 | } 88 | } 89 | 90 | // Parse URL to get session name and role 91 | function parseUrl(): { sessionId: string; userRole: 'player' | 'publisher' } { 92 | const pathParts = window.location.pathname.split('/').filter((p) => p); 93 | 94 | if (pathParts.length < 2 || !['player', 'publisher'].includes(pathParts[1])) { 95 | throw new Error('Invalid URL. 
Expected: //player or //publisher'); 96 | } 97 | 98 | return { 99 | sessionId: pathParts[0], 100 | userRole: pathParts[1] as 'player' | 'publisher', 101 | }; 102 | } 103 | 104 | // Initialize state from URL 105 | const urlParams = parseUrl(); 106 | 107 | export const store = new StateStore({ 108 | sessionId: urlParams.sessionId, 109 | userRole: urlParams.userRole, 110 | connectionState: 'initial', 111 | isPublished: false, 112 | selectedSpeaker: 'zeus', 113 | publisherTab: 'tts', 114 | sttState: { 115 | isMicActive: false, 116 | isForwarding: false, 117 | pcConnected: false, 118 | startTime: null, 119 | }, 120 | transcripts: [], 121 | debugLogs: [], 122 | }); 123 | 124 | // Export convenience methods 125 | export const getState = () => store.getState(); 126 | export const setState: StateUpdater = (updates) => store.setState(updates); 127 | export const subscribe = (listener: StateListener) => store.subscribe(listener); 128 | export const log = (message: string) => store.log(message); 129 | -------------------------------------------------------------------------------- /video-to-jpeg/src/player.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Cloudflare Realtime: Video to JPEG 6 | 7 | 84 | 85 | 86 |

Cloudflare Realtime: WebRTC Video to JPEG (Adapter Demo)

87 |

88 | This demo captures your camera with WebRTC, publishes it to the Realtime SFU, and displays 89 | approximately 1 FPS JPEG snapshots streamed via the WebSocket adapter. 90 |

91 | 92 |
93 |
94 |

Publisher (Camera → SFU)

95 |
96 | 97 | 98 | 99 | 100 |
101 |
102 | Local/remote video preview will appear here. 103 |
104 |

Status: Ready.

105 |
106 | 107 |
108 |

Viewer (JPEG Stream)

109 |
110 | JPEG snapshots will appear here. 111 |
112 |

113 | Viewer connects via WS /<session>/video/viewer and receives 114 | binary JPEG frames from the VideoAdapter Durable Object. 115 |

116 |
117 |
118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /ai-tts-stt/src/audio-processor.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * AudioProcessor - Shared audio processing utilities for TTS and STT 3 | * Handles format conversion, resampling, and channel conversion 4 | */ 5 | export class AudioProcessor { 6 | /** 7 | * Converts stereo PCM audio to mono by averaging channels 8 | */ 9 | static stereoToMono(stereoBuffer: ArrayBuffer): ArrayBuffer { 10 | const stereoData = new Int16Array(stereoBuffer); 11 | if (stereoData.length === 0) return stereoBuffer; 12 | 13 | // Stereo has 2 channels, so mono length is half 14 | const monoLength = Math.floor(stereoData.length / 2); 15 | const monoData = new Int16Array(monoLength); 16 | 17 | // Average left and right channels 18 | for (let i = 0; i < monoLength; i++) { 19 | const left = stereoData[i * 2]; 20 | const right = stereoData[i * 2 + 1]; 21 | monoData[i] = Math.round((left + right) / 2); 22 | } 23 | 24 | return monoData.buffer; 25 | } 26 | 27 | /** 28 | * Converts mono PCM audio to stereo by duplicating the channel 29 | */ 30 | static monoToStereo(monoBuffer: ArrayBuffer): ArrayBuffer { 31 | const monoView = new Int16Array(monoBuffer); 32 | const stereoBuffer = new ArrayBuffer(monoBuffer.byteLength * 2); 33 | const stereoView = new Int16Array(stereoBuffer); 34 | 35 | for (let i = 0; i < monoView.length; i++) { 36 | stereoView[i * 2] = monoView[i]; 37 | stereoView[i * 2 + 1] = monoView[i]; 38 | } 39 | 40 | return stereoBuffer; 41 | } 42 | 43 | /** 44 | * Downsamples audio from 48kHz to 16kHz (3x decimation) 45 | * Used for STT preprocessing 46 | */ 47 | static downsample48kHzTo16kHz(audioBuffer: ArrayBuffer): ArrayBuffer { 48 | const sourceData = new Int16Array(audioBuffer); 49 | if (sourceData.length === 0) return audioBuffer; 50 | 51 | // 48kHz to 16kHz is 3:1 decimation - take every 3rd sample 52 | const 
targetLength = Math.floor(sourceData.length / 3); 53 | const targetData = new Int16Array(targetLength); 54 | 55 | // Simple decimation - take every 3rd sample 56 | // For better quality, could implement a low-pass filter 57 | for (let i = 0; i < targetLength; i++) { 58 | targetData[i] = sourceData[i * 3]; 59 | } 60 | 61 | return targetData.buffer; 62 | } 63 | 64 | /** 65 | * Upsamples audio from 24kHz to 48kHz (2x upsampling) 66 | * Used for TTS postprocessing 67 | */ 68 | static resample24kHzTo48kHz(audioBuffer: ArrayBuffer): ArrayBuffer { 69 | const sourceData = new Int16Array(audioBuffer); 70 | if (sourceData.length === 0) return audioBuffer; 71 | 72 | // 24kHz to 48kHz is exactly 2x upsampling 73 | const targetLength = sourceData.length * 2; 74 | const targetData = new Int16Array(targetLength); 75 | 76 | // Linear interpolation for upsampling 77 | for (let i = 0; i < sourceData.length - 1; i++) { 78 | const sample1 = sourceData[i]; 79 | const sample2 = sourceData[i + 1]; 80 | 81 | // Place original sample 82 | targetData[i * 2] = sample1; 83 | 84 | // Interpolate one sample between each original pair 85 | targetData[i * 2 + 1] = Math.round((sample1 + sample2) / 2); 86 | } 87 | 88 | // Handle last sample 89 | const lastSample = sourceData[sourceData.length - 1]; 90 | targetData[targetLength - 2] = lastSample; 91 | targetData[targetLength - 1] = lastSample; 92 | 93 | return targetData.buffer; 94 | } 95 | 96 | /** 97 | * Combined processing pipeline for STT 98 | * Converts stereo 48kHz to mono 16kHz 99 | */ 100 | static processForSTT(stereo48kHz: ArrayBuffer): ArrayBuffer { 101 | const monoAudio = this.stereoToMono(stereo48kHz); 102 | const downsampledAudio = this.downsample48kHzTo16kHz(monoAudio); 103 | return downsampledAudio; 104 | } 105 | 106 | /** 107 | * Combined processing pipeline for TTS 108 | * Converts mono 24kHz to stereo 48kHz 109 | */ 110 | static processForTTS(mono24kHz: ArrayBuffer): ArrayBuffer { 111 | const resampledAudio = 
this.resample24kHzTo48kHz(mono24kHz); 112 | const stereoAudio = this.monoToStereo(resampledAudio); 113 | return stereoAudio; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /video-to-jpeg/src/packet.ts: -------------------------------------------------------------------------------- 1 | // @generated by protobuf-ts 2.11.1 2 | // @generated from protobuf file "packet.proto" (syntax proto3) 3 | // tslint:disable 4 | import type { BinaryWriteOptions } from "@protobuf-ts/runtime"; 5 | import type { IBinaryWriter } from "@protobuf-ts/runtime"; 6 | import { WireType } from "@protobuf-ts/runtime"; 7 | import type { BinaryReadOptions } from "@protobuf-ts/runtime"; 8 | import type { IBinaryReader } from "@protobuf-ts/runtime"; 9 | import { UnknownFieldHandler } from "@protobuf-ts/runtime"; 10 | import type { PartialMessage } from "@protobuf-ts/runtime"; 11 | import { reflectionMergePartial } from "@protobuf-ts/runtime"; 12 | import { MessageType } from "@protobuf-ts/runtime"; 13 | /** 14 | * @generated from protobuf message Packet 15 | */ 16 | export interface Packet { 17 | /** 18 | * @generated from protobuf field: uint32 sequenceNumber = 1 19 | */ 20 | sequenceNumber: number; 21 | /** 22 | * @generated from protobuf field: uint32 timestamp = 2 23 | */ 24 | timestamp: number; 25 | /** 26 | * @generated from protobuf field: bytes payload = 5 27 | */ 28 | payload: Uint8Array; 29 | } 30 | // @generated message type with reflection information, may provide speed optimized methods 31 | class Packet$Type extends MessageType { 32 | constructor() { 33 | super("Packet", [ 34 | { no: 1, name: "sequenceNumber", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 35 | { no: 2, name: "timestamp", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 36 | { no: 5, name: "payload", kind: "scalar", T: 12 /*ScalarType.BYTES*/ } 37 | ]); 38 | } 39 | create(value?: PartialMessage): Packet { 40 | const message = 
globalThis.Object.create((this.messagePrototype!)); 41 | message.sequenceNumber = 0; 42 | message.timestamp = 0; 43 | message.payload = new Uint8Array(0); 44 | if (value !== undefined) 45 | reflectionMergePartial(this, message, value); 46 | return message; 47 | } 48 | internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: Packet): Packet { 49 | let message = target ?? this.create(), end = reader.pos + length; 50 | while (reader.pos < end) { 51 | let [fieldNo, wireType] = reader.tag(); 52 | switch (fieldNo) { 53 | case /* uint32 sequenceNumber */ 1: 54 | message.sequenceNumber = reader.uint32(); 55 | break; 56 | case /* uint32 timestamp */ 2: 57 | message.timestamp = reader.uint32(); 58 | break; 59 | case /* bytes payload */ 5: 60 | message.payload = reader.bytes(); 61 | break; 62 | default: 63 | let u = options.readUnknownField; 64 | if (u === "throw") 65 | throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`); 66 | let d = reader.skip(wireType); 67 | if (u !== false) 68 | (u === true ? UnknownFieldHandler.onRead : u)(this.typeName, message, fieldNo, wireType, d); 69 | } 70 | } 71 | return message; 72 | } 73 | internalBinaryWrite(message: Packet, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter { 74 | /* uint32 sequenceNumber = 1; */ 75 | if (message.sequenceNumber !== 0) 76 | writer.tag(1, WireType.Varint).uint32(message.sequenceNumber); 77 | /* uint32 timestamp = 2; */ 78 | if (message.timestamp !== 0) 79 | writer.tag(2, WireType.Varint).uint32(message.timestamp); 80 | /* bytes payload = 5; */ 81 | if (message.payload.length) 82 | writer.tag(5, WireType.LengthDelimited).bytes(message.payload); 83 | let u = options.writeUnknownFields; 84 | if (u !== false) 85 | (u == true ? 
UnknownFieldHandler.onWrite : u)(this.typeName, message, writer); 86 | return writer; 87 | } 88 | } 89 | /** 90 | * @generated MessageType for protobuf message Packet 91 | */ 92 | export const Packet = new Packet$Type(); 93 | -------------------------------------------------------------------------------- /tts-ws/src/packet.ts: -------------------------------------------------------------------------------- 1 | // @generated by protobuf-ts 2.11.1 2 | // @generated from protobuf file "packet.proto" (syntax proto3) 3 | // tslint:disable 4 | import type { BinaryWriteOptions } from "@protobuf-ts/runtime"; 5 | import type { IBinaryWriter } from "@protobuf-ts/runtime"; 6 | import { WireType } from "@protobuf-ts/runtime"; 7 | import type { BinaryReadOptions } from "@protobuf-ts/runtime"; 8 | import type { IBinaryReader } from "@protobuf-ts/runtime"; 9 | import { UnknownFieldHandler } from "@protobuf-ts/runtime"; 10 | import type { PartialMessage } from "@protobuf-ts/runtime"; 11 | import { reflectionMergePartial } from "@protobuf-ts/runtime"; 12 | import { MessageType } from "@protobuf-ts/runtime"; 13 | /** 14 | * @generated from protobuf message Packet 15 | */ 16 | export interface Packet { 17 | /** 18 | * @generated from protobuf field: uint32 sequenceNumber = 1 19 | */ 20 | sequenceNumber: number; // not used in Buffer mode 21 | /** 22 | * @generated from protobuf field: uint32 timestamp = 2 23 | */ 24 | timestamp: number; // not used in Buffer mode 25 | /** 26 | * @generated from protobuf field: bytes payload = 5 27 | */ 28 | payload: Uint8Array; 29 | } 30 | // @generated message type with reflection information, may provide speed optimized methods 31 | class Packet$Type extends MessageType { 32 | constructor() { 33 | super("Packet", [ 34 | { no: 1, name: "sequenceNumber", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 35 | { no: 2, name: "timestamp", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 36 | { no: 5, name: "payload", kind: "scalar", T: 12 
/*ScalarType.BYTES*/ } 37 | ]); 38 | } 39 | create(value?: PartialMessage): Packet { 40 | const message = globalThis.Object.create((this.messagePrototype!)); 41 | message.sequenceNumber = 0; 42 | message.timestamp = 0; 43 | message.payload = new Uint8Array(0); 44 | if (value !== undefined) 45 | reflectionMergePartial(this, message, value); 46 | return message; 47 | } 48 | internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: Packet): Packet { 49 | let message = target ?? this.create(), end = reader.pos + length; 50 | while (reader.pos < end) { 51 | let [fieldNo, wireType] = reader.tag(); 52 | switch (fieldNo) { 53 | case /* uint32 sequenceNumber */ 1: 54 | message.sequenceNumber = reader.uint32(); 55 | break; 56 | case /* uint32 timestamp */ 2: 57 | message.timestamp = reader.uint32(); 58 | break; 59 | case /* bytes payload */ 5: 60 | message.payload = reader.bytes(); 61 | break; 62 | default: 63 | let u = options.readUnknownField; 64 | if (u === "throw") 65 | throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`); 66 | let d = reader.skip(wireType); 67 | if (u !== false) 68 | (u === true ? UnknownFieldHandler.onRead : u)(this.typeName, message, fieldNo, wireType, d); 69 | } 70 | } 71 | return message; 72 | } 73 | internalBinaryWrite(message: Packet, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter { 74 | /* uint32 sequenceNumber = 1; */ 75 | if (message.sequenceNumber !== 0) 76 | writer.tag(1, WireType.Varint).uint32(message.sequenceNumber); 77 | /* uint32 timestamp = 2; */ 78 | if (message.timestamp !== 0) 79 | writer.tag(2, WireType.Varint).uint32(message.timestamp); 80 | /* bytes payload = 5; */ 81 | if (message.payload.length) 82 | writer.tag(5, WireType.LengthDelimited).bytes(message.payload); 83 | let u = options.writeUnknownFields; 84 | if (u !== false) 85 | (u == true ? 
UnknownFieldHandler.onWrite : u)(this.typeName, message, writer); 86 | return writer; 87 | } 88 | } 89 | /** 90 | * @generated MessageType for protobuf message Packet 91 | */ 92 | export const Packet = new Packet$Type(); 93 | -------------------------------------------------------------------------------- /ai-tts-stt/src/packet.ts: -------------------------------------------------------------------------------- 1 | // @generated by protobuf-ts 2.11.1 2 | // @generated from protobuf file "packet.proto" (syntax proto3) 3 | // tslint:disable 4 | import type { BinaryWriteOptions } from "@protobuf-ts/runtime"; 5 | import type { IBinaryWriter } from "@protobuf-ts/runtime"; 6 | import { WireType } from "@protobuf-ts/runtime"; 7 | import type { BinaryReadOptions } from "@protobuf-ts/runtime"; 8 | import type { IBinaryReader } from "@protobuf-ts/runtime"; 9 | import { UnknownFieldHandler } from "@protobuf-ts/runtime"; 10 | import type { PartialMessage } from "@protobuf-ts/runtime"; 11 | import { reflectionMergePartial } from "@protobuf-ts/runtime"; 12 | import { MessageType } from "@protobuf-ts/runtime"; 13 | /** 14 | * @generated from protobuf message Packet 15 | */ 16 | export interface Packet { 17 | /** 18 | * @generated from protobuf field: uint32 sequenceNumber = 1 19 | */ 20 | sequenceNumber: number; // not used in Buffer mode 21 | /** 22 | * @generated from protobuf field: uint32 timestamp = 2 23 | */ 24 | timestamp: number; // not used in Buffer mode 25 | /** 26 | * @generated from protobuf field: bytes payload = 5 27 | */ 28 | payload: Uint8Array; 29 | } 30 | // @generated message type with reflection information, may provide speed optimized methods 31 | class Packet$Type extends MessageType { 32 | constructor() { 33 | super("Packet", [ 34 | { no: 1, name: "sequenceNumber", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 35 | { no: 2, name: "timestamp", kind: "scalar", T: 13 /*ScalarType.UINT32*/ }, 36 | { no: 5, name: "payload", kind: "scalar", T: 12 
/*ScalarType.BYTES*/ } 37 | ]); 38 | } 39 | create(value?: PartialMessage): Packet { 40 | const message = globalThis.Object.create((this.messagePrototype!)); 41 | message.sequenceNumber = 0; 42 | message.timestamp = 0; 43 | message.payload = new Uint8Array(0); 44 | if (value !== undefined) 45 | reflectionMergePartial(this, message, value); 46 | return message; 47 | } 48 | internalBinaryRead(reader: IBinaryReader, length: number, options: BinaryReadOptions, target?: Packet): Packet { 49 | let message = target ?? this.create(), end = reader.pos + length; 50 | while (reader.pos < end) { 51 | let [fieldNo, wireType] = reader.tag(); 52 | switch (fieldNo) { 53 | case /* uint32 sequenceNumber */ 1: 54 | message.sequenceNumber = reader.uint32(); 55 | break; 56 | case /* uint32 timestamp */ 2: 57 | message.timestamp = reader.uint32(); 58 | break; 59 | case /* bytes payload */ 5: 60 | message.payload = reader.bytes(); 61 | break; 62 | default: 63 | let u = options.readUnknownField; 64 | if (u === "throw") 65 | throw new globalThis.Error(`Unknown field ${fieldNo} (wire type ${wireType}) for ${this.typeName}`); 66 | let d = reader.skip(wireType); 67 | if (u !== false) 68 | (u === true ? UnknownFieldHandler.onRead : u)(this.typeName, message, fieldNo, wireType, d); 69 | } 70 | } 71 | return message; 72 | } 73 | internalBinaryWrite(message: Packet, writer: IBinaryWriter, options: BinaryWriteOptions): IBinaryWriter { 74 | /* uint32 sequenceNumber = 1; */ 75 | if (message.sequenceNumber !== 0) 76 | writer.tag(1, WireType.Varint).uint32(message.sequenceNumber); 77 | /* uint32 timestamp = 2; */ 78 | if (message.timestamp !== 0) 79 | writer.tag(2, WireType.Varint).uint32(message.timestamp); 80 | /* bytes payload = 5; */ 81 | if (message.payload.length) 82 | writer.tag(5, WireType.LengthDelimited).bytes(message.payload); 83 | let u = options.writeUnknownFields; 84 | if (u !== false) 85 | (u == true ? 
UnknownFieldHandler.onWrite : u)(this.typeName, message, writer); 86 | return writer; 87 | } 88 | } 89 | /** 90 | * @generated MessageType for protobuf message Packet 91 | */ 92 | export const Packet = new Packet$Type(); 93 | -------------------------------------------------------------------------------- /ai-tts-stt/scripts/bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # Local toolchain bootstrapper 5 | # Downloads and installs pinned versions of: 6 | # - wasi-sdk (clang/wasm-ld) 7 | # - Binaryen (wasm-opt) 8 | # Usage: 9 | # ./scripts/bootstrap.sh 10 | # export PATH="${PWD}/.tooling/wasi-sdk/bin:${PATH}" 11 | # export PATH="${PWD}/.tooling/binaryen/bin:${PATH}" 12 | # clang --version 13 | # wasm-opt --version 14 | 15 | : "${WASI_SDK_VERSION:=27}" 16 | : "${WASI_SDK_VERSION_MINOR:=0}" 17 | : "${BINARYEN_VERSION:=123}" 18 | : "${TOOLING_DIR:=.tooling}" 19 | 20 | echo "==> Ensuring tooling directory at ${TOOLING_DIR}" 21 | mkdir -p "${TOOLING_DIR}" 22 | 23 | # Detect platform 24 | PLATFORM="" 25 | case "$(uname -s)" in 26 | Darwin) 27 | if [ "$(uname -m)" = "arm64" ]; then 28 | PLATFORM="arm64-macos" 29 | else 30 | PLATFORM="x86_64-macos" 31 | fi 32 | # Binaryen uses same naming for macOS 33 | BINARYEN_PLATFORM="$PLATFORM" 34 | ;; 35 | Linux) 36 | if [ "$(uname -m)" = "x86_64" ]; then 37 | PLATFORM="x86_64-linux" 38 | BINARYEN_PLATFORM="x86_64-linux" 39 | elif [ "$(uname -m)" = "aarch64" ] || [ "$(uname -m)" = "arm64" ]; then 40 | PLATFORM="arm64-linux" 41 | # Binaryen release artifacts use aarch64-linux 42 | BINARYEN_PLATFORM="aarch64-linux" 43 | else 44 | echo "Unsupported Linux architecture: $(uname -m)" 45 | exit 1 46 | fi 47 | ;; 48 | *) 49 | echo "Unsupported platform: $(uname -s)" 50 | exit 1 51 | ;; 52 | esac 53 | 54 | WASI_SDK_DIR="${TOOLING_DIR}/wasi-sdk" 55 | WASI_SDK_FULL_VERSION="${WASI_SDK_VERSION}.${WASI_SDK_VERSION_MINOR}" 56 | 
BINARYEN_DIR="${TOOLING_DIR}/binaryen" 57 | 58 | if [ ! -d "${WASI_SDK_DIR}" ]; then 59 | echo "==> Downloading wasi-sdk ${WASI_SDK_FULL_VERSION} for ${PLATFORM}" 60 | 61 | # Download URL format: https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-22/wasi-sdk-22.0-darwin-arm64.tar.gz 62 | DOWNLOAD_URL="https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_FULL_VERSION}-${PLATFORM}.tar.gz" 63 | 64 | echo " Downloading from: ${DOWNLOAD_URL}" 65 | curl -L -o "${TOOLING_DIR}/wasi-sdk.tar.gz" "${DOWNLOAD_URL}" 66 | 67 | echo "==> Extracting wasi-sdk" 68 | tar -xzf "${TOOLING_DIR}/wasi-sdk.tar.gz" -C "${TOOLING_DIR}" 69 | 70 | # Rename to consistent directory name 71 | mv "${TOOLING_DIR}/wasi-sdk-${WASI_SDK_FULL_VERSION}"* "${WASI_SDK_DIR}" 72 | 73 | # Clean up 74 | rm "${TOOLING_DIR}/wasi-sdk.tar.gz" 75 | 76 | echo "==> wasi-sdk installed successfully" 77 | else 78 | echo "==> wasi-sdk already installed at ${WASI_SDK_DIR}" 79 | fi 80 | 81 | if [ ! 
-d "${BINARYEN_DIR}" ]; then 82 | echo "==> Downloading Binaryen ${BINARYEN_VERSION} for ${BINARYEN_PLATFORM}" 83 | # Download URL format: https://github.com/WebAssembly/binaryen/releases/download/version_116/binaryen-version_116-arm64-macos.tar.gz 84 | BIN_URL="https://github.com/WebAssembly/binaryen/releases/download/version_${BINARYEN_VERSION}/binaryen-version_${BINARYEN_VERSION}-${BINARYEN_PLATFORM}.tar.gz" 85 | echo " Downloading from: ${BIN_URL}" 86 | curl -L -o "${TOOLING_DIR}/binaryen.tar.gz" "${BIN_URL}" 87 | 88 | echo "==> Extracting Binaryen" 89 | tar -xzf "${TOOLING_DIR}/binaryen.tar.gz" -C "${TOOLING_DIR}" 90 | 91 | # Move extracted directory (binaryen-version_XXX) to a stable path 92 | BIN_EXTRACT_DIR=$(tar -tzf "${TOOLING_DIR}/binaryen.tar.gz" | head -1 | cut -f1 -d"/") 93 | mv "${TOOLING_DIR}/${BIN_EXTRACT_DIR}" "${BINARYEN_DIR}" 94 | 95 | # Clean up 96 | rm "${TOOLING_DIR}/binaryen.tar.gz" 97 | 98 | echo "==> Binaryen installed successfully" 99 | else 100 | echo "==> Binaryen already installed at ${BINARYEN_DIR}" 101 | fi 102 | 103 | cat < wasi-sdk ready. 106 | Add to your PATH for this shell session: 107 | export PATH="${PWD}/${WASI_SDK_DIR}/bin:\${PATH}" 108 | export PATH="${PWD}/${BINARYEN_DIR}/bin:\${PATH}" 109 | 110 | Or use directly: 111 | ${WASI_SDK_DIR}/bin/clang --version 112 | ${BINARYEN_DIR}/bin/wasm-opt --version 113 | 114 | Notes: 115 | - You can override the version via WASI_SDK_VERSION and WASI_SDK_VERSION_MINOR env vars. 116 | - You can override the Binaryen version via BINARYEN_VERSION env var. 117 | - This installs locally under ${TOOLING_DIR} and does not affect system-wide toolchains. 118 | - The SDK includes clang, wasm-ld, and wasi-libc headers/libraries. 119 | - Binaryen provides wasm-opt and related tools useful for post-link optimization. 
120 | EOF 121 | -------------------------------------------------------------------------------- /openai-webrtc-relay/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Realtime WebRTC API 5 | 6 | 7 | 8 |
9 | OpenAI API Endpoint:
10 | Receive Text:
11 | Receive Audio:
12 | Instructions:
13 | Voice: 24 |
25 | 26 |
27 | 28 | 36 | 37 |

Logs

38 |
39 | 40 | 41 | 108 | 109 | -------------------------------------------------------------------------------- /whip-whep-server/wish-whep-00-player/static/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 13 | 14 | 15 | 16 |
17 |

Basic WISH WHEP 00 Player

18 |
19 |
20 |
21 |

Remote media

22 | 23 |
24 |
25 | 26 | 81 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/ui/dom.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * DOM element caching and utilities 3 | */ 4 | 5 | export interface UIElements { 6 | // Header 7 | pageTitle: HTMLHeadingElement; 8 | sessionNameDisplay: HTMLSpanElement; 9 | statusIndicator: HTMLDivElement; 10 | 11 | // Publisher tabs 12 | publisherTabs: HTMLDivElement; 13 | tabTTS: HTMLButtonElement; 14 | tabSTT: HTMLButtonElement; 15 | 16 | // Publisher controls 17 | publisherSection: HTMLDivElement; 18 | publishBtn: HTMLButtonElement; 19 | unpublishBtn: HTMLButtonElement; 20 | speakerSelect: HTMLSelectElement; 21 | 22 | // Generate controls 23 | generateSection: HTMLDivElement; 24 | ttsText: HTMLTextAreaElement; 25 | generateBtn: HTMLButtonElement; 26 | 27 | // Listener controls 28 | listenerSection: HTMLDivElement; 29 | listenerTitle: HTMLHeadingElement; 30 | connectBtn: HTMLButtonElement; 31 | disconnectBtn: HTMLButtonElement; 32 | 33 | // STT controls 34 | sttSection: HTMLDivElement; 35 | startSTTBtn: HTMLButtonElement; 36 | stopSTTBtn: HTMLButtonElement; 37 | startForwardingBtn: HTMLButtonElement; 38 | stopForwardingBtn: HTMLButtonElement; 39 | clearTranscriptionBtn: HTMLButtonElement; 40 | exportSubtitlesBtn: HTMLButtonElement; 41 | restartNovaBtn: HTMLButtonElement; 42 | 43 | // Transcription display 44 | transcriptionContent: HTMLDivElement; 45 | 46 | // Media and debug 47 | mediaContainer: HTMLDivElement; 48 | debugArea: HTMLTextAreaElement; 49 | } 50 | 51 | /** 52 | * Cache all UI elements 53 | */ 54 | export function cacheElements(): UIElements { 55 | const getElement = (id: string): T => { 56 | const el = document.getElementById(id); 57 | if (!el) throw new Error(`Element with id "${id}" not found`); 58 | return el as T; 59 | }; 60 | 61 | return { 62 | // Header 63 | pageTitle: getElement('pageTitle'), 64 | 
sessionNameDisplay: getElement('sessionNameDisplay'), 65 | statusIndicator: getElement('statusIndicator'), 66 | 67 | // Publisher tabs 68 | publisherTabs: getElement('publisherTabs'), 69 | tabTTS: getElement('tabTTS'), 70 | tabSTT: getElement('tabSTT'), 71 | 72 | // Publisher controls 73 | publisherSection: getElement('publisherSection'), 74 | publishBtn: getElement('publishBtn'), 75 | unpublishBtn: getElement('unpublishBtn'), 76 | speakerSelect: getElement('speakerSelect'), 77 | 78 | // Generate controls 79 | generateSection: getElement('generateSection'), 80 | ttsText: getElement('ttsText'), 81 | generateBtn: getElement('generateBtn'), 82 | 83 | // Listener controls 84 | listenerSection: getElement('listenerSection'), 85 | listenerTitle: getElement('listenerTitle'), 86 | connectBtn: getElement('connectBtn'), 87 | disconnectBtn: getElement('disconnectBtn'), 88 | 89 | // STT controls 90 | sttSection: getElement('sttSection'), 91 | startSTTBtn: getElement('startSTTBtn'), 92 | stopSTTBtn: getElement('stopSTTBtn'), 93 | startForwardingBtn: getElement('startForwardingBtn'), 94 | stopForwardingBtn: getElement('stopForwardingBtn'), 95 | clearTranscriptionBtn: getElement('clearTranscriptionBtn'), 96 | exportSubtitlesBtn: getElement('exportSubtitlesBtn'), 97 | restartNovaBtn: getElement('restartNovaBtn'), 98 | 99 | // Transcription display 100 | transcriptionContent: getElement('transcriptionContent'), 101 | 102 | // Media and debug 103 | mediaContainer: getElement('media-container'), 104 | debugArea: getElement('debugArea'), 105 | }; 106 | } 107 | 108 | /** 109 | * Set loading state on button 110 | */ 111 | export function setButtonLoading(button: HTMLButtonElement, loading: boolean) { 112 | if (loading) { 113 | button.classList.add('loading'); 114 | button.disabled = true; 115 | } else { 116 | button.classList.remove('loading'); 117 | button.disabled = false; 118 | } 119 | } 120 | 121 | /** 122 | * Show/hide element 123 | */ 124 | export function setVisible(element: 
HTMLElement, visible: boolean) { 125 | if (visible) { 126 | element.classList.remove('hidden'); 127 | } else { 128 | element.classList.add('hidden'); 129 | } 130 | } 131 | 132 | /** 133 | * Enable/disable element 134 | */ 135 | export function setEnabled(element: HTMLElement, enabled: boolean) { 136 | if (enabled) { 137 | element.classList.remove('disabled'); 138 | } else { 139 | element.classList.add('disabled'); 140 | } 141 | } 142 | -------------------------------------------------------------------------------- /ai-tts-stt/scripts/build-speexdsp-wasm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | # Build SpeexDSP resampler to a standalone WebAssembly module using wasi-sdk. 5 | # Output: src/wasm/speexdsp.wasm (placed under src to enable Wrangler's ESM bundling) 6 | # Usage: 7 | # # Ensure wasi-sdk is installed: 8 | # ./scripts/bootstrap.sh 9 | # export PATH="${PWD}/.tooling/wasi-sdk/bin:${PATH}" 10 | # # Build 11 | # ./scripts/build-speexdsp-wasm.sh 12 | # 13 | # Notes: 14 | # - This script auto-detects Binaryen's wasm-opt from (in order): 15 | # 1) WASM_OPT env var (path to the binary or name on PATH) 16 | # 2) PATH (wasm-opt) 17 | # 3) .tooling/binaryen/bin/wasm-opt 18 | 19 | : "${TOOLING_DIR:=.tooling}" 20 | : "${SPEEXDSP_DIR:=${TOOLING_DIR}/speexdsp}" 21 | : "${SPEEXDSP_TAG:=SpeexDSP-1.2.1}" 22 | : "${OUT_DIR:=src/wasm}" 23 | : "${WASI_SDK_DIR:=${TOOLING_DIR}/wasi-sdk}" 24 | 25 | # Check if wasi-sdk is available 26 | if [ ! -d "${WASI_SDK_DIR}" ]; then 27 | echo "wasi-sdk not found. Run: ./scripts/bootstrap.sh" 28 | exit 1 29 | fi 30 | 31 | WASI_CC="${WASI_SDK_DIR}/bin/clang" 32 | 33 | if [ ! -x "${WASI_CC}" ]; then 34 | echo "wasi-sdk clang not found at ${WASI_CC}" 35 | exit 1 36 | fi 37 | 38 | mkdir -p "${TOOLING_DIR}" "${OUT_DIR}" 39 | 40 | # Clone SpeexDSP 41 | if [ ! 
-d "${SPEEXDSP_DIR}" ]; then 42 | echo "==> Cloning SpeexDSP" 43 | git clone https://github.com/xiph/speexdsp.git "${SPEEXDSP_DIR}" 44 | (cd "${SPEEXDSP_DIR}" && git checkout "${SPEEXDSP_TAG}") 45 | else 46 | echo "==> Updating SpeexDSP" 47 | (cd "${SPEEXDSP_DIR}" && git fetch --tags && git checkout "${SPEEXDSP_TAG}") 48 | fi 49 | 50 | # SpeexDSP expects config types header. Generate minimal version for wasi-sdk. 51 | CONFIG_TYPES_H="${SPEEXDSP_DIR}/include/speex/speexdsp_config_types.h" 52 | if [ ! -f "${CONFIG_TYPES_H}" ]; then 53 | echo "==> Generating ${CONFIG_TYPES_H} for wasi-sdk" 54 | cat > "${CONFIG_TYPES_H}" <<'EOF' 55 | #ifndef SPEEXDSP_CONFIG_TYPES_H 56 | #define SPEEXDSP_CONFIG_TYPES_H 57 | #include 58 | typedef int16_t spx_int16_t; 59 | typedef uint16_t spx_uint16_t; 60 | typedef int32_t spx_int32_t; 61 | typedef uint32_t spx_uint32_t; 62 | #endif 63 | EOF 64 | fi 65 | 66 | # Build resampler to a standalone WASI module (SIMD-enabled). We export a minimal C ABI plus malloc/free. 
67 | "${WASI_CC}" \ 68 | "${SPEEXDSP_DIR}/libspeexdsp/resample.c" \ 69 | -O3 \ 70 | -msimd128 \ 71 | -g0 -ffunction-sections -fdata-sections \ 72 | -I "${SPEEXDSP_DIR}/include" \ 73 | -DFLOATING_POINT \ 74 | -DEXPORT= \ 75 | --target=wasm32-wasi \ 76 | -nostartfiles \ 77 | -Wl,--gc-sections \ 78 | -Wl,--no-entry \ 79 | -Wl,--export=speex_resampler_init \ 80 | -Wl,--export=speex_resampler_process_interleaved_int \ 81 | -Wl,--export=speex_resampler_set_rate \ 82 | -Wl,--export=speex_resampler_destroy \ 83 | -Wl,--export=malloc \ 84 | -Wl,--export=free \ 85 | -Wl,--export=__heap_base \ 86 | -Wl,--export=__data_end \ 87 | -Wl,--strip-debug \ 88 | -Wl,--allow-undefined \ 89 | -o "${OUT_DIR}/speexdsp.wasm" 90 | 91 | # Optional post-link optimization via Binaryen (wasm-opt) 92 | OUT_WASM="${OUT_DIR}/speexdsp.wasm" 93 | 94 | # Resolve wasm-opt location: env var -> PATH -> local tooling dir 95 | WASM_OPT_BIN="" 96 | if [ -n "${WASM_OPT:-}" ]; then 97 | if [ -x "${WASM_OPT}" ]; then 98 | WASM_OPT_BIN="${WASM_OPT}" 99 | elif command -v "${WASM_OPT}" >/dev/null 2>&1; then 100 | WASM_OPT_BIN="$(command -v "${WASM_OPT}")" 101 | fi 102 | fi 103 | if [ -z "${WASM_OPT_BIN}" ] && command -v wasm-opt >/dev/null 2>&1; then 104 | WASM_OPT_BIN="$(command -v wasm-opt)" 105 | fi 106 | if [ -z "${WASM_OPT_BIN}" ] && [ -x "${TOOLING_DIR}/binaryen/bin/wasm-opt" ]; then 107 | WASM_OPT_BIN="${TOOLING_DIR}/binaryen/bin/wasm-opt" 108 | fi 109 | 110 | if [ -n "${WASM_OPT_BIN}" ] && [ -x "${WASM_OPT_BIN}" ]; then 111 | echo "==> Optimizing with wasm-opt at ${WASM_OPT_BIN}" 112 | TMP_WASM="${OUT_DIR}/speexdsp.opt.wasm" 113 | "${WASM_OPT_BIN}" "${OUT_WASM}" -o "${TMP_WASM}" \ 114 | -O3 \ 115 | --strip-debug \ 116 | --strip-producers \ 117 | --enable-simd 118 | mv "${TMP_WASM}" "${OUT_WASM}" 119 | else 120 | echo "==> wasm-opt not found; skipping optimization (run ./scripts/bootstrap.sh to install Binaryen)" 121 | fi 122 | 123 | echo "==> Wrote ${OUT_DIR}/speexdsp.wasm" 124 | 125 | echo "Usage in 
Workers (ESM import; Wrangler auto-bundles .wasm):" 126 | cat <<'EOF' 127 | 128 | // Example (TypeScript) 129 | // Place this near the top-level module scope so the instance is reused. 130 | import speexWasm from "./wasm/speexdsp.wasm"; 131 | 132 | // WASI modules typically need minimal or no imports for pure computation 133 | const importObject = { 134 | wasi_snapshot_preview1: { 135 | // Empty or minimal stubs - SpeexDSP doesn't need WASI syscalls 136 | } 137 | }; 138 | const { instance } = await WebAssembly.instantiate(speexWasm, importObject); 139 | // instance.exports now contains your exported functions 140 | EOF 141 | -------------------------------------------------------------------------------- /tts-ws/README.md: -------------------------------------------------------------------------------- 1 | # Realtime TTS Audio Streaming with Cloudflare Workers 2 | 3 | This folder demonstrates a realtime text-to-speech (TTS) audio streaming solution using Cloudflare Workers, Durable Objects, and Cloudflare's Realtime serverless SFU. Audio is generated via the ElevenLabs API and streamed to clients, with the worker acting as a secure proxy so **no secrets are ever exposed to the browser**. 4 | 5 | The demo is designed to be fully interactive from the browser after deployment, allowing for multiple, sequential TTS generations within a single session, and it provides two distinct interfaces: a **Publisher Console** for controlling the session and a **Listener Page** for passive listening. 6 | 7 | ## Project Architecture 8 | 9 | * **Cloudflare Worker**: Acts as the secure backend and public entry point. It serves the interactive player, handles API requests, and **proxies calls** to the Cloudflare SFU, keeping all secrets and tokens on the server-side. 10 | * **Durable Object (`AudioSession`)**: A stateful, single-instance object that manages a unique audio stream (e.g., for a session named "live-podcast"). 
It generates audio using the ElevenLabs API, stores the SFU session state, and pushes the audio stream to the Cloudflare SFU. 11 | * **Cloudflare Realtime SFU**: Ingests the audio stream from the worker and makes it available globally for clients to connect to via WebRTC. 12 | * **`player.html`**: A single-file, user-friendly web page that communicates only with the Cloudflare Worker to manage the session lifecycle. It dynamically renders a view for either the publisher or a listener. 13 | 14 | ## Getting Started 15 | 16 | ### Prerequisites 17 | 18 | * A Cloudflare account with Workers and Durable Objects enabled. 19 | * A Cloudflare Realtime SFU application configured. 20 | * An ElevenLabs account and API key. 21 | * [Node.js](https://nodejs.org/) and npm installed. 22 | * [Wrangler CLI](https://developers.cloudflare.com/workers/wrangler/install-and-update/) installed and configured. 23 | 24 | ### Configuration 25 | 26 | 1. **Clone the Repository and Install Dependencies:** 27 | ```bash 28 | git clone 29 | cd 30 | npm install 31 | ``` 32 | 33 | 2. **Configure `wrangler.jsonc` (or `wrangler.toml`):** 34 | Open your wrangler configuration file and modify the `vars` block with your own Cloudflare and ElevenLabs IDs. 35 | 36 | ```jsonc 37 | { 38 | // ... other wrangler config ... 39 | "vars": { 40 | "SFU_API_BASE": "https://rtc.live.cloudflare.com/v1", 41 | "ELEVENLABS_VOICE_ID": "", 42 | "REALTIME_SFU_APP_ID": "" 43 | } 44 | } 45 | ``` 46 | 47 | 3. **Configure Secrets:** 48 | Use the Wrangler CLI to securely store your API keys and tokens. These are encrypted and never exposed to your code or clients. 49 | 50 | * **ElevenLabs API Key:** 51 | ```bash 52 | npx wrangler secret put ELEVENLABS_API_KEY 53 | ``` 54 | (You will be prompted to enter the key) 55 | 56 | * **Cloudflare SFU Bearer Token:** 57 | ```bash 58 | npx wrangler secret put REALTIME_SFU_BEARER_TOKEN 59 | ``` 60 | (You will be prompted to enter the token) 61 | 62 | ## Deployment & Usage 63 | 64 | 1. 
**Deploy the Worker:** 65 | Deploy your worker and Durable Object to Cloudflare: 66 | ```bash 67 | npx wrangler deploy 68 | ``` 69 | 70 | 2. **Open the Publisher Console:** 71 | To control the session, navigate to the URL of your deployed worker, adding a unique session ID and `/publisher` to the path. You can make up any session ID you like. 72 | 73 | **Publisher URL:** `https://..workers.dev/my-stream/publisher` 74 | 75 | 3. **Start the Session:** 76 | As the publisher, click the **"Publish Session"** button. This makes the audio stream available for anyone to connect to. 77 | 78 | 4. **Share the Listener Link:** 79 | Share the `/player` URL with anyone who you want to listen to the stream. 80 | 81 | **Listener URL:** `https://..workers.dev/my-stream/player` 82 | 83 | 5. **Listeners Connect:** 84 | Users opening the listener URL will see a simple page with a "Connect and Listen" button. Clicking this will connect them to the audio stream via **Cloudflare Realtime SFU** 85 | 86 | 6. **Generate Speech:** 87 | As the publisher, you can type text into the box and click **"Generate Speech"**. You can do this multiple times. Each time, the newly generated audio will be streamed in realtime to all connected listeners (including yourself, if you've also clicked "Connect and Listen"). 88 | 89 | 7. **Stop the Session:** 90 | As the publisher, you can click **"Unpublish Session"** to completely remove the audio track from Cloudflare's servers. This will disconnect the publisher track, prevent new ones from joining, and reset the UI. 91 | 92 | 8. **End the Session (Forcibly):** 93 | You can also terminate the session and clear all its state on the server by sending a `DELETE` request. This is useful for cleaning up abandoned sessions. 
94 | 95 | ```bash 96 | curl -X DELETE https://..workers.dev/my-stream 97 | ``` -------------------------------------------------------------------------------- /video-to-jpeg/src/shared/sfu-utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * SFU (Realtime SFU) helper utilities for the video-to-jpeg demo. 3 | */ 4 | 5 | import { Packet } from "../packet"; 6 | 7 | /** 8 | * Compute the SFU API base URL from environment variables. 9 | */ 10 | export function getSfuApiBase(env: Env): string { 11 | return `${env.SFU_API_BASE}/apps/${env.REALTIME_SFU_APP_ID}`; 12 | } 13 | 14 | /** 15 | * Simple SFU API client used by the VideoAdapter Durable Object. 16 | */ 17 | export class SfuClient { 18 | private env: Env; 19 | 20 | constructor(env: Env) { 21 | this.env = env; 22 | } 23 | 24 | private get base(): string { 25 | return getSfuApiBase(this.env); 26 | } 27 | 28 | private get headers(): Record { 29 | return getSfuAuthHeaders(this.env); 30 | } 31 | 32 | // --- Sessions & Tracks --- 33 | 34 | async createSession(): Promise<{ sessionId: string }> { 35 | const res = await fetch(`${this.base}/sessions/new`, { 36 | method: "POST", 37 | headers: this.headers, 38 | }); 39 | if (!res.ok) { 40 | throw new Error(`SFU createSession failed: ${res.status} ${await res.text()}`); 41 | } 42 | const json = (await res.json()) as any; 43 | const sessionId = json?.sessionId; 44 | if (!sessionId) throw new Error("SFU createSession: sessionId missing in response"); 45 | return { sessionId }; 46 | } 47 | 48 | /** 49 | * Adds tracks to an existing session using autoDiscover from the provided SDP offer. 50 | * Returns the full JSON response and the first video trackName (if present). 
51 | */ 52 | async addTracksAutoDiscoverForVideo( 53 | sessionId: string, 54 | sessionDescription: any 55 | ): Promise<{ json: any; videoTrackName?: string }> { 56 | const body = { autoDiscover: true, sessionDescription }; 57 | const res = await fetch(`${this.base}/sessions/${sessionId}/tracks/new`, { 58 | method: "POST", 59 | headers: this.headers, 60 | body: JSON.stringify(body), 61 | }); 62 | if (!res.ok) { 63 | throw new Error(`SFU addTracksAutoDiscover failed: ${res.status} ${await res.text()}`); 64 | } 65 | const json = (await res.json()) as any; 66 | const tracks = json?.tracks ?? []; 67 | const video = tracks.find((t: any) => t.kind === "video"); 68 | const videoTrackName = video?.trackName || tracks[0]?.trackName; 69 | return { json, videoTrackName }; 70 | } 71 | 72 | /** 73 | * Configure a WebSocket adapter in remote (stream) mode. 74 | * Used to stream a remote video track as JPEG frames to a WebSocket endpoint. 75 | */ 76 | async pullTrackToWebSocket( 77 | sessionId: string, 78 | trackName: string, 79 | endpoint: string, 80 | opts?: { outputCodec?: "pcm" | "jpeg" } 81 | ): Promise<{ adapterId?: string; json: any }> { 82 | const body = { 83 | tracks: [ 84 | { 85 | location: "remote", 86 | sessionId, 87 | trackName, 88 | endpoint, 89 | outputCodec: opts?.outputCodec ?? "jpeg", 90 | }, 91 | ], 92 | }; 93 | const res = await fetch(`${this.base}/adapters/websocket/new`, { 94 | method: "POST", 95 | headers: this.headers, 96 | body: JSON.stringify(body), 97 | }); 98 | const text = await res.text(); 99 | if (!res.ok) { 100 | throw new Error(`SFU pullTrackToWebSocket failed: ${res.status} ${text}`); 101 | } 102 | let json: any = {}; 103 | try { 104 | json = JSON.parse(text); 105 | } catch {} 106 | const adapterId = json?.tracks?.[0]?.adapterId as string | undefined; 107 | return { adapterId, json }; 108 | } 109 | 110 | /** 111 | * Idempotent close for WebSocket adapters. 112 | * If SFU returns 503 adapter_not_found, treat as already-closed success. 
113 | */ 114 | async closeWebSocketAdapter( 115 | adapterId: string 116 | ): Promise<{ ok: boolean; alreadyClosed: boolean; status: number; text: string }> { 117 | const body = { tracks: [{ adapterId }] }; 118 | const res = await fetch(`${this.base}/adapters/websocket/close`, { 119 | method: "POST", 120 | headers: this.headers, 121 | body: JSON.stringify(body), 122 | }); 123 | const text = await res.text(); 124 | if (res.ok) return { ok: true, alreadyClosed: false, status: res.status, text }; 125 | let alreadyClosed = false; 126 | if (res.status === 503) { 127 | try { 128 | const j = JSON.parse(text); 129 | if (j?.tracks?.[0]?.errorCode === "adapter_not_found") alreadyClosed = true; 130 | } catch {} 131 | } 132 | return { ok: alreadyClosed, alreadyClosed, status: res.status, text }; 133 | } 134 | } 135 | 136 | /** 137 | * Standard SFU authorization headers. 138 | */ 139 | export function getSfuAuthHeaders(env: Env): Record { 140 | return { 141 | Authorization: `Bearer ${env.REALTIME_SFU_BEARER_TOKEN}`, 142 | "Content-Type": "application/json", 143 | }; 144 | } 145 | 146 | /** 147 | * Builds a WebSocket callback URL from an HTTP request and path. 148 | */ 149 | export function buildWsCallbackUrl(request: Request, path: string): string { 150 | const url = new URL(request.url); 151 | url.pathname = path; 152 | url.protocol = url.protocol === "https:" ? "wss:" : "ws:"; 153 | return url.toString(); 154 | } 155 | 156 | /** 157 | * Extract JPEG payload from an SFU Packet. 158 | * Returns a copy of the payload bytes or null if missing. 
159 | */ 160 | export function extractJpegFromSfuPacket(packetData: ArrayBuffer): Uint8Array | null { 161 | try { 162 | const packet = Packet.fromBinary(new Uint8Array(packetData)); 163 | if (!packet.payload || packet.payload.length === 0) { 164 | return null; 165 | } 166 | // Return a safe copy of the payload bytes 167 | return new Uint8Array(packet.payload); 168 | } catch (error) { 169 | console.error("Error decoding SFU Packet for video:", error); 170 | return null; 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/services/webrtc.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * WebRTC connection management 3 | */ 4 | 5 | import { log } from '../state'; 6 | 7 | export class WebRTCService { 8 | private peerConnection: RTCPeerConnection | null = null; 9 | private role: 'listener' | 'mic' | null = null; 10 | 11 | /** 12 | * Create a peer connection for listening (pulling audio) 13 | */ 14 | async createListenerConnection(): Promise { 15 | this.closePeerConnection(); 16 | 17 | this.peerConnection = new RTCPeerConnection({ 18 | iceServers: [{ urls: 'stun:stun.cloudflare.com:3478' }], 19 | }); 20 | 21 | // Configure to receive audio only 22 | this.peerConnection.addTransceiver('audio', { direction: 'recvonly' }); 23 | this.role = 'listener'; 24 | // this.attachDebugEventListeners('listener'); 25 | log('🎧 Listener PeerConnection created (recvonly)'); 26 | 27 | return this.peerConnection; 28 | } 29 | 30 | /** 31 | * Create a peer connection for publishing microphone 32 | */ 33 | async createMicConnection(stream: MediaStream): Promise { 34 | this.closePeerConnection(); 35 | 36 | this.peerConnection = new RTCPeerConnection({ 37 | iceServers: [{ urls: 'stun:stun.cloudflare.com:3478' }], 38 | }); 39 | 40 | // Add all tracks from the microphone stream 41 | stream.getTracks().forEach((track) => { 42 | this.peerConnection!.addTrack(track, stream); 43 | }); 
44 | this.role = 'mic'; 45 | // this.attachDebugEventListeners('mic'); 46 | log(`🎤 Mic PeerConnection created; added ${stream.getTracks().length} track(s)`); 47 | 48 | return this.peerConnection; 49 | } 50 | 51 | /** 52 | * Create an SDP offer 53 | */ 54 | async createOffer(): Promise { 55 | if (!this.peerConnection) { 56 | throw new Error('No peer connection established'); 57 | } 58 | 59 | log('📝 Creating SDP offer...'); 60 | const offer = await this.peerConnection.createOffer(); 61 | await this.peerConnection.setLocalDescription(offer); 62 | const sdpLen = offer.sdp?.length ?? 0; 63 | log(`📨 Local offer set (SDP length=${sdpLen})`); 64 | return offer; 65 | } 66 | 67 | /** 68 | * Set remote SDP answer 69 | */ 70 | async setRemoteAnswer(answer: RTCSessionDescriptionInit): Promise { 71 | if (!this.peerConnection) { 72 | throw new Error('No peer connection established'); 73 | } 74 | 75 | await this.peerConnection.setRemoteDescription(answer); 76 | const sdpLen = answer.sdp?.length ?? 0; 77 | log(`📬 Remote answer set (SDP length=${sdpLen})`); 78 | } 79 | 80 | /** 81 | * Set up event handlers 82 | */ 83 | onIceConnectionStateChange(callback: (state: RTCIceConnectionState) => void) { 84 | if (!this.peerConnection) return; 85 | 86 | this.peerConnection.oniceconnectionstatechange = () => { 87 | if (this.peerConnection) { 88 | callback(this.peerConnection.iceConnectionState); 89 | } 90 | }; 91 | } 92 | 93 | onConnectionStateChange(callback: (state: RTCPeerConnectionState) => void) { 94 | if (!this.peerConnection) return; 95 | 96 | this.peerConnection.onconnectionstatechange = () => { 97 | if (this.peerConnection) { 98 | callback(this.peerConnection.connectionState); 99 | } 100 | }; 101 | } 102 | 103 | onTrack(callback: (event: RTCTrackEvent) => void) { 104 | if (!this.peerConnection) return; 105 | 106 | this.peerConnection.ontrack = callback; 107 | } 108 | 109 | /** 110 | * Get current connection state 111 | */ 112 | getConnectionState(): RTCPeerConnectionState | null { 
113 | return this.peerConnection?.connectionState || null; 114 | } 115 | 116 | /** 117 | * Close the peer connection 118 | */ 119 | closePeerConnection() { 120 | if (this.peerConnection) { 121 | try { 122 | const conn = this.peerConnection; 123 | log(`⏹️ Closing PeerConnection (conn=${conn.connectionState}, ice=${conn.iceConnectionState}, signaling=${conn.signalingState})`); 124 | conn.close(); 125 | } catch {} 126 | this.peerConnection = null; 127 | this.role = null; 128 | } 129 | } 130 | 131 | /** 132 | * Attach debug listeners without overriding consumer-provided on* handlers. 133 | * Uses addEventListener so logs and app callbacks can both run. 134 | */ 135 | private attachDebugEventListeners(role: 'listener' | 'mic') { 136 | const pc = this.peerConnection; 137 | if (!pc) return; 138 | 139 | pc.addEventListener('connectionstatechange', () => { 140 | log(`[webrtc/${role}] connectionstatechange → ${pc.connectionState}`); 141 | }); 142 | pc.addEventListener('iceconnectionstatechange', () => { 143 | log(`[webrtc/${role}] iceconnectionstatechange → ${pc.iceConnectionState}`); 144 | }); 145 | pc.addEventListener('signalingstatechange', () => { 146 | log(`[webrtc/${role}] signalingstatechange → ${pc.signalingState}`); 147 | }); 148 | pc.addEventListener('icegatheringstatechange', () => { 149 | log(`[webrtc/${role}] icegatheringstatechange → ${pc.iceGatheringState}`); 150 | }); 151 | pc.addEventListener('negotiationneeded', () => { 152 | log(`[webrtc/${role}] negotiationneeded`); 153 | }); 154 | pc.addEventListener('icecandidate', (ev: Event) => { 155 | const e = ev as RTCPeerConnectionIceEvent; 156 | if (!e.candidate) { 157 | log(`[webrtc/${role}] icecandidate → null (gathering complete)`); 158 | return; 159 | } 160 | const cand = e.candidate.candidate || ''; 161 | const m = cand.match(/ typ (host|srflx|relay)/); 162 | const typ = m?.[1] ?? 
'unknown'; 163 | log(`[webrtc/${role}] icecandidate (${typ})`); 164 | }); 165 | pc.addEventListener('track', (ev: Event) => { 166 | const e = ev as RTCTrackEvent; 167 | const kind = e.track?.kind || 'unknown'; 168 | const streams = e.streams?.length ?? 0; 169 | log(`[webrtc/${role}] ontrack kind=${kind} streams=${streams}`); 170 | }); 171 | } 172 | 173 | /** 174 | * Get microphone stream with optimal settings 175 | */ 176 | static async getMicrophoneStream(): Promise { 177 | return navigator.mediaDevices.getUserMedia({ 178 | audio: { 179 | sampleRate: 48000, 180 | channelCount: 2, 181 | echoCancellation: true, 182 | noiseSuppression: true, 183 | autoGainControl: true, 184 | }, 185 | }); 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /video-to-jpeg/ARCHITECTURE.md: -------------------------------------------------------------------------------- 1 | # Architecture and Technical Details 2 | 3 | This document provides in-depth technical information about the video-to-jpeg demo's architecture, API flows, and implementation details. 4 | 5 | ## High-level Architecture 6 | 7 | ### Components 8 | 9 | - **Cloudflare Worker** (`src/index.ts`) 10 | - Routes requests based on `//...`. 11 | - Serves the static HTML UI from `src/player.html`. 12 | - Forwards all `/⟨session⟩/video/*` requests to the `VideoAdapter` Durable Object instance derived from `session`. 13 | 14 | - **`VideoAdapter` Durable Object** (`src/video-adapter.ts`) 15 | - Owns the state for a single `session` name: 16 | - Realtime SFU session ID. 17 | - Video track name. 18 | - WebSocket adapter ID. 19 | - HTTP endpoints: 20 | - `POST /⟨session⟩/video/connect` — create SFU session, publish video via `autoDiscover`. 21 | - `POST /⟨session⟩/video/start-forwarding` — configure a WebSocket adapter with `outputCodec: "jpeg"`. 22 | - `POST /⟨session⟩/video/stop-forwarding` — close the adapter (idempotent). 
23 | - WebSocket endpoints: 24 | - `WS /⟨session⟩/video/sfu-subscribe` — SFU → DO, receives `Packet` messages containing JPEG payloads. 25 | - `WS /⟨session⟩/video/viewer` — DO → browsers, fans out raw JPEG payloads to viewers. 26 | 27 | - **SFU helper** (`src/shared/sfu-utils.ts`) 28 | - Wraps Realtime SFU REST API calls: 29 | - `createSession()` 30 | - `addTracksAutoDiscoverForVideo()` 31 | - `pullTrackToWebSocket()` (with `outputCodec: "jpeg"`) 32 | - `closeWebSocketAdapter()` 33 | - Knows how to decode the `Packet` protobuf and extract JPEG payloads. 34 | 35 | - **Frontend app** (`src/web/app.ts`) 36 | - Publisher: 37 | - WebRTC offer/answer negotiation with the SFU via the Worker. 38 | - Controls start/stop of JPEG streaming via adapter endpoints. 39 | - Renders local camera video and snapshots. 40 | - Viewer: 41 | - Connects to the viewer WebSocket and renders the JPEG frames. 42 | 43 | ## Publisher Workflow (Detailed) 44 | 45 | When you click buttons on the **publisher** page, here's what happens under the hood: 46 | 47 | ### 1. Connect Camera 48 | 49 | 1. The browser requests camera access via `getUserMedia`. 50 | 2. A `RTCPeerConnection` is created with your camera tracks. 51 | 3. The frontend sends an SDP offer to the Worker: 52 | - `POST /my-session/video/connect` with `{ sessionDescription: offer }`. 53 | 4. The `VideoAdapter` Durable Object: 54 | - Creates an SFU session via `POST /v1/apps/{appId}/sessions/new`. 55 | - Publishes your track via `autoDiscover` onto that session. 56 | - Returns an SFU answer, which the frontend sets as the remote description. 57 | 58 | ### 2. Start JPEG Stream 59 | 60 | 1. Frontend calls `POST /my-session/video/start-forwarding`. 61 | 2. `VideoAdapter` configures a **WebSocket adapter** with: 62 | - `location: "remote"` 63 | - `sessionId: ` 64 | - `trackName: ` 65 | - `endpoint: wss://.../my-session/video/sfu-subscribe` 66 | - `outputCodec: "jpeg"` 67 | 3. 
The Realtime SFU starts sending JPEG frames (~1 FPS) to the Durable Object over WebSocket.
4. The publisher page opens a viewer WebSocket as well (same as the viewer page) and shows the JPEG snapshots.

### 3. Stop JPEG Stream

- Frontend calls `POST /my-session/video/stop-forwarding`.
- `VideoAdapter` closes the WebSocket adapter via the Realtime SFU API.

## Viewer Workflow (Detailed)

On the **viewer** page (`/my-session/viewer`):

1. The frontend opens a WebSocket to:
   ```
   ws(s)://<worker-host>/my-session/video/viewer
   ```

2. The `VideoAdapter`:
   - Receives each adapter frame as a `Packet` protobuf from the SFU.
   - Extracts the JPEG `payload`.
   - Broadcasts the JPEG bytes to all connected viewer sockets.

3. The browser receives each message as a binary `Blob`, wraps it in an `ObjectURL`, and assigns it to an `<img>` element.

If a viewer connects late, the `VideoAdapter` sends the **last stored frame** immediately upon connection so the UI shows something even before the next snapshot arrives.

## API Routes Reference

### UI pages

- `GET /⟨session⟩/publisher` — Publisher interface with camera controls
- `GET /⟨session⟩/viewer` — Viewer interface showing JPEG stream

### HTTP endpoints (VideoAdapter)

- `POST /⟨session⟩/video/connect` — Create SFU session and publish camera
- `POST /⟨session⟩/video/start-forwarding` — Start WebSocket adapter with JPEG output
- `POST /⟨session⟩/video/stop-forwarding` — Stop adapter and close connection

### WebSocket endpoints

- `WS /⟨session⟩/video/sfu-subscribe` — SFU → Durable Object (JPEG `Packet` messages)
- `WS /⟨session⟩/video/viewer` — Durable Object → browsers (raw JPEG bytes)

### Debug endpoints

- `DELETE /⟨session⟩` — Calls `VideoAdapter.destroy()` to close all sockets and wipe state for that session.
This is unauthenticated for demo purposes; add authentication before using in production.

## Protobuf Message Format

The WebSocket adapter sends video frames as protobuf `Packet` messages:

```proto
syntax = "proto3";

message Packet {
  uint32 sequenceNumber = 1; // sequence number (used for audio; may be unset for video)
  uint32 timestamp = 2;      // timestamp for synchronization
  bytes payload = 5;         // media payload (PCM audio or JPEG video)
}
```

For JPEG video, the `payload` field contains the raw JPEG image bytes, which can be directly rendered in a browser `<img>` element or processed further.

## Durable Object State Persistence

The `VideoAdapter` persists the following state across requests:

- `sfuSessionId` — The Realtime SFU session ID
- `videoTrackName` — The name of the published video track
- `sfuAdapterId` — The WebSocket adapter ID (if active)
- `sessionName` — Human-readable session identifier

This state is stored in Durable Object storage and survives Worker restarts. The "Reset Session" button calls `DELETE /<session>` to clear this state.

## Video Processing Pipeline

```
Camera (Browser)
  ↓ WebRTC
Realtime SFU
  ↓ WebSocket Adapter (outputCodec: "jpeg")
VideoAdapter Durable Object
  ↓ WebSocket (binary JPEG)
Viewer Browsers
```

The Realtime SFU transcodes the incoming video stream to JPEG at approximately 1 FPS and sends each frame as a protobuf `Packet` to the Durable Object, which broadcasts it to all connected viewers.
155 | -------------------------------------------------------------------------------- /video-to-jpeg/README.md: -------------------------------------------------------------------------------- 1 | # Cloudflare Realtime Video: WebRTC → JPEG Demo 2 | 3 | Stream your camera through the **Cloudflare Realtime SFU** and view it as JPEG snapshots (~1 FPS) using the **WebSocket media transport adapter**. 4 | 5 | After deployment, you can share a session with others entirely from your browser. It provides two interfaces: a **Publisher** page for camera control and a **Viewer** page for watching the JPEG stream. 6 | 7 | ## Quickstart (Deploy First) 8 | 9 | > **Important:** We recommend deploying to Cloudflare Workers first before trying local development. The Realtime SFU needs to connect back to your Worker via WebSocket, which doesn't work with `localhost`. 10 | 11 | 1. Clone and install: 12 | ```bash 13 | npm install 14 | ``` 15 | 16 | 2. Configure `wrangler.jsonc` vars and add secrets (see Configuration below). 17 | 18 | 3. Build and deploy: 19 | ```bash 20 | npm run build:web 21 | npx wrangler deploy 22 | ``` 23 | 24 | 4. Open in your browser: 25 | - Publisher: `https://.workers.dev//publisher` 26 | - Viewer: `https://.workers.dev//viewer` 27 | 28 | ## How It Works 29 | 30 | This demo uses the Realtime SFU's **WebSocket adapter** to convert WebRTC video into JPEG frames that can be processed in a Worker. 31 | 32 | - **Cloudflare Worker**: Serves the UI and handles API requests. All secrets stay on the server. 33 | - **Cloudflare Realtime SFU**: Receives your camera stream via WebRTC and converts video to JPEG frames (~1 FPS). 34 | - **`VideoAdapter` (Durable Object)**: Manages each session. Receives JPEG frames from the SFU and broadcasts them to viewer WebSockets. [See technical details](./ARCHITECTURE.md). 35 | - **Frontend UI**: A TypeScript app bundled with `esbuild` that handles camera access, WebRTC connections, and displays JPEG snapshots. 
36 | 37 | ## Getting Started 38 | 39 | ### Prerequisites 40 | 41 | - A Cloudflare account with **Workers**, **Durable Objects**, and **Realtime SFU** enabled. 42 | - A configured **Realtime SFU application**. 43 | - A **Realtime SFU API bearer token**. 44 | - [Node.js](https://nodejs.org/) and npm. 45 | - The [Wrangler CLI](https://developers.cloudflare.com/workers/wrangler/install-and-update/). 46 | 47 | ### Configuration 48 | 49 | 1. **Clone and Install:** 50 | ```bash 51 | git clone 52 | cd calls-examples/video-to-jpeg 53 | npm install 54 | ``` 55 | 56 | 2. **Set up `wrangler.jsonc`:** 57 | Open the file and add your SFU app ID to the `vars` block: 58 | 59 | ```jsonc 60 | { 61 | // ... other wrangler config ... 62 | "vars": { 63 | "SFU_API_BASE": "https://rtc.live.cloudflare.com/v1", 64 | "REALTIME_SFU_APP_ID": "" 65 | } 66 | } 67 | ``` 68 | 69 | 3. **Add Your Secret:** 70 | Use Wrangler to store your Realtime SFU bearer token securely: 71 | 72 | ```bash 73 | npx wrangler secret put REALTIME_SFU_BEARER_TOKEN 74 | ``` 75 | 76 | This token is used only from the Worker to the Realtime SFU API; it's never exposed to the browser. 77 | 78 | ## Deploy and Use 79 | 80 | 1. **Build the frontend:** 81 | ```bash 82 | npm run build:web 83 | ``` 84 | 85 | 2. **Deploy to Cloudflare Workers:** 86 | ```bash 87 | npx wrangler deploy 88 | ``` 89 | 90 | 3. **Open the Publisher page:** 91 | Navigate to your deployed Worker's URL with any unique session name: 92 | ``` 93 | https://.workers.dev//publisher 94 | ``` 95 | 96 | 4. **Start streaming:** 97 | - Click **Connect Camera** to allow camera access and establish WebRTC. 98 | - Click **Start JPEG Stream** to begin streaming JPEG snapshots. 99 | - You'll see your camera feed and the JPEG snapshots side-by-side. 100 | 101 | 5. **Share the Viewer link:** 102 | Send the viewer URL to anyone you want to share with: 103 | ``` 104 | https://.workers.dev//viewer 105 | ``` 106 | They'll see the JPEG stream without needing camera access. 
107 | 108 | 6. **Stop streaming:** 109 | - Click **Stop JPEG Stream** to stop the adapter. 110 | - Click **Reset Session** to clear all state and start fresh. 111 | 112 | > **URL patterns:** 113 | > - Publisher: `//publisher` 114 | > - Viewer: `//viewer` 115 | 116 | ## Local Development (Advanced) 117 | 118 | > **⚠️ Localhost Limitation:** The Realtime SFU cannot connect back to `localhost` WebSockets, so the JPEG adapter won't work locally. **We strongly recommend deploying to Workers first** to test the full functionality. 119 | 120 | If you still want to develop locally (e.g., for UI changes): 121 | 122 | 1. **Start the frontend watcher:** 123 | ```bash 124 | npm run watch:web 125 | ``` 126 | 127 | 2. **Start the Worker:** 128 | ```bash 129 | npm run dev 130 | ``` 131 | 132 | 3. **Open in browser:** 133 | ``` 134 | http://localhost:8787//publisher 135 | ``` 136 | 137 | Note: WebRTC and the UI will work, but the JPEG streaming via the adapter will fail because the SFU can't reach your local machine. 138 | 139 | For technical details about the architecture, API flows, and implementation, see [ARCHITECTURE.md](./ARCHITECTURE.md). 140 | 141 | ## Troubleshooting 142 | 143 | ### "Forwarding already active" error 144 | 145 | - Click the **Reset Session** button on the publisher page to clear the Durable Object state. 146 | - Alternatively, send a DELETE request: `curl -X DELETE https://.workers.dev/` 147 | 148 | ### Camera not working 149 | 150 | - Check your browser's permissions for camera access. 151 | - Try a different browser or device. 152 | - Ensure you're using HTTPS (required for camera access, except on localhost). 153 | 154 | ### No JPEG frames appearing 155 | 156 | - Make sure you clicked **Start JPEG Stream** after connecting the camera. 157 | - Check the browser console for WebSocket errors. 158 | - If testing locally, remember that the SFU can't connect to localhost—deploy to Workers instead. 
159 | 160 | ### Stale UI or 404s on assets 161 | 162 | - Run `npm run build:web` to rebuild the frontend. 163 | - Check that `wrangler.jsonc` has `"assets": { "directory": "public" }`. 164 | - Try a hard refresh in your browser. 165 | 166 | ### Deployed changes not showing up 167 | 168 | - Ensure you ran `npm run build:web` before `npx wrangler deploy`. 169 | - Hard refresh your browser to clear cached assets. 170 | 171 | ## Notes and Limitations 172 | 173 | - Video is streamed as **JPEG at approximately 1 FPS** (beta behavior). 174 | - This is a **reference demo** and should not be used in production without: 175 | - Authentication and authorization 176 | - Error handling and monitoring 177 | - Rate limiting and resource controls 178 | 179 | For more details, see [ARCHITECTURE.md](./ARCHITECTURE.md). 180 | -------------------------------------------------------------------------------- /ai-tts-stt/src/speex-resampler.ts: -------------------------------------------------------------------------------- 1 | import speexWasm from './wasm/speexdsp.wasm'; 2 | 3 | // Minimal typings for the WASI module exports 4 | interface SpeexExports { 5 | memory: WebAssembly.Memory; 6 | speex_resampler_init: (nb_channels: number, in_rate: number, out_rate: number, quality: number, errPtr: number) => number; 7 | speex_resampler_process_interleaved_int: (stPtr: number, inPtr: number, inLenPtr: number, outPtr: number, outLenPtr: number) => number; 8 | speex_resampler_set_rate: (stPtr: number, in_rate: number, out_rate: number) => number; 9 | speex_resampler_destroy: (stPtr: number) => void; 10 | malloc: (size: number) => number; 11 | free: (ptr: number) => void; 12 | __heap_base?: number; 13 | __data_end?: number; 14 | } 15 | 16 | let exportsRef: SpeexExports | null = null; 17 | let speexInitError: Error | null = null; 18 | 19 | // Top-level WASM instantiation: runs at module load 20 | try { 21 | const module = speexWasm as unknown as WebAssembly.Module; 22 | const instance = await 
WebAssembly.instantiate(module, { 23 | wasi_snapshot_preview1: {}, // Empty - SpeexDSP doesn't need WASI syscalls 24 | }); 25 | 26 | // WASI modules export functions directly without underscores 27 | const exports = instance.exports as SpeexExports & Record; 28 | 29 | // Validate required exports 30 | const required = [ 31 | 'memory', 'speex_resampler_init', 'speex_resampler_process_interleaved_int', 32 | 'speex_resampler_set_rate', 'speex_resampler_destroy', 'malloc', 'free' 33 | ]; 34 | 35 | const missing = required.filter(name => !(name in exports)); 36 | if (missing.length > 0) { 37 | console.warn('[SpeexResampler] Available exports:', Object.keys(exports)); 38 | throw new Error(`SpeexDSP WASM missing: ${missing.join(', ')}`); 39 | } 40 | 41 | exportsRef = exports as SpeexExports; 42 | // Optional: log once for dev visibility. In production, reduce noise if desired. 43 | console.log('[SpeexResampler] WASM initialized'); 44 | } catch (e: any) { 45 | speexInitError = e as Error; 46 | console.warn('[SpeexResampler] WASM init failed; JS fallback will be used when needed:', speexInitError?.message || e); 47 | } 48 | 49 | export class SpeexResampler { 50 | private static readonly HEADROOM_SAMPLES = 64; // safety margin for output buffers 51 | 52 | private exp: SpeexExports; 53 | private stPtr: number = 0; 54 | private channels: number; 55 | private inRate: number; 56 | private outRate: number; 57 | 58 | // Legacy compatibility - no longer needed with top-level await 59 | static ensureWasm(): void {} 60 | 61 | // Try to create synchronously if WASM is ready; otherwise return null 62 | static tryCreate(channels: number, inRate: number, outRate: number, quality = 5): SpeexResampler | null { 63 | if (!exportsRef) return null; 64 | return new SpeexResampler(exportsRef, channels, inRate, outRate, quality); 65 | } 66 | 67 | // Async creator - mainly for compatibility 68 | static async create(channels: number, inRate: number, outRate: number, quality = 5): Promise { 69 | if 
(!exportsRef) throw (speexInitError || new Error('Speex WASM not available')); 70 | return new SpeexResampler(exportsRef, channels, inRate, outRate, quality); 71 | } 72 | 73 | private constructor(exp: SpeexExports, channels: number, inRate: number, outRate: number, quality: number) { 74 | this.exp = exp; 75 | this.channels = channels; 76 | this.inRate = inRate; 77 | this.outRate = outRate; 78 | 79 | // Allocate error pointer (int32) 80 | const errPtr = this.exp.malloc(4); 81 | try { 82 | const st = this.exp.speex_resampler_init(this.channels, this.inRate, this.outRate, quality, errPtr); 83 | const errView = new Int32Array(this.exp.memory.buffer, errPtr, 1); 84 | const errCode = errView[0] | 0; 85 | if (!st || errCode !== 0) { 86 | throw new Error(`speex_resampler_init failed: err=${errCode}`); 87 | } 88 | this.stPtr = st; 89 | } finally { 90 | this.exp.free(errPtr); 91 | } 92 | } 93 | 94 | // Process interleaved int16 PCM (mono when channels=1). Returns a new Int16Array with the resampled data. 
95 | processInterleavedInt(input: Int16Array): Int16Array { 96 | if (!this.stPtr) return new Int16Array(0); 97 | 98 | const inSamples = input.length; // samples per channel for interleaved 99 | if (inSamples === 0) return new Int16Array(0); 100 | 101 | // Allocate input buffer in WASM memory and copy data 102 | const inBytes = inSamples * 2; 103 | const inPtr = this.exp.malloc(inBytes); 104 | const inHeap = new Int16Array(this.exp.memory.buffer, inPtr, inSamples); 105 | inHeap.set(input); 106 | 107 | // Estimate output capacity conservatively and allocate 108 | const ratio = this.outRate / this.inRate; 109 | const outCap = Math.ceil(inSamples * ratio) + SpeexResampler.HEADROOM_SAMPLES * this.channels; 110 | const outBytes = outCap * 2; 111 | const outPtr = this.exp.malloc(outBytes); 112 | 113 | // Allocate length pointers (uint32) 114 | const inLenPtr = this.exp.malloc(4); 115 | const outLenPtr = this.exp.malloc(4); 116 | const inLenView = new Uint32Array(this.exp.memory.buffer, inLenPtr, 1); 117 | const outLenView = new Uint32Array(this.exp.memory.buffer, outLenPtr, 1); 118 | inLenView[0] = inSamples / this.channels; // per-channel samples 119 | outLenView[0] = outCap / this.channels; // per-channel capacity 120 | 121 | try { 122 | const rc = this.exp.speex_resampler_process_interleaved_int(this.stPtr, inPtr, inLenPtr, outPtr, outLenPtr); 123 | if (rc !== 0) { 124 | // Non-zero return indicates error; return empty to trigger fallbacks upstream if any 125 | return new Int16Array(0); 126 | } 127 | // Compute produced interleaved samples from outLen per channel 128 | const producedPerChan = outLenView[0] | 0; 129 | const producedInterleaved = producedPerChan * this.channels; 130 | const outView = new Int16Array(this.exp.memory.buffer, outPtr, producedInterleaved); 131 | // Copy out to a fresh array (decouple from WASM memory) 132 | return new Int16Array(outView); 133 | } finally { 134 | this.exp.free(inPtr); 135 | this.exp.free(outPtr); 136 | this.exp.free(inLenPtr); 
137 | this.exp.free(outLenPtr); 138 | } 139 | } 140 | 141 | // Optionally change rates without recreating the state 142 | setRate(inRate: number, outRate: number): void { 143 | if (!this.stPtr) return; 144 | this.inRate = inRate; 145 | this.outRate = outRate; 146 | void this.exp.speex_resampler_set_rate(this.stPtr, inRate, outRate); 147 | } 148 | 149 | destroy(): void { 150 | if (this.stPtr) { 151 | this.exp.speex_resampler_destroy(this.stPtr); 152 | this.stPtr = 0; 153 | } 154 | } 155 | } 156 | 157 | // With top-level await, the module is initialized at import time. 158 | -------------------------------------------------------------------------------- /ai-tts-stt/src/player.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | TTS Demo - Cloudflare AI 8 | 9 | 10 | 11 | 12 |
13 |
14 |

TTS Demo

15 |
16 | 17 |
18 |
19 |
20 | 21 | 25 | 26 | 27 | 61 | 62 | 63 |
64 |

Audio Stream

65 |

Connect to the audio stream to hear the generated speech.

66 |
67 | 71 | 72 |
73 |
74 | 75 | 76 | 87 | 88 | 89 | 121 | 122 | 123 |
124 |

Output

125 |
126 |
127 | Debug Logs 128 | 129 |
130 |
131 |
132 | 133 |
134 | made with cloudflare · 135 | GitHub 137 |
138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /ai-tts-stt/src/web/services/stt.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * STT (Speech-to-Text) service 3 | */ 4 | 5 | import { ApiClient } from './api'; 6 | import { WebRTCService } from './webrtc'; 7 | import { setState, getState, log } from '../state'; 8 | import { TranscriptionMessage } from '../types'; 9 | 10 | export class STTService { 11 | private api: ApiClient; 12 | private webrtc: WebRTCService; 13 | private transcriptionWS: WebSocket | null = null; 14 | private transcriptionCloseTimer: number | null = null; 15 | 16 | constructor(sessionId: string) { 17 | this.api = new ApiClient(sessionId); 18 | this.webrtc = new WebRTCService(); 19 | } 20 | 21 | async startRecording() { 22 | const state = getState(); 23 | if (state.sttState.isMicActive) return; 24 | 25 | try { 26 | log('🎤 Starting microphone (WebRTC publish)...'); 27 | 28 | // Get microphone access 29 | const stream = await WebRTCService.getMicrophoneStream(); 30 | 31 | // Create WebRTC connection for audio upload 32 | const peerConnection = await this.webrtc.createMicConnection(stream); 33 | 34 | // Monitor connection state 35 | this.webrtc.onConnectionStateChange((pcState) => { 36 | const connected = pcState === 'connected'; 37 | setState({ 38 | sttState: { 39 | ...getState().sttState, 40 | pcConnected: connected, 41 | }, 42 | }); 43 | log(`📡 PeerConnection state: ${pcState}`); 44 | }); 45 | 46 | // Create offer and send to STT adapter 47 | const offer = await this.webrtc.createOffer(); 48 | const answer = await this.api.sttConnect(offer); 49 | await this.webrtc.setRemoteAnswer(answer); 50 | 51 | setState({ 52 | sttState: { 53 | ...getState().sttState, 54 | isMicActive: true, 55 | startTime: null, 56 | }, 57 | }); 58 | 59 | log('✅ Microphone publishing started. 
You can start forwarding once connected.'); 60 | } catch (error) { 61 | log(`❌ STT Error: ${(error as Error).message}`); 62 | this.stopRecording(); 63 | throw error; 64 | } 65 | } 66 | 67 | async startForwarding() { 68 | const state = getState(); 69 | 70 | if (!state.sttState.isMicActive) { 71 | log('⚠️ Start mic first.'); 72 | return; 73 | } 74 | 75 | if (!state.sttState.pcConnected) { 76 | log('⏳ Waiting for PeerConnection to be connected...'); 77 | return; 78 | } 79 | 80 | if (state.sttState.isForwarding) { 81 | log('ℹ️ Forwarding already active.'); 82 | return; 83 | } 84 | 85 | try { 86 | log('🔄 Starting WebSocket forwarding...'); 87 | await this.api.sttStartForwarding(); 88 | 89 | setState({ 90 | sttState: { 91 | ...getState().sttState, 92 | isForwarding: true, 93 | startTime: Date.now(), 94 | }, 95 | }); 96 | 97 | // Connect to transcription stream if not already connected 98 | if (!this.transcriptionWS || this.transcriptionWS.readyState === WebSocket.CLOSED) { 99 | this.connectTranscriptionStream(); 100 | } 101 | 102 | log('✅ WebSocket forwarding started'); 103 | } catch (error) { 104 | log(`❌ Forwarding Error: ${(error as Error).message}`); 105 | throw error; 106 | } 107 | } 108 | 109 | async stopForwarding() { 110 | const state = getState(); 111 | 112 | if (!state.sttState.isForwarding) { 113 | log('ℹ️ Forwarding already stopped.'); 114 | return; 115 | } 116 | 117 | try { 118 | log('⛔ Stopping WebSocket forwarding...'); 119 | await this.api.sttStopForwarding(); 120 | 121 | setState({ 122 | sttState: { 123 | ...getState().sttState, 124 | isForwarding: false, 125 | }, 126 | }); 127 | 128 | // Keep transcription WS connected 129 | log('✅ Forwarding stopped. 
Transcription stream remains connected.'); 130 | } catch (error) { 131 | log(`❌ Stop Forwarding Error: ${(error as Error).message}`); 132 | throw error; 133 | } 134 | } 135 | 136 | async stopRecording() { 137 | const state = getState(); 138 | 139 | if (!state.sttState.isMicActive) return; 140 | 141 | if (state.sttState.isForwarding) { 142 | log('⚠️ Stop forwarding before stopping the mic.'); 143 | return; 144 | } 145 | 146 | log('⏹️ Stopping microphone/WebRTC...'); 147 | 148 | this.webrtc.closePeerConnection(); 149 | 150 | setState({ 151 | sttState: { 152 | isMicActive: false, 153 | isForwarding: false, 154 | pcConnected: false, 155 | startTime: null, 156 | }, 157 | }); 158 | 159 | log('✅ Microphone stopped'); 160 | } 161 | 162 | async restartNova() { 163 | try { 164 | log('🛠️ Restarting Nova STT (debug)...'); 165 | await this.api.sttReconnectNova(); 166 | log('✅ Nova STT restarted'); 167 | } catch (error) { 168 | log(`❌ Restart Nova error: ${(error as Error).message}`); 169 | throw error; 170 | } 171 | } 172 | 173 | private connectTranscriptionStream() { 174 | const wsUrl = this.api.getTranscriptionStreamUrl(); 175 | 176 | this.transcriptionWS = new WebSocket(wsUrl); 177 | 178 | this.transcriptionWS.onopen = () => { 179 | log('🟢 Transcription stream connected'); 180 | }; 181 | 182 | this.transcriptionWS.onmessage = (event) => { 183 | try { 184 | const data: TranscriptionMessage = JSON.parse(event.data); 185 | 186 | if (data.type === 'stt_done') { 187 | log('✅ Transcription segment finalized'); 188 | return; 189 | } 190 | 191 | // Handle transcription data 192 | if (!data.type || data.type === 'transcription') { 193 | this.displayTranscription(data); 194 | } 195 | } catch (error) { 196 | log(`❌ Transcription parse error: ${(error as Error).message}`); 197 | } 198 | }; 199 | 200 | this.transcriptionWS.onclose = () => { 201 | log('🔌 Transcription stream disconnected'); 202 | }; 203 | 204 | this.transcriptionWS.onerror = (error) => { 205 | log(`❌ Transcription 
WebSocket error: ${error}`); 206 | }; 207 | } 208 | 209 | private displayTranscription(data: TranscriptionMessage) { 210 | if (!data.data?.channel?.alternatives?.[0]?.transcript) { 211 | return; 212 | } 213 | 214 | const transcript = data.data.channel.alternatives[0].transcript; 215 | const isFinal = data.data.is_final || false; 216 | const timestamp = data.timestamp || Date.now(); 217 | const state = getState(); 218 | 219 | // Add transcript to state 220 | const relativeTime = state.sttState.startTime ? (timestamp - state.sttState.startTime) / 1000 : 0; 221 | 222 | setState({ 223 | transcripts: [ 224 | ...state.transcripts, 225 | { 226 | start: Math.max(0, relativeTime), 227 | text: transcript, 228 | timestamp, 229 | isFinal, 230 | }, 231 | ], 232 | }); 233 | } 234 | 235 | closeTranscriptionStream() { 236 | if (this.transcriptionCloseTimer) { 237 | clearTimeout(this.transcriptionCloseTimer); 238 | this.transcriptionCloseTimer = null; 239 | } 240 | 241 | if (this.transcriptionWS) { 242 | this.transcriptionWS.close(); 243 | this.transcriptionWS = null; 244 | } 245 | } 246 | 247 | clearTranscriptions() { 248 | setState({ transcripts: [] }); 249 | log('🗑️ Transcriptions cleared'); 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /whip-whep-server/src/index.ts: -------------------------------------------------------------------------------- 1 | import { DurableObject } from "cloudflare:workers"; 2 | 3 | interface Env { 4 | CALLS_API: string 5 | CALLS_APP_ID: string 6 | CALLS_APP_SECRET: string 7 | LIVE_STORE: DurableObjectNamespace 8 | } 9 | 10 | interface SessionDescription { 11 | sdp: string 12 | type: string 13 | } 14 | 15 | interface NewSessionResponse { 16 | sessionId: string 17 | } 18 | 19 | interface NewTrackResponse { 20 | trackName: string 21 | mid: string 22 | } 23 | 24 | interface NewTracksResponse { 25 | tracks: NewTrackResponse[] 26 | sessionDescription: SessionDescription 27 | } 28 | 29 | interface 
TrackLocator { 30 | location: string 31 | sessionId: string 32 | trackName: string 33 | } 34 | 35 | export class LiveStore extends DurableObject { 36 | constructor(ctx: DurableObjectState, env: Env) { 37 | super(ctx, env); 38 | } 39 | 40 | async setTracks(tracks: TrackLocator[]): Promise { 41 | await this.ctx.storage.put("tracks", tracks) 42 | } 43 | 44 | async getTracks(): Promise { 45 | return await this.ctx.storage.get("tracks") || [] 46 | } 47 | 48 | async deleteTracks() : Promise { 49 | await this.ctx.storage.delete("tracks") 50 | } 51 | } 52 | 53 | function optionsResponse(): Response { 54 | return new Response(null, { 55 | status: 204, 56 | headers: { 57 | "accept-post": "application/sdp", 58 | "access-control-allow-credentials": "true", 59 | "access-control-allow-headers": "content-type,authorization,if-match", 60 | "access-control-allow-methods": "PATCH,POST,PUT,DELETE,OPTIONS", 61 | "access-control-allow-origin": "*", 62 | "access-control-expose-headers": "x-thunderclap,location,link,accept-post,accept-patch,etag", 63 | "link": "; rel=\"ice-server\"" 64 | } 65 | }) 66 | } 67 | async function whipHandler(request: Request, env: Env, ctx: ExecutionContext, parsedURL: URL): Promise 68 | { 69 | const groups = /\/ingest\/([\w-]+)\/?([\w-]+)?/g.exec(parsedURL.pathname) 70 | if(!groups || groups.length < 2) { 71 | return new Response("not found", {status: 404}) 72 | } 73 | const liveId = groups[1] 74 | let stub = env.LIVE_STORE.get(env.LIVE_STORE.idFromName(liveId)) 75 | switch(request.method) { 76 | case 'OPTIONS': 77 | return optionsResponse() 78 | case 'POST': 79 | break 80 | case 'DELETE': 81 | stub.deleteTracks() 82 | return new Response("OK") 83 | default: 84 | return new Response("Not supported", {status: 400}) 85 | } 86 | const CallsEndpoint = `${env.CALLS_API}/v1/apps/${env.CALLS_APP_ID}` 87 | const CallsEndpointHeaders = {'Authorization': `Bearer ${env.CALLS_APP_SECRET}`} 88 | const newSessionResult = await (await fetch(`${CallsEndpoint}/sessions/new`, 
{method: 'POST', headers: CallsEndpointHeaders})).json() as NewSessionResponse 89 | const newTracksBody = { 90 | "sessionDescription": { 91 | "type": "offer", 92 | "sdp": await request.text() 93 | }, 94 | "autoDiscover": true 95 | } 96 | const newTracksResult = await (await fetch(`${CallsEndpoint}/sessions/${newSessionResult.sessionId}/tracks/new`, { 97 | method: 'POST', 98 | headers: CallsEndpointHeaders, 99 | body: JSON.stringify(newTracksBody) 100 | })).json() as NewTracksResponse 101 | const tracks = newTracksResult.tracks.map(track => { 102 | return {location: "remote", "sessionId": newSessionResult.sessionId, "trackName": track.trackName} 103 | }) as TrackLocator[] 104 | await stub.setTracks(tracks) 105 | return new Response(newTracksResult.sessionDescription.sdp, { 106 | status: 201, 107 | headers: { 108 | 'content-type': "application/sdp", 109 | 'protocol-version': "draft-ietf-wish-whip-06", 110 | etag: `"${newSessionResult.sessionId}"`, 111 | location: `/ingest/${liveId}/${newSessionResult.sessionId}` 112 | }, 113 | }) 114 | } 115 | 116 | async function whepHandler(request: Request, env: Env, ctx: ExecutionContext, parsedURL: URL): Promise 117 | { 118 | const corsHeaders = {"access-control-allow-origin": "*"} 119 | const groups = /\/play\/([\w-]+)\/?([\w-]+)?/g.exec(parsedURL.pathname) 120 | if(!groups || groups.length < 2) { 121 | return new Response("not found", { 122 | status: 404, 123 | headers: corsHeaders 124 | }) 125 | } 126 | const liveId = groups[1] 127 | const CallsEndpoint = `${env.CALLS_API}/v1/apps/${env.CALLS_APP_ID}` 128 | const CallsEndpointHeaders = {'Authorization': `Bearer ${env.CALLS_APP_SECRET}`} 129 | switch(request.method) { 130 | case 'OPTIONS': 131 | return optionsResponse() 132 | case 'POST': 133 | break 134 | case 'DELETE': 135 | return new Response("OK") 136 | case 'PATCH': 137 | const sessionId = groups[2] 138 | const renegotiateBody = { 139 | "sessionDescription": { 140 | "type": "answer", 141 | "sdp": await request.text() 142 
| } 143 | } 144 | const renegotiateResponse = await fetch(`${CallsEndpoint}/sessions/${sessionId}/renegotiate`, { 145 | method: 'PUT', 146 | headers: CallsEndpointHeaders, 147 | body: JSON.stringify(renegotiateBody), 148 | }) 149 | return new Response(null, { 150 | status: renegotiateResponse.status, 151 | headers: corsHeaders 152 | }) 153 | default: 154 | return new Response("Not supported", { 155 | status: 404, 156 | headers: corsHeaders 157 | }) 158 | } 159 | let stub = env.LIVE_STORE.get(env.LIVE_STORE.idFromName(liveId)) 160 | const tracks = await stub.getTracks() as TrackLocator[] 161 | if(tracks.length == 0) { 162 | return new Response("Live not started yet", { 163 | status: 404, 164 | headers: corsHeaders 165 | }) 166 | } 167 | const newSessionResult = await (await fetch(`${CallsEndpoint}/sessions/new`, {method: 'POST', headers: CallsEndpointHeaders})).json() as NewSessionResponse 168 | const remoteOffer = await request.text() 169 | const newTracksBody = { 170 | "tracks": tracks, 171 | ... (remoteOffer.length > 0? 172 | {"sessionDescription": 173 | { 174 | "type": "offer", 175 | "sdp": remoteOffer 176 | } 177 | } : {}) 178 | } 179 | const newTracksResult = await (await fetch(`${CallsEndpoint}/sessions/${newSessionResult.sessionId}/tracks/new`, { 180 | method: 'POST', 181 | headers: CallsEndpointHeaders, 182 | body: JSON.stringify(newTracksBody) 183 | })).json() as NewTracksResponse 184 | return new Response(newTracksResult.sessionDescription.sdp, { 185 | status: 201, 186 | headers: { 187 | "access-control-expose-headers": "location", 188 | 'content-type': "application/sdp", 189 | 'protocol-version': "draft-ietf-wish-whep-00", 190 | "etag": `"${newSessionResult.sessionId}"`, 191 | "location": `/play/${liveId}/${newSessionResult.sessionId}`, 192 | ... 
corsHeaders 193 | }, 194 | }) 195 | } 196 | 197 | export default { 198 | async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { 199 | const parsedURL = new URL(request.url) 200 | if(parsedURL.pathname.split('/')[1] == 'ingest') { 201 | return whipHandler(request, env, ctx, parsedURL) 202 | } 203 | return whepHandler(request, env, ctx, parsedURL) 204 | }, 205 | }; 206 | -------------------------------------------------------------------------------- /ai-tts-stt/src/shared/sfu-utils.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * SFU (Selective Forwarding Unit) utilities for Cloudflare Calls 3 | */ 4 | 5 | import { Packet } from '../packet'; 6 | 7 | /** 8 | * Gets the SFU API base URL 9 | */ 10 | export function getSfuApiBase(env: Env): string { 11 | return `${env.SFU_API_BASE}/apps/${env.REALTIME_SFU_APP_ID}`; 12 | } 13 | 14 | /** 15 | * High-level SFU API client to encapsulate common Calls operations. 16 | * Keeps Env handling, base URL, and auth headers in one place. 17 | */ 18 | export class SfuClient { 19 | private env: Env; 20 | 21 | constructor(env: Env) { 22 | this.env = env; 23 | } 24 | 25 | private get base(): string { 26 | return getSfuApiBase(this.env); 27 | } 28 | 29 | private get headers(): Record { 30 | return getSfuAuthHeaders(this.env); 31 | } 32 | 33 | // --- Sessions & Tracks --- 34 | 35 | async createSession(): Promise<{ sessionId: string }> { 36 | const res = await fetch(`${this.base}/sessions/new`, { 37 | method: 'POST', 38 | headers: this.headers, 39 | }); 40 | if (!res.ok) { 41 | throw new Error(`SFU createSession failed: ${res.status} ${await res.text()}`); 42 | } 43 | const json = (await res.json()) as any; 44 | const sessionId = json?.sessionId; 45 | if (!sessionId) throw new Error('SFU createSession: sessionId missing in response'); 46 | return { sessionId }; 47 | } 48 | 49 | /** 50 | * Adds tracks to an existing session using autoDiscover from the provided SDP offer. 
51 | * Returns entire JSON response and the first audio trackName (if present). 52 | */ 53 | async addTracksAutoDiscover(sessionId: string, sessionDescription: any): Promise<{ json: any; audioTrackName?: string }> { 54 | const body = { autoDiscover: true, sessionDescription }; 55 | const res = await fetch(`${this.base}/sessions/${sessionId}/tracks/new`, { 56 | method: 'POST', 57 | headers: this.headers, 58 | body: JSON.stringify(body), 59 | }); 60 | if (!res.ok) { 61 | throw new Error(`SFU addTracksAutoDiscover failed: ${res.status} ${await res.text()}`); 62 | } 63 | const json = (await res.json()) as any; 64 | const audio = json?.tracks?.find((t: any) => t.kind === 'audio' || !t.kind); 65 | const audioTrackName = audio?.trackName || json?.tracks?.[0]?.trackName; 66 | return { json, audioTrackName }; 67 | } 68 | 69 | /** 70 | * Player flow: pull a remote track from the publisher session into a new player session. 71 | * Returns the SFU answer JSON to send back to the client. 72 | */ 73 | async pullRemoteTrackToPlayer( 74 | playerSessionId: string, 75 | publisherSessionId: string, 76 | trackName: string, 77 | sessionDescription: any 78 | ): Promise { 79 | const body = { 80 | sessionDescription, 81 | tracks: [{ location: 'remote', sessionId: publisherSessionId, trackName, kind: 'audio' }], 82 | }; 83 | const res = await fetch(`${this.base}/sessions/${playerSessionId}/tracks/new`, { 84 | method: 'POST', 85 | headers: this.headers, 86 | body: JSON.stringify(body), 87 | }); 88 | if (!res.ok) { 89 | throw new Error(`SFU pullRemoteTrackToPlayer failed: ${res.status} ${await res.text()}`); 90 | } 91 | return res.json(); 92 | } 93 | 94 | // --- WebSocket Adapters --- 95 | 96 | /** 97 | * DO pushes PCM into SFU as a local track via WebSocket adapter. 98 | * Used by TTS publish path. 
99 | */ 100 | async pushTrackFromWebSocket( 101 | trackName: string, 102 | endpoint: string, 103 | opts?: { inputCodec?: 'pcm'; mode?: 'buffer' } 104 | ): Promise<{ sessionId: string; adapterId: string; json: any }> { 105 | const body = { 106 | tracks: [ 107 | { 108 | location: 'local', 109 | trackName, 110 | endpoint, 111 | inputCodec: opts?.inputCodec ?? 'pcm', 112 | mode: opts?.mode ?? 'buffer', 113 | }, 114 | ], 115 | }; 116 | const res = await fetch(`${this.base}/adapters/websocket/new`, { 117 | method: 'POST', 118 | headers: this.headers, 119 | body: JSON.stringify(body), 120 | }); 121 | const text = await res.text(); 122 | if (!res.ok) { 123 | throw new Error(`SFU pushTrackFromWebSocket failed: ${res.status} ${text}`); 124 | } 125 | let json: any = {}; 126 | try { 127 | json = JSON.parse(text); 128 | } catch {} 129 | const sessionId = json?.tracks?.[0]?.sessionId; 130 | const adapterId = json?.tracks?.[0]?.adapterId; 131 | if (!sessionId || !adapterId) throw new Error('SFU pushTrackFromWebSocket: sessionId/adapterId missing'); 132 | return { sessionId, adapterId, json }; 133 | } 134 | 135 | /** 136 | * SFU pulls a remote track and streams PCM to our DO via WebSocket adapter. 137 | * Used by STT start-forwarding path. 138 | */ 139 | async pullTrackToWebSocket( 140 | sessionId: string, 141 | trackName: string, 142 | endpoint: string, 143 | opts?: { outputCodec?: 'pcm' } 144 | ): Promise<{ adapterId?: string; json: any }> { 145 | const body = { 146 | tracks: [ 147 | { 148 | location: 'remote', 149 | sessionId, 150 | trackName, 151 | endpoint, 152 | outputCodec: opts?.outputCodec ?? 
'pcm', 153 | }, 154 | ], 155 | }; 156 | const res = await fetch(`${this.base}/adapters/websocket/new`, { 157 | method: 'POST', 158 | headers: this.headers, 159 | body: JSON.stringify(body), 160 | }); 161 | const text = await res.text(); 162 | if (!res.ok) { 163 | throw new Error(`SFU pullTrackToWebSocket failed: ${res.status} ${text}`); 164 | } 165 | let json: any = {}; 166 | try { 167 | json = JSON.parse(text); 168 | } catch {} 169 | const adapterId = json?.tracks?.[0]?.adapterId as string | undefined; 170 | return { adapterId, json }; 171 | } 172 | 173 | /** 174 | * Idempotent close for WebSocket adapters. 175 | * If SFU returns 503 adapter_not_found, treat as already-closed success. 176 | */ 177 | async closeWebSocketAdapter(adapterId: string): Promise<{ ok: boolean; alreadyClosed: boolean; status: number; text: string }> { 178 | const body = { tracks: [{ adapterId }] }; 179 | const res = await fetch(`${this.base}/adapters/websocket/close`, { 180 | method: 'POST', 181 | headers: this.headers, 182 | body: JSON.stringify(body), 183 | }); 184 | const text = await res.text(); 185 | if (res.ok) return { ok: true, alreadyClosed: false, status: res.status, text }; 186 | let alreadyClosed = false; 187 | if (res.status === 503) { 188 | try { 189 | const j = JSON.parse(text); 190 | if (j?.tracks?.[0]?.errorCode === 'adapter_not_found') alreadyClosed = true; 191 | } catch {} 192 | } 193 | return { ok: alreadyClosed, alreadyClosed, status: res.status, text }; 194 | } 195 | } 196 | 197 | /** 198 | * Gets standard SFU authorization headers 199 | */ 200 | export function getSfuAuthHeaders(env: Env): Record { 201 | return { 202 | Authorization: `Bearer ${env.REALTIME_SFU_BEARER_TOKEN}`, 203 | 'Content-Type': 'application/json', 204 | }; 205 | } 206 | 207 | /** 208 | * Builds a WebSocket callback URL from an HTTP request 209 | */ 210 | export function buildWsCallbackUrl(request: Request, path: string): string { 211 | const url = new URL(request.url); 212 | url.pathname = path; 
213 | url.protocol = url.protocol === 'https:' ? 'wss:' : 'ws:'; 214 | return url.toString(); 215 | } 216 | 217 | /** 218 | * Encodes PCM audio payload for SFU transmission 219 | */ 220 | export function encodePcmForSfu(payload: ArrayBuffer): ArrayBuffer { 221 | const packet = { 222 | sequenceNumber: 0, 223 | timestamp: 0, 224 | payload: new Uint8Array(payload), 225 | }; 226 | const bytes = Packet.toBinary(packet); 227 | // Return a freshly allocated ArrayBuffer 228 | const out = new Uint8Array(bytes.byteLength); 229 | out.set(bytes); 230 | return out.buffer; 231 | } 232 | 233 | /** 234 | * Extracts PCM audio from SFU packet with safety checks 235 | */ 236 | export function extractPcmFromSfuPacket(packetData: ArrayBuffer): ArrayBuffer | null { 237 | try { 238 | const packet = Packet.fromBinary(new Uint8Array(packetData)); 239 | if (!packet.payload) { 240 | return null; 241 | } 242 | 243 | // IMPORTANT: Do not use `packet.payload.buffer` directly because it may include 244 | // unrelated bytes (due to byteOffset) leading to odd lengths/misalignment. 245 | let payloadView = packet.payload as Uint8Array; 246 | 247 | // Ensure even byte length for 16-bit PCM 248 | if (payloadView.byteLength % 2 !== 0) { 249 | console.warn(`Odd payload length (${payloadView.byteLength}) detected. Truncating last byte.`); 250 | payloadView = payloadView.subarray(0, payloadView.byteLength - 1); 251 | } 252 | 253 | // Copy into a new Uint8Array to guarantee an ArrayBuffer backing 254 | const safeCopy = new Uint8Array(payloadView); 255 | return safeCopy.buffer; 256 | } catch (error) { 257 | console.error('Error decoding SFU packet:', error); 258 | return null; 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /video-to-jpeg/src/web/app.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Frontend for the WebRTC Video → JPEG demo. 
3 | * 4 | * Publisher flow: 5 | * - Capture camera via getUserMedia 6 | * - Create WebRTC offer and send to //video/connect 7 | * - When connected, call //video/start-forwarding 8 | * - Also open JPEG viewer WebSocket 9 | * 10 | * Viewer flow: 11 | * - Only open JPEG viewer WebSocket 12 | */ 13 | 14 | const url = new URL(window.location.href); 15 | const parts = url.pathname.split('/').filter(Boolean); 16 | const sessionId = parts[0] ?? 'default'; 17 | const role: 'publisher' | 'viewer' = parts[1] === 'publisher' ? 'publisher' : 'viewer'; 18 | 19 | // DOM elements 20 | const btnConnect = document.querySelector('#btnConnectCamera'); 21 | const btnStart = document.querySelector('#btnStartStream'); 22 | const btnStop = document.querySelector('#btnStopStream'); 23 | const btnReset = document.querySelector('#btnResetSession'); 24 | const videoContainer = document.querySelector('#videoContainer'); 25 | const jpegContainer = document.querySelector('#jpegContainer'); 26 | const statusEl = document.querySelector('#statusText'); 27 | 28 | let pc: RTCPeerConnection | null = null; 29 | let viewerSocket: WebSocket | null = null; 30 | let latestObjectUrl: string | null = null; 31 | 32 | function setStatus(text: string) { 33 | if (statusEl) statusEl.textContent = text; 34 | } 35 | 36 | function cleanupViewerSocket() { 37 | if (viewerSocket) { 38 | try { 39 | viewerSocket.close(); 40 | } catch { 41 | // ignore 42 | } 43 | viewerSocket = null; 44 | } 45 | if (latestObjectUrl) { 46 | URL.revokeObjectURL(latestObjectUrl); 47 | latestObjectUrl = null; 48 | } 49 | } 50 | 51 | async function resetSession() { 52 | setStatus('Resetting session...'); 53 | 54 | try { 55 | const res = await fetch(`/${sessionId}`, { 56 | method: 'DELETE', 57 | }); 58 | 59 | if (!res.ok) { 60 | const text = await res.text(); 61 | throw new Error(`Reset failed: ${res.status} ${text}`); 62 | } 63 | 64 | cleanupViewerSocket(); 65 | if (pc) { 66 | try { 67 | pc.close(); 68 | } catch { 69 | // ignore 70 | } 71 | pc 
= null; 72 | } 73 | 74 | setStatus('Session reset. You can reconnect the camera and start streaming again.'); 75 | } catch (err: any) { 76 | console.error(err); 77 | setStatus(`Error resetting session: ${err?.message ?? err}`); 78 | } 79 | } 80 | 81 | function ensureViewerSocket() { 82 | if (viewerSocket) return; 83 | const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'; 84 | const wsUrl = `${protocol}//${window.location.host}/${sessionId}/video/viewer`; 85 | 86 | viewerSocket = new WebSocket(wsUrl); 87 | viewerSocket.binaryType = 'blob'; 88 | 89 | viewerSocket.onopen = () => { 90 | setStatus(`Viewer WebSocket connected (${role}).`); 91 | }; 92 | 93 | viewerSocket.onclose = () => { 94 | setStatus('Viewer WebSocket closed.'); 95 | }; 96 | 97 | viewerSocket.onerror = () => { 98 | setStatus('Viewer WebSocket error.'); 99 | }; 100 | 101 | viewerSocket.onmessage = (event: MessageEvent) => { 102 | const data = event.data; 103 | if (!(data instanceof Blob)) { 104 | return; 105 | } 106 | if (!jpegContainer) return; 107 | 108 | // Revoke previous object URL to avoid leaks 109 | if (latestObjectUrl) { 110 | URL.revokeObjectURL(latestObjectUrl); 111 | latestObjectUrl = null; 112 | } 113 | 114 | latestObjectUrl = URL.createObjectURL(data); 115 | 116 | let img = jpegContainer.querySelector('img'); 117 | if (!img) { 118 | jpegContainer.innerHTML = ''; 119 | img = document.createElement('img'); 120 | jpegContainer.appendChild(img); 121 | } 122 | img.src = latestObjectUrl; 123 | }; 124 | } 125 | 126 | async function connectCameraAndPublish() { 127 | if (!btnConnect || !btnStart || !videoContainer) return; 128 | 129 | btnConnect.disabled = true; 130 | setStatus('Requesting camera access...'); 131 | 132 | try { 133 | const stream = await navigator.mediaDevices.getUserMedia({ 134 | video: { 135 | width: { ideal: 1280 }, 136 | height: { ideal: 720 }, 137 | }, 138 | audio: false, 139 | }); 140 | 141 | const videoEl = document.createElement('video'); 142 | 
videoEl.autoplay = true; 143 | videoEl.muted = true; 144 | videoEl.playsInline = true; 145 | videoEl.srcObject = stream; 146 | videoContainer.innerHTML = ''; 147 | videoContainer.appendChild(videoEl); 148 | 149 | if (pc) { 150 | pc.close(); 151 | pc = null; 152 | } 153 | 154 | pc = new RTCPeerConnection({ 155 | iceServers: [{ urls: 'stun:stun.cloudflare.com:3478' }], 156 | }); 157 | 158 | stream.getTracks().forEach((track) => { 159 | pc!.addTrack(track, stream); 160 | }); 161 | 162 | pc.oniceconnectionstatechange = () => { 163 | const state = pc?.iceConnectionState; 164 | setStatus(`ICE state: ${state}`); 165 | }; 166 | 167 | setStatus('Creating offer...'); 168 | const offer = await pc.createOffer(); 169 | await pc.setLocalDescription(offer); 170 | 171 | const connectRes = await fetch(`/${sessionId}/video/connect`, { 172 | method: 'POST', 173 | headers: { 'Content-Type': 'application/json' }, 174 | body: JSON.stringify({ sessionDescription: offer }), 175 | }); 176 | 177 | if (!connectRes.ok) { 178 | const text = await connectRes.text(); 179 | throw new Error(`Connect failed: ${connectRes.status} ${text}`); 180 | } 181 | 182 | const connectJson = await connectRes.json(); 183 | const answer = connectJson.sessionDescription as RTCSessionDescriptionInit; 184 | await pc.setRemoteDescription(answer); 185 | 186 | setStatus('WebRTC connected. You can now start JPEG streaming.'); 187 | btnStart.disabled = false; 188 | } catch (err: any) { 189 | console.error(err); 190 | setStatus(`Error connecting camera: ${err?.message ?? 
err}`); 191 | btnConnect.disabled = false; 192 | } 193 | } 194 | 195 | async function startForwarding() { 196 | if (!btnStart || !btnStop) return; 197 | 198 | btnStart.disabled = true; 199 | setStatus('Starting JPEG forwarding via adapter...'); 200 | 201 | try { 202 | const res = await fetch(`/${sessionId}/video/start-forwarding`, { 203 | method: 'POST', 204 | headers: { 'Content-Type': 'application/json' }, 205 | }); 206 | if (!res.ok) { 207 | const text = await res.text(); 208 | throw new Error(`Start-forwarding failed: ${res.status} ${text}`); 209 | } 210 | 211 | setStatus('JPEG forwarding active. Opening viewer WebSocket...'); 212 | ensureViewerSocket(); 213 | btnStop.disabled = false; 214 | } catch (err: any) { 215 | console.error(err); 216 | setStatus(`Error starting forwarding: ${err?.message ?? err}`); 217 | btnStart.disabled = false; 218 | } 219 | } 220 | 221 | async function stopForwarding() { 222 | if (!btnStart || !btnStop) return; 223 | 224 | btnStop.disabled = true; 225 | setStatus('Stopping JPEG forwarding...'); 226 | 227 | try { 228 | const res = await fetch(`/${sessionId}/video/stop-forwarding`, { 229 | method: 'POST', 230 | }); 231 | if (!res.ok) { 232 | const text = await res.text(); 233 | throw new Error(`Stop-forwarding failed: ${res.status} ${text}`); 234 | } 235 | 236 | setStatus('Forwarding stopped.'); 237 | } catch (err: any) { 238 | console.error(err); 239 | setStatus(`Error stopping forwarding: ${err?.message ?? 
err}`); 240 | } finally { 241 | btnStart.disabled = false; 242 | } 243 | } 244 | 245 | function init() { 246 | if (role === 'publisher') { 247 | if (btnConnect) btnConnect.disabled = false; 248 | if (btnStart) btnStart.disabled = true; 249 | if (btnStop) btnStop.disabled = true; 250 | if (btnReset) btnReset.disabled = false; 251 | 252 | setStatus(`Role: publisher (session: ${sessionId})`); 253 | 254 | btnConnect?.addEventListener('click', () => { 255 | void connectCameraAndPublish(); 256 | }); 257 | 258 | btnStart?.addEventListener('click', () => { 259 | void startForwarding(); 260 | }); 261 | 262 | btnStop?.addEventListener('click', () => { 263 | void stopForwarding(); 264 | }); 265 | 266 | btnReset?.addEventListener('click', () => { 267 | void resetSession(); 268 | }); 269 | } else { 270 | // Viewer-only: just open JPEG stream 271 | if (btnConnect) btnConnect.disabled = true; 272 | if (btnStart) btnStart.disabled = true; 273 | if (btnStop) btnStop.disabled = true; 274 | setStatus(`Role: viewer (session: ${sessionId})`); 275 | ensureViewerSocket(); 276 | } 277 | } 278 | 279 | window.addEventListener('load', init); 280 | 281 | window.addEventListener('beforeunload', () => { 282 | cleanupViewerSocket(); 283 | if (pc) { 284 | try { 285 | pc.close(); 286 | } catch { 287 | // ignore 288 | } 289 | pc = null; 290 | } 291 | }); 292 | -------------------------------------------------------------------------------- /openai-webrtc-relay/src/index.ts: -------------------------------------------------------------------------------- 1 | interface Env { 2 | OPENAI_API_KEY: string 3 | OPENAI_MODEL_ENDPOINT: string 4 | CALLS_BASE_URL: string 5 | CALLS_APP_ID: string 6 | CALLS_APP_TOKEN: string 7 | } 8 | interface SessionDescription { 9 | sdp: string 10 | type: string 11 | } 12 | 13 | interface NewSessionResponse { 14 | sessionId: string 15 | } 16 | 17 | interface NewTrackResponse { 18 | trackName: string 19 | mid: string 20 | errorCode?: string 21 | errorDescription?: string 22 | 
} 23 | 24 | interface NewTracksResponse { 25 | tracks: NewTrackResponse[] 26 | sessionDescription?: SessionDescription 27 | errorCode?: string 28 | errorDescription?: string 29 | } 30 | 31 | interface TrackLocator { 32 | location: string 33 | sessionId: string 34 | trackName: string 35 | } 36 | 37 | class CallsSession { 38 | sessionId: string 39 | headers: any 40 | endpoint: string 41 | constructor(sessionId: string, headers: any, endpoint: string) { 42 | this.sessionId = sessionId 43 | this.headers = headers 44 | this.endpoint = endpoint 45 | } 46 | async NewTracks(body: any): Promise { 47 | const newTracksURL = new URL(`${this.endpoint}/sessions/${this.sessionId}/tracks/new?streamDebug`) 48 | const newTracksResponse = await fetch( 49 | newTracksURL.href, 50 | { 51 | method: "POST", 52 | headers: this.headers, 53 | body: JSON.stringify(body) 54 | }, 55 | ).then((res) => res.json()) as NewTracksResponse; 56 | return newTracksResponse 57 | } 58 | async Renegotiate(sdp: SessionDescription) { 59 | const renegotiateBody = { 60 | "sessionDescription": sdp 61 | } 62 | const renegotiateURL = new URL(`${this.endpoint}/sessions/${this.sessionId}/renegotiate?streamDebug`) 63 | const renegotiateResponse = await fetch(renegotiateURL.href, { 64 | method: 'PUT', 65 | headers: this.headers, 66 | body: JSON.stringify(renegotiateBody), 67 | }) 68 | } 69 | } 70 | 71 | async function CallsNewSession(baseURL: string, appID: string, appToken: string, thirdparty: boolean = false): Promise { 72 | const headers = { 73 | Authorization: `Bearer ${appToken}`, 74 | "Content-Type": "application/json", 75 | } 76 | const endpoint = `${baseURL}/${appID}` 77 | const newSessionURL = new URL(`${endpoint}/sessions/new?streamDebug`) 78 | if (thirdparty) { 79 | newSessionURL.searchParams.set('thirdparty', 'true') 80 | } 81 | const sessionResponse = await fetch(newSessionURL.href, 82 | { 83 | method: "POST", 84 | headers: headers, 85 | }, 86 | ).then((res) => res.json()) as NewSessionResponse; 87 | 
return new CallsSession(sessionResponse.sessionId, headers, endpoint) 88 | } 89 | 90 | function checkNewTracksResponse(newTracksResponse: NewTracksResponse, sdpExpected: boolean = false) { 91 | if (newTracksResponse.errorCode) { 92 | throw newTracksResponse.errorDescription 93 | } 94 | if (newTracksResponse.tracks[0].errorDescription) { 95 | throw newTracksResponse.tracks[0].errorDescription 96 | } 97 | if (sdpExpected && newTracksResponse.sessionDescription == null) { 98 | throw "empty sdp from Calls for session A" 99 | } 100 | } 101 | 102 | async function requestOpenAIService(originalRequest: Request, offer: SessionDescription, env: Env): Promise { 103 | const apiKey = env.OPENAI_API_KEY 104 | const originalRequestURL = new URL(originalRequest.url) 105 | const endpointURL = new URL(env.OPENAI_MODEL_ENDPOINT) 106 | const originalParams = new URLSearchParams(endpointURL.search) 107 | const newParams = new URLSearchParams(originalRequestURL.search) 108 | 109 | // Merge the params, giving priority to the original request URL params 110 | for (const [key, value] of originalParams) { 111 | if (!newParams.has(key)) { 112 | newParams.set(key, value) 113 | } 114 | } 115 | 116 | endpointURL.search = newParams.toString() 117 | const response = await fetch(endpointURL.href, { 118 | method: 'POST', 119 | body: offer.sdp, 120 | headers: { 121 | Authorization: `Bearer ${apiKey}`, 122 | 'Content-Type': 'application/sdp' 123 | } 124 | }) 125 | const answerSDP = await response.text() 126 | return { type: "answer", sdp: answerSDP } as SessionDescription 127 | } 128 | 129 | function optionsResponse(): Response { 130 | return new Response(null, { 131 | status: 204, 132 | headers: { 133 | "accept-post": "application/sdp", 134 | "access-control-allow-credentials": "true", 135 | "access-control-allow-headers": "content-type,authorization,if-match", 136 | "access-control-allow-methods": "PATCH,POST,PUT,DELETE,OPTIONS", 137 | "access-control-allow-origin": "*", 138 | 
"access-control-expose-headers": "x-thunderclap,location,link,accept-post,accept-patch,etag", 139 | "link": "; rel=\"ice-server\"" 140 | } 141 | }) 142 | } 143 | 144 | const corsHeaders = { "access-control-allow-origin": "*" } 145 | 146 | export default { 147 | async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise { 148 | if (request.method == 'OPTIONS') { 149 | return optionsResponse() 150 | } 151 | if (!new URL(request.url).pathname.endsWith("/endpoint")) { 152 | return new Response("not found", {status: 404}) 153 | } 154 | // PeerConnection A 155 | // This session establishes a PeerConnection between the end-user and Calls. 156 | const sessionA = await CallsNewSession(env.CALLS_BASE_URL, env.CALLS_APP_ID, env.CALLS_APP_TOKEN) 157 | const userSDP = await request.text() 158 | const newTracksResponseA = await sessionA.NewTracks({ 159 | "sessionDescription": { "sdp": userSDP, "type": "offer" }, 160 | "tracks": [{ 161 | "location": "local", 162 | "trackName": "user-mic", 163 | // Let it know a sendrecv transceiver is wanted to receive this track instead of a recvonly one 164 | "bidirectionalMediaStream": true, 165 | // Needed to create an appropriate response 166 | "kind": "audio", 167 | "mid": "0", 168 | }] 169 | }); 170 | checkNewTracksResponse(newTracksResponseA, true) 171 | 172 | // PeerConnection B 173 | // This session establishes a PeerConnection between Calls and OpenAI. 
174 | // CallsNewSession thirdparty parameter must be true to be able to connect to an external WebRTC server 175 | const sessionB = await CallsNewSession(env.CALLS_BASE_URL, env.CALLS_APP_ID, env.CALLS_APP_TOKEN, true) 176 | const newTracksResponseB = await sessionB.NewTracks({ 177 | // No offer is provided so Calls will generate one for us 178 | "tracks": [{ 179 | "location": "local", 180 | "trackName": "ai-generated-voice", 181 | // Let it know a sendrecv transceiver is wanted to receive this track instead of a recvonly one 182 | "bidirectionalMediaStream": true, 183 | // Needed to create an appropriate response 184 | "kind": "audio", 185 | }] 186 | }); 187 | checkNewTracksResponse(newTracksResponseB, true) 188 | // The Calls's offer is sent to OpenAI 189 | const openaiAnswer = await requestOpenAIService(request, newTracksResponseB.sessionDescription || {} as SessionDescription, env) 190 | // And the negotiation is completed by setting the answer from OpenAI 191 | await sessionB.Renegotiate(openaiAnswer) 192 | 193 | // PeerConnection A answer SDP must be sent before anything else 194 | // in order to establish a connection first. That's the reason 195 | // to make the exchange requests after returning a response 196 | ctx.waitUntil((async () => { 197 | console.log("Starting exchange") 198 | // The tracks exchange happens here 199 | const exchangeStepOne = await sessionA.NewTracks({ 200 | // Session A is the PeerConnection from Calls to the end-user. 201 | // The following request instructs Calls to pull the 'ai-generated-voice' from session B and to send 202 | // it back to the end-user through an existing transceiver that was created to 203 | // publish the user-mic track at the beginning 204 | // 205 | // 206 | // PeerConnection A 207 | // end-user <-> [sendrecv transceiver] <---- ai-generated-voice (new!) 
208 | // mid=0 (#user-mic) \ 209 | // `--> user-mic 210 | "tracks": [{ 211 | "location": "remote", 212 | "sessionId": sessionB.sessionId, 213 | "trackName": "ai-generated-voice", 214 | // We may not know the exact mid value associated to the user-mic transceiver 215 | // so instead of providing it, let Calls to resolve it for you 216 | "mid": "#user-mic" 217 | }] 218 | }) 219 | checkNewTracksResponse(exchangeStepOne) 220 | console.log("exchangeStepOne ready") 221 | // Session B is the PeerConnection from Calls to OpenAI. 222 | // The following request instructs Calls to pull the 'user-mic' from session A and to send 223 | // it back to OpenAI through an existing transceiver that was created to 224 | // publish the ai-generated-voice 225 | // 226 | // 227 | // PeerConnection B 228 | // OpenAI <-> [sendrecv transceiver] <-------- user-mic (new!) 229 | // mid=0 (#ai-generated-voice) \ 230 | // \ 231 | // `--> ai-generated-voice 232 | const exchangeStepTwo = await sessionB.NewTracks({ 233 | "tracks": [{ 234 | "location": "remote", 235 | "sessionId": sessionA.sessionId, 236 | "trackName": "user-mic", 237 | // Let Calls to find out the actual mid value 238 | "mid": "#ai-generated-voice" 239 | }] 240 | }) 241 | checkNewTracksResponse(exchangeStepTwo) 242 | console.log("exchangeStepTwo ready") 243 | })()) 244 | // This will complete the negotiation to connect PeerConnection A 245 | return new Response(newTracksResponseA.sessionDescription?.sdp, { 246 | status: 200, 247 | headers: corsHeaders 248 | }) 249 | }, 250 | } satisfies ExportedHandler; --------------------------------------------------------------------------------