├── .github └── workflows │ ├── docs.yml │ ├── tests-frontend.yml │ └── tests.yml ├── .gitignore ├── CNAME ├── LICENSE ├── README.md ├── backend └── fastrtc │ ├── __init__.py │ ├── credentials.py │ ├── pause_detection │ ├── __init__.py │ ├── protocol.py │ └── silero.py │ ├── py.typed │ ├── reply_on_pause.py │ ├── reply_on_stopwords.py │ ├── speech_to_text │ ├── __init__.py │ ├── stt_.py │ └── test_file.wav │ ├── stream.py │ ├── templates │ ├── component │ │ ├── _basePickBy-BWyW5hVA.js │ │ ├── _baseUniq-BfI_PfuI.js │ │ ├── arc-BHDr2DIN.js │ │ ├── architectureDiagram-IEHRJDOE-SNjtt7L7.js │ │ ├── assets │ │ │ └── worker-lPYB70QI.js │ │ ├── blockDiagram-JOT3LUYC-DoVxwmWs.js │ │ ├── c4Diagram-VJAJSXHY-BFLwyIU9.js │ │ ├── channel-BLI8LD7T.js │ │ ├── chunk-4BMEZGHF-4N88GRXN.js │ │ ├── chunk-A2AXSNBT-C4qvwI5K.js │ │ ├── chunk-AEK57VVT-OrsXQu-U.js │ │ ├── chunk-D6G4REZN-BSQJOIKu.js │ │ ├── chunk-RZ5BOZE2-BQm8zocb.js │ │ ├── chunk-XZIHB7SX-DIHERCaT.js │ │ ├── classDiagram-GIVACNV2-KrkkVrlR.js │ │ ├── classDiagram-v2-COTLJTTW-KrkkVrlR.js │ │ ├── clone-D_f12Uao.js │ │ ├── cytoscape.esm-C2cgT2B2.js │ │ ├── dagre-OKDRZEBW-BpZfNC14.js │ │ ├── diagram-SSKATNLV-OTX44Aig.js │ │ ├── diagram-VNBRO52H-CxKrAEhh.js │ │ ├── erDiagram-Q7BY3M3F-CO2pasYc.js │ │ ├── flowDiagram-4HSFHLVR-DiHdHcaJ.js │ │ ├── ganttDiagram-APWFNJXF-BzPYWX9W.js │ │ ├── gitGraphDiagram-7IBYFJ6S-DFMHUBmV.js │ │ ├── graph-as_7zmXK.js │ │ ├── index-xxHpJ_RR.js │ │ ├── index.js │ │ ├── infoDiagram-PH2N3AL5-fhMlkv6w.js │ │ ├── init-DjUOC4st.js │ │ ├── journeyDiagram-U35MCT3I-BI3B5NA4.js │ │ ├── kanban-definition-NDS4AKOZ-BdftdmWH.js │ │ ├── layout-BG95tefZ.js │ │ ├── linear-CRa8eD4r.js │ │ ├── mermaid.core-C0Blj36u.js │ │ ├── mindmap-definition-ALO5MXBD-BO2Uu9ee.js │ │ ├── ordinal-DfAQgscy.js │ │ ├── pieDiagram-IB7DONF6-DDe9KgBF.js │ │ ├── quadrantDiagram-7GDLP6J5-BGl9qPho.js │ │ ├── radar-MK3ICKWK-Uwn-jZp4.js │ │ ├── requirementDiagram-KVF5MWMF-BvKrRVax.js │ │ ├── sankeyDiagram-QLVOVGJD-B_m0WTk6.js │ │ ├── sequenceDiagram-X6HHIX6F-Bfni-YW_.js │ │ ├── stateDiagram-DGXRK772-CpehDlzW.js │ │ ├── stateDiagram-v2-YXO3MK2T-CFM2lJF8.js │ │ ├── style.css │ │ ├── timeline-definition-BDJGKUSR-C6DrPqLg.js │ │ └── xychartDiagram-VJFVF3MP-BKtEAN5R.js │ └── example │ │ ├── assets │ │ └── worker-lPYB70QI.js │ │ ├── index.js │ │ └── style.css │ ├── text_to_speech │ ├── __init__.py │ ├── test_tts.py │ └── tts.py │ ├── tracks.py │ ├── utils.py │ ├── webrtc.py │ ├── webrtc_connection_mixin.py │ └── websocket.py ├── demo ├── __init__.py ├── echo_audio │ ├── README.md │ ├── app.py │ └── requirements.txt ├── gemini_audio_video │ ├── README.md │ ├── app.py │ └── requirements.txt ├── gemini_conversation │ ├── README.md │ └── app.py ├── hello_computer │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── llama_code_editor │ ├── README.md │ ├── app.py │ ├── assets │ │ ├── sandbox.html │ │ └── spinner.html │ ├── handler.py │ ├── requirements.in │ ├── requirements.txt │ └── ui.py ├── llm_voice_chat │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ └── requirements.txt ├── moonshine_live │ ├── README.md │ ├── app.py │ ├── default-favicon.ico │ └── requirements.txt ├── nextjs_voice_chat │ ├── README.md │ ├── backend │ │ ├── env.py │ │ └── server.py │ ├── frontend │ │ └── fastrtc-demo │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── app │ │ │ ├── favicon.ico │ │ │ ├── globals.css │ │ │ ├── layout.tsx │ │ │ └── page.tsx │ │ │ ├── components.json │ │ │ ├── components │ │ │ ├── background-circle-provider.tsx │ │ │ ├── 
theme-provider.tsx │ │ │ └── ui │ │ │ │ ├── ai-voice-input.tsx │ │ │ │ ├── background-circles.tsx │ │ │ │ ├── reset-chat.tsx │ │ │ │ ├── theme-toggle.tsx │ │ │ │ └── theme-transition.tsx │ │ │ ├── eslint.config.mjs │ │ │ ├── lib │ │ │ ├── utils.ts │ │ │ └── webrtc-client.ts │ │ │ ├── next.config.ts │ │ │ ├── package.json │ │ │ ├── postcss.config.mjs │ │ │ ├── public │ │ │ ├── file.svg │ │ │ ├── globe.svg │ │ │ ├── next.svg │ │ │ ├── vercel.svg │ │ │ └── window.svg │ │ │ └── tsconfig.json │ ├── requirements.txt │ └── run.sh ├── object_detection │ ├── README.md │ ├── app.py │ ├── index.html │ ├── inference.py │ ├── requirements.txt │ └── utils.py ├── patient_intake │ └── app.py ├── phonic_chat │ ├── README.md │ ├── app.py │ └── requirements.txt ├── qwen_phone_chat │ ├── README.md │ ├── app.py │ └── requirements.txt ├── send_text_or_audio │ ├── app.py │ └── index.html ├── talk_to_azure_openai │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_claude │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_gemini │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_llama4 │ ├── AV_Huggy.png │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_openai │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_sambanova │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_smolagents │ ├── README.md │ ├── app.py │ └── requirements.txt ├── text_mode │ └── app.py ├── voice_text_editor │ ├── README.md │ └── app.py ├── voice_text_editor_local │ └── app.py ├── webrtc_vs_websocket │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt └── whisper_realtime │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── docs ├── CNAME ├── Discord-Symbol-White.svg ├── advanced-configuration.md ├── cookbook.md ├── deployment.md ├── faq.md ├── fastrtc_logo.png ├── fastrtc_logo_small.png ├── gradio-logo-with-title.svg ├── gradio-logo.svg ├── hf-logo-with-title.svg ├── hf-logo.svg ├── index.md ├── reference │ ├── credentials.md │ ├── reply_on_pause.md │ ├── stream.md │ ├── stream_handlers.md │ └── utils.md ├── speech_to_text_gallery.md ├── stylesheets │ └── extra.css ├── text_to_speech_gallery.md ├── turn_taking_gallery.md ├── userguide │ ├── api.md │ ├── audio-video.md │ ├── audio.md │ ├── gradio.md │ ├── streams.md │ ├── video.md │ ├── webrtc_docs.md │ └── websocket_docs.md └── utils.md ├── frontend ├── .prettierrc ├── Example.svelte ├── Index.svelte ├── gradio.config.js ├── index.ts ├── package-lock.json ├── package.json └── shared │ ├── AudioWave.svelte │ ├── InteractiveAudio.svelte │ ├── InteractiveVideo.svelte │ ├── MicrophoneMuted.svelte │ ├── PulsingIcon.svelte │ ├── StaticAudio.svelte │ ├── StaticVideo.svelte │ ├── TextboxWithMic.svelte │ ├── Webcam.svelte │ ├── WebcamPermissions.svelte │ ├── index.ts │ ├── stream_utils.ts │ ├── utils.ts │ └── webrtc_utils.ts ├── justfile ├── mkdocs.yml ├── overrides └── partials │ └── header.html ├── pyproject.toml ├── test ├── __init__.py ├── test_tts.py ├── test_utils.py └── test_webrtc_connection_mixin.py └── upload_space.py /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: 
write 12 | pull-requests: write 13 | deployments: write 14 | pages: write 15 | 16 | jobs: 17 | deploy: 18 | runs-on: ubuntu-latest 19 | if: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false) 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Configure Git Credentials 23 | run: | 24 | git config user.name github-actions[bot] 25 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 26 | - uses: actions/setup-python@v5 27 | with: 28 | python-version: 3.x 29 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 30 | - uses: actions/cache@v4 31 | with: 32 | key: mkdocs-material-${{ env.cache_id }} 33 | path: .cache 34 | restore-keys: | 35 | mkdocs-material- 36 | - run: pip install mkdocs-material mkdocs-llmstxt==0.1.0 37 | - name: Build docs 38 | run: mkdocs build 39 | 40 | - name: Deploy to GH Pages (main) 41 | if: github.event_name == 'push' 42 | run: mkdocs gh-deploy --force 43 | 44 | - name: Deploy PR Preview 45 | if: github.event_name == 'pull_request' 46 | uses: rossjrw/pr-preview-action@v1 47 | with: 48 | source-dir: ./site 49 | preview-branch: gh-pages 50 | umbrella-dir: pr-preview 51 | action: auto -------------------------------------------------------------------------------- /.github/workflows/tests-frontend.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | prettier: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: actions/setup-node@v4 11 | with: 12 | node-version: 18 13 | - name: Run prettier 14 | run: | 15 | cd frontend 16 | npm install 17 | npx prettier --check . -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: actions/setup-python@v5 11 | with: 12 | python-version: '3.10' 13 | - name: Run linters 14 | run: | 15 | pip install ruff pyright 16 | pip install -e .[dev] 17 | ruff check . 18 | ruff format --check --diff . 
19 | pyright 20 | test: 21 | runs-on: ${{ matrix.os }} 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | os: [ubuntu-latest] 26 | python: 27 | - '3.10' 28 | - '3.13' 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python }} 34 | - name: Run tests 35 | run: | 36 | python -m pip install -U pip 37 | pip install '.[dev, tts]' 38 | python -m pytest --capture=no 39 | shell: bash 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eggs/ 2 | dist/ 3 | *.pyc 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | __tmp/* 8 | *.pyi 9 | .mypycache 10 | .ruff_cache 11 | node_modules 12 | demo/MobileNetSSD_deploy.caffemodel 13 | demo/MobileNetSSD_deploy.prototxt.txt 14 | demo/scratch 15 | .gradio 16 | .vscode 17 | .DS_Store 18 | .venv* 19 | .env 20 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | fastrtc.org -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Freddy Boulton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /backend/fastrtc/__init__.py: -------------------------------------------------------------------------------- 1 | from .credentials import ( 2 | get_cloudflare_turn_credentials, 3 | get_cloudflare_turn_credentials_async, 4 | get_hf_turn_credentials, 5 | get_hf_turn_credentials_async, 6 | get_turn_credentials, 7 | get_turn_credentials_async, 8 | get_twilio_turn_credentials, 9 | ) 10 | from .pause_detection import ( 11 | ModelOptions, 12 | PauseDetectionModel, 13 | SileroVadOptions, 14 | get_silero_model, 15 | ) 16 | from .reply_on_pause import AlgoOptions, ReplyOnPause 17 | from .reply_on_stopwords import ReplyOnStopWords 18 | from .speech_to_text import MoonshineSTT, get_stt_model 19 | from .stream import Stream, UIArgs 20 | from .text_to_speech import ( 21 | CartesiaTTSOptions, 22 | KokoroTTSOptions, 23 | get_tts_model, 24 | ) 25 | from .tracks import ( 26 | AsyncAudioVideoStreamHandler, 27 | AsyncStreamHandler, 28 | AudioEmitType, 29 | AudioVideoStreamHandler, 30 | StreamHandler, 31 | VideoEmitType, 32 | VideoStreamHandler, 33 | ) 34 | from .utils import ( 35 | AdditionalOutputs, 36 | CloseStream, 37 | Warning, 38 | WebRTCData, 39 | WebRTCError, 40 | aggregate_bytes_to_16bit, 41 | async_aggregate_bytes_to_16bit, 42 | audio_to_bytes, 43 | audio_to_file, 44 | audio_to_float32, 45 | audio_to_int16, 46 | get_current_context, 47 | wait_for_item, 48 | ) 49 | from .webrtc import ( 50 | WebRTC, 51 | ) 52 | 53 | __all__ = [ 54 | "AsyncStreamHandler", 55 | "AudioVideoStreamHandler", 56 | "AudioEmitType", 57 | "AsyncAudioVideoStreamHandler", 58 | "AlgoOptions", 59 | "AdditionalOutputs", 60 | "aggregate_bytes_to_16bit", 61 | "async_aggregate_bytes_to_16bit", 62 | "audio_to_bytes", 63 | "audio_to_file", 64 | "audio_to_float32", 65 | "audio_to_int16", 66 | "get_hf_turn_credentials", 67 | "get_twilio_turn_credentials", 68 | "get_turn_credentials", 69 | "ReplyOnPause", 70 | "ReplyOnStopWords", 71 | "SileroVadOptions", 72 | "get_stt_model", 73 | "MoonshineSTT", 74 | "StreamHandler", 75 | "Stream", 76 | "VideoEmitType", 77 | "WebRTC", 78 | "WebRTCError", 79 | "Warning", 80 | "get_tts_model", 81 | "KokoroTTSOptions", 82 | "get_cloudflare_turn_credentials_async", 83 | "get_hf_turn_credentials_async", 84 | "get_turn_credentials_async", 85 | "get_cloudflare_turn_credentials", 86 | "wait_for_item", 87 | "UIArgs", 88 | "ModelOptions", 89 | "PauseDetectionModel", 90 | "get_silero_model", 91 | "SileroVadOptions", 92 | "VideoStreamHandler", 93 | "CloseStream", 94 | "get_current_context", 95 | "CartesiaTTSOptions", 96 | "WebRTCData", 97 | ] 98 | -------------------------------------------------------------------------------- /backend/fastrtc/pause_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .protocol import ModelOptions, PauseDetectionModel 2 | from .silero import SileroVADModel, SileroVadOptions, get_silero_model 3 | 4 | __all__ = [ 5 | "SileroVADModel", 6 | "SileroVadOptions", 7 | "PauseDetectionModel", 8 | "ModelOptions", 9 | "get_silero_model", 10 | ] 11 | -------------------------------------------------------------------------------- /backend/fastrtc/pause_detection/protocol.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Protocol, TypeAlias 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | 6 | from ..utils import AudioChunk 7 | 8 | ModelOptions: TypeAlias = Any 9 | 10 | 11 
| class PauseDetectionModel(Protocol): 12 | def vad( 13 | self, 14 | audio: tuple[int, NDArray[np.int16] | NDArray[np.float32]], 15 | options: ModelOptions, 16 | ) -> tuple[float, list[AudioChunk]]: ... 17 | 18 | def warmup( 19 | self, 20 | ) -> None: ... 21 | -------------------------------------------------------------------------------- /backend/fastrtc/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/__init__.py: -------------------------------------------------------------------------------- 1 | from .stt_ import MoonshineSTT, get_stt_model, stt_for_chunks 2 | 3 | __all__ = ["get_stt_model", "MoonshineSTT", "get_stt_model", "stt_for_chunks"] 4 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/stt_.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from pathlib import Path 3 | from typing import Literal, Protocol 4 | 5 | import click 6 | import librosa 7 | import numpy as np 8 | from numpy.typing import NDArray 9 | 10 | from ..utils import AudioChunk, audio_to_float32 11 | 12 | curr_dir = Path(__file__).parent 13 | 14 | 15 | class STTModel(Protocol): 16 | def stt(self, audio: tuple[int, NDArray[np.int16 | np.float32]]) -> str: ... 17 | 18 | 19 | class MoonshineSTT(STTModel): 20 | def __init__( 21 | self, model: Literal["moonshine/base", "moonshine/tiny"] = "moonshine/base" 22 | ): 23 | try: 24 | from moonshine_onnx import MoonshineOnnxModel, load_tokenizer 25 | except (ImportError, ModuleNotFoundError): 26 | raise ImportError( 27 | "Install fastrtc[stt] for speech-to-text and stopword detection support." 
28 | ) 29 | 30 | self.model = MoonshineOnnxModel(model_name=model) 31 | self.tokenizer = load_tokenizer() 32 | 33 | def stt(self, audio: tuple[int, NDArray[np.int16 | np.float32]]) -> str: 34 | sr, audio_np = audio # type: ignore 35 | audio_np = audio_to_float32(audio_np) 36 | if sr != 16000: 37 | audio_np: NDArray[np.float32] = librosa.resample( 38 | audio_np, orig_sr=sr, target_sr=16000 39 | ) 40 | if audio_np.ndim == 1: 41 | audio_np = audio_np.reshape(1, -1) 42 | tokens = self.model.generate(audio_np) 43 | return self.tokenizer.decode_batch(tokens)[0] 44 | 45 | 46 | @lru_cache 47 | def get_stt_model( 48 | model: Literal["moonshine/base", "moonshine/tiny"] = "moonshine/base", 49 | ) -> STTModel: 50 | import os 51 | 52 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 53 | m = MoonshineSTT(model) 54 | from moonshine_onnx import load_audio 55 | 56 | audio = load_audio(str(curr_dir / "test_file.wav")) 57 | print(click.style("INFO", fg="green") + ":\t Warming up STT model.") 58 | 59 | m.stt((16000, audio)) 60 | print(click.style("INFO", fg="green") + ":\t STT model warmed up.") 61 | return m 62 | 63 | 64 | def stt_for_chunks( 65 | stt_model: STTModel, 66 | audio: tuple[int, NDArray[np.int16 | np.float32]], 67 | chunks: list[AudioChunk], 68 | ) -> str: 69 | sr, audio_np = audio 70 | return " ".join( 71 | [ 72 | stt_model.stt((sr, audio_np[chunk["start"] : chunk["end"]])) 73 | for chunk in chunks 74 | ] 75 | ) 76 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/test_file.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/backend/fastrtc/speech_to_text/test_file.wav -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/_basePickBy-BWyW5hVA.js: -------------------------------------------------------------------------------- 1 | import { e as x, c as b, g as m, k as P, h as p, j as w, l as N, m as c, n as I, t as A, o as M } from "./_baseUniq-BfI_PfuI.js"; 2 | import { aJ as g, az as E, aK as F, aL as _, aM as $, aN as l, aO as B, aP as T, aQ as y, aR as L } from "./mermaid.core-C0Blj36u.js"; 3 | var R = /\s/; 4 | function S(n) { 5 | for (var r = n.length; r-- && R.test(n.charAt(r)); ) 6 | ; 7 | return r; 8 | } 9 | var z = /^\s+/; 10 | function G(n) { 11 | return n && n.slice(0, S(n) + 1).replace(z, ""); 12 | } 13 | var o = NaN, H = /^[-+]0x[0-9a-f]+$/i, K = /^0b[01]+$/i, q = /^0o[0-7]+$/i, C = parseInt; 14 | function J(n) { 15 | if (typeof n == "number") 16 | return n; 17 | if (x(n)) 18 | return o; 19 | if (g(n)) { 20 | var r = typeof n.valueOf == "function" ? n.valueOf() : n; 21 | n = g(r) ? r + "" : r; 22 | } 23 | if (typeof n != "string") 24 | return n === 0 ? n : +n; 25 | n = G(n); 26 | var t = K.test(n); 27 | return t || q.test(n) ? C(n.slice(2), t ? 2 : 8) : H.test(n) ? o : +n; 28 | } 29 | var v = 1 / 0, Q = 17976931348623157e292; 30 | function W(n) { 31 | if (!n) 32 | return n === 0 ? n : 0; 33 | if (n = J(n), n === v || n === -v) { 34 | var r = n < 0 ? -1 : 1; 35 | return r * Q; 36 | } 37 | return n === n ? n : 0; 38 | } 39 | function X(n) { 40 | var r = W(n), t = r % 1; 41 | return r === r ? t ? r - t : r : 0; 42 | } 43 | function fn(n) { 44 | var r = n == null ? 0 : n.length; 45 | return r ? 
b(n) : []; 46 | } 47 | var O = Object.prototype, Y = O.hasOwnProperty, dn = E(function(n, r) { 48 | n = Object(n); 49 | var t = -1, i = r.length, a = i > 2 ? r[2] : void 0; 50 | for (a && F(r[0], r[1], a) && (i = 1); ++t < i; ) 51 | for (var f = r[t], e = _(f), s = -1, d = e.length; ++s < d; ) { 52 | var u = e[s], h = n[u]; 53 | (h === void 0 || $(h, O[u]) && !Y.call(n, u)) && (n[u] = f[u]); 54 | } 55 | return n; 56 | }); 57 | function un(n) { 58 | var r = n == null ? 0 : n.length; 59 | return r ? n[r - 1] : void 0; 60 | } 61 | function D(n) { 62 | return function(r, t, i) { 63 | var a = Object(r); 64 | if (!l(r)) { 65 | var f = m(t); 66 | r = P(r), t = function(s) { 67 | return f(a[s], s, a); 68 | }; 69 | } 70 | var e = n(r, t, i); 71 | return e > -1 ? a[f ? r[e] : e] : void 0; 72 | }; 73 | } 74 | var U = Math.max; 75 | function Z(n, r, t) { 76 | var i = n == null ? 0 : n.length; 77 | if (!i) 78 | return -1; 79 | var a = t == null ? 0 : X(t); 80 | return a < 0 && (a = U(i + a, 0)), p(n, m(r), a); 81 | } 82 | var hn = D(Z); 83 | function V(n, r) { 84 | var t = -1, i = l(n) ? Array(n.length) : []; 85 | return w(n, function(a, f, e) { 86 | i[++t] = r(a, f, e); 87 | }), i; 88 | } 89 | function gn(n, r) { 90 | var t = B(n) ? N : V; 91 | return t(n, m(r)); 92 | } 93 | var j = Object.prototype, k = j.hasOwnProperty; 94 | function nn(n, r) { 95 | return n != null && k.call(n, r); 96 | } 97 | function mn(n, r) { 98 | return n != null && c(n, r, nn); 99 | } 100 | function rn(n, r) { 101 | return n < r; 102 | } 103 | function tn(n, r, t) { 104 | for (var i = -1, a = n.length; ++i < a; ) { 105 | var f = n[i], e = r(f); 106 | if (e != null && (s === void 0 ? e === e && !x(e) : t(e, s))) 107 | var s = e, d = f; 108 | } 109 | return d; 110 | } 111 | function on(n) { 112 | return n && n.length ? tn(n, T, rn) : void 0; 113 | } 114 | function an(n, r, t, i) { 115 | if (!g(n)) 116 | return n; 117 | r = I(r, n); 118 | for (var a = -1, f = r.length, e = f - 1, s = n; s != null && ++a < f; ) { 119 | var d = A(r[a]), u = t; 120 | if (d === "__proto__" || d === "constructor" || d === "prototype") 121 | return n; 122 | if (a != e) { 123 | var h = s[d]; 124 | u = void 0, u === void 0 && (u = g(h) ? h : y(r[a + 1]) ? 
[] : {}); 125 | } 126 | L(s, d, u), s = s[d]; 127 | } 128 | return n; 129 | } 130 | function vn(n, r, t) { 131 | for (var i = -1, a = r.length, f = {}; ++i < a; ) { 132 | var e = r[i], s = M(n, e); 133 | t(s, e) && an(f, I(e, n), s); 134 | } 135 | return f; 136 | } 137 | export { 138 | rn as a, 139 | tn as b, 140 | V as c, 141 | vn as d, 142 | on as e, 143 | fn as f, 144 | hn as g, 145 | mn as h, 146 | dn as i, 147 | X as j, 148 | un as l, 149 | gn as m, 150 | W as t 151 | }; 152 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/assets/worker-lPYB70QI.js: -------------------------------------------------------------------------------- 1 | (function(){"use strict";const R="https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd/ffmpeg-core.js";var E;(function(t){t.LOAD="LOAD",t.EXEC="EXEC",t.WRITE_FILE="WRITE_FILE",t.READ_FILE="READ_FILE",t.DELETE_FILE="DELETE_FILE",t.RENAME="RENAME",t.CREATE_DIR="CREATE_DIR",t.LIST_DIR="LIST_DIR",t.DELETE_DIR="DELETE_DIR",t.ERROR="ERROR",t.DOWNLOAD="DOWNLOAD",t.PROGRESS="PROGRESS",t.LOG="LOG",t.MOUNT="MOUNT",t.UNMOUNT="UNMOUNT"})(E||(E={}));const a=new Error("unknown message type"),f=new Error("ffmpeg is not loaded, call `await ffmpeg.load()` first"),u=new Error("failed to import ffmpeg-core.js");let r;const O=async({coreURL:t,wasmURL:n,workerURL:e})=>{const o=!r;try{t||(t=R),importScripts(t)}catch{if(t||(t=R.replace("/umd/","/esm/")),self.createFFmpegCore=(await import(t)).default,!self.createFFmpegCore)throw u}const s=t,c=n||t.replace(/.js$/g,".wasm"),b=e||t.replace(/.js$/g,".worker.js");return r=await self.createFFmpegCore({mainScriptUrlOrBlob:`${s}#${btoa(JSON.stringify({wasmURL:c,workerURL:b}))}`}),r.setLogger(i=>self.postMessage({type:E.LOG,data:i})),r.setProgress(i=>self.postMessage({type:E.PROGRESS,data:i})),o},l=({args:t,timeout:n=-1})=>{r.setTimeout(n),r.exec(...t);const e=r.ret;return r.reset(),e},m=({path:t,data:n})=>(r.FS.writeFile(t,n),!0),D=({path:t,encoding:n})=>r.FS.readFile(t,{encoding:n}),S=({path:t})=>(r.FS.unlink(t),!0),I=({oldPath:t,newPath:n})=>(r.FS.rename(t,n),!0),L=({path:t})=>(r.FS.mkdir(t),!0),N=({path:t})=>{const n=r.FS.readdir(t),e=[];for(const o of n){const s=r.FS.stat(`${t}/${o}`),c=r.FS.isDir(s.mode);e.push({name:o,isDir:c})}return e},A=({path:t})=>(r.FS.rmdir(t),!0),w=({fsType:t,options:n,mountPoint:e})=>{const o=t,s=r.FS.filesystems[o];return s?(r.FS.mount(s,n,e),!0):!1},k=({mountPoint:t})=>(r.FS.unmount(t),!0);self.onmessage=async({data:{id:t,type:n,data:e}})=>{const o=[];let s;try{if(n!==E.LOAD&&!r)throw f;switch(n){case E.LOAD:s=await O(e);break;case E.EXEC:s=l(e);break;case E.WRITE_FILE:s=m(e);break;case E.READ_FILE:s=D(e);break;case E.DELETE_FILE:s=S(e);break;case E.RENAME:s=I(e);break;case E.CREATE_DIR:s=L(e);break;case E.LIST_DIR:s=N(e);break;case E.DELETE_DIR:s=A(e);break;case E.MOUNT:s=w(e);break;case E.UNMOUNT:s=k(e);break;default:throw a}}catch(c){self.postMessage({id:t,type:E.ERROR,data:c.toString()});return}s instanceof Uint8Array&&o.push(s.buffer),self.postMessage({id:t,type:n,data:s},o)}})(); 2 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/channel-BLI8LD7T.js: -------------------------------------------------------------------------------- 1 | import { ao as r, ap as n } from "./mermaid.core-C0Blj36u.js"; 2 | const t = (a, o) => r.lang.round(n.parse(a)[o]); 3 | export { 4 | t as c 5 | }; 6 | -------------------------------------------------------------------------------- 
/backend/fastrtc/templates/component/chunk-4BMEZGHF-4N88GRXN.js: -------------------------------------------------------------------------------- 1 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 2 | function m(e, c) { 3 | var i, t, o; 4 | e.accDescr && ((i = c.setAccDescription) == null || i.call(c, e.accDescr)), e.accTitle && ((t = c.setAccTitle) == null || t.call(c, e.accTitle)), e.title && ((o = c.setDiagramTitle) == null || o.call(c, e.title)); 5 | } 6 | l(m, "populateCommonDb"); 7 | export { 8 | m as p 9 | }; 10 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-D6G4REZN-BSQJOIKu.js: -------------------------------------------------------------------------------- 1 | import { _ as n, T as c, m as l } from "./mermaid.core-C0Blj36u.js"; 2 | var o = /* @__PURE__ */ n((a, t) => { 3 | const e = a.append("rect"); 4 | if (e.attr("x", t.x), e.attr("y", t.y), e.attr("fill", t.fill), e.attr("stroke", t.stroke), e.attr("width", t.width), e.attr("height", t.height), t.name && e.attr("name", t.name), t.rx && e.attr("rx", t.rx), t.ry && e.attr("ry", t.ry), t.attrs !== void 0) 5 | for (const r in t.attrs) 6 | e.attr(r, t.attrs[r]); 7 | return t.class && e.attr("class", t.class), e; 8 | }, "drawRect"), d = /* @__PURE__ */ n((a, t) => { 9 | const e = { 10 | x: t.startx, 11 | y: t.starty, 12 | width: t.stopx - t.startx, 13 | height: t.stopy - t.starty, 14 | fill: t.fill, 15 | stroke: t.stroke, 16 | class: "rect" 17 | }; 18 | o(a, e).lower(); 19 | }, "drawBackgroundRect"), g = /* @__PURE__ */ n((a, t) => { 20 | const e = t.text.replace(c, " "), r = a.append("text"); 21 | r.attr("x", t.x), r.attr("y", t.y), r.attr("class", "legend"), r.style("text-anchor", t.anchor), t.class && r.attr("class", t.class); 22 | const s = r.append("tspan"); 23 | return s.attr("x", t.x + t.textMargin * 2), s.text(e), r; 24 | }, "drawText"), m = /* @__PURE__ */ n((a, t, e, r) => { 25 | const s = a.append("image"); 26 | s.attr("x", t), s.attr("y", e); 27 | const i = l(r); 28 | s.attr("xlink:href", i); 29 | }, "drawImage"), h = /* @__PURE__ */ n((a, t, e, r) => { 30 | const s = a.append("use"); 31 | s.attr("x", t), s.attr("y", e); 32 | const i = l(r); 33 | s.attr("xlink:href", `#${i}`); 34 | }, "drawEmbeddedImage"), y = /* @__PURE__ */ n(() => ({ 35 | x: 0, 36 | y: 0, 37 | width: 100, 38 | height: 100, 39 | fill: "#EDF2AE", 40 | stroke: "#666", 41 | anchor: "start", 42 | rx: 0, 43 | ry: 0 44 | }), "getNoteRect"), p = /* @__PURE__ */ n(() => ({ 45 | x: 0, 46 | y: 0, 47 | width: 100, 48 | height: 100, 49 | "text-anchor": "start", 50 | style: "#666", 51 | textMargin: 0, 52 | rx: 0, 53 | ry: 0, 54 | tspan: !0 55 | }), "getTextObj"); 56 | export { 57 | p as a, 58 | d as b, 59 | h as c, 60 | o as d, 61 | m as e, 62 | g as f, 63 | y as g 64 | }; 65 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-RZ5BOZE2-BQm8zocb.js: -------------------------------------------------------------------------------- 1 | import { _ as n, j as r, k as g, l as d } from "./mermaid.core-C0Blj36u.js"; 2 | var u = /* @__PURE__ */ n((t, e) => { 3 | let o; 4 | return e === "sandbox" && (o = r("#i" + t)), (e === "sandbox" ? 
r(o.nodes()[0].contentDocument.body) : r("body")).select(`[id="${t}"]`); 5 | }, "getDiagramElement"), b = /* @__PURE__ */ n((t, e, o, i) => { 6 | t.attr("class", o); 7 | const { width: a, height: s, x: h, y: x } = l(t, e); 8 | g(t, s, a, i); 9 | const c = w(h, x, a, s, e); 10 | t.attr("viewBox", c), d.debug(`viewBox configured: ${c} with padding: ${e}`); 11 | }, "setupViewPortForSVG"), l = /* @__PURE__ */ n((t, e) => { 12 | var i; 13 | const o = ((i = t.node()) == null ? void 0 : i.getBBox()) || { width: 0, height: 0, x: 0, y: 0 }; 14 | return { 15 | width: o.width + e * 2, 16 | height: o.height + e * 2, 17 | x: o.x, 18 | y: o.y 19 | }; 20 | }, "calculateDimensionsWithPadding"), w = /* @__PURE__ */ n((t, e, o, i, a) => `${t - a} ${e - a} ${o} ${i}`, "createViewBox"); 21 | export { 22 | u as g, 23 | b as s 24 | }; 25 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-XZIHB7SX-DIHERCaT.js: -------------------------------------------------------------------------------- 1 | import { _ as s } from "./mermaid.core-C0Blj36u.js"; 2 | var t, e = (t = class { 3 | /** 4 | * @param init - Function that creates the default state. 5 | */ 6 | constructor(i) { 7 | this.init = i, this.records = this.init(); 8 | } 9 | reset() { 10 | this.records = this.init(); 11 | } 12 | }, s(t, "ImperativeState"), t); 13 | export { 14 | e as I 15 | }; 16 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/classDiagram-GIVACNV2-KrkkVrlR.js: -------------------------------------------------------------------------------- 1 | import { c as r, C as s, a as e, s as t } from "./chunk-A2AXSNBT-C4qvwI5K.js"; 2 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 3 | var d = { 4 | parser: r, 5 | get db() { 6 | return new s(); 7 | }, 8 | renderer: e, 9 | styles: t, 10 | init: /* @__PURE__ */ l((a) => { 11 | a.class || (a.class = {}), a.class.arrowMarkerAbsolute = a.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | d as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/classDiagram-v2-COTLJTTW-KrkkVrlR.js: -------------------------------------------------------------------------------- 1 | import { c as r, C as s, a as e, s as t } from "./chunk-A2AXSNBT-C4qvwI5K.js"; 2 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 3 | var d = { 4 | parser: r, 5 | get db() { 6 | return new s(); 7 | }, 8 | renderer: e, 9 | styles: t, 10 | init: /* @__PURE__ */ l((a) => { 11 | a.class || (a.class = {}), a.class.arrowMarkerAbsolute = a.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | d as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/clone-D_f12Uao.js: -------------------------------------------------------------------------------- 1 | import { b as r } from "./_baseUniq-BfI_PfuI.js"; 2 | var e = 4; 3 | function a(o) { 4 | return r(o, e); 5 | } 6 | export { 7 | a as c 8 | }; 9 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/index.js: -------------------------------------------------------------------------------- 1 | import { E as s, a as t, I as l, l as d, d as o, b as p } from "./index-xxHpJ_RR.js"; 2 | export { 3 | s as BaseExample, 4 | t as BaseInteractiveVideo, 5 | l as default, 6 | d as loaded, 7 | o as playable, 8 | 
p as prettyBytes 9 | }; 10 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/infoDiagram-PH2N3AL5-fhMlkv6w.js: -------------------------------------------------------------------------------- 1 | import { _ as e, l as s, H as o, k as i, I as g } from "./mermaid.core-C0Blj36u.js"; 2 | import { p } from "./radar-MK3ICKWK-Uwn-jZp4.js"; 3 | var v = { 4 | parse: /* @__PURE__ */ e(async (r) => { 5 | const a = await p("info", r); 6 | s.debug(a); 7 | }, "parse") 8 | }, d = { version: g.version }, c = /* @__PURE__ */ e(() => d.version, "getVersion"), m = { 9 | getVersion: c 10 | }, l = /* @__PURE__ */ e((r, a, n) => { 11 | s.debug(`rendering info diagram 12 | ` + r); 13 | const t = o(a); 14 | i(t, 100, 400, !0), t.append("g").append("text").attr("x", 100).attr("y", 40).attr("class", "version").attr("font-size", 32).style("text-anchor", "middle").text(`v${n}`); 15 | }, "draw"), f = { draw: l }, b = { 16 | parser: v, 17 | db: m, 18 | renderer: f 19 | }; 20 | export { 21 | b as diagram 22 | }; 23 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/init-DjUOC4st.js: -------------------------------------------------------------------------------- 1 | function t(e, a) { 2 | switch (arguments.length) { 3 | case 0: 4 | break; 5 | case 1: 6 | this.range(e); 7 | break; 8 | default: 9 | this.range(a).domain(e); 10 | break; 11 | } 12 | return this; 13 | } 14 | export { 15 | t as i 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/ordinal-DfAQgscy.js: -------------------------------------------------------------------------------- 1 | import { i as a } from "./init-DjUOC4st.js"; 2 | class o extends Map { 3 | constructor(n, t = g) { 4 | if (super(), Object.defineProperties(this, { _intern: { value: /* @__PURE__ */ new Map() }, _key: { value: t } }), n != null) for (const [r, s] of n) this.set(r, s); 5 | } 6 | get(n) { 7 | return super.get(c(this, n)); 8 | } 9 | has(n) { 10 | return super.has(c(this, n)); 11 | } 12 | set(n, t) { 13 | return super.set(l(this, n), t); 14 | } 15 | delete(n) { 16 | return super.delete(p(this, n)); 17 | } 18 | } 19 | function c({ _intern: e, _key: n }, t) { 20 | const r = n(t); 21 | return e.has(r) ? e.get(r) : t; 22 | } 23 | function l({ _intern: e, _key: n }, t) { 24 | const r = n(t); 25 | return e.has(r) ? e.get(r) : (e.set(r, t), t); 26 | } 27 | function p({ _intern: e, _key: n }, t) { 28 | const r = n(t); 29 | return e.has(r) && (t = e.get(r), e.delete(r)), t; 30 | } 31 | function g(e) { 32 | return e !== null && typeof e == "object" ? e.valueOf() : e; 33 | } 34 | const f = Symbol("implicit"); 35 | function h() { 36 | var e = new o(), n = [], t = [], r = f; 37 | function s(u) { 38 | let i = e.get(u); 39 | if (i === void 0) { 40 | if (r !== f) return r; 41 | e.set(u, i = n.push(u) - 1); 42 | } 43 | return t[i % t.length]; 44 | } 45 | return s.domain = function(u) { 46 | if (!arguments.length) return n.slice(); 47 | n = [], e = new o(); 48 | for (const i of u) 49 | e.has(i) || e.set(i, n.push(i) - 1); 50 | return s; 51 | }, s.range = function(u) { 52 | return arguments.length ? (t = Array.from(u), s) : t.slice(); 53 | }, s.unknown = function(u) { 54 | return arguments.length ? 
(r = u, s) : r; 55 | }, s.copy = function() { 56 | return h(n, t).unknown(r); 57 | }, a.apply(s, arguments), s; 58 | } 59 | export { 60 | h as o 61 | }; 62 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/stateDiagram-v2-YXO3MK2T-CFM2lJF8.js: -------------------------------------------------------------------------------- 1 | import { s as a, S as t, b as r, a as s } from "./chunk-AEK57VVT-OrsXQu-U.js"; 2 | import { _ as i } from "./mermaid.core-C0Blj36u.js"; 3 | var _ = { 4 | parser: a, 5 | get db() { 6 | return new t(2); 7 | }, 8 | renderer: r, 9 | styles: s, 10 | init: /* @__PURE__ */ i((e) => { 11 | e.state || (e.state = {}), e.state.arrowMarkerAbsolute = e.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | _ as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/example/assets/worker-lPYB70QI.js: -------------------------------------------------------------------------------- 1 | (function(){"use strict";const R="https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd/ffmpeg-core.js";var E;(function(t){t.LOAD="LOAD",t.EXEC="EXEC",t.WRITE_FILE="WRITE_FILE",t.READ_FILE="READ_FILE",t.DELETE_FILE="DELETE_FILE",t.RENAME="RENAME",t.CREATE_DIR="CREATE_DIR",t.LIST_DIR="LIST_DIR",t.DELETE_DIR="DELETE_DIR",t.ERROR="ERROR",t.DOWNLOAD="DOWNLOAD",t.PROGRESS="PROGRESS",t.LOG="LOG",t.MOUNT="MOUNT",t.UNMOUNT="UNMOUNT"})(E||(E={}));const a=new Error("unknown message type"),f=new Error("ffmpeg is not loaded, call `await ffmpeg.load()` first"),u=new Error("failed to import ffmpeg-core.js");let r;const O=async({coreURL:t,wasmURL:n,workerURL:e})=>{const o=!r;try{t||(t=R),importScripts(t)}catch{if(t||(t=R.replace("/umd/","/esm/")),self.createFFmpegCore=(await import(t)).default,!self.createFFmpegCore)throw u}const s=t,c=n||t.replace(/.js$/g,".wasm"),b=e||t.replace(/.js$/g,".worker.js");return r=await self.createFFmpegCore({mainScriptUrlOrBlob:`${s}#${btoa(JSON.stringify({wasmURL:c,workerURL:b}))}`}),r.setLogger(i=>self.postMessage({type:E.LOG,data:i})),r.setProgress(i=>self.postMessage({type:E.PROGRESS,data:i})),o},l=({args:t,timeout:n=-1})=>{r.setTimeout(n),r.exec(...t);const e=r.ret;return r.reset(),e},m=({path:t,data:n})=>(r.FS.writeFile(t,n),!0),D=({path:t,encoding:n})=>r.FS.readFile(t,{encoding:n}),S=({path:t})=>(r.FS.unlink(t),!0),I=({oldPath:t,newPath:n})=>(r.FS.rename(t,n),!0),L=({path:t})=>(r.FS.mkdir(t),!0),N=({path:t})=>{const n=r.FS.readdir(t),e=[];for(const o of n){const s=r.FS.stat(`${t}/${o}`),c=r.FS.isDir(s.mode);e.push({name:o,isDir:c})}return e},A=({path:t})=>(r.FS.rmdir(t),!0),w=({fsType:t,options:n,mountPoint:e})=>{const o=t,s=r.FS.filesystems[o];return s?(r.FS.mount(s,n,e),!0):!1},k=({mountPoint:t})=>(r.FS.unmount(t),!0);self.onmessage=async({data:{id:t,type:n,data:e}})=>{const o=[];let s;try{if(n!==E.LOAD&&!r)throw f;switch(n){case E.LOAD:s=await O(e);break;case E.EXEC:s=l(e);break;case E.WRITE_FILE:s=m(e);break;case E.READ_FILE:s=D(e);break;case E.DELETE_FILE:s=S(e);break;case E.RENAME:s=I(e);break;case E.CREATE_DIR:s=L(e);break;case E.LIST_DIR:s=N(e);break;case E.DELETE_DIR:s=A(e);break;case E.MOUNT:s=w(e);break;case E.UNMOUNT:s=k(e);break;default:throw a}}catch(c){self.postMessage({id:t,type:E.ERROR,data:c.toString()});return}s instanceof Uint8Array&&o.push(s.buffer),self.postMessage({id:t,type:n,data:s},o)}})(); 2 | -------------------------------------------------------------------------------- 
/backend/fastrtc/templates/example/style.css: -------------------------------------------------------------------------------- 1 | .container.svelte-1uoo7dd{flex:none;max-width:none}.container.svelte-1uoo7dd video{width:var(--size-full);height:var(--size-full);object-fit:cover}.container.svelte-1uoo7dd:hover,.container.selected.svelte-1uoo7dd{border-color:var(--border-color-accent)}.container.table.svelte-1uoo7dd{margin:0 auto;border:2px solid var(--border-color-primary);border-radius:var(--radius-lg);overflow:hidden;width:var(--size-20);height:var(--size-20);object-fit:cover}.container.gallery.svelte-1uoo7dd{height:var(--size-20);max-height:var(--size-20);object-fit:cover} 2 | -------------------------------------------------------------------------------- /backend/fastrtc/text_to_speech/__init__.py: -------------------------------------------------------------------------------- 1 | from .tts import ( 2 | CartesiaTTSOptions, 3 | KokoroTTSOptions, 4 | get_tts_model, 5 | ) 6 | 7 | __all__ = ["get_tts_model", "KokoroTTSOptions", "CartesiaTTSOptions"] 8 | -------------------------------------------------------------------------------- /backend/fastrtc/text_to_speech/test_tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/backend/fastrtc/text_to_speech/test_tts.py -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/__init__.py -------------------------------------------------------------------------------- /demo/echo_audio/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Echo Audio 3 | emoji: 🪩 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Simple echo stream - simplest FastRTC demo 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/echo_audio/app.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from fastapi import FastAPI 3 | from fastapi.responses import RedirectResponse 4 | from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials 5 | from gradio.utils import get_space 6 | 7 | 8 | def detection(audio: tuple[int, np.ndarray]): 9 | # Implement any iterator that yields audio 10 | # See "LLM Voice Chat" for a more complete example 11 | yield audio 12 | 13 | 14 | stream = Stream( 15 | handler=ReplyOnPause(detection), 16 | modality="audio", 17 | mode="send-receive", 18 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 19 | concurrency_limit=5 if get_space() else None, 20 | time_limit=90 if get_space() else None, 21 | ) 22 | 23 | app = FastAPI() 24 | 25 | stream.mount(app) 26 | 27 | 28 | @app.get("/") 29 | async def index(): 30 | return RedirectResponse( 31 | url="/ui" if not get_space() else "https://fastrtc-echo-audio.hf.space/ui/" 32 | ) 33 | 34 | 35 | if __name__ == "__main__": 36 | import os 37 | 38 | if (mode := 
os.getenv("MODE")) == "UI": 39 | stream.ui.launch(server_port=7860) 40 | elif mode == "PHONE": 41 | stream.fastphone(port=7860) 42 | else: 43 | import uvicorn 44 | 45 | uvicorn.run(app, host="0.0.0.0", port=7860) 46 | -------------------------------------------------------------------------------- /demo/echo_audio/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | twilio 3 | python-dotenv 4 | -------------------------------------------------------------------------------- /demo/gemini_audio_video/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Gemini Audio Video 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.25.2 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Gemini understands audio and video! 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/gemini_audio_video/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc==0.0.23.rc1 2 | python-dotenv 3 | google-genai 4 | twilio 5 | -------------------------------------------------------------------------------- /demo/gemini_conversation/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Gemini Talking to Gemini 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.17.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Have two Gemini agents talk to each other 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Hello Computer 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Say computer before asking your question 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Hello Computer (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Say computer (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/app.py: 
-------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import gradio as gr 7 | import huggingface_hub 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from fastapi import FastAPI 11 | from fastapi.responses import HTMLResponse, StreamingResponse 12 | from fastrtc import ( 13 | AdditionalOutputs, 14 | ReplyOnStopWords, 15 | Stream, 16 | get_stt_model, 17 | get_twilio_turn_credentials, 18 | ) 19 | from gradio.utils import get_space 20 | from pydantic import BaseModel 21 | 22 | load_dotenv() 23 | 24 | curr_dir = Path(__file__).parent 25 | 26 | 27 | client = huggingface_hub.InferenceClient( 28 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 29 | provider="sambanova", 30 | ) 31 | model = get_stt_model() 32 | 33 | 34 | def response( 35 | audio: tuple[int, np.ndarray], 36 | gradio_chatbot: list[dict] | None = None, 37 | conversation_state: list[dict] | None = None, 38 | ): 39 | gradio_chatbot = gradio_chatbot or [] 40 | conversation_state = conversation_state or [] 41 | text = model.stt(audio) 42 | print("STT in handler", text) 43 | sample_rate, array = audio 44 | gradio_chatbot.append( 45 | {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))} 46 | ) 47 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 48 | 49 | conversation_state.append({"role": "user", "content": text}) 50 | 51 | request = client.chat.completions.create( 52 | model="meta-llama/Llama-3.2-3B-Instruct", 53 | messages=conversation_state, # type: ignore 54 | temperature=0.1, 55 | top_p=0.1, 56 | ) 57 | response = {"role": "assistant", "content": request.choices[0].message.content} 58 | 59 | conversation_state.append(response) 60 | gradio_chatbot.append(response) 61 | 62 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 63 | 64 | 65 | chatbot = gr.Chatbot(type="messages", value=[]) 66 | state = gr.State(value=[]) 67 | stream = Stream( 68 | ReplyOnStopWords( 69 | response, # type: ignore 70 | stop_words=["computer"], 71 | input_sample_rate=16000, 72 | ), 73 | mode="send", 74 | modality="audio", 75 | additional_inputs=[chatbot, state], 76 | additional_outputs=[chatbot, state], 77 | additional_outputs_handler=lambda *a: (a[2], a[3]), 78 | concurrency_limit=5 if get_space() else None, 79 | time_limit=90 if get_space() else None, 80 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 81 | ) 82 | app = FastAPI() 83 | stream.mount(app) 84 | 85 | 86 | class Message(BaseModel): 87 | role: str 88 | content: str 89 | 90 | 91 | class InputData(BaseModel): 92 | webrtc_id: str 93 | chatbot: list[Message] 94 | state: list[Message] 95 | 96 | 97 | @app.get("/") 98 | async def _(): 99 | rtc_config = get_twilio_turn_credentials() if get_space() else None 100 | html_content = (curr_dir / "index.html").read_text() 101 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 102 | return HTMLResponse(content=html_content) 103 | 104 | 105 | @app.post("/input_hook") 106 | async def _(data: InputData): 107 | body = data.model_dump() 108 | stream.set_input(data.webrtc_id, body["chatbot"], body["state"]) 109 | 110 | 111 | def audio_to_base64(file_path): 112 | audio_format = "wav" 113 | with open(file_path, "rb") as audio_file: 114 | encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8") 115 | return f"data:audio/{audio_format};base64,{encoded_audio}" 116 | 117 | 118 | @app.get("/outputs") 119 | async def _(webrtc_id: str): 120 | 
async def output_stream(): 121 | async for output in stream.output_stream(webrtc_id): 122 | chatbot = output.args[0] 123 | state = output.args[1] 124 | data = { 125 | "message": state[-1], 126 | "audio": audio_to_base64(chatbot[-1]["content"].value["path"]) 127 | if chatbot[-1]["role"] == "user" 128 | else None, 129 | } 130 | yield f"event: output\ndata: {json.dumps(data)}\n\n" 131 | 132 | return StreamingResponse(output_stream(), media_type="text/event-stream") 133 | 134 | 135 | if __name__ == "__main__": 136 | import os 137 | 138 | if (mode := os.getenv("MODE")) == "UI": 139 | stream.ui.launch(server_port=7860) 140 | elif mode == "PHONE": 141 | raise ValueError("Phone mode not supported") 142 | else: 143 | import uvicorn 144 | 145 | uvicorn.run(app, host="0.0.0.0", port=7860) 146 | -------------------------------------------------------------------------------- /demo/hello_computer/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[stopword] 2 | python-dotenv 3 | huggingface_hub>=0.29.0 4 | twilio -------------------------------------------------------------------------------- /demo/llama_code_editor/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Llama Code Editor 3 | emoji: 🦙 4 | colorFrom: indigo 5 | colorTo: pink 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Create interactive HTML web pages with your voice 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, 13 | secret|SAMBANOVA_API_KEY, secret|GROQ_API_KEY] 14 | --- 15 | 16 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference 17 | -------------------------------------------------------------------------------- /demo/llama_code_editor/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.responses import RedirectResponse 3 | from fastrtc import Stream 4 | from gradio.utils import get_space 5 | 6 | try: 7 | from demo.llama_code_editor.handler import ( 8 | CodeHandler, 9 | ) 10 | from demo.llama_code_editor.ui import demo as ui 11 | except (ImportError, ModuleNotFoundError): 12 | from handler import CodeHandler 13 | from ui import demo as ui 14 | 15 | 16 | stream = Stream( 17 | handler=CodeHandler, 18 | modality="audio", 19 | mode="send-receive", 20 | concurrency_limit=10 if get_space() else None, 21 | time_limit=90 if get_space() else None, 22 | ) 23 | 24 | stream.ui = ui 25 | 26 | app = FastAPI() 27 | 28 | 29 | @app.get("/") 30 | async def _(): 31 | url = "/ui" if not get_space() else "https://fastrtc-llama-code-editor.hf.space/ui/" 32 | return RedirectResponse(url) 33 | 34 | 35 | if __name__ == "__main__": 36 | import os 37 | 38 | if (mode := os.getenv("MODE")) == "UI": 39 | stream.ui.launch(server_port=7860, server_name="0.0.0.0") 40 | elif mode == "PHONE": 41 | stream.fastphone(host="0.0.0.0", port=7860) 42 | else: 43 | import uvicorn 44 | 45 | uvicorn.run(app, host="0.0.0.0", port=7860) 46 | -------------------------------------------------------------------------------- /demo/llama_code_editor/assets/sandbox.html: -------------------------------------------------------------------------------- 1 |
📦
No Application Created
-------------------------------------------------------------------------------- /demo/llama_code_editor/assets/spinner.html: -------------------------------------------------------------------------------- 1 |
Generating your application...
This may take a few moments
-------------------------------------------------------------------------------- /demo/llama_code_editor/handler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import re 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import openai 8 | from dotenv import load_dotenv 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | audio_to_bytes, 13 | ) 14 | from groq import Groq 15 | 16 | load_dotenv() 17 | 18 | groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) 19 | 20 | client = openai.OpenAI( 21 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 22 | base_url="https://api.sambanova.ai/v1", 23 | ) 24 | 25 | path = Path(__file__).parent / "assets" 26 | 27 | spinner_html = open(path / "spinner.html").read() 28 | 29 | 30 | system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response." 31 | user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}" 32 | 33 | 34 | def extract_html_content(text): 35 | """ 36 | Extract content including HTML tags. 37 | """ 38 | match = re.search(r".*?", text, re.DOTALL) 39 | return match.group(0) if match else None 40 | 41 | 42 | def display_in_sandbox(code): 43 | encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8") 44 | data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}" 45 | return f'' 46 | 47 | 48 | def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str): 49 | yield AdditionalOutputs(history, spinner_html) 50 | 51 | text = groq_client.audio.transcriptions.create( 52 | file=("audio-file.mp3", audio_to_bytes(user_message)), 53 | model="whisper-large-v3-turbo", 54 | response_format="verbose_json", 55 | ).text 56 | 57 | user_msg_formatted = user_prompt.format(user_message=text, code=code) 58 | history.append({"role": "user", "content": user_msg_formatted}) 59 | 60 | response = client.chat.completions.create( 61 | model="Meta-Llama-3.1-70B-Instruct", 62 | messages=history, # type: ignore 63 | temperature=0.1, 64 | top_p=0.1, 65 | ) 66 | 67 | output = response.choices[0].message.content 68 | html_code = extract_html_content(output) 69 | history.append({"role": "assistant", "content": output}) 70 | yield AdditionalOutputs(history, html_code) 71 | 72 | 73 | CodeHandler = ReplyOnPause(generate) # type: ignore 74 | -------------------------------------------------------------------------------- /demo/llama_code_editor/requirements.in: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | groq 3 | openai 4 | python-dotenv 5 | twilio -------------------------------------------------------------------------------- /demo/llama_code_editor/ui.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import gradio as gr 4 | from dotenv import load_dotenv 5 | from fastrtc import WebRTC, get_twilio_turn_credentials 6 | from gradio.utils import get_space 7 | 8 | try: 9 | from demo.llama_code_editor.handler import ( 10 | CodeHandler, 11 | display_in_sandbox, 12 | system_prompt, 13 | ) 14 | except (ImportError, ModuleNotFoundError): 15 | from handler import CodeHandler, display_in_sandbox, system_prompt 16 | 17 | load_dotenv() 
18 | 19 | path = Path(__file__).parent / "assets" 20 | 21 | with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo: 22 | history = gr.State([{"role": "system", "content": system_prompt}]) 23 | with gr.Row(): 24 | with gr.Column(scale=1): 25 | gr.HTML( 26 | """ 27 |

28 | Llama Code Editor 29 | 30 | 31 | Powered by SambaNova and Gradio-WebRTC ⚡️ 32 | 33 | 34 | Create and edit single-file HTML applications with just your voice! 35 | 36 | 37 | Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation. 38 |
39 | """ 40 | ) 41 | webrtc = WebRTC( 42 | rtc_configuration=get_twilio_turn_credentials() 43 | if get_space() 44 | else None, 45 | mode="send", 46 | modality="audio", 47 | ) 48 | with gr.Column(scale=10): 49 | with gr.Tabs(): 50 | with gr.Tab("Sandbox"): 51 | sandbox = gr.HTML(value=open(path / "sandbox.html").read()) 52 | with gr.Tab("Code"): 53 | code = gr.Code( 54 | language="html", 55 | max_lines=50, 56 | interactive=False, 57 | elem_classes="code-component", 58 | ) 59 | with gr.Tab("Chat"): 60 | cb = gr.Chatbot(type="messages") 61 | 62 | webrtc.stream( 63 | CodeHandler, 64 | inputs=[webrtc, history, code], 65 | outputs=[webrtc], 66 | time_limit=90 if get_space() else None, 67 | concurrency_limit=10 if get_space() else None, 68 | ) 69 | webrtc.on_additional_outputs( 70 | lambda history, code: (history, code, history), outputs=[history, code, cb] 71 | ) 72 | code.change(display_in_sandbox, code, sandbox, queue=False) 73 | 74 | if __name__ == "__main__": 75 | demo.launch() 76 | -------------------------------------------------------------------------------- /demo/llm_voice_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: LLM Voice Chat 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to an LLM with ElevenLabs 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/llm_voice_chat/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: LLM Voice Chat (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: LLM Voice by ElevenLabs (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/llm_voice_chat/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import gradio as gr 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | from elevenlabs import ElevenLabs 8 | from fastapi import FastAPI 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | Stream, 13 | get_stt_model, 14 | get_twilio_turn_credentials, 15 | ) 16 | from gradio.utils import get_space 17 | from groq import Groq 18 | from numpy.typing import NDArray 19 | 20 | load_dotenv() 21 | groq_client = Groq() 22 | tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY")) 23 | stt_model = get_stt_model() 24 | 25 | 26 | # See "Talk to Claude" in Cookbook for an example of how to keep 27 | # track of the chat history. 
28 | def response( 29 | audio: tuple[int, NDArray[np.int16 | np.float32]], 30 | chatbot: list[dict] | None = None, 31 | ): 32 | chatbot = chatbot or [] 33 | messages = [{"role": d["role"], "content": d["content"]} for d in chatbot] 34 | start = time.time() 35 | text = stt_model.stt(audio) 36 | print("transcription", time.time() - start) 37 | print("prompt", text) 38 | chatbot.append({"role": "user", "content": text}) 39 | yield AdditionalOutputs(chatbot) 40 | messages.append({"role": "user", "content": text}) 41 | response_text = ( 42 | groq_client.chat.completions.create( 43 | model="llama-3.1-8b-instant", 44 | max_tokens=200, 45 | messages=messages, # type: ignore 46 | ) 47 | .choices[0] 48 | .message.content 49 | ) 50 | 51 | chatbot.append({"role": "assistant", "content": response_text}) 52 | 53 | for i, chunk in enumerate( 54 | tts_client.text_to_speech.convert_as_stream( 55 | text=response_text, # type: ignore 56 | voice_id="JBFqnCBsd6RMkjVDRZzb", 57 | model_id="eleven_multilingual_v2", 58 | output_format="pcm_24000", 59 | ) 60 | ): 61 | if i == 0: 62 | yield AdditionalOutputs(chatbot) 63 | audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1) 64 | yield (24000, audio_array) 65 | 66 | 67 | chatbot = gr.Chatbot(type="messages") 68 | stream = Stream( 69 | modality="audio", 70 | mode="send-receive", 71 | handler=ReplyOnPause(response, input_sample_rate=16000), 72 | additional_outputs_handler=lambda a, b: b, 73 | additional_inputs=[chatbot], 74 | additional_outputs=[chatbot], 75 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 76 | concurrency_limit=5 if get_space() else None, 77 | time_limit=90 if get_space() else None, 78 | ui_args={"title": "LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)"}, 79 | ) 80 | 81 | # Mount the STREAM UI to the FastAPI app 82 | # Because I don't want to build the UI manually 83 | app = FastAPI() 84 | app = gr.mount_gradio_app(app, stream.ui, path="/") 85 | 86 | 87 | if __name__ == "__main__": 88 | import os 89 | 90 | os.environ["GRADIO_SSR_MODE"] = "false" 91 | 92 | if (mode := os.getenv("MODE")) == "UI": 93 | stream.ui.launch(server_port=7860) 94 | elif mode == "PHONE": 95 | stream.fastphone(host="0.0.0.0", port=7860) 96 | else: 97 | stream.ui.launch(server_port=7860) 98 | -------------------------------------------------------------------------------- /demo/llm_voice_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[stopword] 2 | python-dotenv 3 | openai 4 | twilio 5 | groq 6 | elevenlabs 7 | -------------------------------------------------------------------------------- /demo/moonshine_live/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Moonshine Live Transcription 3 | emoji: 🌕 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.17.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Real-time captions with Moonshine ONNX 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base] 14 | --- 15 | 16 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/moonshine_live/app.py: -------------------------------------------------------------------------------- 1 | from 
collections.abc import Generator 2 | from functools import cache 3 | from typing import Literal 4 | 5 | import gradio as gr 6 | import numpy as np 7 | from dotenv import load_dotenv 8 | from fastrtc import ( 9 | AdditionalOutputs, 10 | ReplyOnPause, 11 | Stream, 12 | audio_to_float32, 13 | get_twilio_turn_credentials, 14 | ) 15 | from moonshine_onnx import MoonshineOnnxModel, load_tokenizer 16 | from numpy.typing import NDArray 17 | 18 | load_dotenv() 19 | 20 | 21 | @cache 22 | def load_moonshine( 23 | model_name: Literal["moonshine/base", "moonshine/tiny"], 24 | ) -> MoonshineOnnxModel: 25 | return MoonshineOnnxModel(model_name=model_name) 26 | 27 | 28 | tokenizer = load_tokenizer() 29 | 30 | 31 | def stt( 32 | audio: tuple[int, NDArray[np.int16 | np.float32]], 33 | model_name: Literal["moonshine/base", "moonshine/tiny"], 34 | captions: str, 35 | ) -> Generator[AdditionalOutputs, None, None]: 36 | moonshine = load_moonshine(model_name) 37 | sr, audio_np = audio # type: ignore 38 | if audio_np.dtype == np.int16: 39 | audio_np = audio_to_float32(audio) 40 | if audio_np.ndim == 1: 41 | audio_np = audio_np.reshape(1, -1) 42 | tokens = moonshine.generate(audio_np) 43 | yield AdditionalOutputs( 44 | (captions + "\n" + tokenizer.decode_batch(tokens)[0]).strip() 45 | ) 46 | 47 | 48 | captions = gr.Textbox(label="Captions") 49 | stream = Stream( 50 | ReplyOnPause(stt, input_sample_rate=16000), 51 | modality="audio", 52 | mode="send", 53 | ui_args={ 54 | "title": "Live Captions by Moonshine", 55 | "icon": "default-favicon.ico", 56 | "icon_button_color": "#5c5c5c", 57 | "pulse_color": "#a7c6fc", 58 | "icon_radius": 0, 59 | }, 60 | rtc_configuration=get_twilio_turn_credentials(), 61 | additional_inputs=[ 62 | gr.Radio( 63 | choices=["moonshine/base", "moonshine/tiny"], 64 | value="moonshine/base", 65 | label="Model", 66 | ), 67 | captions, 68 | ], 69 | additional_outputs=[captions], 70 | additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(), 71 | ) 72 | 73 | if __name__ == "__main__": 74 | stream.ui.launch() 75 | -------------------------------------------------------------------------------- /demo/moonshine_live/default-favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/moonshine_live/default-favicon.ico -------------------------------------------------------------------------------- /demo/moonshine_live/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx 3 | twilio -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/README.md: -------------------------------------------------------------------------------- 1 | # FastRTC POC 2 | A simple POC for a fast real-time voice chat application using FastAPI and FastRTC by [rohanprichard](https://github.com/rohanprichard). I wanted to make one as an example with more production-ready languages, rather than just Gradio. 3 | 4 | ## Setup 5 | 1. Set your API keys in an `.env` file based on the `.env.example` file 6 | 2. Create a virtual environment and install the dependencies 7 | ```bash 8 | python3 -m venv env 9 | source env/bin/activate 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | 3. Run the server 14 | ```bash 15 | ./run.sh 16 | ``` 17 | 4. 
Navigate into the frontend directory in another terminal 18 | ```bash 19 | cd frontend/fastrtc-demo 20 | ``` 21 | 5. Run the frontend 22 | ```bash 23 | npm install 24 | npm run dev 25 | ``` 26 | 6. Go to the URL and click the microphone icon to start chatting! 27 | 28 | 7. Reset chats by clicking the trash button on the bottom right 29 | 30 | ## Notes 31 | You can choose to not install the requirements for TTS and STT by removing the `[tts, stt]` from the specifier in the `requirements.txt` file. 32 | 33 | - The STT is currently using the ElevenLabs API. 34 | - The LLM is currently using the OpenAI API. 35 | - The TTS is currently using the ElevenLabs API. 36 | - The VAD is currently using the Silero VAD model. 37 | - You may need to install ffmpeg if you get errors in STT 38 | 39 | The prompt can be changed in the `backend/server.py` file and modified as you like. 40 | 41 | ### Audio Parameters 42 | 43 | #### AlgoOptions 44 | 45 | - **audio_chunk_duration**: Length of audio chunks in seconds. Smaller values allow for faster processing but may be less accurate. 46 | - **started_talking_threshold**: If a chunk has more than this many seconds of speech, the system considers that the user has started talking. 47 | - **speech_threshold**: After the user has started speaking, if a chunk has less than this many seconds of speech, the system considers that the user has paused. 48 | 49 | #### SileroVadOptions 50 | 51 | - **threshold**: Speech probability threshold (0.0-1.0). Values above this are considered speech. Higher values are more strict. 52 | - **min_speech_duration_ms**: Speech segments shorter than this (in milliseconds) are filtered out. 53 | - **min_silence_duration_ms**: The system waits for this duration of silence (in milliseconds) before considering speech to be finished. 54 | - **speech_pad_ms**: Padding added to both ends of detected speech segments to prevent cutting off words. 55 | - **max_speech_duration_s**: Maximum allowed duration for a speech segment in seconds. Prevents indefinite listening. 56 | 57 | ### Tuning Recommendations 58 | 59 | - If the AI interrupts you too early: 60 | - Increase `min_silence_duration_ms` 61 | - Increase `speech_threshold` 62 | - Increase `speech_pad_ms` 63 | 64 | - If the AI is slow to respond after you finish speaking: 65 | - Decrease `min_silence_duration_ms` 66 | - Decrease `speech_threshold` 67 | 68 | - If the system fails to detect some speech: 69 | - Lower the `threshold` value 70 | - Decrease `started_talking_threshold` 71 | 72 | 73 | ## Credits: 74 | Credit for the UI components goes to Shadcn, Aceternity UI and Kokonut UI. 
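
For quick reference, the sketch below shows where each of the parameters above is passed. It mirrors the configuration in `backend/server.py`, with a trivial echo handler standing in for the STT → LLM → TTS pipeline; treat the values as a starting point to tune, not as requirements.

```python
import numpy as np
from fastrtc import AlgoOptions, ReplyOnPause, SileroVadOptions, Stream


def echo(audio: tuple[int, np.ndarray]):
    # Placeholder handler: yield audio chunks back to the caller.
    # In backend/server.py this is where STT, the LLM call, and TTS happen.
    yield audio


stream = Stream(
    ReplyOnPause(
        echo,
        algo_options=AlgoOptions(
            audio_chunk_duration=0.5,       # seconds of audio per analysis chunk
            started_talking_threshold=0.1,  # speech seconds before "user started talking"
            speech_threshold=0.03,          # below this, the chunk counts toward a pause
        ),
        model_options=SileroVadOptions(
            threshold=0.75,                 # speech probability cutoff
            min_speech_duration_ms=250,
            min_silence_duration_ms=1500,   # raise this if the AI interrupts too early
            speech_pad_ms=400,
            max_speech_duration_s=15,
        ),
    ),
    modality="audio",
    mode="send-receive",
)
```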
75 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/backend/env.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | load_dotenv() 5 | 6 | LLM_API_KEY = os.getenv("LLM_API_KEY") 7 | ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") 8 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/backend/server.py: -------------------------------------------------------------------------------- 1 | import fastapi 2 | from fastrtc import ReplyOnPause, Stream, AlgoOptions, SileroVadOptions 3 | from fastrtc.utils import audio_to_bytes, audio_to_float32 4 | from openai import OpenAI 5 | import logging 6 | import time 7 | from fastapi.middleware.cors import CORSMiddleware 8 | from elevenlabs import VoiceSettings, stream 9 | from elevenlabs.client import ElevenLabs 10 | import numpy as np 11 | 12 | from .env import LLM_API_KEY, ELEVENLABS_API_KEY 13 | 14 | 15 | sys_prompt = """ 16 | You are a helpful assistant. You are witty, engaging and fun. You love being interactive with the user. 17 | You also can add minimalistic utterances like 'uh-huh' or 'mm-hmm' to the conversation to make it more natural. However, only vocalization are allowed, no actions or other non-vocal sounds. 18 | Begin a conversation with a self-deprecating joke like 'I'm not sure if I'm ready for this...' or 'I bet you already regret clicking that button...' 19 | """ 20 | 21 | messages = [{"role": "system", "content": sys_prompt}] 22 | 23 | openai_client = OpenAI(api_key=LLM_API_KEY) 24 | 25 | elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY) 26 | 27 | logging.basicConfig(level=logging.INFO) 28 | 29 | 30 | def echo(audio): 31 | stt_time = time.time() 32 | 33 | logging.info("Performing STT") 34 | 35 | transcription = elevenlabs_client.speech_to_text.convert( 36 | file=audio_to_bytes(audio), 37 | model_id="scribe_v1", 38 | tag_audio_events=False, 39 | language_code="eng", 40 | diarize=False, 41 | ) 42 | prompt = transcription.text 43 | if prompt == "": 44 | logging.info("STT returned empty string") 45 | return 46 | logging.info(f"STT response: {prompt}") 47 | 48 | messages.append({"role": "user", "content": prompt}) 49 | 50 | logging.info(f"STT took {time.time() - stt_time} seconds") 51 | 52 | llm_time = time.time() 53 | 54 | def text_stream(): 55 | global full_response 56 | full_response = "" 57 | 58 | response = openai_client.chat.completions.create( 59 | model="gpt-3.5-turbo", messages=messages, max_tokens=200, stream=True 60 | ) 61 | 62 | for chunk in response: 63 | if chunk.choices[0].finish_reason == "stop": 64 | break 65 | if chunk.choices[0].delta.content: 66 | full_response += chunk.choices[0].delta.content 67 | yield chunk.choices[0].delta.content 68 | 69 | audio_stream = elevenlabs_client.generate( 70 | text=text_stream(), 71 | voice="Rachel", # Cassidy is also really good 72 | voice_settings=VoiceSettings( 73 | similarity_boost=0.9, stability=0.6, style=0.4, speed=1 74 | ), 75 | model="eleven_multilingual_v2", 76 | output_format="pcm_24000", 77 | stream=True, 78 | ) 79 | 80 | for audio_chunk in audio_stream: 81 | audio_array = audio_to_float32( 82 | np.frombuffer(audio_chunk, dtype=np.int16) 83 | ) 84 | yield (24000, audio_array) 85 | 86 | messages.append({"role": "assistant", "content": full_response + " "}) 87 | logging.info(f"LLM response: {full_response}") 88 | logging.info(f"LLM took {time.time() - llm_time} 
seconds") 89 | 90 | 91 | stream = Stream( 92 | ReplyOnPause( 93 | echo, 94 | algo_options=AlgoOptions( 95 | audio_chunk_duration=0.5, 96 | started_talking_threshold=0.1, 97 | speech_threshold=0.03, 98 | ), 99 | model_options=SileroVadOptions( 100 | threshold=0.75, 101 | min_speech_duration_ms=250, 102 | min_silence_duration_ms=1500, 103 | speech_pad_ms=400, 104 | max_speech_duration_s=15, 105 | ), 106 | ), 107 | modality="audio", 108 | mode="send-receive", 109 | ) 110 | 111 | app = fastapi.FastAPI() 112 | 113 | app.add_middleware( 114 | CORSMiddleware, 115 | allow_origins=["*"], 116 | allow_credentials=True, 117 | allow_methods=["*"], 118 | allow_headers=["*"], 119 | ) 120 | 121 | stream.mount(app) 122 | 123 | 124 | @app.get("/reset") 125 | async def reset(): 126 | global messages 127 | logging.info("Resetting chat") 128 | messages = [{"role": "system", "content": sys_prompt}] 129 | return {"status": "success"} 130 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 
35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 37 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Geist, Geist_Mono } from "next/font/google"; 3 | import "./globals.css"; 4 | import { ThemeProvider } from "@/components/theme-provider"; 5 | import { ThemeTransition } from "@/components/ui/theme-transition"; 6 | 7 | const geistSans = Geist({ 8 | variable: "--font-geist-sans", 9 | subsets: ["latin"], 10 | }); 11 | 12 | const geistMono = Geist_Mono({ 13 | variable: "--font-geist-mono", 14 | subsets: ["latin"], 15 | }); 16 | 17 | export const metadata: Metadata = { 18 | title: "FastRTC Demo", 19 | description: "Interactive WebRTC demo with audio visualization", 20 | }; 21 | 22 | export default function RootLayout({ 23 | children, 24 | }: Readonly<{ 25 | children: React.ReactNode; 26 | }>) { 27 | return ( 28 | 29 | 32 | 38 | {children} 39 | 40 | 41 | 42 | 43 | ); 44 | } 45 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/page.tsx: -------------------------------------------------------------------------------- 1 | import { BackgroundCircleProvider } from "@/components/background-circle-provider"; 2 | import { ThemeToggle } from "@/components/ui/theme-toggle"; 3 | import { ResetChat } from "@/components/ui/reset-chat"; 4 | export default function Home() { 5 | return ( 6 |
7 | 8 |
9 | 10 |
11 |
12 | 13 |
14 |
15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "new-york", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "", 8 | "css": "app/globals.css", 9 | "baseColor": "slate", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/background-circle-provider.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import { useState, useEffect, useRef, useCallback } from "react"; 4 | import { BackgroundCircles } from "@/components/ui/background-circles"; 5 | import { AIVoiceInput } from "@/components/ui/ai-voice-input"; 6 | import { WebRTCClient } from "@/lib/webrtc-client"; 7 | 8 | export function BackgroundCircleProvider() { 9 | const [currentVariant, setCurrentVariant] = 10 | useState("octonary"); 11 | const [isConnected, setIsConnected] = useState(false); 12 | const [webrtcClient, setWebrtcClient] = useState(null); 13 | const [audioLevel, setAudioLevel] = useState(0); 14 | const audioRef = useRef(null); 15 | 16 | // Memoize callbacks to prevent recreation on each render 17 | const handleConnected = useCallback(() => setIsConnected(true), []); 18 | const handleDisconnected = useCallback(() => setIsConnected(false), []); 19 | 20 | const handleAudioStream = useCallback((stream: MediaStream) => { 21 | if (audioRef.current) { 22 | audioRef.current.srcObject = stream; 23 | } 24 | }, []); 25 | 26 | const handleAudioLevel = useCallback((level: number) => { 27 | // Apply some smoothing to the audio level 28 | setAudioLevel(prev => prev * 0.7 + level * 0.3); 29 | }, []); 30 | 31 | // Get all available variants 32 | const variants = Object.keys( 33 | COLOR_VARIANTS 34 | ) as (keyof typeof COLOR_VARIANTS)[]; 35 | 36 | // Function to change to the next color variant 37 | const changeVariant = () => { 38 | const currentIndex = variants.indexOf(currentVariant); 39 | const nextVariant = variants[(currentIndex + 1) % variants.length]; 40 | setCurrentVariant(nextVariant); 41 | }; 42 | 43 | useEffect(() => { 44 | // Initialize WebRTC client with memoized callbacks 45 | const client = new WebRTCClient({ 46 | onConnected: handleConnected, 47 | onDisconnected: handleDisconnected, 48 | onAudioStream: handleAudioStream, 49 | onAudioLevel: handleAudioLevel 50 | }); 51 | setWebrtcClient(client); 52 | 53 | return () => { 54 | client.disconnect(); 55 | }; 56 | }, [handleConnected, handleDisconnected, handleAudioStream, handleAudioLevel]); 57 | 58 | const handleStart = () => { 59 | webrtcClient?.connect(); 60 | }; 61 | 62 | const handleStop = () => { 63 | webrtcClient?.disconnect(); 64 | }; 65 | 66 | return ( 67 |
71 | 76 |
77 | 82 |
83 |
85 | ); 86 | } 87 | 88 | export default { BackgroundCircleProvider } 89 | 90 | const COLOR_VARIANTS = { 91 | primary: { 92 | border: [ 93 | "border-emerald-500/60", 94 | "border-cyan-400/50", 95 | "border-slate-600/30", 96 | ], 97 | gradient: "from-emerald-500/30", 98 | }, 99 | secondary: { 100 | border: [ 101 | "border-violet-500/60", 102 | "border-fuchsia-400/50", 103 | "border-slate-600/30", 104 | ], 105 | gradient: "from-violet-500/30", 106 | }, 107 | senary: { 108 | border: [ 109 | "border-blue-500/60", 110 | "border-sky-400/50", 111 | "border-slate-600/30", 112 | ], 113 | gradient: "from-blue-500/30", 114 | }, // blue 115 | octonary: { 116 | border: [ 117 | "border-red-500/60", 118 | "border-rose-400/50", 119 | "border-slate-600/30", 120 | ], 121 | gradient: "from-red-500/30", 122 | }, 123 | } as const; -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/theme-provider.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { createContext, useContext, useEffect, useState } from "react"; 4 | 5 | type Theme = "light" | "dark" | "system"; 6 | 7 | type ThemeProviderProps = { 8 | children: React.ReactNode; 9 | defaultTheme?: Theme; 10 | storageKey?: string; 11 | attribute?: string; 12 | enableSystem?: boolean; 13 | disableTransitionOnChange?: boolean; 14 | }; 15 | 16 | type ThemeProviderState = { 17 | theme: Theme; 18 | setTheme: (theme: Theme) => void; 19 | }; 20 | 21 | const initialState: ThemeProviderState = { 22 | theme: "system", 23 | setTheme: () => null, 24 | }; 25 | 26 | const ThemeProviderContext = createContext(initialState); 27 | 28 | export function ThemeProvider({ 29 | children, 30 | defaultTheme = "system", 31 | storageKey = "theme", 32 | attribute = "class", 33 | enableSystem = true, 34 | disableTransitionOnChange = false, 35 | ...props 36 | }: ThemeProviderProps) { 37 | const [theme, setTheme] = useState(defaultTheme); 38 | 39 | useEffect(() => { 40 | const savedTheme = localStorage.getItem(storageKey) as Theme | null; 41 | 42 | if (savedTheme) { 43 | setTheme(savedTheme); 44 | } else if (defaultTheme === "system" && enableSystem) { 45 | const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches 46 | ? "dark" 47 | : "light"; 48 | setTheme(systemTheme); 49 | } 50 | }, [defaultTheme, storageKey, enableSystem]); 51 | 52 | useEffect(() => { 53 | const root = window.document.documentElement; 54 | 55 | if (disableTransitionOnChange) { 56 | root.classList.add("no-transitions"); 57 | 58 | // Force a reflow 59 | window.getComputedStyle(root).getPropertyValue("opacity"); 60 | 61 | setTimeout(() => { 62 | root.classList.remove("no-transitions"); 63 | }, 0); 64 | } 65 | 66 | root.classList.remove("light", "dark"); 67 | 68 | if (theme === "system" && enableSystem) { 69 | const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches 70 | ? 
"dark" 71 | : "light"; 72 | root.classList.add(systemTheme); 73 | } else { 74 | root.classList.add(theme); 75 | } 76 | 77 | localStorage.setItem(storageKey, theme); 78 | }, [theme, storageKey, enableSystem, disableTransitionOnChange]); 79 | 80 | const value = { 81 | theme, 82 | setTheme: (theme: Theme) => { 83 | setTheme(theme); 84 | }, 85 | }; 86 | 87 | return ( 88 | 89 | {children} 90 | 91 | ); 92 | } 93 | 94 | export const useTheme = () => { 95 | const context = useContext(ThemeProviderContext); 96 | 97 | if (context === undefined) 98 | throw new Error("useTheme must be used within a ThemeProvider"); 99 | 100 | return context; 101 | }; 102 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/ai-voice-input.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { Mic, Square } from "lucide-react"; 4 | import { useState, useEffect } from "react"; 5 | import { cn } from "@/lib/utils"; 6 | 7 | interface AIVoiceInputProps { 8 | onStart?: () => void; 9 | onStop?: (duration: number) => void; 10 | isConnected?: boolean; 11 | className?: string; 12 | } 13 | 14 | export function AIVoiceInput({ 15 | onStart, 16 | onStop, 17 | isConnected = false, 18 | className 19 | }: AIVoiceInputProps) { 20 | const [active, setActive] = useState(false); 21 | const [time, setTime] = useState(0); 22 | const [isClient, setIsClient] = useState(false); 23 | const [status, setStatus] = useState<'disconnected' | 'connecting' | 'connected'>('disconnected'); 24 | 25 | useEffect(() => { 26 | setIsClient(true); 27 | }, []); 28 | 29 | useEffect(() => { 30 | let intervalId: NodeJS.Timeout; 31 | 32 | if (active) { 33 | intervalId = setInterval(() => { 34 | setTime((t) => t + 1); 35 | }, 1000); 36 | } else { 37 | setTime(0); 38 | } 39 | 40 | return () => clearInterval(intervalId); 41 | }, [active]); 42 | 43 | useEffect(() => { 44 | if (isConnected) { 45 | setStatus('connected'); 46 | setActive(true); 47 | } else { 48 | setStatus('disconnected'); 49 | setActive(false); 50 | } 51 | }, [isConnected]); 52 | 53 | const formatTime = (seconds: number) => { 54 | const mins = Math.floor(seconds / 60); 55 | const secs = seconds % 60; 56 | return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`; 57 | }; 58 | 59 | const handleStart = () => { 60 | setStatus('connecting'); 61 | onStart?.(); 62 | }; 63 | 64 | const handleStop = () => { 65 | onStop?.(time); 66 | setStatus('disconnected'); 67 | }; 68 | 69 | return ( 70 |
71 |
72 |
75 | {status === 'connected' ? 'Connected' : status === 'connecting' ? 'Connecting...' : 'Disconnected'} 76 |
77 | 78 | 100 | 101 | 109 | {formatTime(time)} 110 | 111 |
112 |
113 | ); 114 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/reset-chat.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import { Trash } from "lucide-react" 4 | 5 | export function ResetChat() { 6 | return ( 7 | 16 | ) 17 | } 18 | 19 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-toggle.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useTheme } from "@/components/theme-provider"; 4 | import { cn } from "@/lib/utils"; 5 | import { Moon, Sun } from "lucide-react"; 6 | import { useRef } from "react"; 7 | 8 | interface ThemeToggleProps { 9 | className?: string; 10 | } 11 | 12 | export function ThemeToggle({ className }: ThemeToggleProps) { 13 | const { theme } = useTheme(); 14 | const buttonRef = useRef(null); 15 | 16 | const toggleTheme = () => { 17 | // Instead of directly changing the theme, dispatch a custom event 18 | const newTheme = theme === "light" ? "dark" : "light"; 19 | 20 | // Dispatch custom event with the new theme 21 | window.dispatchEvent( 22 | new CustomEvent('themeToggleRequest', { 23 | detail: { theme: newTheme } 24 | }) 25 | ); 26 | }; 27 | 28 | return ( 29 | 60 | ); 61 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-transition.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useTheme } from "@/components/theme-provider"; 4 | import { useEffect, useState } from "react"; 5 | import { motion, AnimatePresence } from "framer-motion"; 6 | 7 | interface ThemeTransitionProps { 8 | className?: string; 9 | } 10 | 11 | export function ThemeTransition({ className }: ThemeTransitionProps) { 12 | const { theme, setTheme } = useTheme(); 13 | const [position, setPosition] = useState({ x: 0, y: 0 }); 14 | const [isAnimating, setIsAnimating] = useState(false); 15 | const [pendingTheme, setPendingTheme] = useState(null); 16 | const [visualTheme, setVisualTheme] = useState(theme); 17 | 18 | // Track mouse/touch position for click events 19 | useEffect(() => { 20 | const handleMouseMove = (e: MouseEvent) => { 21 | setPosition({ x: e.clientX, y: e.clientY }); 22 | }; 23 | 24 | const handleTouchMove = (e: TouchEvent) => { 25 | if (e.touches[0]) { 26 | setPosition({ x: e.touches[0].clientX, y: e.touches[0].clientY }); 27 | } 28 | }; 29 | 30 | window.addEventListener("mousemove", handleMouseMove); 31 | window.addEventListener("touchmove", handleTouchMove); 32 | 33 | return () => { 34 | window.removeEventListener("mousemove", handleMouseMove); 35 | window.removeEventListener("touchmove", handleTouchMove); 36 | }; 37 | }, []); 38 | 39 | // Listen for theme toggle requests 40 | useEffect(() => { 41 | // Custom event for theme toggle requests 42 | const handleThemeToggle = (e: CustomEvent) => { 43 | if (isAnimating) return; // Prevent multiple animations 44 | 45 | const newTheme = e.detail.theme; 46 | if (newTheme === theme) return; 47 | 48 | // Store the pending theme but don't apply it yet 49 | setPendingTheme(newTheme); 50 | setIsAnimating(true); 51 | 52 | // The actual theme will be applied mid-animation 53 | }; 54 | 55 | window.addEventListener('themeToggleRequest' as any, handleThemeToggle as 
EventListener); 56 | 57 | return () => { 58 | window.removeEventListener('themeToggleRequest' as any, handleThemeToggle as EventListener); 59 | }; 60 | }, [theme, isAnimating]); 61 | 62 | // Apply the theme change mid-animation 63 | useEffect(() => { 64 | if (isAnimating && pendingTheme) { 65 | // Set visual theme immediately for the animation 66 | setVisualTheme(pendingTheme); 67 | 68 | // Apply the actual theme change after a delay (mid-animation) 69 | const timer = setTimeout(() => { 70 | setTheme(pendingTheme as any); 71 | }, 400); // Half of the animation duration 72 | 73 | // End the animation after it completes 74 | const endTimer = setTimeout(() => { 75 | setIsAnimating(false); 76 | setPendingTheme(null); 77 | }, 1000); // Match with animation duration 78 | 79 | return () => { 80 | clearTimeout(timer); 81 | clearTimeout(endTimer); 82 | }; 83 | } 84 | }, [isAnimating, pendingTheme, setTheme]); 85 | 86 | return ( 87 | 88 | {isAnimating && ( 89 | 96 | 116 | 117 | )} 118 | 119 | ); 120 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | { 15 | rules: { 16 | "no-unused-vars": "off", 17 | "no-explicit-any": "off", 18 | "no-console": "off", 19 | "no-debugger": "off", 20 | "eqeqeq": "off", 21 | "curly": "off", 22 | "quotes": "off", 23 | "semi": "off", 24 | }, 25 | }, 26 | ]; 27 | 28 | export default eslintConfig; 29 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fastrtc-demo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build --no-lint", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "class-variance-authority": "^0.7.1", 13 | "clsx": "^2.1.1", 14 | "framer-motion": "^12.4.10", 15 | "lucide-react": "^0.477.0", 16 | "next": "15.2.2-canary.1", 17 | "react": "^19.0.0", 18 | "react-dom": "^19.0.0", 19 | "tailwind-merge": "^3.0.2", 20 | "tailwindcss-animate": "^1.0.7" 21 | }, 22 | "devDependencies": { 23 | "@eslint/eslintrc": 
"^3", 24 | "@tailwindcss/postcss": "^4", 25 | "@types/node": "^20", 26 | "@types/react": "^19", 27 | "@types/react-dom": "^19", 28 | "eslint": "^9", 29 | "eslint-config-next": "15.2.2-canary.1", 30 | "tailwindcss": "^4", 31 | "typescript": "^5" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | fastapi 3 | python-dotenv 4 | elevenlabs 5 | fastrtc[vad, stt, tts] -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/run.sh: -------------------------------------------------------------------------------- 1 | uvicorn backend.server:app --host 0.0.0.0 --port 8000 -------------------------------------------------------------------------------- /demo/object_detection/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Object Detection 3 | emoji: 📸 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Use YOLOv10 to 
detect objects in real-time 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/object_detection/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import cv2 5 | import gradio as gr 6 | from fastapi import FastAPI 7 | from fastapi.responses import HTMLResponse 8 | from fastrtc import Stream, get_twilio_turn_credentials 9 | from gradio.utils import get_space 10 | from huggingface_hub import hf_hub_download 11 | from pydantic import BaseModel, Field 12 | 13 | try: 14 | from demo.object_detection.inference import YOLOv10 15 | except (ImportError, ModuleNotFoundError): 16 | from inference import YOLOv10 17 | 18 | 19 | cur_dir = Path(__file__).parent 20 | 21 | model_file = hf_hub_download( 22 | repo_id="onnx-community/yolov10n", filename="onnx/model.onnx" 23 | ) 24 | 25 | model = YOLOv10(model_file) 26 | 27 | 28 | def detection(image, conf_threshold=0.3): 29 | image = cv2.resize(image, (model.input_width, model.input_height)) 30 | print("conf_threshold", conf_threshold) 31 | new_image = model.detect_objects(image, conf_threshold) 32 | return cv2.resize(new_image, (500, 500)) 33 | 34 | 35 | stream = Stream( 36 | handler=detection, 37 | modality="video", 38 | mode="send-receive", 39 | additional_inputs=[gr.Slider(minimum=0, maximum=1, step=0.01, value=0.3)], 40 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 41 | concurrency_limit=2 if get_space() else None, 42 | ) 43 | 44 | app = FastAPI() 45 | 46 | stream.mount(app) 47 | 48 | 49 | @app.get("/") 50 | async def _(): 51 | rtc_config = get_twilio_turn_credentials() if get_space() else None 52 | html_content = open(cur_dir / "index.html").read() 53 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 54 | return HTMLResponse(content=html_content) 55 | 56 | 57 | class InputData(BaseModel): 58 | webrtc_id: str 59 | conf_threshold: float = Field(ge=0, le=1) 60 | 61 | 62 | @app.post("/input_hook") 63 | async def _(data: InputData): 64 | stream.set_input(data.webrtc_id, data.conf_threshold) 65 | 66 | 67 | if __name__ == "__main__": 68 | import os 69 | 70 | if (mode := os.getenv("MODE")) == "UI": 71 | stream.ui.launch(server_port=7860) 72 | elif mode == "PHONE": 73 | stream.fastphone(host="0.0.0.0", port=7860) 74 | else: 75 | import uvicorn 76 | 77 | uvicorn.run(app, host="0.0.0.0", port=7860) 78 | -------------------------------------------------------------------------------- /demo/object_detection/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc 2 | opencv-python 3 | twilio 4 | onnxruntime-gpu -------------------------------------------------------------------------------- /demo/phonic_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Phonic AI Chat 3 | emoji: 🎙️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Phonic AI's speech-to-speech model 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|PHONIC_API_KEY] 13 | python_version: 3.11 14 | --- 15 | 16 | Check out the 
configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/phonic_chat/app.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import os 4 | 5 | import gradio as gr 6 | from gradio.utils import get_space 7 | import numpy as np 8 | from dotenv import load_dotenv 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | AsyncStreamHandler, 12 | Stream, 13 | get_twilio_turn_credentials, 14 | audio_to_float32, 15 | wait_for_item, 16 | ) 17 | from phonic.client import PhonicSTSClient, get_voices 18 | 19 | load_dotenv() 20 | 21 | STS_URI = "wss://api.phonic.co/v1/sts/ws" 22 | API_KEY = os.environ["PHONIC_API_KEY"] 23 | SAMPLE_RATE = 44_100 24 | voices = get_voices(API_KEY) 25 | voice_ids = [voice["id"] for voice in voices] 26 | 27 | 28 | class PhonicHandler(AsyncStreamHandler): 29 | def __init__(self): 30 | super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE) 31 | self.output_queue = asyncio.Queue() 32 | self.client = None 33 | 34 | def copy(self) -> AsyncStreamHandler: 35 | return PhonicHandler() 36 | 37 | async def start_up(self): 38 | await self.wait_for_args() 39 | voice_id = self.latest_args[1] 40 | async with PhonicSTSClient(STS_URI, API_KEY) as client: 41 | self.client = client 42 | sts_stream = client.sts( # type: ignore 43 | input_format="pcm_44100", 44 | output_format="pcm_44100", 45 | system_prompt="You are a helpful voice assistant. Respond conversationally.", 46 | # welcome_message="Hello! I'm your voice assistant. How can I help you today?", 47 | voice_id=voice_id, 48 | ) 49 | async for message in sts_stream: 50 | message_type = message.get("type") 51 | if message_type == "audio_chunk": 52 | audio_b64 = message["audio"] 53 | audio_bytes = base64.b64decode(audio_b64) 54 | await self.output_queue.put( 55 | (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16)) 56 | ) 57 | if text := message.get("text"): 58 | msg = {"role": "assistant", "content": text} 59 | await self.output_queue.put(AdditionalOutputs(msg)) 60 | elif message_type == "input_text": 61 | msg = {"role": "user", "content": message["text"]} 62 | await self.output_queue.put(AdditionalOutputs(msg)) 63 | 64 | async def emit(self): 65 | return await wait_for_item(self.output_queue) 66 | 67 | async def receive(self, frame: tuple[int, np.ndarray]) -> None: 68 | if not self.client: 69 | return 70 | audio_float32 = audio_to_float32(frame) 71 | await self.client.send_audio(audio_float32) # type: ignore 72 | 73 | async def shutdown(self): 74 | if self.client: 75 | await self.client._websocket.close() 76 | return super().shutdown() 77 | 78 | 79 | def add_to_chatbot(chatbot, message): 80 | chatbot.append(message) 81 | return chatbot 82 | 83 | 84 | chatbot = gr.Chatbot(type="messages", value=[]) 85 | stream = Stream( 86 | handler=PhonicHandler(), 87 | mode="send-receive", 88 | modality="audio", 89 | additional_inputs=[ 90 | gr.Dropdown( 91 | choices=voice_ids, 92 | value="victoria", 93 | label="Voice", 94 | info="Select a voice from the dropdown", 95 | ) 96 | ], 97 | additional_outputs=[chatbot], 98 | additional_outputs_handler=add_to_chatbot, 99 | ui_args={ 100 | "title": "Phonic Chat (Powered by FastRTC ⚡️)", 101 | }, 102 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 103 | concurrency_limit=5 if get_space() else None, 104 | time_limit=90 if get_space() else None, 105 | ) 106 | 107 | # with 
stream.ui: 108 | # state.change(lambda s: s, inputs=state, outputs=chatbot) 109 | 110 | if __name__ == "__main__": 111 | if (mode := os.getenv("MODE")) == "UI": 112 | stream.ui.launch(server_port=7860) 113 | elif mode == "PHONE": 114 | stream.fastphone(host="0.0.0.0", port=7860) 115 | else: 116 | stream.ui.launch(server_port=7860) 117 | -------------------------------------------------------------------------------- /demo/phonic_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile requirements.in -o requirements.txt 3 | aiohappyeyeballs==2.4.6 4 | # via aiohttp 5 | aiohttp==3.11.12 6 | # via 7 | # aiohttp-retry 8 | # twilio 9 | aiohttp-retry==2.9.1 10 | # via twilio 11 | aiosignal==1.3.2 12 | # via aiohttp 13 | attrs==25.1.0 14 | # via aiohttp 15 | certifi==2025.1.31 16 | # via requests 17 | cffi==1.17.1 18 | # via sounddevice 19 | charset-normalizer==3.4.1 20 | # via requests 21 | fastrtc==0.0.1 22 | # via -r requirements.in 23 | frozenlist==1.5.0 24 | # via 25 | # aiohttp 26 | # aiosignal 27 | idna==3.10 28 | # via 29 | # requests 30 | # yarl 31 | isort==6.0.0 32 | # via phonic-python 33 | loguru==0.7.3 34 | # via phonic-python 35 | multidict==6.1.0 36 | # via 37 | # aiohttp 38 | # yarl 39 | numpy==2.2.3 40 | # via 41 | # phonic-python 42 | # scipy 43 | phonic-python==0.1.3 44 | # via -r requirements.in 45 | propcache==0.3.0 46 | # via 47 | # aiohttp 48 | # yarl 49 | pycparser==2.22 50 | # via cffi 51 | pyjwt==2.10.1 52 | # via twilio 53 | python-dotenv==1.0.1 54 | # via 55 | # -r requirements.in 56 | # phonic-python 57 | requests==2.32.3 58 | # via 59 | # phonic-python 60 | # twilio 61 | scipy==1.15.2 62 | # via phonic-python 63 | sounddevice==0.5.1 64 | # via phonic-python 65 | twilio==9.4.6 66 | # via -r requirements.in 67 | typing-extensions==4.12.2 68 | # via phonic-python 69 | urllib3==2.3.0 70 | # via requests 71 | websockets==15.0 72 | # via phonic-python 73 | yarl==1.18.3 74 | # via aiohttp 75 | -------------------------------------------------------------------------------- /demo/qwen_phone_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Qwen Phone Chat 3 | emoji: 📞 4 | colorFrom: pink 5 | colorTo: green 6 | sdk: gradio 7 | sdk_version: 5.25.2 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk with Qwen 2.5 Omni over the Phone 12 | --- 13 | 14 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/qwen_phone_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc 2 | websockets>=14.0 -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Azure OpenAI 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Azure OpenAI using their multimodal API 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at 
https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Azure OpenAI (Gradio UI) 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Azure OpenAI (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohappyeyeballs==2.6.1 3 | aiohttp==3.11.13 4 | aiohttp-retry==2.9.1 5 | aioice==0.9.0 6 | aiortc==1.10.1 7 | aiosignal==1.3.2 8 | annotated-types==0.7.0 9 | anyio==4.8.0 10 | attrs==25.2.0 11 | audioread==3.0.1 12 | av==13.1.0 13 | babel==2.17.0 14 | certifi==2025.1.31 15 | cffi==1.17.1 16 | charset-normalizer==3.4.1 17 | click==8.1.8 18 | colorama==0.4.6 19 | coloredlogs==15.0.1 20 | colorlog==6.9.0 21 | cryptography==44.0.2 22 | csvw==3.5.1 23 | decorator==5.2.1 24 | distro==1.9.0 25 | dlinfo==2.0.0 26 | dnspython==2.7.0 27 | espeakng-loader==0.2.4 28 | fastapi==0.115.11 29 | fastrtc==0.0.14 30 | ffmpy==0.5.0 31 | filelock==3.17.0 32 | flatbuffers==25.2.10 33 | frozenlist==1.5.0 34 | fsspec==2025.3.0 35 | google-crc32c==1.6.0 36 | gradio==5.20.1 37 | gradio_client==1.7.2 38 | groovy==0.1.2 39 | h11==0.14.0 40 | httpcore==1.0.7 41 | httpx==0.28.1 42 | huggingface-hub==0.29.3 43 | humanfriendly==10.0 44 | idna==3.10 45 | ifaddr==0.2.0 46 | isodate==0.7.2 47 | Jinja2==3.1.6 48 | jiter==0.9.0 49 | joblib==1.4.2 50 | jsonschema==4.23.0 51 | jsonschema-specifications==2024.10.1 52 | kokoro-onnx==0.4.5 53 | language-tags==1.2.0 54 | lazy_loader==0.4 55 | librosa==0.11.0 56 | llvmlite==0.44.0 57 | markdown-it-py==3.0.0 58 | MarkupSafe==2.1.5 59 | mdurl==0.1.2 60 | mpmath==1.3.0 61 | msgpack==1.1.0 62 | multidict==6.1.0 63 | numba==0.61.0 64 | numpy==2.1.3 65 | onnxruntime==1.21.0 66 | openai==1.66.2 67 | orjson==3.10.15 68 | packaging==24.2 69 | pandas==2.2.3 70 | phonemizer-fork==3.3.1 71 | pillow==11.1.0 72 | platformdirs==4.3.6 73 | pooch==1.8.2 74 | propcache==0.3.0 75 | protobuf==6.30.0 76 | pycparser==2.22 77 | pydantic==2.10.6 78 | pydantic_core==2.27.2 79 | pydub==0.25.1 80 | pyee==12.1.1 81 | Pygments==2.19.1 82 | PyJWT==2.10.1 83 | pylibsrtp==0.11.0 84 | pyOpenSSL==25.0.0 85 | pyparsing==3.2.1 86 | python-dateutil==2.9.0.post0 87 | python-dotenv==1.0.1 88 | python-multipart==0.0.20 89 | pytz==2025.1 90 | PyYAML==6.0.2 91 | rdflib==7.1.3 92 | referencing==0.36.2 93 | regex==2024.11.6 94 | requests==2.32.3 95 | rfc3986==1.5.0 96 | rich==13.9.4 97 | rpds-py==0.23.1 98 | ruff==0.9.10 99 | safehttpx==0.1.6 100 | scikit-learn==1.6.1 101 | scipy==1.15.2 102 | segments==2.3.0 103 | semantic-version==2.10.0 104 | shellingham==1.5.4 105 | six==1.17.0 106 | sniffio==1.3.1 107 | sounddevice==0.5.1 108 | soundfile==0.13.1 109 | soxr==0.5.0.post1 110 | starlette==0.46.1 111 | sympy==1.13.3 112 | threadpoolctl==3.5.0 113 | tomlkit==0.13.2 114 | tqdm==4.67.1 115 | twilio==9.5.0 116 | typer==0.15.2 117 | typing_extensions==4.12.2 118 
| tzdata==2025.1 119 | uritemplate==4.1.1 120 | urllib3==2.3.0 121 | uvicorn==0.34.0 122 | websockets==15.0.1 123 | yarl==1.18.3 124 | -------------------------------------------------------------------------------- /demo/talk_to_claude/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Claude 3 | emoji: 👨‍🦰 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Anthropic's Claude 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_claude/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from pathlib import Path 5 | 6 | import anthropic 7 | import gradio as gr 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from elevenlabs import ElevenLabs 11 | from fastapi import FastAPI 12 | from fastapi.responses import HTMLResponse, StreamingResponse 13 | from fastrtc import ( 14 | AdditionalOutputs, 15 | ReplyOnPause, 16 | Stream, 17 | get_tts_model, 18 | get_twilio_turn_credentials, 19 | ) 20 | from fastrtc.utils import audio_to_bytes 21 | from gradio.utils import get_space 22 | from groq import Groq 23 | from pydantic import BaseModel 24 | 25 | load_dotenv() 26 | 27 | groq_client = Groq() 28 | claude_client = anthropic.Anthropic() 29 | tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"]) 30 | 31 | curr_dir = Path(__file__).parent 32 | 33 | tts_model = get_tts_model() 34 | 35 | 36 | def response( 37 | audio: tuple[int, np.ndarray], 38 | chatbot: list[dict] | None = None, 39 | ): 40 | chatbot = chatbot or [] 41 | messages = [{"role": d["role"], "content": d["content"]} for d in chatbot] 42 | prompt = groq_client.audio.transcriptions.create( 43 | file=("audio-file.mp3", audio_to_bytes(audio)), 44 | model="whisper-large-v3-turbo", 45 | response_format="verbose_json", 46 | ).text 47 | chatbot.append({"role": "user", "content": prompt}) 48 | yield AdditionalOutputs(chatbot) 49 | messages.append({"role": "user", "content": prompt}) 50 | response = claude_client.messages.create( 51 | model="claude-3-5-haiku-20241022", 52 | max_tokens=512, 53 | messages=messages, # type: ignore 54 | ) 55 | response_text = " ".join( 56 | block.text # type: ignore 57 | for block in response.content 58 | if getattr(block, "type", None) == "text" 59 | ) 60 | chatbot.append({"role": "assistant", "content": response_text}) 61 | 62 | start = time.time() 63 | 64 | print("starting tts", start) 65 | for i, chunk in enumerate(tts_model.stream_tts_sync(response_text)): 66 | print("chunk", i, time.time() - start) 67 | yield chunk 68 | print("finished tts", time.time() - start) 69 | yield AdditionalOutputs(chatbot) 70 | 71 | 72 | chatbot = gr.Chatbot(type="messages") 73 | stream = Stream( 74 | modality="audio", 75 | mode="send-receive", 76 | handler=ReplyOnPause(response), 77 | additional_outputs_handler=lambda a, b: b, 78 | additional_inputs=[chatbot], 79 | additional_outputs=[chatbot], 80 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 81 | concurrency_limit=5 if get_space() else None, 82 | 
time_limit=90 if get_space() else None, 83 | ) 84 | 85 | 86 | class Message(BaseModel): 87 | role: str 88 | content: str 89 | 90 | 91 | class InputData(BaseModel): 92 | webrtc_id: str 93 | chatbot: list[Message] 94 | 95 | 96 | app = FastAPI() 97 | stream.mount(app) 98 | 99 | 100 | @app.get("/") 101 | async def _(): 102 | rtc_config = get_twilio_turn_credentials() if get_space() else None 103 | html_content = (curr_dir / "index.html").read_text() 104 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 105 | return HTMLResponse(content=html_content, status_code=200) 106 | 107 | 108 | @app.post("/input_hook") 109 | async def _(body: InputData): 110 | stream.set_input(body.webrtc_id, body.model_dump()["chatbot"]) 111 | return {"status": "ok"} 112 | 113 | 114 | @app.get("/outputs") 115 | def _(webrtc_id: str): 116 | async def output_stream(): 117 | async for output in stream.output_stream(webrtc_id): 118 | chatbot = output.args[0] 119 | yield f"event: output\ndata: {json.dumps(chatbot[-1])}\n\n" 120 | 121 | return StreamingResponse(output_stream(), media_type="text/event-stream") 122 | 123 | 124 | if __name__ == "__main__": 125 | import os 126 | 127 | if (mode := os.getenv("MODE")) == "UI": 128 | stream.ui.launch(server_port=7860) 129 | elif mode == "PHONE": 130 | stream.fastphone(host="0.0.0.0", port=7860) 131 | else: 132 | import uvicorn 133 | 134 | uvicorn.run(app, host="0.0.0.0", port=7860) 135 | -------------------------------------------------------------------------------- /demo/talk_to_claude/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, tts] 2 | elevenlabs 3 | groq 4 | anthropic 5 | twilio 6 | python-dotenv 7 | -------------------------------------------------------------------------------- /demo/talk_to_gemini/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Gemini 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Gemini using Google's multimodal API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_gemini/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Gemini (Gradio UI) 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Gemini (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_gemini/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | python-dotenv 3 | google-genai 4 | twilio 5 | -------------------------------------------------------------------------------- /demo/talk_to_llama4/AV_Huggy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/talk_to_llama4/AV_Huggy.png -------------------------------------------------------------------------------- /demo/talk_to_llama4/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Llama 4 3 | emoji: 🦙 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.23.3 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Llama 4 using Groq + Cloudflare 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_llama4/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, tts]==0.0.20.rc2 2 | groq 3 | python-dotenv -------------------------------------------------------------------------------- /demo/talk_to_openai/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to OpenAI 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to OpenAI using their multimodal API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_openai/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to OpenAI (Gradio UI) 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to OpenAI (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_openai/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | openai 3 | twilio 4 | python-dotenv -------------------------------------------------------------------------------- /demo/talk_to_sambanova/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Sambanova 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Llama 3.2 - SambaNova API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_sambanova/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Sambanova (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 
| app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Llama 3.2 - SambaNova API (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_sambanova/app.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import gradio as gr 7 | import huggingface_hub 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from fastapi import FastAPI 11 | from fastapi.responses import HTMLResponse, StreamingResponse 12 | from fastrtc import ( 13 | AdditionalOutputs, 14 | ReplyOnPause, 15 | Stream, 16 | get_cloudflare_turn_credentials, 17 | get_cloudflare_turn_credentials_async, 18 | get_stt_model, 19 | ) 20 | from gradio.utils import get_space 21 | from pydantic import BaseModel 22 | 23 | load_dotenv() 24 | 25 | curr_dir = Path(__file__).parent 26 | 27 | 28 | client = huggingface_hub.InferenceClient( 29 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 30 | provider="sambanova", 31 | ) 32 | stt_model = get_stt_model() 33 | 34 | 35 | def response( 36 | audio: tuple[int, np.ndarray], 37 | gradio_chatbot: list[dict] | None = None, 38 | conversation_state: list[dict] | None = None, 39 | ): 40 | gradio_chatbot = gradio_chatbot or [] 41 | conversation_state = conversation_state or [] 42 | print("chatbot", gradio_chatbot) 43 | 44 | text = stt_model.stt(audio) 45 | sample_rate, array = audio 46 | gradio_chatbot.append( 47 | {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))} 48 | ) 49 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 50 | 51 | conversation_state.append({"role": "user", "content": text}) 52 | request = client.chat.completions.create( 53 | model="meta-llama/Llama-3.2-3B-Instruct", 54 | messages=conversation_state, # type: ignore 55 | temperature=0.1, 56 | top_p=0.1, 57 | ) 58 | response = {"role": "assistant", "content": request.choices[0].message.content} 59 | 60 | conversation_state.append(response) 61 | gradio_chatbot.append(response) 62 | 63 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 64 | 65 | 66 | chatbot = gr.Chatbot(type="messages", value=[]) 67 | state = gr.State(value=[]) 68 | stream = Stream( 69 | ReplyOnPause( 70 | response, # type: ignore 71 | input_sample_rate=16000, 72 | ), 73 | mode="send", 74 | modality="audio", 75 | additional_inputs=[chatbot, state], 76 | additional_outputs=[chatbot, state], 77 | additional_outputs_handler=lambda *a: (a[2], a[3]), 78 | concurrency_limit=20 if get_space() else None, 79 | rtc_configuration=get_cloudflare_turn_credentials_async, 80 | server_rtc_configuration=get_cloudflare_turn_credentials(ttl=36_000), 81 | ) 82 | 83 | app = FastAPI() 84 | stream.mount(app) 85 | 86 | 87 | class Message(BaseModel): 88 | role: str 89 | content: str 90 | 91 | 92 | class InputData(BaseModel): 93 | webrtc_id: str 94 | chatbot: list[Message] 95 | state: list[Message] 96 | 97 | 98 | @app.get("/") 99 | async def _(): 100 | rtc_config = await get_cloudflare_turn_credentials_async( 101 | hf_token=os.getenv("HF_TOKEN_ALT") 102 | ) 103 | html_content = (curr_dir / "index.html").read_text() 104 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 105 | return HTMLResponse(content=html_content) 106 | 107 | 108 | 
@app.post("/input_hook") 109 | async def _(data: InputData): 110 | body = data.model_dump() 111 | stream.set_input(data.webrtc_id, body["chatbot"], body["state"]) 112 | 113 | 114 | def audio_to_base64(file_path): 115 | audio_format = "wav" 116 | with open(file_path, "rb") as audio_file: 117 | encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8") 118 | return f"data:audio/{audio_format};base64,{encoded_audio}" 119 | 120 | 121 | @app.get("/outputs") 122 | async def _(webrtc_id: str): 123 | async def output_stream(): 124 | async for output in stream.output_stream(webrtc_id): 125 | chatbot = output.args[0] 126 | state = output.args[1] 127 | data = { 128 | "message": state[-1], 129 | "audio": audio_to_base64(chatbot[-1]["content"].value["path"]) 130 | if chatbot[-1]["role"] == "user" 131 | else None, 132 | } 133 | yield f"event: output\ndata: {json.dumps(data)}\n\n" 134 | 135 | return StreamingResponse(output_stream(), media_type="text/event-stream") 136 | 137 | 138 | if __name__ == "__main__": 139 | import os 140 | 141 | if (mode := os.getenv("MODE")) == "UI": 142 | stream.ui.launch(server_port=7860) 143 | elif mode == "PHONE": 144 | raise ValueError("Phone mode not supported") 145 | else: 146 | import uvicorn 147 | 148 | uvicorn.run(app, host="0.0.0.0", port=7860) 149 | -------------------------------------------------------------------------------- /demo/talk_to_sambanova/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, stt]==0.0.20.rc2 2 | python-dotenv 3 | huggingface_hub>=0.29.0 4 | twilio -------------------------------------------------------------------------------- /demo/talk_to_smolagents/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Smolagents 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: FastRTC Voice Agent with smolagents 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | # Voice LLM Agent with Image Generation 16 | 17 | A voice-enabled AI assistant powered by FastRTC that can: 18 | 1. Stream audio in real-time using WebRTC 19 | 2. Listen and respond with natural pauses in conversation 20 | 3. Generate images based on your requests 21 | 4. Maintain conversation context across exchanges 22 | 23 | This app combines the real-time communication capabilities of FastRTC with the powerful agent framework of smolagents. 24 | 25 | ## Key Features 26 | 27 | - **Real-time Streaming**: Uses FastRTC's WebRTC-based audio streaming 28 | - **Voice Activation**: Automatic detection of speech pauses to trigger responses 29 | - **Multi-modal Interaction**: Combines voice and image generation in a single interface 30 | 31 | ## Setup 32 | 33 | 1. Install Python 3.9+ and create a virtual environment: 34 | ```bash 35 | python -m venv .venv 36 | source .venv/bin/activate # On Windows: .venv\Scripts\activate 37 | ``` 38 | 39 | 2. Install dependencies: 40 | ```bash 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | 3. 
Create a `.env` file with the following: 45 | ``` 46 | HF_TOKEN=your_huggingface_api_key 47 | MODE=UI # Use 'UI' for Gradio interface, leave blank for HTML interface 48 | ``` 49 | 50 | ## Running the App 51 | 52 | ### With Gradio UI (Recommended) 53 | 54 | ```bash 55 | MODE=UI python app.py 56 | ``` 57 | 58 | This launches a Gradio UI at http://localhost:7860 with: 59 | - FastRTC's built-in streaming audio components 60 | - A chat interface showing the conversation 61 | - An image display panel for generated images 62 | 63 | ## How to Use 64 | 65 | 1. Click the microphone button to start streaming your voice. 66 | 2. Speak naturally - the app will automatically detect when you pause. 67 | 3. Ask the agent to generate an image, for example: 68 | - "Create an image of a magical forest with glowing mushrooms." 69 | - "Generate a picture of a futuristic city with flying cars." 70 | 4. View the generated image and hear the agent's response. 71 | 72 | ## Technical Architecture 73 | 74 | ### FastRTC Components 75 | 76 | - **Stream**: Core component that handles WebRTC connections and audio streaming 77 | - **ReplyOnPause**: Detects when the user stops speaking to trigger a response 78 | - **get_stt_model/get_tts_model**: Provides optimized speech-to-text and text-to-speech models 79 | 80 | ### smolagents Components 81 | 82 | - **CodeAgent**: Intelligent agent that can use tools based on natural language inputs 83 | - **Tool.from_space**: Integration with Hugging Face Spaces for image generation 84 | - **HfApiModel**: Connection to powerful language models for understanding requests 85 | 86 | ### Integration Flow 87 | 88 | 1. FastRTC streams and processes audio input in real-time 89 | 2. Speech is converted to text and passed to the smolagents CodeAgent 90 | 3. The agent processes the request and calls tools when needed 91 | 4. Responses and generated images are streamed back through FastRTC 92 | 5. The UI updates to show both text responses and generated images 93 | 94 | ## Advanced Features 95 | 96 | - Conversation history is maintained across exchanges 97 | - Error handling ensures the app continues working even if agent processing fails 98 | - The application leverages FastRTC's streaming capabilities for efficient audio transmission -------------------------------------------------------------------------------- /demo/talk_to_smolagents/app.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from dotenv import load_dotenv 4 | from fastrtc import ( 5 | ReplyOnPause, 6 | Stream, 7 | get_stt_model, 8 | get_tts_model, 9 | get_twilio_turn_credentials, 10 | ) 11 | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel 12 | 13 | # Load environment variables 14 | load_dotenv() 15 | 16 | # Initialize file paths 17 | curr_dir = Path(__file__).parent 18 | 19 | # Initialize models 20 | stt_model = get_stt_model() 21 | tts_model = get_tts_model() 22 | 23 | # Conversation state to maintain history 24 | conversation_state: list[dict[str, str]] = [] 25 | 26 | # System prompt for agent 27 | system_prompt = """You are a helpful assistant that can helps with finding places to 28 | work remotely from. You should specifically check against reviews and ratings of the 29 | place. You should use this criteria to find the best place to work from: 30 | - Price 31 | - Reviews 32 | - Ratings 33 | - Location 34 | - WIFI 35 | Only return the name, address of the place, and a short description of the place. 36 | Always search for real places. 
37 | Only return real places, not fake ones. 38 | If you receive anything other than a location, you should ask for a location. 39 | 40 | User: I am in Paris, France. Can you find me a place to work from? 41 | Assistant: I found a place called "Le Café de la Paix" at 123 Rue de la Paix, 42 | Paris, France. It has good reviews and is in a great location. 43 | 44 | 45 | User: I am in London, UK. Can you find me a place to work from? 46 | Assistant: I found a place called "The London Coffee Company". 47 | 48 | 49 | User: How many people are in the room? 50 | Assistant: I only respond to requests about finding places to work from. 51 | 52 | 53 | """ 54 | 55 | model = HfApiModel(provider="together", model="Qwen/Qwen2.5-Coder-32B-Instruct") 56 | 57 | agent = CodeAgent( 58 | tools=[ 59 | DuckDuckGoSearchTool(), 60 | ], 61 | model=model, 62 | max_steps=10, 63 | verbosity_level=2, 64 | description="Search the web for cafes to work from.", 65 | ) 66 | 67 | 68 | def process_response(audio): 69 | """Process audio input and generate LLM response with TTS""" 70 | # Convert speech to text using STT model 71 | text = stt_model.stt(audio) 72 | if not text.strip(): 73 | return 74 | 75 | input_text = f"{system_prompt}\n\n{text}" 76 | # Get response from agent 77 | response_content = agent.run(input_text) 78 | 79 | # Convert response to audio using TTS model 80 | yield from tts_model.stream_tts_sync(response_content or "") 81 | 82 | 83 | stream = Stream( 84 | handler=ReplyOnPause(process_response, input_sample_rate=16000), 85 | modality="audio", 86 | mode="send-receive", 87 | ui_args={ 88 | "pulse_color": "rgb(255, 255, 255)", 89 | "icon_button_color": "rgb(255, 255, 255)", 90 | "title": "🧑‍💻The Coworking Agent", 91 | }, 92 | rtc_configuration=get_twilio_turn_credentials(), 93 | ) 94 | 95 | if __name__ == "__main__": 96 | stream.ui.launch(server_port=7860) 97 | -------------------------------------------------------------------------------- /demo/talk_to_smolagents/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv export --format requirements-txt --no-hashes 3 | aiofiles==23.2.1 4 | aiohappyeyeballs==2.4.6 5 | aiohttp==3.11.13 6 | aiohttp-retry==2.9.1 7 | aioice==0.9.0 8 | aiortc==1.10.1 9 | aiosignal==1.3.2 10 | annotated-types==0.7.0 11 | anyio==4.8.0 12 | async-timeout==5.0.1 ; python_full_version < '3.11' 13 | attrs==25.1.0 14 | audioop-lts==0.2.1 ; python_full_version >= '3.13' 15 | audioread==3.0.1 16 | av==13.1.0 17 | babel==2.17.0 18 | beautifulsoup4==4.13.3 19 | certifi==2025.1.31 20 | cffi==1.17.1 21 | charset-normalizer==3.4.1 22 | click==8.1.8 23 | colorama==0.4.6 24 | coloredlogs==15.0.1 25 | colorlog==6.9.0 26 | cryptography==44.0.1 27 | csvw==3.5.1 28 | decorator==5.2.1 29 | dlinfo==2.0.0 30 | dnspython==2.7.0 31 | duckduckgo-search==7.5.0 32 | espeakng-loader==0.2.4 33 | exceptiongroup==1.2.2 ; python_full_version < '3.11' 34 | fastapi==0.115.8 35 | fastrtc==0.0.8.post1 36 | fastrtc-moonshine-onnx==20241016 37 | ffmpy==0.5.0 38 | filelock==3.17.0 39 | flatbuffers==25.2.10 40 | frozenlist==1.5.0 41 | fsspec==2025.2.0 42 | google-crc32c==1.6.0 43 | gradio==5.19.0 44 | gradio-client==1.7.2 45 | h11==0.14.0 46 | httpcore==1.0.7 47 | httpx==0.28.1 48 | huggingface-hub==0.29.1 49 | humanfriendly==10.0 50 | idna==3.10 51 | ifaddr==0.2.0 52 | isodate==0.7.2 53 | jinja2==3.1.5 54 | joblib==1.4.2 55 | jsonschema==4.23.0 56 | jsonschema-specifications==2024.10.1 57 | 
kokoro-onnx==0.4.3 58 | language-tags==1.2.0 59 | lazy-loader==0.4 60 | librosa==0.10.2.post1 61 | llvmlite==0.44.0 62 | lxml==5.3.1 63 | markdown-it-py==3.0.0 64 | markdownify==1.0.0 65 | markupsafe==2.1.5 66 | mdurl==0.1.2 67 | mpmath==1.3.0 68 | msgpack==1.1.0 69 | multidict==6.1.0 70 | numba==0.61.0 71 | numpy==2.1.3 72 | onnxruntime==1.20.1 73 | orjson==3.10.15 74 | packaging==24.2 75 | pandas==2.2.3 76 | phonemizer-fork==3.3.1 77 | pillow==11.1.0 78 | platformdirs==4.3.6 79 | pooch==1.8.2 80 | primp==0.14.0 81 | propcache==0.3.0 82 | protobuf==5.29.3 83 | pycparser==2.22 84 | pydantic==2.10.6 85 | pydantic-core==2.27.2 86 | pydub==0.25.1 87 | pyee==12.1.1 88 | pygments==2.19.1 89 | pyjwt==2.10.1 90 | pylibsrtp==0.11.0 91 | pyopenssl==25.0.0 92 | pyparsing==3.2.1 93 | pyreadline3==3.5.4 ; sys_platform == 'win32' 94 | python-dateutil==2.9.0.post0 95 | python-dotenv==1.0.1 96 | python-multipart==0.0.20 97 | pytz==2025.1 98 | pyyaml==6.0.2 99 | rdflib==7.1.3 100 | referencing==0.36.2 101 | regex==2024.11.6 102 | requests==2.32.3 103 | rfc3986==1.5.0 104 | rich==13.9.4 105 | rpds-py==0.23.1 106 | ruff==0.9.7 ; sys_platform != 'emscripten' 107 | safehttpx==0.1.6 108 | scikit-learn==1.6.1 109 | scipy==1.15.2 110 | segments==2.3.0 111 | semantic-version==2.10.0 112 | shellingham==1.5.4 ; sys_platform != 'emscripten' 113 | six==1.17.0 114 | smolagents==1.9.2 115 | sniffio==1.3.1 116 | soundfile==0.13.1 117 | soupsieve==2.6 118 | soxr==0.5.0.post1 119 | standard-aifc==3.13.0 ; python_full_version >= '3.13' 120 | standard-chunk==3.13.0 ; python_full_version >= '3.13' 121 | standard-sunau==3.13.0 ; python_full_version >= '3.13' 122 | starlette==0.45.3 123 | sympy==1.13.3 124 | threadpoolctl==3.5.0 125 | tokenizers==0.21.0 126 | tomlkit==0.13.2 127 | tqdm==4.67.1 128 | twilio==9.4.6 129 | typer==0.15.1 ; sys_platform != 'emscripten' 130 | typing-extensions==4.12.2 131 | tzdata==2025.1 132 | uritemplate==4.1.1 133 | urllib3==2.3.0 134 | uvicorn==0.34.0 ; sys_platform != 'emscripten' 135 | websockets==15.0 136 | yarl==1.18.3 137 | -------------------------------------------------------------------------------- /demo/text_mode/app.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # dependencies = [ 3 | # "fastrtc[vad, stt]==0.0.26.rc1", 4 | # "openai", 5 | # ] 6 | # /// 7 | 8 | 9 | import gradio as gr 10 | import huggingface_hub 11 | from fastrtc import ( 12 | AdditionalOutputs, 13 | ReplyOnPause, 14 | WebRTC, 15 | WebRTCData, 16 | WebRTCError, 17 | get_stt_model, 18 | ) 19 | from openai import OpenAI 20 | 21 | stt_model = get_stt_model() 22 | 23 | conversations = {} 24 | 25 | 26 | def response( 27 | data: WebRTCData, 28 | conversation: list[dict], 29 | token: str | None = None, 30 | model: str = "meta-llama/Llama-3.2-3B-Instruct", 31 | provider: str = "sambanova", 32 | ): 33 | print("conversation before", conversation) 34 | if not provider.startswith("http") and not token: 35 | raise WebRTCError("Please add your HF token.") 36 | 37 | if data.audio is not None and data.audio[1].size > 0: 38 | user_audio_text = stt_model.stt(data.audio) 39 | conversation.append({"role": "user", "content": user_audio_text}) 40 | else: 41 | conversation.append({"role": "user", "content": data.textbox}) 42 | 43 | yield AdditionalOutputs(conversation) 44 | 45 | if provider.startswith("http"): 46 | client = OpenAI(base_url=provider, api_key="ollama") 47 | else: 48 | client = huggingface_hub.InferenceClient( 49 | api_key=token, 50 | provider=provider, # type: ignore 
51 | ) 52 | 53 | request = client.chat.completions.create( 54 | model=model, 55 | messages=conversation, # type: ignore 56 | temperature=1, 57 | top_p=0.1, 58 | ) 59 | response = {"role": "assistant", "content": request.choices[0].message.content} 60 | 61 | conversation.append(response) 62 | print("conversation after", conversation) 63 | yield AdditionalOutputs(conversation) 64 | 65 | 66 | css = """ 67 | footer { 68 | display: none !important; 69 | } 70 | """ 71 | 72 | providers = [ 73 | "black-forest-labs", 74 | "cerebras", 75 | "cohere", 76 | "fal-ai", 77 | "fireworks-ai", 78 | "hf-inference", 79 | "hyperbolic", 80 | "nebius", 81 | "novita", 82 | "openai", 83 | "replicate", 84 | "sambanova", 85 | "together", 86 | ] 87 | 88 | 89 | def hide_token(provider: str): 90 | if provider.startswith("http"): 91 | return gr.Textbox(visible=False) 92 | return gr.skip() 93 | 94 | 95 | with gr.Blocks(css=css) as demo: 96 | gr.HTML( 97 | """ 98 |

99 | Streaming Huggy FastRTC Chat 100 |

101 | """ 102 | ) 103 | with gr.Sidebar(): 104 | token = gr.Textbox( 105 | placeholder="Place your HF token here", type="password", label="HF Token" 106 | ) 107 | model = gr.Dropdown( 108 | choices=["meta-llama/Llama-3.2-3B-Instruct"], 109 | allow_custom_value=True, 110 | label="Model", 111 | ) 112 | provider = gr.Dropdown( 113 | label="Provider", 114 | choices=providers, 115 | value="sambanova", 116 | info="Select a hf-compatible provider or type the url of your server, e.g. http://127.0.0.1:11434/v1 for ollama", 117 | allow_custom_value=True, 118 | ) 119 | provider.change(hide_token, inputs=[provider], outputs=[token]) 120 | cb = gr.Chatbot(type="messages", height=600) 121 | webrtc = WebRTC(modality="audio", mode="send", variant="textbox") 122 | webrtc.stream( 123 | ReplyOnPause(response), 124 | inputs=[webrtc, cb, token, model, provider], 125 | outputs=[cb], 126 | concurrency_limit=100, 127 | ) 128 | webrtc.on_additional_outputs( 129 | lambda old, new: new, inputs=[cb], outputs=[cb], concurrency_limit=100 130 | ) 131 | 132 | if __name__ == "__main__": 133 | demo.launch(server_port=6980) 134 | -------------------------------------------------------------------------------- /demo/voice_text_editor/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Voice Text Editor 3 | emoji: 📝 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Edit text documents with your voice! 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | # Voice Text Editor 16 | 17 | Edit text documents with your voice! 18 | 19 | 20 | -------------------------------------------------------------------------------- /demo/voice_text_editor/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gradio as gr 4 | from dotenv import load_dotenv 5 | from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model 6 | from openai import OpenAI 7 | 8 | load_dotenv() 9 | 10 | sambanova_client = OpenAI( 11 | api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1" 12 | ) 13 | stt_model = get_stt_model() 14 | 15 | 16 | SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands. 17 | 18 | For each interaction: 19 | 1. You will receive the current state of a text document and a voice input from the user. 20 | 2. Determine if the input is: 21 | a) A command to modify the document (e.g., "delete the last line", "capitalize that") 22 | b) Content to be added to the document (e.g., "buy 12 eggs at the store") 23 | c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24") 24 | 3. Return ONLY the new document state after the changes have been applied. 
25 | 26 | Example: 27 | 28 | CURRENT DOCUMENT: 29 | 30 | 31 | Meeting notes: 32 | - Buy GPUs 33 | - Meet with Joe 34 | 35 | USER INPUT: Make that 100 GPUS 36 | 37 | NEW DOCUMENT STATE: 38 | 39 | Meeting notes: 40 | - Buy 100 GPUs 41 | - Meet with Joe 42 | 43 | Example 2: 44 | 45 | CURRENT DOCUMENT: 46 | 47 | Project Proposal 48 | 49 | USER INPUT: Make that a header 50 | 51 | NEW DOCUMENT STATE: 52 | 53 | # Project Proposal 54 | 55 | When handling commands: 56 | - Apply the requested changes precisely to the document 57 | - Support operations like adding, deleting, modifying, and moving text 58 | - Understand contextual references like "that", "the last line", "the second paragraph" 59 | 60 | When handling content additions: 61 | - Add the new text at the appropriate location (usually at the end or cursor position) 62 | - Format it appropriately based on the document context 63 | - If the user says to "add" or "insert" do not remove text that was already in the document. 64 | 65 | When handling content modifications: 66 | - Identify what part of the document the user is referring to 67 | - Apply the requested change while preserving the rest of the content 68 | - Be smart about contextual references (e.g., "make that 24" should know to replace a number) 69 | 70 | NEVER include any text in the new document state that is not part of the user's input. 71 | NEVER include the phrase "CURRENT DOCUMENT" in the new document state. 72 | NEVER reword the user's input unless you are explicitly asked to do so. 73 | """ 74 | 75 | 76 | def edit(audio, current_document: str): 77 | prompt = stt_model.stt(audio) 78 | print(f"Prompt: {prompt}") 79 | response = sambanova_client.chat.completions.create( 80 | model="Meta-Llama-3.3-70B-Instruct", 81 | messages=[ 82 | {"role": "system", "content": SYSTEM_PROMPT}, 83 | { 84 | "role": "user", 85 | "content": f"CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {prompt}", 86 | }, 87 | ], 88 | max_tokens=200, 89 | ) 90 | doc = response.choices[0].message.content 91 | yield AdditionalOutputs(doc) 92 | 93 | 94 | doc = gr.Textbox(value="", label="Current Document") 95 | 96 | 97 | stream = Stream( 98 | ReplyOnPause(edit), 99 | modality="audio", 100 | mode="send", 101 | additional_inputs=[doc], 102 | additional_outputs=[doc], 103 | additional_outputs_handler=lambda prev, current: current, 104 | ui_args={"title": "Voice Text Editor with FastRTC 🗣️"}, 105 | ) 106 | 107 | if __name__ == "__main__": 108 | if (mode := os.getenv("MODE")) == "UI": 109 | stream.ui.launch(server_port=7860) 110 | elif mode == "PHONE": 111 | stream.fastphone(host="0.0.0.0", port=7860) 112 | else: 113 | stream.ui.launch(server_port=7860) 114 | -------------------------------------------------------------------------------- /demo/voice_text_editor_local/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gradio as gr 4 | import requests 5 | from dotenv import load_dotenv 6 | from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model 7 | 8 | load_dotenv() 9 | 10 | stt_model = get_stt_model() 11 | 12 | SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands. 13 | 14 | For each interaction: 15 | 1. You will receive the current state of a text document and a voice input from the user. 16 | 2. 
Determine if the input is: 17 | a) A command to modify the document (e.g., "delete the last line", "capitalize that") 18 | b) Content to be added to the document (e.g., "buy 12 eggs at the store") 19 | c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24") 20 | 3. Return ONLY the new document state after the changes have been applied. 21 | 22 | Example: 23 | 24 | CURRENT DOCUMENT: 25 | 26 | Meeting notes: 27 | - Buy GPUs 28 | - Meet with Joe 29 | 30 | USER INPUT: Make that 100 GPUS 31 | 32 | NEW DOCUMENT STATE: 33 | 34 | Meeting notes: 35 | - Buy 100 GPUs 36 | - Meet with Joe 37 | 38 | Example 2: 39 | 40 | CURRENT DOCUMENT: 41 | 42 | Project Proposal 43 | 44 | USER INPUT: Make that a header 45 | 46 | NEW DOCUMENT STATE: 47 | 48 | # Project Proposal 49 | 50 | When handling commands: 51 | - Apply the requested changes precisely to the document 52 | - Support operations like adding, deleting, modifying, and moving text 53 | - Understand contextual references like "that", "the last line", "the second paragraph" 54 | 55 | When handling content additions: 56 | - Add the new text at the appropriate location (usually at the end or cursor position) 57 | - Format it appropriately based on the document context 58 | - If the user says to "add" or "insert" do not remove text that was already in the document. 59 | 60 | When handling content modifications: 61 | - Identify what part of the document the user is referring to 62 | - Apply the requested change while preserving the rest of the content 63 | - Be smart about contextual references (e.g., "make that 24" should know to replace a number) 64 | 65 | NEVER include any text in the new document state that is not part of the user's input. 66 | NEVER include the phrase "CURRENT DOCUMENT" in the new document state. 67 | NEVER reword the user's input unless you are explicitly asked to do so. 68 | """ 69 | 70 | 71 | def edit(audio, current_document: str): 72 | prompt = stt_model.stt(audio) 73 | print(f"Prompt: {prompt}") 74 | 75 | # Construct the prompt for ollama 76 | full_prompt = ( 77 | f"{SYSTEM_PROMPT}\n\n" 78 | f"User: CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {prompt}\n\n" 79 | f"Assistant:" 80 | ) 81 | 82 | try: 83 | # Send request to ollama's API 84 | response = requests.post( 85 | "http://localhost:11434/api/generate", 86 | json={ 87 | "model": "qwen2.5", 88 | "prompt": full_prompt, 89 | "stream": False, 90 | "max_tokens": 200, 91 | }, 92 | ) 93 | response.raise_for_status() # Raise an exception for bad status codes 94 | 95 | # Parse the response 96 | doc = response.json()["response"] 97 | # Remove a leading "Assistant:" prefix and any extra whitespace (removeprefix, not lstrip, so leading letters of the reply are not stripped) 98 | doc = doc.strip().removeprefix("Assistant:").strip() 99 | yield AdditionalOutputs(doc) 100 | 101 | except requests.RequestException as e: 102 | # Handle API errors gracefully 103 | error_message = "Error: Could not connect to ollama. Please ensure it's running and qwen2.5 is loaded."
104 | print(f"API Error: {e}") 105 | yield AdditionalOutputs(error_message) 106 | 107 | 108 | doc = gr.Textbox(value="", label="Current Document") 109 | 110 | stream = Stream( 111 | ReplyOnPause(edit), 112 | modality="audio", 113 | mode="send", 114 | additional_inputs=[doc], 115 | additional_outputs=[doc], 116 | additional_outputs_handler=lambda prev, current: current, 117 | ui_args={"title": "Voice Text Editor with FastRTC 🗣️"}, 118 | ) 119 | 120 | if __name__ == "__main__": 121 | if (mode := os.getenv("MODE")) == "UI": 122 | stream.ui.launch(server_port=7860) 123 | elif mode == "PHONE": 124 | stream.fastphone(host="0.0.0.0", port=7860) 125 | else: 126 | stream.ui.launch(server_port=7860) 127 | -------------------------------------------------------------------------------- /demo/webrtc_vs_websocket/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Webrtc Vs Websocket 3 | emoji: 🧪 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Compare Round Trip Times between WebRTC and Websockets 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|ELEVENLABS_API_KEY, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/webrtc_vs_websocket/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | elevenlabs 3 | groq 4 | anthropic 5 | twilio 6 | python-dotenv 7 | -------------------------------------------------------------------------------- /demo/whisper_realtime/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Whisper Realtime Transcription 3 | emoji: 👂 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Transcribe audio in realtime with Whisper 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/whisper_realtime/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | app_file: app.py 3 | colorFrom: purple 4 | colorTo: red 5 | emoji: 👂 6 | license: mit 7 | pinned: false 8 | sdk: gradio 9 | sdk_version: 5.16.0 10 | short_description: Transcribe audio in realtime - Gradio UI version 11 | tags: 12 | - webrtc 13 | - websocket 14 | - gradio 15 | - secret|HF_TOKEN 16 | - secret|GROQ_API_KEY 17 | title: Whisper Realtime Transcription (Gradio UI) 18 | --- 19 | 20 | 21 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/whisper_realtime/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import gradio as gr 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | from fastapi import FastAPI 8 | from fastapi.responses import HTMLResponse, StreamingResponse 9 | from fastrtc 
import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | Stream, 13 | audio_to_bytes, 14 | get_twilio_turn_credentials, 15 | ) 16 | from gradio.utils import get_space 17 | from groq import AsyncClient 18 | from pydantic import BaseModel 19 | 20 | cur_dir = Path(__file__).parent 21 | 22 | load_dotenv() 23 | 24 | 25 | groq_client = AsyncClient() 26 | 27 | 28 | async def transcribe(audio: tuple[int, np.ndarray], transcript: str): 29 | response = await groq_client.audio.transcriptions.create( 30 | file=("audio-file.mp3", audio_to_bytes(audio)), 31 | model="whisper-large-v3-turbo", 32 | response_format="verbose_json", 33 | ) 34 | yield AdditionalOutputs(transcript + "\n" + response.text) 35 | 36 | 37 | transcript = gr.Textbox(label="Transcript") 38 | stream = Stream( 39 | ReplyOnPause(transcribe), 40 | modality="audio", 41 | mode="send", 42 | additional_inputs=[transcript], 43 | additional_outputs=[transcript], 44 | additional_outputs_handler=lambda a, b: b, 45 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 46 | concurrency_limit=5 if get_space() else None, 47 | time_limit=90 if get_space() else None, 48 | ) 49 | 50 | app = FastAPI() 51 | 52 | stream.mount(app) 53 | 54 | 55 | class SendInput(BaseModel): 56 | webrtc_id: str 57 | transcript: str 58 | 59 | 60 | @app.post("/send_input") 61 | def send_input(body: SendInput): 62 | stream.set_input(body.webrtc_id, body.transcript) 63 | 64 | 65 | @app.get("/transcript") 66 | def _(webrtc_id: str): 67 | async def output_stream(): 68 | async for output in stream.output_stream(webrtc_id): 69 | transcript = output.args[0].split("\n")[-1] 70 | yield f"event: output\ndata: {transcript}\n\n" 71 | 72 | return StreamingResponse(output_stream(), media_type="text/event-stream") 73 | 74 | 75 | @app.get("/") 76 | def index(): 77 | rtc_config = get_twilio_turn_credentials() if get_space() else None 78 | html_content = (cur_dir / "index.html").read_text() 79 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 80 | return HTMLResponse(content=html_content) 81 | 82 | 83 | if __name__ == "__main__": 84 | import os 85 | 86 | if (mode := os.getenv("MODE")) == "UI": 87 | stream.ui.launch(server_port=7860) 88 | elif mode == "PHONE": 89 | stream.fastphone(host="0.0.0.0", port=7860) 90 | else: 91 | import uvicorn 92 | 93 | uvicorn.run(app, host="0.0.0.0", port=7860) 94 | -------------------------------------------------------------------------------- /demo/whisper_realtime/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | groq 3 | python-dotenv -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | fastrtc.org -------------------------------------------------------------------------------- /docs/Discord-Symbol-White.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | ## Demo does not work when deploying to the cloud 2 | 3 | Make sure you are using a TURN server. See [deployment](../deployment). 
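For reference, here is a minimal sketch of wiring TURN credentials into a `Stream`. The `echo` handler below is only a stand-in; `get_twilio_turn_credentials()` reads `TWILIO_ACCOUNT_SID`/`TWILIO_AUTH_TOKEN` from the environment, as the demos in this repo do, and the demos also pass `get_cloudflare_turn_credentials_async` as `rtc_configuration` for the same purpose.

```python
from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials


def echo(audio):
    # Stand-in handler: play the caller's audio straight back.
    yield audio


stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    # Relay media through a TURN server so the connection also works
    # behind cloud NATs and firewalls.
    rtc_configuration=get_twilio_turn_credentials(),
)
```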
4 | 5 | ## Recorded input audio sounds muffled during output audio playback 6 | 7 | By default, the microphone is [configured](https://github.com/freddyaboulton/gradio-webrtc/blob/903f1f70bd586f638ad3b5a3940c7a8ec70ad1f5/backend/gradio_webrtc/webrtc.py#L575) to do echo cancellation. 8 | This is what's causing the recorded audio to sound muffled when the streamed audio starts playing. 9 | You can disable this via the `track_constraints` (see [Advanced Configuration](../advanced-configuration)) with the following code: 10 | 11 | ```python 12 | stream = Stream( 13 | track_constraints={ 14 | "echoCancellation": False, 15 | "noiseSuppression": {"exact": True}, 16 | "autoGainControl": {"exact": True}, 17 | "sampleRate": {"ideal": 24000}, 18 | "sampleSize": {"ideal": 16}, 19 | "channelCount": {"exact": 1}, 20 | }, 21 | rtc_configuration=None, 22 | mode="send-receive", 23 | modality="audio", 24 | ) 25 | ``` 26 | 27 | ## How to raise errors in the UI 28 | 29 | You can raise `WebRTCError` in order for an error message to show up in the user's screen. This is similar to how `gr.Error` works. 30 | 31 | !!! warning 32 | 33 | The `WebRTCError` class is only supported in the `WebRTC` component. 34 | 35 | Here is a simple example: 36 | 37 | ```python 38 | def generation(num_steps): 39 | for _ in range(num_steps): 40 | segment = AudioSegment.from_file( 41 | "/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav" 42 | ) 43 | yield ( 44 | segment.frame_rate, 45 | np.array(segment.get_array_of_samples()).reshape(1, -1), 46 | ) 47 | time.sleep(3.5) 48 | raise WebRTCError("This is a test error") 49 | 50 | with gr.Blocks() as demo: 51 | audio = WebRTC( 52 | label="Stream", 53 | mode="receive", 54 | modality="audio", 55 | ) 56 | num_steps = gr.Slider( 57 | label="Number of Steps", 58 | minimum=1, 59 | maximum=10, 60 | step=1, 61 | value=5, 62 | ) 63 | button = gr.Button("Generate") 64 | 65 | audio.stream( 66 | fn=generation, inputs=[num_steps], outputs=[audio], trigger=button.click 67 | ) 68 | 69 | demo.launch() 70 | ``` -------------------------------------------------------------------------------- /docs/fastrtc_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/docs/fastrtc_logo.png -------------------------------------------------------------------------------- /docs/fastrtc_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/docs/fastrtc_logo_small.png -------------------------------------------------------------------------------- /docs/gradio-logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/reference/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## `audio_to_bytes` 4 | 5 | Convert an audio tuple containing sample rate and numpy array data into bytes. 6 | Useful for sending data to external APIs from `ReplyOnPause` handler. 
7 | 8 | Parameters 9 | ``` 10 | audio : tuple[int, np.ndarray] 11 | A tuple containing: 12 | - sample_rate (int): The audio sample rate in Hz 13 | - data (np.ndarray): The audio data as a numpy array 14 | ``` 15 | 16 | Returns 17 | ``` 18 | bytes 19 | The audio data encoded as bytes, suitable for transmission or storage 20 | ``` 21 | 22 | Example 23 | ```python 24 | >>> sample_rate = 44100 25 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 26 | >>> audio_tuple = (sample_rate, audio_data) 27 | >>> audio_bytes = audio_to_bytes(audio_tuple) 28 | ``` 29 | 30 | ## `audio_to_file` 31 | 32 | Save an audio tuple containing sample rate and numpy array data to a file. 33 | 34 | Parameters 35 | ``` 36 | audio : tuple[int, np.ndarray] 37 | A tuple containing: 38 | - sample_rate (int): The audio sample rate in Hz 39 | - data (np.ndarray): The audio data as a numpy array 40 | ``` 41 | Returns 42 | ``` 43 | str 44 | The path to the saved audio file 45 | ``` 46 | Example 47 | ``` 48 | ```python 49 | >>> sample_rate = 44100 50 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 51 | >>> audio_tuple = (sample_rate, audio_data) 52 | >>> file_path = audio_to_file(audio_tuple) 53 | >>> print(f"Audio saved to: {file_path}") 54 | ``` 55 | 56 | ## `aggregate_bytes_to_16bit` 57 | Aggregate bytes to 16-bit audio samples. 58 | 59 | This function takes an iterator of chunks and aggregates them into 16-bit audio samples. 60 | It handles incomplete samples and combines them with the next chunk. 61 | 62 | Parameters 63 | ``` 64 | chunks_iterator : Iterator[bytes] 65 | An iterator of byte chunks to aggregate 66 | ``` 67 | Returns 68 | ``` 69 | Iterator[NDArray[np.int16]] 70 | An iterator of 16-bit audio samples 71 | ``` 72 | Example 73 | ```python 74 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 75 | >>> for chunk in aggregate_bytes_to_16bit(chunks_iterator): 76 | >>> print(chunk) 77 | ``` 78 | 79 | ## `async_aggregate_bytes_to_16bit` 80 | 81 | Aggregate bytes to 16-bit audio samples asynchronously. 82 | 83 | Parameters 84 | ``` 85 | chunks_iterator : Iterator[bytes] 86 | An iterator of byte chunks to aggregate 87 | ``` 88 | Returns 89 | ``` 90 | Iterator[NDArray[np.int16]] 91 | An iterator of 16-bit audio samples 92 | ``` 93 | Example 94 | ```python 95 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 96 | >>> for chunk in async_aggregate_bytes_to_16bit(chunks_iterator): 97 | >>> print(chunk) 98 | ``` 99 | 100 | ## `wait_for_item` 101 | 102 | Wait for an item from an asyncio.Queue with a timeout. 
103 | 104 | Parameters 105 | ``` 106 | queue : asyncio.Queue 107 | The queue to wait for an item from 108 | timeout : float 109 | The timeout in seconds 110 | ``` 111 | Returns 112 | ``` 113 | Any 114 | The item from the queue or None if the timeout is reached 115 | ``` 116 | 117 | Example 118 | ```python 119 | >>> queue = asyncio.Queue() 120 | >>> queue.put_nowait(1) 121 | >>> item = await wait_for_item(queue) 122 | >>> print(item) 123 | ``` -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --white: #ffffff; 3 | --galaxy: #393931; 4 | --space: #2d2d2a; 5 | --rock: #2d2d2a; 6 | --cosmic: #ffdd00c5; 7 | --radiate: #d6cec0; 8 | --sun: #ffac2f; 9 | --neutron: #F7F5F6; 10 | --supernova: #ffdd00; 11 | --asteroid: #d6cec0; 12 | } 13 | 14 | [data-md-color-scheme="fastrtc-dark"] { 15 | --md-default-bg-color: var(--galaxy); 16 | --md-default-fg-color: var(--white); 17 | --md-default-fg-color--light: var(--white); 18 | --md-default-fg-color--lighter: var(--white); 19 | --md-primary-fg-color: var(--space); 20 | --md-primary-bg-color: var(--white); 21 | --md-accent-fg-color: var(--sun); 22 | 23 | --md-typeset-color: var(--white); 24 | --md-typeset-a-color: var(--supernova); 25 | --md-typeset-mark-color: var(--sun); 26 | 27 | --md-code-fg-color: var(--white); 28 | --md-code-bg-color: var(--rock); 29 | 30 | --md-code-hl-comment-color: var(--asteroid); 31 | --md-code-hl-punctuation-color: var(--supernova); 32 | --md-code-hl-generic-color: var(--supernova); 33 | --md-code-hl-variable-color: var(--white); 34 | --md-code-hl-string-color: var(--radiate); 35 | --md-code-hl-keyword-color: var(--supernova); 36 | --md-code-hl-operator-color: var(--supernova); 37 | --md-code-hl-number-color: var(--radiate); 38 | --md-code-hl-special-color: var(--supernova); 39 | --md-code-hl-function-color: var(--neutron); 40 | --md-code-hl-constant-color: var(--radiate); 41 | --md-code-hl-name-color: var(--md-code-fg-color); 42 | 43 | --md-typeset-del-color: hsla(6, 90%, 60%, 0.15); 44 | --md-typeset-ins-color: hsla(150, 90%, 44%, 0.15); 45 | 46 | --md-typeset-table-color: hsla(0, 0%, 100%, 0.12); 47 | --md-typeset-table-color--light: hsla(0, 0%, 100%, 0.035); 48 | } 49 | 50 | [data-md-color-scheme="fastrtc-dark"] div.admonition { 51 | color: var(--md-code-fg-color); 52 | background-color: var(--galaxy); 53 | } 54 | 55 | 56 | [data-md-color-scheme="fastrtc-dark"] .grid.cards>ul>li { 57 | border-color: var(--rock); 58 | border-width: thick; 59 | } 60 | 61 | [data-md-color-scheme="fastrtc-dark"] .grid.cards>ul>li>hr { 62 | border-color: var(--rock); 63 | } -------------------------------------------------------------------------------- /docs/userguide/audio-video.md: -------------------------------------------------------------------------------- 1 | # Audio-Video Streaming 2 | 3 | You can simultaneously stream audio and video using `AudioVideoStreamHandler` or `AsyncAudioVideoStreamHandler`. 4 | They are identical to the audio `StreamHandlers` with the addition of `video_receive` and `video_emit` methods which take and return a `numpy` array, respectively. 5 | 6 | Here is an example of the video handling functions for connecting with the Gemini multimodal API. In this case, we simply reflect the webcam feed back to the user but every second we'll send the latest webcam frame (and an additional image component) to the Gemini server. 
7 | 8 | Please see the "Gemini Audio Video Chat" example in the [cookbook](../../cookbook) for the complete code. 9 | 10 | ``` python title="Async Gemini Video Handling" 11 | 12 | async def video_receive(self, frame: np.ndarray): 13 | """Send video frames to the server""" 14 | if self.session: 15 | # send image every 1 second 16 | # otherwise we flood the API 17 | if time.time() - self.last_frame_time > 1: 18 | self.last_frame_time = time.time() 19 | await self.session.send(encode_image(frame)) 20 | if self.latest_args[2] is not None: 21 | await self.session.send(encode_image(self.latest_args[2])) 22 | self.video_queue.put_nowait(frame) 23 | 24 | async def video_emit(self) -> VideoEmitType: 25 | """Return video frames to the client""" 26 | return await self.video_queue.get() 27 | ``` -------------------------------------------------------------------------------- /docs/userguide/gradio.md: -------------------------------------------------------------------------------- 1 | # Gradio Component 2 | 3 | The automatic gradio UI is a great way to test your stream. However, you may want to customize the UI to your liking or simply build a standalone Gradio application. 4 | 5 | ## The WebRTC Component 6 | 7 | To build a standalone Gradio application, you can use the `WebRTC` component and implement the `stream` event. 8 | Similarly to the `Stream` object, you must set the `mode` and `modality` arguments and pass in a `handler`. 9 | 10 | In the `stream` event, you pass in your handler as well as the input and output components. 11 | 12 | ``` py 13 | import gradio as gr 14 | from fastrtc import WebRTC, ReplyOnPause 15 | 16 | def response(audio: tuple[int, np.ndarray]): 17 | """This function must yield audio frames""" 18 | ... 19 | yield audio 20 | 21 | 22 | with gr.Blocks() as demo: 23 | gr.HTML( 24 | """ 25 |

26 | Chat (Powered by WebRTC ⚡️) 27 |

28 | """ 29 | ) 30 | with gr.Column(): 31 | with gr.Group(): 32 | audio = WebRTC( 33 | mode="send-receive", 34 | modality="audio", 35 | ) 36 | audio.stream(fn=ReplyOnPause(response), 37 | inputs=[audio], outputs=[audio], 38 | time_limit=60) 39 | demo.launch() 40 | ``` 41 | 42 | ## Additional Outputs 43 | 44 | In order to modify other components from within the WebRTC stream, you must yield an instance of `AdditionalOutputs` and add an `on_additional_outputs` event to the `WebRTC` component. 45 | 46 | This is common for displaying a multimodal text/audio conversation in a Chatbot UI. 47 | 48 | === "Code" 49 | 50 | ``` py title="Additional Outputs" 51 | from fastrtc import AdditionalOutputs, WebRTC 52 | 53 | def transcribe(audio: tuple[int, np.ndarray], 54 | transformers_convo: list[dict], 55 | gradio_convo: list[dict]): 56 | response = model.generate(**inputs, max_length=256) 57 | transformers_convo.append({"role": "assistant", "content": response}) 58 | gradio_convo.append({"role": "assistant", "content": response}) 59 | yield AdditionalOutputs(transformers_convo, gradio_convo) # (1) 60 | 61 | 62 | with gr.Blocks() as demo: 63 | gr.HTML( 64 | """ 65 |

66 | Talk to Qwen2Audio (Powered by WebRTC ⚡️) 67 |

68 | """ 69 | ) 70 | transformers_convo = gr.State(value=[]) 71 | with gr.Row(): 72 | with gr.Column(): 73 | audio = WebRTC( 74 | label="Stream", 75 | mode="send", # (2) 76 | modality="audio", 77 | ) 78 | with gr.Column(): 79 | transcript = gr.Chatbot(label="transcript", type="messages") 80 | 81 | audio.stream(ReplyOnPause(transcribe), 82 | inputs=[audio, transformers_convo, transcript], 83 | outputs=[audio], time_limit=90) 84 | audio.on_additional_outputs(lambda s,a: (s,a), # (3) 85 | outputs=[transformers_convo, transcript], 86 | queue=False, show_progress="hidden") 87 | demo.launch() 88 | ``` 89 | 90 | 1. Pass your data to `AdditionalOutputs` and yield it. 91 | 2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`. 92 | 3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update. 93 | === "Notes" 94 | 1. Pass your data to `AdditionalOutputs` and yield it. 95 | 2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`. 96 | 3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update. -------------------------------------------------------------------------------- /docs/userguide/video.md: -------------------------------------------------------------------------------- 1 | # Video Streaming 2 | 3 | ## Input/Output Streaming 4 | 5 | We already saw this example in the [Quickstart](../../#quickstart) and the [Core Concepts](../streams) section. 6 | 7 | === "Code" 8 | 9 | ``` py title="Input/Output Streaming" 10 | from fastrtc import Stream 11 | import gradio as gr 12 | 13 | def detection(image, conf_threshold=0.3): # (1) 14 | processed_frame = process_frame(image, conf_threshold) 15 | return processed_frame # (2) 16 | 17 | stream = Stream( 18 | handler=detection, 19 | modality="video", 20 | mode="send-receive", # (3) 21 | additional_inputs=[ 22 | gr.Slider(minimum=0, maximum=1, step=0.01, value=0.3) 23 | ], 24 | ) 25 | ``` 26 | 27 | 1. The webcam frame will be represented as a numpy array of shape (height, width, RGB). 28 | 2. The function must return a numpy array. It can take arbitrary values from other components. 29 | 3. Set the `modality="video"` and `mode="send-receive"` 30 | === "Notes" 31 | 1. The webcam frame will be represented as a numpy array of shape (height, width, RGB). 32 | 2. The function must return a numpy array. It can take arbitrary values from other components. 33 | 3. Set the `modality="video"` and `mode="send-receive"` 34 | 35 | ## Server-to-Client Only 36 | 37 | In this case, we stream from the server to the client so we will write a generator function that yields the next frame from the video (as a numpy array) 38 | and set the `mode="receive"` in the `WebRTC` component. 
39 | 40 | === "Code" 41 | ``` py title="Server-To-Client" 42 | from fastrtc import Stream 43 | import cv2 44 | 45 | def generation(): 46 | url = "https://download.tsi.telecom-paristech.fr/gpac/dataset/dash/uhd/mux_sources/hevcds_720p30_2M.mp4" 47 | cap = cv2.VideoCapture(url) 48 | iterating = True 49 | while iterating: 50 | iterating, frame = cap.read() 51 | yield frame 52 | 53 | stream = Stream( 54 | handler=generation, 55 | modality="video", 56 | mode="receive" 57 | ) 58 | ``` 59 | 60 | ## Skipping Frames 61 | 62 | If your event handler is not quite real-time yet, then the output feed will look very laggy. 63 | 64 | To fix this, you can set the `skip_frames` parameter to `True`. This will skip the frames that are received while the event handler is still running. 65 | 66 | ``` py title="Skipping Frames" 67 | import time 68 | 69 | import numpy as np 70 | from fastrtc import Stream, VideoStreamHandler 71 | 72 | 73 | def process_image(image): 74 | time.sleep( 75 | 0.2 76 | ) # Simulating 200ms processing time per frame; input arrives faster (30 FPS). 77 | return np.flip(image, axis=0) 78 | 79 | 80 | stream = Stream( 81 | handler=VideoStreamHandler(process_image, skip_frames=True), 82 | modality="video", 83 | mode="send-receive", 84 | ) 85 | 86 | stream.ui.launch() 87 | ``` 88 | 89 | ## Setting the Output Frame Rate 90 | 91 | You can set the output frame rate by setting the `fps` parameter in the `VideoStreamHandler`. 92 | 93 | ``` py title="Setting the Output Frame Rate" 94 | def generation(): 95 | url = "https://github.com/user-attachments/assets/9636dc97-4fee-46bb-abb8-b92e69c08c71" 96 | cap = cv2.VideoCapture(url) 97 | iterating = True 98 | 99 | # FPS calculation variables 100 | frame_count = 0 101 | start_time = time.time() 102 | fps = 0 103 | 104 | while iterating: 105 | iterating, frame = cap.read() 106 | 107 | # Calculate and print FPS 108 | frame_count += 1 109 | elapsed_time = time.time() - start_time 110 | if elapsed_time >= 1.0: # Update FPS every second 111 | fps = frame_count / elapsed_time 112 | yield frame, AdditionalOutputs(fps) 113 | frame_count = 0 114 | start_time = time.time() 115 | else: 116 | yield frame 117 | 118 | 119 | stream = Stream( 120 | handler=VideoStreamHandler(generation, fps=60), 121 | modality="video", 122 | mode="receive", 123 | additional_outputs=[gr.Number(label="FPS")], 124 | additional_outputs_handler=lambda prev, cur: cur, 125 | ) 126 | 127 | stream.ui.launch() 128 | ``` 129 | -------------------------------------------------------------------------------- /docs/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## `audio_to_bytes` 4 | 5 | Convert an audio tuple containing sample rate and numpy array data into bytes. 6 | Useful for sending data to external APIs from `ReplyOnPause` handler. 7 | 8 | Parameters 9 | ``` 10 | audio : tuple[int, np.ndarray] 11 | A tuple containing: 12 | - sample_rate (int): The audio sample rate in Hz 13 | - data (np.ndarray): The audio data as a numpy array 14 | ``` 15 | 16 | Returns 17 | ``` 18 | bytes 19 | The audio data encoded as bytes, suitable for transmission or storage 20 | ``` 21 | 22 | Example 23 | ```python 24 | >>> sample_rate = 44100 25 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 26 | >>> audio_tuple = (sample_rate, audio_data) 27 | >>> audio_bytes = audio_to_bytes(audio_tuple) 28 | ``` 29 | 30 | ## `audio_to_file` 31 | 32 | Save an audio tuple containing sample rate and numpy array data to a file. 
33 | 34 | Parameters 35 | ``` 36 | audio : tuple[int, np.ndarray] 37 | A tuple containing: 38 | - sample_rate (int): The audio sample rate in Hz 39 | - data (np.ndarray): The audio data as a numpy array 40 | ``` 41 | Returns 42 | ``` 43 | str 44 | The path to the saved audio file 45 | ``` 46 | Example 47 | 48 | ```python 49 | >>> sample_rate = 44100 50 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 51 | >>> audio_tuple = (sample_rate, audio_data) 52 | >>> file_path = audio_to_file(audio_tuple) 53 | >>> print(f"Audio saved to: {file_path}") 54 | ``` 55 | 56 | ## `aggregate_bytes_to_16bit` 57 | Aggregate bytes to 16-bit audio samples. 58 | 59 | This function takes an iterator of chunks and aggregates them into 16-bit audio samples. 60 | It handles incomplete samples and combines them with the next chunk. 61 | 62 | Parameters 63 | ``` 64 | chunks_iterator : Iterator[bytes] 65 | An iterator of byte chunks to aggregate 66 | ``` 67 | Returns 68 | ``` 69 | Iterator[NDArray[np.int16]] 70 | An iterator of 16-bit audio samples 71 | ``` 72 | Example 73 | ```python 74 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 75 | >>> for chunk in aggregate_bytes_to_16bit(chunks_iterator): 76 | ...     print(chunk) 77 | ``` 78 | 79 | ## `async_aggregate_bytes_to_16bit` 80 | 81 | Aggregate bytes to 16-bit audio samples asynchronously. 82 | 83 | Parameters 84 | ``` 85 | chunks_iterator : Iterator[bytes] 86 | An iterator of byte chunks to aggregate 87 | ``` 88 | Returns 89 | ``` 90 | Iterator[NDArray[np.int16]] 91 | An iterator of 16-bit audio samples 92 | ``` 93 | Example 94 | ```python 95 | >>> # chunks is an async iterable of bytes (e.g. from an async API client) 96 | >>> async for samples in async_aggregate_bytes_to_16bit(chunks): 97 | ...     print(samples) 98 | ``` 99 | 100 | ## `wait_for_item` 101 | 102 | Wait for an item from an asyncio.Queue with a timeout. 103 | 104 | Parameters 105 | ``` 106 | queue : asyncio.Queue 107 | The queue to wait for an item from 108 | timeout : float 109 | The timeout in seconds 110 | ``` 111 | Returns 112 | ``` 113 | Any 114 | The item from the queue or None if the timeout is reached 115 | ``` 116 | 117 | Example 118 | ```python 119 | >>> queue = asyncio.Queue() 120 | >>> queue.put_nowait(1) 121 | >>> item = await wait_for_item(queue) 122 | >>> print(item) 123 | ``` -------------------------------------------------------------------------------- /frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["prettier-plugin-svelte"], 3 | "overrides": [ 4 | { 5 | "files": "*.svelte", 6 | "options": { 7 | "parser": "svelte" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /frontend/Example.svelte: -------------------------------------------------------------------------------- 1 | 21 | 22 | {#if value} 23 | {#if playable()} 24 |
30 |
38 | {:else} 39 |
{value}
40 | {/if} 41 | {/if} 42 | 43 | 74 | -------------------------------------------------------------------------------- /frontend/gradio.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: [], 3 | svelte: { 4 | preprocess: [], 5 | }, 6 | build: { 7 | target: "modules", 8 | }, 9 | }; 10 | -------------------------------------------------------------------------------- /frontend/index.ts: -------------------------------------------------------------------------------- 1 | export { default as BaseInteractiveVideo } from "./shared/InteractiveVideo.svelte"; 2 | export { prettyBytes, playable, loaded } from "./shared/utils"; 3 | export { default as BaseExample } from "./Example.svelte"; 4 | import { default as Index } from "./Index.svelte"; 5 | export default Index; 6 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@freddyaboulton/fastrtc-component", 3 | "version": "0.0.1", 4 | "description": "Gradio UI packages", 5 | "type": "module", 6 | "author": "", 7 | "license": "ISC", 8 | "private": false, 9 | "dependencies": { 10 | "@ffmpeg/ffmpeg": "^0.12.10", 11 | "@ffmpeg/util": "^0.12.1", 12 | "@gradio/atoms": "0.9.2", 13 | "@gradio/client": "1.7.0", 14 | "@gradio/icons": "0.8.0", 15 | "@gradio/image": "0.16.4", 16 | "@gradio/markdown": "^0.10.3", 17 | "@gradio/statustracker": "0.9.1", 18 | "@gradio/textbox": "^0.10.10", 19 | "@gradio/upload": "0.13.3", 20 | "@gradio/utils": "0.7.0", 21 | "@gradio/wasm": "0.14.2", 22 | "hls.js": "^1.5.16", 23 | "mrmime": "^2.0.0" 24 | }, 25 | "devDependencies": { 26 | "@gradio/preview": "0.12.0", 27 | "prettier": "^3.3.3", 28 | "prettier-plugin-svelte": "^3.3.3" 29 | }, 30 | "exports": { 31 | "./package.json": "./package.json", 32 | ".": { 33 | "gradio": "./index.ts", 34 | "svelte": "./dist/index.js", 35 | "types": "./dist/index.d.ts" 36 | }, 37 | "./example": { 38 | "gradio": "./Example.svelte", 39 | "svelte": "./dist/Example.svelte", 40 | "types": "./dist/Example.svelte.d.ts" 41 | } 42 | }, 43 | "peerDependencies": { 44 | "svelte": "^4.0.0" 45 | }, 46 | "main": "index.ts", 47 | "main_changeset": true, 48 | "repository": { 49 | "type": "git", 50 | "url": "git+https://github.com/gradio-app/fastrtc.git", 51 | "directory": "fastrtc/frontend" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /frontend/shared/InteractiveVideo.svelte: -------------------------------------------------------------------------------- 1 | 55 | 56 | 57 |
58 | 81 | 82 | 83 |
84 | 85 | 94 | -------------------------------------------------------------------------------- /frontend/shared/MicrophoneMuted.svelte: -------------------------------------------------------------------------------- 1 | 21 | -------------------------------------------------------------------------------- /frontend/shared/StaticVideo.svelte: -------------------------------------------------------------------------------- 1 | 97 | 98 | 99 | 100 | {#if value === "__webrtc_value__"} 101 | 102 | {/if} 103 |
104 | 123 |
124 | 125 | 142 | -------------------------------------------------------------------------------- /frontend/shared/WebcamPermissions.svelte: -------------------------------------------------------------------------------- 1 | 13 | 14 | 22 | 23 | 51 | -------------------------------------------------------------------------------- /frontend/shared/index.ts: -------------------------------------------------------------------------------- 1 | export { default as Video } from "./Video.svelte"; 2 | -------------------------------------------------------------------------------- /frontend/shared/stream_utils.ts: -------------------------------------------------------------------------------- 1 | export function get_devices(): Promise { 2 | return navigator.mediaDevices.enumerateDevices(); 3 | } 4 | 5 | export function handle_error(error: string): void { 6 | throw new Error(error); 7 | } 8 | 9 | export function set_local_stream( 10 | local_stream: MediaStream | null, 11 | video_source: HTMLVideoElement, 12 | ): void { 13 | video_source.srcObject = local_stream; 14 | video_source.muted = true; 15 | video_source.play(); 16 | } 17 | 18 | export async function get_video_stream( 19 | include_audio: boolean, 20 | video_source: HTMLVideoElement, 21 | device_id?: string, 22 | track_constraints?: MediaTrackConstraints, 23 | ): Promise { 24 | const fallback_constraints = track_constraints || { 25 | width: { ideal: 500 }, 26 | height: { ideal: 500 }, 27 | }; 28 | 29 | const constraints = { 30 | video: device_id 31 | ? { deviceId: { exact: device_id }, ...fallback_constraints } 32 | : fallback_constraints, 33 | audio: include_audio, 34 | }; 35 | 36 | return navigator.mediaDevices 37 | .getUserMedia(constraints) 38 | .then((local_stream: MediaStream) => { 39 | set_local_stream(local_stream, video_source); 40 | return local_stream; 41 | }); 42 | } 43 | 44 | export function set_available_devices( 45 | devices: MediaDeviceInfo[], 46 | kind: "videoinput" | "audioinput" = "videoinput", 47 | ): MediaDeviceInfo[] { 48 | const cameras = devices.filter( 49 | (device: MediaDeviceInfo) => device.kind === kind, 50 | ); 51 | 52 | return cameras; 53 | } 54 | -------------------------------------------------------------------------------- /frontend/shared/utils.ts: -------------------------------------------------------------------------------- 1 | import { toBlobURL } from "@ffmpeg/util"; 2 | import { FFmpeg } from "@ffmpeg/ffmpeg"; 3 | import { lookup } from "mrmime"; 4 | 5 | export type WebRTCValue = { 6 | textbox: string; 7 | webrtc_id: string; 8 | }; 9 | 10 | export const prettyBytes = (bytes: number): string => { 11 | let units = ["B", "KB", "MB", "GB", "PB"]; 12 | let i = 0; 13 | while (bytes > 1024) { 14 | bytes /= 1024; 15 | i++; 16 | } 17 | let unit = units[i]; 18 | return bytes.toFixed(1) + " " + unit; 19 | }; 20 | 21 | export const playable = (): boolean => { 22 | // TODO: Fix this 23 | // let video_element = document.createElement("video"); 24 | // let mime_type = mime.lookup(filename); 25 | // return video_element.canPlayType(mime_type) != ""; 26 | return true; // FIX BEFORE COMMIT - mime import causing issues 27 | }; 28 | 29 | export function loaded( 30 | node: HTMLVideoElement, 31 | { autoplay }: { autoplay: boolean }, 32 | ): any { 33 | async function handle_playback(): Promise { 34 | if (!autoplay) return; 35 | await node.play(); 36 | } 37 | 38 | node.addEventListener("loadeddata", handle_playback); 39 | 40 | return { 41 | destroy(): void { 42 | node.removeEventListener("loadeddata", handle_playback); 43 
| }, 44 | }; 45 | } 46 | 47 | export default async function loadFfmpeg(): Promise { 48 | const ffmpeg = new FFmpeg(); 49 | const baseURL = "https://unpkg.com/@ffmpeg/core@0.12.4/dist/esm"; 50 | 51 | await ffmpeg.load({ 52 | coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, "text/javascript"), 53 | wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, "application/wasm"), 54 | }); 55 | 56 | return ffmpeg; 57 | } 58 | 59 | export function blob_to_data_url(blob: Blob): Promise { 60 | return new Promise((fulfill, reject) => { 61 | let reader = new FileReader(); 62 | reader.onerror = reject; 63 | reader.onload = () => fulfill(reader.result as string); 64 | reader.readAsDataURL(blob); 65 | }); 66 | } 67 | 68 | export async function trimVideo( 69 | ffmpeg: FFmpeg, 70 | startTime: number, 71 | endTime: number, 72 | videoElement: HTMLVideoElement, 73 | ): Promise { 74 | const videoUrl = videoElement.src; 75 | const mimeType = lookup(videoElement.src) || "video/mp4"; 76 | const blobUrl = await toBlobURL(videoUrl, mimeType); 77 | const response = await fetch(blobUrl); 78 | const vidBlob = await response.blob(); 79 | const type = getVideoExtensionFromMimeType(mimeType) || "mp4"; 80 | const inputName = `input.${type}`; 81 | const outputName = `output.${type}`; 82 | 83 | try { 84 | if (startTime === 0 && endTime === 0) { 85 | return vidBlob; 86 | } 87 | 88 | await ffmpeg.writeFile( 89 | inputName, 90 | new Uint8Array(await vidBlob.arrayBuffer()), 91 | ); 92 | 93 | let command = [ 94 | "-i", 95 | inputName, 96 | ...(startTime !== 0 ? ["-ss", startTime.toString()] : []), 97 | ...(endTime !== 0 ? ["-to", endTime.toString()] : []), 98 | "-c:a", 99 | "copy", 100 | outputName, 101 | ]; 102 | 103 | await ffmpeg.exec(command); 104 | const outputData = await ffmpeg.readFile(outputName); 105 | const outputBlob = new Blob([outputData], { 106 | type: `video/${type}`, 107 | }); 108 | 109 | return outputBlob; 110 | } catch (error) { 111 | console.error("Error initializing FFmpeg:", error); 112 | return vidBlob; 113 | } 114 | } 115 | 116 | const getVideoExtensionFromMimeType = (mimeType: string): string | null => { 117 | const videoMimeToExtensionMap: { [key: string]: string } = { 118 | "video/mp4": "mp4", 119 | "video/webm": "webm", 120 | "video/ogg": "ogv", 121 | "video/quicktime": "mov", 122 | "video/x-msvideo": "avi", 123 | "video/x-matroska": "mkv", 124 | "video/mpeg": "mpeg", 125 | "video/3gpp": "3gp", 126 | "video/3gpp2": "3g2", 127 | "video/h261": "h261", 128 | "video/h263": "h263", 129 | "video/h264": "h264", 130 | "video/jpeg": "jpgv", 131 | "video/jpm": "jpm", 132 | "video/mj2": "mj2", 133 | "video/mpv": "mpv", 134 | "video/vnd.ms-playready.media.pyv": "pyv", 135 | "video/vnd.uvvu.mp4": "uvu", 136 | "video/vnd.vivo": "viv", 137 | "video/x-f4v": "f4v", 138 | "video/x-fli": "fli", 139 | "video/x-flv": "flv", 140 | "video/x-m4v": "m4v", 141 | "video/x-ms-asf": "asf", 142 | "video/x-ms-wm": "wm", 143 | "video/x-ms-wmv": "wmv", 144 | "video/x-ms-wmx": "wmx", 145 | "video/x-ms-wvx": "wvx", 146 | "video/x-sgi-movie": "movie", 147 | "video/x-smv": "smv", 148 | }; 149 | 150 | return videoMimeToExtensionMap[mimeType] || null; 151 | }; 152 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | # Upload a single demo space 2 | default: 3 | @just --list 4 | 5 | upload path: 6 | python upload_space.py demo/{{path}} 7 | 8 | # Upload all demo spaces 9 | upload-all: 10 | python upload_space.py 
demo --all 11 | 12 | # Run a demo with uvicorn 13 | run name: 14 | uvicorn demo.{{name}}.app:app --port 8000 15 | 16 | # Run the gradio ui for a demo 17 | gradio name: 18 | MODE=UI python demo/{{name}}/app.py 19 | 20 | # Run a demo with phone mode 21 | phone name: 22 | MODE=PHONE python demo/{{name}}/app.py 23 | 24 | call name: 25 | MODE=PHONE python demo/{{name}}/app.py 26 | 27 | # Upload the latest wheel file to PyPI using twine 28 | publish: 29 | #!/usr/bin/env python 30 | import glob 31 | import os 32 | from pathlib import Path 33 | 34 | # Find all wheel files in dist directory 35 | wheels = glob.glob('dist/*.whl') 36 | if not wheels: 37 | print("No wheel files found in dist directory") 38 | exit(1) 39 | 40 | # Sort by creation time to get the latest 41 | latest_wheel = max(wheels, key=os.path.getctime) 42 | print(f"Uploading {latest_wheel}") 43 | os.system(f"twine upload {latest_wheel}") 44 | 45 | # Upload the latest wheel to HF space with a random ID 46 | publish-dev: 47 | #!/usr/bin/env python 48 | import glob 49 | import os 50 | import uuid 51 | import subprocess 52 | 53 | # Find all wheel files in dist directory 54 | wheels = glob.glob('dist/*.whl') 55 | if not wheels: 56 | print("No wheel files found in dist directory") 57 | exit(1) 58 | 59 | # Sort by creation time to get the latest 60 | latest_wheel = max(wheels, key=os.path.getctime) 61 | wheel_name = os.path.basename(latest_wheel) 62 | 63 | # Generate random ID 64 | random_id = str(uuid.uuid4())[:8] 65 | 66 | # Define the HF path 67 | hf_space = "freddyaboulton/bucket" 68 | hf_path = f"wheels/fastrtc/{random_id}/" 69 | 70 | # Upload to Hugging Face space 71 | cmd = f"huggingface-cli upload {hf_space} {latest_wheel} {hf_path}{wheel_name} --repo-type dataset" 72 | subprocess.run(cmd, shell=True, check=True) 73 | 74 | # Print the URL 75 | print(f"Wheel uploaded successfully!") 76 | print(f"URL: https://huggingface.co/datasets/{hf_space}/resolve/main/{hf_path}{wheel_name}") 77 | 78 | # Build the package 79 | build: 80 | gradio cc build --no-generate-docs 81 | 82 | # Format the code 83 | format: 84 | ruff format . 85 | ruff check --fix . 86 | ruff check --select I --fix . 87 | cd frontend && npx prettier --write . && cd .. 
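# Serve the documentation locally with mkdocs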
88 | 89 | docs: 90 | mkdocs serve -a localhost:8081 -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: FastRTC 2 | site_url: https://fastrtc.org 3 | repo_name: fastrtc 4 | repo_url: https://github.com/gradio-app/fastrtc 5 | theme: 6 | name: material 7 | custom_dir: overrides 8 | palette: 9 | scheme: fastrtc-dark 10 | features: 11 | - content.code.copy 12 | - content.code.annotate 13 | - navigation.indexes 14 | logo: fastrtc_logo.png 15 | favicon: fastrtc_logo.png 16 | extra_css: 17 | - stylesheets/extra.css 18 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css 19 | nav: 20 | - Home: index.md 21 | - User Guide: 22 | - Core Concepts: userguide/streams.md 23 | - Audio Streaming: userguide/audio.md 24 | - Video Streaming: userguide/video.md 25 | - Audio-Video Streaming: userguide/audio-video.md 26 | - Gradio: userguide/gradio.md 27 | - API: userguide/api.md 28 | - Cookbook: cookbook.md 29 | - Deployment: deployment.md 30 | - Advanced Configuration: advanced-configuration.md 31 | - Plugin Ecosystem: 32 | - Text-to-Speech Gallery: text_to_speech_gallery.md 33 | - Speech-to-Text Gallery: speech_to_text_gallery.md 34 | - Turn-taking Gallery: turn_taking_gallery.md 35 | - Utils: utils.md 36 | - Frequently Asked Questions: faq.md 37 | - API Reference: 38 | - Stream: reference/stream.md 39 | - Pause Detection Handlers: reference/reply_on_pause.md 40 | - Stream Handlers: reference/stream_handlers.md 41 | - Utils: reference/utils.md 42 | - TURN Credentials: reference/credentials.md 43 | 44 | extra_javascript: 45 | - https://cdn.jsdelivr.net/npm/marked/marked.min.js 46 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js 47 | markdown_extensions: 48 | - pymdownx.highlight: 49 | anchor_linenums: true 50 | line_spans: __span 51 | pygments_lang_class: true 52 | - pymdownx.inlinehilite 53 | - pymdownx.snippets 54 | - pymdownx.superfences 55 | - pymdownx.tabbed: 56 | alternate_style: true 57 | - attr_list 58 | - md_in_html 59 | - pymdownx.emoji: 60 | emoji_index: !!python/name:material.extensions.emoji.twemoji 61 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 62 | - admonition 63 | - pymdownx.details 64 | plugins: 65 | - llmstxt: 66 | files: 67 | - output: llms.txt 68 | inputs: 69 | - index.md 70 | - userguide/*.md 71 | - deployment.md 72 | - advanced-configuration.md 73 | - faq.md 74 | - reference/*.md -------------------------------------------------------------------------------- /overrides/partials/header.html: -------------------------------------------------------------------------------- 1 | {#- 2 | This file was automatically generated - do not edit 3 | -#} 4 | {% set class = "md-header" %} 5 | {% if "navigation.tabs.sticky" in features %} 6 | {% set class = class ~ " md-header--shadow md-header--lifted" %} 7 | {% elif "navigation.tabs" not in features %} 8 | {% set class = class ~ " md-header--shadow" %} 9 | {% endif %} 10 |
11 | 79 | {% if "navigation.tabs.sticky" in features %} 80 | {% if "navigation.tabs" in features %} 81 | {% include "partials/tabs.html" %} 82 | {% endif %} 83 | {% endif %} 84 |
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "hatchling", 4 | "hatch-requirements-txt", 5 | "hatch-fancy-pypi-readme>=22.5.0", 6 | ] 7 | build-backend = "hatchling.build" 8 | 9 | [project] 10 | name = "fastrtc" 11 | version = "0.0.26" 12 | description = "The realtime communication library for Python" 13 | readme = "README.md" 14 | license = "MIT" 15 | license-files = ["LICENSE"] 16 | requires-python = ">=3.10" 17 | authors = [{ name = "Freddy Boulton", email = "arugula.ligand.92@icloud.com" }] 18 | keywords = [ 19 | "streaming", 20 | "webrtc", 21 | "realtime", 22 | "machine learning", 23 | "computer vision", 24 | "audio", 25 | "video", 26 | "image", 27 | "audio processing", 28 | "video processing", 29 | "gradio-custom-component", 30 | ] 31 | dependencies = [ 32 | "gradio>=4.0,<6.0", 33 | "aiortc", 34 | "aioice>=0.10.1", 35 | "audioop-lts;python_version>='3.13'", 36 | "librosa", 37 | "numpy>=2.0.2", # because of librosa 38 | "numba>=0.60.0", 39 | "standard-aifc;python_version>='3.13'", 40 | "standard-sunau;python_version>='3.13'", 41 | ] # Add dependencies here 42 | classifiers = [ 43 | 'Development Status :: 3 - Alpha', 44 | 'Operating System :: OS Independent', 45 | 'Programming Language :: Python :: 3', 46 | 'Programming Language :: Python :: 3 :: Only', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'Programming Language :: Python :: 3.13', 51 | 'Topic :: Internet', 52 | "Topic :: Software Development :: Libraries :: Application Frameworks", 53 | "Topic :: Software Development :: Libraries :: Python Modules", 54 | "Topic :: Software Development :: Libraries", 55 | "Topic :: Software Development", 56 | 'Topic :: Scientific/Engineering', 57 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 58 | 'Topic :: Scientific/Engineering :: Visualization', 59 | ] 60 | 61 | # The repository and space URLs are optional, but recommended. 62 | # Adding a repository URL will create a badge in the auto-generated README that links to the repository. 63 | # Adding a space URL will create a badge in the auto-generated README that links to the space. 64 | # This will make it easy for people to find your deployed demo or source code when they 65 | # encounter your project in the wild. 
66 | 67 | [project.urls] 68 | repository = "https://github.com/gradio-app/fastrtc" 69 | issues = "https://github.com/gradio-app/fastrtc/issues" 70 | Documentation = "https://fastrtc.org/" 71 | 72 | [project.optional-dependencies] 73 | dev = ["build", "twine", "httpx", "pytest", "pytest-asyncio"] 74 | vad = ["onnxruntime>=1.20.1"] 75 | tts = ["kokoro-onnx"] 76 | stopword = ["fastrtc-moonshine-onnx", "onnxruntime>=1.20.1"] 77 | stt = ["fastrtc-moonshine-onnx", "onnxruntime>=1.20.1"] 78 | 79 | [tool.hatch.build] 80 | artifacts = ["/backend/fastrtc/templates", "*.pyi"] 81 | 82 | [tool.hatch.build.targets.wheel] 83 | packages = ["/backend/fastrtc"] 84 | 85 | [tool.pytest.ini_options] 86 | testpaths = ["test/"] 87 | asyncio_mode = "auto" 88 | asyncio_default_fixture_loop_scope = "function" 89 | 90 | [tool.ruff] 91 | src = ["demo", "backend/fastrtc", "test"] 92 | target-version = "py310" 93 | extend-exclude = ["demo/phonic_chat", "demo/nextjs_voice_chat"] 94 | 95 | [tool.ruff.format] 96 | exclude = ["*.pyi"] 97 | quote-style = "double" 98 | indent-style = "space" 99 | skip-magic-trailing-comma = false 100 | line-ending = "auto" 101 | 102 | [tool.ruff.lint] 103 | select = ["E", "F", "W", "Q", "I", "UP"] 104 | 105 | # These can be turned on when the framework is more mature (Too many errors right now) 106 | exclude = ["D"] 107 | 108 | # Avoid enforcing line-length violations (`E501`) 109 | ignore = ["E501"] 110 | 111 | [tool.ruff.lint.pydocstyle] 112 | convention = "google" 113 | 114 | [tool.ruff.lint.per-file-ignores] 115 | "__init__.py" = ["E402"] 116 | "demo/talk_to_smolagents/app.py" = ["W291"] 117 | 118 | [tool.pyright] 119 | include = ["backend/fastrtc"] 120 | exclude = ["**/__pycache__", "**/*.pyi"] 121 | 122 | reportMissingImports = false 123 | reportMissingTypeStubs = false 124 | 125 | pythonVersion = "3.10" 126 | pythonPlatform = "Linux" 127 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/test/__init__.py -------------------------------------------------------------------------------- /test/test_tts.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastrtc.text_to_speech.tts import get_tts_model 3 | 4 | 5 | @pytest.mark.parametrize("model", ["kokoro"]) 6 | def test_tts_long_prompt(model): 7 | model = get_tts_model(model=model) 8 | prompt = "It may be that this communication will be considered as a madman's freak but at any rate it must be admitted that in its clearness and frankness it left nothing to be desired The serious part of it was that the Federal Government had undertaken to treat a sale by auction as a valid concession of these undiscovered territories Opinions on the matter were many Some readers saw in it only one of those prodigious outbursts of American humbug which would exceed the limits of puffism if the depths of human credulity were not unfathomable" 9 | 10 | for i, chunk in enumerate(model.stream_tts_sync(prompt)): 11 | print(f"Chunk {i}: {chunk[1].shape}") 12 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from fastrtc.utils import audio_to_float32, audio_to_int16 4 | 5 | 6 | def 
test_audio_to_float32_valid_int16(): 7 | audio = np.array([-32768, 0, 32767], dtype=np.int16) 8 | expected = np.array([-1.0, 0.0, 32767 / 32768.0], dtype=np.float32) 9 | result = audio_to_float32(audio) 10 | np.testing.assert_array_almost_equal(result, expected) 11 | 12 | 13 | def test_audio_to_float32_valid_float32(): 14 | audio = np.array([-1.0, 0.0, 1.0], dtype=np.float32) 15 | result = audio_to_float32(audio) 16 | np.testing.assert_array_equal(result, audio) 17 | 18 | 19 | def test_audio_to_float32_empty_array(): 20 | audio = np.array([], dtype=np.int16) 21 | result = audio_to_float32(audio) 22 | np.testing.assert_array_equal(result, np.array([], dtype=np.float32)) 23 | 24 | 25 | def test_audio_to_float32_invalid_dtype(): 26 | audio = np.array([1, 2, 3], dtype=np.int32) 27 | with pytest.raises(TypeError, match="Unsupported audio data type"): 28 | audio_to_float32(audio) # type: ignore 29 | 30 | 31 | def test_audio_to_int16_valid_float32(): 32 | audio = np.array([-1.0, 0.0, 1.0], dtype=np.float32) 33 | expected = np.array([-32767, 0, 32767], dtype=np.int16) 34 | result = audio_to_int16(audio) 35 | np.testing.assert_array_equal(result, expected) 36 | 37 | 38 | def test_audio_to_int16_valid_int16(): 39 | audio = np.array([-32768, 0, 32767], dtype=np.int16) 40 | result = audio_to_int16(audio) 41 | np.testing.assert_array_equal(result, audio) 42 | 43 | 44 | def test_audio_to_int16_empty_array(): 45 | audio = np.array([], dtype=np.float32) 46 | result = audio_to_int16(audio) 47 | np.testing.assert_array_equal(result, np.array([], dtype=np.int16)) 48 | 49 | 50 | def test_audio_to_int16_invalid_dtype(): 51 | audio = np.array([1, 2, 3], dtype=np.int32) 52 | with pytest.raises(TypeError, match="Unsupported audio data type"): 53 | audio_to_int16(audio) # type: ignore 54 | 55 | 56 | def test_legacy_arguments(): 57 | result = audio_to_float32((16000, np.zeros(10, dtype=np.int16))) 58 | np.testing.assert_array_equal(result, np.zeros(10, dtype=np.float32)) 59 | 60 | result = audio_to_int16((16000, np.zeros(10, dtype=np.float32))) 61 | np.testing.assert_array_equal(result, np.zeros(10, dtype=np.int16)) 62 | --------------------------------------------------------------------------------
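The tests above pin down the conversion conventions: `audio_to_float32` scales int16 PCM into the [-1.0, 1.0] float32 range, `audio_to_int16` converts back, and both still accept the legacy `(sample_rate, array)` tuple form. A minimal usage sketch built only on what these tests exercise — the synthetic tone and the attenuation step are illustrative, not part of the library:

```python
import numpy as np
from fastrtc.utils import audio_to_float32, audio_to_int16

# Half a second of a 440 Hz tone as int16 samples.
sample_rate = 16000
t = np.arange(int(0.5 * sample_rate)) / sample_rate
pcm = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

samples = audio_to_float32(pcm)              # int16 -> float32 in [-1.0, 1.0]
samples = np.clip(samples * 0.5, -1.0, 1.0)  # example processing step: attenuate
pcm_out = audio_to_int16(samples)            # float32 -> int16 for playback
```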