├── .github └── workflows │ ├── docs.yml │ ├── tests-frontend.yml │ └── tests.yml ├── .gitignore ├── CNAME ├── LICENSE ├── README.md ├── backend └── fastrtc │ ├── __init__.py │ ├── credentials.py │ ├── pause_detection │ ├── __init__.py │ ├── protocol.py │ └── silero.py │ ├── py.typed │ ├── reply_on_pause.py │ ├── reply_on_stopwords.py │ ├── speech_to_text │ ├── __init__.py │ ├── stt_.py │ └── test_file.wav │ ├── stream.py │ ├── templates │ ├── component │ │ ├── _basePickBy-BWyW5hVA.js │ │ ├── _baseUniq-BfI_PfuI.js │ │ ├── arc-BHDr2DIN.js │ │ ├── architectureDiagram-IEHRJDOE-SNjtt7L7.js │ │ ├── assets │ │ │ └── worker-lPYB70QI.js │ │ ├── blockDiagram-JOT3LUYC-DoVxwmWs.js │ │ ├── c4Diagram-VJAJSXHY-BFLwyIU9.js │ │ ├── channel-BLI8LD7T.js │ │ ├── chunk-4BMEZGHF-4N88GRXN.js │ │ ├── chunk-A2AXSNBT-C4qvwI5K.js │ │ ├── chunk-AEK57VVT-OrsXQu-U.js │ │ ├── chunk-D6G4REZN-BSQJOIKu.js │ │ ├── chunk-RZ5BOZE2-BQm8zocb.js │ │ ├── chunk-XZIHB7SX-DIHERCaT.js │ │ ├── classDiagram-GIVACNV2-KrkkVrlR.js │ │ ├── classDiagram-v2-COTLJTTW-KrkkVrlR.js │ │ ├── clone-D_f12Uao.js │ │ ├── cytoscape.esm-C2cgT2B2.js │ │ ├── dagre-OKDRZEBW-BpZfNC14.js │ │ ├── diagram-SSKATNLV-OTX44Aig.js │ │ ├── diagram-VNBRO52H-CxKrAEhh.js │ │ ├── erDiagram-Q7BY3M3F-CO2pasYc.js │ │ ├── flowDiagram-4HSFHLVR-DiHdHcaJ.js │ │ ├── ganttDiagram-APWFNJXF-BzPYWX9W.js │ │ ├── gitGraphDiagram-7IBYFJ6S-DFMHUBmV.js │ │ ├── graph-as_7zmXK.js │ │ ├── index-xxHpJ_RR.js │ │ ├── index.js │ │ ├── infoDiagram-PH2N3AL5-fhMlkv6w.js │ │ ├── init-DjUOC4st.js │ │ ├── journeyDiagram-U35MCT3I-BI3B5NA4.js │ │ ├── kanban-definition-NDS4AKOZ-BdftdmWH.js │ │ ├── layout-BG95tefZ.js │ │ ├── linear-CRa8eD4r.js │ │ ├── mermaid.core-C0Blj36u.js │ │ ├── mindmap-definition-ALO5MXBD-BO2Uu9ee.js │ │ ├── ordinal-DfAQgscy.js │ │ ├── pieDiagram-IB7DONF6-DDe9KgBF.js │ │ ├── quadrantDiagram-7GDLP6J5-BGl9qPho.js │ │ ├── radar-MK3ICKWK-Uwn-jZp4.js │ │ ├── requirementDiagram-KVF5MWMF-BvKrRVax.js │ │ ├── sankeyDiagram-QLVOVGJD-B_m0WTk6.js │ │ ├── sequenceDiagram-X6HHIX6F-Bfni-YW_.js │ │ ├── stateDiagram-DGXRK772-CpehDlzW.js │ │ ├── stateDiagram-v2-YXO3MK2T-CFM2lJF8.js │ │ ├── style.css │ │ ├── timeline-definition-BDJGKUSR-C6DrPqLg.js │ │ └── xychartDiagram-VJFVF3MP-BKtEAN5R.js │ └── example │ │ ├── assets │ │ └── worker-lPYB70QI.js │ │ ├── index.js │ │ └── style.css │ ├── text_to_speech │ ├── __init__.py │ ├── test_tts.py │ └── tts.py │ ├── tracks.py │ ├── utils.py │ ├── webrtc.py │ ├── webrtc_connection_mixin.py │ └── websocket.py ├── demo ├── __init__.py ├── echo_audio │ ├── README.md │ ├── app.py │ └── requirements.txt ├── gemini_audio_video │ ├── README.md │ ├── app.py │ └── requirements.txt ├── gemini_conversation │ ├── README.md │ └── app.py ├── hello_computer │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── llama_code_editor │ ├── README.md │ ├── app.py │ ├── assets │ │ ├── sandbox.html │ │ └── spinner.html │ ├── handler.py │ ├── requirements.in │ ├── requirements.txt │ └── ui.py ├── llm_voice_chat │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ └── requirements.txt ├── moonshine_live │ ├── README.md │ ├── app.py │ ├── default-favicon.ico │ └── requirements.txt ├── nextjs_voice_chat │ ├── README.md │ ├── backend │ │ ├── env.py │ │ └── server.py │ ├── frontend │ │ └── fastrtc-demo │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── app │ │ │ ├── favicon.ico │ │ │ ├── globals.css │ │ │ ├── layout.tsx │ │ │ └── page.tsx │ │ │ ├── components.json │ │ │ ├── components │ │ │ ├── background-circle-provider.tsx │ │ │ ├── 
theme-provider.tsx │ │ │ └── ui │ │ │ │ ├── ai-voice-input.tsx │ │ │ │ ├── background-circles.tsx │ │ │ │ ├── reset-chat.tsx │ │ │ │ ├── theme-toggle.tsx │ │ │ │ └── theme-transition.tsx │ │ │ ├── eslint.config.mjs │ │ │ ├── lib │ │ │ ├── utils.ts │ │ │ └── webrtc-client.ts │ │ │ ├── next.config.ts │ │ │ ├── package.json │ │ │ ├── postcss.config.mjs │ │ │ ├── public │ │ │ ├── file.svg │ │ │ ├── globe.svg │ │ │ ├── next.svg │ │ │ ├── vercel.svg │ │ │ └── window.svg │ │ │ └── tsconfig.json │ ├── requirements.txt │ └── run.sh ├── object_detection │ ├── README.md │ ├── app.py │ ├── index.html │ ├── inference.py │ ├── requirements.txt │ └── utils.py ├── patient_intake │ └── app.py ├── phonic_chat │ ├── README.md │ ├── app.py │ └── requirements.txt ├── qwen_phone_chat │ ├── README.md │ ├── app.py │ └── requirements.txt ├── send_text_or_audio │ ├── app.py │ └── index.html ├── talk_to_azure_openai │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_claude │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_gemini │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_llama4 │ ├── AV_Huggy.png │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_openai │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_sambanova │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── talk_to_smolagents │ ├── README.md │ ├── app.py │ └── requirements.txt ├── text_mode │ └── app.py ├── voice_text_editor │ ├── README.md │ └── app.py ├── voice_text_editor_local │ └── app.py ├── webrtc_vs_websocket │ ├── README.md │ ├── app.py │ ├── index.html │ └── requirements.txt └── whisper_realtime │ ├── README.md │ ├── README_gradio.md │ ├── app.py │ ├── index.html │ └── requirements.txt ├── docs ├── CNAME ├── Discord-Symbol-White.svg ├── advanced-configuration.md ├── cookbook.md ├── deployment.md ├── faq.md ├── fastrtc_logo.png ├── fastrtc_logo_small.png ├── gradio-logo-with-title.svg ├── gradio-logo.svg ├── hf-logo-with-title.svg ├── hf-logo.svg ├── index.md ├── reference │ ├── credentials.md │ ├── reply_on_pause.md │ ├── stream.md │ ├── stream_handlers.md │ └── utils.md ├── speech_to_text_gallery.md ├── stylesheets │ └── extra.css ├── text_to_speech_gallery.md ├── turn_taking_gallery.md ├── userguide │ ├── api.md │ ├── audio-video.md │ ├── audio.md │ ├── gradio.md │ ├── streams.md │ ├── video.md │ ├── webrtc_docs.md │ └── websocket_docs.md └── utils.md ├── frontend ├── .prettierrc ├── Example.svelte ├── Index.svelte ├── gradio.config.js ├── index.ts ├── package-lock.json ├── package.json └── shared │ ├── AudioWave.svelte │ ├── InteractiveAudio.svelte │ ├── InteractiveVideo.svelte │ ├── MicrophoneMuted.svelte │ ├── PulsingIcon.svelte │ ├── StaticAudio.svelte │ ├── StaticVideo.svelte │ ├── TextboxWithMic.svelte │ ├── Webcam.svelte │ ├── WebcamPermissions.svelte │ ├── index.ts │ ├── stream_utils.ts │ ├── utils.ts │ └── webrtc_utils.ts ├── justfile ├── mkdocs.yml ├── overrides └── partials │ └── header.html ├── pyproject.toml ├── test ├── __init__.py ├── test_tts.py ├── test_utils.py └── test_webrtc_connection_mixin.py └── upload_space.py /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: 
write 12 | pull-requests: write 13 | deployments: write 14 | pages: write 15 | 16 | jobs: 17 | deploy: 18 | runs-on: ubuntu-latest 19 | if: github.event_name == 'push' || (github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false) 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Configure Git Credentials 23 | run: | 24 | git config user.name github-actions[bot] 25 | git config user.email 41898282+github-actions[bot]@users.noreply.github.com 26 | - uses: actions/setup-python@v5 27 | with: 28 | python-version: 3.x 29 | - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV 30 | - uses: actions/cache@v4 31 | with: 32 | key: mkdocs-material-${{ env.cache_id }} 33 | path: .cache 34 | restore-keys: | 35 | mkdocs-material- 36 | - run: pip install mkdocs-material mkdocs-llmstxt==0.1.0 37 | - name: Build docs 38 | run: mkdocs build 39 | 40 | - name: Deploy to GH Pages (main) 41 | if: github.event_name == 'push' 42 | run: mkdocs gh-deploy --force 43 | 44 | - name: Deploy PR Preview 45 | if: github.event_name == 'pull_request' 46 | uses: rossjrw/pr-preview-action@v1 47 | with: 48 | source-dir: ./site 49 | preview-branch: gh-pages 50 | umbrella-dir: pr-preview 51 | action: auto -------------------------------------------------------------------------------- /.github/workflows/tests-frontend.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | prettier: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: actions/setup-node@v4 11 | with: 12 | node-version: 18 13 | - name: Run prettier 14 | run: | 15 | cd frontend 16 | npm install 17 | npx prettier --check . -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - uses: actions/setup-python@v5 11 | with: 12 | python-version: '3.10' 13 | - name: Run linters 14 | run: | 15 | pip install ruff pyright 16 | pip install -e .[dev] 17 | ruff check . 18 | ruff format --check --diff . 
19 | pyright 20 | test: 21 | runs-on: ${{ matrix.os }} 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | os: [ubuntu-latest] 26 | python: 27 | - '3.10' 28 | - '3.13' 29 | steps: 30 | - uses: actions/checkout@v4 31 | - uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python }} 34 | - name: Run tests 35 | run: | 36 | python -m pip install -U pip 37 | pip install '.[dev, tts]' 38 | python -m pytest --capture=no 39 | shell: bash 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eggs/ 2 | dist/ 3 | *.pyc 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | __tmp/* 8 | *.pyi 9 | .mypycache 10 | .ruff_cache 11 | node_modules 12 | demo/MobileNetSSD_deploy.caffemodel 13 | demo/MobileNetSSD_deploy.prototxt.txt 14 | demo/scratch 15 | .gradio 16 | .vscode 17 | .DS_Store 18 | .venv* 19 | .env 20 | -------------------------------------------------------------------------------- /CNAME: -------------------------------------------------------------------------------- 1 | fastrtc.org -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Freddy Boulton 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /backend/fastrtc/__init__.py: -------------------------------------------------------------------------------- 1 | from .credentials import ( 2 | get_cloudflare_turn_credentials, 3 | get_cloudflare_turn_credentials_async, 4 | get_hf_turn_credentials, 5 | get_hf_turn_credentials_async, 6 | get_turn_credentials, 7 | get_turn_credentials_async, 8 | get_twilio_turn_credentials, 9 | ) 10 | from .pause_detection import ( 11 | ModelOptions, 12 | PauseDetectionModel, 13 | SileroVadOptions, 14 | get_silero_model, 15 | ) 16 | from .reply_on_pause import AlgoOptions, ReplyOnPause 17 | from .reply_on_stopwords import ReplyOnStopWords 18 | from .speech_to_text import MoonshineSTT, get_stt_model 19 | from .stream import Stream, UIArgs 20 | from .text_to_speech import ( 21 | CartesiaTTSOptions, 22 | KokoroTTSOptions, 23 | get_tts_model, 24 | ) 25 | from .tracks import ( 26 | AsyncAudioVideoStreamHandler, 27 | AsyncStreamHandler, 28 | AudioEmitType, 29 | AudioVideoStreamHandler, 30 | StreamHandler, 31 | VideoEmitType, 32 | VideoStreamHandler, 33 | ) 34 | from .utils import ( 35 | AdditionalOutputs, 36 | CloseStream, 37 | Warning, 38 | WebRTCData, 39 | WebRTCError, 40 | aggregate_bytes_to_16bit, 41 | async_aggregate_bytes_to_16bit, 42 | audio_to_bytes, 43 | audio_to_file, 44 | audio_to_float32, 45 | audio_to_int16, 46 | get_current_context, 47 | wait_for_item, 48 | ) 49 | from .webrtc import ( 50 | WebRTC, 51 | ) 52 | 53 | __all__ = [ 54 | "AsyncStreamHandler", 55 | "AudioVideoStreamHandler", 56 | "AudioEmitType", 57 | "AsyncAudioVideoStreamHandler", 58 | "AlgoOptions", 59 | "AdditionalOutputs", 60 | "aggregate_bytes_to_16bit", 61 | "async_aggregate_bytes_to_16bit", 62 | "audio_to_bytes", 63 | "audio_to_file", 64 | "audio_to_float32", 65 | "audio_to_int16", 66 | "get_hf_turn_credentials", 67 | "get_twilio_turn_credentials", 68 | "get_turn_credentials", 69 | "ReplyOnPause", 70 | "ReplyOnStopWords", 71 | "SileroVadOptions", 72 | "get_stt_model", 73 | "MoonshineSTT", 74 | "StreamHandler", 75 | "Stream", 76 | "VideoEmitType", 77 | "WebRTC", 78 | "WebRTCError", 79 | "Warning", 80 | "get_tts_model", 81 | "KokoroTTSOptions", 82 | "get_cloudflare_turn_credentials_async", 83 | "get_hf_turn_credentials_async", 84 | "get_turn_credentials_async", 85 | "get_cloudflare_turn_credentials", 86 | "wait_for_item", 87 | "UIArgs", 88 | "ModelOptions", 89 | "PauseDetectionModel", 90 | "get_silero_model", 91 | "SileroVadOptions", 92 | "VideoStreamHandler", 93 | "CloseStream", 94 | "get_current_context", 95 | "CartesiaTTSOptions", 96 | "WebRTCData", 97 | ] 98 | -------------------------------------------------------------------------------- /backend/fastrtc/pause_detection/__init__.py: -------------------------------------------------------------------------------- 1 | from .protocol import ModelOptions, PauseDetectionModel 2 | from .silero import SileroVADModel, SileroVadOptions, get_silero_model 3 | 4 | __all__ = [ 5 | "SileroVADModel", 6 | "SileroVadOptions", 7 | "PauseDetectionModel", 8 | "ModelOptions", 9 | "get_silero_model", 10 | ] 11 | -------------------------------------------------------------------------------- /backend/fastrtc/pause_detection/protocol.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Protocol, TypeAlias 2 | 3 | import numpy as np 4 | from numpy.typing import NDArray 5 | 6 | from ..utils import AudioChunk 7 | 8 | ModelOptions: TypeAlias = Any 9 | 10 | 11 
| class PauseDetectionModel(Protocol): 12 | def vad( 13 | self, 14 | audio: tuple[int, NDArray[np.int16] | NDArray[np.float32]], 15 | options: ModelOptions, 16 | ) -> tuple[float, list[AudioChunk]]: ... 17 | 18 | def warmup( 19 | self, 20 | ) -> None: ... 21 | -------------------------------------------------------------------------------- /backend/fastrtc/py.typed: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/__init__.py: -------------------------------------------------------------------------------- 1 | from .stt_ import MoonshineSTT, get_stt_model, stt_for_chunks 2 | 3 | __all__ = ["get_stt_model", "MoonshineSTT", "get_stt_model", "stt_for_chunks"] 4 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/stt_.py: -------------------------------------------------------------------------------- 1 | from functools import lru_cache 2 | from pathlib import Path 3 | from typing import Literal, Protocol 4 | 5 | import click 6 | import librosa 7 | import numpy as np 8 | from numpy.typing import NDArray 9 | 10 | from ..utils import AudioChunk, audio_to_float32 11 | 12 | curr_dir = Path(__file__).parent 13 | 14 | 15 | class STTModel(Protocol): 16 | def stt(self, audio: tuple[int, NDArray[np.int16 | np.float32]]) -> str: ... 17 | 18 | 19 | class MoonshineSTT(STTModel): 20 | def __init__( 21 | self, model: Literal["moonshine/base", "moonshine/tiny"] = "moonshine/base" 22 | ): 23 | try: 24 | from moonshine_onnx import MoonshineOnnxModel, load_tokenizer 25 | except (ImportError, ModuleNotFoundError): 26 | raise ImportError( 27 | "Install fastrtc[stt] for speech-to-text and stopword detection support." 
28 | ) 29 | 30 | self.model = MoonshineOnnxModel(model_name=model) 31 | self.tokenizer = load_tokenizer() 32 | 33 | def stt(self, audio: tuple[int, NDArray[np.int16 | np.float32]]) -> str: 34 | sr, audio_np = audio # type: ignore 35 | audio_np = audio_to_float32(audio_np) 36 | if sr != 16000: 37 | audio_np: NDArray[np.float32] = librosa.resample( 38 | audio_np, orig_sr=sr, target_sr=16000 39 | ) 40 | if audio_np.ndim == 1: 41 | audio_np = audio_np.reshape(1, -1) 42 | tokens = self.model.generate(audio_np) 43 | return self.tokenizer.decode_batch(tokens)[0] 44 | 45 | 46 | @lru_cache 47 | def get_stt_model( 48 | model: Literal["moonshine/base", "moonshine/tiny"] = "moonshine/base", 49 | ) -> STTModel: 50 | import os 51 | 52 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 53 | m = MoonshineSTT(model) 54 | from moonshine_onnx import load_audio 55 | 56 | audio = load_audio(str(curr_dir / "test_file.wav")) 57 | print(click.style("INFO", fg="green") + ":\t Warming up STT model.") 58 | 59 | m.stt((16000, audio)) 60 | print(click.style("INFO", fg="green") + ":\t STT model warmed up.") 61 | return m 62 | 63 | 64 | def stt_for_chunks( 65 | stt_model: STTModel, 66 | audio: tuple[int, NDArray[np.int16 | np.float32]], 67 | chunks: list[AudioChunk], 68 | ) -> str: 69 | sr, audio_np = audio 70 | return " ".join( 71 | [ 72 | stt_model.stt((sr, audio_np[chunk["start"] : chunk["end"]])) 73 | for chunk in chunks 74 | ] 75 | ) 76 | -------------------------------------------------------------------------------- /backend/fastrtc/speech_to_text/test_file.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/backend/fastrtc/speech_to_text/test_file.wav -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/_basePickBy-BWyW5hVA.js: -------------------------------------------------------------------------------- 1 | import { e as x, c as b, g as m, k as P, h as p, j as w, l as N, m as c, n as I, t as A, o as M } from "./_baseUniq-BfI_PfuI.js"; 2 | import { aJ as g, az as E, aK as F, aL as _, aM as $, aN as l, aO as B, aP as T, aQ as y, aR as L } from "./mermaid.core-C0Blj36u.js"; 3 | var R = /\s/; 4 | function S(n) { 5 | for (var r = n.length; r-- && R.test(n.charAt(r)); ) 6 | ; 7 | return r; 8 | } 9 | var z = /^\s+/; 10 | function G(n) { 11 | return n && n.slice(0, S(n) + 1).replace(z, ""); 12 | } 13 | var o = NaN, H = /^[-+]0x[0-9a-f]+$/i, K = /^0b[01]+$/i, q = /^0o[0-7]+$/i, C = parseInt; 14 | function J(n) { 15 | if (typeof n == "number") 16 | return n; 17 | if (x(n)) 18 | return o; 19 | if (g(n)) { 20 | var r = typeof n.valueOf == "function" ? n.valueOf() : n; 21 | n = g(r) ? r + "" : r; 22 | } 23 | if (typeof n != "string") 24 | return n === 0 ? n : +n; 25 | n = G(n); 26 | var t = K.test(n); 27 | return t || q.test(n) ? C(n.slice(2), t ? 2 : 8) : H.test(n) ? o : +n; 28 | } 29 | var v = 1 / 0, Q = 17976931348623157e292; 30 | function W(n) { 31 | if (!n) 32 | return n === 0 ? n : 0; 33 | if (n = J(n), n === v || n === -v) { 34 | var r = n < 0 ? -1 : 1; 35 | return r * Q; 36 | } 37 | return n === n ? n : 0; 38 | } 39 | function X(n) { 40 | var r = W(n), t = r % 1; 41 | return r === r ? t ? r - t : r : 0; 42 | } 43 | function fn(n) { 44 | var r = n == null ? 0 : n.length; 45 | return r ? 
b(n) : []; 46 | } 47 | var O = Object.prototype, Y = O.hasOwnProperty, dn = E(function(n, r) { 48 | n = Object(n); 49 | var t = -1, i = r.length, a = i > 2 ? r[2] : void 0; 50 | for (a && F(r[0], r[1], a) && (i = 1); ++t < i; ) 51 | for (var f = r[t], e = _(f), s = -1, d = e.length; ++s < d; ) { 52 | var u = e[s], h = n[u]; 53 | (h === void 0 || $(h, O[u]) && !Y.call(n, u)) && (n[u] = f[u]); 54 | } 55 | return n; 56 | }); 57 | function un(n) { 58 | var r = n == null ? 0 : n.length; 59 | return r ? n[r - 1] : void 0; 60 | } 61 | function D(n) { 62 | return function(r, t, i) { 63 | var a = Object(r); 64 | if (!l(r)) { 65 | var f = m(t); 66 | r = P(r), t = function(s) { 67 | return f(a[s], s, a); 68 | }; 69 | } 70 | var e = n(r, t, i); 71 | return e > -1 ? a[f ? r[e] : e] : void 0; 72 | }; 73 | } 74 | var U = Math.max; 75 | function Z(n, r, t) { 76 | var i = n == null ? 0 : n.length; 77 | if (!i) 78 | return -1; 79 | var a = t == null ? 0 : X(t); 80 | return a < 0 && (a = U(i + a, 0)), p(n, m(r), a); 81 | } 82 | var hn = D(Z); 83 | function V(n, r) { 84 | var t = -1, i = l(n) ? Array(n.length) : []; 85 | return w(n, function(a, f, e) { 86 | i[++t] = r(a, f, e); 87 | }), i; 88 | } 89 | function gn(n, r) { 90 | var t = B(n) ? N : V; 91 | return t(n, m(r)); 92 | } 93 | var j = Object.prototype, k = j.hasOwnProperty; 94 | function nn(n, r) { 95 | return n != null && k.call(n, r); 96 | } 97 | function mn(n, r) { 98 | return n != null && c(n, r, nn); 99 | } 100 | function rn(n, r) { 101 | return n < r; 102 | } 103 | function tn(n, r, t) { 104 | for (var i = -1, a = n.length; ++i < a; ) { 105 | var f = n[i], e = r(f); 106 | if (e != null && (s === void 0 ? e === e && !x(e) : t(e, s))) 107 | var s = e, d = f; 108 | } 109 | return d; 110 | } 111 | function on(n) { 112 | return n && n.length ? tn(n, T, rn) : void 0; 113 | } 114 | function an(n, r, t, i) { 115 | if (!g(n)) 116 | return n; 117 | r = I(r, n); 118 | for (var a = -1, f = r.length, e = f - 1, s = n; s != null && ++a < f; ) { 119 | var d = A(r[a]), u = t; 120 | if (d === "__proto__" || d === "constructor" || d === "prototype") 121 | return n; 122 | if (a != e) { 123 | var h = s[d]; 124 | u = void 0, u === void 0 && (u = g(h) ? h : y(r[a + 1]) ? 
[] : {}); 125 | } 126 | L(s, d, u), s = s[d]; 127 | } 128 | return n; 129 | } 130 | function vn(n, r, t) { 131 | for (var i = -1, a = r.length, f = {}; ++i < a; ) { 132 | var e = r[i], s = M(n, e); 133 | t(s, e) && an(f, I(e, n), s); 134 | } 135 | return f; 136 | } 137 | export { 138 | rn as a, 139 | tn as b, 140 | V as c, 141 | vn as d, 142 | on as e, 143 | fn as f, 144 | hn as g, 145 | mn as h, 146 | dn as i, 147 | X as j, 148 | un as l, 149 | gn as m, 150 | W as t 151 | }; 152 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/assets/worker-lPYB70QI.js: -------------------------------------------------------------------------------- 1 | (function(){"use strict";const R="https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd/ffmpeg-core.js";var E;(function(t){t.LOAD="LOAD",t.EXEC="EXEC",t.WRITE_FILE="WRITE_FILE",t.READ_FILE="READ_FILE",t.DELETE_FILE="DELETE_FILE",t.RENAME="RENAME",t.CREATE_DIR="CREATE_DIR",t.LIST_DIR="LIST_DIR",t.DELETE_DIR="DELETE_DIR",t.ERROR="ERROR",t.DOWNLOAD="DOWNLOAD",t.PROGRESS="PROGRESS",t.LOG="LOG",t.MOUNT="MOUNT",t.UNMOUNT="UNMOUNT"})(E||(E={}));const a=new Error("unknown message type"),f=new Error("ffmpeg is not loaded, call `await ffmpeg.load()` first"),u=new Error("failed to import ffmpeg-core.js");let r;const O=async({coreURL:t,wasmURL:n,workerURL:e})=>{const o=!r;try{t||(t=R),importScripts(t)}catch{if(t||(t=R.replace("/umd/","/esm/")),self.createFFmpegCore=(await import(t)).default,!self.createFFmpegCore)throw u}const s=t,c=n||t.replace(/.js$/g,".wasm"),b=e||t.replace(/.js$/g,".worker.js");return r=await self.createFFmpegCore({mainScriptUrlOrBlob:`${s}#${btoa(JSON.stringify({wasmURL:c,workerURL:b}))}`}),r.setLogger(i=>self.postMessage({type:E.LOG,data:i})),r.setProgress(i=>self.postMessage({type:E.PROGRESS,data:i})),o},l=({args:t,timeout:n=-1})=>{r.setTimeout(n),r.exec(...t);const e=r.ret;return r.reset(),e},m=({path:t,data:n})=>(r.FS.writeFile(t,n),!0),D=({path:t,encoding:n})=>r.FS.readFile(t,{encoding:n}),S=({path:t})=>(r.FS.unlink(t),!0),I=({oldPath:t,newPath:n})=>(r.FS.rename(t,n),!0),L=({path:t})=>(r.FS.mkdir(t),!0),N=({path:t})=>{const n=r.FS.readdir(t),e=[];for(const o of n){const s=r.FS.stat(`${t}/${o}`),c=r.FS.isDir(s.mode);e.push({name:o,isDir:c})}return e},A=({path:t})=>(r.FS.rmdir(t),!0),w=({fsType:t,options:n,mountPoint:e})=>{const o=t,s=r.FS.filesystems[o];return s?(r.FS.mount(s,n,e),!0):!1},k=({mountPoint:t})=>(r.FS.unmount(t),!0);self.onmessage=async({data:{id:t,type:n,data:e}})=>{const o=[];let s;try{if(n!==E.LOAD&&!r)throw f;switch(n){case E.LOAD:s=await O(e);break;case E.EXEC:s=l(e);break;case E.WRITE_FILE:s=m(e);break;case E.READ_FILE:s=D(e);break;case E.DELETE_FILE:s=S(e);break;case E.RENAME:s=I(e);break;case E.CREATE_DIR:s=L(e);break;case E.LIST_DIR:s=N(e);break;case E.DELETE_DIR:s=A(e);break;case E.MOUNT:s=w(e);break;case E.UNMOUNT:s=k(e);break;default:throw a}}catch(c){self.postMessage({id:t,type:E.ERROR,data:c.toString()});return}s instanceof Uint8Array&&o.push(s.buffer),self.postMessage({id:t,type:n,data:s},o)}})(); 2 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/channel-BLI8LD7T.js: -------------------------------------------------------------------------------- 1 | import { ao as r, ap as n } from "./mermaid.core-C0Blj36u.js"; 2 | const t = (a, o) => r.lang.round(n.parse(a)[o]); 3 | export { 4 | t as c 5 | }; 6 | -------------------------------------------------------------------------------- 
/backend/fastrtc/templates/component/chunk-4BMEZGHF-4N88GRXN.js: -------------------------------------------------------------------------------- 1 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 2 | function m(e, c) { 3 | var i, t, o; 4 | e.accDescr && ((i = c.setAccDescription) == null || i.call(c, e.accDescr)), e.accTitle && ((t = c.setAccTitle) == null || t.call(c, e.accTitle)), e.title && ((o = c.setDiagramTitle) == null || o.call(c, e.title)); 5 | } 6 | l(m, "populateCommonDb"); 7 | export { 8 | m as p 9 | }; 10 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-D6G4REZN-BSQJOIKu.js: -------------------------------------------------------------------------------- 1 | import { _ as n, T as c, m as l } from "./mermaid.core-C0Blj36u.js"; 2 | var o = /* @__PURE__ */ n((a, t) => { 3 | const e = a.append("rect"); 4 | if (e.attr("x", t.x), e.attr("y", t.y), e.attr("fill", t.fill), e.attr("stroke", t.stroke), e.attr("width", t.width), e.attr("height", t.height), t.name && e.attr("name", t.name), t.rx && e.attr("rx", t.rx), t.ry && e.attr("ry", t.ry), t.attrs !== void 0) 5 | for (const r in t.attrs) 6 | e.attr(r, t.attrs[r]); 7 | return t.class && e.attr("class", t.class), e; 8 | }, "drawRect"), d = /* @__PURE__ */ n((a, t) => { 9 | const e = { 10 | x: t.startx, 11 | y: t.starty, 12 | width: t.stopx - t.startx, 13 | height: t.stopy - t.starty, 14 | fill: t.fill, 15 | stroke: t.stroke, 16 | class: "rect" 17 | }; 18 | o(a, e).lower(); 19 | }, "drawBackgroundRect"), g = /* @__PURE__ */ n((a, t) => { 20 | const e = t.text.replace(c, " "), r = a.append("text"); 21 | r.attr("x", t.x), r.attr("y", t.y), r.attr("class", "legend"), r.style("text-anchor", t.anchor), t.class && r.attr("class", t.class); 22 | const s = r.append("tspan"); 23 | return s.attr("x", t.x + t.textMargin * 2), s.text(e), r; 24 | }, "drawText"), m = /* @__PURE__ */ n((a, t, e, r) => { 25 | const s = a.append("image"); 26 | s.attr("x", t), s.attr("y", e); 27 | const i = l(r); 28 | s.attr("xlink:href", i); 29 | }, "drawImage"), h = /* @__PURE__ */ n((a, t, e, r) => { 30 | const s = a.append("use"); 31 | s.attr("x", t), s.attr("y", e); 32 | const i = l(r); 33 | s.attr("xlink:href", `#${i}`); 34 | }, "drawEmbeddedImage"), y = /* @__PURE__ */ n(() => ({ 35 | x: 0, 36 | y: 0, 37 | width: 100, 38 | height: 100, 39 | fill: "#EDF2AE", 40 | stroke: "#666", 41 | anchor: "start", 42 | rx: 0, 43 | ry: 0 44 | }), "getNoteRect"), p = /* @__PURE__ */ n(() => ({ 45 | x: 0, 46 | y: 0, 47 | width: 100, 48 | height: 100, 49 | "text-anchor": "start", 50 | style: "#666", 51 | textMargin: 0, 52 | rx: 0, 53 | ry: 0, 54 | tspan: !0 55 | }), "getTextObj"); 56 | export { 57 | p as a, 58 | d as b, 59 | h as c, 60 | o as d, 61 | m as e, 62 | g as f, 63 | y as g 64 | }; 65 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-RZ5BOZE2-BQm8zocb.js: -------------------------------------------------------------------------------- 1 | import { _ as n, j as r, k as g, l as d } from "./mermaid.core-C0Blj36u.js"; 2 | var u = /* @__PURE__ */ n((t, e) => { 3 | let o; 4 | return e === "sandbox" && (o = r("#i" + t)), (e === "sandbox" ? 
r(o.nodes()[0].contentDocument.body) : r("body")).select(`[id="${t}"]`); 5 | }, "getDiagramElement"), b = /* @__PURE__ */ n((t, e, o, i) => { 6 | t.attr("class", o); 7 | const { width: a, height: s, x: h, y: x } = l(t, e); 8 | g(t, s, a, i); 9 | const c = w(h, x, a, s, e); 10 | t.attr("viewBox", c), d.debug(`viewBox configured: ${c} with padding: ${e}`); 11 | }, "setupViewPortForSVG"), l = /* @__PURE__ */ n((t, e) => { 12 | var i; 13 | const o = ((i = t.node()) == null ? void 0 : i.getBBox()) || { width: 0, height: 0, x: 0, y: 0 }; 14 | return { 15 | width: o.width + e * 2, 16 | height: o.height + e * 2, 17 | x: o.x, 18 | y: o.y 19 | }; 20 | }, "calculateDimensionsWithPadding"), w = /* @__PURE__ */ n((t, e, o, i, a) => `${t - a} ${e - a} ${o} ${i}`, "createViewBox"); 21 | export { 22 | u as g, 23 | b as s 24 | }; 25 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/chunk-XZIHB7SX-DIHERCaT.js: -------------------------------------------------------------------------------- 1 | import { _ as s } from "./mermaid.core-C0Blj36u.js"; 2 | var t, e = (t = class { 3 | /** 4 | * @param init - Function that creates the default state. 5 | */ 6 | constructor(i) { 7 | this.init = i, this.records = this.init(); 8 | } 9 | reset() { 10 | this.records = this.init(); 11 | } 12 | }, s(t, "ImperativeState"), t); 13 | export { 14 | e as I 15 | }; 16 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/classDiagram-GIVACNV2-KrkkVrlR.js: -------------------------------------------------------------------------------- 1 | import { c as r, C as s, a as e, s as t } from "./chunk-A2AXSNBT-C4qvwI5K.js"; 2 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 3 | var d = { 4 | parser: r, 5 | get db() { 6 | return new s(); 7 | }, 8 | renderer: e, 9 | styles: t, 10 | init: /* @__PURE__ */ l((a) => { 11 | a.class || (a.class = {}), a.class.arrowMarkerAbsolute = a.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | d as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/classDiagram-v2-COTLJTTW-KrkkVrlR.js: -------------------------------------------------------------------------------- 1 | import { c as r, C as s, a as e, s as t } from "./chunk-A2AXSNBT-C4qvwI5K.js"; 2 | import { _ as l } from "./mermaid.core-C0Blj36u.js"; 3 | var d = { 4 | parser: r, 5 | get db() { 6 | return new s(); 7 | }, 8 | renderer: e, 9 | styles: t, 10 | init: /* @__PURE__ */ l((a) => { 11 | a.class || (a.class = {}), a.class.arrowMarkerAbsolute = a.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | d as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/clone-D_f12Uao.js: -------------------------------------------------------------------------------- 1 | import { b as r } from "./_baseUniq-BfI_PfuI.js"; 2 | var e = 4; 3 | function a(o) { 4 | return r(o, e); 5 | } 6 | export { 7 | a as c 8 | }; 9 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/index.js: -------------------------------------------------------------------------------- 1 | import { E as s, a as t, I as l, l as d, d as o, b as p } from "./index-xxHpJ_RR.js"; 2 | export { 3 | s as BaseExample, 4 | t as BaseInteractiveVideo, 5 | l as default, 6 | d as loaded, 7 | o as playable, 8 | 
p as prettyBytes 9 | }; 10 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/infoDiagram-PH2N3AL5-fhMlkv6w.js: -------------------------------------------------------------------------------- 1 | import { _ as e, l as s, H as o, k as i, I as g } from "./mermaid.core-C0Blj36u.js"; 2 | import { p } from "./radar-MK3ICKWK-Uwn-jZp4.js"; 3 | var v = { 4 | parse: /* @__PURE__ */ e(async (r) => { 5 | const a = await p("info", r); 6 | s.debug(a); 7 | }, "parse") 8 | }, d = { version: g.version }, c = /* @__PURE__ */ e(() => d.version, "getVersion"), m = { 9 | getVersion: c 10 | }, l = /* @__PURE__ */ e((r, a, n) => { 11 | s.debug(`rendering info diagram 12 | ` + r); 13 | const t = o(a); 14 | i(t, 100, 400, !0), t.append("g").append("text").attr("x", 100).attr("y", 40).attr("class", "version").attr("font-size", 32).style("text-anchor", "middle").text(`v${n}`); 15 | }, "draw"), f = { draw: l }, b = { 16 | parser: v, 17 | db: m, 18 | renderer: f 19 | }; 20 | export { 21 | b as diagram 22 | }; 23 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/init-DjUOC4st.js: -------------------------------------------------------------------------------- 1 | function t(e, a) { 2 | switch (arguments.length) { 3 | case 0: 4 | break; 5 | case 1: 6 | this.range(e); 7 | break; 8 | default: 9 | this.range(a).domain(e); 10 | break; 11 | } 12 | return this; 13 | } 14 | export { 15 | t as i 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/ordinal-DfAQgscy.js: -------------------------------------------------------------------------------- 1 | import { i as a } from "./init-DjUOC4st.js"; 2 | class o extends Map { 3 | constructor(n, t = g) { 4 | if (super(), Object.defineProperties(this, { _intern: { value: /* @__PURE__ */ new Map() }, _key: { value: t } }), n != null) for (const [r, s] of n) this.set(r, s); 5 | } 6 | get(n) { 7 | return super.get(c(this, n)); 8 | } 9 | has(n) { 10 | return super.has(c(this, n)); 11 | } 12 | set(n, t) { 13 | return super.set(l(this, n), t); 14 | } 15 | delete(n) { 16 | return super.delete(p(this, n)); 17 | } 18 | } 19 | function c({ _intern: e, _key: n }, t) { 20 | const r = n(t); 21 | return e.has(r) ? e.get(r) : t; 22 | } 23 | function l({ _intern: e, _key: n }, t) { 24 | const r = n(t); 25 | return e.has(r) ? e.get(r) : (e.set(r, t), t); 26 | } 27 | function p({ _intern: e, _key: n }, t) { 28 | const r = n(t); 29 | return e.has(r) && (t = e.get(r), e.delete(r)), t; 30 | } 31 | function g(e) { 32 | return e !== null && typeof e == "object" ? e.valueOf() : e; 33 | } 34 | const f = Symbol("implicit"); 35 | function h() { 36 | var e = new o(), n = [], t = [], r = f; 37 | function s(u) { 38 | let i = e.get(u); 39 | if (i === void 0) { 40 | if (r !== f) return r; 41 | e.set(u, i = n.push(u) - 1); 42 | } 43 | return t[i % t.length]; 44 | } 45 | return s.domain = function(u) { 46 | if (!arguments.length) return n.slice(); 47 | n = [], e = new o(); 48 | for (const i of u) 49 | e.has(i) || e.set(i, n.push(i) - 1); 50 | return s; 51 | }, s.range = function(u) { 52 | return arguments.length ? (t = Array.from(u), s) : t.slice(); 53 | }, s.unknown = function(u) { 54 | return arguments.length ? 
(r = u, s) : r; 55 | }, s.copy = function() { 56 | return h(n, t).unknown(r); 57 | }, a.apply(s, arguments), s; 58 | } 59 | export { 60 | h as o 61 | }; 62 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/component/stateDiagram-v2-YXO3MK2T-CFM2lJF8.js: -------------------------------------------------------------------------------- 1 | import { s as a, S as t, b as r, a as s } from "./chunk-AEK57VVT-OrsXQu-U.js"; 2 | import { _ as i } from "./mermaid.core-C0Blj36u.js"; 3 | var _ = { 4 | parser: a, 5 | get db() { 6 | return new t(2); 7 | }, 8 | renderer: r, 9 | styles: s, 10 | init: /* @__PURE__ */ i((e) => { 11 | e.state || (e.state = {}), e.state.arrowMarkerAbsolute = e.arrowMarkerAbsolute; 12 | }, "init") 13 | }; 14 | export { 15 | _ as diagram 16 | }; 17 | -------------------------------------------------------------------------------- /backend/fastrtc/templates/example/assets/worker-lPYB70QI.js: -------------------------------------------------------------------------------- 1 | (function(){"use strict";const R="https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd/ffmpeg-core.js";var E;(function(t){t.LOAD="LOAD",t.EXEC="EXEC",t.WRITE_FILE="WRITE_FILE",t.READ_FILE="READ_FILE",t.DELETE_FILE="DELETE_FILE",t.RENAME="RENAME",t.CREATE_DIR="CREATE_DIR",t.LIST_DIR="LIST_DIR",t.DELETE_DIR="DELETE_DIR",t.ERROR="ERROR",t.DOWNLOAD="DOWNLOAD",t.PROGRESS="PROGRESS",t.LOG="LOG",t.MOUNT="MOUNT",t.UNMOUNT="UNMOUNT"})(E||(E={}));const a=new Error("unknown message type"),f=new Error("ffmpeg is not loaded, call `await ffmpeg.load()` first"),u=new Error("failed to import ffmpeg-core.js");let r;const O=async({coreURL:t,wasmURL:n,workerURL:e})=>{const o=!r;try{t||(t=R),importScripts(t)}catch{if(t||(t=R.replace("/umd/","/esm/")),self.createFFmpegCore=(await import(t)).default,!self.createFFmpegCore)throw u}const s=t,c=n||t.replace(/.js$/g,".wasm"),b=e||t.replace(/.js$/g,".worker.js");return r=await self.createFFmpegCore({mainScriptUrlOrBlob:`${s}#${btoa(JSON.stringify({wasmURL:c,workerURL:b}))}`}),r.setLogger(i=>self.postMessage({type:E.LOG,data:i})),r.setProgress(i=>self.postMessage({type:E.PROGRESS,data:i})),o},l=({args:t,timeout:n=-1})=>{r.setTimeout(n),r.exec(...t);const e=r.ret;return r.reset(),e},m=({path:t,data:n})=>(r.FS.writeFile(t,n),!0),D=({path:t,encoding:n})=>r.FS.readFile(t,{encoding:n}),S=({path:t})=>(r.FS.unlink(t),!0),I=({oldPath:t,newPath:n})=>(r.FS.rename(t,n),!0),L=({path:t})=>(r.FS.mkdir(t),!0),N=({path:t})=>{const n=r.FS.readdir(t),e=[];for(const o of n){const s=r.FS.stat(`${t}/${o}`),c=r.FS.isDir(s.mode);e.push({name:o,isDir:c})}return e},A=({path:t})=>(r.FS.rmdir(t),!0),w=({fsType:t,options:n,mountPoint:e})=>{const o=t,s=r.FS.filesystems[o];return s?(r.FS.mount(s,n,e),!0):!1},k=({mountPoint:t})=>(r.FS.unmount(t),!0);self.onmessage=async({data:{id:t,type:n,data:e}})=>{const o=[];let s;try{if(n!==E.LOAD&&!r)throw f;switch(n){case E.LOAD:s=await O(e);break;case E.EXEC:s=l(e);break;case E.WRITE_FILE:s=m(e);break;case E.READ_FILE:s=D(e);break;case E.DELETE_FILE:s=S(e);break;case E.RENAME:s=I(e);break;case E.CREATE_DIR:s=L(e);break;case E.LIST_DIR:s=N(e);break;case E.DELETE_DIR:s=A(e);break;case E.MOUNT:s=w(e);break;case E.UNMOUNT:s=k(e);break;default:throw a}}catch(c){self.postMessage({id:t,type:E.ERROR,data:c.toString()});return}s instanceof Uint8Array&&o.push(s.buffer),self.postMessage({id:t,type:n,data:s},o)}})(); 2 | -------------------------------------------------------------------------------- 
/backend/fastrtc/templates/example/style.css: -------------------------------------------------------------------------------- 1 | .container.svelte-1uoo7dd{flex:none;max-width:none}.container.svelte-1uoo7dd video{width:var(--size-full);height:var(--size-full);object-fit:cover}.container.svelte-1uoo7dd:hover,.container.selected.svelte-1uoo7dd{border-color:var(--border-color-accent)}.container.table.svelte-1uoo7dd{margin:0 auto;border:2px solid var(--border-color-primary);border-radius:var(--radius-lg);overflow:hidden;width:var(--size-20);height:var(--size-20);object-fit:cover}.container.gallery.svelte-1uoo7dd{height:var(--size-20);max-height:var(--size-20);object-fit:cover} 2 | -------------------------------------------------------------------------------- /backend/fastrtc/text_to_speech/__init__.py: -------------------------------------------------------------------------------- 1 | from .tts import ( 2 | CartesiaTTSOptions, 3 | KokoroTTSOptions, 4 | get_tts_model, 5 | ) 6 | 7 | __all__ = ["get_tts_model", "KokoroTTSOptions", "CartesiaTTSOptions"] 8 | -------------------------------------------------------------------------------- /backend/fastrtc/text_to_speech/test_tts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/backend/fastrtc/text_to_speech/test_tts.py -------------------------------------------------------------------------------- /demo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/__init__.py -------------------------------------------------------------------------------- /demo/echo_audio/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Echo Audio 3 | emoji: 🪩 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Simple echo stream - simplest FastRTC demo 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/echo_audio/app.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from fastapi import FastAPI 3 | from fastapi.responses import RedirectResponse 4 | from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials 5 | from gradio.utils import get_space 6 | 7 | 8 | def detection(audio: tuple[int, np.ndarray]): 9 | # Implement any iterator that yields audio 10 | # See "LLM Voice Chat" for a more complete example 11 | yield audio 12 | 13 | 14 | stream = Stream( 15 | handler=ReplyOnPause(detection), 16 | modality="audio", 17 | mode="send-receive", 18 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 19 | concurrency_limit=5 if get_space() else None, 20 | time_limit=90 if get_space() else None, 21 | ) 22 | 23 | app = FastAPI() 24 | 25 | stream.mount(app) 26 | 27 | 28 | @app.get("/") 29 | async def index(): 30 | return RedirectResponse( 31 | url="/ui" if not get_space() else "https://fastrtc-echo-audio.hf.space/ui/" 32 | ) 33 | 34 | 35 | if __name__ == "__main__": 36 | import os 37 | 38 | if (mode := 
os.getenv("MODE")) == "UI": 39 | stream.ui.launch(server_port=7860) 40 | elif mode == "PHONE": 41 | stream.fastphone(port=7860) 42 | else: 43 | import uvicorn 44 | 45 | uvicorn.run(app, host="0.0.0.0", port=7860) 46 | -------------------------------------------------------------------------------- /demo/echo_audio/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | twilio 3 | python-dotenv 4 | -------------------------------------------------------------------------------- /demo/gemini_audio_video/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Gemini Audio Video 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.25.2 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Gemini understands audio and video! 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/gemini_audio_video/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc==0.0.23.rc1 2 | python-dotenv 3 | google-genai 4 | twilio 5 | -------------------------------------------------------------------------------- /demo/gemini_conversation/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Gemini Talking to Gemini 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.17.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Have two Gemini agents talk to each other 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Hello Computer 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Say computer before asking your question 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Hello Computer (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Say computer (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/hello_computer/app.py: 
-------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import gradio as gr 7 | import huggingface_hub 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from fastapi import FastAPI 11 | from fastapi.responses import HTMLResponse, StreamingResponse 12 | from fastrtc import ( 13 | AdditionalOutputs, 14 | ReplyOnStopWords, 15 | Stream, 16 | get_stt_model, 17 | get_twilio_turn_credentials, 18 | ) 19 | from gradio.utils import get_space 20 | from pydantic import BaseModel 21 | 22 | load_dotenv() 23 | 24 | curr_dir = Path(__file__).parent 25 | 26 | 27 | client = huggingface_hub.InferenceClient( 28 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 29 | provider="sambanova", 30 | ) 31 | model = get_stt_model() 32 | 33 | 34 | def response( 35 | audio: tuple[int, np.ndarray], 36 | gradio_chatbot: list[dict] | None = None, 37 | conversation_state: list[dict] | None = None, 38 | ): 39 | gradio_chatbot = gradio_chatbot or [] 40 | conversation_state = conversation_state or [] 41 | text = model.stt(audio) 42 | print("STT in handler", text) 43 | sample_rate, array = audio 44 | gradio_chatbot.append( 45 | {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))} 46 | ) 47 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 48 | 49 | conversation_state.append({"role": "user", "content": text}) 50 | 51 | request = client.chat.completions.create( 52 | model="meta-llama/Llama-3.2-3B-Instruct", 53 | messages=conversation_state, # type: ignore 54 | temperature=0.1, 55 | top_p=0.1, 56 | ) 57 | response = {"role": "assistant", "content": request.choices[0].message.content} 58 | 59 | conversation_state.append(response) 60 | gradio_chatbot.append(response) 61 | 62 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 63 | 64 | 65 | chatbot = gr.Chatbot(type="messages", value=[]) 66 | state = gr.State(value=[]) 67 | stream = Stream( 68 | ReplyOnStopWords( 69 | response, # type: ignore 70 | stop_words=["computer"], 71 | input_sample_rate=16000, 72 | ), 73 | mode="send", 74 | modality="audio", 75 | additional_inputs=[chatbot, state], 76 | additional_outputs=[chatbot, state], 77 | additional_outputs_handler=lambda *a: (a[2], a[3]), 78 | concurrency_limit=5 if get_space() else None, 79 | time_limit=90 if get_space() else None, 80 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 81 | ) 82 | app = FastAPI() 83 | stream.mount(app) 84 | 85 | 86 | class Message(BaseModel): 87 | role: str 88 | content: str 89 | 90 | 91 | class InputData(BaseModel): 92 | webrtc_id: str 93 | chatbot: list[Message] 94 | state: list[Message] 95 | 96 | 97 | @app.get("/") 98 | async def _(): 99 | rtc_config = get_twilio_turn_credentials() if get_space() else None 100 | html_content = (curr_dir / "index.html").read_text() 101 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 102 | return HTMLResponse(content=html_content) 103 | 104 | 105 | @app.post("/input_hook") 106 | async def _(data: InputData): 107 | body = data.model_dump() 108 | stream.set_input(data.webrtc_id, body["chatbot"], body["state"]) 109 | 110 | 111 | def audio_to_base64(file_path): 112 | audio_format = "wav" 113 | with open(file_path, "rb") as audio_file: 114 | encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8") 115 | return f"data:audio/{audio_format};base64,{encoded_audio}" 116 | 117 | 118 | @app.get("/outputs") 119 | async def _(webrtc_id: str): 120 | 
async def output_stream(): 121 | async for output in stream.output_stream(webrtc_id): 122 | chatbot = output.args[0] 123 | state = output.args[1] 124 | data = { 125 | "message": state[-1], 126 | "audio": audio_to_base64(chatbot[-1]["content"].value["path"]) 127 | if chatbot[-1]["role"] == "user" 128 | else None, 129 | } 130 | yield f"event: output\ndata: {json.dumps(data)}\n\n" 131 | 132 | return StreamingResponse(output_stream(), media_type="text/event-stream") 133 | 134 | 135 | if __name__ == "__main__": 136 | import os 137 | 138 | if (mode := os.getenv("MODE")) == "UI": 139 | stream.ui.launch(server_port=7860) 140 | elif mode == "PHONE": 141 | raise ValueError("Phone mode not supported") 142 | else: 143 | import uvicorn 144 | 145 | uvicorn.run(app, host="0.0.0.0", port=7860) 146 | -------------------------------------------------------------------------------- /demo/hello_computer/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[stopword] 2 | python-dotenv 3 | huggingface_hub>=0.29.0 4 | twilio -------------------------------------------------------------------------------- /demo/llama_code_editor/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Llama Code Editor 3 | emoji: 🦙 4 | colorFrom: indigo 5 | colorTo: pink 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Create interactive HTML web pages with your voice 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, 13 | secret|SAMBANOVA_API_KEY, secret|GROQ_API_KEY] 14 | --- 15 | 16 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference 17 | -------------------------------------------------------------------------------- /demo/llama_code_editor/app.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | from fastapi.responses import RedirectResponse 3 | from fastrtc import Stream 4 | from gradio.utils import get_space 5 | 6 | try: 7 | from demo.llama_code_editor.handler import ( 8 | CodeHandler, 9 | ) 10 | from demo.llama_code_editor.ui import demo as ui 11 | except (ImportError, ModuleNotFoundError): 12 | from handler import CodeHandler 13 | from ui import demo as ui 14 | 15 | 16 | stream = Stream( 17 | handler=CodeHandler, 18 | modality="audio", 19 | mode="send-receive", 20 | concurrency_limit=10 if get_space() else None, 21 | time_limit=90 if get_space() else None, 22 | ) 23 | 24 | stream.ui = ui 25 | 26 | app = FastAPI() 27 | 28 | 29 | @app.get("/") 30 | async def _(): 31 | url = "/ui" if not get_space() else "https://fastrtc-llama-code-editor.hf.space/ui/" 32 | return RedirectResponse(url) 33 | 34 | 35 | if __name__ == "__main__": 36 | import os 37 | 38 | if (mode := os.getenv("MODE")) == "UI": 39 | stream.ui.launch(server_port=7860, server_name="0.0.0.0") 40 | elif mode == "PHONE": 41 | stream.fastphone(host="0.0.0.0", port=7860) 42 | else: 43 | import uvicorn 44 | 45 | uvicorn.run(app, host="0.0.0.0", port=7860) 46 | -------------------------------------------------------------------------------- /demo/llama_code_editor/assets/sandbox.html: -------------------------------------------------------------------------------- 1 |
📦
No Application Created
-------------------------------------------------------------------------------- /demo/llama_code_editor/assets/spinner.html: -------------------------------------------------------------------------------- 1 |
Generating your application...
This may take a few moments
-------------------------------------------------------------------------------- /demo/llama_code_editor/handler.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import re 4 | from pathlib import Path 5 | 6 | import numpy as np 7 | import openai 8 | from dotenv import load_dotenv 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | audio_to_bytes, 13 | ) 14 | from groq import Groq 15 | 16 | load_dotenv() 17 | 18 | groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY")) 19 | 20 | client = openai.OpenAI( 21 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 22 | base_url="https://api.sambanova.ai/v1", 23 | ) 24 | 25 | path = Path(__file__).parent / "assets" 26 | 27 | spinner_html = open(path / "spinner.html").read() 28 | 29 | 30 | system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response." 31 | user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}" 32 | 33 | 34 | def extract_html_content(text): 35 | """ 36 | Extract content including HTML tags. 37 | """ 38 | match = re.search(r".*?", text, re.DOTALL) 39 | return match.group(0) if match else None 40 | 41 | 42 | def display_in_sandbox(code): 43 | encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8") 44 | data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}" 45 | return f'' 46 | 47 | 48 | def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str): 49 | yield AdditionalOutputs(history, spinner_html) 50 | 51 | text = groq_client.audio.transcriptions.create( 52 | file=("audio-file.mp3", audio_to_bytes(user_message)), 53 | model="whisper-large-v3-turbo", 54 | response_format="verbose_json", 55 | ).text 56 | 57 | user_msg_formatted = user_prompt.format(user_message=text, code=code) 58 | history.append({"role": "user", "content": user_msg_formatted}) 59 | 60 | response = client.chat.completions.create( 61 | model="Meta-Llama-3.1-70B-Instruct", 62 | messages=history, # type: ignore 63 | temperature=0.1, 64 | top_p=0.1, 65 | ) 66 | 67 | output = response.choices[0].message.content 68 | html_code = extract_html_content(output) 69 | history.append({"role": "assistant", "content": output}) 70 | yield AdditionalOutputs(history, html_code) 71 | 72 | 73 | CodeHandler = ReplyOnPause(generate) # type: ignore 74 | -------------------------------------------------------------------------------- /demo/llama_code_editor/requirements.in: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | groq 3 | openai 4 | python-dotenv 5 | twilio -------------------------------------------------------------------------------- /demo/llama_code_editor/ui.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import gradio as gr 4 | from dotenv import load_dotenv 5 | from fastrtc import WebRTC, get_twilio_turn_credentials 6 | from gradio.utils import get_space 7 | 8 | try: 9 | from demo.llama_code_editor.handler import ( 10 | CodeHandler, 11 | display_in_sandbox, 12 | system_prompt, 13 | ) 14 | except (ImportError, ModuleNotFoundError): 15 | from handler import CodeHandler, display_in_sandbox, system_prompt 16 | 17 | load_dotenv() 
18 | 19 | path = Path(__file__).parent / "assets" 20 | 21 | with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo: 22 | history = gr.State([{"role": "system", "content": system_prompt}]) 23 | with gr.Row(): 24 | with gr.Column(scale=1): 25 | gr.HTML( 26 | """ 27 |

28 | Llama Code Editor 29 | 30 | 31 | Powered by SambaNova and Gradio-WebRTC ⚡️ 32 | 33 | 34 | Create and edit single-file HTML applications with just your voice! 35 | 36 | 37 | Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation. 38 |
39 | """ 40 | ) 41 | webrtc = WebRTC( 42 | rtc_configuration=get_twilio_turn_credentials() 43 | if get_space() 44 | else None, 45 | mode="send", 46 | modality="audio", 47 | ) 48 | with gr.Column(scale=10): 49 | with gr.Tabs(): 50 | with gr.Tab("Sandbox"): 51 | sandbox = gr.HTML(value=open(path / "sandbox.html").read()) 52 | with gr.Tab("Code"): 53 | code = gr.Code( 54 | language="html", 55 | max_lines=50, 56 | interactive=False, 57 | elem_classes="code-component", 58 | ) 59 | with gr.Tab("Chat"): 60 | cb = gr.Chatbot(type="messages") 61 | 62 | webrtc.stream( 63 | CodeHandler, 64 | inputs=[webrtc, history, code], 65 | outputs=[webrtc], 66 | time_limit=90 if get_space() else None, 67 | concurrency_limit=10 if get_space() else None, 68 | ) 69 | webrtc.on_additional_outputs( 70 | lambda history, code: (history, code, history), outputs=[history, code, cb] 71 | ) 72 | code.change(display_in_sandbox, code, sandbox, queue=False) 73 | 74 | if __name__ == "__main__": 75 | demo.launch() 76 | -------------------------------------------------------------------------------- /demo/llm_voice_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: LLM Voice Chat 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to an LLM with ElevenLabs 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/llm_voice_chat/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: LLM Voice Chat (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: LLM Voice by ElevenLabs (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/llm_voice_chat/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import gradio as gr 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | from elevenlabs import ElevenLabs 8 | from fastapi import FastAPI 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | Stream, 13 | get_stt_model, 14 | get_twilio_turn_credentials, 15 | ) 16 | from gradio.utils import get_space 17 | from groq import Groq 18 | from numpy.typing import NDArray 19 | 20 | load_dotenv() 21 | groq_client = Groq() 22 | tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY")) 23 | stt_model = get_stt_model() 24 | 25 | 26 | # See "Talk to Claude" in Cookbook for an example of how to keep 27 | # track of the chat history. 
28 | def response( 29 | audio: tuple[int, NDArray[np.int16 | np.float32]], 30 | chatbot: list[dict] | None = None, 31 | ): 32 | chatbot = chatbot or [] 33 | messages = [{"role": d["role"], "content": d["content"]} for d in chatbot] 34 | start = time.time() 35 | text = stt_model.stt(audio) 36 | print("transcription", time.time() - start) 37 | print("prompt", text) 38 | chatbot.append({"role": "user", "content": text}) 39 | yield AdditionalOutputs(chatbot) 40 | messages.append({"role": "user", "content": text}) 41 | response_text = ( 42 | groq_client.chat.completions.create( 43 | model="llama-3.1-8b-instant", 44 | max_tokens=200, 45 | messages=messages, # type: ignore 46 | ) 47 | .choices[0] 48 | .message.content 49 | ) 50 | 51 | chatbot.append({"role": "assistant", "content": response_text}) 52 | 53 | for i, chunk in enumerate( 54 | tts_client.text_to_speech.convert_as_stream( 55 | text=response_text, # type: ignore 56 | voice_id="JBFqnCBsd6RMkjVDRZzb", 57 | model_id="eleven_multilingual_v2", 58 | output_format="pcm_24000", 59 | ) 60 | ): 61 | if i == 0: 62 | yield AdditionalOutputs(chatbot) 63 | audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1) 64 | yield (24000, audio_array) 65 | 66 | 67 | chatbot = gr.Chatbot(type="messages") 68 | stream = Stream( 69 | modality="audio", 70 | mode="send-receive", 71 | handler=ReplyOnPause(response, input_sample_rate=16000), 72 | additional_outputs_handler=lambda a, b: b, 73 | additional_inputs=[chatbot], 74 | additional_outputs=[chatbot], 75 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 76 | concurrency_limit=5 if get_space() else None, 77 | time_limit=90 if get_space() else None, 78 | ui_args={"title": "LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)"}, 79 | ) 80 | 81 | # Mount the STREAM UI to the FastAPI app 82 | # Because I don't want to build the UI manually 83 | app = FastAPI() 84 | app = gr.mount_gradio_app(app, stream.ui, path="/") 85 | 86 | 87 | if __name__ == "__main__": 88 | import os 89 | 90 | os.environ["GRADIO_SSR_MODE"] = "false" 91 | 92 | if (mode := os.getenv("MODE")) == "UI": 93 | stream.ui.launch(server_port=7860) 94 | elif mode == "PHONE": 95 | stream.fastphone(host="0.0.0.0", port=7860) 96 | else: 97 | stream.ui.launch(server_port=7860) 98 | -------------------------------------------------------------------------------- /demo/llm_voice_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[stopword] 2 | python-dotenv 3 | openai 4 | twilio 5 | groq 6 | elevenlabs 7 | -------------------------------------------------------------------------------- /demo/moonshine_live/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Moonshine Live Transcription 3 | emoji: 🌕 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.17.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Real-time captions with Moonshine ONNX 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | models: [onnx-community/moonshine-base-ONNX, UsefulSensors/moonshine-base] 14 | --- 15 | 16 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/moonshine_live/app.py: -------------------------------------------------------------------------------- 1 | from 
collections.abc import Generator 2 | from functools import cache 3 | from typing import Literal 4 | 5 | import gradio as gr 6 | import numpy as np 7 | from dotenv import load_dotenv 8 | from fastrtc import ( 9 | AdditionalOutputs, 10 | ReplyOnPause, 11 | Stream, 12 | audio_to_float32, 13 | get_twilio_turn_credentials, 14 | ) 15 | from moonshine_onnx import MoonshineOnnxModel, load_tokenizer 16 | from numpy.typing import NDArray 17 | 18 | load_dotenv() 19 | 20 | 21 | @cache 22 | def load_moonshine( 23 | model_name: Literal["moonshine/base", "moonshine/tiny"], 24 | ) -> MoonshineOnnxModel: 25 | return MoonshineOnnxModel(model_name=model_name) 26 | 27 | 28 | tokenizer = load_tokenizer() 29 | 30 | 31 | def stt( 32 | audio: tuple[int, NDArray[np.int16 | np.float32]], 33 | model_name: Literal["moonshine/base", "moonshine/tiny"], 34 | captions: str, 35 | ) -> Generator[AdditionalOutputs, None, None]: 36 | moonshine = load_moonshine(model_name) 37 | sr, audio_np = audio # type: ignore 38 | if audio_np.dtype == np.int16: 39 | audio_np = audio_to_float32(audio) 40 | if audio_np.ndim == 1: 41 | audio_np = audio_np.reshape(1, -1) 42 | tokens = moonshine.generate(audio_np) 43 | yield AdditionalOutputs( 44 | (captions + "\n" + tokenizer.decode_batch(tokens)[0]).strip() 45 | ) 46 | 47 | 48 | captions = gr.Textbox(label="Captions") 49 | stream = Stream( 50 | ReplyOnPause(stt, input_sample_rate=16000), 51 | modality="audio", 52 | mode="send", 53 | ui_args={ 54 | "title": "Live Captions by Moonshine", 55 | "icon": "default-favicon.ico", 56 | "icon_button_color": "#5c5c5c", 57 | "pulse_color": "#a7c6fc", 58 | "icon_radius": 0, 59 | }, 60 | rtc_configuration=get_twilio_turn_credentials(), 61 | additional_inputs=[ 62 | gr.Radio( 63 | choices=["moonshine/base", "moonshine/tiny"], 64 | value="moonshine/base", 65 | label="Model", 66 | ), 67 | captions, 68 | ], 69 | additional_outputs=[captions], 70 | additional_outputs_handler=lambda prev, current: (prev + "\n" + current).strip(), 71 | ) 72 | 73 | if __name__ == "__main__": 74 | stream.ui.launch() 75 | -------------------------------------------------------------------------------- /demo/moonshine_live/default-favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/moonshine_live/default-favicon.ico -------------------------------------------------------------------------------- /demo/moonshine_live/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | useful-moonshine-onnx@git+https://git@github.com/usefulsensors/moonshine.git#subdirectory=moonshine-onnx 3 | twilio -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/README.md: -------------------------------------------------------------------------------- 1 | # FastRTC POC 2 | A simple POC for a fast real-time voice chat application using FastAPI and FastRTC by [rohanprichard](https://github.com/rohanprichard). I wanted to make one as an example with more production-ready languages, rather than just Gradio. 3 | 4 | ## Setup 5 | 1. Set your API keys in an `.env` file based on the `.env.example` file 6 | 2. Create a virtual environment and install the dependencies 7 | ```bash 8 | python3 -m venv env 9 | source env/bin/activate 10 | pip install -r requirements.txt 11 | ``` 12 | 13 | 3. Run the server 14 | ```bash 15 | ./run.sh 16 | ``` 17 | 4. 
Navigate into the frontend directory in another terminal 18 | ```bash 19 | cd frontend/fastrtc-demo 20 | ``` 21 | 5. Run the frontend 22 | ```bash 23 | npm install 24 | npm run dev 25 | ``` 26 | 6. Go to the URL and click the microphone icon to start chatting! 27 | 28 | 7. Reset chats by clicking the trash button on the bottom right 29 | 30 | ## Notes 31 | You can choose to not install the requirements for TTS and STT by removing the `[tts, stt]` from the specifier in the `requirements.txt` file. 32 | 33 | - The STT is currently using the ElevenLabs API. 34 | - The LLM is currently using the OpenAI API. 35 | - The TTS is currently using the ElevenLabs API. 36 | - The VAD is currently using the Silero VAD model. 37 | - You may need to install ffmpeg if you get errors in STT 38 | 39 | The prompt can be changed in the `backend/server.py` file and modified as you like. 40 | 41 | ### Audio Parameters 42 | 43 | #### AlgoOptions 44 | 45 | - **audio_chunk_duration**: Length of audio chunks in seconds. Smaller values allow for faster processing but may be less accurate. 46 | - **started_talking_threshold**: If a chunk has more than this many seconds of speech, the system considers that the user has started talking. 47 | - **speech_threshold**: After the user has started speaking, if a chunk has less than this many seconds of speech, the system considers that the user has paused. 48 | 49 | #### SileroVadOptions 50 | 51 | - **threshold**: Speech probability threshold (0.0-1.0). Values above this are considered speech. Higher values are more strict. 52 | - **min_speech_duration_ms**: Speech segments shorter than this (in milliseconds) are filtered out. 53 | - **min_silence_duration_ms**: The system waits for this duration of silence (in milliseconds) before considering speech to be finished. 54 | - **speech_pad_ms**: Padding added to both ends of detected speech segments to prevent cutting off words. 55 | - **max_speech_duration_s**: Maximum allowed duration for a speech segment in seconds. Prevents indefinite listening. 56 | 57 | ### Tuning Recommendations 58 | 59 | - If the AI interrupts you too early: 60 | - Increase `min_silence_duration_ms` 61 | - Increase `speech_threshold` 62 | - Increase `speech_pad_ms` 63 | 64 | - If the AI is slow to respond after you finish speaking: 65 | - Decrease `min_silence_duration_ms` 66 | - Decrease `speech_threshold` 67 | 68 | - If the system fails to detect some speech: 69 | - Lower the `threshold` value 70 | - Decrease `started_talking_threshold` 71 | 72 | 73 | ## Credits: 74 | Credit for the UI components goes to Shadcn, Aceternity UI and Kokonut UI. 
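
For quick reference, the sketch below shows where each of the parameters above is passed. It mirrors the configuration in `backend/server.py`, with a trivial echo handler standing in for the STT → LLM → TTS pipeline; treat the values as a starting point to tune, not as requirements.

```python
import numpy as np
from fastrtc import AlgoOptions, ReplyOnPause, SileroVadOptions, Stream


def echo(audio: tuple[int, np.ndarray]):
    # Placeholder handler: yield audio chunks back to the caller.
    # In backend/server.py this is where STT, the LLM call, and TTS happen.
    yield audio


stream = Stream(
    ReplyOnPause(
        echo,
        algo_options=AlgoOptions(
            audio_chunk_duration=0.5,       # seconds of audio per analysis chunk
            started_talking_threshold=0.1,  # speech seconds before "user started talking"
            speech_threshold=0.03,          # below this, the chunk counts toward a pause
        ),
        model_options=SileroVadOptions(
            threshold=0.75,                 # speech probability cutoff
            min_speech_duration_ms=250,
            min_silence_duration_ms=1500,   # raise this if the AI interrupts too early
            speech_pad_ms=400,
            max_speech_duration_s=15,
        ),
    ),
    modality="audio",
    mode="send-receive",
)
```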
75 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/backend/env.py: -------------------------------------------------------------------------------- 1 | from dotenv import load_dotenv 2 | import os 3 | 4 | load_dotenv() 5 | 6 | LLM_API_KEY = os.getenv("LLM_API_KEY") 7 | ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") 8 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/backend/server.py: -------------------------------------------------------------------------------- 1 | import fastapi 2 | from fastrtc import ReplyOnPause, Stream, AlgoOptions, SileroVadOptions 3 | from fastrtc.utils import audio_to_bytes, audio_to_float32 4 | from openai import OpenAI 5 | import logging 6 | import time 7 | from fastapi.middleware.cors import CORSMiddleware 8 | from elevenlabs import VoiceSettings, stream 9 | from elevenlabs.client import ElevenLabs 10 | import numpy as np 11 | 12 | from .env import LLM_API_KEY, ELEVENLABS_API_KEY 13 | 14 | 15 | sys_prompt = """ 16 | You are a helpful assistant. You are witty, engaging and fun. You love being interactive with the user. 17 | You also can add minimalistic utterances like 'uh-huh' or 'mm-hmm' to the conversation to make it more natural. However, only vocalization are allowed, no actions or other non-vocal sounds. 18 | Begin a conversation with a self-deprecating joke like 'I'm not sure if I'm ready for this...' or 'I bet you already regret clicking that button...' 19 | """ 20 | 21 | messages = [{"role": "system", "content": sys_prompt}] 22 | 23 | openai_client = OpenAI(api_key=LLM_API_KEY) 24 | 25 | elevenlabs_client = ElevenLabs(api_key=ELEVENLABS_API_KEY) 26 | 27 | logging.basicConfig(level=logging.INFO) 28 | 29 | 30 | def echo(audio): 31 | stt_time = time.time() 32 | 33 | logging.info("Performing STT") 34 | 35 | transcription = elevenlabs_client.speech_to_text.convert( 36 | file=audio_to_bytes(audio), 37 | model_id="scribe_v1", 38 | tag_audio_events=False, 39 | language_code="eng", 40 | diarize=False, 41 | ) 42 | prompt = transcription.text 43 | if prompt == "": 44 | logging.info("STT returned empty string") 45 | return 46 | logging.info(f"STT response: {prompt}") 47 | 48 | messages.append({"role": "user", "content": prompt}) 49 | 50 | logging.info(f"STT took {time.time() - stt_time} seconds") 51 | 52 | llm_time = time.time() 53 | 54 | def text_stream(): 55 | global full_response 56 | full_response = "" 57 | 58 | response = openai_client.chat.completions.create( 59 | model="gpt-3.5-turbo", messages=messages, max_tokens=200, stream=True 60 | ) 61 | 62 | for chunk in response: 63 | if chunk.choices[0].finish_reason == "stop": 64 | break 65 | if chunk.choices[0].delta.content: 66 | full_response += chunk.choices[0].delta.content 67 | yield chunk.choices[0].delta.content 68 | 69 | audio_stream = elevenlabs_client.generate( 70 | text=text_stream(), 71 | voice="Rachel", # Cassidy is also really good 72 | voice_settings=VoiceSettings( 73 | similarity_boost=0.9, stability=0.6, style=0.4, speed=1 74 | ), 75 | model="eleven_multilingual_v2", 76 | output_format="pcm_24000", 77 | stream=True, 78 | ) 79 | 80 | for audio_chunk in audio_stream: 81 | audio_array = audio_to_float32( 82 | np.frombuffer(audio_chunk, dtype=np.int16) 83 | ) 84 | yield (24000, audio_array) 85 | 86 | messages.append({"role": "assistant", "content": full_response + " "}) 87 | logging.info(f"LLM response: {full_response}") 88 | logging.info(f"LLM took {time.time() - llm_time} 
seconds") 89 | 90 | 91 | stream = Stream( 92 | ReplyOnPause( 93 | echo, 94 | algo_options=AlgoOptions( 95 | audio_chunk_duration=0.5, 96 | started_talking_threshold=0.1, 97 | speech_threshold=0.03, 98 | ), 99 | model_options=SileroVadOptions( 100 | threshold=0.75, 101 | min_speech_duration_ms=250, 102 | min_silence_duration_ms=1500, 103 | speech_pad_ms=400, 104 | max_speech_duration_s=15, 105 | ), 106 | ), 107 | modality="audio", 108 | mode="send-receive", 109 | ) 110 | 111 | app = fastapi.FastAPI() 112 | 113 | app.add_middleware( 114 | CORSMiddleware, 115 | allow_origins=["*"], 116 | allow_credentials=True, 117 | allow_methods=["*"], 118 | allow_headers=["*"], 119 | ) 120 | 121 | stream.mount(app) 122 | 123 | 124 | @app.get("/reset") 125 | async def reset(): 126 | global messages 127 | logging.info("Resetting chat") 128 | messages = [{"role": "system", "content": sys_prompt}] 129 | return {"status": "success"} 130 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.* 7 | .yarn/* 8 | !.yarn/patches 9 | !.yarn/plugins 10 | !.yarn/releases 11 | !.yarn/versions 12 | 13 | # testing 14 | /coverage 15 | 16 | # next.js 17 | /.next/ 18 | /out/ 19 | 20 | # production 21 | /build 22 | 23 | # misc 24 | .DS_Store 25 | *.pem 26 | 27 | # debug 28 | npm-debug.log* 29 | yarn-debug.log* 30 | yarn-error.log* 31 | .pnpm-debug.log* 32 | 33 | # env files (can opt-in for committing if needed) 34 | .env* 35 | 36 | # vercel 37 | .vercel 38 | 39 | # typescript 40 | *.tsbuildinfo 41 | next-env.d.ts 42 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/README.md: -------------------------------------------------------------------------------- 1 | This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). 2 | 3 | ## Getting Started 4 | 5 | First, run the development server: 6 | 7 | ```bash 8 | npm run dev 9 | # or 10 | yarn dev 11 | # or 12 | pnpm dev 13 | # or 14 | bun dev 15 | ``` 16 | 17 | Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. 18 | 19 | You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. 20 | 21 | This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. 22 | 23 | ## Learn More 24 | 25 | To learn more about Next.js, take a look at the following resources: 26 | 27 | - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. 28 | - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 29 | 30 | You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! 31 | 32 | ## Deploy on Vercel 33 | 34 | The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. 
35 | 36 | Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. 37 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/nextjs_voice_chat/frontend/fastrtc-demo/app/favicon.ico -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Geist, Geist_Mono } from "next/font/google"; 3 | import "./globals.css"; 4 | import { ThemeProvider } from "@/components/theme-provider"; 5 | import { ThemeTransition } from "@/components/ui/theme-transition"; 6 | 7 | const geistSans = Geist({ 8 | variable: "--font-geist-sans", 9 | subsets: ["latin"], 10 | }); 11 | 12 | const geistMono = Geist_Mono({ 13 | variable: "--font-geist-mono", 14 | subsets: ["latin"], 15 | }); 16 | 17 | export const metadata: Metadata = { 18 | title: "FastRTC Demo", 19 | description: "Interactive WebRTC demo with audio visualization", 20 | }; 21 | 22 | export default function RootLayout({ 23 | children, 24 | }: Readonly<{ 25 | children: React.ReactNode; 26 | }>) { 27 | return ( 28 | 29 | 32 | 38 | {children} 39 | 40 | 41 | 42 | 43 | ); 44 | } 45 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/app/page.tsx: -------------------------------------------------------------------------------- 1 | import { BackgroundCircleProvider } from "@/components/background-circle-provider"; 2 | import { ThemeToggle } from "@/components/ui/theme-toggle"; 3 | import { ResetChat } from "@/components/ui/reset-chat"; 4 | export default function Home() { 5 | return ( 6 |
7 | 8 |
9 | 10 |
11 |
12 | 13 |
14 |
15 | ); 16 | } 17 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "new-york", 4 | "rsc": true, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "", 8 | "css": "app/globals.css", 9 | "baseColor": "slate", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils", 16 | "ui": "@/components/ui", 17 | "lib": "@/lib", 18 | "hooks": "@/hooks" 19 | }, 20 | "iconLibrary": "lucide" 21 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/background-circle-provider.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import { useState, useEffect, useRef, useCallback } from "react"; 4 | import { BackgroundCircles } from "@/components/ui/background-circles"; 5 | import { AIVoiceInput } from "@/components/ui/ai-voice-input"; 6 | import { WebRTCClient } from "@/lib/webrtc-client"; 7 | 8 | export function BackgroundCircleProvider() { 9 | const [currentVariant, setCurrentVariant] = 10 | useState("octonary"); 11 | const [isConnected, setIsConnected] = useState(false); 12 | const [webrtcClient, setWebrtcClient] = useState(null); 13 | const [audioLevel, setAudioLevel] = useState(0); 14 | const audioRef = useRef(null); 15 | 16 | // Memoize callbacks to prevent recreation on each render 17 | const handleConnected = useCallback(() => setIsConnected(true), []); 18 | const handleDisconnected = useCallback(() => setIsConnected(false), []); 19 | 20 | const handleAudioStream = useCallback((stream: MediaStream) => { 21 | if (audioRef.current) { 22 | audioRef.current.srcObject = stream; 23 | } 24 | }, []); 25 | 26 | const handleAudioLevel = useCallback((level: number) => { 27 | // Apply some smoothing to the audio level 28 | setAudioLevel(prev => prev * 0.7 + level * 0.3); 29 | }, []); 30 | 31 | // Get all available variants 32 | const variants = Object.keys( 33 | COLOR_VARIANTS 34 | ) as (keyof typeof COLOR_VARIANTS)[]; 35 | 36 | // Function to change to the next color variant 37 | const changeVariant = () => { 38 | const currentIndex = variants.indexOf(currentVariant); 39 | const nextVariant = variants[(currentIndex + 1) % variants.length]; 40 | setCurrentVariant(nextVariant); 41 | }; 42 | 43 | useEffect(() => { 44 | // Initialize WebRTC client with memoized callbacks 45 | const client = new WebRTCClient({ 46 | onConnected: handleConnected, 47 | onDisconnected: handleDisconnected, 48 | onAudioStream: handleAudioStream, 49 | onAudioLevel: handleAudioLevel 50 | }); 51 | setWebrtcClient(client); 52 | 53 | return () => { 54 | client.disconnect(); 55 | }; 56 | }, [handleConnected, handleDisconnected, handleAudioStream, handleAudioLevel]); 57 | 58 | const handleStart = () => { 59 | webrtcClient?.connect(); 60 | }; 61 | 62 | const handleStop = () => { 63 | webrtcClient?.disconnect(); 64 | }; 65 | 66 | return ( 67 |
71 | 76 |
77 | 82 |
83 |
85 | ); 86 | } 87 | 88 | export default { BackgroundCircleProvider } 89 | 90 | const COLOR_VARIANTS = { 91 | primary: { 92 | border: [ 93 | "border-emerald-500/60", 94 | "border-cyan-400/50", 95 | "border-slate-600/30", 96 | ], 97 | gradient: "from-emerald-500/30", 98 | }, 99 | secondary: { 100 | border: [ 101 | "border-violet-500/60", 102 | "border-fuchsia-400/50", 103 | "border-slate-600/30", 104 | ], 105 | gradient: "from-violet-500/30", 106 | }, 107 | senary: { 108 | border: [ 109 | "border-blue-500/60", 110 | "border-sky-400/50", 111 | "border-slate-600/30", 112 | ], 113 | gradient: "from-blue-500/30", 114 | }, // blue 115 | octonary: { 116 | border: [ 117 | "border-red-500/60", 118 | "border-rose-400/50", 119 | "border-slate-600/30", 120 | ], 121 | gradient: "from-red-500/30", 122 | }, 123 | } as const; -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/theme-provider.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { createContext, useContext, useEffect, useState } from "react"; 4 | 5 | type Theme = "light" | "dark" | "system"; 6 | 7 | type ThemeProviderProps = { 8 | children: React.ReactNode; 9 | defaultTheme?: Theme; 10 | storageKey?: string; 11 | attribute?: string; 12 | enableSystem?: boolean; 13 | disableTransitionOnChange?: boolean; 14 | }; 15 | 16 | type ThemeProviderState = { 17 | theme: Theme; 18 | setTheme: (theme: Theme) => void; 19 | }; 20 | 21 | const initialState: ThemeProviderState = { 22 | theme: "system", 23 | setTheme: () => null, 24 | }; 25 | 26 | const ThemeProviderContext = createContext(initialState); 27 | 28 | export function ThemeProvider({ 29 | children, 30 | defaultTheme = "system", 31 | storageKey = "theme", 32 | attribute = "class", 33 | enableSystem = true, 34 | disableTransitionOnChange = false, 35 | ...props 36 | }: ThemeProviderProps) { 37 | const [theme, setTheme] = useState(defaultTheme); 38 | 39 | useEffect(() => { 40 | const savedTheme = localStorage.getItem(storageKey) as Theme | null; 41 | 42 | if (savedTheme) { 43 | setTheme(savedTheme); 44 | } else if (defaultTheme === "system" && enableSystem) { 45 | const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches 46 | ? "dark" 47 | : "light"; 48 | setTheme(systemTheme); 49 | } 50 | }, [defaultTheme, storageKey, enableSystem]); 51 | 52 | useEffect(() => { 53 | const root = window.document.documentElement; 54 | 55 | if (disableTransitionOnChange) { 56 | root.classList.add("no-transitions"); 57 | 58 | // Force a reflow 59 | window.getComputedStyle(root).getPropertyValue("opacity"); 60 | 61 | setTimeout(() => { 62 | root.classList.remove("no-transitions"); 63 | }, 0); 64 | } 65 | 66 | root.classList.remove("light", "dark"); 67 | 68 | if (theme === "system" && enableSystem) { 69 | const systemTheme = window.matchMedia("(prefers-color-scheme: dark)").matches 70 | ? 
"dark" 71 | : "light"; 72 | root.classList.add(systemTheme); 73 | } else { 74 | root.classList.add(theme); 75 | } 76 | 77 | localStorage.setItem(storageKey, theme); 78 | }, [theme, storageKey, enableSystem, disableTransitionOnChange]); 79 | 80 | const value = { 81 | theme, 82 | setTheme: (theme: Theme) => { 83 | setTheme(theme); 84 | }, 85 | }; 86 | 87 | return ( 88 | 89 | {children} 90 | 91 | ); 92 | } 93 | 94 | export const useTheme = () => { 95 | const context = useContext(ThemeProviderContext); 96 | 97 | if (context === undefined) 98 | throw new Error("useTheme must be used within a ThemeProvider"); 99 | 100 | return context; 101 | }; 102 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/ai-voice-input.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { Mic, Square } from "lucide-react"; 4 | import { useState, useEffect } from "react"; 5 | import { cn } from "@/lib/utils"; 6 | 7 | interface AIVoiceInputProps { 8 | onStart?: () => void; 9 | onStop?: (duration: number) => void; 10 | isConnected?: boolean; 11 | className?: string; 12 | } 13 | 14 | export function AIVoiceInput({ 15 | onStart, 16 | onStop, 17 | isConnected = false, 18 | className 19 | }: AIVoiceInputProps) { 20 | const [active, setActive] = useState(false); 21 | const [time, setTime] = useState(0); 22 | const [isClient, setIsClient] = useState(false); 23 | const [status, setStatus] = useState<'disconnected' | 'connecting' | 'connected'>('disconnected'); 24 | 25 | useEffect(() => { 26 | setIsClient(true); 27 | }, []); 28 | 29 | useEffect(() => { 30 | let intervalId: NodeJS.Timeout; 31 | 32 | if (active) { 33 | intervalId = setInterval(() => { 34 | setTime((t) => t + 1); 35 | }, 1000); 36 | } else { 37 | setTime(0); 38 | } 39 | 40 | return () => clearInterval(intervalId); 41 | }, [active]); 42 | 43 | useEffect(() => { 44 | if (isConnected) { 45 | setStatus('connected'); 46 | setActive(true); 47 | } else { 48 | setStatus('disconnected'); 49 | setActive(false); 50 | } 51 | }, [isConnected]); 52 | 53 | const formatTime = (seconds: number) => { 54 | const mins = Math.floor(seconds / 60); 55 | const secs = seconds % 60; 56 | return `${mins.toString().padStart(2, "0")}:${secs.toString().padStart(2, "0")}`; 57 | }; 58 | 59 | const handleStart = () => { 60 | setStatus('connecting'); 61 | onStart?.(); 62 | }; 63 | 64 | const handleStop = () => { 65 | onStop?.(time); 66 | setStatus('disconnected'); 67 | }; 68 | 69 | return ( 70 |
71 |
72 |
75 | {status === 'connected' ? 'Connected' : status === 'connecting' ? 'Connecting...' : 'Disconnected'} 76 |
77 | 78 | 100 | 101 | 109 | {formatTime(time)} 110 | 111 |
112 |
113 | ); 114 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/reset-chat.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import { Trash } from "lucide-react" 4 | 5 | export function ResetChat() { 6 | return ( 7 | 16 | ) 17 | } 18 | 19 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-toggle.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useTheme } from "@/components/theme-provider"; 4 | import { cn } from "@/lib/utils"; 5 | import { Moon, Sun } from "lucide-react"; 6 | import { useRef } from "react"; 7 | 8 | interface ThemeToggleProps { 9 | className?: string; 10 | } 11 | 12 | export function ThemeToggle({ className }: ThemeToggleProps) { 13 | const { theme } = useTheme(); 14 | const buttonRef = useRef(null); 15 | 16 | const toggleTheme = () => { 17 | // Instead of directly changing the theme, dispatch a custom event 18 | const newTheme = theme === "light" ? "dark" : "light"; 19 | 20 | // Dispatch custom event with the new theme 21 | window.dispatchEvent( 22 | new CustomEvent('themeToggleRequest', { 23 | detail: { theme: newTheme } 24 | }) 25 | ); 26 | }; 27 | 28 | return ( 29 | 60 | ); 61 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/components/ui/theme-transition.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { useTheme } from "@/components/theme-provider"; 4 | import { useEffect, useState } from "react"; 5 | import { motion, AnimatePresence } from "framer-motion"; 6 | 7 | interface ThemeTransitionProps { 8 | className?: string; 9 | } 10 | 11 | export function ThemeTransition({ className }: ThemeTransitionProps) { 12 | const { theme, setTheme } = useTheme(); 13 | const [position, setPosition] = useState({ x: 0, y: 0 }); 14 | const [isAnimating, setIsAnimating] = useState(false); 15 | const [pendingTheme, setPendingTheme] = useState(null); 16 | const [visualTheme, setVisualTheme] = useState(theme); 17 | 18 | // Track mouse/touch position for click events 19 | useEffect(() => { 20 | const handleMouseMove = (e: MouseEvent) => { 21 | setPosition({ x: e.clientX, y: e.clientY }); 22 | }; 23 | 24 | const handleTouchMove = (e: TouchEvent) => { 25 | if (e.touches[0]) { 26 | setPosition({ x: e.touches[0].clientX, y: e.touches[0].clientY }); 27 | } 28 | }; 29 | 30 | window.addEventListener("mousemove", handleMouseMove); 31 | window.addEventListener("touchmove", handleTouchMove); 32 | 33 | return () => { 34 | window.removeEventListener("mousemove", handleMouseMove); 35 | window.removeEventListener("touchmove", handleTouchMove); 36 | }; 37 | }, []); 38 | 39 | // Listen for theme toggle requests 40 | useEffect(() => { 41 | // Custom event for theme toggle requests 42 | const handleThemeToggle = (e: CustomEvent) => { 43 | if (isAnimating) return; // Prevent multiple animations 44 | 45 | const newTheme = e.detail.theme; 46 | if (newTheme === theme) return; 47 | 48 | // Store the pending theme but don't apply it yet 49 | setPendingTheme(newTheme); 50 | setIsAnimating(true); 51 | 52 | // The actual theme will be applied mid-animation 53 | }; 54 | 55 | window.addEventListener('themeToggleRequest' as any, handleThemeToggle as 
EventListener); 56 | 57 | return () => { 58 | window.removeEventListener('themeToggleRequest' as any, handleThemeToggle as EventListener); 59 | }; 60 | }, [theme, isAnimating]); 61 | 62 | // Apply the theme change mid-animation 63 | useEffect(() => { 64 | if (isAnimating && pendingTheme) { 65 | // Set visual theme immediately for the animation 66 | setVisualTheme(pendingTheme); 67 | 68 | // Apply the actual theme change after a delay (mid-animation) 69 | const timer = setTimeout(() => { 70 | setTheme(pendingTheme as any); 71 | }, 400); // Half of the animation duration 72 | 73 | // End the animation after it completes 74 | const endTimer = setTimeout(() => { 75 | setIsAnimating(false); 76 | setPendingTheme(null); 77 | }, 1000); // Match with animation duration 78 | 79 | return () => { 80 | clearTimeout(timer); 81 | clearTimeout(endTimer); 82 | }; 83 | } 84 | }, [isAnimating, pendingTheme, setTheme]); 85 | 86 | return ( 87 | 88 | {isAnimating && ( 89 | 96 | 116 | 117 | )} 118 | 119 | ); 120 | } -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import { dirname } from "path"; 2 | import { fileURLToPath } from "url"; 3 | import { FlatCompat } from "@eslint/eslintrc"; 4 | 5 | const __filename = fileURLToPath(import.meta.url); 6 | const __dirname = dirname(__filename); 7 | 8 | const compat = new FlatCompat({ 9 | baseDirectory: __dirname, 10 | }); 11 | 12 | const eslintConfig = [ 13 | ...compat.extends("next/core-web-vitals", "next/typescript"), 14 | { 15 | rules: { 16 | "no-unused-vars": "off", 17 | "no-explicit-any": "off", 18 | "no-console": "off", 19 | "no-debugger": "off", 20 | "eqeqeq": "off", 21 | "curly": "off", 22 | "quotes": "off", 23 | "semi": "off", 24 | }, 25 | }, 26 | ]; 27 | 28 | export default eslintConfig; 29 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fastrtc-demo", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev --turbopack", 7 | "build": "next build --no-lint", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "class-variance-authority": "^0.7.1", 13 | "clsx": "^2.1.1", 14 | "framer-motion": "^12.4.10", 15 | "lucide-react": "^0.477.0", 16 | "next": "15.2.2-canary.1", 17 | "react": "^19.0.0", 18 | "react-dom": "^19.0.0", 19 | "tailwind-merge": "^3.0.2", 20 | "tailwindcss-animate": "^1.0.7" 21 | }, 22 | "devDependencies": { 23 | "@eslint/eslintrc": 
"^3", 24 | "@tailwindcss/postcss": "^4", 25 | "@types/node": "^20", 26 | "@types/react": "^19", 27 | "@types/react-dom": "^19", 28 | "eslint": "^9", 29 | "eslint-config-next": "15.2.2-canary.1", 30 | "tailwindcss": "^4", 31 | "typescript": "^5" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | const config = { 2 | plugins: ["@tailwindcss/postcss"], 3 | }; 4 | 5 | export default config; 6 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/frontend/fastrtc-demo/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | fastapi 3 | python-dotenv 4 | elevenlabs 5 | fastrtc[vad, stt, tts] -------------------------------------------------------------------------------- /demo/nextjs_voice_chat/run.sh: -------------------------------------------------------------------------------- 1 | uvicorn backend.server:app --host 0.0.0.0 --port 8000 -------------------------------------------------------------------------------- /demo/object_detection/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Object Detection 3 | emoji: 📸 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Use YOLOv10 to 
detect objects in real-time 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/object_detection/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import cv2 5 | import gradio as gr 6 | from fastapi import FastAPI 7 | from fastapi.responses import HTMLResponse 8 | from fastrtc import Stream, get_twilio_turn_credentials 9 | from gradio.utils import get_space 10 | from huggingface_hub import hf_hub_download 11 | from pydantic import BaseModel, Field 12 | 13 | try: 14 | from demo.object_detection.inference import YOLOv10 15 | except (ImportError, ModuleNotFoundError): 16 | from inference import YOLOv10 17 | 18 | 19 | cur_dir = Path(__file__).parent 20 | 21 | model_file = hf_hub_download( 22 | repo_id="onnx-community/yolov10n", filename="onnx/model.onnx" 23 | ) 24 | 25 | model = YOLOv10(model_file) 26 | 27 | 28 | def detection(image, conf_threshold=0.3): 29 | image = cv2.resize(image, (model.input_width, model.input_height)) 30 | print("conf_threshold", conf_threshold) 31 | new_image = model.detect_objects(image, conf_threshold) 32 | return cv2.resize(new_image, (500, 500)) 33 | 34 | 35 | stream = Stream( 36 | handler=detection, 37 | modality="video", 38 | mode="send-receive", 39 | additional_inputs=[gr.Slider(minimum=0, maximum=1, step=0.01, value=0.3)], 40 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 41 | concurrency_limit=2 if get_space() else None, 42 | ) 43 | 44 | app = FastAPI() 45 | 46 | stream.mount(app) 47 | 48 | 49 | @app.get("/") 50 | async def _(): 51 | rtc_config = get_twilio_turn_credentials() if get_space() else None 52 | html_content = open(cur_dir / "index.html").read() 53 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 54 | return HTMLResponse(content=html_content) 55 | 56 | 57 | class InputData(BaseModel): 58 | webrtc_id: str 59 | conf_threshold: float = Field(ge=0, le=1) 60 | 61 | 62 | @app.post("/input_hook") 63 | async def _(data: InputData): 64 | stream.set_input(data.webrtc_id, data.conf_threshold) 65 | 66 | 67 | if __name__ == "__main__": 68 | import os 69 | 70 | if (mode := os.getenv("MODE")) == "UI": 71 | stream.ui.launch(server_port=7860) 72 | elif mode == "PHONE": 73 | stream.fastphone(host="0.0.0.0", port=7860) 74 | else: 75 | import uvicorn 76 | 77 | uvicorn.run(app, host="0.0.0.0", port=7860) 78 | -------------------------------------------------------------------------------- /demo/object_detection/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc 2 | opencv-python 3 | twilio 4 | onnxruntime-gpu -------------------------------------------------------------------------------- /demo/phonic_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Phonic AI Chat 3 | emoji: 🎙️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Phonic AI's speech-to-speech model 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|PHONIC_API_KEY] 13 | python_version: 3.11 14 | --- 15 | 16 | Check out the 
configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/phonic_chat/app.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import base64 3 | import os 4 | 5 | import gradio as gr 6 | from gradio.utils import get_space 7 | import numpy as np 8 | from dotenv import load_dotenv 9 | from fastrtc import ( 10 | AdditionalOutputs, 11 | AsyncStreamHandler, 12 | Stream, 13 | get_twilio_turn_credentials, 14 | audio_to_float32, 15 | wait_for_item, 16 | ) 17 | from phonic.client import PhonicSTSClient, get_voices 18 | 19 | load_dotenv() 20 | 21 | STS_URI = "wss://api.phonic.co/v1/sts/ws" 22 | API_KEY = os.environ["PHONIC_API_KEY"] 23 | SAMPLE_RATE = 44_100 24 | voices = get_voices(API_KEY) 25 | voice_ids = [voice["id"] for voice in voices] 26 | 27 | 28 | class PhonicHandler(AsyncStreamHandler): 29 | def __init__(self): 30 | super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE) 31 | self.output_queue = asyncio.Queue() 32 | self.client = None 33 | 34 | def copy(self) -> AsyncStreamHandler: 35 | return PhonicHandler() 36 | 37 | async def start_up(self): 38 | await self.wait_for_args() 39 | voice_id = self.latest_args[1] 40 | async with PhonicSTSClient(STS_URI, API_KEY) as client: 41 | self.client = client 42 | sts_stream = client.sts( # type: ignore 43 | input_format="pcm_44100", 44 | output_format="pcm_44100", 45 | system_prompt="You are a helpful voice assistant. Respond conversationally.", 46 | # welcome_message="Hello! I'm your voice assistant. How can I help you today?", 47 | voice_id=voice_id, 48 | ) 49 | async for message in sts_stream: 50 | message_type = message.get("type") 51 | if message_type == "audio_chunk": 52 | audio_b64 = message["audio"] 53 | audio_bytes = base64.b64decode(audio_b64) 54 | await self.output_queue.put( 55 | (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16)) 56 | ) 57 | if text := message.get("text"): 58 | msg = {"role": "assistant", "content": text} 59 | await self.output_queue.put(AdditionalOutputs(msg)) 60 | elif message_type == "input_text": 61 | msg = {"role": "user", "content": message["text"]} 62 | await self.output_queue.put(AdditionalOutputs(msg)) 63 | 64 | async def emit(self): 65 | return await wait_for_item(self.output_queue) 66 | 67 | async def receive(self, frame: tuple[int, np.ndarray]) -> None: 68 | if not self.client: 69 | return 70 | audio_float32 = audio_to_float32(frame) 71 | await self.client.send_audio(audio_float32) # type: ignore 72 | 73 | async def shutdown(self): 74 | if self.client: 75 | await self.client._websocket.close() 76 | return super().shutdown() 77 | 78 | 79 | def add_to_chatbot(chatbot, message): 80 | chatbot.append(message) 81 | return chatbot 82 | 83 | 84 | chatbot = gr.Chatbot(type="messages", value=[]) 85 | stream = Stream( 86 | handler=PhonicHandler(), 87 | mode="send-receive", 88 | modality="audio", 89 | additional_inputs=[ 90 | gr.Dropdown( 91 | choices=voice_ids, 92 | value="victoria", 93 | label="Voice", 94 | info="Select a voice from the dropdown", 95 | ) 96 | ], 97 | additional_outputs=[chatbot], 98 | additional_outputs_handler=add_to_chatbot, 99 | ui_args={ 100 | "title": "Phonic Chat (Powered by FastRTC ⚡️)", 101 | }, 102 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 103 | concurrency_limit=5 if get_space() else None, 104 | time_limit=90 if get_space() else None, 105 | ) 106 | 107 | # with 
stream.ui: 108 | # state.change(lambda s: s, inputs=state, outputs=chatbot) 109 | 110 | if __name__ == "__main__": 111 | if (mode := os.getenv("MODE")) == "UI": 112 | stream.ui.launch(server_port=7860) 113 | elif mode == "PHONE": 114 | stream.fastphone(host="0.0.0.0", port=7860) 115 | else: 116 | stream.ui.launch(server_port=7860) 117 | -------------------------------------------------------------------------------- /demo/phonic_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv pip compile requirements.in -o requirements.txt 3 | aiohappyeyeballs==2.4.6 4 | # via aiohttp 5 | aiohttp==3.11.12 6 | # via 7 | # aiohttp-retry 8 | # twilio 9 | aiohttp-retry==2.9.1 10 | # via twilio 11 | aiosignal==1.3.2 12 | # via aiohttp 13 | attrs==25.1.0 14 | # via aiohttp 15 | certifi==2025.1.31 16 | # via requests 17 | cffi==1.17.1 18 | # via sounddevice 19 | charset-normalizer==3.4.1 20 | # via requests 21 | fastrtc==0.0.1 22 | # via -r requirements.in 23 | frozenlist==1.5.0 24 | # via 25 | # aiohttp 26 | # aiosignal 27 | idna==3.10 28 | # via 29 | # requests 30 | # yarl 31 | isort==6.0.0 32 | # via phonic-python 33 | loguru==0.7.3 34 | # via phonic-python 35 | multidict==6.1.0 36 | # via 37 | # aiohttp 38 | # yarl 39 | numpy==2.2.3 40 | # via 41 | # phonic-python 42 | # scipy 43 | phonic-python==0.1.3 44 | # via -r requirements.in 45 | propcache==0.3.0 46 | # via 47 | # aiohttp 48 | # yarl 49 | pycparser==2.22 50 | # via cffi 51 | pyjwt==2.10.1 52 | # via twilio 53 | python-dotenv==1.0.1 54 | # via 55 | # -r requirements.in 56 | # phonic-python 57 | requests==2.32.3 58 | # via 59 | # phonic-python 60 | # twilio 61 | scipy==1.15.2 62 | # via phonic-python 63 | sounddevice==0.5.1 64 | # via phonic-python 65 | twilio==9.4.6 66 | # via -r requirements.in 67 | typing-extensions==4.12.2 68 | # via phonic-python 69 | urllib3==2.3.0 70 | # via requests 71 | websockets==15.0 72 | # via phonic-python 73 | yarl==1.18.3 74 | # via aiohttp 75 | -------------------------------------------------------------------------------- /demo/qwen_phone_chat/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Qwen Phone Chat 3 | emoji: 📞 4 | colorFrom: pink 5 | colorTo: green 6 | sdk: gradio 7 | sdk_version: 5.25.2 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk with Qwen 2.5 Omni over the Phone 12 | --- 13 | 14 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/qwen_phone_chat/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc 2 | websockets>=14.0 -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Azure OpenAI 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Azure OpenAI using their multimodal API 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at 
https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Azure OpenAI (Gradio UI) 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Azure OpenAI (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_azure_openai/requirements.txt: -------------------------------------------------------------------------------- 1 | aiofiles==23.2.1 2 | aiohappyeyeballs==2.6.1 3 | aiohttp==3.11.13 4 | aiohttp-retry==2.9.1 5 | aioice==0.9.0 6 | aiortc==1.10.1 7 | aiosignal==1.3.2 8 | annotated-types==0.7.0 9 | anyio==4.8.0 10 | attrs==25.2.0 11 | audioread==3.0.1 12 | av==13.1.0 13 | babel==2.17.0 14 | certifi==2025.1.31 15 | cffi==1.17.1 16 | charset-normalizer==3.4.1 17 | click==8.1.8 18 | colorama==0.4.6 19 | coloredlogs==15.0.1 20 | colorlog==6.9.0 21 | cryptography==44.0.2 22 | csvw==3.5.1 23 | decorator==5.2.1 24 | distro==1.9.0 25 | dlinfo==2.0.0 26 | dnspython==2.7.0 27 | espeakng-loader==0.2.4 28 | fastapi==0.115.11 29 | fastrtc==0.0.14 30 | ffmpy==0.5.0 31 | filelock==3.17.0 32 | flatbuffers==25.2.10 33 | frozenlist==1.5.0 34 | fsspec==2025.3.0 35 | google-crc32c==1.6.0 36 | gradio==5.20.1 37 | gradio_client==1.7.2 38 | groovy==0.1.2 39 | h11==0.14.0 40 | httpcore==1.0.7 41 | httpx==0.28.1 42 | huggingface-hub==0.29.3 43 | humanfriendly==10.0 44 | idna==3.10 45 | ifaddr==0.2.0 46 | isodate==0.7.2 47 | Jinja2==3.1.6 48 | jiter==0.9.0 49 | joblib==1.4.2 50 | jsonschema==4.23.0 51 | jsonschema-specifications==2024.10.1 52 | kokoro-onnx==0.4.5 53 | language-tags==1.2.0 54 | lazy_loader==0.4 55 | librosa==0.11.0 56 | llvmlite==0.44.0 57 | markdown-it-py==3.0.0 58 | MarkupSafe==2.1.5 59 | mdurl==0.1.2 60 | mpmath==1.3.0 61 | msgpack==1.1.0 62 | multidict==6.1.0 63 | numba==0.61.0 64 | numpy==2.1.3 65 | onnxruntime==1.21.0 66 | openai==1.66.2 67 | orjson==3.10.15 68 | packaging==24.2 69 | pandas==2.2.3 70 | phonemizer-fork==3.3.1 71 | pillow==11.1.0 72 | platformdirs==4.3.6 73 | pooch==1.8.2 74 | propcache==0.3.0 75 | protobuf==6.30.0 76 | pycparser==2.22 77 | pydantic==2.10.6 78 | pydantic_core==2.27.2 79 | pydub==0.25.1 80 | pyee==12.1.1 81 | Pygments==2.19.1 82 | PyJWT==2.10.1 83 | pylibsrtp==0.11.0 84 | pyOpenSSL==25.0.0 85 | pyparsing==3.2.1 86 | python-dateutil==2.9.0.post0 87 | python-dotenv==1.0.1 88 | python-multipart==0.0.20 89 | pytz==2025.1 90 | PyYAML==6.0.2 91 | rdflib==7.1.3 92 | referencing==0.36.2 93 | regex==2024.11.6 94 | requests==2.32.3 95 | rfc3986==1.5.0 96 | rich==13.9.4 97 | rpds-py==0.23.1 98 | ruff==0.9.10 99 | safehttpx==0.1.6 100 | scikit-learn==1.6.1 101 | scipy==1.15.2 102 | segments==2.3.0 103 | semantic-version==2.10.0 104 | shellingham==1.5.4 105 | six==1.17.0 106 | sniffio==1.3.1 107 | sounddevice==0.5.1 108 | soundfile==0.13.1 109 | soxr==0.5.0.post1 110 | starlette==0.46.1 111 | sympy==1.13.3 112 | threadpoolctl==3.5.0 113 | tomlkit==0.13.2 114 | tqdm==4.67.1 115 | twilio==9.5.0 116 | typer==0.15.2 117 | typing_extensions==4.12.2 118 
| tzdata==2025.1 119 | uritemplate==4.1.1 120 | urllib3==2.3.0 121 | uvicorn==0.34.0 122 | websockets==15.0.1 123 | yarl==1.18.3 124 | -------------------------------------------------------------------------------- /demo/talk_to_claude/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Claude 3 | emoji: 👨‍🦰 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Anthropic's Claude 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY, secret|ELEVENLABS_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_claude/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from pathlib import Path 5 | 6 | import anthropic 7 | import gradio as gr 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from elevenlabs import ElevenLabs 11 | from fastapi import FastAPI 12 | from fastapi.responses import HTMLResponse, StreamingResponse 13 | from fastrtc import ( 14 | AdditionalOutputs, 15 | ReplyOnPause, 16 | Stream, 17 | get_tts_model, 18 | get_twilio_turn_credentials, 19 | ) 20 | from fastrtc.utils import audio_to_bytes 21 | from gradio.utils import get_space 22 | from groq import Groq 23 | from pydantic import BaseModel 24 | 25 | load_dotenv() 26 | 27 | groq_client = Groq() 28 | claude_client = anthropic.Anthropic() 29 | tts_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"]) 30 | 31 | curr_dir = Path(__file__).parent 32 | 33 | tts_model = get_tts_model() 34 | 35 | 36 | def response( 37 | audio: tuple[int, np.ndarray], 38 | chatbot: list[dict] | None = None, 39 | ): 40 | chatbot = chatbot or [] 41 | messages = [{"role": d["role"], "content": d["content"]} for d in chatbot] 42 | prompt = groq_client.audio.transcriptions.create( 43 | file=("audio-file.mp3", audio_to_bytes(audio)), 44 | model="whisper-large-v3-turbo", 45 | response_format="verbose_json", 46 | ).text 47 | chatbot.append({"role": "user", "content": prompt}) 48 | yield AdditionalOutputs(chatbot) 49 | messages.append({"role": "user", "content": prompt}) 50 | response = claude_client.messages.create( 51 | model="claude-3-5-haiku-20241022", 52 | max_tokens=512, 53 | messages=messages, # type: ignore 54 | ) 55 | response_text = " ".join( 56 | block.text # type: ignore 57 | for block in response.content 58 | if getattr(block, "type", None) == "text" 59 | ) 60 | chatbot.append({"role": "assistant", "content": response_text}) 61 | 62 | start = time.time() 63 | 64 | print("starting tts", start) 65 | for i, chunk in enumerate(tts_model.stream_tts_sync(response_text)): 66 | print("chunk", i, time.time() - start) 67 | yield chunk 68 | print("finished tts", time.time() - start) 69 | yield AdditionalOutputs(chatbot) 70 | 71 | 72 | chatbot = gr.Chatbot(type="messages") 73 | stream = Stream( 74 | modality="audio", 75 | mode="send-receive", 76 | handler=ReplyOnPause(response), 77 | additional_outputs_handler=lambda a, b: b, 78 | additional_inputs=[chatbot], 79 | additional_outputs=[chatbot], 80 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 81 | concurrency_limit=5 if get_space() else None, 82 | 
time_limit=90 if get_space() else None, 83 | ) 84 | 85 | 86 | class Message(BaseModel): 87 | role: str 88 | content: str 89 | 90 | 91 | class InputData(BaseModel): 92 | webrtc_id: str 93 | chatbot: list[Message] 94 | 95 | 96 | app = FastAPI() 97 | stream.mount(app) 98 | 99 | 100 | @app.get("/") 101 | async def _(): 102 | rtc_config = get_twilio_turn_credentials() if get_space() else None 103 | html_content = (curr_dir / "index.html").read_text() 104 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 105 | return HTMLResponse(content=html_content, status_code=200) 106 | 107 | 108 | @app.post("/input_hook") 109 | async def _(body: InputData): 110 | stream.set_input(body.webrtc_id, body.model_dump()["chatbot"]) 111 | return {"status": "ok"} 112 | 113 | 114 | @app.get("/outputs") 115 | def _(webrtc_id: str): 116 | async def output_stream(): 117 | async for output in stream.output_stream(webrtc_id): 118 | chatbot = output.args[0] 119 | yield f"event: output\ndata: {json.dumps(chatbot[-1])}\n\n" 120 | 121 | return StreamingResponse(output_stream(), media_type="text/event-stream") 122 | 123 | 124 | if __name__ == "__main__": 125 | import os 126 | 127 | if (mode := os.getenv("MODE")) == "UI": 128 | stream.ui.launch(server_port=7860) 129 | elif mode == "PHONE": 130 | stream.fastphone(host="0.0.0.0", port=7860) 131 | else: 132 | import uvicorn 133 | 134 | uvicorn.run(app, host="0.0.0.0", port=7860) 135 | -------------------------------------------------------------------------------- /demo/talk_to_claude/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, tts] 2 | elevenlabs 3 | groq 4 | anthropic 5 | twilio 6 | python-dotenv 7 | -------------------------------------------------------------------------------- /demo/talk_to_gemini/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Gemini 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Gemini using Google's multimodal API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_gemini/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Gemini (Gradio UI) 3 | emoji: ♊️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Gemini (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_gemini/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | python-dotenv 3 | google-genai 4 | twilio 5 | -------------------------------------------------------------------------------- /demo/talk_to_llama4/AV_Huggy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/demo/talk_to_llama4/AV_Huggy.png -------------------------------------------------------------------------------- /demo/talk_to_llama4/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Llama 4 3 | emoji: 🦙 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.23.3 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to Llama 4 using Groq + Cloudflare 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_llama4/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, tts]==0.0.20.rc2 2 | groq 3 | python-dotenv -------------------------------------------------------------------------------- /demo/talk_to_openai/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to OpenAI 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to OpenAI using their multimodal API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_openai/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to OpenAI (Gradio UI) 3 | emoji: 🗣️ 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Talk to OpenAI (Gradio UI) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_openai/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | openai 3 | twilio 4 | python-dotenv -------------------------------------------------------------------------------- /demo/talk_to_sambanova/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Sambanova 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Llama 3.2 - SambaNova API 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_sambanova/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Sambanova (Gradio) 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 
| app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Llama 3.2 - SambaNova API (Gradio) 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN_ALT, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/talk_to_sambanova/app.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import json 3 | import os 4 | from pathlib import Path 5 | 6 | import gradio as gr 7 | import huggingface_hub 8 | import numpy as np 9 | from dotenv import load_dotenv 10 | from fastapi import FastAPI 11 | from fastapi.responses import HTMLResponse, StreamingResponse 12 | from fastrtc import ( 13 | AdditionalOutputs, 14 | ReplyOnPause, 15 | Stream, 16 | get_cloudflare_turn_credentials, 17 | get_cloudflare_turn_credentials_async, 18 | get_stt_model, 19 | ) 20 | from gradio.utils import get_space 21 | from pydantic import BaseModel 22 | 23 | load_dotenv() 24 | 25 | curr_dir = Path(__file__).parent 26 | 27 | 28 | client = huggingface_hub.InferenceClient( 29 | api_key=os.environ.get("SAMBANOVA_API_KEY"), 30 | provider="sambanova", 31 | ) 32 | stt_model = get_stt_model() 33 | 34 | 35 | def response( 36 | audio: tuple[int, np.ndarray], 37 | gradio_chatbot: list[dict] | None = None, 38 | conversation_state: list[dict] | None = None, 39 | ): 40 | gradio_chatbot = gradio_chatbot or [] 41 | conversation_state = conversation_state or [] 42 | print("chatbot", gradio_chatbot) 43 | 44 | text = stt_model.stt(audio) 45 | sample_rate, array = audio 46 | gradio_chatbot.append( 47 | {"role": "user", "content": gr.Audio((sample_rate, array.squeeze()))} 48 | ) 49 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 50 | 51 | conversation_state.append({"role": "user", "content": text}) 52 | request = client.chat.completions.create( 53 | model="meta-llama/Llama-3.2-3B-Instruct", 54 | messages=conversation_state, # type: ignore 55 | temperature=0.1, 56 | top_p=0.1, 57 | ) 58 | response = {"role": "assistant", "content": request.choices[0].message.content} 59 | 60 | conversation_state.append(response) 61 | gradio_chatbot.append(response) 62 | 63 | yield AdditionalOutputs(gradio_chatbot, conversation_state) 64 | 65 | 66 | chatbot = gr.Chatbot(type="messages", value=[]) 67 | state = gr.State(value=[]) 68 | stream = Stream( 69 | ReplyOnPause( 70 | response, # type: ignore 71 | input_sample_rate=16000, 72 | ), 73 | mode="send", 74 | modality="audio", 75 | additional_inputs=[chatbot, state], 76 | additional_outputs=[chatbot, state], 77 | additional_outputs_handler=lambda *a: (a[2], a[3]), 78 | concurrency_limit=20 if get_space() else None, 79 | rtc_configuration=get_cloudflare_turn_credentials_async, 80 | server_rtc_configuration=get_cloudflare_turn_credentials(ttl=36_000), 81 | ) 82 | 83 | app = FastAPI() 84 | stream.mount(app) 85 | 86 | 87 | class Message(BaseModel): 88 | role: str 89 | content: str 90 | 91 | 92 | class InputData(BaseModel): 93 | webrtc_id: str 94 | chatbot: list[Message] 95 | state: list[Message] 96 | 97 | 98 | @app.get("/") 99 | async def _(): 100 | rtc_config = await get_cloudflare_turn_credentials_async( 101 | hf_token=os.getenv("HF_TOKEN_ALT") 102 | ) 103 | html_content = (curr_dir / "index.html").read_text() 104 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 105 | return HTMLResponse(content=html_content) 106 | 107 | 108 | 
@app.post("/input_hook") 109 | async def _(data: InputData): 110 | body = data.model_dump() 111 | stream.set_input(data.webrtc_id, body["chatbot"], body["state"]) 112 | 113 | 114 | def audio_to_base64(file_path): 115 | audio_format = "wav" 116 | with open(file_path, "rb") as audio_file: 117 | encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8") 118 | return f"data:audio/{audio_format};base64,{encoded_audio}" 119 | 120 | 121 | @app.get("/outputs") 122 | async def _(webrtc_id: str): 123 | async def output_stream(): 124 | async for output in stream.output_stream(webrtc_id): 125 | chatbot = output.args[0] 126 | state = output.args[1] 127 | data = { 128 | "message": state[-1], 129 | "audio": audio_to_base64(chatbot[-1]["content"].value["path"]) 130 | if chatbot[-1]["role"] == "user" 131 | else None, 132 | } 133 | yield f"event: output\ndata: {json.dumps(data)}\n\n" 134 | 135 | return StreamingResponse(output_stream(), media_type="text/event-stream") 136 | 137 | 138 | if __name__ == "__main__": 139 | import os 140 | 141 | if (mode := os.getenv("MODE")) == "UI": 142 | stream.ui.launch(server_port=7860) 143 | elif mode == "PHONE": 144 | raise ValueError("Phone mode not supported") 145 | else: 146 | import uvicorn 147 | 148 | uvicorn.run(app, host="0.0.0.0", port=7860) 149 | -------------------------------------------------------------------------------- /demo/talk_to_sambanova/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad, stt]==0.0.20.rc2 2 | python-dotenv 3 | huggingface_hub>=0.29.0 4 | twilio -------------------------------------------------------------------------------- /demo/talk_to_smolagents/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Talk to Smolagents 3 | emoji: 💻 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: FastRTC Voice Agent with smolagents 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN] 13 | --- 14 | 15 | # Voice LLM Agent with Image Generation 16 | 17 | A voice-enabled AI assistant powered by FastRTC that can: 18 | 1. Stream audio in real-time using WebRTC 19 | 2. Listen and respond with natural pauses in conversation 20 | 3. Generate images based on your requests 21 | 4. Maintain conversation context across exchanges 22 | 23 | This app combines the real-time communication capabilities of FastRTC with the powerful agent framework of smolagents. 24 | 25 | ## Key Features 26 | 27 | - **Real-time Streaming**: Uses FastRTC's WebRTC-based audio streaming 28 | - **Voice Activation**: Automatic detection of speech pauses to trigger responses 29 | - **Multi-modal Interaction**: Combines voice and image generation in a single interface 30 | 31 | ## Setup 32 | 33 | 1. Install Python 3.9+ and create a virtual environment: 34 | ```bash 35 | python -m venv .venv 36 | source .venv/bin/activate # On Windows: .venv\Scripts\activate 37 | ``` 38 | 39 | 2. Install dependencies: 40 | ```bash 41 | pip install -r requirements.txt 42 | ``` 43 | 44 | 3. 
Create a `.env` file with the following: 45 | ``` 46 | HF_TOKEN=your_huggingface_api_key 47 | MODE=UI # Use 'UI' for Gradio interface, leave blank for HTML interface 48 | ``` 49 | 50 | ## Running the App 51 | 52 | ### With Gradio UI (Recommended) 53 | 54 | ```bash 55 | MODE=UI python app.py 56 | ``` 57 | 58 | This launches a Gradio UI at http://localhost:7860 with: 59 | - FastRTC's built-in streaming audio components 60 | - A chat interface showing the conversation 61 | - An image display panel for generated images 62 | 63 | ## How to Use 64 | 65 | 1. Click the microphone button to start streaming your voice. 66 | 2. Speak naturally - the app will automatically detect when you pause. 67 | 3. Ask the agent to generate an image, for example: 68 | - "Create an image of a magical forest with glowing mushrooms." 69 | - "Generate a picture of a futuristic city with flying cars." 70 | 4. View the generated image and hear the agent's response. 71 | 72 | ## Technical Architecture 73 | 74 | ### FastRTC Components 75 | 76 | - **Stream**: Core component that handles WebRTC connections and audio streaming 77 | - **ReplyOnPause**: Detects when the user stops speaking to trigger a response 78 | - **get_stt_model/get_tts_model**: Provides optimized speech-to-text and text-to-speech models 79 | 80 | ### smolagents Components 81 | 82 | - **CodeAgent**: Intelligent agent that can use tools based on natural language inputs 83 | - **Tool.from_space**: Integration with Hugging Face Spaces for image generation 84 | - **HfApiModel**: Connection to powerful language models for understanding requests 85 | 86 | ### Integration Flow 87 | 88 | 1. FastRTC streams and processes audio input in real-time 89 | 2. Speech is converted to text and passed to the smolagents CodeAgent 90 | 3. The agent processes the request and calls tools when needed 91 | 4. Responses and generated images are streamed back through FastRTC 92 | 5. The UI updates to show both text responses and generated images 93 | 94 | ## Advanced Features 95 | 96 | - Conversation history is maintained across exchanges 97 | - Error handling ensures the app continues working even if agent processing fails 98 | - The application leverages FastRTC's streaming capabilities for efficient audio transmission -------------------------------------------------------------------------------- /demo/talk_to_smolagents/app.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from dotenv import load_dotenv 4 | from fastrtc import ( 5 | ReplyOnPause, 6 | Stream, 7 | get_stt_model, 8 | get_tts_model, 9 | get_twilio_turn_credentials, 10 | ) 11 | from smolagents import CodeAgent, DuckDuckGoSearchTool, HfApiModel 12 | 13 | # Load environment variables 14 | load_dotenv() 15 | 16 | # Initialize file paths 17 | curr_dir = Path(__file__).parent 18 | 19 | # Initialize models 20 | stt_model = get_stt_model() 21 | tts_model = get_tts_model() 22 | 23 | # Conversation state to maintain history 24 | conversation_state: list[dict[str, str]] = [] 25 | 26 | # System prompt for agent 27 | system_prompt = """You are a helpful assistant that can helps with finding places to 28 | work remotely from. You should specifically check against reviews and ratings of the 29 | place. You should use this criteria to find the best place to work from: 30 | - Price 31 | - Reviews 32 | - Ratings 33 | - Location 34 | - WIFI 35 | Only return the name, address of the place, and a short description of the place. 36 | Always search for real places. 
37 | Only return real places, not fake ones. 38 | If you receive anything other than a location, you should ask for a location. 39 | 40 | User: I am in Paris, France. Can you find me a place to work from? 41 | Assistant: I found a place called "Le Café de la Paix" at 123 Rue de la Paix, 42 | Paris, France. It has good reviews and is in a great location. 43 | 44 | 45 | User: I am in London, UK. Can you find me a place to work from? 46 | Assistant: I found a place called "The London Coffee Company". 47 | 48 | 49 | User: How many people are in the room? 50 | Assistant: I only respond to requests about finding places to work from. 51 | 52 | 53 | """ 54 | 55 | model = HfApiModel(provider="together", model="Qwen/Qwen2.5-Coder-32B-Instruct") 56 | 57 | agent = CodeAgent( 58 | tools=[ 59 | DuckDuckGoSearchTool(), 60 | ], 61 | model=model, 62 | max_steps=10, 63 | verbosity_level=2, 64 | description="Search the web for cafes to work from.", 65 | ) 66 | 67 | 68 | def process_response(audio): 69 | """Process audio input and generate LLM response with TTS""" 70 | # Convert speech to text using STT model 71 | text = stt_model.stt(audio) 72 | if not text.strip(): 73 | return 74 | 75 | input_text = f"{system_prompt}\n\n{text}" 76 | # Get response from agent 77 | response_content = agent.run(input_text) 78 | 79 | # Convert response to audio using TTS model 80 | yield from tts_model.stream_tts_sync(response_content or "") 81 | 82 | 83 | stream = Stream( 84 | handler=ReplyOnPause(process_response, input_sample_rate=16000), 85 | modality="audio", 86 | mode="send-receive", 87 | ui_args={ 88 | "pulse_color": "rgb(255, 255, 255)", 89 | "icon_button_color": "rgb(255, 255, 255)", 90 | "title": "🧑‍💻The Coworking Agent", 91 | }, 92 | rtc_configuration=get_twilio_turn_credentials(), 93 | ) 94 | 95 | if __name__ == "__main__": 96 | stream.ui.launch(server_port=7860) 97 | -------------------------------------------------------------------------------- /demo/talk_to_smolagents/requirements.txt: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by uv via the following command: 2 | # uv export --format requirements-txt --no-hashes 3 | aiofiles==23.2.1 4 | aiohappyeyeballs==2.4.6 5 | aiohttp==3.11.13 6 | aiohttp-retry==2.9.1 7 | aioice==0.9.0 8 | aiortc==1.10.1 9 | aiosignal==1.3.2 10 | annotated-types==0.7.0 11 | anyio==4.8.0 12 | async-timeout==5.0.1 ; python_full_version < '3.11' 13 | attrs==25.1.0 14 | audioop-lts==0.2.1 ; python_full_version >= '3.13' 15 | audioread==3.0.1 16 | av==13.1.0 17 | babel==2.17.0 18 | beautifulsoup4==4.13.3 19 | certifi==2025.1.31 20 | cffi==1.17.1 21 | charset-normalizer==3.4.1 22 | click==8.1.8 23 | colorama==0.4.6 24 | coloredlogs==15.0.1 25 | colorlog==6.9.0 26 | cryptography==44.0.1 27 | csvw==3.5.1 28 | decorator==5.2.1 29 | dlinfo==2.0.0 30 | dnspython==2.7.0 31 | duckduckgo-search==7.5.0 32 | espeakng-loader==0.2.4 33 | exceptiongroup==1.2.2 ; python_full_version < '3.11' 34 | fastapi==0.115.8 35 | fastrtc==0.0.8.post1 36 | fastrtc-moonshine-onnx==20241016 37 | ffmpy==0.5.0 38 | filelock==3.17.0 39 | flatbuffers==25.2.10 40 | frozenlist==1.5.0 41 | fsspec==2025.2.0 42 | google-crc32c==1.6.0 43 | gradio==5.19.0 44 | gradio-client==1.7.2 45 | h11==0.14.0 46 | httpcore==1.0.7 47 | httpx==0.28.1 48 | huggingface-hub==0.29.1 49 | humanfriendly==10.0 50 | idna==3.10 51 | ifaddr==0.2.0 52 | isodate==0.7.2 53 | jinja2==3.1.5 54 | joblib==1.4.2 55 | jsonschema==4.23.0 56 | jsonschema-specifications==2024.10.1 57 | 
kokoro-onnx==0.4.3 58 | language-tags==1.2.0 59 | lazy-loader==0.4 60 | librosa==0.10.2.post1 61 | llvmlite==0.44.0 62 | lxml==5.3.1 63 | markdown-it-py==3.0.0 64 | markdownify==1.0.0 65 | markupsafe==2.1.5 66 | mdurl==0.1.2 67 | mpmath==1.3.0 68 | msgpack==1.1.0 69 | multidict==6.1.0 70 | numba==0.61.0 71 | numpy==2.1.3 72 | onnxruntime==1.20.1 73 | orjson==3.10.15 74 | packaging==24.2 75 | pandas==2.2.3 76 | phonemizer-fork==3.3.1 77 | pillow==11.1.0 78 | platformdirs==4.3.6 79 | pooch==1.8.2 80 | primp==0.14.0 81 | propcache==0.3.0 82 | protobuf==5.29.3 83 | pycparser==2.22 84 | pydantic==2.10.6 85 | pydantic-core==2.27.2 86 | pydub==0.25.1 87 | pyee==12.1.1 88 | pygments==2.19.1 89 | pyjwt==2.10.1 90 | pylibsrtp==0.11.0 91 | pyopenssl==25.0.0 92 | pyparsing==3.2.1 93 | pyreadline3==3.5.4 ; sys_platform == 'win32' 94 | python-dateutil==2.9.0.post0 95 | python-dotenv==1.0.1 96 | python-multipart==0.0.20 97 | pytz==2025.1 98 | pyyaml==6.0.2 99 | rdflib==7.1.3 100 | referencing==0.36.2 101 | regex==2024.11.6 102 | requests==2.32.3 103 | rfc3986==1.5.0 104 | rich==13.9.4 105 | rpds-py==0.23.1 106 | ruff==0.9.7 ; sys_platform != 'emscripten' 107 | safehttpx==0.1.6 108 | scikit-learn==1.6.1 109 | scipy==1.15.2 110 | segments==2.3.0 111 | semantic-version==2.10.0 112 | shellingham==1.5.4 ; sys_platform != 'emscripten' 113 | six==1.17.0 114 | smolagents==1.9.2 115 | sniffio==1.3.1 116 | soundfile==0.13.1 117 | soupsieve==2.6 118 | soxr==0.5.0.post1 119 | standard-aifc==3.13.0 ; python_full_version >= '3.13' 120 | standard-chunk==3.13.0 ; python_full_version >= '3.13' 121 | standard-sunau==3.13.0 ; python_full_version >= '3.13' 122 | starlette==0.45.3 123 | sympy==1.13.3 124 | threadpoolctl==3.5.0 125 | tokenizers==0.21.0 126 | tomlkit==0.13.2 127 | tqdm==4.67.1 128 | twilio==9.4.6 129 | typer==0.15.1 ; sys_platform != 'emscripten' 130 | typing-extensions==4.12.2 131 | tzdata==2025.1 132 | uritemplate==4.1.1 133 | urllib3==2.3.0 134 | uvicorn==0.34.0 ; sys_platform != 'emscripten' 135 | websockets==15.0 136 | yarl==1.18.3 137 | -------------------------------------------------------------------------------- /demo/text_mode/app.py: -------------------------------------------------------------------------------- 1 | # /// script 2 | # dependencies = [ 3 | # "fastrtc[vad, stt]==0.0.26.rc1", 4 | # "openai", 5 | # ] 6 | # /// 7 | 8 | 9 | import gradio as gr 10 | import huggingface_hub 11 | from fastrtc import ( 12 | AdditionalOutputs, 13 | ReplyOnPause, 14 | WebRTC, 15 | WebRTCData, 16 | WebRTCError, 17 | get_stt_model, 18 | ) 19 | from openai import OpenAI 20 | 21 | stt_model = get_stt_model() 22 | 23 | conversations = {} 24 | 25 | 26 | def response( 27 | data: WebRTCData, 28 | conversation: list[dict], 29 | token: str | None = None, 30 | model: str = "meta-llama/Llama-3.2-3B-Instruct", 31 | provider: str = "sambanova", 32 | ): 33 | print("conversation before", conversation) 34 | if not provider.startswith("http") and not token: 35 | raise WebRTCError("Please add your HF token.") 36 | 37 | if data.audio is not None and data.audio[1].size > 0: 38 | user_audio_text = stt_model.stt(data.audio) 39 | conversation.append({"role": "user", "content": user_audio_text}) 40 | else: 41 | conversation.append({"role": "user", "content": data.textbox}) 42 | 43 | yield AdditionalOutputs(conversation) 44 | 45 | if provider.startswith("http"): 46 | client = OpenAI(base_url=provider, api_key="ollama") 47 | else: 48 | client = huggingface_hub.InferenceClient( 49 | api_key=token, 50 | provider=provider, # type: ignore 
51 | ) 52 | 53 | request = client.chat.completions.create( 54 | model=model, 55 | messages=conversation, # type: ignore 56 | temperature=1, 57 | top_p=0.1, 58 | ) 59 | response = {"role": "assistant", "content": request.choices[0].message.content} 60 | 61 | conversation.append(response) 62 | print("conversation after", conversation) 63 | yield AdditionalOutputs(conversation) 64 | 65 | 66 | css = """ 67 | footer { 68 | display: none !important; 69 | } 70 | """ 71 | 72 | providers = [ 73 | "black-forest-labs", 74 | "cerebras", 75 | "cohere", 76 | "fal-ai", 77 | "fireworks-ai", 78 | "hf-inference", 79 | "hyperbolic", 80 | "nebius", 81 | "novita", 82 | "openai", 83 | "replicate", 84 | "sambanova", 85 | "together", 86 | ] 87 | 88 | 89 | def hide_token(provider: str): 90 | if provider.startswith("http"): 91 | return gr.Textbox(visible=False) 92 | return gr.skip() 93 | 94 | 95 | with gr.Blocks(css=css) as demo: 96 | gr.HTML( 97 | """ 98 |

99 | Streaming Huggy FastRTC Chat 100 |

101 | """ 102 | ) 103 | with gr.Sidebar(): 104 | token = gr.Textbox( 105 | placeholder="Place your HF token here", type="password", label="HF Token" 106 | ) 107 | model = gr.Dropdown( 108 | choices=["meta-llama/Llama-3.2-3B-Instruct"], 109 | allow_custom_value=True, 110 | label="Model", 111 | ) 112 | provider = gr.Dropdown( 113 | label="Provider", 114 | choices=providers, 115 | value="sambanova", 116 | info="Select a hf-compatible provider or type the url of your server, e.g. http://127.0.0.1:11434/v1 for ollama", 117 | allow_custom_value=True, 118 | ) 119 | provider.change(hide_token, inputs=[provider], outputs=[token]) 120 | cb = gr.Chatbot(type="messages", height=600) 121 | webrtc = WebRTC(modality="audio", mode="send", variant="textbox") 122 | webrtc.stream( 123 | ReplyOnPause(response), 124 | inputs=[webrtc, cb, token, model, provider], 125 | outputs=[cb], 126 | concurrency_limit=100, 127 | ) 128 | webrtc.on_additional_outputs( 129 | lambda old, new: new, inputs=[cb], outputs=[cb], concurrency_limit=100 130 | ) 131 | 132 | if __name__ == "__main__": 133 | demo.launch(server_port=6980) 134 | -------------------------------------------------------------------------------- /demo/voice_text_editor/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Voice Text Editor 3 | emoji: 📝 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Edit text documents with your voice! 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|SAMBANOVA_API_KEY] 13 | --- 14 | 15 | # Voice Text Editor 16 | 17 | Edit text documents with your voice! 18 | 19 | 20 | -------------------------------------------------------------------------------- /demo/voice_text_editor/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gradio as gr 4 | from dotenv import load_dotenv 5 | from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model 6 | from openai import OpenAI 7 | 8 | load_dotenv() 9 | 10 | sambanova_client = OpenAI( 11 | api_key=os.getenv("SAMBANOVA_API_KEY"), base_url="https://api.sambanova.ai/v1" 12 | ) 13 | stt_model = get_stt_model() 14 | 15 | 16 | SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands. 17 | 18 | For each interaction: 19 | 1. You will receive the current state of a text document and a voice input from the user. 20 | 2. Determine if the input is: 21 | a) A command to modify the document (e.g., "delete the last line", "capitalize that") 22 | b) Content to be added to the document (e.g., "buy 12 eggs at the store") 23 | c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24") 24 | 3. Return ONLY the new document state after the changes have been applied. 
25 | 26 | Example: 27 | 28 | CURRENT DOCUMENT: 29 | 30 | 31 | Meeting notes: 32 | - Buy GPUs 33 | - Meet with Joe 34 | 35 | USER INPUT: Make that 100 GPUS 36 | 37 | NEW DOCUMENT STATE: 38 | 39 | Meeting notes: 40 | - Buy 100 GPUs 41 | - Meet with Joe 42 | 43 | Example 2: 44 | 45 | CURRENT DOCUMENT: 46 | 47 | Project Proposal 48 | 49 | USER INPUT: Make that a header 50 | 51 | NEW DOCUMENT STATE: 52 | 53 | # Project Proposal 54 | 55 | When handling commands: 56 | - Apply the requested changes precisely to the document 57 | - Support operations like adding, deleting, modifying, and moving text 58 | - Understand contextual references like "that", "the last line", "the second paragraph" 59 | 60 | When handling content additions: 61 | - Add the new text at the appropriate location (usually at the end or cursor position) 62 | - Format it appropriately based on the document context 63 | - If the user says to "add" or "insert" do not remove text that was already in the document. 64 | 65 | When handling content modifications: 66 | - Identify what part of the document the user is referring to 67 | - Apply the requested change while preserving the rest of the content 68 | - Be smart about contextual references (e.g., "make that 24" should know to replace a number) 69 | 70 | NEVER include any text in the new document state that is not part of the user's input. 71 | NEVER include the phrase "CURRENT DOCUMENT" in the new document state. 72 | NEVER reword the user's input unless you are explicitly asked to do so. 73 | """ 74 | 75 | 76 | def edit(audio, current_document: str): 77 | prompt = stt_model.stt(audio) 78 | print(f"Prompt: {prompt}") 79 | response = sambanova_client.chat.completions.create( 80 | model="Meta-Llama-3.3-70B-Instruct", 81 | messages=[ 82 | {"role": "system", "content": SYSTEM_PROMPT}, 83 | { 84 | "role": "user", 85 | "content": f"CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {prompt}", 86 | }, 87 | ], 88 | max_tokens=200, 89 | ) 90 | doc = response.choices[0].message.content 91 | yield AdditionalOutputs(doc) 92 | 93 | 94 | doc = gr.Textbox(value="", label="Current Document") 95 | 96 | 97 | stream = Stream( 98 | ReplyOnPause(edit), 99 | modality="audio", 100 | mode="send", 101 | additional_inputs=[doc], 102 | additional_outputs=[doc], 103 | additional_outputs_handler=lambda prev, current: current, 104 | ui_args={"title": "Voice Text Editor with FastRTC 🗣️"}, 105 | ) 106 | 107 | if __name__ == "__main__": 108 | if (mode := os.getenv("MODE")) == "UI": 109 | stream.ui.launch(server_port=7860) 110 | elif mode == "PHONE": 111 | stream.fastphone(host="0.0.0.0", port=7860) 112 | else: 113 | stream.ui.launch(server_port=7860) 114 | -------------------------------------------------------------------------------- /demo/voice_text_editor_local/app.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gradio as gr 4 | import requests 5 | from dotenv import load_dotenv 6 | from fastrtc import AdditionalOutputs, ReplyOnPause, Stream, get_stt_model 7 | 8 | load_dotenv() 9 | 10 | stt_model = get_stt_model() 11 | 12 | SYSTEM_PROMPT = """You are an intelligent voice-activated text editor assistant. Your purpose is to help users create and modify text documents through voice commands. 13 | 14 | For each interaction: 15 | 1. You will receive the current state of a text document and a voice input from the user. 16 | 2. 
Determine if the input is: 17 | a) A command to modify the document (e.g., "delete the last line", "capitalize that") 18 | b) Content to be added to the document (e.g., "buy 12 eggs at the store") 19 | c) A modification to existing content (e.g., "actually make that 24" to change "12" to "24") 20 | 3. Return ONLY the new document state after the changes have been applied. 21 | 22 | Example: 23 | 24 | CURRENT DOCUMENT: 25 | 26 | Meeting notes: 27 | - Buy GPUs 28 | - Meet with Joe 29 | 30 | USER INPUT: Make that 100 GPUS 31 | 32 | NEW DOCUMENT STATE: 33 | 34 | Meeting notes: 35 | - Buy 100 GPUs 36 | - Meet with Joe 37 | 38 | Example 2: 39 | 40 | CURRENT DOCUMENT: 41 | 42 | Project Proposal 43 | 44 | USER INPUT: Make that a header 45 | 46 | NEW DOCUMENT STATE: 47 | 48 | # Project Proposal 49 | 50 | When handling commands: 51 | - Apply the requested changes precisely to the document 52 | - Support operations like adding, deleting, modifying, and moving text 53 | - Understand contextual references like "that", "the last line", "the second paragraph" 54 | 55 | When handling content additions: 56 | - Add the new text at the appropriate location (usually at the end or cursor position) 57 | - Format it appropriately based on the document context 58 | - If the user says to "add" or "insert" do not remove text that was already in the document. 59 | 60 | When handling content modifications: 61 | - Identify what part of the document the user is referring to 62 | - Apply the requested change while preserving the rest of the content 63 | - Be smart about contextual references (e.g., "make that 24" should know to replace a number) 64 | 65 | NEVER include any text in the new document state that is not part of the user's input. 66 | NEVER include the phrase "CURRENT DOCUMENT" in the new document state. 67 | NEVER reword the user's input unless you are explicitly asked to do so. 68 | """ 69 | 70 | 71 | def edit(audio, current_document: str): 72 | prompt = stt_model.stt(audio) 73 | print(f"Prompt: {prompt}") 74 | 75 | # Construct the prompt for ollama 76 | full_prompt = ( 77 | f"{SYSTEM_PROMPT}\n\n" 78 | f"User: CURRENT DOCUMENT:\n\n{current_document}\n\nUSER INPUT: {prompt}\n\n" 79 | f"Assistant:" 80 | ) 81 | 82 | try: 83 | # Send request to ollama's API 84 | response = requests.post( 85 | "http://localhost:11434/api/generate", 86 | json={ 87 | "model": "qwen2.5", 88 | "prompt": full_prompt, 89 | "stream": False, 90 | "max_tokens": 200, 91 | }, 92 | ) 93 | response.raise_for_status() # Raise an exception for bad status codes 94 | 95 | # Parse the response 96 | doc = response.json()["response"] 97 | # Remove a leading "Assistant:" prefix and any extra whitespace (removeprefix, not lstrip, so leading letters of the reply are not stripped) 98 | doc = doc.strip().removeprefix("Assistant:").strip() 99 | yield AdditionalOutputs(doc) 100 | 101 | except requests.RequestException as e: 102 | # Handle API errors gracefully 103 | error_message = "Error: Could not connect to ollama. Please ensure it's running and qwen2.5 is loaded."
104 | print(f"API Error: {e}") 105 | yield AdditionalOutputs(error_message) 106 | 107 | 108 | doc = gr.Textbox(value="", label="Current Document") 109 | 110 | stream = Stream( 111 | ReplyOnPause(edit), 112 | modality="audio", 113 | mode="send", 114 | additional_inputs=[doc], 115 | additional_outputs=[doc], 116 | additional_outputs_handler=lambda prev, current: current, 117 | ui_args={"title": "Voice Text Editor with FastRTC 🗣️"}, 118 | ) 119 | 120 | if __name__ == "__main__": 121 | if (mode := os.getenv("MODE")) == "UI": 122 | stream.ui.launch(server_port=7860) 123 | elif mode == "PHONE": 124 | stream.fastphone(host="0.0.0.0", port=7860) 125 | else: 126 | stream.ui.launch(server_port=7860) 127 | -------------------------------------------------------------------------------- /demo/webrtc_vs_websocket/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Webrtc Vs Websocket 3 | emoji: 🧪 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Compare Round Trip Times between WebRTC and Websockets 12 | tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|ELEVENLABS_API_KEY, secret|GROQ_API_KEY, secret|ANTHROPIC_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/webrtc_vs_websocket/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad] 2 | elevenlabs 3 | groq 4 | anthropic 5 | twilio 6 | python-dotenv 7 | -------------------------------------------------------------------------------- /demo/whisper_realtime/README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Whisper Realtime Transcription 3 | emoji: 👂 4 | colorFrom: purple 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 5.16.0 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | short_description: Transcribe audio in realtime with Whisper 12 | tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY] 13 | --- 14 | 15 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/whisper_realtime/README_gradio.md: -------------------------------------------------------------------------------- 1 | --- 2 | app_file: app.py 3 | colorFrom: purple 4 | colorTo: red 5 | emoji: 👂 6 | license: mit 7 | pinned: false 8 | sdk: gradio 9 | sdk_version: 5.16.0 10 | short_description: Transcribe audio in realtime - Gradio UI version 11 | tags: 12 | - webrtc 13 | - websocket 14 | - gradio 15 | - secret|HF_TOKEN 16 | - secret|GROQ_API_KEY 17 | title: Whisper Realtime Transcription (Gradio UI) 18 | --- 19 | 20 | 21 | Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference -------------------------------------------------------------------------------- /demo/whisper_realtime/app.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import gradio as gr 5 | import numpy as np 6 | from dotenv import load_dotenv 7 | from fastapi import FastAPI 8 | from fastapi.responses import HTMLResponse, StreamingResponse 9 | from fastrtc 
import ( 10 | AdditionalOutputs, 11 | ReplyOnPause, 12 | Stream, 13 | audio_to_bytes, 14 | get_twilio_turn_credentials, 15 | ) 16 | from gradio.utils import get_space 17 | from groq import AsyncClient 18 | from pydantic import BaseModel 19 | 20 | cur_dir = Path(__file__).parent 21 | 22 | load_dotenv() 23 | 24 | 25 | groq_client = AsyncClient() 26 | 27 | 28 | async def transcribe(audio: tuple[int, np.ndarray], transcript: str): 29 | response = await groq_client.audio.transcriptions.create( 30 | file=("audio-file.mp3", audio_to_bytes(audio)), 31 | model="whisper-large-v3-turbo", 32 | response_format="verbose_json", 33 | ) 34 | yield AdditionalOutputs(transcript + "\n" + response.text) 35 | 36 | 37 | transcript = gr.Textbox(label="Transcript") 38 | stream = Stream( 39 | ReplyOnPause(transcribe), 40 | modality="audio", 41 | mode="send", 42 | additional_inputs=[transcript], 43 | additional_outputs=[transcript], 44 | additional_outputs_handler=lambda a, b: b, 45 | rtc_configuration=get_twilio_turn_credentials() if get_space() else None, 46 | concurrency_limit=5 if get_space() else None, 47 | time_limit=90 if get_space() else None, 48 | ) 49 | 50 | app = FastAPI() 51 | 52 | stream.mount(app) 53 | 54 | 55 | class SendInput(BaseModel): 56 | webrtc_id: str 57 | transcript: str 58 | 59 | 60 | @app.post("/send_input") 61 | def send_input(body: SendInput): 62 | stream.set_input(body.webrtc_id, body.transcript) 63 | 64 | 65 | @app.get("/transcript") 66 | def _(webrtc_id: str): 67 | async def output_stream(): 68 | async for output in stream.output_stream(webrtc_id): 69 | transcript = output.args[0].split("\n")[-1] 70 | yield f"event: output\ndata: {transcript}\n\n" 71 | 72 | return StreamingResponse(output_stream(), media_type="text/event-stream") 73 | 74 | 75 | @app.get("/") 76 | def index(): 77 | rtc_config = get_twilio_turn_credentials() if get_space() else None 78 | html_content = (cur_dir / "index.html").read_text() 79 | html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config)) 80 | return HTMLResponse(content=html_content) 81 | 82 | 83 | if __name__ == "__main__": 84 | import os 85 | 86 | if (mode := os.getenv("MODE")) == "UI": 87 | stream.ui.launch(server_port=7860) 88 | elif mode == "PHONE": 89 | stream.fastphone(host="0.0.0.0", port=7860) 90 | else: 91 | import uvicorn 92 | 93 | uvicorn.run(app, host="0.0.0.0", port=7860) 94 | -------------------------------------------------------------------------------- /demo/whisper_realtime/requirements.txt: -------------------------------------------------------------------------------- 1 | fastrtc[vad]==0.0.20.rc2 2 | groq 3 | python-dotenv -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | fastrtc.org -------------------------------------------------------------------------------- /docs/Discord-Symbol-White.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/faq.md: -------------------------------------------------------------------------------- 1 | ## Demo does not work when deploying to the cloud 2 | 3 | Make sure you are using a TURN server. See [deployment](../deployment). 
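For reference, here is a minimal sketch of wiring TURN credentials into a `Stream`. The `echo` handler below is only a stand-in; `get_twilio_turn_credentials()` reads `TWILIO_ACCOUNT_SID`/`TWILIO_AUTH_TOKEN` from the environment, as the demos in this repo do, and the demos also pass `get_cloudflare_turn_credentials_async` as `rtc_configuration` for the same purpose.

```python
from fastrtc import ReplyOnPause, Stream, get_twilio_turn_credentials


def echo(audio):
    # Stand-in handler: play the caller's audio straight back.
    yield audio


stream = Stream(
    handler=ReplyOnPause(echo),
    modality="audio",
    mode="send-receive",
    # Relay media through a TURN server so the connection also works
    # behind cloud NATs and firewalls.
    rtc_configuration=get_twilio_turn_credentials(),
)
```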
4 | 5 | ## Recorded input audio sounds muffled during output audio playback 6 | 7 | By default, the microphone is [configured](https://github.com/freddyaboulton/gradio-webrtc/blob/903f1f70bd586f638ad3b5a3940c7a8ec70ad1f5/backend/gradio_webrtc/webrtc.py#L575) to do echo cancellation. 8 | This is what's causing the recorded audio to sound muffled when the streamed audio starts playing. 9 | You can disable this via the `track_constraints` (see [Advanced Configuration](../advanced-configuration)) with the following code: 10 | 11 | ```python 12 | stream = Stream( 13 | track_constraints={ 14 | "echoCancellation": False, 15 | "noiseSuppression": {"exact": True}, 16 | "autoGainControl": {"exact": True}, 17 | "sampleRate": {"ideal": 24000}, 18 | "sampleSize": {"ideal": 16}, 19 | "channelCount": {"exact": 1}, 20 | }, 21 | rtc_configuration=None, 22 | mode="send-receive", 23 | modality="audio", 24 | ) 25 | ``` 26 | 27 | ## How to raise errors in the UI 28 | 29 | You can raise `WebRTCError` in order for an error message to show up in the user's screen. This is similar to how `gr.Error` works. 30 | 31 | !!! warning 32 | 33 | The `WebRTCError` class is only supported in the `WebRTC` component. 34 | 35 | Here is a simple example: 36 | 37 | ```python 38 | def generation(num_steps): 39 | for _ in range(num_steps): 40 | segment = AudioSegment.from_file( 41 | "/Users/freddy/sources/gradio/demo/audio_debugger/cantina.wav" 42 | ) 43 | yield ( 44 | segment.frame_rate, 45 | np.array(segment.get_array_of_samples()).reshape(1, -1), 46 | ) 47 | time.sleep(3.5) 48 | raise WebRTCError("This is a test error") 49 | 50 | with gr.Blocks() as demo: 51 | audio = WebRTC( 52 | label="Stream", 53 | mode="receive", 54 | modality="audio", 55 | ) 56 | num_steps = gr.Slider( 57 | label="Number of Steps", 58 | minimum=1, 59 | maximum=10, 60 | step=1, 61 | value=5, 62 | ) 63 | button = gr.Button("Generate") 64 | 65 | audio.stream( 66 | fn=generation, inputs=[num_steps], outputs=[audio], trigger=button.click 67 | ) 68 | 69 | demo.launch() 70 | ``` -------------------------------------------------------------------------------- /docs/fastrtc_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/docs/fastrtc_logo.png -------------------------------------------------------------------------------- /docs/fastrtc_logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/docs/fastrtc_logo_small.png -------------------------------------------------------------------------------- /docs/gradio-logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/reference/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## `audio_to_bytes` 4 | 5 | Convert an audio tuple containing sample rate and numpy array data into bytes. 6 | Useful for sending data to external APIs from `ReplyOnPause` handler. 
7 | 8 | Parameters 9 | ``` 10 | audio : tuple[int, np.ndarray] 11 | A tuple containing: 12 | - sample_rate (int): The audio sample rate in Hz 13 | - data (np.ndarray): The audio data as a numpy array 14 | ``` 15 | 16 | Returns 17 | ``` 18 | bytes 19 | The audio data encoded as bytes, suitable for transmission or storage 20 | ``` 21 | 22 | Example 23 | ```python 24 | >>> sample_rate = 44100 25 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 26 | >>> audio_tuple = (sample_rate, audio_data) 27 | >>> audio_bytes = audio_to_bytes(audio_tuple) 28 | ``` 29 | 30 | ## `audio_to_file` 31 | 32 | Save an audio tuple containing sample rate and numpy array data to a file. 33 | 34 | Parameters 35 | ``` 36 | audio : tuple[int, np.ndarray] 37 | A tuple containing: 38 | - sample_rate (int): The audio sample rate in Hz 39 | - data (np.ndarray): The audio data as a numpy array 40 | ``` 41 | Returns 42 | ``` 43 | str 44 | The path to the saved audio file 45 | ``` 46 | Example 47 | ``` 48 | ```python 49 | >>> sample_rate = 44100 50 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 51 | >>> audio_tuple = (sample_rate, audio_data) 52 | >>> file_path = audio_to_file(audio_tuple) 53 | >>> print(f"Audio saved to: {file_path}") 54 | ``` 55 | 56 | ## `aggregate_bytes_to_16bit` 57 | Aggregate bytes to 16-bit audio samples. 58 | 59 | This function takes an iterator of chunks and aggregates them into 16-bit audio samples. 60 | It handles incomplete samples and combines them with the next chunk. 61 | 62 | Parameters 63 | ``` 64 | chunks_iterator : Iterator[bytes] 65 | An iterator of byte chunks to aggregate 66 | ``` 67 | Returns 68 | ``` 69 | Iterator[NDArray[np.int16]] 70 | An iterator of 16-bit audio samples 71 | ``` 72 | Example 73 | ```python 74 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 75 | >>> for chunk in aggregate_bytes_to_16bit(chunks_iterator): 76 | >>> print(chunk) 77 | ``` 78 | 79 | ## `async_aggregate_bytes_to_16bit` 80 | 81 | Aggregate bytes to 16-bit audio samples asynchronously. 82 | 83 | Parameters 84 | ``` 85 | chunks_iterator : Iterator[bytes] 86 | An iterator of byte chunks to aggregate 87 | ``` 88 | Returns 89 | ``` 90 | Iterator[NDArray[np.int16]] 91 | An iterator of 16-bit audio samples 92 | ``` 93 | Example 94 | ```python 95 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 96 | >>> for chunk in async_aggregate_bytes_to_16bit(chunks_iterator): 97 | >>> print(chunk) 98 | ``` 99 | 100 | ## `wait_for_item` 101 | 102 | Wait for an item from an asyncio.Queue with a timeout. 
103 | 104 | Parameters 105 | ``` 106 | queue : asyncio.Queue 107 | The queue to wait for an item from 108 | timeout : float 109 | The timeout in seconds 110 | ``` 111 | Returns 112 | ``` 113 | Any 114 | The item from the queue or None if the timeout is reached 115 | ``` 116 | 117 | Example 118 | ```python 119 | >>> queue = asyncio.Queue() 120 | >>> queue.put_nowait(1) 121 | >>> item = await wait_for_item(queue) 122 | >>> print(item) 123 | ``` -------------------------------------------------------------------------------- /docs/stylesheets/extra.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --white: #ffffff; 3 | --galaxy: #393931; 4 | --space: #2d2d2a; 5 | --rock: #2d2d2a; 6 | --cosmic: #ffdd00c5; 7 | --radiate: #d6cec0; 8 | --sun: #ffac2f; 9 | --neutron: #F7F5F6; 10 | --supernova: #ffdd00; 11 | --asteroid: #d6cec0; 12 | } 13 | 14 | [data-md-color-scheme="fastrtc-dark"] { 15 | --md-default-bg-color: var(--galaxy); 16 | --md-default-fg-color: var(--white); 17 | --md-default-fg-color--light: var(--white); 18 | --md-default-fg-color--lighter: var(--white); 19 | --md-primary-fg-color: var(--space); 20 | --md-primary-bg-color: var(--white); 21 | --md-accent-fg-color: var(--sun); 22 | 23 | --md-typeset-color: var(--white); 24 | --md-typeset-a-color: var(--supernova); 25 | --md-typeset-mark-color: var(--sun); 26 | 27 | --md-code-fg-color: var(--white); 28 | --md-code-bg-color: var(--rock); 29 | 30 | --md-code-hl-comment-color: var(--asteroid); 31 | --md-code-hl-punctuation-color: var(--supernova); 32 | --md-code-hl-generic-color: var(--supernova); 33 | --md-code-hl-variable-color: var(--white); 34 | --md-code-hl-string-color: var(--radiate); 35 | --md-code-hl-keyword-color: var(--supernova); 36 | --md-code-hl-operator-color: var(--supernova); 37 | --md-code-hl-number-color: var(--radiate); 38 | --md-code-hl-special-color: var(--supernova); 39 | --md-code-hl-function-color: var(--neutron); 40 | --md-code-hl-constant-color: var(--radiate); 41 | --md-code-hl-name-color: var(--md-code-fg-color); 42 | 43 | --md-typeset-del-color: hsla(6, 90%, 60%, 0.15); 44 | --md-typeset-ins-color: hsla(150, 90%, 44%, 0.15); 45 | 46 | --md-typeset-table-color: hsla(0, 0%, 100%, 0.12); 47 | --md-typeset-table-color--light: hsla(0, 0%, 100%, 0.035); 48 | } 49 | 50 | [data-md-color-scheme="fastrtc-dark"] div.admonition { 51 | color: var(--md-code-fg-color); 52 | background-color: var(--galaxy); 53 | } 54 | 55 | 56 | [data-md-color-scheme="fastrtc-dark"] .grid.cards>ul>li { 57 | border-color: var(--rock); 58 | border-width: thick; 59 | } 60 | 61 | [data-md-color-scheme="fastrtc-dark"] .grid.cards>ul>li>hr { 62 | border-color: var(--rock); 63 | } -------------------------------------------------------------------------------- /docs/userguide/audio-video.md: -------------------------------------------------------------------------------- 1 | # Audio-Video Streaming 2 | 3 | You can simultaneously stream audio and video using `AudioVideoStreamHandler` or `AsyncAudioVideoStreamHandler`. 4 | They are identical to the audio `StreamHandlers` with the addition of `video_receive` and `video_emit` methods which take and return a `numpy` array, respectively. 5 | 6 | Here is an example of the video handling functions for connecting with the Gemini multimodal API. In this case, we simply reflect the webcam feed back to the user but every second we'll send the latest webcam frame (and an additional image component) to the Gemini server. 
7 | 8 | Please see the "Gemini Audio Video Chat" example in the [cookbook](../../cookbook) for the complete code. 9 | 10 | ``` python title="Async Gemini Video Handling" 11 | 12 | async def video_receive(self, frame: np.ndarray): 13 | """Send video frames to the server""" 14 | if self.session: 15 | # send image every 1 second 16 | # otherwise we flood the API 17 | if time.time() - self.last_frame_time > 1: 18 | self.last_frame_time = time.time() 19 | await self.session.send(encode_image(frame)) 20 | if self.latest_args[2] is not None: 21 | await self.session.send(encode_image(self.latest_args[2])) 22 | self.video_queue.put_nowait(frame) 23 | 24 | async def video_emit(self) -> VideoEmitType: 25 | """Return video frames to the client""" 26 | return await self.video_queue.get() 27 | ``` -------------------------------------------------------------------------------- /docs/userguide/gradio.md: -------------------------------------------------------------------------------- 1 | # Gradio Component 2 | 3 | The automatic gradio UI is a great way to test your stream. However, you may want to customize the UI to your liking or simply build a standalone Gradio application. 4 | 5 | ## The WebRTC Component 6 | 7 | To build a standalone Gradio application, you can use the `WebRTC` component and implement the `stream` event. 8 | Similarly to the `Stream` object, you must set the `mode` and `modality` arguments and pass in a `handler`. 9 | 10 | In the `stream` event, you pass in your handler as well as the input and output components. 11 | 12 | ``` py 13 | import gradio as gr 14 | from fastrtc import WebRTC, ReplyOnPause 15 | 16 | def response(audio: tuple[int, np.ndarray]): 17 | """This function must yield audio frames""" 18 | ... 19 | yield audio 20 | 21 | 22 | with gr.Blocks() as demo: 23 | gr.HTML( 24 | """ 25 |

26 | Chat (Powered by WebRTC ⚡️) 27 |

28 | """ 29 | ) 30 | with gr.Column(): 31 | with gr.Group(): 32 | audio = WebRTC( 33 | mode="send-receive", 34 | modality="audio", 35 | ) 36 | audio.stream(fn=ReplyOnPause(response), 37 | inputs=[audio], outputs=[audio], 38 | time_limit=60) 39 | demo.launch() 40 | ``` 41 | 42 | ## Additional Outputs 43 | 44 | In order to modify other components from within the WebRTC stream, you must yield an instance of `AdditionalOutputs` and add an `on_additional_outputs` event to the `WebRTC` component. 45 | 46 | This is common for displaying a multimodal text/audio conversation in a Chatbot UI. 47 | 48 | === "Code" 49 | 50 | ``` py title="Additional Outputs" 51 | from fastrtc import AdditionalOutputs, WebRTC 52 | 53 | def transcribe(audio: tuple[int, np.ndarray], 54 | transformers_convo: list[dict], 55 | gradio_convo: list[dict]): 56 | response = model.generate(**inputs, max_length=256) 57 | transformers_convo.append({"role": "assistant", "content": response}) 58 | gradio_convo.append({"role": "assistant", "content": response}) 59 | yield AdditionalOutputs(transformers_convo, gradio_convo) # (1) 60 | 61 | 62 | with gr.Blocks() as demo: 63 | gr.HTML( 64 | """ 65 |

66 | Talk to Qwen2Audio (Powered by WebRTC ⚡️) 67 |

68 | """ 69 | ) 70 | transformers_convo = gr.State(value=[]) 71 | with gr.Row(): 72 | with gr.Column(): 73 | audio = WebRTC( 74 | label="Stream", 75 | mode="send", # (2) 76 | modality="audio", 77 | ) 78 | with gr.Column(): 79 | transcript = gr.Chatbot(label="transcript", type="messages") 80 | 81 | audio.stream(ReplyOnPause(transcribe), 82 | inputs=[audio, transformers_convo, transcript], 83 | outputs=[audio], time_limit=90) 84 | audio.on_additional_outputs(lambda s,a: (s,a), # (3) 85 | outputs=[transformers_convo, transcript], 86 | queue=False, show_progress="hidden") 87 | demo.launch() 88 | ``` 89 | 90 | 1. Pass your data to `AdditionalOutputs` and yield it. 91 | 2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`. 92 | 3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update. 93 | === "Notes" 94 | 1. Pass your data to `AdditionalOutputs` and yield it. 95 | 2. In this case, no audio is being returned, so we set `mode="send"`. However, if we set `mode="send-receive"`, we could also yield generated audio and `AdditionalOutputs`. 96 | 3. The `on_additional_outputs` event does not take `inputs`. It's common practice to not run this event on the queue since it is just a quick UI update. -------------------------------------------------------------------------------- /docs/userguide/video.md: -------------------------------------------------------------------------------- 1 | # Video Streaming 2 | 3 | ## Input/Output Streaming 4 | 5 | We already saw this example in the [Quickstart](../../#quickstart) and the [Core Concepts](../streams) section. 6 | 7 | === "Code" 8 | 9 | ``` py title="Input/Output Streaming" 10 | from fastrtc import Stream 11 | import gradio as gr 12 | 13 | def detection(image, conf_threshold=0.3): # (1) 14 | processed_frame = process_frame(image, conf_threshold) 15 | return processed_frame # (2) 16 | 17 | stream = Stream( 18 | handler=detection, 19 | modality="video", 20 | mode="send-receive", # (3) 21 | additional_inputs=[ 22 | gr.Slider(minimum=0, maximum=1, step=0.01, value=0.3) 23 | ], 24 | ) 25 | ``` 26 | 27 | 1. The webcam frame will be represented as a numpy array of shape (height, width, RGB). 28 | 2. The function must return a numpy array. It can take arbitrary values from other components. 29 | 3. Set the `modality="video"` and `mode="send-receive"` 30 | === "Notes" 31 | 1. The webcam frame will be represented as a numpy array of shape (height, width, RGB). 32 | 2. The function must return a numpy array. It can take arbitrary values from other components. 33 | 3. Set the `modality="video"` and `mode="send-receive"` 34 | 35 | ## Server-to-Client Only 36 | 37 | In this case, we stream from the server to the client so we will write a generator function that yields the next frame from the video (as a numpy array) 38 | and set the `mode="receive"` in the `WebRTC` component. 
39 | 40 | === "Code" 41 | ``` py title="Server-To-Client" 42 | from fastrtc import Stream 43 | import cv2 44 | 45 | def generation(): 46 | url = "https://download.tsi.telecom-paristech.fr/gpac/dataset/dash/uhd/mux_sources/hevcds_720p30_2M.mp4" 47 | cap = cv2.VideoCapture(url) 48 | iterating = True 49 | while iterating: 50 | iterating, frame = cap.read() 51 | yield frame 52 | 53 | stream = Stream( 54 | handler=generation, 55 | modality="video", 56 | mode="receive" 57 | ) 58 | ``` 59 | 60 | ## Skipping Frames 61 | 62 | If your event handler is not quite real-time yet, then the output feed will look very laggy. 63 | 64 | To fix this, you can set the `skip_frames` parameter to `True`. This will skip the frames that are received while the event handler is still running. 65 | 66 | ``` py title="Skipping Frames" 67 | import time 68 | 69 | import numpy as np 70 | from fastrtc import Stream, VideoStreamHandler 71 | 72 | 73 | def process_image(image): 74 | time.sleep( 75 | 0.2 76 | ) # Simulating 200ms processing time per frame; input arrives faster (30 FPS). 77 | return np.flip(image, axis=0) 78 | 79 | 80 | stream = Stream( 81 | handler=VideoStreamHandler(process_image, skip_frames=True), 82 | modality="video", 83 | mode="send-receive", 84 | ) 85 | 86 | stream.ui.launch() 87 | ``` 88 | 89 | ## Setting the Output Frame Rate 90 | 91 | You can set the output frame rate by setting the `fps` parameter in the `VideoStreamHandler`. 92 | 93 | ``` py title="Setting the Output Frame Rate" 94 | def generation(): 95 | url = "https://github.com/user-attachments/assets/9636dc97-4fee-46bb-abb8-b92e69c08c71" 96 | cap = cv2.VideoCapture(url) 97 | iterating = True 98 | 99 | # FPS calculation variables 100 | frame_count = 0 101 | start_time = time.time() 102 | fps = 0 103 | 104 | while iterating: 105 | iterating, frame = cap.read() 106 | 107 | # Calculate and print FPS 108 | frame_count += 1 109 | elapsed_time = time.time() - start_time 110 | if elapsed_time >= 1.0: # Update FPS every second 111 | fps = frame_count / elapsed_time 112 | yield frame, AdditionalOutputs(fps) 113 | frame_count = 0 114 | start_time = time.time() 115 | else: 116 | yield frame 117 | 118 | 119 | stream = Stream( 120 | handler=VideoStreamHandler(generation, fps=60), 121 | modality="video", 122 | mode="receive", 123 | additional_outputs=[gr.Number(label="FPS")], 124 | additional_outputs_handler=lambda prev, cur: cur, 125 | ) 126 | 127 | stream.ui.launch() 128 | ``` 129 | -------------------------------------------------------------------------------- /docs/utils.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## `audio_to_bytes` 4 | 5 | Convert an audio tuple containing sample rate and numpy array data into bytes. 6 | Useful for sending data to external APIs from `ReplyOnPause` handler. 7 | 8 | Parameters 9 | ``` 10 | audio : tuple[int, np.ndarray] 11 | A tuple containing: 12 | - sample_rate (int): The audio sample rate in Hz 13 | - data (np.ndarray): The audio data as a numpy array 14 | ``` 15 | 16 | Returns 17 | ``` 18 | bytes 19 | The audio data encoded as bytes, suitable for transmission or storage 20 | ``` 21 | 22 | Example 23 | ```python 24 | >>> sample_rate = 44100 25 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 26 | >>> audio_tuple = (sample_rate, audio_data) 27 | >>> audio_bytes = audio_to_bytes(audio_tuple) 28 | ``` 29 | 30 | ## `audio_to_file` 31 | 32 | Save an audio tuple containing sample rate and numpy array data to a file. 
33 | 34 | Parameters 35 | ``` 36 | audio : tuple[int, np.ndarray] 37 | A tuple containing: 38 | - sample_rate (int): The audio sample rate in Hz 39 | - data (np.ndarray): The audio data as a numpy array 40 | ``` 41 | Returns 42 | ``` 43 | str 44 | The path to the saved audio file 45 | ``` 46 | Example 47 | 48 | ```python 49 | >>> sample_rate = 44100 50 | >>> audio_data = np.array([0.1, -0.2, 0.3]) # Example audio samples 51 | >>> audio_tuple = (sample_rate, audio_data) 52 | >>> file_path = audio_to_file(audio_tuple) 53 | >>> print(f"Audio saved to: {file_path}") 54 | ``` 55 | 56 | ## `aggregate_bytes_to_16bit` 57 | Aggregate bytes to 16-bit audio samples. 58 | 59 | This function takes an iterator of chunks and aggregates them into 16-bit audio samples. 60 | It handles incomplete samples and combines them with the next chunk. 61 | 62 | Parameters 63 | ``` 64 | chunks_iterator : Iterator[bytes] 65 | An iterator of byte chunks to aggregate 66 | ``` 67 | Returns 68 | ``` 69 | Iterator[NDArray[np.int16]] 70 | An iterator of 16-bit audio samples 71 | ``` 72 | Example 73 | ```python 74 | >>> chunks_iterator = [b'\x00\x01', b'\x02\x03', b'\x04\x05'] 75 | >>> for chunk in aggregate_bytes_to_16bit(chunks_iterator): 76 | ...     print(chunk) 77 | ``` 78 | 79 | ## `async_aggregate_bytes_to_16bit` 80 | 81 | Aggregate bytes to 16-bit audio samples asynchronously. 82 | 83 | Parameters 84 | ``` 85 | chunks_iterator : Iterator[bytes] 86 | An iterator of byte chunks to aggregate 87 | ``` 88 | Returns 89 | ``` 90 | Iterator[NDArray[np.int16]] 91 | An iterator of 16-bit audio samples 92 | ``` 93 | Example 94 | ```python 95 | >>> # chunks is an async iterable of bytes (e.g. from an async API client) 96 | >>> async for samples in async_aggregate_bytes_to_16bit(chunks): 97 | ...     print(samples) 98 | ``` 99 | 100 | ## `wait_for_item` 101 | 102 | Wait for an item from an asyncio.Queue with a timeout. 103 | 104 | Parameters 105 | ``` 106 | queue : asyncio.Queue 107 | The queue to wait for an item from 108 | timeout : float 109 | The timeout in seconds 110 | ``` 111 | Returns 112 | ``` 113 | Any 114 | The item from the queue or None if the timeout is reached 115 | ``` 116 | 117 | Example 118 | ```python 119 | >>> queue = asyncio.Queue() 120 | >>> queue.put_nowait(1) 121 | >>> item = await wait_for_item(queue) 122 | >>> print(item) 123 | ``` -------------------------------------------------------------------------------- /frontend/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["prettier-plugin-svelte"], 3 | "overrides": [ 4 | { 5 | "files": "*.svelte", 6 | "options": { 7 | "parser": "svelte" 8 | } 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /frontend/Example.svelte: -------------------------------------------------------------------------------- 1 | 21 | 22 | {#if value} 23 | {#if playable()} 24 |
30 |
38 | {:else} 39 |
{value}
40 | {/if} 41 | {/if} 42 | 43 | 74 | -------------------------------------------------------------------------------- /frontend/gradio.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: [], 3 | svelte: { 4 | preprocess: [], 5 | }, 6 | build: { 7 | target: "modules", 8 | }, 9 | }; 10 | -------------------------------------------------------------------------------- /frontend/index.ts: -------------------------------------------------------------------------------- 1 | export { default as BaseInteractiveVideo } from "./shared/InteractiveVideo.svelte"; 2 | export { prettyBytes, playable, loaded } from "./shared/utils"; 3 | export { default as BaseExample } from "./Example.svelte"; 4 | import { default as Index } from "./Index.svelte"; 5 | export default Index; 6 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@freddyaboulton/fastrtc-component", 3 | "version": "0.0.1", 4 | "description": "Gradio UI packages", 5 | "type": "module", 6 | "author": "", 7 | "license": "ISC", 8 | "private": false, 9 | "dependencies": { 10 | "@ffmpeg/ffmpeg": "^0.12.10", 11 | "@ffmpeg/util": "^0.12.1", 12 | "@gradio/atoms": "0.9.2", 13 | "@gradio/client": "1.7.0", 14 | "@gradio/icons": "0.8.0", 15 | "@gradio/image": "0.16.4", 16 | "@gradio/markdown": "^0.10.3", 17 | "@gradio/statustracker": "0.9.1", 18 | "@gradio/textbox": "^0.10.10", 19 | "@gradio/upload": "0.13.3", 20 | "@gradio/utils": "0.7.0", 21 | "@gradio/wasm": "0.14.2", 22 | "hls.js": "^1.5.16", 23 | "mrmime": "^2.0.0" 24 | }, 25 | "devDependencies": { 26 | "@gradio/preview": "0.12.0", 27 | "prettier": "^3.3.3", 28 | "prettier-plugin-svelte": "^3.3.3" 29 | }, 30 | "exports": { 31 | "./package.json": "./package.json", 32 | ".": { 33 | "gradio": "./index.ts", 34 | "svelte": "./dist/index.js", 35 | "types": "./dist/index.d.ts" 36 | }, 37 | "./example": { 38 | "gradio": "./Example.svelte", 39 | "svelte": "./dist/Example.svelte", 40 | "types": "./dist/Example.svelte.d.ts" 41 | } 42 | }, 43 | "peerDependencies": { 44 | "svelte": "^4.0.0" 45 | }, 46 | "main": "index.ts", 47 | "main_changeset": true, 48 | "repository": { 49 | "type": "git", 50 | "url": "git+https://github.com/gradio-app/fastrtc.git", 51 | "directory": "fastrtc/frontend" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /frontend/shared/InteractiveVideo.svelte: -------------------------------------------------------------------------------- 1 | 55 | 56 | 57 |
58 | 81 | 82 | 83 |
84 | 85 | 94 | -------------------------------------------------------------------------------- /frontend/shared/MicrophoneMuted.svelte: -------------------------------------------------------------------------------- 1 | 21 | -------------------------------------------------------------------------------- /frontend/shared/StaticVideo.svelte: -------------------------------------------------------------------------------- 1 | 97 | 98 | 99 | 100 | {#if value === "__webrtc_value__"} 101 | 102 | {/if} 103 |
104 | 123 |
124 | 125 | 142 | -------------------------------------------------------------------------------- /frontend/shared/WebcamPermissions.svelte: -------------------------------------------------------------------------------- 1 | 13 | 14 | 22 | 23 | 51 | -------------------------------------------------------------------------------- /frontend/shared/index.ts: -------------------------------------------------------------------------------- 1 | export { default as Video } from "./Video.svelte"; 2 | -------------------------------------------------------------------------------- /frontend/shared/stream_utils.ts: -------------------------------------------------------------------------------- 1 | export function get_devices(): Promise { 2 | return navigator.mediaDevices.enumerateDevices(); 3 | } 4 | 5 | export function handle_error(error: string): void { 6 | throw new Error(error); 7 | } 8 | 9 | export function set_local_stream( 10 | local_stream: MediaStream | null, 11 | video_source: HTMLVideoElement, 12 | ): void { 13 | video_source.srcObject = local_stream; 14 | video_source.muted = true; 15 | video_source.play(); 16 | } 17 | 18 | export async function get_video_stream( 19 | include_audio: boolean, 20 | video_source: HTMLVideoElement, 21 | device_id?: string, 22 | track_constraints?: MediaTrackConstraints, 23 | ): Promise { 24 | const fallback_constraints = track_constraints || { 25 | width: { ideal: 500 }, 26 | height: { ideal: 500 }, 27 | }; 28 | 29 | const constraints = { 30 | video: device_id 31 | ? { deviceId: { exact: device_id }, ...fallback_constraints } 32 | : fallback_constraints, 33 | audio: include_audio, 34 | }; 35 | 36 | return navigator.mediaDevices 37 | .getUserMedia(constraints) 38 | .then((local_stream: MediaStream) => { 39 | set_local_stream(local_stream, video_source); 40 | return local_stream; 41 | }); 42 | } 43 | 44 | export function set_available_devices( 45 | devices: MediaDeviceInfo[], 46 | kind: "videoinput" | "audioinput" = "videoinput", 47 | ): MediaDeviceInfo[] { 48 | const cameras = devices.filter( 49 | (device: MediaDeviceInfo) => device.kind === kind, 50 | ); 51 | 52 | return cameras; 53 | } 54 | -------------------------------------------------------------------------------- /frontend/shared/utils.ts: -------------------------------------------------------------------------------- 1 | import { toBlobURL } from "@ffmpeg/util"; 2 | import { FFmpeg } from "@ffmpeg/ffmpeg"; 3 | import { lookup } from "mrmime"; 4 | 5 | export type WebRTCValue = { 6 | textbox: string; 7 | webrtc_id: string; 8 | }; 9 | 10 | export const prettyBytes = (bytes: number): string => { 11 | let units = ["B", "KB", "MB", "GB", "PB"]; 12 | let i = 0; 13 | while (bytes > 1024) { 14 | bytes /= 1024; 15 | i++; 16 | } 17 | let unit = units[i]; 18 | return bytes.toFixed(1) + " " + unit; 19 | }; 20 | 21 | export const playable = (): boolean => { 22 | // TODO: Fix this 23 | // let video_element = document.createElement("video"); 24 | // let mime_type = mime.lookup(filename); 25 | // return video_element.canPlayType(mime_type) != ""; 26 | return true; // FIX BEFORE COMMIT - mime import causing issues 27 | }; 28 | 29 | export function loaded( 30 | node: HTMLVideoElement, 31 | { autoplay }: { autoplay: boolean }, 32 | ): any { 33 | async function handle_playback(): Promise { 34 | if (!autoplay) return; 35 | await node.play(); 36 | } 37 | 38 | node.addEventListener("loadeddata", handle_playback); 39 | 40 | return { 41 | destroy(): void { 42 | node.removeEventListener("loadeddata", handle_playback); 43 
| }, 44 | }; 45 | } 46 | 47 | export default async function loadFfmpeg(): Promise { 48 | const ffmpeg = new FFmpeg(); 49 | const baseURL = "https://unpkg.com/@ffmpeg/core@0.12.4/dist/esm"; 50 | 51 | await ffmpeg.load({ 52 | coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, "text/javascript"), 53 | wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, "application/wasm"), 54 | }); 55 | 56 | return ffmpeg; 57 | } 58 | 59 | export function blob_to_data_url(blob: Blob): Promise { 60 | return new Promise((fulfill, reject) => { 61 | let reader = new FileReader(); 62 | reader.onerror = reject; 63 | reader.onload = () => fulfill(reader.result as string); 64 | reader.readAsDataURL(blob); 65 | }); 66 | } 67 | 68 | export async function trimVideo( 69 | ffmpeg: FFmpeg, 70 | startTime: number, 71 | endTime: number, 72 | videoElement: HTMLVideoElement, 73 | ): Promise { 74 | const videoUrl = videoElement.src; 75 | const mimeType = lookup(videoElement.src) || "video/mp4"; 76 | const blobUrl = await toBlobURL(videoUrl, mimeType); 77 | const response = await fetch(blobUrl); 78 | const vidBlob = await response.blob(); 79 | const type = getVideoExtensionFromMimeType(mimeType) || "mp4"; 80 | const inputName = `input.${type}`; 81 | const outputName = `output.${type}`; 82 | 83 | try { 84 | if (startTime === 0 && endTime === 0) { 85 | return vidBlob; 86 | } 87 | 88 | await ffmpeg.writeFile( 89 | inputName, 90 | new Uint8Array(await vidBlob.arrayBuffer()), 91 | ); 92 | 93 | let command = [ 94 | "-i", 95 | inputName, 96 | ...(startTime !== 0 ? ["-ss", startTime.toString()] : []), 97 | ...(endTime !== 0 ? ["-to", endTime.toString()] : []), 98 | "-c:a", 99 | "copy", 100 | outputName, 101 | ]; 102 | 103 | await ffmpeg.exec(command); 104 | const outputData = await ffmpeg.readFile(outputName); 105 | const outputBlob = new Blob([outputData], { 106 | type: `video/${type}`, 107 | }); 108 | 109 | return outputBlob; 110 | } catch (error) { 111 | console.error("Error initializing FFmpeg:", error); 112 | return vidBlob; 113 | } 114 | } 115 | 116 | const getVideoExtensionFromMimeType = (mimeType: string): string | null => { 117 | const videoMimeToExtensionMap: { [key: string]: string } = { 118 | "video/mp4": "mp4", 119 | "video/webm": "webm", 120 | "video/ogg": "ogv", 121 | "video/quicktime": "mov", 122 | "video/x-msvideo": "avi", 123 | "video/x-matroska": "mkv", 124 | "video/mpeg": "mpeg", 125 | "video/3gpp": "3gp", 126 | "video/3gpp2": "3g2", 127 | "video/h261": "h261", 128 | "video/h263": "h263", 129 | "video/h264": "h264", 130 | "video/jpeg": "jpgv", 131 | "video/jpm": "jpm", 132 | "video/mj2": "mj2", 133 | "video/mpv": "mpv", 134 | "video/vnd.ms-playready.media.pyv": "pyv", 135 | "video/vnd.uvvu.mp4": "uvu", 136 | "video/vnd.vivo": "viv", 137 | "video/x-f4v": "f4v", 138 | "video/x-fli": "fli", 139 | "video/x-flv": "flv", 140 | "video/x-m4v": "m4v", 141 | "video/x-ms-asf": "asf", 142 | "video/x-ms-wm": "wm", 143 | "video/x-ms-wmv": "wmv", 144 | "video/x-ms-wmx": "wmx", 145 | "video/x-ms-wvx": "wvx", 146 | "video/x-sgi-movie": "movie", 147 | "video/x-smv": "smv", 148 | }; 149 | 150 | return videoMimeToExtensionMap[mimeType] || null; 151 | }; 152 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | # Upload a single demo space 2 | default: 3 | @just --list 4 | 5 | upload path: 6 | python upload_space.py demo/{{path}} 7 | 8 | # Upload all demo spaces 9 | upload-all: 10 | python upload_space.py 
demo --all 11 | 12 | # Run a demo with uvicorn 13 | run name: 14 | uvicorn demo.{{name}}.app:app --port 8000 15 | 16 | # Run the gradio ui for a demo 17 | gradio name: 18 | MODE=UI python demo/{{name}}/app.py 19 | 20 | # Run a demo with phone mode 21 | phone name: 22 | MODE=PHONE python demo/{{name}}/app.py 23 | 24 | call name: 25 | MODE=PHONE python demo/{{name}}/app.py 26 | 27 | # Upload the latest wheel file to PyPI using twine 28 | publish: 29 | #!/usr/bin/env python 30 | import glob 31 | import os 32 | from pathlib import Path 33 | 34 | # Find all wheel files in dist directory 35 | wheels = glob.glob('dist/*.whl') 36 | if not wheels: 37 | print("No wheel files found in dist directory") 38 | exit(1) 39 | 40 | # Sort by creation time to get the latest 41 | latest_wheel = max(wheels, key=os.path.getctime) 42 | print(f"Uploading {latest_wheel}") 43 | os.system(f"twine upload {latest_wheel}") 44 | 45 | # Upload the latest wheel to HF space with a random ID 46 | publish-dev: 47 | #!/usr/bin/env python 48 | import glob 49 | import os 50 | import uuid 51 | import subprocess 52 | 53 | # Find all wheel files in dist directory 54 | wheels = glob.glob('dist/*.whl') 55 | if not wheels: 56 | print("No wheel files found in dist directory") 57 | exit(1) 58 | 59 | # Sort by creation time to get the latest 60 | latest_wheel = max(wheels, key=os.path.getctime) 61 | wheel_name = os.path.basename(latest_wheel) 62 | 63 | # Generate random ID 64 | random_id = str(uuid.uuid4())[:8] 65 | 66 | # Define the HF path 67 | hf_space = "freddyaboulton/bucket" 68 | hf_path = f"wheels/fastrtc/{random_id}/" 69 | 70 | # Upload to Hugging Face space 71 | cmd = f"huggingface-cli upload {hf_space} {latest_wheel} {hf_path}{wheel_name} --repo-type dataset" 72 | subprocess.run(cmd, shell=True, check=True) 73 | 74 | # Print the URL 75 | print(f"Wheel uploaded successfully!") 76 | print(f"URL: https://huggingface.co/datasets/{hf_space}/resolve/main/{hf_path}{wheel_name}") 77 | 78 | # Build the package 79 | build: 80 | gradio cc build --no-generate-docs 81 | 82 | # Format the code 83 | format: 84 | ruff format . 85 | ruff check --fix . 86 | ruff check --select I --fix . 87 | cd frontend && npx prettier --write . && cd .. 
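# Serve the documentation locally with mkdocs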
88 | 89 | docs: 90 | mkdocs serve -a localhost:8081 -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: FastRTC 2 | site_url: https://fastrtc.org 3 | repo_name: fastrtc 4 | repo_url: https://github.com/gradio-app/fastrtc 5 | theme: 6 | name: material 7 | custom_dir: overrides 8 | palette: 9 | scheme: fastrtc-dark 10 | features: 11 | - content.code.copy 12 | - content.code.annotate 13 | - navigation.indexes 14 | logo: fastrtc_logo.png 15 | favicon: fastrtc_logo.png 16 | extra_css: 17 | - stylesheets/extra.css 18 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark.min.css 19 | nav: 20 | - Home: index.md 21 | - User Guide: 22 | - Core Concepts: userguide/streams.md 23 | - Audio Streaming: userguide/audio.md 24 | - Video Streaming: userguide/video.md 25 | - Audio-Video Streaming: userguide/audio-video.md 26 | - Gradio: userguide/gradio.md 27 | - API: userguide/api.md 28 | - Cookbook: cookbook.md 29 | - Deployment: deployment.md 30 | - Advanced Configuration: advanced-configuration.md 31 | - Plugin Ecosystem: 32 | - Text-to-Speech Gallery: text_to_speech_gallery.md 33 | - Speech-to-Text Gallery: speech_to_text_gallery.md 34 | - Turn-taking Gallery: turn_taking_gallery.md 35 | - Utils: utils.md 36 | - Frequently Asked Questions: faq.md 37 | - API Reference: 38 | - Stream: reference/stream.md 39 | - Pause Detection Handlers: reference/reply_on_pause.md 40 | - Stream Handlers: reference/stream_handlers.md 41 | - Utils: reference/utils.md 42 | - TURN Credentials: reference/credentials.md 43 | 44 | extra_javascript: 45 | - https://cdn.jsdelivr.net/npm/marked/marked.min.js 46 | - https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js 47 | markdown_extensions: 48 | - pymdownx.highlight: 49 | anchor_linenums: true 50 | line_spans: __span 51 | pygments_lang_class: true 52 | - pymdownx.inlinehilite 53 | - pymdownx.snippets 54 | - pymdownx.superfences 55 | - pymdownx.tabbed: 56 | alternate_style: true 57 | - attr_list 58 | - md_in_html 59 | - pymdownx.emoji: 60 | emoji_index: !!python/name:material.extensions.emoji.twemoji 61 | emoji_generator: !!python/name:material.extensions.emoji.to_svg 62 | - admonition 63 | - pymdownx.details 64 | plugins: 65 | - llmstxt: 66 | files: 67 | - output: llms.txt 68 | inputs: 69 | - index.md 70 | - userguide/*.md 71 | - deployment.md 72 | - advanced-configuration.md 73 | - faq.md 74 | - reference/*.md -------------------------------------------------------------------------------- /overrides/partials/header.html: -------------------------------------------------------------------------------- 1 | {#- 2 | This file was automatically generated - do not edit 3 | -#} 4 | {% set class = "md-header" %} 5 | {% if "navigation.tabs.sticky" in features %} 6 | {% set class = class ~ " md-header--shadow md-header--lifted" %} 7 | {% elif "navigation.tabs" not in features %} 8 | {% set class = class ~ " md-header--shadow" %} 9 | {% endif %} 10 |
11 | 79 | {% if "navigation.tabs.sticky" in features %} 80 | {% if "navigation.tabs" in features %} 81 | {% include "partials/tabs.html" %} 82 | {% endif %} 83 | {% endif %} 84 |
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "hatchling", 4 | "hatch-requirements-txt", 5 | "hatch-fancy-pypi-readme>=22.5.0", 6 | ] 7 | build-backend = "hatchling.build" 8 | 9 | [project] 10 | name = "fastrtc" 11 | version = "0.0.26" 12 | description = "The realtime communication library for Python" 13 | readme = "README.md" 14 | license = "MIT" 15 | license-files = ["LICENSE"] 16 | requires-python = ">=3.10" 17 | authors = [{ name = "Freddy Boulton", email = "arugula.ligand.92@icloud.com" }] 18 | keywords = [ 19 | "streaming", 20 | "webrtc", 21 | "realtime", 22 | "machine learning", 23 | "computer vision", 24 | "audio", 25 | "video", 26 | "image", 27 | "audio processing", 28 | "video processing", 29 | "gradio-custom-component", 30 | ] 31 | dependencies = [ 32 | "gradio>=4.0,<6.0", 33 | "aiortc", 34 | "aioice>=0.10.1", 35 | "audioop-lts;python_version>='3.13'", 36 | "librosa", 37 | "numpy>=2.0.2", # because of librosa 38 | "numba>=0.60.0", 39 | "standard-aifc;python_version>='3.13'", 40 | "standard-sunau;python_version>='3.13'", 41 | ] # Add dependencies here 42 | classifiers = [ 43 | 'Development Status :: 3 - Alpha', 44 | 'Operating System :: OS Independent', 45 | 'Programming Language :: Python :: 3', 46 | 'Programming Language :: Python :: 3 :: Only', 47 | 'Programming Language :: Python :: 3.10', 48 | 'Programming Language :: Python :: 3.11', 49 | 'Programming Language :: Python :: 3.12', 50 | 'Programming Language :: Python :: 3.13', 51 | 'Topic :: Internet', 52 | "Topic :: Software Development :: Libraries :: Application Frameworks", 53 | "Topic :: Software Development :: Libraries :: Python Modules", 54 | "Topic :: Software Development :: Libraries", 55 | "Topic :: Software Development", 56 | 'Topic :: Scientific/Engineering', 57 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 58 | 'Topic :: Scientific/Engineering :: Visualization', 59 | ] 60 | 61 | # The repository and space URLs are optional, but recommended. 62 | # Adding a repository URL will create a badge in the auto-generated README that links to the repository. 63 | # Adding a space URL will create a badge in the auto-generated README that links to the space. 64 | # This will make it easy for people to find your deployed demo or source code when they 65 | # encounter your project in the wild. 
66 | 67 | [project.urls] 68 | repository = "https://github.com/gradio-app/fastrtc" 69 | issues = "https://github.com/gradio-app/fastrtc/issues" 70 | Documentation = "https://fastrtc.org/" 71 | 72 | [project.optional-dependencies] 73 | dev = ["build", "twine", "httpx", "pytest", "pytest-asyncio"] 74 | vad = ["onnxruntime>=1.20.1"] 75 | tts = ["kokoro-onnx"] 76 | stopword = ["fastrtc-moonshine-onnx", "onnxruntime>=1.20.1"] 77 | stt = ["fastrtc-moonshine-onnx", "onnxruntime>=1.20.1"] 78 | 79 | [tool.hatch.build] 80 | artifacts = ["/backend/fastrtc/templates", "*.pyi"] 81 | 82 | [tool.hatch.build.targets.wheel] 83 | packages = ["/backend/fastrtc"] 84 | 85 | [tool.pytest.ini_options] 86 | testpaths = ["test/"] 87 | asyncio_mode = "auto" 88 | asyncio_default_fixture_loop_scope = "function" 89 | 90 | [tool.ruff] 91 | src = ["demo", "backend/fastrtc", "test"] 92 | target-version = "py310" 93 | extend-exclude = ["demo/phonic_chat", "demo/nextjs_voice_chat"] 94 | 95 | [tool.ruff.format] 96 | exclude = ["*.pyi"] 97 | quote-style = "double" 98 | indent-style = "space" 99 | skip-magic-trailing-comma = false 100 | line-ending = "auto" 101 | 102 | [tool.ruff.lint] 103 | select = ["E", "F", "W", "Q", "I", "UP"] 104 | 105 | # These can be turned on when the framework is more mature (Too many errors right now) 106 | exclude = ["D"] 107 | 108 | # Avoid enforcing line-length violations (`E501`) 109 | ignore = ["E501"] 110 | 111 | [tool.ruff.lint.pydocstyle] 112 | convention = "google" 113 | 114 | [tool.ruff.lint.per-file-ignores] 115 | "__init__.py" = ["E402"] 116 | "demo/talk_to_smolagents/app.py" = ["W291"] 117 | 118 | [tool.pyright] 119 | include = ["backend/fastrtc"] 120 | exclude = ["**/__pycache__", "**/*.pyi"] 121 | 122 | reportMissingImports = false 123 | reportMissingTypeStubs = false 124 | 125 | pythonVersion = "3.10" 126 | pythonPlatform = "Linux" 127 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gradio-app/fastrtc/c97b1885c059bb9446f80a542ee589676021eae9/test/__init__.py -------------------------------------------------------------------------------- /test/test_tts.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastrtc.text_to_speech.tts import get_tts_model 3 | 4 | 5 | @pytest.mark.parametrize("model", ["kokoro"]) 6 | def test_tts_long_prompt(model): 7 | model = get_tts_model(model=model) 8 | prompt = "It may be that this communication will be considered as a madman's freak but at any rate it must be admitted that in its clearness and frankness it left nothing to be desired The serious part of it was that the Federal Government had undertaken to treat a sale by auction as a valid concession of these undiscovered territories Opinions on the matter were many Some readers saw in it only one of those prodigious outbursts of American humbug which would exceed the limits of puffism if the depths of human credulity were not unfathomable" 9 | 10 | for i, chunk in enumerate(model.stream_tts_sync(prompt)): 11 | print(f"Chunk {i}: {chunk[1].shape}") 12 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from fastrtc.utils import audio_to_float32, audio_to_int16 4 | 5 | 6 | def 
test_audio_to_float32_valid_int16(): 7 | audio = np.array([-32768, 0, 32767], dtype=np.int16) 8 | expected = np.array([-1.0, 0.0, 32767 / 32768.0], dtype=np.float32) 9 | result = audio_to_float32(audio) 10 | np.testing.assert_array_almost_equal(result, expected) 11 | 12 | 13 | def test_audio_to_float32_valid_float32(): 14 | audio = np.array([-1.0, 0.0, 1.0], dtype=np.float32) 15 | result = audio_to_float32(audio) 16 | np.testing.assert_array_equal(result, audio) 17 | 18 | 19 | def test_audio_to_float32_empty_array(): 20 | audio = np.array([], dtype=np.int16) 21 | result = audio_to_float32(audio) 22 | np.testing.assert_array_equal(result, np.array([], dtype=np.float32)) 23 | 24 | 25 | def test_audio_to_float32_invalid_dtype(): 26 | audio = np.array([1, 2, 3], dtype=np.int32) 27 | with pytest.raises(TypeError, match="Unsupported audio data type"): 28 | audio_to_float32(audio) # type: ignore 29 | 30 | 31 | def test_audio_to_int16_valid_float32(): 32 | audio = np.array([-1.0, 0.0, 1.0], dtype=np.float32) 33 | expected = np.array([-32767, 0, 32767], dtype=np.int16) 34 | result = audio_to_int16(audio) 35 | np.testing.assert_array_equal(result, expected) 36 | 37 | 38 | def test_audio_to_int16_valid_int16(): 39 | audio = np.array([-32768, 0, 32767], dtype=np.int16) 40 | result = audio_to_int16(audio) 41 | np.testing.assert_array_equal(result, audio) 42 | 43 | 44 | def test_audio_to_int16_empty_array(): 45 | audio = np.array([], dtype=np.float32) 46 | result = audio_to_int16(audio) 47 | np.testing.assert_array_equal(result, np.array([], dtype=np.int16)) 48 | 49 | 50 | def test_audio_to_int16_invalid_dtype(): 51 | audio = np.array([1, 2, 3], dtype=np.int32) 52 | with pytest.raises(TypeError, match="Unsupported audio data type"): 53 | audio_to_int16(audio) # type: ignore 54 | 55 | 56 | def test_legacy_arguments(): 57 | result = audio_to_float32((16000, np.zeros(10, dtype=np.int16))) 58 | np.testing.assert_array_equal(result, np.zeros(10, dtype=np.float32)) 59 | 60 | result = audio_to_int16((16000, np.zeros(10, dtype=np.float32))) 61 | np.testing.assert_array_equal(result, np.zeros(10, dtype=np.int16)) 62 | --------------------------------------------------------------------------------
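The tests above pin down the conversion conventions: `audio_to_float32` scales int16 PCM into the [-1.0, 1.0] float32 range, `audio_to_int16` converts back, and both still accept the legacy `(sample_rate, array)` tuple form. A minimal usage sketch built only on what these tests exercise — the synthetic tone and the attenuation step are illustrative, not part of the library:

```python
import numpy as np
from fastrtc.utils import audio_to_float32, audio_to_int16

# Half a second of a 440 Hz tone as int16 samples.
sample_rate = 16000
t = np.arange(int(0.5 * sample_rate)) / sample_rate
pcm = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)

samples = audio_to_float32(pcm)              # int16 -> float32 in [-1.0, 1.0]
samples = np.clip(samples * 0.5, -1.0, 1.0)  # example processing step: attenuate
pcm_out = audio_to_int16(samples)            # float32 -> int16 for playback
```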